summary refs log tree commit diff stats
path: root/xlators/mgmt/glusterd/src/glusterd-snapshot.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-snapshot.c')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-snapshot.c 644
1 files changed, 638 insertions, 6 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 513d83616..b00e8d7f5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -43,6 +43,355 @@
#include <mntent.h>
#endif
+
+
+/* This function will do the actual snapshot restore on the brick by
+ * running "/sbin/lvconvert --merge <device>" for the brick's device.
+ *
+ * The big lock is released while the external command runs and is taken
+ * again before the result is examined.
+ *
+ * @param brickinfo     brickinfo structure
+ * @param snapname      name of the snap which will be restored (used only
+ *                      in the debug log message)
+ *
+ * @return 0 on success, -1 if the brick device cannot be determined,
+ *         otherwise the non-zero result of runner_run ()
+ */
+int
+glusterd_snapshot_restore_brick_snap (glusterd_brickinfo_t *brickinfo,
+                                      char *snapname)
+{
+        int              ret             = -1;
+        char            *device          = NULL;
+        xlator_t        *this            = NULL;
+        runner_t         runner          = {0,};
+        glusterd_conf_t *conf            = NULL;
+        char             msg[PATH_MAX]   = {0,};
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (brickinfo);
+        GF_ASSERT (snapname);
+
+        /* Using the brickinfo get the actual device name */
+        device = glusterd_get_brick_mount_details (brickinfo);
+        if (NULL == device) {
+                /* A NULL device would act as the terminator of the
+                 * argument list below, so lvconvert would be started
+                 * without any device. Fail early instead.
+                 */
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get the device "
+                        "of the brick (%s:%s)", brickinfo->hostname,
+                        brickinfo->path);
+                goto out;
+        }
+        /* NOTE(review): ownership of 'device' is not visible from here;
+         * confirm whether glusterd_get_brick_mount_details () expects the
+         * caller to free it.
+         */
+
+        runinit (&runner);
+        snprintf (msg, sizeof (msg), "Restoring snapshot of the brick %s:%s "
+                  "to %s snap", brickinfo->hostname, brickinfo->path,
+                  snapname);
+
+        /* Command for restoring the snapshot */
+        runner_add_args (&runner, "/sbin/lvconvert", "--merge", device, NULL);
+        runner_log (&runner, "", GF_LOG_DEBUG, msg);
+
+        /* Do not hold the big lock while the external command runs */
+        synclock_unlock (&conf->big_lock);
+        /* Run the actual command */
+        ret = runner_run (&runner);
+        synclock_lock (&conf->big_lock);
+
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "snapshot restore of the "
+                        "brick (%s:%s) of device %s failed",
+                        brickinfo->hostname, brickinfo->path, device);
+        }
+
+out:
+        return ret;
+}
+
+/* This function will restore the snapshot for the entire volume.
+ *
+ * Every brick of the snap volume that belongs to this node is restored
+ * through glusterd_snapshot_restore_brick_snap (); bricks of other nodes
+ * are skipped. When all local bricks (possibly none) have been restored
+ * the snap is marked as restored.
+ *
+ * @param snap          snap object which needs to be restored
+ * @param op_errstr     In case of any failure error message will be returned
+ *                      in this variable
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore_snap (glusterd_snap_t *snap, char **op_errstr)
+{
+        int                   ret       = -1;
+        xlator_t             *this      = NULL;
+        glusterd_volinfo_t   *volinfo   = NULL;
+        glusterd_brickinfo_t *brickinfo = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (snap);
+        GF_ASSERT (snap->snap_volume);
+        GF_ASSERT (op_errstr);
+
+        /* For restore always take volinfo stored in snap. Do not use
+         * volinfo of the original volume*/
+        volinfo = snap->snap_volume;
+
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* This code is executed on each node of the volume. We need
+                 * to run the restore only on those bricks which are present
+                 * in this node. Therefore check if the brick belongs to this
+                 * node or not.
+                 */
+                if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+                        continue; /* Bricks not present in this node */
+                }
+
+                /* This case should never occur as volume is already stopped.
+                 * Just to avoid a case where the brick is explicitly started
+                 * by the user we have this check here.
+                 */
+                if (glusterd_is_brick_started (brickinfo)) {
+                        ret = gf_asprintf (op_errstr, "Brick (%s: %s) is "
+                                           "running therefore snapshot cannot "
+                                           "be restored", brickinfo->hostname,
+                                           brickinfo->path);
+                        if (ret < 0) {
+                                goto out;
+                        }
+
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Do the actual snapshot restore */
+                ret = glusterd_snapshot_restore_brick_snap (brickinfo,
+                                                            snap->snap_name);
+                if (ret) {
+                        ret = gf_asprintf (op_errstr, "Snapshot restore failed"
+                                           " for %s:%s", brickinfo->hostname,
+                                           brickinfo->path);
+                        if (ret < 0) {
+                                goto out;
+                        }
+
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Reaching this point means no local brick failed. Set the result
+         * explicitly: if this node hosts no brick of the volume the loop
+         * never assigns ret and it would still hold its initial -1.
+         */
+        ret = 0;
+
+        /* TODO: Move this code to postvalidate */
+        snap->snap_restored = _gf_true;
+        /* TODO: persist the change in store */
+
+out:
+        return ret;
+}
+
+/* This function will restore a snapshot for the entire
+ * volume or the entire CG (Consistency Group)
+ *
+ * The dictionary is expected to carry "volcount", one "volname<N>" key per
+ * volume (N starting at 1) and a single "snapname" key shared by all the
+ * volumes. Processing stops at the first volume that fails.
+ *
+ * @param dict          dictionary containing snapshot restore request
+ * @param op_errstr     In case of any failure error message will be returned
+ *                      in this variable
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore (dict_t *dict, char **op_errstr)
+{
+        int                 ret            = -1;
+        int64_t             i              = 0;
+        int64_t             volcount       = 0;
+        char               *volname        = NULL;
+        char               *snapname       = NULL;
+        xlator_t           *this           = NULL;
+        glusterd_volinfo_t *volinfo        = NULL;
+        glusterd_snap_t    *snap           = NULL;
+        char                key[PATH_MAX]  = {0,};
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count");
+                goto out;
+        }
+
+        /* If we are performing snapshot restore of a CG then volcount will be
+         * greater than 1 else volcount will be 1.
+         */
+        for (i = 0; i < volcount; ++i) {
+                /* TODO: Start the index from 0 when Jarvis code is fixed.
+                 * The cast keeps the argument in step with "%ld"; int64_t
+                 * is not 'long' on every platform.
+                 */
+                snprintf (key, sizeof (key), "volname%ld", (long)(i + 1));
+                ret = dict_get_str (dict, key, &volname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                "get volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) not found",
+                                           volname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+                                "snap name");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (volinfo, snapname);
+                if (NULL == snap) {
+                        ret = gf_asprintf (op_errstr, "Snap (%s) not found",
+                                           snapname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Restore the snap for the entire volume */
+                ret = glusterd_snapshot_restore_snap (snap, op_errstr);
+                if (ret) {
+                        /* No need to update op_errstr because it is assumed
+                         * that the called function will do that in case of
+                         * failure.
+                         */
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to restore "
+                                "snap for %s volume", volname);
+                        goto out;
+                }
+        }
+
+        ret = 0; /* Success */
+
+        /* TODO: Need to check if we need to delete the snap after the
+         * operation is successful or not. Also need to persist the state
+         * of restore operation in the store.
+         */
+out:
+        return ret;
+}
+
+/* This function is called before the actual restore takes place. It
+ * validates whether the volume or CG is ready to be restored or not.
+ *
+ * For every "volname<N>" in the dictionary (N starting at 1, up to
+ * "volcount") the volume must exist and must not be started, and the snap
+ * named by "snapname" must exist for it and must not have been restored
+ * already.
+ *
+ * @param dict          dictionary containing snapshot restore request
+ * @param op_errstr     In case of any failure error message will be returned
+ *                      in this variable
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr)
+{
+        int                 ret            = -1;
+        int64_t             i              = 0;
+        int64_t             volcount       = 0;
+        char               *volname        = NULL;
+        char               *snapname       = NULL;
+        glusterd_volinfo_t *volinfo        = NULL;
+        glusterd_snap_t    *snap           = NULL;
+        xlator_t           *this           = NULL;
+        char                key[PATH_MAX]  = {0, };
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        ret = dict_get_int64 (dict, "volcount", &volcount);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count");
+                goto out;
+        }
+
+        /* Snapshot restore will only work if the volume is stopped.
+         * If volume is running then snapshot restore will fail. In
+         * case of CG if any of the volume in the CG is running then
+         * snapshot restore for the entire CG will fail
+         */
+        for (i = 0; i < volcount; ++i) {
+                /* TODO: Start the index from 0 when Jarvis code is fixed.
+                 * The cast keeps the argument in step with "%ld"; int64_t
+                 * is not 'long' on every platform.
+                 */
+                snprintf (key, sizeof (key), "volname%ld", (long)(i + 1));
+                ret = dict_get_str (dict, key, &volname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                                "get volume name");
+                        goto out;
+                }
+
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) not found",
+                                           volname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                if (glusterd_is_volume_started (volinfo)) {
+                        ret = gf_asprintf (op_errstr, "Volume (%s) is running",
+                                           volname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+                                "snap name");
+                        goto out;
+                }
+
+                snap = glusterd_find_snap_by_name (volinfo, snapname);
+                if (NULL == snap) {
+                        ret = gf_asprintf (op_errstr, "Snap (%s) not found",
+                                           snapname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+
+                /* A snap can be restored only once */
+                if (snap->snap_restored) {
+                        ret = gf_asprintf (op_errstr, "Snap (%s) already "
+                                           "restored", snapname);
+                        if (ret < 0) {
+                                goto out;
+                        }
+                        gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
int
glusterd_snapshot_config_limit_prevalidate (dict_t *dict, char **op_errstr,
int config_command)
@@ -241,7 +590,7 @@ out:
int
glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+ dict_t *rsp_dict)
{
int snap_command = 0;
xlator_t *this = NULL;
@@ -270,10 +619,17 @@ glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
ret = glusterd_snapshot_config_prevalidate (dict, op_errstr);
break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot restore "
+ "validation failed");
+ goto out;
+ }
+ break;
default:
gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
goto out;
- break;
}
out:
return ret;
@@ -361,6 +717,33 @@ out:
}
glusterd_snap_t*
+glusterd_find_snap_by_index (glusterd_volinfo_t *volinfo, uint64_t index)
+{
+        /* Return the snap at zero-based position 'index' in volinfo->snaps,
+         * or NULL when volinfo is NULL or the list has fewer than index + 1
+         * entries. The list is walked under volinfo->lock.
+         */
+        uint64_t         count = 0;
+        glusterd_snap_t *entry = NULL;
+        glusterd_snap_t *snap  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+
+        LOCK (&volinfo->lock);
+        {
+                list_for_each_entry (entry, &volinfo->snaps, snap_list) {
+                        if (index == count) {
+                                /* Record the match separately: once the
+                                 * loop runs to completion the cursor no
+                                 * longer refers to a real snap, so it must
+                                 * not be used as the return value.
+                                 */
+                                snap = entry;
+                                gf_log (THIS->name, GF_LOG_DEBUG, "Found "
+                                        "snap %s (%s)", snap->snap_name,
+                                        uuid_utoa (snap->snap_id));
+                                break;
+                        }
+                        ++count;
+                }
+        }
+        UNLOCK (&volinfo->lock);
+out:
+        return snap;
+}
+
+glusterd_snap_t*
glusterd_find_snap_by_name (glusterd_volinfo_t *volinfo, char *snap_name)
{
uint64_t count = -1;
@@ -1142,7 +1525,7 @@ glusterd_snapshot_cg_get_snaplist_lk (dict_t *dict, glusterd_snap_cg_t *cg,
char *value = NULL;
xlator_t *this = NULL;
int64_t i = 0;
- char key[256];
+ char key[256]= {0,};
this = THIS;
@@ -1383,7 +1766,7 @@ glusterd_snapshot_get_snaplist (dict_t *voldict, dict_t *rspdict,
int64_t i = 0;
char *volname = NULL;
xlator_t *this = NULL;
- char key[256];
+ char key[256] = {0,};
this = THIS;
@@ -1469,7 +1852,7 @@ glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op,
dict_t *voldict = NULL;
xlator_t *this = NULL;
char *err_str = "Operation failed";
- char key[256];
+ char key[256] = {0,};
this = THIS;
GF_ASSERT (this);
@@ -1584,6 +1967,233 @@ out:
return ret;
}
+/* TODO: This function needs a revisit.
+ *
+ * As of now only one snap is supported per CG. This function will
+ * retrieve the name of the snap which belongs to the CG (taken from the
+ * first volume of the CG) and put a copy of it in the dictionary under
+ * the "snapname" key.
+ *
+ * glusterd_handle_snapshot_restore () calls this with cg->lock held.
+ *
+ * @param dict          dictionary containing snapshot restore request
+ * @param cg            CG object.
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_get_cg_snap_name_lk (dict_t *dict, glusterd_snap_cg_t *cg)
+{
+        int              ret        = -1;
+        uint64_t         snap_count = 0;
+        char            *snapname   = NULL;
+        glusterd_snap_t *snap       = NULL;
+        xlator_t        *this       = NULL;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (cg);
+        /* CG should have at least one volume*/
+        GF_ASSERT (cg->volume_count > 0);
+
+        /* Runtime guard before touching cg->volumes[0].
+         * NOTE(review): added because it is not visible here whether
+         * GF_ASSERT stops execution in every build - confirm.
+         */
+        if (cg->volume_count < 1) {
+                gf_log (this->name, GF_LOG_ERROR, "No volume is associated "
+                        "with the cg (%s)", cg->cg_name);
+                ret = -1;
+                goto out;
+        }
+
+        /* TODO: As of now only one snap is supported per CG. When CG
+         * management module comes in then this restriction can be removed.
+         */
+        snap_count = cg->volumes[0].snap_count;
+        if (1 != snap_count) {
+                /* The count may also be 0, so report the actual value
+                 * instead of claiming that there is more than one snap.
+                 */
+                gf_log (this->name, GF_LOG_ERROR, "Exactly one snap must be "
+                        "associated with the cg (%s), found %llu",
+                        cg->cg_name, (unsigned long long)snap_count);
+                ret = -1;
+                goto out;
+        }
+
+        snap = glusterd_find_snap_by_index (&(cg->volumes[0]), 0);
+        if (NULL == snap) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to get snap for "
+                        "%s CG", cg->cg_name);
+                ret = -1;
+                goto out;
+        }
+
+        snapname = gf_strdup (snap->snap_name);
+        if (NULL == snapname) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (dict, "snapname", snapname);
+        if (ret) {
+                /* The copy is released here when it could not be stored */
+                GF_FREE (snapname);
+                gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                        "set snap name");
+                goto out;
+        }
+
+out:
+        return ret;
+}
+
+/* This is a helper function which will get all the volume names present
+ * in the CG and write them into the dictionary.
+ *
+ * "volcount" is set to cg->volume_count and a copy of each volume name is
+ * stored under "volname<N>", N starting at 1.
+ *
+ * glusterd_handle_snapshot_restore () calls this with cg->lock held.
+ *
+ * @param dict          dictionary where volume names should be written
+ * @param cg            CG object.
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_get_cg_volume_names_lk (dict_t *dict, glusterd_snap_cg_t *cg)
+{
+        int       ret            = -1;
+        int64_t   i              = 0;
+        char     *volname        = NULL;
+        xlator_t *this           = NULL;
+        char      key[PATH_MAX]  = {0,};
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (cg);
+
+        ret = dict_set_int64 (dict, "volcount", cg->volume_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                        "set volume count");
+                goto out;
+        }
+
+        /* Set volume name of all the volumes present in CG in dict so that
+         * Jarvis can use this to acquire volume locks on all the volume
+         * present in the CG.
+         */
+        for (i = 0; i < cg->volume_count; ++i) {
+                /* TODO: When Jarvis framework is fixed change the index
+                 * to start from 0 instead of 1.
+                 * The cast keeps the argument in step with "%ld"; int64_t
+                 * is not 'long' on every platform.
+                 */
+                snprintf (key, sizeof (key), "volname%ld", (long)(i + 1));
+                volname = gf_strdup (cg->volumes[i].volname);
+                if (NULL == volname) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = dict_set_dynstr (dict, key, volname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to set volname");
+                        /* The copy is released here when it could not be
+                         * stored in the dictionary.
+                         */
+                        GF_FREE (volname);
+                        volname = NULL;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/* This is a snapshot restore handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling jarvis framework to do the actual restore on all the bricks
+ *
+ * The request either carries "volcount" together with "volname1" and
+ * "snapname", or it carries "cgname". In the latter case the snap name
+ * and the volume names of the CG are added to the dictionary (under
+ * cg->lock) before the snap phases are initiated.
+ *
+ * @param req           RPC request object
+ * @param op            gluster operation
+ * @param dict          dictionary containing snapshot restore request
+ * @param err_str       In case of an err this string should be populated
+ * @param len           length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op,
+                                  dict_t *dict, char *err_str, size_t len)
+{
+        int                 ret        = -1;
+        int64_t             vol_count  = 0;
+        char               *volname    = NULL;
+        char               *snapname   = NULL;
+        char               *cgname     = NULL;
+        glusterd_conf_t    *conf       = NULL;
+        glusterd_snap_cg_t *cg         = NULL;
+        xlator_t           *this       = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        conf = this->private;
+
+        GF_ASSERT (conf);
+        GF_ASSERT (req);
+        GF_ASSERT (dict);
+        GF_ASSERT (err_str);
+
+        /* Every failure path below fills err_str before logging, because
+         * the caller reports err_str whenever this function fails.
+         */
+
+        /* If volume name is provided then volcount will be set */
+        ret = dict_get_int64 (dict, "volcount", &vol_count);
+        if (ret) {
+                /* If volcount is not provided then cgname must be there */
+                ret = dict_get_str (dict, "cgname", &cgname);
+                if (ret) {
+                        snprintf (err_str, len, "Failed to "
+                                  "get neither volcount nor cgname");
+                        gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+                        goto out;
+                }
+        } else {
+                /* volname and snapname are fetched only to verify that
+                 * the request carries them.
+                 */
+                /* TODO: Change the index to 0 when Jarvis code is fixed */
+                ret = dict_get_str (dict, "volname1", &volname);
+                if (ret) {
+                        snprintf (err_str, len, "Failed to get volname");
+                        gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+                        goto out;
+                }
+                ret = dict_get_str (dict, "snapname", &snapname);
+                if (ret) {
+                        snprintf (err_str, len, "Failed to get snapname");
+                        gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+                        goto out;
+                }
+        }
+
+        if (NULL != cgname) { /* Snapshot restore of CG */
+                cg = glusterd_find_snap_cg_by_name (conf, cgname);
+
+                if (NULL == cg) {
+                        snprintf (err_str, len, "CG %s not found", cgname);
+                        gf_log (this->name, GF_LOG_WARNING, "%s", err_str);
+                        ret = -1;
+                        goto out;
+                }
+
+                LOCK (&cg->lock);
+                {
+                        /* First get the snap name of the CG */
+                        ret = glusterd_get_cg_snap_name_lk (dict, cg);
+                        if (!ret) {
+                                /* Then get the volumes belong to CG */
+                                ret = glusterd_get_cg_volume_names_lk (dict,
+                                                                       cg);
+                        }
+                }
+                UNLOCK (&cg->lock);
+
+                if (ret) {
+                        snprintf (err_str, len, "Failed to get "
+                                  "volume names or snap name of %s CG",
+                                  cgname);
+                        gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+                        goto out;
+                }
+        }
+
+        ret = glusterd_jarvis_initiate_snap_phases (req, op, dict);
+        if (ret) {
+                snprintf (err_str, len, "Failed to initiate snap "
+                          "phases");
+                gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+                goto out;
+        }
+
+        ret = 0; /* Success */
+out:
+        return ret;
+}
+
/* this should be the last thing to be done.
1. Do op stage related checks such as whether volume is there or not etc
2. Do quorum checks.
@@ -2563,6 +3173,16 @@ glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
rsp_dict);
break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore (dict, op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to "
+ "restore snapshot");
+ goto out;
+ }
+
+ break;
+
default:
gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
goto out;
@@ -2653,13 +3273,25 @@ glusterd_handle_snapshot_fn (rpcsvc_request_t *req)
case GF_SNAP_OPTION_TYPE_CREATE:
ret = glusterd_jarvis_initiate_snap_phases (req, cli_op, dict);
break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_handle_snapshot_restore (req, cli_op, dict,
+ err_str, sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot restore "
+ "failed: %s", err_str);
+ }
+
+ break;
case GF_SNAP_OPTION_TYPE_LIST:
ret = glusterd_handle_snapshot_list (req, cli_op, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot list "
+ "failed");
+ }
break;
case GF_SNAP_OPTION_TYPE_CONFIG:
ret = glusterd_jarvis_initiate_all_phases (req, cli_op, dict);
break;
- case GF_SNAP_OPTION_TYPE_RESTORE:
case GF_SNAP_OPTION_TYPE_DELETE:
case GF_SNAP_OPTION_TYPE_START:
case GF_SNAP_OPTION_TYPE_STOP: