summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c309
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c284
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c118
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h23
5 files changed, 599 insertions, 145 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 0febaf20cbd..bcc965696de 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1643,6 +1643,16 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
goto out;
}
+ if (cmd_args->brick_name) {
+ ret = dict_set_dynstr_with_alloc (dict, "brick_name",
+ cmd_args->brick_name);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to set brick_name in request dict");
+ goto out;
+ }
+ }
+
ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
&req.xdata.xdata_len);
if (ret < 0) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 5078526e983..1797778d150 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -259,54 +259,274 @@ out:
/* Get and store op-versions of the clients sending the getspec request
* Clients of versions <= 3.3, don't send op-versions, their op-versions are
- * defaulted to 1
+ * defaulted to 1. Also fetch brick_name.
+ *
+ * The request's xdata is unserialized into a freshly allocated dict. If the
+ * request carries no xdata the function returns 0 immediately. Once xdata is
+ * present, "min-op-version" and "max-op-version" are mandatory (a missing
+ * key is a failure), while "brick_name" is optional: when it is absent
+ * *brick_name is not written by the lookup and 0 is returned.
+ *
+ * peerinfo->max_op_version and peerinfo->min_op_version are assigned on
+ * every exit path, failures included; values that were never read keep the
+ * default of 1.
+ *
+ * NOTE(review): no dict_unref() is visible in this function, and
+ * *brick_name presumably points into storage owned by that dict - confirm
+ * the intended lifetime/ownership before releasing the dict here.
+ *
+ * Returns 0 on success, non-zero on failure.
*/
-static int
-_get_client_op_versions (gf_getspec_req *args, peer_info_t *peerinfo)
+int32_t
+glusterd_get_args_from_dict (gf_getspec_req *args, peer_info_t *peerinfo,
+ char **brick_name)
{
- int ret = 0;
- int client_max_op_version = 1;
- int client_min_op_version = 1;
- dict_t *dict = NULL;
+ dict_t *dict = NULL;
+ int client_max_op_version = 1;
+ int client_min_op_version = 1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT (this);
GF_ASSERT (args);
GF_ASSERT (peerinfo);
- if (args->xdata.xdata_len) {
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ if (!args->xdata.xdata_len) {
+ ret = 0;
+ goto out;
+ }
- ret = dict_unserialize (args->xdata.xdata_val,
- args->xdata.xdata_len, &dict);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Failed to unserialize request dictionary");
- goto out;
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (args->xdata.xdata_val,
+ args->xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to unserialize request dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "min-op-version",
+ &client_min_op_version);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to get client-min-op-version");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "max-op-version",
+ &client_max_op_version);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to get client-max-op-version");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "brick_name",
+ brick_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No brick name present");
+ ret = 0;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "brick_name = %s", *brick_name);
+out:
+ peerinfo->max_op_version = client_max_op_version;
+ peerinfo->min_op_version = client_min_op_version;
+
+ return ret;
+}
+
+/* Given the missed_snapinfo and snap_opinfo take the
+ * missed lvm snapshot
+ *
+ * The snap object is looked up by missed_snapinfo->snap_uuid, the snap
+ * volume by snap_opinfo->snap_vol_id and the brick by its 1-based position
+ * snap_opinfo->brick_num inside that volume. The brick must still be marked
+ * pending (snap_status == -1). The lvm snapshot is then taken for
+ * snap_opinfo->brick_path, the snap brick is created and mounted, the
+ * brickinfo is updated (device_path, snap_status = 0), the volinfo is
+ * persisted and the brick is started.
+ *
+ * Returns 0 on success and non-zero on any failure.
+ */
+int32_t
+glusterd_create_missed_snap (glusterd_missed_snap_info *missed_snapinfo,
+                             glusterd_snap_op_t *snap_opinfo)
+{
+        char                 *device      = NULL;
+        glusterd_conf_t      *priv        = NULL;
+        glusterd_snap_t      *snap        = NULL;
+        glusterd_volinfo_t   *snap_vol    = NULL;
+        glusterd_volinfo_t   *volinfo     = NULL;
+        glusterd_brickinfo_t *brickinfo   = NULL;
+        gf_boolean_t          brick_found = _gf_false;
+        int32_t               ret         = -1;
+        int32_t               i           = 0;
+        uuid_t                snap_uuid   = {0,};
+        xlator_t             *this        = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+        GF_ASSERT (missed_snapinfo);
+        GF_ASSERT (snap_opinfo);
+
+        uuid_parse (missed_snapinfo->snap_uuid, snap_uuid);
+
+        /* Find the snap-object */
+        snap = glusterd_find_snap_by_id (snap_uuid);
+        if (!snap) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unable to find the snap with snap_uuid %s",
+                        missed_snapinfo->snap_uuid);
+                ret = -1;
+                goto out;
+        }
+
+        /* Find the snap_vol */
+        list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+                if (!strcmp (volinfo->volname,
+                             snap_opinfo->snap_vol_id)) {
+                        snap_vol = volinfo;
+                        break;
                 }
+        }
-                ret = dict_get_int32 (dict, "min-op-version",
-                                      &client_min_op_version);
-                if (ret) {
-                        gf_log ("glusterd", GF_LOG_ERROR,
-                                "Failed to get client-min-op-version");
-                        goto out;
+        if (!snap_vol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unable to find the snap_vol(%s) "
+                        "for snap(%s)", snap_opinfo->snap_vol_id,
+                        snap->snapname);
+                ret = -1;
+                goto out;
+        }
+
+        /* Find the missed brick in the snap volume. When the walk runs off
+         * the end of the list the cursor no longer refers to a real
+         * brickinfo, so remember explicitly whether a match was hit.
+         */
+        list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+                i++;
+                if (i == snap_opinfo->brick_num) {
+                        brick_found = _gf_true;
+                        break;
+                }
+        }
+
+        if (brick_found != _gf_true) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unable to find brick number %d in the snap_vol(%s)",
+                        snap_opinfo->brick_num, snap_vol->volname);
+                ret = -1;
+                goto out;
+        }
+
+        if (brickinfo->snap_status != -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "The snap status of the missed "
+                        "brick(%s) is not pending", brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Fetch the device path */
+        /* NOTE(review): ownership of the returned string is not visible
+         * here - confirm whether it has to be released by this function.
+         */
+        device = glusterd_take_lvm_snapshot (snap_vol, snap_opinfo->brick_path);
+        if (!device) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to take snapshot of %s",
+                        snap_opinfo->brick_path);
+                ret = -1;
+                goto out;
+        }
+
+        /* Create and mount the snap brick */
+        ret = glusterd_snap_brick_create (device, snap_vol,
+                                          snap_opinfo->brick_num,
+                                          brickinfo->mount_dir);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to "
+                        " create and mount the brick(%s) for the snap %s",
+                        snap_opinfo->brick_path,
+                        snap_vol->snapshot->snapname);
+                goto out;
+        }
+
+        /* strncpy() does not terminate the destination when the source
+         * fills it, so copy one byte less and terminate explicitly.
+         */
+        strncpy (brickinfo->device_path, device,
+                 sizeof(brickinfo->device_path) - 1);
+        brickinfo->device_path[sizeof(brickinfo->device_path) - 1] = '\0';
+        brickinfo->snap_status = 0;
+
+        ret = glusterd_store_volinfo (snap_vol,
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot "
+                        "volinfo (%s) for snap %s", snap_vol->volname,
+                        snap->snapname);
+                goto out;
+        }
+
+        ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
+        if (ret) {
+                gf_log (this->name, GF_LOG_WARNING, "starting the "
+                        "brick %s:%s for the snap %s failed",
+                        brickinfo->hostname, brickinfo->path,
+                        snap->snapname);
+                goto out;
+        }
+out:
+        return ret;
+}
+
+/* Look into missed_snap_list, to see if the given brick_name
+ * has any missed snap creates for the local node.
+ *
+ * Only entries of priv->missed_snaps_list whose node_uuid equals the local
+ * node's uuid are considered. Within such an entry the first op that is a
+ * create for brick_name ends the scan of that entry; if that op is still
+ * pending, glusterd_create_missed_snap() is called for it and the op is
+ * marked done whether or not the create succeeded.
+ *
+ * If at least one op was marked done the missed snaps list is persisted
+ * through glusterd_store_update_missed_snaps(); a failure there is returned
+ * to the caller. In every other case 0 is returned.
+ */
+int32_t
+glusterd_take_missing_brick_snapshots (char *brick_name)
+{
+ char *my_node_uuid = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ int32_t ret = -1;
+ gf_boolean_t update_list = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (brick_name);
+
+ my_node_uuid = uuid_utoa (MY_UUID);
+
+ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps) {
+ /* If the missed snap op is not for the local node
+ * then continue
+ */
+ if (strcmp (my_node_uuid, missed_snapinfo->node_uuid))
+ continue;
+
+ list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list) {
+ /* Check if the missed snap's op is a create for
+ * the brick name in question
+ */
+ if ((snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+ (!strcmp (brick_name, snap_opinfo->brick_path))) {
+ /* Perform a snap create if the
+ * op is still pending
+ */
+ if (snap_opinfo->status ==
+ GD_MISSED_SNAP_PENDING) {
+ ret = glusterd_create_missed_snap
+ (missed_snapinfo,
+ snap_opinfo);
+ if (ret) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Failed to create "
+ "missed snap for %s",
+ brick_name);
+ /* At this stage, we will mark
+ * the entry as done. Because
+ * of the failure other
+ * snapshots will not be
+ * affected, and neither the
+ * brick. Only the current snap
+ * brick will always remain as
+ * pending.
+ */
+ }
+ snap_opinfo->status =
+ GD_MISSED_SNAP_DONE;
+ update_list = _gf_true;
+ }
+ /* One snap-id won't have more than one missed
+ * create for the same brick path. Hence
+ * breaking in search of another missed create
+ * for the same brick path in the local node
+ */
+ break;
+ }
}
+ }
- ret = dict_get_int32 (dict, "max-op-version",
- &client_max_op_version);
+ if (update_list == _gf_true) {
+ ret = glusterd_store_update_missed_snaps ();
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Failed to get client-max-op-version");
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to update missed_snaps_list");
goto out;
}
}
- peerinfo->max_op_version = client_max_op_version;
- peerinfo->min_op_version = client_min_op_version;
-
+ ret = 0;
out:
return ret;
}
@@ -350,11 +570,13 @@ int
__server_getspec (rpcsvc_request_t *req)
{
int32_t ret = -1;
+ int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t spec_fd = -1;
size_t file_len = 0;
char filename[PATH_MAX] = {0,};
struct stat stbuf = {0,};
+ char *brick_name = NULL;
char *volume = NULL;
char *tmp = NULL;
int cookie = 0;
@@ -363,6 +585,10 @@ __server_getspec (rpcsvc_request_t *req)
gf_getspec_rsp rsp = {0,};
char addrstr[RPCSVC_PEER_STRLEN] = {0};
peer_info_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
ret = xdr_to_generic (req->msg[0], &args,
(xdrproc_t)xdr_gf_getspec_req);
@@ -383,9 +609,12 @@ __server_getspec (rpcsvc_request_t *req)
else
strncpy (peerinfo->volname, volume, strlen(volume));
- ret = _get_client_op_versions (&args, peerinfo);
- if (ret)
+ ret = glusterd_get_args_from_dict (&args, peerinfo, &brick_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get args from dict");
goto fail;
+ }
if (!_client_supports_volume (peerinfo, &op_errno)) {
ret = -1;
@@ -452,6 +681,18 @@ __server_getspec (rpcsvc_request_t *req)
close (spec_fd);
}
+ if (brick_name) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Look for missing snap creates for %s", brick_name);
+ op_ret = glusterd_take_missing_brick_snapshots (brick_name);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to take missing brick snapshots");
+ ret = -1;
+ goto fail;
+ }
+ }
+
/* convert to XDR */
fail:
rsp.op_ret = ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 327155fee31..e7972bd8674 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -593,10 +593,12 @@ int
glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
int ret = -1;
+ int32_t volcount = -1;
char *snapname = NULL;
xlator_t *this = NULL;
glusterd_volinfo_t *snap_volinfo = NULL;
- glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ glusterd_volinfo_t *parent_volinfo = NULL;
glusterd_snap_t *snap = NULL;
glusterd_conf_t *priv = NULL;
@@ -629,42 +631,43 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
-
- /* TODO : As of now there is only volume in snapshot.
- * Change this when multiple volume snapshot is introduced
- */
- snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t,
- vol_list);
-
- ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of "
- "%s", snap_volinfo->parent_volname);
- goto out;
- }
-
- if (is_origin_glusterd (dict) == _gf_true) {
- /* From origin glusterd check if *
- * any peers with snap bricks is down */
- ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
- &priv->peers,
- GF_SNAP_OPTION_TYPE_RESTORE);
+ volcount = 0;
+ list_for_each_entry_safe (snap_volinfo, tmp, &snap->volumes, vol_list) {
+ volcount++;
+ ret = glusterd_volinfo_find (snap_volinfo->parent_volname,
+ &parent_volinfo);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
- "Failed to find missed snap restores");
+ "Could not get volinfo of %s",
+ snap_volinfo->parent_volname);
goto out;
}
- }
- ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo);
- if (ret) {
- /* No need to update op_errstr because it is assumed
- * that the called function will do that in case of
- * failure.
- */
- gf_log (this->name, GF_LOG_ERROR, "Failed to restore "
- "snap for %s", snapname);
- goto out;
+ if (is_origin_glusterd (dict) == _gf_true) {
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap
+ (rsp_dict, snap_volinfo,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to find missed snap restores");
+ goto out;
+ }
+ }
+
+ ret = gd_restore_snap_volume (dict, rsp_dict, parent_volinfo,
+ snap_volinfo, volcount);
+ if (ret) {
+ /* No need to update op_errstr because it is assumed
+ * that the called function will do that in case of
+ * failure.
+ */
+ gf_log (this->name, GF_LOG_ERROR, "Failed to restore "
+ "snap for %s", snapname);
+ goto out;
+ }
}
ret = 0;
@@ -693,11 +696,13 @@ glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr,
int ret = -1;
int32_t i = 0;
int32_t volcount = 0;
+ int32_t brick_count = 0;
gf_boolean_t snap_restored = _gf_false;
char key[PATH_MAX] = {0, };
char *volname = NULL;
char *snapname = NULL;
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
glusterd_snap_t *snap = NULL;
xlator_t *this = NULL;
@@ -799,7 +804,64 @@ glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr,
}
}
- ret = 0;
+ /* Get brickinfo for snap_volumes */
+ volcount = 0;
+ list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+ volcount++;
+ brick_count = 0;
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ brick_count++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ snprintf (key, sizeof (key), "snap%d.brick%d.path",
+ volcount, brick_count);
+ ret = dict_set_str (rsp_dict, key, brickinfo->path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.snap_status",
+ volcount, brick_count);
+ ret = dict_set_int32 (rsp_dict, key,
+ brickinfo->snap_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key),
+ "snap%d.brick%d.device_path",
+ volcount, brick_count);
+ ret = dict_set_str (rsp_dict, key,
+ brickinfo->device_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+
+ snprintf (key, sizeof (key), "snap%d.brick_count", volcount);
+ ret = dict_set_int32 (rsp_dict, key, brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+
out:
return ret;
}
@@ -1140,6 +1202,101 @@ out:
return ret;
}
+/* Aggregate brickinfo's of the snap volumes to be restored from
+ *
+ * For every volume index i in 1.."volcount" and every brick index j in
+ * 1.."snap<i>.brick_count" (both read from src) the keys
+ *   snap<i>.brick<j>.path
+ *   snap<i>.brick<j>.snap_status
+ *   snap<i>.brick<j>.device_path
+ * are copied from src into dst. A brick whose path key is missing in src is
+ * skipped (it is expected to arrive in another rsp_dict); once the path is
+ * present the other two keys are mandatory.
+ *
+ * A src without "volcount" is treated as having nothing to aggregate.
+ *
+ * Returns 0 on success, -1 when either dict is NULL, otherwise the error
+ * returned by the failing dict call.
+ */
+int32_t
+glusterd_snap_restore_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+        char      key[PATH_MAX] = "";
+        char     *strvalue      = NULL;
+        int32_t   value         = -1;
+        int32_t   i             = -1;
+        int32_t   j             = -1;
+        int32_t   vol_count     = -1;
+        int32_t   brickcount    = -1;
+        int32_t   ret           = -1;
+        xlator_t *this          = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        if (!dst || !src) {
+                gf_log (this->name, GF_LOG_ERROR, "Source or Destination "
+                        "dict is empty.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (src, "volcount", &vol_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_DEBUG, "No volumes");
+                ret = 0;
+                goto out;
+        }
+
+        for (i = 1; i <= vol_count; i++) {
+                snprintf (key, sizeof (key), "snap%d.brick_count", i);
+                ret = dict_get_int32 (src, key, &brickcount);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to get %s", key);
+                        goto out;
+                }
+
+                for (j = 1; j <= brickcount; j++) {
+                        snprintf (key, sizeof (key), "snap%d.brick%d.path",
+                                  i, j);
+                        ret = dict_get_str (src, key, &strvalue);
+                        if (ret) {
+                                /* The brickinfo will be present in
+                                 * another rsp_dict */
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "%s not present", key);
+                                ret = 0;
+                                continue;
+                        }
+                        ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+                        if (ret) {
+                                /* Fatal for the aggregation, so report it
+                                 * at the same level as the other failures
+                                 */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set %s", key);
+                                goto out;
+                        }
+
+                        snprintf (key, sizeof (key),
+                                  "snap%d.brick%d.snap_status", i, j);
+                        ret = dict_get_int32 (src, key, &value);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to get %s", key);
+                                goto out;
+                        }
+                        ret = dict_set_int32 (dst, key, value);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set %s", key);
+                                goto out;
+                        }
+
+                        snprintf (key, sizeof (key),
+                                  "snap%d.brick%d.device_path", i, j);
+                        ret = dict_get_str (src, key, &strvalue);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to get %s", key);
+                                goto out;
+                        }
+                        ret = dict_set_dynstr_with_alloc (dst, key, strvalue);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set %s", key);
+                                goto out;
+                        }
+                }
+        }
+
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
+
int
glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src)
{
@@ -1172,6 +1329,14 @@ glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src)
goto out;
}
break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snap_restore_use_rsp_dict (dst, src);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to use "
+ "rsp dict");
+ goto out;
+ }
+ break;
default:
break;
}
@@ -1341,7 +1506,8 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr,
goto out;
}
- device = glusterd_get_brick_mount_details (brickinfo);
+ device = glusterd_get_brick_mount_details
+ (brickinfo->path);
if (!device) {
snprintf (err_str, sizeof (err_str),
"getting device name for the brick "
@@ -3282,7 +3448,7 @@ out:
*/
char *
glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
- glusterd_brickinfo_t *brickinfo)
+ char *brick_path)
{
char msg[NAME_MAX] = "";
char buf[PATH_MAX] = "";
@@ -3295,17 +3461,13 @@ glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
xlator_t *this = NULL;
this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (brick_path);
- if (!brickinfo) {
- gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL");
- goto out;
- }
-
- device = glusterd_get_brick_mount_details (brickinfo);
+ device = glusterd_get_brick_mount_details (brick_path);
if (!device) {
gf_log (this->name, GF_LOG_ERROR, "getting device name for "
- "the brick %s:%s failed", brickinfo->hostname,
- brickinfo->path);
+ "the brick %s failed", brick_path);
goto out;
}
@@ -3338,8 +3500,8 @@ glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
/* Takng the actual snapshot */
runinit (&runner);
- snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s",
- brickinfo->hostname, brickinfo->path);
+ snprintf (msg, sizeof (msg), "taking snapshot of the brick %s",
+ brick_path);
if (match == _gf_true)
runner_add_args (&runner, LVM_CREATE, "-s", device,
"--setactivationskip", "n", "--name",
@@ -3347,12 +3509,12 @@ glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
else
runner_add_args (&runner, LVM_CREATE, "-s", device,
"--name", snap_vol->volname, NULL);
- runner_log (&runner, "", GF_LOG_DEBUG, msg);
+ runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
ret = runner_start (&runner);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the "
- "brick (%s:%s) of device %s failed",
- brickinfo->hostname, brickinfo->path, device);
+ "brick (%s) of device %s failed",
+ brick_path, device);
runner_end (&runner);
goto out;
}
@@ -3374,7 +3536,6 @@ out:
int32_t
glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo,
- glusterd_brickinfo_t *original_brickinfo,
int32_t brick_count, char *snap_brick_dir)
{
int32_t ret = -1;
@@ -3389,12 +3550,11 @@ glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo,
GF_ASSERT (device);
GF_ASSERT (snap_volinfo);
- GF_ASSERT (original_brickinfo);
GF_ASSERT (snap_brick_dir);
snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path),
"%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname,
- brick_count+1);
+ brick_count);
snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s",
snap_brick_mount_path, snap_brick_dir);
@@ -3574,7 +3734,6 @@ static int32_t
glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
glusterd_volinfo_t *snap_vol, dict_t *rsp_dict,
glusterd_brickinfo_t *original_brickinfo,
- glusterd_brickinfo_t *snap_brickinfo,
char *snap_brick_dir, int32_t brick_count)
{
char *device = NULL;
@@ -3587,10 +3746,10 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
GF_ASSERT (snap_vol);
GF_ASSERT (rsp_dict);
GF_ASSERT (original_brickinfo);
- GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
- device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo);
+ device = glusterd_take_lvm_snapshot (snap_vol,
+ original_brickinfo->path);
/* Fail the snapshot even though snapshot on one of
the bricks fails. At the end when we check whether
the snapshot volume meets quorum or not, then the
@@ -3607,11 +3766,10 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
/* create the complete brick here */
ret = glusterd_snap_brick_create (device, snap_vol,
- original_brickinfo,
- brick_count, snap_brick_dir);
+ brick_count + 1, snap_brick_dir);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "not able to"
- " create the brickinfo for the snap %s"
+ " create the brick for the snap %s"
", volume %s", snap_vol->snapshot->snapname,
origin_vol->volname);
goto out;
@@ -3749,7 +3907,6 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
ret = glusterd_take_brick_snapshot (origin_vol, snap_vol,
rsp_dict, brickinfo,
- snap_brickinfo,
snap_brick_dir,
brick_count);
if (ret) {
@@ -4732,7 +4889,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict,
priv = this->private;
GF_ASSERT (priv);
- device = glusterd_get_brick_mount_details (brickinfo);
+ device = glusterd_get_brick_mount_details (brickinfo->path);
if (!device) {
gf_log (this->name, GF_LOG_ERROR, "Getting device name for "
"the brick %s:%s failed", brickinfo->hostname,
@@ -6638,9 +6795,10 @@ out:
* @return 0 on success and negative value on error
*/
int
-gd_restore_snap_volume (dict_t *rsp_dict,
+gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict,
glusterd_volinfo_t *orig_vol,
- glusterd_volinfo_t *snap_vol)
+ glusterd_volinfo_t *snap_vol,
+ int32_t volcount)
{
int ret = -1;
glusterd_volinfo_t *new_volinfo = NULL;
@@ -6652,6 +6810,7 @@ gd_restore_snap_volume (dict_t *rsp_dict,
this = THIS;
GF_ASSERT (this);
+ GF_ASSERT (dict);
GF_ASSERT (rsp_dict);
conf = this->private;
GF_ASSERT (conf);
@@ -6713,7 +6872,8 @@ gd_restore_snap_volume (dict_t *rsp_dict,
&new_volinfo->snap_volumes);
}
/* Copy the snap vol info to the new_volinfo.*/
- ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol);
+ ret = glusterd_snap_volinfo_restore (dict, rsp_dict, new_volinfo,
+ snap_vol, volcount);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap");
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d508e74f510..b955fd9de9d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -647,10 +647,13 @@ out:
* TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves
*/
int32_t
-glusterd_snap_volinfo_restore (dict_t *rsp_dict,
+glusterd_snap_volinfo_restore (dict_t *dict, dict_t *rsp_dict,
glusterd_volinfo_t *new_volinfo,
- glusterd_volinfo_t *snap_volinfo)
+ glusterd_volinfo_t *snap_volinfo,
+ int32_t volcount)
{
+ char *value = NULL;
+ char key[PATH_MAX] = "";
int32_t brick_count = -1;
int32_t ret = -1;
xlator_t *this = NULL;
@@ -659,6 +662,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
this = THIS;
GF_ASSERT (this);
+ GF_ASSERT (dict);
GF_ASSERT (rsp_dict);
GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out);
@@ -666,6 +670,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
brick_count = 0;
list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) {
+ brick_count++;
ret = glusterd_brickinfo_new (&new_brickinfo);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to create "
@@ -681,6 +686,28 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
goto out;
}
+ /* Fetch values if present in dict These values won't
+ * be present in case of a missed restore. In that case
+ * it's fine to use the local node's value
+ */
+ snprintf (key, sizeof (key), "snap%d.brick%d.path",
+ volcount, brick_count);
+ ret = dict_get_str (dict, key, &value);
+ if (!ret)
+ strncpy (new_brickinfo->path, value,
+ sizeof(new_brickinfo->path));
+
+ snprintf (key, sizeof (key), "snap%d.brick%d.snap_status",
+ volcount, brick_count);
+ ret = dict_get_int32 (dict, key, &new_brickinfo->snap_status);
+
+ snprintf (key, sizeof (key), "snap%d.brick%d.device_path",
+ volcount, brick_count);
+ ret = dict_get_str (dict, key, &value);
+ if (!ret)
+ strncpy (new_brickinfo->device_path, value,
+ sizeof(new_brickinfo->device_path));
+
/* If the brick is not of this peer, or snapshot is missed *
* for the brick do not replace the xattr for it */
if ((!uuid_compare (brickinfo->uuid, MY_UUID)) &&
@@ -713,7 +740,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
(rsp_dict,
snap_volinfo,
brickinfo,
- brick_count + 1,
+ brick_count,
GF_SNAP_OPTION_TYPE_RESTORE);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -729,7 +756,6 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict,
&new_volinfo->bricks);
/* ownership of new_brickinfo is passed to new_volinfo */
new_brickinfo = NULL;
- brick_count++;
}
/* Regenerate all volfiles */
@@ -2336,7 +2362,8 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
- snprintf (key, sizeof (key), "volume%d.restored_from_snap", count);
+ snprintf (key, sizeof (key), "%s%d.restored_from_snap",
+ prefix, count);
ret = dict_set_dynstr_with_alloc
(dict, key,
uuid_utoa (volinfo->restored_from_snap));
@@ -4562,6 +4589,7 @@ glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op)
glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *snap_volinfo = NULL;
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
xlator_t *this = NULL;
uuid_t null_uuid = {0};
@@ -4590,35 +4618,37 @@ glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op)
break;
case GF_SNAP_OPTION_TYPE_RESTORE:
- /* TODO : As of now there is only volume in snapshot.
- * Change this when multiple volume snapshot is introduced
- */
- snap_volinfo = list_entry (snap->volumes.next,
- glusterd_volinfo_t, vol_list);
-
- /* Find the parent volinfo */
- ret = glusterd_volinfo_find (snap_volinfo->parent_volname,
- &volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not get volinfo of %s",
- snap_volinfo->parent_volname);
- goto out;
- }
+ list_for_each_entry_safe (snap_volinfo, tmp,
+ &snap->volumes, vol_list) {
+ ret = glusterd_volinfo_find
+ (snap_volinfo->parent_volname,
+ &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get volinfo of %s",
+ snap_volinfo->parent_volname);
+ goto out;
+ }
- /* Bump down the original volinfo's version, coz it would have
- * incremented already due to volume handshake
- */
- volinfo->version--;
- uuid_copy (volinfo->restored_from_snap, null_uuid);
+ volinfo->version--;
+ uuid_copy (volinfo->restored_from_snap, null_uuid);
- /* Perform the restore */
- ret = gd_restore_snap_volume (dict, volinfo, snap_volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to restore "
- "snap for %s", snap->snapname);
- volinfo->version++;
- goto out;
+ /* gd_restore_snap_volume() uses the dict and volcount
+ * to fetch snap brick info from other nodes, which were
+ * collected during prevalidation. As this is an ad-hoc
+ * op and only local node's data matter, hence sending
+ * volcount as 0 and re-using the same dict because we
+ * need not record any missed creates in the rsp_dict.
+ */
+ ret = gd_restore_snap_volume (dict, dict, volinfo,
+ snap_volinfo, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to restore snap for %s",
+ snap->snapname);
+ volinfo->version++;
+ goto out;
+ }
}
break;
@@ -4851,7 +4881,9 @@ out:
return is_local;
}
-/* Check if the peer has missed any snap delete for the given snap_id */
+/* Check if the peer has missed any snap delete
+ * or restore for the given snap_id
+ */
gf_boolean_t
glusterd_peer_has_missed_snap_delete (glusterd_peerinfo_t *peerinfo,
char *peer_snap_id)
@@ -4885,8 +4917,10 @@ glusterd_peer_has_missed_snap_delete (glusterd_peerinfo_t *peerinfo,
list_for_each_entry (snap_opinfo,
&missed_snapinfo->snap_ops,
snap_ops_list) {
- if ((snap_opinfo->op ==
- GF_SNAP_OPTION_TYPE_DELETE) &&
+ if (((snap_opinfo->op ==
+ GF_SNAP_OPTION_TYPE_DELETE) ||
+ (snap_opinfo->op ==
+ GF_SNAP_OPTION_TYPE_RESTORE)) &&
(snap_opinfo->status ==
GD_MISSED_SNAP_PENDING)) {
missed_delete = _gf_true;
@@ -5141,7 +5175,7 @@ out:
* glusterd_compare_and_update_snap() implements the following algorithm to
* perform the above task:
* Step 1: Start.
- * Step 2: Check if the peer is missing a delete on the said snap.
+ * Step 2: Check if the peer is missing a delete or restore on the said snap.
* If yes, goto step 6.
* Step 3: Check if there is a conflict between the peer's data and the
* local snap. If no, goto step 5.
@@ -5205,8 +5239,8 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
goto out;
}
- /* Check if the peer has missed a snap delete for the
- * snap in question
+ /* Check if the peer has missed a snap delete or restore
+ * resulting in stale data for the snap in question
*/
missed_delete = glusterd_peer_has_missed_snap_delete (peerinfo,
peer_snap_id);
@@ -6983,7 +7017,7 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo,
}
char*
-glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo)
+glusterd_get_brick_mount_details (char *brick_path)
{
int ret = -1;
char *mnt_pt = NULL;
@@ -6994,12 +7028,12 @@ glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo)
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (brickinfo);
+ GF_ASSERT (brick_path);
- ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+ ret = glusterd_get_brick_root (brick_path, &mnt_pt);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point "
- "for %s brick", brickinfo->path);
+ "for %s brick", brick_path);
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 2cf328f91ab..d36444e9d9a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -693,8 +693,7 @@ int32_t
glusterd_compare_volume_name(struct list_head *, struct list_head *);
char*
-glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo);
-
+glusterd_get_brick_mount_details (char *brick_path);
struct mntent *
glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab);
@@ -709,9 +708,11 @@ int
glusterd_compare_snap_vol_time(struct list_head *, struct list_head *);
int32_t
-glusterd_snap_volinfo_restore (dict_t *rsp_dict,
+glusterd_snap_volinfo_restore (dict_t *dict, dict_t *rsp_dict,
glusterd_volinfo_t *new_volinfo,
- glusterd_volinfo_t *snap_volinfo);
+ glusterd_volinfo_t *snap_volinfo,
+ int32_t volcount);
+
int32_t
glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol);
@@ -733,10 +734,11 @@ glusterd_add_missed_snaps_to_export_dict (dict_t *peer_data);
int32_t
glusterd_import_friend_missed_snap_list (dict_t *peer_data);
-int32_t
-gd_restore_snap_volume (dict_t *rsp_dict,
+int
+gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict,
glusterd_volinfo_t *orig_vol,
- glusterd_volinfo_t *snap_vol);
+ glusterd_volinfo_t *snap_vol,
+ int32_t volcount);
int32_t
glusterd_mount_lvm_snapshot (char *device_path, char *brick_mount_path);
@@ -787,4 +789,11 @@ glusterd_get_brick_mount_dir (char *brickpath, char *hostname, char *mount_dir);
int32_t
glusterd_aggr_brick_mount_dirs (dict_t *aggr, dict_t *rsp_dict);
+char *
+glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
+ char *brick_path);
+
+int32_t
+glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo,
+ int32_t brick_count, char *snap_brick_dir);
#endif