summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-snapshot.c
diff options
context:
space:
mode:
authorAvra Sengupta <asengupt@redhat.com>2014-03-06 08:23:01 +0000
committerRajesh Joseph <rjoseph@redhat.com>2014-04-02 06:03:50 -0700
commitbff28bca0e79d67ecdec0ff7a240570b56fdcd7c (patch)
treeb5f63c1e39e34933cb5c775375f9e3c018e13163 /xlators/mgmt/glusterd/src/glusterd-snapshot.c
parent0ce369a0aa511e98fd71c0337181a5577b2d8a1f (diff)
glusterd/snapshot: Recording the snapshots missed in each brick.
Persisting missing snapshot info on disk as well as in memory in the following format: -------------NODE-UUID--------------:---------SNAP-UUID--------------=BRICKNUM:-------BRICKPATH--------:OPERATION:STATUS 927cb5fe-63da-48f5-82f6-e6a09ddc81c4:a17b4fe42c5a45f7a916438643edaa13= 3 :/brick/brick-dirs/brick3: 1 : 1 927cb5fe-63da-48f5-82f6-e6a09ddc81c4:a17b4fe42c5a45f7a916438643edaa13= 3 :/brick/brick-dirs/brick3: 3 : 1 927cb5fe-63da-48f5-82f6-e6a09ddc81c4:83a3cc05453b46b2a7eda4c9a9208638= 3 :/brick/brick-dirs/brick3: 1 : 1 This data will be stored on disk at /var/lib/glusterd/snaps/missed_snaps_list. In memory we maintain the data as a list of glusterd_missed_snap_info in conf; the key for this list is the first two fields, i.e. NODE-UUID:SNAP-UUID. For every NODE-UUID:SNAP-UUID, there can be multiple operations missed on multiple bricks. So we maintain a list of glusterd_snap_op_t for every node of glusterd_missed_snap_info. This list is maintained or updated during snapshot create, delete, and restore operations, which are the only operations that, if missed, are recorded in this list. During snapshot create, if a node is down, or a brick is down, we don't receive their mount point infos. snap_status of such bricks is marked as -1, and their brick details are added to this list. During snapshot delete, we check from the originator node if any other nodes holding bricks of the said snap are down. Those are also added to the list. Also if the node is up, but the snapshot was pending for a snap brick, and its snap_status is -1, we add that to the list too. When a subsequent delete entry is processed for an already existing create entry, we just mark the create entry's status as done (2), and don't add the delete entry to the list. During snapshot restore, we check from the originator node if any other nodes holding bricks of the said snap are down. Those are also added to the list. 
Also if the node is up, but the snapshot was pending for a snap brick, and its snap_status is -1, we add that to the list too. Change-Id: I22578d14f81a54e13f6832966b70cd4cfdfd5b44 Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/7208 Reviewed-by: Vijaikumar Mallikarjuna <vmallika@redhat.com> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> Tested-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-snapshot.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c677
1 files changed, 643 insertions, 34 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 256c34e9b..5a0041535 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -46,6 +46,11 @@
char snap_mount_folder[PATH_MAX];
+static int32_t
+glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol,
+ char *snap_uuid, struct list_head *peers,
+ int32_t op); /* forward declaration: glusterd_snapshot_restore() calls
+               * this before the definition appears later in the file */
+
/* This function will restore a snapshot volumes
*
* @param dict dictionary containing snapshot restore request
@@ -54,7 +59,7 @@ char snap_mount_folder[PATH_MAX];
* @return Negative value on Failure and 0 in success
*/
int
-glusterd_snapshot_restore (dict_t *dict, char **op_errstr)
+glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
int ret = -1;
char *volname = NULL;
@@ -63,13 +68,17 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr)
glusterd_volinfo_t *snap_volinfo = NULL;
glusterd_volinfo_t *volinfo = NULL;
glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
this = THIS;
GF_ASSERT (this);
GF_ASSERT (dict);
GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+ priv = this->private;
+ GF_ASSERT (priv);
ret = dict_get_str (dict, "snapname", &snapname);
if (ret) {
@@ -103,7 +112,21 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr)
goto out;
}
- ret = gd_restore_snap_volume (volinfo, snap_volinfo);
+ if (is_origin_glusterd (dict) == _gf_true) {
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
+ snap_volinfo->volname,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to find missed snap restores");
+ goto out;
+ }
+ }
+
+ ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo);
if (ret) {
/* No need to update op_errstr because it is assumed
* that the called function will do that in case of
@@ -977,24 +1000,29 @@ out:
}
int32_t
-glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
+glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol)
{
- char *mnt_pt = NULL;
- struct mntent *entry = NULL;
- int32_t ret = -1;
- glusterd_brickinfo_t *brickinfo = NULL;
- xlator_t *this = NULL;
- FILE *mtab = NULL;
+ char *mnt_pt = NULL;
+ struct mntent *entry = NULL;
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ FILE *mtab = NULL;
this = THIS;
GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_vol);
if (!snap_vol) {
gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL");
goto out;
}
+ brick_count = -1;
list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+ brick_count++;
if (uuid_compare (brickinfo->uuid, MY_UUID))
continue;
@@ -1004,6 +1032,23 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
"for brick %s:%s of the snap %s.",
brickinfo->hostname, brickinfo->path,
snap_vol->snapshot->snapname);
+
+ /* Adding missed delete to the dict */
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict,
+ snap_vol->volname,
+ brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
continue;
}
@@ -1026,7 +1071,8 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
goto out;
}
ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo,
- mnt_pt, entry->mnt_fsname);
+ mnt_pt,
+ entry->mnt_fsname);
if (mtab)
endmntent (mtab);
if (ret) {
@@ -1035,6 +1081,7 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol)
brickinfo->path, entry->mnt_fsname);
goto out;
}
+
}
ret = 0;
@@ -1043,7 +1090,8 @@ out:
}
int32_t
-glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol,
+glusterd_snap_volume_remove (dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
gf_boolean_t remove_lvm,
gf_boolean_t force)
{
@@ -1054,6 +1102,9 @@ glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol,
xlator_t *this = NULL;
this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_vol);
if (!snap_vol) {
gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL");
@@ -1080,7 +1131,7 @@ glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol,
/* Only remove the backend lvm when required */
if (remove_lvm) {
- ret = glusterd_lvm_snapshot_remove (snap_vol);
+ ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "Failed to remove "
"lvm snapshot volume %s", snap_vol->volname);
@@ -1148,7 +1199,8 @@ glusterd_snapobject_delete (glusterd_snap_t *snap)
}
int32_t
-glusterd_snap_remove (glusterd_snap_t *snap,
+glusterd_snap_remove (dict_t *rsp_dict,
+ glusterd_snap_t *snap,
gf_boolean_t remove_lvm,
gf_boolean_t force)
{
@@ -1159,6 +1211,9 @@ glusterd_snap_remove (glusterd_snap_t *snap,
xlator_t *this = NULL;
this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap);
if (!snap) {
gf_log(this->name, GF_LOG_WARNING, "snap in NULL");
@@ -1167,7 +1222,7 @@ glusterd_snap_remove (glusterd_snap_t *snap,
}
list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) {
- ret = glusterd_snap_volume_remove (snap_vol,
+ ret = glusterd_snap_volume_remove (rsp_dict, snap_vol,
remove_lvm, force);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "Failed to remove "
@@ -2373,7 +2428,7 @@ out:
}
glusterd_snap_t*
-glusterd_create_snap_object (dict_t *dict)
+glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict)
{
char *snapname = NULL;
uuid_t *snap_id = NULL;
@@ -2388,6 +2443,7 @@ glusterd_create_snap_object (dict_t *dict)
priv = this->private;
GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
/* Fetch snapname, description, id and time from dict */
ret = dict_get_str (dict, "snapname", &snapname);
@@ -2476,7 +2532,8 @@ glusterd_create_snap_object (dict_t *dict)
out:
if (ret) {
if (snap)
- glusterd_snap_remove (snap, _gf_true,_gf_true);
+ glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true);
snap=NULL;
}
@@ -2746,8 +2803,75 @@ out:
return ret;
}
+/* Add a missed snap entry to rsp_dict.
+ *
+ * Builds a string of the form
+ *   <node-uuid>:<snap_uuid>=<brick_number>:<brick-path>:<op>:<status>
+ * with the status set to GD_MISSED_SNAP_PENDING, stores a heap copy of it
+ * under the key "missed_snaps_<n>" and then writes n + 1 back as
+ * "missed_snap_count". n is the current "missed_snap_count" of rsp_dict,
+ * or 0 when that key cannot be fetched.
+ *
+ * An entry that does not fit into the PATH_MAX sized scratch buffer is
+ * rejected instead of being stored truncated, because a truncated entry
+ * would lose its trailing op/status fields.
+ *
+ * @param rsp_dict      dictionary receiving the entry and the counter
+ * @param snap_uuid     identifier written into the SNAP-UUID field
+ * @param brickinfo     brick whose uuid and path are recorded
+ * @param brick_number  value written into the BRICKNUM field
+ * @param op            snapshot operation that was missed
+ *
+ * @return 0 on success, non-zero on failure
+ */
+int32_t
+glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid,
+                                   glusterd_brickinfo_t *brickinfo,
+                                   int32_t brick_number, int32_t op)
+{
+        char                   *buf                             = NULL;
+        char                    missed_snap_entry[PATH_MAX]     = "";
+        char                    name_buf[PATH_MAX]              = "";
+        int                     len                             = -1;
+        int32_t                 missed_snap_count               = -1;
+        int32_t                 ret                             = -1;
+        xlator_t               *this                            = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (snap_uuid);
+        GF_ASSERT (brickinfo);
+
+        len = snprintf (missed_snap_entry, sizeof(missed_snap_entry),
+                        "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid),
+                        snap_uuid, brick_number, brickinfo->path, op,
+                        GD_MISSED_SNAP_PENDING);
+        if ((len < 0) || ((size_t) len >= sizeof(missed_snap_entry))) {
+                /* snprintf reports the length it wanted to write; anything
+                 * at or beyond the buffer size means the entry was cut */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "missed_snap_entry for %s:%s does not fit in "
+                        "the buffer", brickinfo->hostname,
+                        brickinfo->path);
+                ret = -1;
+                goto out;
+        }
+
+        buf = gf_strdup (missed_snap_entry);
+        if (!buf) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to duplicate missed_snap_entry (%s)",
+                        missed_snap_entry);
+                ret = -1;
+                goto out;
+        }
+
+        /* Fetch the missed_snap_count from the dict */
+        ret = dict_get_int32 (rsp_dict, "missed_snap_count",
+                              &missed_snap_count);
+        if (ret) {
+                /* Initialize the missed_snap_count for the first time */
+                missed_snap_count = 0;
+        }
+
+        /* Setting the missed_snap_entry in the rsp_dict */
+        snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+                  missed_snap_count);
+        ret = dict_set_dynstr (rsp_dict, name_buf, buf);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set missed_snap_entry (%s) "
+                        "in the rsp_dict.", buf);
+                GF_FREE (buf);
+                goto out;
+        }
+        missed_snap_count++;
+
+        /* Setting the new missed_snap_count in the dict */
+        ret = dict_set_int32 (rsp_dict, "missed_snap_count",
+                              missed_snap_count);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set missed_snap_count for %s "
+                        "in the rsp_dict.", missed_snap_entry);
+                goto out;
+        }
+
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
static int32_t
-glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
+glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
glusterd_brickinfo_t *original_brickinfo,
glusterd_brickinfo_t *snap_brickinfo,
char **snap_brick_dir, int64_t volcount,
@@ -2762,6 +2886,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
this = THIS;
GF_ASSERT (this);
GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
GF_ASSERT (snap_vol);
GF_ASSERT (original_brickinfo);
GF_ASSERT (snap_brickinfo);
@@ -2779,6 +2904,25 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
"snap mount path (%s). Using original brickinfo", key);
snap_brickinfo->snap_status = -1;
strcpy (snap_brick_path, original_brickinfo->path);
+
+ /* In origiator node add snaps missed
+ * from different nodes to the dict
+ */
+ if (is_origin_glusterd (dict) == _gf_true) {
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict, snap_vol->volname,
+ original_brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_CREATE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ original_brickinfo->hostname,
+ original_brickinfo->path);
+ goto out;
+ }
+ }
} else {
/* Create brick-path in the format /var/run/gluster/snaps/ *
* <snap-uuid>/<original-brick#>/snap-brick-dir *
@@ -2793,6 +2937,9 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol,
volcount, brick_count);
ret = dict_get_ptr (dict, key, (void **)&snap_device);
if (ret) {
+ /* If the device name is empty, so will be the brick path
+ * Hence the missed snap has already been added above
+ */
gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
"snap device (%s). Leaving empty", key);
} else
@@ -2825,7 +2972,7 @@ out:
static int32_t
glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
- glusterd_volinfo_t *snap_vol,
+ glusterd_volinfo_t *snap_vol, dict_t *rsp_dict,
glusterd_brickinfo_t *original_brickinfo,
glusterd_brickinfo_t *snap_brickinfo,
char *snap_brick_dir, int32_t brick_count)
@@ -2838,6 +2985,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
GF_ASSERT (this);
GF_ASSERT (origin_vol);
GF_ASSERT (snap_vol);
+ GF_ASSERT (rsp_dict);
GF_ASSERT (original_brickinfo);
GF_ASSERT (snap_brickinfo);
GF_ASSERT (snap_brick_dir);
@@ -2849,6 +2997,23 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
original_brickinfo->path,
origin_vol->volname,
snap_vol->snapshot->snapname);
+
+ /* Adding the not started bricks to the missed snaps list */
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict,
+ snap_vol->volname,
+ original_brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_CREATE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ original_brickinfo->hostname,
+ original_brickinfo->path);
+ goto out;
+ }
+
snap_brickinfo->snap_status = -1;
ret = 0;
goto out;
@@ -2889,9 +3054,73 @@ out:
return ret;
}
+/* Walk the bricks of vol and, for every brick hosted on another node whose
+ * peer entry is either not connected or not in the befriended state, add a
+ * missed snap entry for operation op to rsp_dict.
+ *
+ * Bricks are numbered from 1 in list order; bricks of the local node are
+ * counted but never recorded.
+ *
+ * Returns 0 on success and the failing helper's return value otherwise.
+ */
+static int32_t
+glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol,
+                           char *snap_uuid, struct list_head *peers,
+                           int32_t op)
+{
+        int32_t                 brick_num       = 0;
+        int32_t                 ret             = -1;
+        xlator_t               *this            = NULL;
+        glusterd_peerinfo_t    *peer            = NULL;
+        glusterd_brickinfo_t   *brick           = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (peers);
+        GF_ASSERT (vol);
+        GF_ASSERT (snap_uuid);
+
+        list_for_each_entry (brick, &vol->bricks, brick_list) {
+                /* 1-based position of this brick within the volume */
+                brick_num++;
+
+                /* Bricks of the local node are not looked up in peers */
+                if (uuid_compare (brick->uuid, MY_UUID) == 0)
+                        continue;
+
+                list_for_each_entry (peer, peers, uuid_list) {
+                        /* Skip peers that do not own this brick */
+                        if (uuid_compare (peer->uuid, brick->uuid) != 0)
+                                continue;
+
+                        /* Owner is connected and befriended: nothing
+                         * was missed on it */
+                        if ((peer->connected) &&
+                            (peer->state.state ==
+                                           GD_FRIEND_STATE_BEFRIENDED))
+                                continue;
+
+                        ret = glusterd_add_missed_snaps_to_dict (rsp_dict,
+                                                                 snap_uuid,
+                                                                 brick,
+                                                                 brick_num,
+                                                                 op);
+                        if (!ret)
+                                continue;
+
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to add missed snapshot "
+                                "info for %s:%s in the "
+                                "rsp_dict", brick->hostname,
+                                brick->path);
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
glusterd_volinfo_t *
glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
- dict_t *dict, int64_t volcount)
+ dict_t *dict, dict_t *rsp_dict, int64_t volcount)
{
char key[PATH_MAX] = "";
char *snap_brick_dir = NULL;
@@ -2908,14 +3137,13 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
this = THIS;
GF_ASSERT (this);
-
priv = this->private;
GF_ASSERT (priv);
GF_ASSERT (origin_vol);
GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
/* fetch username, password and vol_id from dict*/
-
snprintf (key, sizeof(key), "volume%ld_username", volcount);
ret = dict_get_str (dict, key, &username);
if (ret) {
@@ -2974,7 +3202,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
goto out;
}
- ret = glusterd_add_bricks_to_snap_volume (dict,
+ ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict,
snap_vol,
brickinfo,
snap_brickinfo,
@@ -2997,9 +3225,8 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
continue;
}
- ret = glusterd_take_brick_snapshot (origin_vol,
- snap_vol,
- brickinfo,
+ ret = glusterd_take_brick_snapshot (origin_vol, snap_vol,
+ rsp_dict, brickinfo,
snap_brickinfo,
snap_brick_dir,
brick_count);
@@ -3090,7 +3317,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
out:
if (ret) {
if (snap_vol)
- glusterd_snap_volume_remove (snap_vol,
+ glusterd_snap_volume_remove (rsp_dict, snap_vol,
_gf_true, _gf_true);
snap_vol = NULL;
}
@@ -3311,7 +3538,6 @@ out :
return ret;
}
-
int32_t
glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr,
dict_t *rsp_dict)
@@ -3320,9 +3546,18 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr,
char *snapname = NULL;
char *dup_snapname = NULL;
glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
xlator_t *this = NULL;
this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_errstr);
+
+ priv = this->private;
+ GF_ASSERT (priv);
if (!dict || !op_errstr) {
gf_log (this->name, GF_LOG_ERROR, "input parameters NULL");
@@ -3344,7 +3579,34 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr,
goto out;
}
- ret = glusterd_snap_remove (snap, _gf_true, _gf_false);
+ if (is_origin_glusterd (dict) == _gf_true) {
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = list_entry (snap->volumes.next,
+ glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
+ snap_volinfo->volname,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to find missed snap deletes");
+ goto out;
+ }
+ }
+
+ ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false);
if (ret){
gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s",
snapname);
@@ -3409,7 +3671,7 @@ glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- ret = glusterd_snap_remove (snap, _gf_true, _gf_true);
+ ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed",
name);
@@ -3425,6 +3687,42 @@ out:
return ret;
}
+/* Post-validate step shared by the snapshot operations: if dict carries a
+ * "missed_snap_count", hand the dict and that count over to
+ * glusterd_store_update_missed_snaps(). A dict without the key means there
+ * is nothing to record and is treated as success.
+ *
+ * Returns 0 on success, otherwise the return value of
+ * glusterd_store_update_missed_snaps(). */
+int32_t
+glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr,
+                                              dict_t *rsp_dict)
+{
+        int32_t         count   = -1;
+        int32_t         ret     = -1;
+        xlator_t       *this    = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (op_errstr);
+
+        if (dict_get_int32 (dict, "missed_snap_count", &count)) {
+                gf_log (this->name, GF_LOG_DEBUG, "No missed snaps");
+                ret = 0;
+        } else {
+                ret = glusterd_store_update_missed_snaps (dict, count);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to update missed_snaps_list");
+        }
+
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
int32_t
glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
dict_t *rsp_dict)
@@ -3443,6 +3741,10 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
glusterd_conf_t *priv = NULL;
this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
priv = this->private;
GF_ASSERT(priv);
@@ -3474,7 +3776,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
}
tmp_name = NULL;
- snap = glusterd_create_snap_object (dict);
+ snap = glusterd_create_snap_object (dict, rsp_dict);
if (!snap) {
gf_log (this->name, GF_LOG_ERROR, "creating the"
"snap object %s failed", snapname);
@@ -3502,7 +3804,8 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
/* TODO: Create a stub where the bricks are
added parallely by worker threads so that
the snap creating happens parallely. */
- snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, i);
+ snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict,
+ rsp_dict, i);
if (!snap_vol) {
ret = -1;
gf_log (this->name, GF_LOG_WARNING, "taking the "
@@ -3524,7 +3827,8 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
out:
if (ret) {
if (snap)
- glusterd_snap_remove (snap, _gf_true, _gf_true);
+ glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true);
snap=NULL;
}
@@ -4552,10 +4856,18 @@ glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret,
"failed");
goto out;
}
+ } else {
+ ret = glusterd_snapshot_update_snaps_post_validate (dict,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "create snapshot");
+ goto out;
+ }
}
ret = 0;
-
out:
return ret;
}
@@ -4612,7 +4924,8 @@ glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
case GF_SNAP_OPTION_TYPE_RESTORE:
- ret = glusterd_snapshot_restore (dict, op_errstr);
+ ret = glusterd_snapshot_restore (dict, op_errstr,
+ rsp_dict);
if (ret) {
gf_log (this->name, GF_LOG_WARNING, "Failed to "
"restore snapshot");
@@ -4820,6 +5133,18 @@ glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,
}
break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_update_snaps_post_validate (dict,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "update missed snaps list");
+ goto out;
+ }
+ break;
+
default:
gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
goto out;
@@ -4998,3 +5323,287 @@ glusterd_handle_snapshot (rpcsvc_request_t *req)
{
return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn);
}
+
+/* Release a glusterd_snap_op_t together with the brick_path string it
+ * holds. Passing NULL is a no-op. */
+static inline void
+glusterd_free_snap_op (glusterd_snap_op_t *snap_op)
+{
+        if (snap_op == NULL)
+                return;
+
+        if (snap_op->brick_path != NULL)
+                GF_FREE (snap_op->brick_path);
+
+        GF_FREE (snap_op);
+}
+
+/* Merge missed_snap_op into the snap_ops list of missed_snapinfo.
+ *
+ * The list is scanned in order and the first entry that satisfies one of
+ * the rules below decides the outcome:
+ *   - same brick path and same op: when the stored entry is PENDING and the
+ *     new one is DONE the stored entry is promoted to DONE, otherwise the
+ *     new op is a duplicate;
+ *   - same brick number, stored op is CREATE and new op is DELETE: the
+ *     stored create is marked DONE and the delete is not recorded.
+ * When no entry qualifies the new op is appended to the list.
+ *
+ * missed_snap_op is always consumed: it is either linked into the list or
+ * freed. Every path through this function returns 0.
+ */
+int32_t
+glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo,
+                                   glusterd_snap_op_t *missed_snap_op)
+{
+        int32_t                 ret             = -1;
+        glusterd_snap_op_t     *cur             = NULL;
+        gf_boolean_t            duplicate       = _gf_false;
+        xlator_t               *this            = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(missed_snapinfo);
+        GF_ASSERT(missed_snap_op);
+
+        list_for_each_entry (cur, &missed_snapinfo->snap_ops,
+                             snap_ops_list) {
+                if ((strcmp (cur->brick_path,
+                             missed_snap_op->brick_path) == 0) &&
+                    (cur->op == missed_snap_op->op)) {
+                        /* Same brick and same operation. Unless this is a
+                         * PENDING -> DONE transition the new op carries
+                         * no new information. */
+                        if ((cur->status != GD_MISSED_SNAP_PENDING) ||
+                            (missed_snap_op->status !=
+                                                GD_MISSED_SNAP_DONE)) {
+                                duplicate = _gf_true;
+                                break;
+                        }
+
+                        /* GD_MISSED_SNAP_DONE takes precedence over a
+                         * conflicting pending status */
+                        cur->status = GD_MISSED_SNAP_DONE;
+                        gf_log (this->name, GF_LOG_INFO,
+                                "Updating missed snap status "
+                                "for %s:%d:%s:%d as DONE",
+                                missed_snapinfo->node_snap_info,
+                                cur->brick_num,
+                                cur->brick_path,
+                                cur->op);
+                        ret = 0;
+                        glusterd_free_snap_op (missed_snap_op);
+                        goto out;
+                }
+
+                if ((cur->brick_num == missed_snap_op->brick_num) &&
+                    (cur->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+                    (missed_snap_op->op == GF_SNAP_OPTION_TYPE_DELETE)) {
+                        /* A delete following a create on the same brick
+                         * of the same node: mark the create as done and
+                         * drop the delete */
+                        gf_log (this->name, GF_LOG_INFO,
+                                "Updating missed snap status "
+                                "for %s:%d:%s:%d as DONE",
+                                missed_snapinfo->node_snap_info,
+                                cur->brick_num,
+                                cur->brick_path,
+                                cur->op);
+                        cur->status = GD_MISSED_SNAP_DONE;
+                        ret = 0;
+                        glusterd_free_snap_op (missed_snap_op);
+                        goto out;
+                }
+        }
+
+        if (duplicate == _gf_false) {
+                list_add_tail (&missed_snap_op->snap_ops_list,
+                               &missed_snapinfo->snap_ops);
+        } else {
+                gf_log (this->name, GF_LOG_INFO,
+                        "Duplicate entry. Not updating");
+                glusterd_free_snap_op (missed_snap_op);
+        }
+
+        ret = 0;
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
+/* Add new missed snap entry to the missed_snaps list.
+ *
+ * A glusterd_snap_op_t is built from brick_num, a copy of brick_path,
+ * snap_op and snap_status. priv->missed_snaps_list is then searched for an
+ * entry whose node_snap_info equals missed_info:
+ *   - no such entry: a new glusterd_missed_snap_info is created with a copy
+ *     of missed_info, the op is appended to its snap_ops list and the info
+ *     is appended to priv->missed_snaps_list;
+ *   - entry found: the op is handed to glusterd_update_missed_snap_entry().
+ *
+ * On failure the op object is freed, as is the glusterd_missed_snap_info
+ * when it was created by this call.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int32_t
+glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num,
+ char *brick_path, int32_t snap_op,
+ int32_t snap_status)
+{
+ int32_t ret = -1;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *missed_snap_op = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t free_missed_snap_info = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_info);
+ GF_ASSERT(brick_path);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Create the snap_op object that will carry the brick number, *
+ * brick path, op and status of this missed operation */
+ ret = glusterd_missed_snap_op_new (&missed_snap_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create new missed snap object.");
+ ret = -1;
+ goto out;
+ }
+
+ missed_snap_op->brick_path = gf_strdup(brick_path);
+ if (!missed_snap_op->brick_path) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_num = brick_num;
+ missed_snap_op->op = snap_op;
+ missed_snap_op->status = snap_status;
+
+ /* Look for other entries for the same node and same snap,
+ * i.e. entries whose node_snap_info string equals missed_info */
+ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps) {
+ if (!strcmp (missed_snapinfo->node_snap_info,
+ missed_info)) {
+ /* Found missed snapshot info for *
+ * the same node and same snap */
+ match = _gf_true;
+ break;
+ }
+ }
+
+ if (match == _gf_false) {
+ /* No entry yet for this node:snap pair, create one */
+ ret = glusterd_missed_snapinfo_new (&missed_snapinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create missed snapinfo");
+ goto out;
+ }
+ free_missed_snap_info = _gf_true; /* cleanup at out: may free it now */
+ missed_snapinfo->node_snap_info = gf_strdup(missed_info);
+ if (!missed_snapinfo->node_snap_info) {
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail (&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ list_add_tail (&missed_snapinfo->missed_snaps,
+ &priv->missed_snaps_list);
+
+ ret = 0;
+ goto out;
+ } else {
+ ret = glusterd_update_missed_snap_entry (missed_snapinfo,
+ missed_snap_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to update existing missed snap entry.");
+ goto out;
+ }
+ }
+
+out:
+ if (ret) { /* failure: release what this call allocated */
+ glusterd_free_snap_op (missed_snap_op);
+
+ if (missed_snapinfo &&
+ (free_missed_snap_info == _gf_true)) {
+ if (missed_snapinfo->node_snap_info)
+ GF_FREE (missed_snapinfo->node_snap_info);
+
+ GF_FREE (missed_snapinfo);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Add missing snap entries to the in-memory conf->missed_snap_list.
+ *
+ * For i in [0, missed_snap_count) the string stored in dict under
+ * "missed_snaps_<i>" is parsed as
+ *   <node-id>:<snap-uuid>=<brick_num>:<brick_path>:<snap_op>:<snap_status>
+ * and passed on to glusterd_store_missed_snaps_list() with
+ * "<node-id>:<snap-uuid>" as the key.
+ *
+ * Every field must be present and the three numeric fields must be >= 1;
+ * anything else is reported as "Invalid missed_snap_entry". The fields are
+ * checked for presence before they are converted, so a malformed entry is
+ * rejected instead of handing a NULL token to atoi().
+ *
+ * @param dict               dictionary holding the missed_snaps_<i> entries
+ * @param missed_snap_count  number of entries to read
+ *
+ * @return 0 when every entry was stored, non-zero on the first failure.
+ *         When missed_snap_count is not positive the loop never runs and
+ *         the initial value -1 is returned.
+ */
+int32_t
+glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count)
+{
+        char                   *buf                     = NULL;
+        char                   *tmp                     = NULL;
+        char                   *save_ptr                = NULL;
+        char                   *nodeid                  = NULL;
+        char                   *snap_uuid               = NULL;
+        char                   *brick_num_str           = NULL;
+        char                   *brick_path              = NULL;
+        char                   *snap_op_str             = NULL;
+        char                   *snap_status_str         = NULL;
+        char                    missed_info[PATH_MAX]   = "";
+        char                    name_buf[PATH_MAX]      = "";
+        int32_t                 i                       = -1;
+        int32_t                 ret                     = -1;
+        int32_t                 brick_num               = -1;
+        int32_t                 snap_op                 = -1;
+        int32_t                 snap_status             = -1;
+        glusterd_conf_t        *priv                    = NULL;
+        xlator_t               *this                    = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT(dict);
+
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        /* We can update the missed_snaps_list without acquiring *
+         * any additional locks as big lock will be held.        */
+        for (i = 0; i < missed_snap_count; i++) {
+                snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+                          i);
+                ret = dict_get_str (dict, name_buf, &buf);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unable to fetch %s", name_buf);
+                        goto out;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s",
+                        buf);
+
+                /* Need to make a duplicate string coz the same dictionary *
+                 * is resent to the non-originator nodes, and strtok_r     *
+                 * writes into the string it splits                        */
+                tmp = gf_strdup (buf);
+                if (!tmp) {
+                        ret = -1;
+                        goto out;
+                }
+
+                /* Fetch the node-id, snap-id, brick_num,
+                 * brick_path, snap_op and snap status as raw tokens
+                 */
+                save_ptr        = NULL;
+                nodeid          = strtok_r (tmp, ":", &save_ptr);
+                snap_uuid       = strtok_r (NULL, "=", &save_ptr);
+                brick_num_str   = strtok_r (NULL, ":", &save_ptr);
+                brick_path      = strtok_r (NULL, ":", &save_ptr);
+                snap_op_str     = strtok_r (NULL, ":", &save_ptr);
+                snap_status_str = strtok_r (NULL, ":", &save_ptr);
+
+                /* Convert only tokens that exist; a missing numeric
+                 * field keeps the invalid value -1 */
+                brick_num   = brick_num_str ? atoi (brick_num_str) : -1;
+                snap_op     = snap_op_str ? atoi (snap_op_str) : -1;
+                snap_status = snap_status_str ?
+                              atoi (snap_status_str) : -1;
+
+                if (!nodeid || !snap_uuid || !brick_path ||
+                    brick_num < 1 || snap_op < 1 ||
+                    snap_status < 1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Invalid missed_snap_entry");
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (missed_info, sizeof(missed_info), "%s:%s",
+                          nodeid, snap_uuid);
+
+                ret = glusterd_store_missed_snaps_list (missed_info,
+                                                        brick_num,
+                                                        brick_path,
+                                                        snap_op,
+                                                        snap_status);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to store missed snaps_list");
+                        goto out;
+                }
+
+                GF_FREE (tmp);
+                tmp = NULL;
+        }
+
+out:
+        if (tmp)
+                GF_FREE (tmp);
+
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}