diff options
author | Avra Sengupta <asengupt@redhat.com> | 2014-04-07 05:25:28 +0000 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-04-27 22:00:20 -0700 |
commit | d7b3e068290c41b13ecd664771814202d7d26881 (patch) | |
tree | 7584335affe6ccb961d894d78ddfc95f103bdd7e /xlators/mgmt/glusterd | |
parent | d2cdc392accdd35995370ee5b52aee5e5af7dee4 (diff) |
glusterd/snapshot: Adding snap_vol_id and snap_uuid to missed_snap_list
Persisting missing snapshot info on disk as well as in memory in
the following format:
-------------NODE-UUID--------------:--------------SNAP-UUID-------------=---------SNAP-VOL-ID------------:BRICKNUM:-------BRICKPATH--------:OPERATION:STATUS
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 1 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 3 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=83a3cc05453b46b2a7eda4c9a9208638: 3 :/brick/brick-dirs/brick3: 1 : 1
This data will be stored on disk at /var/lib/glusterd/snaps/missed_snaps_list
In memory we maintain the data as a list of glusterd_missed_snap_info
in conf, the key for this list are the first two fields,
i.e NODE-UUID:SNAP-UUID.
For every NODE-UUID:SNAP-UUID, there can be multiple operations missed
on multiple bricks. So we maintain a list of glusterd_snap_op_t
for every node of glusterd_missed_snap_info
This list is maintained or updated during snapshot create, delete, and restore
operations which are the only operations that if missed, are recorded in this
list.
During snapshot create, if a node is down, or a brick is down, we don't
receive their mount point infos. snap_status of such bricks is marked as
-1, and their brick details are added to this list.
During snapshot delete, we check from originator node, if any other
nodes, holding bricks of the said snap are down. Those are also added to the list.
Also if the node is up, but the snapshot was pending for a snap
brick, and its snap_status is -1, we add that to the list too.
When a subsequent delete entry is processed for an already existing
create entry, we just mark the create entries status as done (2), and don't
add the delete entry to the list.
During snapshot restore, we check from originator node, if any other
nodes, holding bricks of the said snap are down. Those are also added to the list.
Also if the node is up, but the snapshot was pending for a snap
brick, and its snap_status is -1, we add that to the list too.
Like delete when a subsequent restore entry is processed for an already existing
create entry, we just mark the create entries status as done (2), and don't
add the restore entry to the list.
Change-Id: I54f63e28d3c40555d0f84528f38227103171f594
BUG: 1061685
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/7454
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 91 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 284 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 45 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 25 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 10 |
7 files changed, 305 insertions, 156 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 5869a88d4be..ed756f15bc0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3896,6 +3896,52 @@ glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) } static int +get_volinfo_from_brickid (char *brickid, glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickid); + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + if (!brick) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid brickid"); + goto out; + } + + *brick = '\0'; + brick++; + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + /* Check if it is a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to find volinfo"); + goto out; + } + } + + ret = 0; +out: + GF_FREE (brickid_dup); + return ret; +} + +static int get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) { glusterd_volinfo_t *volinfo = NULL; @@ -3938,13 +3984,14 @@ out: int __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int ret = 0; - char *brickid = NULL; - glusterd_brickinfo_t *brickinfo = NULL; + char *brickid = NULL; + int ret = 0; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; brickid = mydata; if (!brickid) @@ -3961,6 +4008,37 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, switch (event) { case RPC_CLNT_CONNECT: + /* If a node on coming back up, already starts a brick + * before the handshake, and the notification comes after + * the handshake is done, then we need to check if this + * is a restored brick with a snapshot pending. If so, we + * need to stop the brick + */ + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = get_volinfo_from_brickid (brickid, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volinfo from " + "brickid(%s)", brickid); + goto out; + } + + ret = glusterd_brick_stop (volinfo, brickinfo, + _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to stop %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + break; + } gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s", brickinfo->hostname, brickinfo->path); glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); @@ -3986,6 +4064,7 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, break; } +out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index b294d1bc6b2..b99f2ac89ad 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -52,10 +52,66 @@ char snap_mount_folder[PATH_MAX]; +/* Look for disconnected peers, for missed snap creates or deletes */ static int32_t glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, - char *snap_uuid, struct list_head *peers, - int32_t op); + struct list_head *peers, int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + vol, brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} /* This function will restore a snapshot volumes * @@ -105,6 +161,7 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } + /* TODO : As of now there is only volume in snapshot. * Change this when multiple volume snapshot is introduced */ @@ -122,7 +179,6 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* From origin glusterd check if * * any peers with snap bricks is down */ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, - snap_volinfo->volname, &priv->peers, GF_SNAP_OPTION_TYPE_RESTORE); if (ret) { @@ -1060,7 +1116,7 @@ glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) /* Adding missed delete to the dict */ ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_vol->volname, + snap_vol, brickinfo, brick_count + 1, GF_SNAP_OPTION_TYPE_DELETE); @@ -2556,11 +2612,12 @@ out: /* Added missed_snap_entry to rsp_dict */ int32_t -glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *brickinfo, int32_t brick_number, int32_t op) { - char *buf = NULL; + char *snap_uuid = NULL; char missed_snap_entry[PATH_MAX] = ""; char name_buf[PATH_MAX] = ""; int32_t missed_snap_count = -1; @@ -2570,20 +2627,20 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, this = THIS; GF_ASSERT (this); GF_ASSERT (rsp_dict); - GF_ASSERT (snap_uuid); + GF_ASSERT (snap_vol); GF_ASSERT (brickinfo); - snprintf (missed_snap_entry, sizeof(missed_snap_entry), - "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), - snap_uuid, brick_number, brickinfo->path, op, - GD_MISSED_SNAP_PENDING); - - buf = gf_strdup (missed_snap_entry); - if (!buf) { + snap_uuid = gf_strdup (uuid_utoa (snap_vol->snapshot->snap_id)); + if (!snap_uuid) { ret = -1; goto out; } + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%s:%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, snap_vol->volname, brick_number, brickinfo->path, + op, GD_MISSED_SNAP_PENDING); + /* Fetch the missed_snap_count from the dict */ ret = dict_get_int32 (rsp_dict, "missed_snap_count", &missed_snap_count); @@ -2595,12 +2652,12 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, /* Setting the missed_snap_entry in the rsp_dict */ snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", missed_snap_count); - ret = dict_set_dynstr (rsp_dict, name_buf, buf); + ret = dict_set_dynstr_with_alloc (rsp_dict, name_buf, + missed_snap_entry); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set missed_snap_entry (%s) " - "in the rsp_dict.", buf); - GF_FREE (buf); + "in the rsp_dict.", missed_snap_entry); goto out; } missed_snap_count++; @@ -2616,6 +2673,9 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, } out: + if (snap_uuid) + GF_FREE (snap_uuid); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } @@ -2948,7 +3008,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, if (add_missed_snap) { ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_vol->volname, + snap_vol, original_brickinfo, brick_count + 1, GF_SNAP_OPTION_TYPE_CREATE); @@ -3048,70 +3108,6 @@ out: return ret; } -/* Look for disconnected peers, for missed snap creates or deletes */ -static int32_t -glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, - char *snap_uuid, struct list_head *peers, - int32_t op) -{ - int32_t brick_count = -1; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (rsp_dict); - GF_ASSERT (peers); - GF_ASSERT (vol); - GF_ASSERT (snap_uuid); - - brick_count = 0; - list_for_each_entry (brickinfo, &vol->bricks, brick_list) { - if (!uuid_compare (brickinfo->uuid, MY_UUID)) { - /* If the brick belongs to the same node */ - brick_count++; - continue; - } - - list_for_each_entry (peerinfo, peers, uuid_list) { - if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { - /* If the brick doesnt belong to this peer */ - continue; - } - - /* Found peer who owns the brick, * - * if peer is not connected or not * - * friend add it to missed snap list */ - if (!(peerinfo->connected) || - (peerinfo->state.state != - GD_FRIEND_STATE_BEFRIENDED)) { - ret = glusterd_add_missed_snaps_to_dict - (rsp_dict, - snap_uuid, - brickinfo, - brick_count + 1, - op); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add missed snapshot " - "info for %s:%s in the " - "rsp_dict", brickinfo->hostname, - brickinfo->path); - goto out; - } - } - } - brick_count++; - } - - ret = 0; -out: - gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); - return ret; -} - glusterd_volinfo_t * glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, dict_t *dict, dict_t *rsp_dict, int64_t volcount) @@ -3592,7 +3588,6 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, /* From origin glusterd check if * * any peers with snap bricks is down */ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, - snap_volinfo->volname, &priv->peers, GF_SNAP_OPTION_TYPE_DELETE); if (ret) { @@ -3707,7 +3702,14 @@ glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, goto out; } - ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_update_missed_snaps (); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to update missed_snaps_list"); @@ -5330,6 +5332,30 @@ glusterd_free_snap_op (glusterd_snap_op_t *snap_op) } } +static inline void +glusterd_free_missed_snapinfo (glusterd_missed_snap_info *missed_snapinfo) +{ + glusterd_snap_op_t *snap_opinfo = NULL; + glusterd_snap_op_t *tmp = NULL; + + if (missed_snapinfo) { + list_for_each_entry_safe (snap_opinfo, tmp, + &missed_snapinfo->snap_ops, + snap_ops_list) { + glusterd_free_snap_op (snap_opinfo); + snap_opinfo = NULL; + } + + if (missed_snapinfo->node_uuid) + GF_FREE (missed_snapinfo->node_uuid); + + if (missed_snapinfo->snap_uuid) + GF_FREE (missed_snapinfo->snap_uuid); + + GF_FREE (missed_snapinfo); + } +} + /* Look for duplicates and accordingly update the list */ int32_t glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, @@ -5347,6 +5373,13 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, snap_ops_list) { + /* If the entry is not for the same snap_vol_id + * then continue + */ + if (strcmp (snap_opinfo->snap_vol_id, + missed_snap_op->snap_vol_id)) + continue; + if ((!strcmp (snap_opinfo->brick_path, missed_snap_op->brick_path)) && (snap_opinfo->op == missed_snap_op->op)) { @@ -5358,8 +5391,10 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, snap_opinfo->status = GD_MISSED_SNAP_DONE; gf_log (this->name, GF_LOG_INFO, "Updating missed snap status " - "for %s:%d:%s:%d as DONE", - missed_snapinfo->node_snap_info, + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op); @@ -5372,15 +5407,19 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, } else if ((snap_opinfo->brick_num == missed_snap_op->brick_num) && (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && - (missed_snap_op->op == - GF_SNAP_OPTION_TYPE_DELETE)) { + ((missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE) || + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_RESTORE))) { /* Optimizing create and delete entries for the same * brick and same node */ gf_log (this->name, GF_LOG_INFO, "Updating missed snap status " - "for %s:%d:%s:%d as DONE", - missed_snapinfo->node_snap_info, + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op); @@ -5408,10 +5447,13 @@ out: /* Add new missed snap entry to the missed_snaps list. */ int32_t -glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, - char *brick_path, int32_t snap_op, - int32_t snap_status) +glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id, + int32_t brick_num, char *brick_path, + int32_t snap_op, int32_t snap_status) { + char *buf = NULL; + char *save_ptr = NULL; + char node_snap_info[PATH_MAX] = ""; int32_t ret = -1; glusterd_missed_snap_info *missed_snapinfo = NULL; glusterd_snap_op_t *missed_snap_op = NULL; @@ -5423,6 +5465,7 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, this = THIS; GF_ASSERT(this); GF_ASSERT(missed_info); + GF_ASSERT(snap_vol_id); GF_ASSERT(brick_path); priv = this->private; @@ -5438,6 +5481,11 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, goto out; } + missed_snap_op->snap_vol_id = gf_strdup(snap_vol_id); + if (!missed_snap_op->snap_vol_id) { + ret = -1; + goto out; + } missed_snap_op->brick_path = gf_strdup(brick_path); if (!missed_snap_op->brick_path) { ret = -1; @@ -5450,8 +5498,10 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, /* Look for other entries for the same node and same snap */ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, missed_snaps) { - if (!strcmp (missed_snapinfo->node_snap_info, - missed_info)) { + snprintf (node_snap_info, sizeof(node_snap_info), + "%s:%s", missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid); + if (!strcmp (node_snap_info, missed_info)) { /* Found missed snapshot info for * * the same node and same snap */ match = _gf_true; @@ -5468,8 +5518,24 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, goto out; } free_missed_snap_info = _gf_true; - missed_snapinfo->node_snap_info = gf_strdup(missed_info); - if (!missed_snapinfo->node_snap_info) { + buf = strtok_r (missed_info, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->node_uuid = gf_strdup(buf); + if (!missed_snapinfo->node_uuid) { + ret = -1; + goto out; + } + + buf = strtok_r (NULL, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->snap_uuid = gf_strdup(buf); + if (!missed_snapinfo->snap_uuid) { ret = -1; goto out; } @@ -5496,12 +5562,8 @@ out: glusterd_free_snap_op (missed_snap_op); if (missed_snapinfo && - (free_missed_snap_info == _gf_true)) { - if (missed_snapinfo->node_snap_info) - GF_FREE (missed_snapinfo->node_snap_info); - - GF_FREE (missed_snapinfo); - } + (free_missed_snap_info == _gf_true)) + glusterd_free_missed_snapinfo (missed_snapinfo); } gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); @@ -5517,6 +5579,7 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) char *save_ptr = NULL; char *nodeid = NULL; char *snap_uuid = NULL; + char *snap_vol_id = NULL; char *brick_path = NULL; char missed_info[PATH_MAX] = ""; char name_buf[PATH_MAX] = ""; @@ -5563,13 +5626,14 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) */ nodeid = strtok_r (tmp, ":", &save_ptr); snap_uuid = strtok_r (NULL, "=", &save_ptr); + snap_vol_id = strtok_r (NULL, ":", &save_ptr); brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); brick_path = strtok_r (NULL, ":", &save_ptr); snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); if (!nodeid || !snap_uuid || !brick_path || - brick_num < 1 || snap_op < 1 || + !snap_vol_id || brick_num < 1 || snap_op < 1 || snap_status < 1) { gf_log (this->name, GF_LOG_ERROR, "Invalid missed_snap_entry"); @@ -5580,11 +5644,12 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) snprintf (missed_info, sizeof(missed_info), "%s:%s", nodeid, snap_uuid); - ret = glusterd_store_missed_snaps_list (missed_info, - brick_num, - brick_path, - snap_op, - snap_status); + ret = glusterd_add_new_entry_to_list (missed_info, + snap_vol_id, + brick_num, + brick_path, + snap_op, + snap_status); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to store missed snaps_list"); @@ -5595,6 +5660,7 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) tmp = NULL; } + ret = 0; out: if (tmp) GF_FREE (tmp); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index b803ba28253..9c04aab50c7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -2994,6 +2994,7 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) { char buf[PATH_MAX] = ""; char path[PATH_MAX] = ""; + char *snap_vol_id = NULL; char *missed_node_info = NULL; char *brick_path = NULL; char *value = NULL; @@ -3048,12 +3049,13 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) } /* Fetch the brick_num, brick_path, snap_op and snap status */ - brick_num = atoi(strtok_r (value, ":", &save_ptr)); + snap_vol_id = strtok_r (value, ":", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); brick_path = strtok_r (NULL, ":", &save_ptr); snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); - if (!missed_node_info || !brick_path || + if (!missed_node_info || !brick_path || !snap_vol_id || brick_num < 1 || snap_op < 1 || snap_status < 1) { gf_log (this->name, GF_LOG_ERROR, @@ -3062,11 +3064,12 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) goto out; } - ret = glusterd_store_missed_snaps_list (missed_node_info, - brick_num, - brick_path, - snap_op, - snap_status); + ret = glusterd_add_new_entry_to_list (missed_node_info, + snap_vol_id, + brick_num, + brick_path, + snap_op, + snap_status); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to store missed snaps_list"); @@ -3145,6 +3148,7 @@ out: int32_t glusterd_store_write_missed_snapinfo (int32_t fd) { + char key[PATH_MAX] = ""; char value[PATH_MAX] = ""; int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3164,14 +3168,15 @@ glusterd_store_write_missed_snapinfo (int32_t fd) list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, snap_ops_list) { - snprintf (value, sizeof(value), "%d:%s:%d:%d", + snprintf (key, sizeof(key), "%s:%s", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid); + snprintf (value, sizeof(value), "%s:%d:%s:%d:%d", + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op, snap_opinfo->status); - ret = gf_store_save_value - (fd, - missed_snapinfo->node_snap_info, - value); + ret = gf_store_save_value (fd, key, value); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to write missed snapinfo"); @@ -3189,7 +3194,7 @@ out: /* Adds the missed snap entries to the in-memory conf->missed_snap_list * * and writes them to disk */ int32_t -glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +glusterd_store_update_missed_snaps () { int32_t fd = -1; int32_t ret = -1; @@ -3198,17 +3203,10 @@ glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) this = THIS; GF_ASSERT(this); - GF_ASSERT(dict); priv = this->private; GF_ASSERT (priv); - if (missed_snap_count < 1) { - gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); - ret = 0; - goto out; - } - ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " @@ -3224,13 +3222,6 @@ glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) goto out; } - ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add missed snaps to list"); - goto out; - } - ret = glusterd_store_write_missed_snapinfo (fd); if (ret) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 64c073a8a39..19123f0051b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -168,7 +168,6 @@ int32_t glusterd_store_snap (glusterd_snap_t *snap); int32_t -glusterd_store_update_missed_snaps (dict_t *dict, - int32_t missed_snap_count); +glusterd_store_update_missed_snaps (); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 74317eb446d..b4644432d7c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -709,7 +709,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict, /* Adding missed delete to the dict */ ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_volinfo->volname, + snap_volinfo, brickinfo, brick_count + 1, GF_SNAP_OPTION_TYPE_RESTORE); @@ -2693,8 +2693,10 @@ glusterd_add_missed_snaps_to_export_dict (dict_t *vols) snap_ops_list) { snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", missed_snap_count); - snprintf (value, sizeof(value), "%s=%d:%s:%d:%d", - missed_snapinfo->node_snap_info, + snprintf (value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op, @@ -3937,7 +3939,11 @@ glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo, old_brickinfo->hostname, old_brickinfo->path, new_volinfo, &new_brickinfo); - if (ret) { + /* If the brick is stale, i.e it's not a part of the new volume + * or if it's part of the new volume and is pending a snap, + * then stop the brick process + */ + if (ret || (new_brickinfo->snap_status == -1)) { /*TODO: may need to switch to 'atomic' flavour of * brick_stop, once we make peer rpc program also * synctask enabled*/ @@ -4240,7 +4246,14 @@ glusterd_import_friend_missed_snap_list (dict_t *vols) goto out; } - ret = glusterd_store_update_missed_snaps (vols, missed_snap_count); + ret = glusterd_add_missed_snaps_to_list (vols, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_update_missed_snaps (); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to update missed_snaps_list"); @@ -10086,7 +10099,6 @@ glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) if (!new_missed_snapinfo) goto out; - new_missed_snapinfo->node_snap_info = NULL; INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); @@ -10116,7 +10128,6 @@ glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) if (!new_snap_op) goto out; - new_snap_op->brick_path = NULL; new_snap_op->brick_num = -1; new_snap_op->op = -1; new_snap_op->status = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 0f1c5237294..ec65357f9a3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -715,7 +715,8 @@ int32_t glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); int32_t -glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *brickinfo, int32_t brick_number, int32_t op); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 7568a2e9ad4..d50fa03d23c 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -393,6 +393,7 @@ struct glusterd_snap_ { }; typedef struct glusterd_snap_op_ { + char *snap_vol_id; int32_t brick_num; char *brick_path; int32_t op; @@ -401,7 +402,8 @@ typedef struct glusterd_snap_op_ { } glusterd_snap_op_t; typedef struct glusterd_missed_snap_ { - char *node_snap_info; + char *node_uuid; + char *snap_uuid; struct list_head missed_snaps; struct list_head snap_ops; } glusterd_missed_snap_info; @@ -989,8 +991,8 @@ int32_t glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); int32_t -glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, - char *brick_path, int32_t snap_op, - int32_t snap_status); +glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id, + int32_t brick_num, char *brick_path, + int32_t snap_op, int32_t snap_status); #endif |