diff options
author | Avra Sengupta <asengupt@redhat.com> | 2014-04-07 05:25:28 +0000 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-04-27 22:00:20 -0700 |
commit | d7b3e068290c41b13ecd664771814202d7d26881 (patch) | |
tree | 7584335affe6ccb961d894d78ddfc95f103bdd7e /xlators/mgmt/glusterd/src/glusterd-handler.c | |
parent | d2cdc392accdd35995370ee5b52aee5e5af7dee4 (diff) |
glusterd/snapshot: Adding snap_vol_id and snap_uuid to missed_snap_list
Persisting missing snapshot info on disk as well as in memory in
the following format:
-------------NODE-UUID--------------:--------------SNAP-UUID-------------=---------SNAP-VOL-ID------------:BRICKNUM:-------BRICKPATH--------:OPERATION:STATUS
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 1 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=a17b4fe42c5a45f7a916438643edaa13: 3 :/brick/brick-dirs/brick3: 3 : 1
927cb5fe-63da-48f5-82f6-e6a09ddc81c4:8258b18f-d408-483d-8239-204039dc6397=83a3cc05453b46b2a7eda4c9a9208638: 3 :/brick/brick-dirs/brick3: 1 : 1
This data will be stored on disk at /var/lib/glusterd/snaps/missed_snaps_list
In memory we maintain the data as a list of glusterd_missed_snap_info
in conf, the key for this list are the first two fields,
i.e NODE-UUID:SNAP-UUID.
For every NODE-UUID:SNAP-UUID, there can be multiple operations missed
on multiple bricks. So we maintain a list of glusterd_snap_op_t
for every node of glusterd_missed_snap_info
This list is maintained or updated during snapshot create, delete, and restore
operations which are the only operations that if missed, are recorded in this
list.
During snapshot create, if a node is down, or a brick is down, we don't
receive their mount point infos. snap_status of such bricks is marked as
-1, and their brick details are added to this list.
During snapshot delete, we check from originator node, if any other
nodes, holding bricks of the said snap are down. Those are also added to the list.
Also if the node is up, but the snapshot was pending for a snap
brick, and its snap_status is -1, we add that to the list too.
When a subsequent delete entry is processed for an already existing
create entry, we just mark the create entries status as done (2), and don't
add the delete entry to the list.
During snapshot restore, we check from originator node, if any other
nodes, holding bricks of the said snap are down. Those are also added to the list.
Also if the node is up, but the snapshot was pending for a snap
brick, and its snap_status is -1, we add that to the list too.
Like delete when a subsequent restore entry is processed for an already existing
create entry, we just mark the create entries status as done (2), and don't
add the restore entry to the list.
Change-Id: I54f63e28d3c40555d0f84528f38227103171f594
BUG: 1061685
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/7454
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-handler.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 91 |
1 files changed, 85 insertions, 6 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 5869a88d4be..ed756f15bc0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3896,6 +3896,52 @@ glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) } static int +get_volinfo_from_brickid (char *brickid, glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickid); + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + if (!brick) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid brickid"); + goto out; + } + + *brick = '\0'; + brick++; + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + /* Check if it is a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to find volinfo"); + goto out; + } + } + + ret = 0; +out: + GF_FREE (brickid_dup); + return ret; +} + +static int get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) { glusterd_volinfo_t *volinfo = NULL; @@ -3938,13 +3984,14 @@ out: int __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int ret = 0; - char *brickid = NULL; - glusterd_brickinfo_t *brickinfo = NULL; + char *brickid = NULL; + int ret = 0; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; brickid = mydata; if (!brickid) @@ -3961,6 +4008,37 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, switch (event) { case RPC_CLNT_CONNECT: + /* If a node on coming back up, already starts a brick + * before the handshake, and the notification comes after + * the handshake is done, then we need to check if this + * is a restored brick with a snapshot pending. If so, we + * need to stop the brick + */ + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = get_volinfo_from_brickid (brickid, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volinfo from " + "brickid(%s)", brickid); + goto out; + } + + ret = glusterd_brick_stop (volinfo, brickinfo, + _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to stop %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + break; + } gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s", brickinfo->hostname, brickinfo->path); glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); @@ -3986,6 +4064,7 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, break; } +out: return ret; } |