From 123db32a53f7e2f99c0d63b368ed8a8ee6b41f62 Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Thu, 20 Mar 2014 14:46:02 +0530 Subject: mgmt/glusterd: do cleanup of snapshots in post-validate phase if half baked objects are there Change-Id: I372cac98ad054cdc1a6fbc7f6c77c25981063b2f Signed-off-by: Raghavendra Bhat Reviewed-on: http://review.gluster.org/7237 Reviewed-by: Rajesh Joseph Tested-by: Rajesh Joseph --- xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c | 4 +- xlators/mgmt/glusterd/src/glusterd-mgmt.c | 166 +++++++++++++++++----- xlators/mgmt/glusterd/src/glusterd-mgmt.h | 2 +- xlators/mgmt/glusterd/src/glusterd-snapshot.c | 147 ++++++++++++++++++- xlators/mgmt/glusterd/src/glusterd-store.c | 57 ++++++++ xlators/mgmt/glusterd/src/glusterd-utils.c | 1 + xlators/mgmt/glusterd/src/glusterd.h | 5 + 7 files changed, 344 insertions(+), 38 deletions(-) (limited to 'xlators') diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c index 27d40b3a7..a2546ca94 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -659,8 +659,8 @@ glusterd_handle_post_validate_fn (rpcsvc_request_t *req) return -1; } - ret = gd_mgmt_v3_post_validate_fn (op_req.op, dict, &op_errstr, - rsp_dict); + ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); if (ret) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index cdc51849f..380f149a9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -187,14 +187,35 @@ out: } int32_t -gd_mgmt_v3_post_validate_fn (glusterd_op_t op, dict_t *dict, +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict) { int ret = -1; xlator_t *this = THIS; + GF_ASSERT (this); + + switch (op) { + case GD_OP_SNAP: 
+ { + ret = glusterd_snapshot_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "postvalidate operation failed"); + goto out; + } + break; + } + default: + break; + } + ret = 0; - gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); return ret; } @@ -211,9 +232,16 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, int op_errno = -1; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. 
+ */ frame = myframe; args = frame->local; peerinfo = frame->cookie; @@ -225,6 +253,12 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, goto out; } + if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); if (ret < 0) @@ -234,6 +268,7 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, op_ret = rsp.op_ret; op_errno = rsp.op_errno; + out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, GLUSTERD_MGMT_V3_LOCK, @@ -329,7 +364,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op, gd_synctask_barrier_wait((&args), peer_cnt); if (args.errstr) - *op_errstr = gf_strdup (args.errstr); + *op_errstr = gf_strdup (args.errstr); ret = args.op_ret; @@ -395,7 +430,6 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, dict_t *rsp_dict = NULL; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); frame = myframe; @@ -409,6 +443,11 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, goto out; } + if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); if (ret < 0) @@ -451,6 +490,7 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, op_ret = rsp.op_ret; op_errno = rsp.op_errno; } + out: if (rsp_dict) dict_unref (rsp_dict); @@ -641,7 +681,6 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, int op_errno = -1; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); frame = myframe; @@ -650,11 +689,21 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, frame->local = NULL; frame->cookie = NULL; + /* If the operation failed, then iov can be NULL. 
So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ if (-1 == req->rpc_status) { op_errno = ENOTCONN; goto out; } + if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); if (ret < 0) @@ -664,10 +713,11 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, op_ret = rsp.op_ret; op_errno = rsp.op_errno; + out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, - GLUSTERD_MGMT_V3_BRICK_OP, - peerinfo, rsp.uuid); + GLUSTERD_MGMT_V3_BRICK_OP, + peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); return 0; @@ -810,7 +860,6 @@ gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, dict_t *rsp_dict = NULL; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); frame = myframe; @@ -824,6 +873,12 @@ gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, goto out; } + if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); if (ret < 0) @@ -1022,7 +1077,6 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, int op_errno = -1; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); frame = myframe; @@ -1036,6 +1090,12 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, goto out; } + if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); if (ret < 0) @@ -1045,6 +1105,7 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, op_ret = rsp.op_ret; op_errno = rsp.op_errno; + out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, GLUSTERD_MGMT_V3_POST_VALIDATE, @@ -1063,10 +1124,10 @@ gd_mgmt_v3_post_validate_cbk 
(struct rpc_req *req, struct iovec *iov, } int -gd_mgmt_v3_post_validate (glusterd_op_t op, dict_t *op_ctx, - glusterd_peerinfo_t *peerinfo, - struct syncargs *args, uuid_t my_uuid, - uuid_t recv_uuid) +gd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) { int ret = -1; gd1_mgmt_v3_post_val_req req = {{0},}; @@ -1084,6 +1145,7 @@ gd_mgmt_v3_post_validate (glusterd_op_t op, dict_t *op_ctx, uuid_copy (req.uuid, my_uuid); req.op = op; + req.op_ret = op_ret; synclock_unlock (&conf->big_lock); ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, &gd_mgmt_v3_prog, @@ -1098,7 +1160,8 @@ out: int glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, - dict_t *req_dict, char **op_errstr, int npeers) + int32_t op_ret, dict_t *req_dict, + char **op_errstr, int npeers) { int ret = -1; int peer_cnt = 0; @@ -1120,8 +1183,8 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, } /* Post Validation on local node */ - ret = gd_mgmt_v3_post_validate_fn (op, req_dict, op_errstr, - rsp_dict); + ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, + rsp_dict); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -1155,8 +1218,8 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry (peerinfo, peers, op_peers_list) { - gd_mgmt_v3_post_validate (op, req_dict, peerinfo, &args, - MY_UUID, peer_uuid); + gd_mgmt_v3_post_validate (op, op_ret, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); @@ -1190,7 +1253,6 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, int op_errno = -1; GF_ASSERT(req); - GF_ASSERT(iov); GF_ASSERT(myframe); frame = myframe; @@ -1204,6 +1266,12 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, goto out; } + 
if (!iov) { + gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); if (ret < 0) @@ -1213,6 +1281,7 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, op_ret = rsp.op_ret; op_errno = rsp.op_errno; + out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, GLUSTERD_MGMT_V3_UNLOCK, @@ -1413,7 +1482,11 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, } /* POST-COMMIT VALIDATE PHASE */ - ret = glusterd_mgmt_v3_post_validate (conf, op, req_dict, + /* As of now, post_validate is not handling any other + commands other than snapshot. So as of now, I am + sending 0 (op_ret as 0). + */ + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, req_dict, &op_errstr, npeers); if (ret) { gf_log ("", GF_LOG_ERROR, "Post Validation Failed"); @@ -1470,6 +1543,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, uuid_t *originator_uuid = NULL; gf_boolean_t success = _gf_false; char *tmp_errstr = NULL; + int op_ret = -1; this = THIS; GF_ASSERT (this); @@ -1549,11 +1623,28 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, &op_errstr, npeers); if (ret) { - gf_log ("", GF_LOG_ERROR, "Brick Ops Failed"); + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); goto unbarrier; } /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. 
Suppose one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + then treat it as a snapshot create failure and trigger cleanup. + i.e. the number of commit responses received by the originator + glusterd should be the same as the number of peers it has sent the + request to (i.e. npeers variable). If not, then originator glusterd + will initiate cleanup in post-validate fop. + Question: What if one of the other glusterds goes down as explained + above and along with it the originator glusterd also goes down? + Who will initiate the cleanup? + */ ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, &op_errstr, npeers); if (ret) { @@ -1576,25 +1667,31 @@ unbarrier: goto out; ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, &op_errstr, npeers); - if (ret || (success == _gf_false)) { - gf_log ("", GF_LOG_ERROR, "Brick Ops Failed"); - ret = -1; - goto out; - } - /* POST-COMMIT VALIDATE PHASE */ - ret = glusterd_mgmt_v3_post_validate (conf, op, req_dict, - &op_errstr, npeers); + if (ret) { - gf_log ("", GF_LOG_ERROR, "Post Validation Failed"); + gf_log ("", GF_LOG_ERROR, "Brick Ops Failed"); goto out; } ret = 0; + out: + op_ret = ret; + + if (success == _gf_false) + op_ret = -1; + + /* POST-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + op_ret = -1; + } /* UNLOCK PHASE FOR PEERS*/ (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, - ret, &op_errstr, + op_ret, &op_errstr, npeers, is_acquired); /* If the commit op (snapshot taking) failed, then the error is stored @@ -1604,7 +1701,7 @@ out: is sent to cli. 
*/ if (tmp_errstr) { - if (ret && op_errstr) { + if (op_errstr) { gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op" "failed with the error %s", op_errstr); GF_FREE (op_errstr); @@ -1614,7 +1711,8 @@ out: } /* SEND CLI RESPONSE */ - glusterd_op_send_cli_response (op, ret, 0, req, dict, op_errstr); + + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); /* LOCAL VOLUME(S) UNLOCK */ if (!is_acquired) diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h index 25ce32e01..685f59132 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.h +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -28,7 +28,7 @@ gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, char **op_errstr, dict_t *rsp_dict); int32_t -gd_mgmt_v3_post_validate_fn (glusterd_op_t op, dict_t *dict, +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, char **op_errstr, dict_t *rsp_dict); int32_t diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index db2d88831..5759bded4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -2685,7 +2685,12 @@ glusterd_create_snap_object (dict_t *dict) strcpy (snap->snapname, snapname); uuid_copy (snap->snap_id, *snap_id); snap->time_stamp = (time_t)time_stamp; - snap->snap_status = GD_SNAP_STATUS_IN_USE; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. 
+ */ + snap->snap_status = GD_SNAP_STATUS_INIT; if (description) { snap->description = gf_strdup (description); if (snap->description == NULL) { @@ -2705,8 +2710,10 @@ glusterd_create_snap_object (dict_t *dict) list_add_order (&snap->snap_list, &priv->snapshots, glusterd_compare_snap_time); + gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list", snap->snapname); + ret = 0; out: @@ -3557,6 +3564,61 @@ out: return ret; } +int32_t +glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getting the snap " + "name failed (volume: %s)", volinfo->volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. 
+ */ + snap = glusterd_find_snap_by_name (name); + if (!snap) { + gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove (snap, _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", + name); + goto out; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + int32_t glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, dict_t *rsp_dict) @@ -3643,6 +3705,14 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, } } + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + ret = 0; out: @@ -4569,6 +4639,38 @@ out : return ret; } +int32_t +glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + if (op_ret) { + ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "cleanup operation " + "failed"); + goto out; + } + } + + ret = 0; + +out: + return ret; +} + int32_t glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -4796,6 +4898,49 @@ out: return ret; } +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); //not sure if this is needed, verify. 
+ + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "post-validation failed"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + int glusterd_handle_snapshot_fn (rpcsvc_request_t *req) { diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index ae20b60db..613f03abc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -3030,6 +3030,35 @@ out: return ret; } +int32_t +glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolve brick failed in restore"); + goto out; + } + } + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + int glusterd_store_update_snap (glusterd_snap_t *snap) { @@ -3152,6 +3181,34 @@ glusterd_store_retrieve_snap (char *snapname) goto out; } + /* Unlike bricks of normal volumes which are resolved at the end of + the glusterd restore, the bricks belonging to the snap volumes of + each snap should be resolved as part of snapshot restore itself. 
Because if the snapshot has to be removed, then resolving bricks + helps glusterd understand which of the bricks have its own uuid, + so that it can kill those bricks. + */ + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" + " failed (snap: %s)", snap?snap->snapname:""); + + /* When the snapshot command from cli is received, the on disk and + in memory structures for the snapshot are created (with the status + being marked as GD_SNAP_STATUS_INIT). Once the backend snapshot is + taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + dies after taking the backend snapshot, but before updating the + status, then when glusterd comes up, it should treat that snapshot + as a failed snapshot and clean it up. + */ + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (snap, _gf_true, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to remove" + " the snapshot %s", snap->snapname); + goto out; + } + /* TODO: list_add_order can do 'N-square' comparisions and is not efficient. 
Find a better solution to store the snap in order */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 90c624e2f..cd2fb806d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5316,6 +5316,7 @@ glusterd_hostname_to_uuid (char *hostname, uuid_t uuid) uuid_copy (uuid, MY_UUID); ret = 0; } else { + ret = 0; goto out; } } else { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 7e065b77b..e81369ee4 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -947,9 +947,14 @@ int glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict); char * glusterd_build_snap_device_path (char *device, char *snapname); int32_t glusterd_snap_remove (glusterd_snap_t *snap, gf_boolean_t remove_lvm, gf_boolean_t force); +int32_t +glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict); #endif -- cgit