summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-mgmt.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-mgmt.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c166
1 files changed, 132 insertions, 34 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index cdc51849f..380f149a9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -187,14 +187,35 @@ out:
}
int32_t
-gd_mgmt_v3_post_validate_fn (glusterd_op_t op, dict_t *dict,
+gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
char **op_errstr, dict_t *rsp_dict)
{
int ret = -1;
xlator_t *this = THIS;
+ GF_ASSERT (this);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ {
+ ret = glusterd_snapshot_postvalidate (dict, op_ret,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "postvalidate operation failed");
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
ret = 0;
- gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret);
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret);
return ret;
}
@@ -211,9 +232,16 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
int op_errno = -1;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
+ /* Even though the lock command has failed, while collating the errors
+ (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be
+ used. @args is obtained from frame->local. So before checking the
+ status of the request and going out if its a failure, args should be
+ set to frame->local. Otherwise, while collating args will be NULL.
+ This applies to other phases such as prevalidate, brickop, commit and
+ postvalidate also.
+ */
frame = myframe;
args = frame->local;
peerinfo = frame->cookie;
@@ -225,6 +253,12 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
if (ret < 0)
@@ -234,6 +268,7 @@ gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
+
out:
gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
GLUSTERD_MGMT_V3_LOCK,
@@ -329,7 +364,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op,
gd_synctask_barrier_wait((&args), peer_cnt);
if (args.errstr)
- *op_errstr = gf_strdup (args.errstr);
+ *op_errstr = gf_strdup (args.errstr);
ret = args.op_ret;
@@ -395,7 +430,6 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
dict_t *rsp_dict = NULL;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
frame = myframe;
@@ -409,6 +443,11 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
if (ret < 0)
@@ -451,6 +490,7 @@ gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
}
+
out:
if (rsp_dict)
dict_unref (rsp_dict);
@@ -641,7 +681,6 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov,
int op_errno = -1;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
frame = myframe;
@@ -650,11 +689,21 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov,
frame->local = NULL;
frame->cookie = NULL;
+ /* If the operation failed, then iov can be NULL. So better check the
+ status of the operation and then worry about iov (if the status of
+ the command is success)
+ */
if (-1 == req->rpc_status) {
op_errno = ENOTCONN;
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
if (ret < 0)
@@ -664,10 +713,11 @@ gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov,
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
+
out:
gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
- GLUSTERD_MGMT_V3_BRICK_OP,
- peerinfo, rsp.uuid);
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ peerinfo, rsp.uuid);
STACK_DESTROY (frame->root);
synctask_barrier_wake(args);
return 0;
@@ -810,7 +860,6 @@ gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov,
dict_t *rsp_dict = NULL;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
frame = myframe;
@@ -824,6 +873,12 @@ gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
if (ret < 0)
@@ -1022,7 +1077,6 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
int op_errno = -1;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
frame = myframe;
@@ -1036,6 +1090,12 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
if (ret < 0)
@@ -1045,6 +1105,7 @@ gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
+
out:
gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
GLUSTERD_MGMT_V3_POST_VALIDATE,
@@ -1063,10 +1124,10 @@ gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov,
}
int
-gd_mgmt_v3_post_validate (glusterd_op_t op, dict_t *op_ctx,
- glusterd_peerinfo_t *peerinfo,
- struct syncargs *args, uuid_t my_uuid,
- uuid_t recv_uuid)
+gd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
{
int ret = -1;
gd1_mgmt_v3_post_val_req req = {{0},};
@@ -1084,6 +1145,7 @@ gd_mgmt_v3_post_validate (glusterd_op_t op, dict_t *op_ctx,
uuid_copy (req.uuid, my_uuid);
req.op = op;
+ req.op_ret = op_ret;
synclock_unlock (&conf->big_lock);
ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
&gd_mgmt_v3_prog,
@@ -1098,7 +1160,8 @@ out:
int
glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
- dict_t *req_dict, char **op_errstr, int npeers)
+ int32_t op_ret, dict_t *req_dict,
+ char **op_errstr, int npeers)
{
int ret = -1;
int peer_cnt = 0;
@@ -1120,8 +1183,8 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
}
/* Post Validation on local node */
- ret = gd_mgmt_v3_post_validate_fn (op, req_dict, op_errstr,
- rsp_dict);
+ ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr,
+ rsp_dict);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1155,8 +1218,8 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
synctask_barrier_init((&args));
peer_cnt = 0;
list_for_each_entry (peerinfo, peers, op_peers_list) {
- gd_mgmt_v3_post_validate (op, req_dict, peerinfo, &args,
- MY_UUID, peer_uuid);
+ gd_mgmt_v3_post_validate (op, op_ret, req_dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
peer_cnt++;
}
gd_synctask_barrier_wait((&args), peer_cnt);
@@ -1190,7 +1253,6 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
int op_errno = -1;
GF_ASSERT(req);
- GF_ASSERT(iov);
GF_ASSERT(myframe);
frame = myframe;
@@ -1204,6 +1266,12 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (!iov) {
+ gf_log (THIS->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
if (ret < 0)
@@ -1213,6 +1281,7 @@ gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
op_ret = rsp.op_ret;
op_errno = rsp.op_errno;
+
out:
gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
GLUSTERD_MGMT_V3_UNLOCK,
@@ -1413,7 +1482,11 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
}
/* POST-COMMIT VALIDATE PHASE */
- ret = glusterd_mgmt_v3_post_validate (conf, op, req_dict,
+ /* As of now, post_validate is not handling any other
+ commands other than snapshot. So as of now, I am
+ sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate (conf, op, 0, req_dict,
&op_errstr, npeers);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Post Validation Failed");
@@ -1470,6 +1543,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
uuid_t *originator_uuid = NULL;
gf_boolean_t success = _gf_false;
char *tmp_errstr = NULL;
+ int op_ret = -1;
this = THIS;
GF_ASSERT (this);
@@ -1549,11 +1623,28 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
&op_errstr, npeers);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Brick Ops Failed");
+ gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed");
goto unbarrier;
}
/* COMMIT OP PHASE */
+ /* TODO: As of now, the plan is to do quorum check before sending the
+ commit fop and if the quorum succeeds, then commit is sent to all
+ the other glusterds.
+ snap create functionality now creates the in memory and on disk
+ objects for the snapshot (marking them as incomplete), takes the lvm
+ snapshot and then updates the status of the in memory and on disk
+ snap objects as complete. Suppose one of the glusterds goes down
+ after taking the lvm snapshot, but before updating the snap object,
+ then treat it as a snapshot create failure and trigger cleanup.
+ i.e the number of commit responses received by the originator
+ glusterd shold be the same as the number of peers it has sent the
+ request to (i.e npeers variable). If not, then originator glusterd
+ will initiate cleanup in post-validate fop.
+ Question: What if one of the other glusterds goes down as explained
+ above and along with it the originator glusterd also goes down?
+ Who will initiate the cleanup?
+ */
ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict,
&op_errstr, npeers);
if (ret) {
@@ -1576,25 +1667,31 @@ unbarrier:
goto out;
ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
&op_errstr, npeers);
- if (ret || (success == _gf_false)) {
- gf_log ("", GF_LOG_ERROR, "Brick Ops Failed");
- ret = -1;
- goto out;
- }
- /* POST-COMMIT VALIDATE PHASE */
- ret = glusterd_mgmt_v3_post_validate (conf, op, req_dict,
- &op_errstr, npeers);
+
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Post Validation Failed");
+ gf_log ("", GF_LOG_ERROR, "Brick Ops Failed");
goto out;
}
ret = 0;
+
out:
+ op_ret = ret;
+
+ if (success == _gf_false)
+ op_ret = -1;
+
+ /* POST-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed");
+ op_ret = -1;
+ }
/* UNLOCK PHASE FOR PEERS*/
(void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict,
- ret, &op_errstr,
+ op_ret, &op_errstr,
npeers, is_acquired);
/* If the commit op (snapshot taking) failed, then the error is stored
@@ -1604,7 +1701,7 @@ out:
is sent to cli.
*/
if (tmp_errstr) {
- if (ret && op_errstr) {
+ if (op_errstr) {
gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op"
"failed with the error %s", op_errstr);
GF_FREE (op_errstr);
@@ -1614,7 +1711,8 @@ out:
}
/* SEND CLI RESPONSE */
- glusterd_op_send_cli_response (op, ret, 0, req, dict, op_errstr);
+
+ glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr);
/* LOCAL VOLUME(S) UNLOCK */
if (!is_acquired)