From 4765dd1a1c51c67ab86687fbd871c89156680c34 Mon Sep 17 00:00:00 2001
From: Krishnan Parthasarathi
Date: Fri, 16 Sep 2011 10:40:32 +0530
Subject: glusterd: Implemented cmd to trigger self-heal on a replicate volume.

This cmd is used in the context of proactive self-heal for replicated
volumes. The user invokes the following cmd when (s)he suspects that
self-heal needs to be done on a particular volume:

    gluster volume heal <VOLNAME>

Change-Id: I3954353b53488c28b70406e261808239b44997f3
BUG: 3602
Reviewed-on: http://review.gluster.com/454
Tested-by: Gluster Build System
Reviewed-by: Vijay Bellur
---
 cli/src/cli-cmd-volume.c                        |  45 +++++++
 cli/src/cli-rpc-ops.c                           |  88 ++++++++++-
 cli/src/cli.h                                   |   4 +
 glusterfsd/src/glusterfsd-mgmt.c                | 127 +++++++++++++++++-
 libglusterfs/src/globals.c                      |   1 +
 libglusterfs/src/glusterfs.h                    |   1 +
 rpc/rpc-lib/src/protocol-common.h               |   2 +
 rpc/xdr/src/cli1-xdr.c                          |  30 +++++
 rpc/xdr/src/cli1-xdr.h                          |  21 +++
 rpc/xdr/src/cli1-xdr.x                          |  12 ++
 xlators/cluster/afr/src/afr-common.c            |   8 ++
 xlators/mgmt/glusterd/src/glusterd-handler.c    |  37 ++++++
 xlators/mgmt/glusterd/src/glusterd-mem-types.h  |   3 +-
 xlators/mgmt/glusterd/src/glusterd-op-sm.c      | 166 +++++++++++++++++++++---
 xlators/mgmt/glusterd/src/glusterd-op-sm.h      |   2 +-
 xlators/mgmt/glusterd/src/glusterd-rpc-ops.c    |  52 +++++---
 xlators/mgmt/glusterd/src/glusterd-utils.c      | 138 +++++++++++++++++++-
 xlators/mgmt/glusterd/src/glusterd-utils.h      |  22 ++++
 xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 160 +++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd.c            |   5 +
 xlators/mgmt/glusterd/src/glusterd.h            |  23 +++-
 21 files changed, 905 insertions(+), 42 deletions(-)

diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index de3166e8d..a2ec50863 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1469,6 +1469,47 @@ cli_print_brick_status (char *brick, int port, int online, int pid)
         return 0;
 }
 
+int
+cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                         const char **words, int wordcount)
+{
+        int                     ret = -1;
+        rpc_clnt_procedure_t   *proc = NULL;
+        call_frame_t           *frame = NULL;
+        gf1_cli_heal_vol_req    req = {0,};
+        int                     sent = 0;
+        int                     parse_error = 0;
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (!frame)
+                goto out;
+
+        if (wordcount != 3) {
+               cli_usage_out (word->pattern);
+               parse_error = 1;
+               goto out;
+        }
+
+        req.volname = (char *)words[2];
+        if (!req.volname)
+                goto out;
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+
+        if (proc->fn) {
+                ret = proc->fn (frame, THIS, &req);
+        }
+
+out:
+        if (ret) {
+                cli_cmd_sent_status_get (&sent);
+                if ((sent == 0) && (parse_error == 0))
+                        cli_out ("Volume heal failed");
+        }
+
+        return ret;
+}
+
 struct cli_cmd volume_cmds[] = {
         { "volume info [all|<VOLNAME>]",
           cli_cmd_volume_info_cbk,
@@ -1571,6 +1612,10 @@ struct cli_cmd volume_cmds[] = {
           cli_cmd_volume_status_cbk,
          "display status of specified volume"},
 
+        { "volume heal <VOLNAME>",
+          cli_cmd_volume_heal_cbk,
+          "Start healing of volume specified by <VOLNAME>"},
+
         { NULL, NULL, NULL }
 };
 
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index ccd76d570..78d27b624 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -3912,6 +3912,91 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
         return ret;
 }
 
+int
+gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
+                           int count, void *myframe)
+{
+        gf1_cli_heal_vol_rsp    rsp = {0,};
+        int                     ret = 0;
+        cli_local_t            *local = NULL;
+        char                   *volname = NULL;
+        call_frame_t           *frame = NULL;
+
+        if (-1 == req->rpc_status) {
+                goto out;
+        }
+
+        ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_heal_vol_rsp);
+        if (ret < 0) {
+                gf_log ("", GF_LOG_ERROR, "error");
+                goto out;
+        }
+
+        frame = myframe;
+
+        if (frame) {
+                local = frame->local;
+                frame->local = NULL;
+        }
+
+        if (local)
+                volname = local->u.heal_vol.volname;
+
+        gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
+
+        if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+                cli_out ("%s", rsp.op_errstr);
+        else
+                cli_out ("Starting heal on volume %s has been %s", volname,
+                         (rsp.op_ret) ? "unsuccessful": "successful");
+
+        ret = rsp.op_ret;
+
+out:
+        cli_cmd_broadcast_response (ret);
+        if (local)
+                cli_local_wipe (local);
+        if (rsp.volname)
+                free (rsp.volname);
+        if (rsp.op_errstr)
+                free (rsp.op_errstr);
+        return ret;
+}
+
+int32_t
+gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
+                       void *data)
+{
+        gf1_cli_heal_vol_req   *req = NULL;
+        int                     ret = 0;
+        cli_local_t            *local = NULL;
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        req = data;
+        local = cli_local_get ();
+
+        if (local) {
+                local->u.heal_vol.volname = req->volname;
+                frame->local = local;
+        }
+
+        ret = cli_cmd_submit (req, frame, cli_rpc_prog,
+                              GLUSTER_CLI_HEAL_VOLUME, NULL,
+                              this, gf_cli3_1_heal_volume_cbk,
+                              (xdrproc_t) xdr_gf1_cli_heal_vol_req);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
+
+
 struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
         [GLUSTER_CLI_NULL] = {"NULL", NULL },
         [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
@@ -3945,7 +4030,8 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
         [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
         [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
         [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
-        [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount}
+        [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
+        [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume}
 };
 
 struct rpc_clnt_program cli_prog = {
diff --git a/cli/src/cli.h b/cli/src/cli.h
index bf3437827..4ef1dbe06 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -149,6 +149,10 @@ struct cli_local {
                         char    *volname;
                         int     flags;
                 } get_vol;
+
+                struct {
+                        char    *volname;
+                } heal_vol;
         } u;
 };
 
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 43d49b3f4..adce060a6 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -315,6 +315,41 @@ out:
         return ret;
 }
 
+int
+glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret,
+                                         char *msg, dict_t *output)
+{
+        gd1_mgmt_brick_op_rsp    rsp = {0,};
+        int                      ret = -1;
+        GF_ASSERT (msg);
+        GF_ASSERT (req);
+        GF_ASSERT (output);
+
+        rsp.op_ret = op_ret;
+        rsp.op_errno = 0;
+        if (op_ret && msg[0])
+                rsp.op_errstr = msg;
+        else
+                rsp.op_errstr = "";
+
+        ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
+                                           (size_t *)&rsp.output.output_len);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "Couldn't serialize "
+                        "output dict.");
+                goto out;
+        }
+
+        ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+                                      (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+
+out:
+        if (rsp.output.output_val)
+                GF_FREE (rsp.output.output_val);
+
+        return ret;
+}
+
 int
 glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
 {
@@ -614,6 +649,92 @@ out:
         return NULL;
 }
 
+int
+glusterfs_handle_translator_heal (rpcsvc_request_t *req)
+{
+        int32_t                  ret = -1;
+        gd1_mgmt_brick_op_req    xlator_req = {0,};
+        dict_t                  *dict = NULL;
+        xlator_t                *xlator = NULL;
+        xlator_t                *any = NULL;
+        dict_t                  *output = NULL;
+        char                     msg[2048] = {0};
+        char                     key[2048] = {0};
+        char                    *xname = NULL;
+        glusterfs_ctx_t         *ctx = NULL;
+        glusterfs_graph_t       *active = NULL;
+        xlator_t                *this = NULL;
+        int                      i = 0;
+        int                      count = 0;
+
+        GF_ASSERT (req);
+        this = THIS;
+        GF_ASSERT (this);
+
+        ctx = glusterfs_ctx_get ();
+        GF_ASSERT (ctx);
+
+        active = ctx->active;
+        any = active->first;
+        if (!xdr_to_generic (req->msg[0], &xlator_req,
+                             (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+        dict = dict_new ();
+
+        ret = dict_unserialize (xlator_req.input.input_val,
+                                xlator_req.input.input_len,
+                                &dict);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to "
+                        "unserialize req-buffer to dictionary");
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "count", &count);
+        i = 0;
+        while (i < count) {
+                snprintf (key, sizeof (key), "heal-%d", i);
+                ret = dict_get_str (dict, key, &xname);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
+                                "replicate xlator name for key %s to trigger "
+                                "self-heal", key);
+                        goto out;
+                }
+                xlator = xlator_search_by_name (any, xname);
+                if (!xlator) {
+                        snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+                                  xname);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL);
+                i++;
+        }
+        output = dict_new ();
+        if (!output)
+                goto out;
+
+        /* output dict is not used currently, could be used later. */
+        ret = glusterfs_translator_heal_response_send (req, ret, msg, output);
+out:
+        if (dict)
+                dict_unref (dict);
+        if (xlator_req.input.input_val)
+                free (xlator_req.input.input_val); // malloced by xdr
+        if (output)
+                dict_unref (output);
+        if (xlator_req.name)
+                free (xlator_req.name); //malloced by xdr
+
+        return ret;
+}
+
 int
 glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
 {
@@ -627,6 +748,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
         case GF_BRICK_XLATOR_INFO:
                 ret = glusterfs_handle_translator_info_get (req);
                 break;
+        case GF_BRICK_XLATOR_HEAL:
+                ret = glusterfs_handle_translator_heal (req);
+                break;
         default:
                 break;
         }
@@ -681,7 +805,8 @@ rpc_clnt_prog_t clnt_handshake_prog = {
 rpcsvc_actor_t glusterfs_actors[] = {
         [GF_BRICK_NULL] = { "NULL", GF_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL},
         [GF_BRICK_TERMINATE] = { "TERMINATE", GF_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL},
-        [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL}
+        [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL},
+        [GF_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GF_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL}
 };
 
 struct rpcsvc_program glusterfs_mop_prog = {
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index fbae75dff..473a4604e 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -348,6 +348,7 @@ char eventstring[GF_EVENT_MAXVAL][64] = {
         "Transport Cleanup",
         "Transport Connected",
         "Volfile Modified",
+        "Volume Heal Triggered",
 };
 
 /* Copy the string ptr contents if needed for yourself */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 25f32bd5b..8247c60fb 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -373,6 +373,7 @@ typedef enum {
         GF_EVENT_VOLFILE_MODIFIED,
         GF_EVENT_GRAPH_NEW,
         GF_EVENT_TRANSLATOR_INFO,
+        GF_EVENT_TRIGGER_HEAL,
         GF_EVENT_MAXVAL,
 } glusterfs_event_t;
 
diff --git a/rpc/rpc-lib/src/protocol-common.h
b/rpc/rpc-lib/src/protocol-common.h index 3c4c8fc44..41197044b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -205,6 +205,7 @@ enum gluster_cli_procnum { GLUSTER_CLI_STATUS_VOLUME, GLUSTER_CLI_MOUNT, GLUSTER_CLI_UMOUNT, + GLUSTER_CLI_HEAL_VOLUME, GLUSTER_CLI_MAXVALUE, }; @@ -212,6 +213,7 @@ enum gf_brick_procnum { GF_BRICK_NULL = 0, GF_BRICK_TERMINATE = 1, GF_BRICK_XLATOR_INFO = 2, + GF_BRICK_XLATOR_HEAL = 3, GF_BRICK_MAX_VALUE }; diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c index 25ab32a8f..1240dda8b 100644 --- a/rpc/xdr/src/cli1-xdr.c +++ b/rpc/xdr/src/cli1-xdr.c @@ -1068,3 +1068,33 @@ xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp) return FALSE; return TRUE; } + +bool_t +xdr_gf1_cli_heal_vol_req (XDR *xdrs, gf1_cli_heal_vol_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_string (xdrs, &objp->volname, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gf1_cli_heal_vol_rsp (XDR *xdrs, gf1_cli_heal_vol_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_string (xdrs, &objp->volname, ~0)) + return FALSE; + if (!xdr_string (xdrs, &objp->op_errstr, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index 4077ff3c1..f22c635f1 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -614,6 +614,23 @@ struct gf1_cli_umount_rsp { }; typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp; +struct gf1_cli_heal_vol_req { + char *volname; +}; +typedef struct gf1_cli_heal_vol_req gf1_cli_heal_vol_req; + +struct gf1_cli_heal_vol_rsp { + int op_ret; + int op_errno; + char *volname; + char *op_errstr; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf1_cli_heal_vol_rsp gf1_cli_heal_vol_rsp; + /* the xdr functions */ #if defined(__STDC__) || defined(__cplusplus) @@ -687,6 +704,8 @@ extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*); extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*); extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*); extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*); +extern bool_t xdr_gf1_cli_heal_vol_req (XDR *, gf1_cli_heal_vol_req*); +extern bool_t xdr_gf1_cli_heal_vol_rsp (XDR *, gf1_cli_heal_vol_rsp*); #else /* K&R C */ extern bool_t xdr_gf_cli_defrag_type (); @@ -759,6 +778,8 @@ extern bool_t xdr_gf1_cli_mount_req (); extern bool_t xdr_gf1_cli_mount_rsp (); extern bool_t xdr_gf1_cli_umount_req (); extern bool_t xdr_gf1_cli_umount_rsp (); +extern bool_t xdr_gf1_cli_heal_vol_req (); +extern bool_t xdr_gf1_cli_heal_vol_rsp (); #endif /* K&R C */ diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 2a6168c04..9a1f77c0e 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -455,3 +455,15 @@ struct gf1_cli_umount_rsp { int op_ret; int op_errno; }; + +struct gf1_cli_heal_vol_req { + string volname<>; +} ; + +struct gf1_cli_heal_vol_rsp { + int op_ret; + int op_errno; + string volname<>; + string op_errstr<>; + opaque dict<>; +} ; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2e5ca71b2..c23e329df 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3498,7 +3498,15 @@ afr_notify (xlator_t *this, int32_t event, 
priv->last_event[idx] = event; } UNLOCK (&priv->lock); + + break; + + case GF_EVENT_TRIGGER_HEAL: + gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" + " manually. Start crawling"); + call_psh = 1; break; + default: propagate = 1; break; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index df0aa202c..2ab80c2ff 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -2561,6 +2561,42 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, return ret; } +int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = 0; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + switch (event) { + case RPC_CLNT_CONNECT: + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); + (void) glusterd_shd_set_running (_gf_true); + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + break; + + case RPC_CLNT_DISCONNECT: + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); + (void) glusterd_shd_set_running (_gf_false); + break; + + default: + gf_log (this->name, GF_LOG_TRACE, + "got some other RPC event %d", event); + break; + } + + return ret; +} + int glusterd_friend_remove_notify (glusterd_peerinfo_t *peerinfo, rpcsvc_request_t *req) { @@ -2742,6 +2778,7 @@ rpcsvc_actor_t gd_svc_cli_actors[] = { [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, NULL}, [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, NULL}, [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, NULL}, + [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, NULL} }; diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index e10cf1aca..196f5f50b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -70,7 +70,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_mount_comp_container = gf_common_mt_end + 44, gf_gld_mt_mount_component = gf_common_mt_end + 45, gf_gld_mt_mount_spec = gf_common_mt_end + 46, - gf_gld_mt_end = gf_common_mt_end + 47, + gf_gld_mt_nodesrv_t = gf_common_mt_end + 47, + gf_gld_mt_end = gf_common_mt_end + 48, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index c9d1c99a1..84280498e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -201,6 +201,17 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin brick_req->op = GF_BRICK_XLATOR_INFO; brick_req->name = brickinfo->path; + break; + case GD_OP_HEAL_VOLUME: + { + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GF_BRICK_XLATOR_HEAL; + brick_req->name = ""; + } break; default: goto out; @@ -1483,6 +1494,7 @@ glusterd_op_build_payload (dict_t **req) case GD_OP_LOG_LEVEL: case GD_OP_STATUS_VOLUME: case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: { dict_t *dict = ctx; dict_copy (dict, req_dict); @@ -1777,19 +1789,15 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) { int ret = 0; glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - 
glusterd_brickinfo_t *brickinfo = NULL; gf_boolean_t free_errstr = _gf_false; GF_ASSERT (event); GF_ASSERT (ctx); ev_ctx = ctx; - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); + ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from %s:%s", brickinfo->hostname, brickinfo->path); + gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "); ret = -1; free_errstr = _gf_true; goto out; @@ -1828,7 +1836,7 @@ glusterd_op_brick_disconnect (void *data) ev_ctx = data; GF_ASSERT (ev_ctx); - brickinfo = ev_ctx->brickinfo; + brickinfo = ev_ctx->pending_node->node; GF_ASSERT (brickinfo); if (brickinfo->timer) { @@ -2260,6 +2268,10 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_stage_rebalance (dict, op_errstr); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_stage_heal_volume (dict, op_errstr); + break; + default: gf_log ("", GF_LOG_ERROR, "Unknown op %d", op); @@ -2351,6 +2363,10 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_heal_volume (dict, op_errstr); + break; + default: gf_log ("", GF_LOG_ERROR, "Unknown op %d", op); @@ -2467,6 +2483,7 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; } @@ -2539,6 +2556,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; } @@ -2606,6 +2624,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; @@ -2673,6 +2692,117 @@ out: return ret; } +static int +_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count) +{ + int ret = -1; + char key[128] = {0,}; + char *xname = NULL; + + snprintf (key, sizeof (key), "heal-%d", count); + ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index); + if (ret == -1) + goto out; + + ret = dict_set_dynstr (dict, key, xname); +out: + return ret; +} + +static int +glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char msg[2048] = {0,}; + int replica_count = 0; + int index = 1; + int rxlator_count = 0; + uuid_t candidate = {0}; + glusterd_pending_node_t *pending_node = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Volume %s does not exist", + volname); + + *op_errstr = gf_strdup (msg); + gf_log ("", GF_LOG_ERROR, "%s", msg); + goto out; + } + + if (volinfo->type == 
GF_CLUSTER_TYPE_STRIPE_REPLICATE) { + replica_count = volinfo->replica_count; + + } else if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + replica_count = volinfo->sub_count; + + } else { + GF_ASSERT (0); + goto out; + } + + index = 1; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (uuid_compare (brickinfo->uuid, candidate) > 0) + uuid_copy (candidate, brickinfo->uuid); + + if (index % replica_count == 0) { + if (!uuid_compare (priv->uuid, candidate)) { + _add_rxlator_to_dict (dict, volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; + } + uuid_clear (candidate); + } + + index++; + } + ret = dict_set_int32 (dict, "count", rxlator_count); + if (ret) + goto out; + + if (rxlator_count) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = priv->shd; + pending_node->type = GD_NODE_SHD; + list_add_tail (&pending_node->list, + &opinfo.pending_bricks); + pending_node = NULL; + } + } + + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); + return ret; + +} + static int glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) { @@ -2723,7 +2853,6 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) { int ret = 0; glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - glusterd_brickinfo_t *brickinfo = NULL; char *op_errstr = NULL; glusterd_op_t op = GD_OP_NONE; dict_t *op_ctx = NULL; @@ -2736,24 +2865,22 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) req_ctx = ev_ctx->commit_ctx; GF_ASSERT (req_ctx); - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); + op = req_ctx->op; + op_ctx = glusterd_op_get_ctx (); - ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); + ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, + ev_ctx->pending_node->node); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from %s:%s", brickinfo->hostname, brickinfo->path); + gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "); ret = -1; goto out; } if (opinfo.brick_pending_count > 0) opinfo.brick_pending_count--; - op = req_ctx->op; - op_ctx = glusterd_op_get_ctx (); - glusterd_handle_brick_rsp (brickinfo, op, ev_ctx->rsp_dict, - op_ctx, &op_errstr); + glusterd_handle_brick_rsp (ev_ctx->pending_node->node, op, ev_ctx->rsp_dict, + op_ctx, &op_errstr); if (opinfo.brick_pending_count > 0) goto out; @@ -2791,6 +2918,10 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr) ret = glusterd_bricks_select_profile_volume (dict, op_errstr); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_bricks_select_heal_volume (dict, op_errstr); + break; + default: break; } @@ -3344,6 +3475,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx) case GD_OP_LOG_LEVEL: case GD_OP_STATUS_VOLUME: case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: dict_unref (ctx); break; case GD_OP_DELETE_VOLUME: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 0a75d4c84..97385e6a4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -152,7 +152,7 @@ typedef struct glusterd_op_brick_rsp_ctx_ { char *op_errstr; dict_t *rsp_dict; glusterd_req_ctx_t *commit_ctx; - glusterd_brickinfo_t *brickinfo; + glusterd_pending_node_t *pending_node; } 
glusterd_op_brick_rsp_ctx_t; typedef struct glusterd_pr_brick_rsp_conv_t { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index c9f414052..9cdab97df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -455,6 +455,21 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, xdrproc = (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp; break; } + case GD_OP_HEAL_VOLUME: + { + gf1_cli_heal_vol_rsp rsp = {0,}; + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.volname = ""; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + cli_rsp = &rsp; + xdrproc = (xdrproc_t) xdr_gf1_cli_heal_vol_rsp; + break; + + } case GD_OP_NONE: case GD_OP_MAX: { @@ -1922,7 +1937,7 @@ glusterd_start_brick_disconnect_timer (glusterd_op_brick_rsp_ctx_t *ev_ctx) timeout.tv_sec = 5; timeout.tv_usec = 0; - brickinfo = ev_ctx->brickinfo; + brickinfo = ev_ctx->pending_node->node; GF_ASSERT (brickinfo); this = THIS; GF_ASSERT (this); @@ -2000,7 +2015,7 @@ out: } else { event_type = GD_OP_EVENT_RCVD_ACC; } - ev_ctx->brickinfo = frame->cookie; + ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; op = glusterd_op_get_op (); @@ -2087,9 +2102,9 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, call_frame_t *dummy_frame = NULL; char *op_errstr = NULL; int pending_bricks = 0; - glusterd_pending_node_t *pending_brick; - glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node; glusterd_req_ctx_t *req_ctx = NULL; + struct rpc_clnt *rpc = NULL; if (!this) { ret = -1; @@ -2109,25 +2124,30 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, goto out; } - list_for_each_entry (pending_brick, &opinfo.pending_bricks, list) { + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); - brickinfo = pending_brick->node; - if (!dummy_frame) continue; - if (_gf_false == glusterd_is_brick_started (brickinfo)) - continue; - - ret = glusterd_brick_op_build_payload (req_ctx->op, brickinfo, - (gd1_mgmt_brick_op_req **)&req, - req_ctx->dict); + ret = glusterd_brick_op_build_payload (req_ctx->op, + pending_node->node, + (gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); if (ret) goto out; dummy_frame->local = data; - dummy_frame->cookie = brickinfo; - ret = glusterd_submit_request (brickinfo->rpc, req, dummy_frame, + dummy_frame->cookie = pending_node; + + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Brick Op failed " + "due to rpc failure."); + goto out; + } + + ret = glusterd_submit_request (rpc, req, dummy_frame, &glusterd_glusterfs_3_1_mgmt_prog, req->op, NULL, this, glusterd3_1_brick_op_cbk, @@ -2143,7 +2163,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, } gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks", - pending_bricks); + pending_bricks); opinfo.brick_pending_count = pending_bricks; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e6c23e833..59609971b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2366,6 +2366,120 @@ glusterd_get_nodesvc_volfile (char *server, char *workdir, snprintf (volfile, len, "%s/%s-server.vol", dir, server); } +void +glusterd_shd_set_running (gf_boolean_t status) +{ + glusterd_conf_t *priv = NULL; + + priv = 
THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + priv->shd->running = status; +} + +gf_boolean_t +glusterd_shd_is_running () +{ + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + GF_ASSERT (conf); + GF_ASSERT (conf->shd); + + return conf->shd->running; +} + +int32_t +glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len) +{ + char sockfilepath[PATH_MAX] = {0,}; + char md5_str[PATH_MAX] = {0,}; + + snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", + rundir, uuid_utoa (uuid)); + _get_md5_str (md5_str, sizeof (md5_str), + (uint8_t *)sockfilepath, sizeof (sockfilepath)); + snprintf (socketpath, len, "%s/%s.socket", glusterd_sock_dir, + md5_str); + return 0; +} + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) +{ + struct rpc_clnt *rpc = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + nodesrv_t *shd = NULL; + GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out); + GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); + + if (pending_node->type == GD_NODE_BRICK) { + brickinfo = pending_node->node; + rpc = brickinfo->rpc; + + } else if (pending_node->type == GD_NODE_SHD) { + shd = pending_node->node; + rpc = shd->rpc; + + } else { + GF_ASSERT (0); + } + +out: + return rpc; +} + +struct rpc_clnt* +glusterd_shd_get_rpc (void) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + return priv->shd->rpc; +} + +int32_t +glusterd_shd_set_rpc (struct rpc_clnt *rpc) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + priv->shd->rpc = rpc; + + return ret; +} + +int32_t +glusterd_shd_connect (char *socketpath) { + int ret = 0; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + + ret = rpc_clnt_transport_unix_options_build (&options, socketpath); + if (ret) + goto out; + ret = glusterd_rpc_create (&rpc, options, + glusterd_shd_rpc_notify, + NULL); + if (ret) + goto out; + (void) glusterd_shd_set_rpc (rpc); +out: + return ret; +} + int32_t glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) { @@ -2376,6 +2490,7 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) char logfile[PATH_MAX] = {0,}; char volfile[PATH_MAX] = {0,}; char rundir[PATH_MAX] = {0,}; + char shd_sockfpath[PATH_MAX] = {0,}; char volfileid[256] = {0}; this = THIS; @@ -2408,13 +2523,28 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) server); snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); - if (pmap_signin) + if (!strcmp (server, "glustershd")) { + glusterd_shd_set_socket_filepath (rundir, + priv->uuid, + shd_sockfpath, + sizeof (shd_sockfpath)); + } + + //TODO: kp:change the assumption that shd is the one which signs in + // use runner_add_args? 
+        if (pmap_signin) {
                 ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
                               "--volfile-id", volfileid,
-                              "-p", pidfile, "-l", logfile, NULL);
-        else
+                              "-p", pidfile, "-l", logfile,
+                              "-S", shd_sockfpath, NULL);
+                if (!ret)
+                        glusterd_shd_connect (shd_sockfpath);
+
+        }
+        else {
                 ret = runcmd (SBIN_DIR"/glusterfs", "-f", volfile,
                               "-p", pidfile, "-l", logfile, NULL);
+        }
 
 out:
         return ret;
@@ -3742,7 +3872,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem)
 {
         glusterd_pending_node_t *pending_node = NULL;
         glusterd_pending_node_t *tmp = NULL;
-        int                      ret = -1;
+        int                      ret = 0;
 
         list_for_each_entry_safe (pending_node, tmp, list, list) {
                 if (elem == pending_node->node) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 93fa763bd..aca46eae1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -174,6 +174,28 @@ glusterd_shd_start ();
 int32_t
 glusterd_shd_stop ();
 
+int32_t
+glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid,
+                                  char *socketpath, int len);
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node);
+
+struct rpc_clnt*
+glusterd_shd_get_rpc (void);
+
+int32_t
+glusterd_shd_set_rpc (struct rpc_clnt *rpc);
+
+int32_t
+glusterd_shd_connect (char *socketpath);
+
+void
+glusterd_shd_set_running (gf_boolean_t status);
+
+gf_boolean_t
+glusterd_shd_is_running ();
+
 int
 glusterd_remote_hostname_get (rpcsvc_request_t *req,
                               char *remote_host, int len);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 560968def..39cc02c8e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -392,6 +392,62 @@ out:
         return ret;
 }
 
+int
+glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+        int32_t                 ret = -1;
+        gf1_cli_heal_vol_req    cli_req = {0,};
+        char                   *dup_volname = NULL;
+        dict_t                 *dict = NULL;
+        glusterd_op_t           cli_op = GD_OP_HEAL_VOLUME;
+
+        GF_ASSERT (req);
+
+        if (!xdr_to_generic (req->msg[0], &cli_req,
+                             (xdrproc_t)xdr_gf1_cli_heal_vol_req)) {
+                //failed to decode msg;
+                req->rpc_err = GARBAGE_ARGS;
+                goto out;
+        }
+
+        gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req "
+                "for volume %s", cli_req.volname);
+
+        dict = dict_new ();
+
+        if (!dict)
+                goto out;
+
+        dup_volname = gf_strdup (cli_req.volname);
+        if (!dup_volname)
+                goto out;
+
+        ret = dict_set_dynstr (dict, "volname", dup_volname);
+        if (ret)
+                goto out;
+
+        ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict);
+
+        gf_cmd_log ("volume heal","on volname: %s %s", cli_req.volname,
+                    ((ret == 0) ? "SUCCESS": "FAILED"));
+
+out:
+        if (ret && dict)
+                dict_unref (dict);
+        if (cli_req.volname)
+                free (cli_req.volname); //its malloced by xdr
+
+        glusterd_friend_sm ();
+        glusterd_op_sm ();
+
+        if (ret)
+                ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+                                                     NULL, "operation failed");
+
+        return ret;
+}
+
+
 /* op-sm */
 int
 glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr)
@@ -753,6 +809,101 @@ out:
         return ret;
 }
 
+int
+glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
+{
+        int                     ret = 0;
+        char                   *volname = NULL;
+        gf_boolean_t            exists = _gf_false;
+        gf_boolean_t            enabled = _gf_false;
+        glusterd_volinfo_t     *volinfo = NULL;
+        char                    msg[2048];
+        glusterd_conf_t        *priv = NULL;
+        dict_t                 *opt_dict = NULL;
+
+        priv = THIS->private;
+        if (!priv) {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "priv is NULL");
+                ret = -1;
+                goto out;
+        }
+
+        if (!glusterd_shd_is_running ()) {
+                ret = -1;
+                snprintf (msg, sizeof (msg), "Self-heal daemon is not "
+                          "running.");
+                *op_errstr = gf_strdup (msg);
+                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+
+        if (!exists) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist", volname);
+                gf_log ("", GF_LOG_ERROR, "%s",
+                        msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+        } else {
+                ret = 0;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+
+        if (ret)
+                goto out;
+
+        if (!glusterd_is_volume_started (volinfo)) {
+                snprintf (msg, sizeof (msg), "Volume %s is not started.",
+                          volname);
+                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        if (!glusterd_is_volume_replicate (volinfo)) {
+                snprintf (msg, sizeof (msg), "Volume %s is not of type "
+                          "replicate", volname);
+                gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+                ret = -1;
+                goto out;
+        }
+
+        opt_dict = volinfo->dict;
+        if (!opt_dict) {
+                ret = 0;
+                goto out;
+        }
+
+        enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon",
+                                        1);
+        if (!enabled) {
+                snprintf (msg, sizeof (msg), "Self-heal-daemon is " "disabled. 
Heal will not be triggered on volume %s", + volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + int glusterd_op_create_volume (dict_t *dict, char **op_errstr) { @@ -1034,3 +1185,12 @@ out: gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } + +int +glusterd_op_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + /* Necessary subtasks of heal are completed in brick op */ + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index d1582eae3..83bbd1b22 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -930,6 +930,10 @@ init (xlator_t *this) conf = GF_CALLOC (1, sizeof (glusterd_conf_t), gf_gld_mt_glusterd_conf_t); GF_VALIDATE_OR_GOTO(this->name, conf, out); + conf->shd = GF_CALLOC (1, sizeof (nodesrv_t), + gf_gld_mt_nodesrv_t); + GF_VALIDATE_OR_GOTO(this->name, conf->shd, out); + INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); pthread_mutex_init (&conf->mutex, NULL); @@ -961,6 +965,7 @@ init (xlator_t *this) } #endif this->private = conf; + (void) glusterd_shd_set_running (_gf_false); /* this->ctx->top = this;*/ ret = glusterd_uuid_init (first_time); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ab521af0f..b49e7d675 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -79,6 +79,7 @@ typedef enum glusterd_op_ { GD_OP_LOG_LEVEL, GD_OP_STATUS_VOLUME, GD_OP_REBALANCE, + GD_OP_HEAL_VOLUME, GD_OP_MAX, } glusterd_op_t; @@ -94,6 +95,11 @@ typedef struct glusterd_store_iter_ glusterd_store_iter_t; struct glusterd_volgen { dict_t *dict; }; +typedef struct { + struct rpc_clnt *rpc; + gf_boolean_t running; +} nodesrv_t; + typedef struct { struct _volfile_ctx *volfile; pthread_mutex_t mutex; @@ -104,6 +110,7 @@ typedef struct { uuid_t uuid; char workdir[PATH_MAX]; rpcsvc_t *rpc; + nodesrv_t *shd; struct pmap_registry *pmap; struct list_head volumes; struct list_head xprt_list; @@ -225,9 +232,16 @@ struct glusterd_volinfo_ { xlator_t *xl; }; +typedef enum gd_node_type_ { + GD_NODE_NONE, + GD_NODE_BRICK, + GD_NODE_SHD +} gd_node_type; + typedef struct glusterd_pending_node_ { - void *node; struct list_head list; + void *node; + gd_node_type type; } glusterd_pending_node_t; enum glusterd_op_ret { @@ -510,6 +524,10 @@ int glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, void *data); +int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); + int glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options, rpc_clnt_notify_t notify_fn, void *notify_data); @@ -535,8 +553,11 @@ int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req); int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk); +int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); /* op-sm functions */ +int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); +int glusterd_op_heal_volume (dict_t *dict, char **op_errstr); int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr); int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_quota (dict_t *dict, char **op_errstr); -- cgit
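
Usage illustration (not part of the patch; "test-volume" is a placeholder
volume name, and output assumes a started replicate volume with the self-heal
daemon running):

    # gluster volume heal test-volume
    Starting heal on volume test-volume has been successful

The success line mirrors the cli_out() format string added in
gf_cli3_1_heal_volume_cbk; on failure, the CLI instead prints glusterd's
op_errstr, e.g. "Self-heal daemon is not running." from
glusterd_op_stage_heal_volume.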