author | Krishnan Parthasarathi <kp@gluster.com> | 2011-09-16 10:40:32 +0530
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-22 09:43:25 -0700
commit | 4765dd1a1c51c67ab86687fbd871c89156680c34
tree | d1d3890457cbcb01131d21f66e40ec8a1c537cf1
parent | 53b5da6dfab2e6b11ab2e40119e92ff7d4527b2c
glusterd: Implemented cmd to trigger self-heal on a replicate volume. (v3.3.0qa10)
This command is used in the context of proactive self-heal for replicated
volumes. The user invokes the following command when they suspect that
self-heal needs to be done on a particular volume:
gluster volume heal <VOLNAME>
Change-Id: I3954353b53488c28b70406e261808239b44997f3
BUG: 3602
Reviewed-on: http://review.gluster.com/454
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
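
For reference, a minimal usage sketch of the new command as wired up by this patch; the volume name below is illustrative, and the success message mirrors the cli_out() format string added in gf_cli3_1_heal_volume_cbk():

```console
# Trigger proactive self-heal on a started replicate volume (volume name is illustrative)
$ gluster volume heal test-vol
Starting heal on volume test-vol has been successful
```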
-rw-r--r-- | cli/src/cli-cmd-volume.c | 45
-rw-r--r-- | cli/src/cli-rpc-ops.c | 88
-rw-r--r-- | cli/src/cli.h | 4
-rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 127
-rw-r--r-- | libglusterfs/src/globals.c | 1
-rw-r--r-- | libglusterfs/src/glusterfs.h | 1
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 2
-rw-r--r-- | rpc/xdr/src/cli1-xdr.c | 30
-rw-r--r-- | rpc/xdr/src/cli1-xdr.h | 21
-rw-r--r-- | rpc/xdr/src/cli1-xdr.x | 12
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 8
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 37
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 166
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.h | 2
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 52
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 138
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 22
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 160
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 5
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 23 |
21 files changed, 905 insertions, 42 deletions
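
Before the diff itself, here is a small self-contained sketch of the least obvious piece of the patch: the per-replica-set election performed by glusterd_bricks_select_heal_volume(). Within each replica set, the peer that owns the largest brick UUID nominates the corresponding <volname>-replicate-<N> xlator for healing. The function and variable names below are illustrative only; the real code records the chosen names in a dict via _add_rxlator_to_dict() rather than printing them.

```c
/*
 * Simplified, illustrative sketch of the selection logic in
 * glusterd_bricks_select_heal_volume(): within every replica set, the peer
 * whose UUID equals the largest brick UUID of that set nominates the
 * corresponding "<volname>-replicate-<N>" xlator for healing.
 */
#include <stdio.h>
#include <uuid/uuid.h>

static void
pick_heal_targets (const char *volname, uuid_t my_uuid,
                   uuid_t brick_uuids[], int brick_count, int replica_count)
{
        uuid_t candidate = {0};
        int    index     = 1;   /* bricks are walked 1-based, as in the patch */
        int    i         = 0;

        for (i = 0; i < brick_count; i++, index++) {
                /* remember the "largest" brick UUID seen in this replica set */
                if (uuid_compare (brick_uuids[i], candidate) > 0)
                        uuid_copy (candidate, brick_uuids[i]);

                if (index % replica_count == 0) {
                        /* end of a replica set: the owning peer triggers heal */
                        if (uuid_compare (my_uuid, candidate) == 0)
                                printf ("heal %s-replicate-%d on this node\n",
                                        volname, (index - 1) / replica_count);
                        uuid_clear (candidate);
                }
        }
}
```

The intent appears to be that exactly one peer takes responsibility for each replica set, so the self-heal crawl is not triggered on the same subvolume from multiple nodes.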
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index de3166e8d04..a2ec5086317 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1469,6 +1469,47 @@ cli_print_brick_status (char *brick, int port, int online, int pid) return 0; } +int +cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word, + const char **words, int wordcount) +{ + int ret = -1; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + gf1_cli_heal_vol_req req = {0,}; + int sent = 0; + int parse_error = 0; + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) + goto out; + + if (wordcount != 3) { + cli_usage_out (word->pattern); + parse_error = 1; + goto out; + } + + req.volname = (char *)words[2]; + if (!req.volname) + goto out; + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME]; + + if (proc->fn) { + ret = proc->fn (frame, THIS, &req); + } + +out: + if (ret) { + cli_cmd_sent_status_get (&sent); + if ((sent == 0) && (parse_error == 0)) + cli_out ("Volume heal failed"); + } + + return ret; +} + struct cli_cmd volume_cmds[] = { { "volume info [all|<VOLNAME>]", cli_cmd_volume_info_cbk, @@ -1571,6 +1612,10 @@ struct cli_cmd volume_cmds[] = { cli_cmd_volume_status_cbk, "display status of specified volume"}, + { "volume heal <VOLNAME>", + cli_cmd_volume_heal_cbk, + "Start healing of volume specified by <VOLNAME>"}, + { NULL, NULL, NULL } }; diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index ccd76d57043..78d27b62424 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -3912,6 +3912,91 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data) return ret; } +int +gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gf1_cli_heal_vol_rsp rsp = {0,}; + int ret = 0; + cli_local_t *local = NULL; + char *volname = NULL; + call_frame_t *frame = NULL; + + if (-1 == req->rpc_status) { + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_heal_vol_rsp); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error"); + goto out; + } + + frame = myframe; + + if (frame) { + local = frame->local; + frame->local = NULL; + } + + if (local) + volname = local->u.heal_vol.volname; + + gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume"); + + if (rsp.op_ret && strcmp (rsp.op_errstr, "")) + cli_out ("%s", rsp.op_errstr); + else + cli_out ("Starting heal on volume %s has been %s", volname, + (rsp.op_ret) ? 
"unsuccessful": "successful"); + + ret = rsp.op_ret; + +out: + cli_cmd_broadcast_response (ret); + if (local) + cli_local_wipe (local); + if (rsp.volname) + free (rsp.volname); + if (rsp.op_errstr) + free (rsp.op_errstr); + return ret; +} + +int32_t +gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this, + void *data) +{ + gf1_cli_heal_vol_req *req = NULL; + int ret = 0; + cli_local_t *local = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + req = data; + local = cli_local_get (); + + if (local) { + local->u.heal_vol.volname = req->volname; + frame->local = local; + } + + ret = cli_cmd_submit (req, frame, cli_rpc_prog, + GLUSTER_CLI_HEAL_VOLUME, NULL, + this, gf_cli3_1_heal_volume_cbk, + (xdrproc_t) xdr_gf1_cli_heal_vol_req); + +out: + gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + + + struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_NULL] = {"NULL", NULL }, [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe}, @@ -3945,7 +4030,8 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd}, [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume}, [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount}, - [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount} + [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount}, + [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume} }; struct rpc_clnt_program cli_prog = { diff --git a/cli/src/cli.h b/cli/src/cli.h index bf3437827ec..4ef1dbe06f6 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -149,6 +149,10 @@ struct cli_local { char *volname; int flags; } get_vol; + + struct { + char *volname; + }heal_vol; } u; }; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 43d49b3f4e8..adce060a6d2 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -316,6 +316,41 @@ out: } int +glusterfs_translator_heal_response_send (rpcsvc_request_t *req, int op_ret, + char *msg, dict_t *output) +{ + gd1_mgmt_brick_op_rsp rsp = {0,}; + int ret = -1; + GF_ASSERT (msg); + GF_ASSERT (req); + GF_ASSERT (output); + + rsp.op_ret = op_ret; + rsp.op_errno = 0; + if (ret && msg[0]) + rsp.op_errstr = msg; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (output, &rsp.output.output_val, + (size_t *)&rsp.output.output_len); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't serialize " + "output dict."); + goto out; + } + + ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + +out: + if (rsp.output.output_val) + GF_FREE (rsp.output.output_val); + + return ret; +} + +int glusterfs_handle_translator_info_get (rpcsvc_request_t *req) { int32_t ret = -1; @@ -615,6 +650,92 @@ out: } int +glusterfs_handle_translator_heal (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_brick_op_req xlator_req = {0,}; + dict_t *dict = NULL; + xlator_t *xlator = NULL; + xlator_t *any = NULL; + dict_t *output = NULL; + char msg[2048] = {0}; + char key[2048] = {0}; + char *xname = NULL; + glusterfs_ctx_t *ctx = NULL; + glusterfs_graph_t *active = NULL; + xlator_t *this = NULL; + int i = 0; + int count = 0; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ctx = glusterfs_ctx_get (); + GF_ASSERT (ctx); + + active = ctx->active; + any = active->first; + if (!xdr_to_generic (req->msg[0], &xlator_req, + (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) { + //failed to decode msg; + req->rpc_err = 
GARBAGE_ARGS; + goto out; + } + dict = dict_new (); + + ret = dict_unserialize (xlator_req.input.input_val, + xlator_req.input.input_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + + ret = dict_get_int32 (dict, "count", &count); + i = 0; + while (i < count) { + snprintf (key, sizeof (key), "heal-%d", i); + ret = dict_get_str (dict, key, &xname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Couldn't get " + "replicate xlator %s to trigger " + "self-heal", xname); + goto out; + } + xlator = xlator_search_by_name (any, xname); + if (!xlator) { + snprintf (msg, sizeof (msg), "xlator %s is not loaded", + xlator_req.name); + ret = -1; + goto out; + } + + ret = xlator_notify (xlator, GF_EVENT_TRIGGER_HEAL, dict, NULL); + i++; + } + output = dict_new (); + if (!output) + goto out; + + /* output dict is not used currently, could be used later. */ + ret = glusterfs_translator_heal_response_send (req, ret, msg, output); +out: + if (dict) + dict_unref (dict); + if (xlator_req.input.input_val) + free (xlator_req.input.input_val); // malloced by xdr + if (output) + dict_unref (output); + if (xlator_req.name) + free (xlator_req.name); //malloced by xdr + + return ret; +} + +int glusterfs_handle_rpc_msg (rpcsvc_request_t *req) { int ret = -1; @@ -627,6 +748,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req) case GF_BRICK_XLATOR_INFO: ret = glusterfs_handle_translator_info_get (req); break; + case GF_BRICK_XLATOR_HEAL: + ret = glusterfs_handle_translator_heal (req); + break; default: break; } @@ -681,7 +805,8 @@ rpc_clnt_prog_t clnt_handshake_prog = { rpcsvc_actor_t glusterfs_actors[] = { [GF_BRICK_NULL] = { "NULL", GF_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL}, [GF_BRICK_TERMINATE] = { "TERMINATE", GF_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL}, - [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL} + [GF_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GF_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL}, + [GF_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GF_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL} }; struct rpcsvc_program glusterfs_mop_prog = { diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index fbae75dffba..473a4604e1f 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -348,6 +348,7 @@ char eventstring[GF_EVENT_MAXVAL][64] = { "Transport Cleanup", "Transport Connected", "Volfile Modified", + "Volume Heal Triggered", }; /* Copy the string ptr contents if needed for yourself */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 25f32bd5b88..8247c60fbf6 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -373,6 +373,7 @@ typedef enum { GF_EVENT_VOLFILE_MODIFIED, GF_EVENT_GRAPH_NEW, GF_EVENT_TRANSLATOR_INFO, + GF_EVENT_TRIGGER_HEAL, GF_EVENT_MAXVAL, } glusterfs_event_t; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 3c4c8fc444f..41197044b66 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -205,6 +205,7 @@ enum gluster_cli_procnum { GLUSTER_CLI_STATUS_VOLUME, GLUSTER_CLI_MOUNT, GLUSTER_CLI_UMOUNT, + GLUSTER_CLI_HEAL_VOLUME, GLUSTER_CLI_MAXVALUE, }; @@ -212,6 +213,7 @@ enum gf_brick_procnum { GF_BRICK_NULL = 0, GF_BRICK_TERMINATE = 1, GF_BRICK_XLATOR_INFO = 2, + GF_BRICK_XLATOR_HEAL = 3, GF_BRICK_MAX_VALUE }; diff --git 
a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c index 25ab32a8fe9..1240dda8bf5 100644 --- a/rpc/xdr/src/cli1-xdr.c +++ b/rpc/xdr/src/cli1-xdr.c @@ -1068,3 +1068,33 @@ xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp) return FALSE; return TRUE; } + +bool_t +xdr_gf1_cli_heal_vol_req (XDR *xdrs, gf1_cli_heal_vol_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_string (xdrs, &objp->volname, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gf1_cli_heal_vol_rsp (XDR *xdrs, gf1_cli_heal_vol_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_string (xdrs, &objp->volname, ~0)) + return FALSE; + if (!xdr_string (xdrs, &objp->op_errstr, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index 4077ff3c11b..f22c635f176 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -614,6 +614,23 @@ struct gf1_cli_umount_rsp { }; typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp; +struct gf1_cli_heal_vol_req { + char *volname; +}; +typedef struct gf1_cli_heal_vol_req gf1_cli_heal_vol_req; + +struct gf1_cli_heal_vol_rsp { + int op_ret; + int op_errno; + char *volname; + char *op_errstr; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf1_cli_heal_vol_rsp gf1_cli_heal_vol_rsp; + /* the xdr functions */ #if defined(__STDC__) || defined(__cplusplus) @@ -687,6 +704,8 @@ extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*); extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*); extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*); extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*); +extern bool_t xdr_gf1_cli_heal_vol_req (XDR *, gf1_cli_heal_vol_req*); +extern bool_t xdr_gf1_cli_heal_vol_rsp (XDR *, gf1_cli_heal_vol_rsp*); #else /* K&R C */ extern bool_t xdr_gf_cli_defrag_type (); @@ -759,6 +778,8 @@ extern bool_t xdr_gf1_cli_mount_req (); extern bool_t xdr_gf1_cli_mount_rsp (); extern bool_t xdr_gf1_cli_umount_req (); extern bool_t xdr_gf1_cli_umount_rsp (); +extern bool_t xdr_gf1_cli_heal_vol_req (); +extern bool_t xdr_gf1_cli_heal_vol_rsp (); #endif /* K&R C */ diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 2a6168c0456..9a1f77c0ef7 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -455,3 +455,15 @@ struct gf1_cli_umount_rsp { int op_ret; int op_errno; }; + +struct gf1_cli_heal_vol_req { + string volname<>; +} ; + +struct gf1_cli_heal_vol_rsp { + int op_ret; + int op_errno; + string volname<>; + string op_errstr<>; + opaque dict<>; +} ; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2e5ca71b219..c23e329dfcb 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3498,7 +3498,15 @@ afr_notify (xlator_t *this, int32_t event, priv->last_event[idx] = event; } UNLOCK (&priv->lock); + + break; + + case GF_EVENT_TRIGGER_HEAL: + gf_log (this->name, GF_LOG_INFO, "Self-heal was triggered" + " manually. 
Start crawling"); + call_psh = 1; break; + default: propagate = 1; break; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index df0aa202cc6..2ab80c2ff63 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -2562,6 +2562,42 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, } int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = 0; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + switch (event) { + case RPC_CLNT_CONNECT: + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); + (void) glusterd_shd_set_running (_gf_true); + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + break; + + case RPC_CLNT_DISCONNECT: + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); + (void) glusterd_shd_set_running (_gf_false); + break; + + default: + gf_log (this->name, GF_LOG_TRACE, + "got some other RPC event %d", event); + break; + } + + return ret; +} + +int glusterd_friend_remove_notify (glusterd_peerinfo_t *peerinfo, rpcsvc_request_t *req) { int ret = -1; @@ -2742,6 +2778,7 @@ rpcsvc_actor_t gd_svc_cli_actors[] = { [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, NULL}, [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, NULL}, [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, NULL}, + [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, NULL} }; diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index e10cf1aca19..196f5f50bd8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -70,7 +70,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_mount_comp_container = gf_common_mt_end + 44, gf_gld_mt_mount_component = gf_common_mt_end + 45, gf_gld_mt_mount_spec = gf_common_mt_end + 46, - gf_gld_mt_end = gf_common_mt_end + 47, + gf_gld_mt_nodesrv_t = gf_common_mt_end + 47, + gf_gld_mt_end = gf_common_mt_end + 48, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index c9d1c99a1e1..84280498e8b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -202,6 +202,17 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin brick_req->name = brickinfo->path; break; + case GD_OP_HEAL_VOLUME: + { + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GF_BRICK_XLATOR_HEAL; + brick_req->name = ""; + } + break; default: goto out; break; @@ -1483,6 +1494,7 @@ glusterd_op_build_payload (dict_t **req) case GD_OP_LOG_LEVEL: case GD_OP_STATUS_VOLUME: case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: { dict_t *dict = ctx; dict_copy (dict, req_dict); @@ -1777,19 +1789,15 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) { int ret = 0; glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - glusterd_brickinfo_t *brickinfo = NULL; gf_boolean_t free_errstr = _gf_false; GF_ASSERT (event); GF_ASSERT (ctx); ev_ctx = ctx; - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - ret = 
glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); + ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from %s:%s", brickinfo->hostname, brickinfo->path); + gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "); ret = -1; free_errstr = _gf_true; goto out; @@ -1828,7 +1836,7 @@ glusterd_op_brick_disconnect (void *data) ev_ctx = data; GF_ASSERT (ev_ctx); - brickinfo = ev_ctx->brickinfo; + brickinfo = ev_ctx->pending_node->node; GF_ASSERT (brickinfo); if (brickinfo->timer) { @@ -2260,6 +2268,10 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_stage_rebalance (dict, op_errstr); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_stage_heal_volume (dict, op_errstr); + break; + default: gf_log ("", GF_LOG_ERROR, "Unknown op %d", op); @@ -2351,6 +2363,10 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_heal_volume (dict, op_errstr); + break; + default: gf_log ("", GF_LOG_ERROR, "Unknown op %d", op); @@ -2467,6 +2483,7 @@ glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; } @@ -2539,6 +2556,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; } @@ -2606,6 +2624,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr) goto out; } else { pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; list_add_tail (&pending_node->list, &opinfo.pending_bricks); pending_node = NULL; @@ -2674,6 +2693,117 @@ out: } static int +_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count) +{ + int ret = -1; + char key[128] = {0,}; + char *xname = NULL; + + snprintf (key, sizeof (key), "heal-%d", count); + ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index); + if (ret == -1) + goto out; + + ret = dict_set_dynstr (dict, key, xname); +out: + return ret; +} + +static int +glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char msg[2048] = {0,}; + int replica_count = 0; + int index = 1; + int rxlator_count = 0; + uuid_t candidate = {0}; + glusterd_pending_node_t *pending_node = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Volume %s does not exist", + volname); + + *op_errstr = gf_strdup (msg); + gf_log ("", GF_LOG_ERROR, "%s", msg); + goto out; + } + + if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) { + replica_count = volinfo->replica_count; + + } else if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + replica_count = volinfo->sub_count; + + } else { + GF_ASSERT (0); + goto 
out; + } + + index = 1; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (uuid_compare (brickinfo->uuid, candidate) > 0) + uuid_copy (candidate, brickinfo->uuid); + + if (index % replica_count == 0) { + if (!uuid_compare (priv->uuid, candidate)) { + _add_rxlator_to_dict (dict, volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; + } + uuid_clear (candidate); + } + + index++; + } + ret = dict_set_int32 (dict, "count", rxlator_count); + if (ret) + goto out; + + if (rxlator_count) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = priv->shd; + pending_node->type = GD_NODE_SHD; + list_add_tail (&pending_node->list, + &opinfo.pending_bricks); + pending_node = NULL; + } + } + + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); + return ret; + +} + +static int glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) { int ret = 0; @@ -2723,7 +2853,6 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) { int ret = 0; glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - glusterd_brickinfo_t *brickinfo = NULL; char *op_errstr = NULL; glusterd_op_t op = GD_OP_NONE; dict_t *op_ctx = NULL; @@ -2736,24 +2865,22 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) req_ctx = ev_ctx->commit_ctx; GF_ASSERT (req_ctx); - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); + op = req_ctx->op; + op_ctx = glusterd_op_get_ctx (); - ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); + ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, + ev_ctx->pending_node->node); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from %s:%s", brickinfo->hostname, brickinfo->path); + gf_log ("glusterd", GF_LOG_ERROR, "unknown response received "); ret = -1; goto out; } if (opinfo.brick_pending_count > 0) opinfo.brick_pending_count--; - op = req_ctx->op; - op_ctx = glusterd_op_get_ctx (); - glusterd_handle_brick_rsp (brickinfo, op, ev_ctx->rsp_dict, - op_ctx, &op_errstr); + glusterd_handle_brick_rsp (ev_ctx->pending_node->node, op, ev_ctx->rsp_dict, + op_ctx, &op_errstr); if (opinfo.brick_pending_count > 0) goto out; @@ -2791,6 +2918,10 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr) ret = glusterd_bricks_select_profile_volume (dict, op_errstr); break; + case GD_OP_HEAL_VOLUME: + ret = glusterd_bricks_select_heal_volume (dict, op_errstr); + break; + default: break; } @@ -3344,6 +3475,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx) case GD_OP_LOG_LEVEL: case GD_OP_STATUS_VOLUME: case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: dict_unref (ctx); break; case GD_OP_DELETE_VOLUME: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 0a75d4c8474..97385e6a49b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -152,7 +152,7 @@ typedef struct glusterd_op_brick_rsp_ctx_ { char *op_errstr; dict_t *rsp_dict; glusterd_req_ctx_t *commit_ctx; - glusterd_brickinfo_t *brickinfo; + glusterd_pending_node_t *pending_node; } glusterd_op_brick_rsp_ctx_t; typedef struct glusterd_pr_brick_rsp_conv_t { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 
c9f41405247..9cdab97df8b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -455,6 +455,21 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, xdrproc = (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp; break; } + case GD_OP_HEAL_VOLUME: + { + gf1_cli_heal_vol_rsp rsp = {0,}; + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.volname = ""; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + cli_rsp = &rsp; + xdrproc = (xdrproc_t) xdr_gf1_cli_heal_vol_rsp; + break; + + } case GD_OP_NONE: case GD_OP_MAX: { @@ -1922,7 +1937,7 @@ glusterd_start_brick_disconnect_timer (glusterd_op_brick_rsp_ctx_t *ev_ctx) timeout.tv_sec = 5; timeout.tv_usec = 0; - brickinfo = ev_ctx->brickinfo; + brickinfo = ev_ctx->pending_node->node; GF_ASSERT (brickinfo); this = THIS; GF_ASSERT (this); @@ -2000,7 +2015,7 @@ out: } else { event_type = GD_OP_EVENT_RCVD_ACC; } - ev_ctx->brickinfo = frame->cookie; + ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; op = glusterd_op_get_op (); @@ -2087,9 +2102,9 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, call_frame_t *dummy_frame = NULL; char *op_errstr = NULL; int pending_bricks = 0; - glusterd_pending_node_t *pending_brick; - glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node; glusterd_req_ctx_t *req_ctx = NULL; + struct rpc_clnt *rpc = NULL; if (!this) { ret = -1; @@ -2109,25 +2124,30 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, goto out; } - list_for_each_entry (pending_brick, &opinfo.pending_bricks, list) { + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); - brickinfo = pending_brick->node; - if (!dummy_frame) continue; - if (_gf_false == glusterd_is_brick_started (brickinfo)) - continue; - - ret = glusterd_brick_op_build_payload (req_ctx->op, brickinfo, - (gd1_mgmt_brick_op_req **)&req, - req_ctx->dict); + ret = glusterd_brick_op_build_payload (req_ctx->op, + pending_node->node, + (gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); if (ret) goto out; dummy_frame->local = data; - dummy_frame->cookie = brickinfo; - ret = glusterd_submit_request (brickinfo->rpc, req, dummy_frame, + dummy_frame->cookie = pending_node; + + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Brick Op failed " + "due to rpc failure."); + goto out; + } + + ret = glusterd_submit_request (rpc, req, dummy_frame, &glusterd_glusterfs_3_1_mgmt_prog, req->op, NULL, this, glusterd3_1_brick_op_cbk, @@ -2143,7 +2163,7 @@ glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, } gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks", - pending_bricks); + pending_bricks); opinfo.brick_pending_count = pending_bricks; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e6c23e8337e..59609971bd8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2366,6 +2366,120 @@ glusterd_get_nodesvc_volfile (char *server, char *workdir, snprintf (volfile, len, "%s/%s-server.vol", dir, server); } +void +glusterd_shd_set_running (gf_boolean_t status) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + priv->shd->running = status; +} + +gf_boolean_t +glusterd_shd_is_running () +{ + glusterd_conf_t *conf = NULL; + + 
conf = THIS->private; + GF_ASSERT (conf); + GF_ASSERT (conf->shd); + + return conf->shd->running; +} + +int32_t +glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len) +{ + char sockfilepath[PATH_MAX] = {0,}; + char md5_str[PATH_MAX] = {0,}; + + snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", + rundir, uuid_utoa (uuid)); + _get_md5_str (md5_str, sizeof (md5_str), + (uint8_t *)sockfilepath, sizeof (sockfilepath)); + snprintf (socketpath, len, "%s/%s.socket", glusterd_sock_dir, + md5_str); + return 0; +} + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) +{ + struct rpc_clnt *rpc = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + nodesrv_t *shd = NULL; + GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out); + GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); + + if (pending_node->type == GD_NODE_BRICK) { + brickinfo = pending_node->node; + rpc = brickinfo->rpc; + + } else if (pending_node->type == GD_NODE_SHD) { + shd = pending_node->node; + rpc = shd->rpc; + + } else { + GF_ASSERT (0); + } + +out: + return rpc; +} + +struct rpc_clnt* +glusterd_shd_get_rpc (void) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + return priv->shd->rpc; +} + +int32_t +glusterd_shd_set_rpc (struct rpc_clnt *rpc) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + + priv->shd->rpc = rpc; + + return ret; +} + +int32_t +glusterd_shd_connect (char *socketpath) { + int ret = 0; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + + ret = rpc_clnt_transport_unix_options_build (&options, socketpath); + if (ret) + goto out; + ret = glusterd_rpc_create (&rpc, options, + glusterd_shd_rpc_notify, + NULL); + if (ret) + goto out; + (void) glusterd_shd_set_rpc (rpc); +out: + return ret; +} + int32_t glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) { @@ -2376,6 +2490,7 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) char logfile[PATH_MAX] = {0,}; char volfile[PATH_MAX] = {0,}; char rundir[PATH_MAX] = {0,}; + char shd_sockfpath[PATH_MAX] = {0,}; char volfileid[256] = {0}; this = THIS; @@ -2408,13 +2523,28 @@ glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) server); snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); - if (pmap_signin) + if (!strcmp (server, "glustershd")) { + glusterd_shd_set_socket_filepath (rundir, + priv->uuid, + shd_sockfpath, + sizeof (shd_sockfpath)); + } + + //TODO: kp:change the assumption that shd is the one which signs in + // use runner_add_args? 
+ if (pmap_signin) { ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volfileid, - "-p", pidfile, "-l", logfile, NULL); - else + "-p", pidfile, "-l", logfile, + "-S", shd_sockfpath, NULL); + if (!ret) + glusterd_shd_connect (shd_sockfpath); + + } + else { ret = runcmd (SBIN_DIR"/glusterfs", "-f", volfile, "-p", pidfile, "-l", logfile, NULL); + } out: return ret; @@ -3742,7 +3872,7 @@ glusterd_remove_pending_entry (struct list_head *list, void *elem) { glusterd_pending_node_t *pending_node = NULL; glusterd_pending_node_t *tmp = NULL; - int ret = -1; + int ret = 0; list_for_each_entry_safe (pending_node, tmp, list, list) { if (elem == pending_node->node) { diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 93fa763bd51..aca46eae120 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -174,6 +174,28 @@ glusterd_shd_start (); int32_t glusterd_shd_stop (); +int32_t +glusterd_shd_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len); + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node); + +struct rpc_clnt* +glusterd_shd_get_rpc (void); + +int32_t +glusterd_shd_set_rpc (struct rpc_clnt *rpc); + +int32_t +glusterd_shd_connect (char *socketpath); + +void +glusterd_shd_set_running (gf_boolean_t status); + +gf_boolean_t +glusterd_shd_is_running (); + int glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 560968defe7..39cc02c8ee0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -392,6 +392,62 @@ out: return ret; } +int +glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_heal_vol_req cli_req = {0,}; + char *dup_volname = NULL; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_HEAL_VOLUME; + + GF_ASSERT (req); + + if (!xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_heal_vol_req)) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req" + "for volume %s", cli_req.volname); + + dict = dict_new (); + + if (!dict) + goto out; + + dup_volname = gf_strdup (cli_req.volname); + if (!dup_volname) + goto out; + + ret = dict_set_dynstr (dict, "volname", dup_volname); + if (ret) + goto out; + + ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict); + + gf_cmd_log ("volume heal","on volname: %s %s", cli_req.volname, + ((ret == 0) ? 
"SUCCESS": "FAILED")); + +out: + if (ret && dict) + dict_unref (dict); + if (cli_req.volname) + free (cli_req.volname); //its malloced by xdr + + glusterd_friend_sm (); + glusterd_op_sm (); + + if (ret) + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + NULL, "operation failed"); + + return ret; +} + + /* op-sm */ int glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) @@ -754,6 +810,101 @@ out: } int +glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + gf_boolean_t exists = _gf_false; + gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048]; + glusterd_conf_t *priv = NULL; + dict_t *opt_dict = NULL; + + priv = THIS->private; + if (!priv) { + gf_log (THIS->name, GF_LOG_ERROR, + "priv is NULL"); + ret = -1; + goto out; + } + + if (!glusterd_shd_is_running ()) { + ret = -1; + snprintf (msg, sizeof (msg), "Self-heal daemon is not " + "running."); + *op_errstr = gf_strdup (msg); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + goto out; + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + exists = glusterd_check_volume_exists (volname); + + if (!exists) { + snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); + gf_log ("", GF_LOG_ERROR, "%s", + msg); + *op_errstr = gf_strdup (msg); + ret = -1; + } else { + ret = 0; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + if (!glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s is not started.", + volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (!glusterd_is_volume_replicate (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s is not of type." + "replicate", volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + opt_dict = volinfo->dict; + if (!opt_dict) { + ret = 0; + goto out; + } + + enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon", + 1); + if (!enabled) { + snprintf (msg, sizeof (msg), "Self-heal-daemon is " + "disabled. 
Heal will not be triggered on volume %s", + volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int glusterd_op_create_volume (dict_t *dict, char **op_errstr) { int ret = 0; @@ -1034,3 +1185,12 @@ out: gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } + +int +glusterd_op_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + /* Necessary subtasks of heal are completed in brick op */ + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index d1582eae38a..83bbd1b2238 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -930,6 +930,10 @@ init (xlator_t *this) conf = GF_CALLOC (1, sizeof (glusterd_conf_t), gf_gld_mt_glusterd_conf_t); GF_VALIDATE_OR_GOTO(this->name, conf, out); + conf->shd = GF_CALLOC (1, sizeof (nodesrv_t), + gf_gld_mt_nodesrv_t); + GF_VALIDATE_OR_GOTO(this->name, conf->shd, out); + INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); pthread_mutex_init (&conf->mutex, NULL); @@ -961,6 +965,7 @@ init (xlator_t *this) } #endif this->private = conf; + (void) glusterd_shd_set_running (_gf_false); /* this->ctx->top = this;*/ ret = glusterd_uuid_init (first_time); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ab521af0f4b..b49e7d6751f 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -79,6 +79,7 @@ typedef enum glusterd_op_ { GD_OP_LOG_LEVEL, GD_OP_STATUS_VOLUME, GD_OP_REBALANCE, + GD_OP_HEAL_VOLUME, GD_OP_MAX, } glusterd_op_t; @@ -95,6 +96,11 @@ struct glusterd_volgen { dict_t *dict; }; typedef struct { + struct rpc_clnt *rpc; + gf_boolean_t running; +} nodesrv_t; + +typedef struct { struct _volfile_ctx *volfile; pthread_mutex_t mutex; struct list_head peers; @@ -104,6 +110,7 @@ typedef struct { uuid_t uuid; char workdir[PATH_MAX]; rpcsvc_t *rpc; + nodesrv_t *shd; struct pmap_registry *pmap; struct list_head volumes; struct list_head xprt_list; @@ -225,9 +232,16 @@ struct glusterd_volinfo_ { xlator_t *xl; }; +typedef enum gd_node_type_ { + GD_NODE_NONE, + GD_NODE_BRICK, + GD_NODE_SHD +} gd_node_type; + typedef struct glusterd_pending_node_ { - void *node; struct list_head list; + void *node; + gd_node_type type; } glusterd_pending_node_t; enum glusterd_op_ret { @@ -511,6 +525,10 @@ glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, void *data); int +glusterd_shd_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); + +int glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options, rpc_clnt_notify_t notify_fn, void *notify_data); @@ -535,8 +553,11 @@ int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req); int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk); +int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); /* op-sm functions */ +int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); +int glusterd_op_heal_volume (dict_t *dict, char **op_errstr); int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr); int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict); int glusterd_op_quota (dict_t *dict, char **op_errstr); |
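
When one of the staging checks in glusterd_op_stage_heal_volume() fails, its error string is returned in the heal response and printed by the CLI callback. A hedged example of what that is expected to look like, again with an illustrative volume name and assuming the stage error string is propagated to the CLI response as in other volume ops:

```console
$ gluster volume heal test-vol
Volume test-vol is not started.
```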