From 2915e10dd0ffe529ddf747451051cf9924c708f3 Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Wed, 22 Sep 2010 05:21:28 +0000 Subject: Restart all bricks which are down when glusterd comes up This is only done if the volume is started. Signed-off-by: shishir gowda Signed-off-by: Vijay Bellur BUG: 1562 (insufficient consistency check on start of glusterd) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1562 --- xlators/mgmt/glusterd/src/glusterd-pmap.c | 16 ++++- xlators/mgmt/glusterd/src/glusterd-pmap.h | 4 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 109 +++++++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-utils.h | 13 ++++ xlators/mgmt/glusterd/src/glusterd.c | 3 +- xlators/mgmt/glusterd/src/glusterd.h | 10 +++ 6 files changed, 150 insertions(+), 5 deletions(-) (limited to 'xlators/mgmt') diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index aaa6ed131..1e72b694b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -399,6 +399,8 @@ gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signin_req)) { req->rpc_err = GARBAGE_ARGS; @@ -408,6 +410,11 @@ gluster_pmap_signin (rpcsvc_request_t *req) rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans); + ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, + &brickinfo); + if (!ret) + glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); + fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (gd_serialize_t)xdr_from_pmap_signin_rsp); @@ -424,7 +431,8 @@ gluster_pmap_signout (rpcsvc_request_t *req) { pmap_signout_req args = {0,}; pmap_signout_rsp rsp = {0,}; - + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; if (xdr_to_glusterfs_req (req, &args, 
xdr_to_pmap_signout_req)) { //failed to decode msg; @@ -435,6 +443,11 @@ gluster_pmap_signout (rpcsvc_request_t *req) rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans); + ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, + &brickinfo); + if (!ret) + glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); + fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, (gd_serialize_t)xdr_from_pmap_signout_rsp); @@ -444,7 +457,6 @@ fail: return 0; } - rpcsvc_actor_t gluster_pmap_actors[] = { [GF_PMAP_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL }, [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index a021da030..fcad50db8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -55,5 +55,7 @@ int pmap_registry_bind (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt); int pmap_registry_remove (xlator_t *this, int port, const char *brickname, gf_pmap_port_type_t type, void *xprt); - +int +pmap_registry_search (xlator_t *this, const char *brickname, + gf_pmap_port_type_t type); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e3592891b..a59a06905 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -41,6 +41,7 @@ #include "glusterd-utils.h" #include "glusterd-store.h" #include "glusterd-volgen.h" +#include "glusterd-pmap.h" #include #include @@ -1750,3 +1751,111 @@ glusterd_is_exisiting_brick (char *hostname, char *path) out: return ret; } + +int +glusterd_restart_bricks (glusterd_conf_t *conf, xlator_t *this) +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char pidfile[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + int ret = -1; + struct stat stbuf = 
{0,}; + struct timespec timeout; + sigset_t mask; + + if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) { + perror ("sigprocmask"); + return -1; + } + + sigemptyset (&mask); + + timeout.tv_sec = 5; + timeout.tv_nsec = 0; + + sigtimedwait(&mask, NULL, &timeout); + GF_ASSERT (conf); + GF_ASSERT (this); + + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + //If volume status is not started, do not proceed + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + //Only bricks on localhost to started + if (glusterd_is_local_addr (brickinfo->hostname)) + continue; + //if started, implies already registered with pmap + if (!glusterd_is_brick_started(brickinfo)) + continue; + GLUSTERD_GET_VOLUME_DIR (path, volinfo, conf); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, + brickinfo->hostname, brickinfo->path); + ret = stat (pidfile, &stbuf); + //pid file not found, proceed to start + if (ret && errno == ENOENT) { + glusterd_volume_start_glusterfs ( + volinfo, brickinfo, 0); + } else if (!ret) { + ret = pmap_registry_search (this, + brickinfo->path, + GF_PMAP_PORT_BRICKSERVER); + if (ret) + continue; + //might be a stale pid file + ret = unlink (pidfile); + //goto out; + glusterd_volume_start_glusterfs ( + volinfo, brickinfo, 0); + } + } + glusterd_check_generate_start_nfs (volinfo); + } + } + return ret; +} + +int +glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, + gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo) +{ + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *tmpbrkinfo = NULL; + int ret = -1; + + GF_ASSERT (brickname); + GF_ASSERT (this); + + priv = this->private; + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + list_for_each_entry (tmpbrkinfo, &volinfo->bricks, + brick_list) { + if (localhost && glusterd_is_local_addr (tmpbrkinfo->hostname)) + continue; + if (!strcmp(tmpbrkinfo->path, brickname) && + 
(tmpbrkinfo->port == port)) { + *brickinfo = tmpbrkinfo; + return 0; + } + } + } + return ret; +} + +void +glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, + gf_brick_status_t status) +{ + GF_ASSERT (brickinfo); + brickinfo->status = status; +} + +int +glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo) +{ + GF_ASSERT (brickinfo); + return (!(brickinfo->status == GF_BRICK_STARTED)); +} + diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 72715a0f5..bc20c2d35 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -160,4 +160,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count); int glusterd_is_exisiting_brick (char *hostname, char *path); + +int +glusterd_get_brickinfo (xlator_t *this, const char *brickname, + int port, gf_boolean_t localhost, + glusterd_brickinfo_t **brickinfo); + +void +glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, + gf_brick_status_t status); + +int +glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index da40c285c..b64b001f7 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -44,7 +44,6 @@ #include "glusterd-op-sm.h" #include "glusterd-store.h" - static uuid_t glusterd_uuid; extern struct rpcsvc_program glusterd1_mop_prog; extern struct rpcsvc_program gluster_handshake_prog; @@ -414,7 +413,7 @@ init (xlator_t *this) glusterd_op_sm_init (); glusterd_opinfo_init (); - + glusterd_restart_bricks(conf, this); ret = 0; out: if (ret == -1) { diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b9890760f..abfb72cfa 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -75,6 +75,11 @@ typedef struct { glusterd_store_handle_t *handle; } 
glusterd_conf_t; +typedef enum gf_brick_status { + GF_BRICK_STOPPED, + GF_BRICK_STARTED, +} gf_brick_status_t; + struct glusterd_brickinfo { char hostname[1024]; char path[PATH_MAX]; @@ -83,6 +88,7 @@ struct glusterd_brickinfo { int port; char *logfile; glusterd_store_handle_t *shandle; + gf_brick_status_t status; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -393,7 +399,11 @@ glusterd_fetchspec_notify (xlator_t *this); int32_t glusterd_sync_volume (rpcsvc_request_t *req, dict_t *ctx); + int glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, dict_t *volumes, int count); + +int +glusterd_restart_bricks(glusterd_conf_t *conf, xlator_t *this); #endif -- cgit