author     Atin Mukherjee <amukherj@redhat.com>        2017-10-26 14:26:30 +0530
committer  jiffin tony Thottan <jthottan@redhat.com>   2017-11-06 06:09:21 +0000
commit     44e3c3b5c813168d72f10ecb3c058ac3489c719c (patch)
tree       9ae418d5cecd1b738a9926f227238d097bad8ca2 /xlators/mgmt/glusterd/src
parent     4e6dc4f134ed81005bb91f9cb4e18bf5836dffb5 (diff)
glusterd: fix brick restart parallelism
glusterd's brick restart logic is not always sequential, as there are
at least three different ways in which bricks get restarted:
1. through friend-sm and glusterd_spawn_daemons ()
2. through friend-sm and handling volume quorum action
3. through friend handshaking when there is a mismatch on quorum on
   friend import.
In a brick multiplexing setup, glusterd ended up trying to spawn the
same brick process more than once: two threads could hit
glusterd_brick_start () within a fraction of a millisecond, and glusterd
had no way to reject either attempt because the brick start criteria
were met in both cases.
As a solution, this is now controlled by two mechanisms: a boolean flag,
start_triggered, which indicates that a brick start has been triggered
and stays true until the brick dies or is killed, and a per-brick mutex
which ensures that, for a given brick, glusterd_brick_start () is not
entered more than once at the same point in time. A minimal illustrative
sketch of this guard pattern follows the commit trailers below.
Change-Id: I292f1e58d6971e111725e1baea1fe98b890b43e2
BUG: 1508283
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
(cherry picked from commit 82be66ef8e9e3127d41a4c843daf74c1d8aec4aa)
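Illustrative sketch (not part of the commit): the guard described above can be
shown with a small, self-contained C program. The brick_t type,
spawn_brick_process (), and try_start_brick () below are hypothetical stand-ins,
not glusterd APIs, and the sketch folds the start_triggered check and update
into one helper, whereas the actual patch checks the flag at the call sites and
sets it inside glusterd_brick_start ().

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for glusterd_brickinfo_t: only the two fields
 * added by this patch are modelled. */
typedef struct brick {
        bool            start_triggered;   /* a start is already in flight */
        pthread_mutex_t restart_mutex;     /* serialises start attempts    */
} brick_t;

/* Placeholder for the real brick spawn logic. */
static int
spawn_brick_process (brick_t *brick)
{
        (void) brick;
        printf ("spawning brick process\n");
        return 0;
}

/* Guard pattern from the patch: skip if a start was already triggered,
 * otherwise take the per-brick mutex, re-check, mark the start as
 * triggered, and clear the flag again if the start fails so that a
 * later attempt can retry. */
static int
try_start_brick (brick_t *brick)
{
        int ret = 0;

        if (brick->start_triggered)
                return 0;

        pthread_mutex_lock (&brick->restart_mutex);
        {
                if (!brick->start_triggered) {
                        brick->start_triggered = true;
                        ret = spawn_brick_process (brick);
                        if (ret)
                                brick->start_triggered = false;
                }
        }
        pthread_mutex_unlock (&brick->restart_mutex);

        return ret;
}

int
main (void)
{
        brick_t brick = { .start_triggered = false };

        pthread_mutex_init (&brick.restart_mutex, NULL);
        try_start_brick (&brick);        /* spawns the brick process */
        try_start_brick (&brick);        /* no-op: already triggered */
        pthread_mutex_destroy (&brick.restart_mutex);
        return 0;
}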
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handler.c       | 24
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c         | 31
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 15
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c         | 39
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-ops.c    |  8
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd.h               |  2
6 files changed, 87 insertions(+), 32 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 185186a8ad6..37ef277f2b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5946,16 +5946,22 @@ glusterd_mark_bricks_stopped_by_proc (glusterd_brick_proc_t *brick_proc) {
         int                      ret = -1;
 
         cds_list_for_each_entry (brickinfo, &brick_proc->bricks, brick_list) {
-                ret = glusterd_get_volinfo_from_brick (brickinfo->path, &volinfo);
+                ret = glusterd_get_volinfo_from_brick (brickinfo->path,
+                                                       &volinfo);
                 if (ret) {
-                        gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
-                                "Failed to get volinfo from brick(%s)",
-                                brickinfo->path);
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo"
+                                " from brick(%s)", brickinfo->path);
                         goto out;
                 }
-                cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks, brick_list) {
-                        if (strcmp (brickinfo->path, brickinfo_tmp->path) == 0)
-                                glusterd_set_brick_status (brickinfo_tmp, GF_BRICK_STOPPED);
+                cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks,
+                                         brick_list) {
+                        if (strcmp (brickinfo->path,
+                                    brickinfo_tmp->path) == 0) {
+                                glusterd_set_brick_status (brickinfo_tmp,
+                                                           GF_BRICK_STOPPED);
+                                brickinfo_tmp->start_triggered = _gf_false;
+                        }
                 }
         }
         return 0;
@@ -6104,8 +6110,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                                 if (temp == 1)
                                         break;
                         }
-                } else
+                } else {
                         glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+                        brickinfo->start_triggered = _gf_false;
+                }
                 break;
 
         case RPC_CLNT_DESTROY:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 83680cf7a7a..51579fe3826 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2402,18 +2402,25 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                ret = glusterd_brick_start (volinfo, brickinfo, _gf_false);
-                if (ret) {
-                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
-                                GD_MSG_BRICK_DISCONNECTED,
-                                "Failed to start %s:%s for %s",
-                                brickinfo->hostname, brickinfo->path,
-                                volinfo->volname);
-                        gf_event (EVENT_BRICK_START_FAILED,
-                                  "peer=%s;volume=%s;brick=%s",
-                                  brickinfo->hostname, volinfo->volname,
-                                  brickinfo->path);
-                        goto out;
+                if (!brickinfo->start_triggered) {
+                        pthread_mutex_lock (&brickinfo->restart_mutex);
+                        {
+                                ret = glusterd_brick_start (volinfo, brickinfo,
+                                                            _gf_false);
+                        }
+                        pthread_mutex_unlock (&brickinfo->restart_mutex);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_BRICK_DISCONNECTED,
+                                        "Failed to start %s:%s for %s",
+                                        brickinfo->hostname, brickinfo->path,
+                                        volinfo->volname);
+                                gf_event (EVENT_BRICK_START_FAILED,
+                                          "peer=%s;volume=%s;brick=%s",
+                                          brickinfo->hostname, volinfo->volname,
+                                          brickinfo->path);
+                                goto out;
+                        }
                 }
         }
         ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index 659ff9ddfc9..3084c1eacb9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -361,10 +361,19 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
         list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
                 if (!glusterd_is_local_brick (this, volinfo, brickinfo))
                         continue;
-                if (quorum_status == DOESNT_MEET_QUORUM)
+                if (quorum_status == DOESNT_MEET_QUORUM) {
                         glusterd_brick_stop (volinfo, brickinfo, _gf_false);
-                else
-                        glusterd_brick_start (volinfo, brickinfo, _gf_false);
+                } else {
+                        if (!brickinfo->start_triggered) {
+                                pthread_mutex_lock (&brickinfo->restart_mutex);
+                                {
+                                        glusterd_brick_start (volinfo,
+                                                              brickinfo,
+                                                              _gf_false);
+                                }
+                                pthread_mutex_unlock (&brickinfo->restart_mutex);
+                        }
+                }
         }
         volinfo->quorum_status = quorum_status;
         if (quorum_status == MEETS_QUORUM) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 118a946f27c..c4681b2486c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1084,7 +1084,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
                 goto out;
 
         CDS_INIT_LIST_HEAD (&new_brickinfo->brick_list);
-
+        pthread_mutex_init (&new_brickinfo->restart_mutex, NULL);
         *brickinfo = new_brickinfo;
 
         ret = 0;
@@ -2481,7 +2481,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
         (void) sys_unlink (pidfile);
 
         brickinfo->status = GF_BRICK_STOPPED;
-
+        brickinfo->start_triggered = _gf_false;
         if (del_brick)
                 glusterd_delete_brick (volinfo, brickinfo);
 out:
@@ -5817,13 +5817,14 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
          * three different triggers for an attempt to start the brick process
          * due to the quorum handling code in glusterd_friend_sm.
          */
-        if (brickinfo->status == GF_BRICK_STARTING) {
+        if (brickinfo->status == GF_BRICK_STARTING ||
+            brickinfo->start_triggered) {
                 gf_msg_debug (this->name, 0, "brick %s is already in starting "
                               "phase", brickinfo->path);
                 ret = 0;
                 goto out;
         }
-
+        brickinfo->start_triggered = _gf_true;
         GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
         if (gf_is_service_running (pidfile, &pid)) {
                 if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5936,6 +5937,9 @@ run:
         }
 
 out:
+        if (ret && brickinfo) {
+                brickinfo->start_triggered = _gf_false;
+        }
         gf_msg_debug (this->name, 0, "returning %d ", ret);
         return ret;
 }
@@ -5997,11 +6001,19 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                         start_svcs = _gf_true;
                         glusterd_svcs_manager (NULL);
                 }
                 cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                        glusterd_brick_start (volinfo, brickinfo,
-                                              _gf_false);
+                        if (!brickinfo->start_triggered) {
+                                pthread_mutex_lock
+                                        (&brickinfo->restart_mutex);
+                                {
+                                        glusterd_brick_start
+                                                 (volinfo, brickinfo,
+                                                  _gf_false);
+                                }
+                                pthread_mutex_unlock
+                                        (&brickinfo->restart_mutex);
+                        }
                 }
                 ret = glusterd_store_volinfo (volinfo,
                                               GLUSTERD_VOLINFO_VER_AC_NONE);
@@ -6040,8 +6052,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                           "volume %s", volinfo->volname);
                 cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
-                        glusterd_brick_start (volinfo, brickinfo,
-                                              _gf_false);
+                        if (!brickinfo->start_triggered) {
+                                pthread_mutex_lock
+                                        (&brickinfo->restart_mutex);
+                                {
+                                        glusterd_brick_start
+                                                 (volinfo, brickinfo,
+                                                  _gf_false);
+                                }
+                                pthread_mutex_unlock
+                                        (&brickinfo->restart_mutex);
+                        }
                 }
                 ret = glusterd_store_volinfo (volinfo,
                                               GLUSTERD_VOLINFO_VER_AC_NONE);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 9d340738452..7c037e843b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2545,6 +2545,14 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                /* Mark start_triggered to false so that in case if this brick
+                 * was brought down through gf_attach utility, the
+                 * brickinfo->start_triggered wouldn't have been updated to
+                 * _gf_false
+                 */
+                if (flags & GF_CLI_FLAG_OP_FORCE) {
+                        brickinfo->start_triggered = _gf_false;
+                }
                 ret = glusterd_brick_start (volinfo, brickinfo, wait);
                 /* If 'force' try to start all bricks regardless of success or
                  * failure
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 722d2f8f420..d4bb236b36b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -240,6 +240,8 @@ struct glusterd_brickinfo {
         uint64_t           statfs_fsid;
         uint32_t           fs_share_count;
         gf_boolean_t       port_registered;
+        gf_boolean_t       start_triggered;
+        pthread_mutex_t    restart_mutex;
 };
 
 typedef struct glusterd_brickinfo glusterd_brickinfo_t;