author    Atin Mukherjee <amukherj@redhat.com>    2017-10-26 14:26:30 +0530
committer jiffin tony Thottan <jthottan@redhat.com>    2017-11-06 06:09:21 +0000
commit    44e3c3b5c813168d72f10ecb3c058ac3489c719c (patch)
tree      9ae418d5cecd1b738a9926f227238d097bad8ca2 /xlators/mgmt/glusterd/src
parent    4e6dc4f134ed81005bb91f9cb4e18bf5836dffb5 (diff)
glusterd: fix brick restart parallelism
glusterd's brick restart logic is not always sequential, as there are at least three different ways bricks get restarted:

1. through friend-sm and glusterd_spawn_daemons ()
2. through friend-sm when handling the volume quorum action
3. through friend handshaking when there is a mismatch on quorum on friend import

In a brick multiplexing setup, glusterd ended up trying to spawn the same brick process more than once: two threads could hit glusterd_brick_start () within a fraction of a millisecond, and glusterd had no way to reject either of them because the brick start criteria were met in both cases.

As a solution, this race is controlled by two different flags: a boolean called start_triggered, which indicates that a brick start has been triggered and stays true until the brick dies or is killed, and a mutex lock to ensure that, for a particular brick, we don't enter glusterd_brick_start () more than once at the same point in time.

Change-Id: I292f1e58d6971e111725e1baea1fe98b890b43e2
BUG: 1508283
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
(cherry picked from commit 82be66ef8e9e3127d41a4c843daf74c1d8aec4aa)
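A minimal standalone sketch of the pattern this patch introduces, assuming simplified types: the struct and functions below are illustrative stand-ins, not the actual glusterd_brickinfo_t or glusterd_brick_start ().

#include <pthread.h>
#include <stdbool.h>

/* simplified stand-in for glusterd_brickinfo_t */
struct brick {
        bool            start_triggered; /* set on trigger, cleared when the
                                          * brick dies or is killed */
        pthread_mutex_t restart_mutex;   /* serializes start attempts per brick */
};

static void
brick_init (struct brick *b)
{
        b->start_triggered = false;
        pthread_mutex_init (&b->restart_mutex, NULL);
}

/* stand-in for glusterd_brick_start (), always called under the
 * caller's restart_mutex */
static int
brick_start (struct brick *b)
{
        if (b->start_triggered)  /* the racer that lost the lock backs off here */
                return 0;
        b->start_triggered = true;
        /* ... spawn or attach the brick process; on failure, reset
         * start_triggered so a later retry can happen ... */
        return 0;
}

/* stand-in for any of the three restart paths named above */
static void
brick_restart_path (struct brick *b)
{
        if (!b->start_triggered) {              /* unlocked fast path */
                pthread_mutex_lock (&b->restart_mutex);
                {
                        brick_start (b);
                }
                pthread_mutex_unlock (&b->restart_mutex);
        }
}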
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-handler.c       | 24
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c         | 31
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 15
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c         | 39
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-ops.c    |  8
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd.h               |  2
6 files changed, 87 insertions, 32 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 185186a8ad6..37ef277f2b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5946,16 +5946,22 @@ glusterd_mark_bricks_stopped_by_proc (glusterd_brick_proc_t *brick_proc) {
int ret = -1;
cds_list_for_each_entry (brickinfo, &brick_proc->bricks, brick_list) {
- ret = glusterd_get_volinfo_from_brick (brickinfo->path, &volinfo);
+ ret = glusterd_get_volinfo_from_brick (brickinfo->path,
+ &volinfo);
if (ret) {
- gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
- "Failed to get volinfo from brick(%s)",
- brickinfo->path);
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLINFO_GET_FAIL, "Failed to get volinfo"
+ " from brick(%s)", brickinfo->path);
goto out;
}
- cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks, brick_list) {
- if (strcmp (brickinfo->path, brickinfo_tmp->path) == 0)
- glusterd_set_brick_status (brickinfo_tmp, GF_BRICK_STOPPED);
+ cds_list_for_each_entry (brickinfo_tmp, &volinfo->bricks,
+ brick_list) {
+ if (strcmp (brickinfo->path,
+ brickinfo_tmp->path) == 0) {
+ glusterd_set_brick_status (brickinfo_tmp,
+ GF_BRICK_STOPPED);
+ brickinfo_tmp->start_triggered = _gf_false;
+ }
}
}
return 0;
@@ -6104,8 +6110,10 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
if (temp == 1)
break;
}
- } else
+ } else {
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+ brickinfo->start_triggered = _gf_false;
+ }
break;
case RPC_CLNT_DESTROY:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 83680cf7a7a..51579fe3826 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2402,18 +2402,25 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo);
cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- ret = glusterd_brick_start (volinfo, brickinfo, _gf_false);
- if (ret) {
- gf_msg (THIS->name, GF_LOG_ERROR, 0,
- GD_MSG_BRICK_DISCONNECTED,
- "Failed to start %s:%s for %s",
- brickinfo->hostname, brickinfo->path,
- volinfo->volname);
- gf_event (EVENT_BRICK_START_FAILED,
- "peer=%s;volume=%s;brick=%s",
- brickinfo->hostname, volinfo->volname,
- brickinfo->path);
- goto out;
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock (&brickinfo->restart_mutex);
+ {
+ ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock (&brickinfo->restart_mutex);
+ if (ret) {
+ gf_msg (THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_DISCONNECTED,
+ "Failed to start %s:%s for %s",
+ brickinfo->hostname, brickinfo->path,
+ volinfo->volname);
+ gf_event (EVENT_BRICK_START_FAILED,
+ "peer=%s;volume=%s;brick=%s",
+ brickinfo->hostname, volinfo->volname,
+ brickinfo->path);
+ goto out;
+ }
}
}
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index 659ff9ddfc9..3084c1eacb9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -361,10 +361,19 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
if (!glusterd_is_local_brick (this, volinfo, brickinfo))
continue;
- if (quorum_status == DOESNT_MEET_QUORUM)
+ if (quorum_status == DOESNT_MEET_QUORUM) {
glusterd_brick_stop (volinfo, brickinfo, _gf_false);
- else
- glusterd_brick_start (volinfo, brickinfo, _gf_false);
+ } else {
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock (&brickinfo->restart_mutex);
+ {
+ glusterd_brick_start (volinfo,
+ brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock (&brickinfo->restart_mutex);
+ }
+ }
}
volinfo->quorum_status = quorum_status;
if (quorum_status == MEETS_QUORUM) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 118a946f27c..c4681b2486c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1084,7 +1084,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
goto out;
CDS_INIT_LIST_HEAD (&new_brickinfo->brick_list);
-
+ pthread_mutex_init (&new_brickinfo->restart_mutex, NULL);
*brickinfo = new_brickinfo;
ret = 0;
@@ -2481,7 +2481,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
(void) sys_unlink (pidfile);
brickinfo->status = GF_BRICK_STOPPED;
-
+ brickinfo->start_triggered = _gf_false;
if (del_brick)
glusterd_delete_brick (volinfo, brickinfo);
out:
@@ -5817,13 +5817,14 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
* three different triggers for an attempt to start the brick process
* due to the quorum handling code in glusterd_friend_sm.
*/
- if (brickinfo->status == GF_BRICK_STARTING) {
+ if (brickinfo->status == GF_BRICK_STARTING ||
+ brickinfo->start_triggered) {
gf_msg_debug (this->name, 0, "brick %s is already in starting "
"phase", brickinfo->path);
ret = 0;
goto out;
}
-
+ brickinfo->start_triggered = _gf_true;
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
if (gf_is_service_running (pidfile, &pid)) {
if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5936,6 +5937,9 @@ run:
}
out:
+ if (ret && brickinfo) {
+ brickinfo->start_triggered = _gf_false;
+ }
gf_msg_debug (this->name, 0, "returning %d ", ret);
return ret;
}
@@ -5997,11 +6001,19 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
start_svcs = _gf_true;
glusterd_svcs_manager (NULL);
}
-
cds_list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
- glusterd_brick_start (volinfo, brickinfo,
- _gf_false);
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock
+ (&brickinfo->restart_mutex);
+ {
+ glusterd_brick_start
+ (volinfo, brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock
+ (&brickinfo->restart_mutex);
+ }
}
ret = glusterd_store_volinfo
(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
@@ -6040,8 +6052,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
"volume %s", volinfo->volname);
cds_list_for_each_entry (brickinfo, &volinfo->bricks,
brick_list) {
- glusterd_brick_start (volinfo, brickinfo,
- _gf_false);
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock
+ (&brickinfo->restart_mutex);
+ {
+ glusterd_brick_start
+ (volinfo, brickinfo,
+ _gf_false);
+ }
+ pthread_mutex_unlock
+ (&brickinfo->restart_mutex);
+ }
}
ret = glusterd_store_volinfo
(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 9d340738452..7c037e843b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2545,6 +2545,14 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
GF_ASSERT (volinfo);
cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ /* Reset start_triggered to false here: if this brick was
+ * brought down through the gf_attach utility,
+ * brickinfo->start_triggered wouldn't have been updated to
+ * _gf_false
+ */
+ if (flags & GF_CLI_FLAG_OP_FORCE) {
+ brickinfo->start_triggered = _gf_false;
+ }
ret = glusterd_brick_start (volinfo, brickinfo, wait);
/* If 'force' try to start all bricks regardless of success or
* failure
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 722d2f8f420..d4bb236b36b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -240,6 +240,8 @@ struct glusterd_brickinfo {
uint64_t statfs_fsid;
uint32_t fs_share_count;
gf_boolean_t port_registered;
+ gf_boolean_t start_triggered;
+ pthread_mutex_t restart_mutex;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
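Taken together, the two new fields give every brick its own guard. A self-contained demo of why the locked re-check matters (hypothetical names and plain pthreads, not glusterd code): two threads race down the restart path, yet the brick is "spawned" exactly once.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t restart_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool start_triggered = false; /* analogous to brickinfo->start_triggered */
static int  spawn_count = 0;

static void *
restart_path (void *arg)
{
        (void) arg;
        /* unlocked fast-path check, as in the patch; the re-check
         * under the lock is what guarantees a single spawn */
        if (!start_triggered) {
                pthread_mutex_lock (&restart_mutex);
                {
                        if (!start_triggered) {
                                start_triggered = true;
                                spawn_count++;  /* "spawn" the brick */
                        }
                }
                pthread_mutex_unlock (&restart_mutex);
        }
        return NULL;
}

int
main (void)
{
        pthread_t t1, t2;

        pthread_create (&t1, NULL, restart_path, NULL);
        pthread_create (&t2, NULL, restart_path, NULL);
        pthread_join (t1, NULL);
        pthread_join (t2, NULL);
        printf ("brick spawned %d time(s)\n", spawn_count); /* always 1 */
        return 0;
}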