diff options
author | Sanju Rakonde <srakonde@redhat.com> | 2018-02-21 12:46:25 +0530 |
---|---|---|
committer | Atin Mukherjee <amukherj@redhat.com> | 2018-03-28 04:27:18 +0000 |
commit | a60fc2ddc03134fb23c5ed5c0bcb195e1649416b (patch) | |
tree | c4f42085b4c6c6761bf4f3d23a24b8821cf292a8 /xlators/mgmt/glusterd/src/glusterd-utils.c | |
parent | 95601229b8318f015a19d7eff89f73853b684a49 (diff) |
glusterd: handling brick termination in brick-mux
Problem: There's a race between the last glusterfs_handle_terminate()
response sent to glusterd and the kill that happens immediately if the
terminated brick is the last brick.
Solution: When it is a last brick for the brick process, instead of glusterfsd
killing itself, glusterd will kill the process in case of brick multiplexing.
And also changing gf_attach utility accordingly.
Change-Id: I386c19ca592536daa71294a13d9fc89a26d7e8c0
fixes: bz#1545048
BUG: 1545048
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 56 |
1 files changed, 50 insertions, 6 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index af30756c947..8e71756b927 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -69,6 +69,7 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <sys/wait.h> #include <rpc/pmap_clnt.h> #include <unistd.h> #include <fnmatch.h> @@ -2313,7 +2314,8 @@ glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc) } int -glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) +glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo, + int *kill_pid) { int ret = -1; xlator_t *this = NULL; @@ -2352,6 +2354,7 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo) /* If all bricks have been removed, delete the brick process */ if (brick_proc->brick_count == 0) { + *kill_pid = 1; ret = glusterd_brickprocess_delete (brick_proc); if (ret) goto out; @@ -2454,7 +2457,11 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_conf_t *conf = NULL; int ret = -1; char *op_errstr = NULL; - char pidfile[PATH_MAX] = {0,}; + char pidfile_path[PATH_MAX] = {0,}; + int kill_pid = -1; + FILE *pidfile = NULL; + pid_t pid = -1; + int status = -1; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -2467,7 +2474,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; - ret = glusterd_brick_process_remove_brick (brickinfo); + ret = glusterd_brick_process_remove_brick (brickinfo, &kill_pid); if (ret) { gf_msg_debug (this->name, 0, "Couldn't remove brick from" " brick process"); @@ -2510,10 +2517,47 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, ret = 0; } - GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf); + GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); + if (kill_pid == 1 && is_brick_mx_enabled ()) { + pidfile = fopen (pidfile_path, "r"); + if (!pidfile) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_FILE_OP_FAILED, + "Unable to open pidfile: %s", pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf (pidfile, "%d", &pid); + if (ret <= 0) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_FILE_OP_FAILED, + "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + if (conf->op_version >= GD_OP_VERSION_4_1_0) { + while (conf->blockers) { + synclock_unlock (&conf->big_lock); + sleep (1); + synclock_lock (&conf->big_lock); + } + } + gf_log (this->name, GF_LOG_INFO, + "terminating the brick process " + "%d after loss of last brick %s of the volume %s", + pid, brickinfo->path, volinfo->volname); + kill (pid, SIGTERM); + waitpid (pid, &status, 0); + pmap_registry_remove (this, brickinfo->port, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER, NULL, + _gf_true); + ret = 0; + } - gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile); - (void) sys_unlink (pidfile); + gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile_path); + (void) sys_unlink (pidfile_path); brickinfo->status = GF_BRICK_STOPPED; brickinfo->start_triggered = _gf_false; |