summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author Sanju Rakonde <srakonde@redhat.com> 2018-02-21 12:46:25 +0530
committer Atin Mukherjee <amukherj@redhat.com> 2018-03-28 04:27:18 +0000
commit a60fc2ddc03134fb23c5ed5c0bcb195e1649416b (patch)
tree c4f42085b4c6c6761bf4f3d23a24b8821cf292a8 /xlators
parent 95601229b8318f015a19d7eff89f73853b684a49 (diff)
glusterd: handling brick termination in brick-mux
Problem: There's a race between the last glusterfs_handle_terminate() response sent to glusterd and the kill that happens immediately if the terminated brick is the last brick.

Solution: When the terminated brick is the last brick of the brick process, glusterd kills the process instead of glusterfsd killing itself; this applies when brick multiplexing is enabled. The gf_attach utility is also changed accordingly.

Change-Id: I386c19ca592536daa71294a13d9fc89a26d7e8c0
fixes: bz#1545048
BUG: 1545048
Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-pmap.c 4
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-syncop.c 17
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 56
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.h 3
4 files changed, 55 insertions, 25 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index 98b1aaa63af..8ef285bf48d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -557,6 +557,7 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
glusterd_brickinfo_t *brickinfo = NULL;
char pidfile[PATH_MAX] = {0};
char brick_path[PATH_MAX] = {0,};
+ int kill_pid = -1;
this = THIS;
GF_VALIDATE_OR_GOTO ("glusterd", this, fail);
@@ -606,7 +607,8 @@ __gluster_pmap_signout (rpcsvc_request_t *req)
* removed in the brick op phase. This situation would
* arise when the brick is killed explicitly from the
* backend */
- ret = glusterd_brick_process_remove_brick (brickinfo);
+ ret = glusterd_brick_process_remove_brick (brickinfo,
+ &kill_pid);
if (ret) {
gf_msg_debug (this->name, 0, "Couldn't remove "
"brick %s:%s from brick process",
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 31b08d76adc..e5d4421deb4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -958,7 +958,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
gd1_mgmt_brick_op_req *req = NULL;
int ret = 0;
xlator_t *this = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
this = THIS;
args.op_ret = -1;
@@ -987,22 +986,6 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
else
GF_FREE (args.errstr);
}
- if (op == GD_OP_STOP_VOLUME || op == GD_OP_REMOVE_BRICK) {
- if (args.op_ret == 0) {
- brickinfo = pnode->node;
- ret = glusterd_brick_process_remove_brick (brickinfo);
- if (ret) {
- gf_msg ("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_BRICKPROC_REM_BRICK_FAILED,
- "Removing brick %s:%s from brick"
- " process failed",
- brickinfo->hostname,
- brickinfo->path);
- args.op_ret = ret;
- goto out;
- }
- }
- }
if (GD_OP_STATUS_VOLUME == op) {
ret = dict_set_int32 (args.dict, "index", pnode->index);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index af30756c947..8e71756b927 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -69,6 +69,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
+#include <sys/wait.h>
#include <rpc/pmap_clnt.h>
#include <unistd.h>
#include <fnmatch.h>
@@ -2313,7 +2314,8 @@ glusterd_brickprocess_delete (glusterd_brick_proc_t *brick_proc)
}
int
-glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo,
+ int *kill_pid)
{
int ret = -1;
xlator_t *this = NULL;
@@ -2352,6 +2354,7 @@ glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo)
/* If all bricks have been removed, delete the brick process */
if (brick_proc->brick_count == 0) {
+ *kill_pid = 1;
ret = glusterd_brickprocess_delete (brick_proc);
if (ret)
goto out;
@@ -2454,7 +2457,11 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
glusterd_conf_t *conf = NULL;
int ret = -1;
char *op_errstr = NULL;
- char pidfile[PATH_MAX] = {0,};
+ char pidfile_path[PATH_MAX] = {0,};
+ int kill_pid = -1;
+ FILE *pidfile = NULL;
+ pid_t pid = -1;
+ int status = -1;
GF_ASSERT (volinfo);
GF_ASSERT (brickinfo);
@@ -2467,7 +2474,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
ret = 0;
- ret = glusterd_brick_process_remove_brick (brickinfo);
+ ret = glusterd_brick_process_remove_brick (brickinfo, &kill_pid);
if (ret) {
gf_msg_debug (this->name, 0, "Couldn't remove brick from"
" brick process");
@@ -2510,10 +2517,47 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
ret = 0;
}
- GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf);
+ if (kill_pid == 1 && is_brick_mx_enabled ()) {
+ pidfile = fopen (pidfile_path, "r");
+ if (!pidfile) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED,
+ "Unable to open pidfile: %s", pidfile_path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fscanf (pidfile, "%d", &pid);
+ if (ret <= 0) {
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ GD_MSG_FILE_OP_FAILED,
+ "Unable to get pid of brick process");
+ ret = -1;
+ goto out;
+ }
+
+ if (conf->op_version >= GD_OP_VERSION_4_1_0) {
+ while (conf->blockers) {
+ synclock_unlock (&conf->big_lock);
+ sleep (1);
+ synclock_lock (&conf->big_lock);
+ }
+ }
+ gf_log (this->name, GF_LOG_INFO,
+ "terminating the brick process "
+ "%d after loss of last brick %s of the volume %s",
+ pid, brickinfo->path, volinfo->volname);
+ kill (pid, SIGTERM);
+ waitpid (pid, &status, 0);
+ pmap_registry_remove (this, brickinfo->port, brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER, NULL,
+ _gf_true);
+ ret = 0;
+ }
- gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile);
- (void) sys_unlink (pidfile);
+ gf_msg_debug (this->name, 0, "Unlinking pidfile %s", pidfile_path);
+ (void) sys_unlink (pidfile_path);
brickinfo->status = GF_BRICK_STOPPED;
brickinfo->start_triggered = _gf_false;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 0e9e54a0687..8118c994974 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -182,7 +182,8 @@ glusterd_brick_process_add_brick (glusterd_brickinfo_t *brickinfo,
glusterd_volinfo_t *volinfo);
int
-glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo);
+glusterd_brick_process_remove_brick (glusterd_brickinfo_t *brickinfo,
+ int *kill_pid);
int
glusterd_brick_proc_for_port (int port, glusterd_brick_proc_t **brickprocess);