summaryrefslogtreecommitdiffstats
path: root/glusterfsd
diff options
context:
space:
mode:
authorSanju Rakonde <srakonde@redhat.com>2018-02-21 12:46:25 +0530
committerAtin Mukherjee <amukherj@redhat.com>2018-03-28 04:27:18 +0000
commita60fc2ddc03134fb23c5ed5c0bcb195e1649416b (patch)
treec4f42085b4c6c6761bf4f3d23a24b8821cf292a8 /glusterfsd
parent95601229b8318f015a19d7eff89f73853b684a49 (diff)
glusterd: handling brick termination in brick-mux
Problem: There's a race between the last glusterfs_handle_terminate() response sent to glusterd and the kill that happens immediately if the terminated brick is the last brick. Solution: When it is a last brick for the brick process, instead of glusterfsd killing itself, glusterd will kill the process in case of brick multiplexing. And also changing gf_attach utility accordingly. Change-Id: I386c19ca592536daa71294a13d9fc89a26d7e8c0 fixes: bz#1545048 BUG: 1545048 Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
Diffstat (limited to 'glusterfsd')
-rw-r--r--glusterfsd/src/gf_attach.c41
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c33
2 files changed, 60 insertions, 14 deletions
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
index 3f248292ddf..0eb4868263b 100644
--- a/glusterfsd/src/gf_attach.c
+++ b/glusterfsd/src/gf_attach.c
@@ -11,6 +11,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
//#include "config.h"
#include "glusterfs.h"
@@ -23,6 +26,7 @@
int done = 0;
int rpc_status;
+glfs_t *fs;
struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {
[GLUSTERD_BRICK_NULL] = {"NULL", NULL },
@@ -71,11 +75,43 @@ my_notify (struct rpc_clnt *rpc, void *mydata,
}
int32_t
-my_callback (struct rpc_req *req, struct iovec *iov, int count, void *frame)
+my_callback (struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
{
+ gd1_mgmt_brick_op_rsp rsp;
+ dict_t *dict = NULL;
+ pid_t pid = -1;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = fs->ctx->master;
+ memset (&rsp, 0, sizeof (rsp));
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+
+ if (ret < 0) {
+ fprintf (stderr, "xdr decoding failed\n");
+ goto out;
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, dict,
+ (rsp.output.output_val),
+ (rsp.output.output_len),
+ ret, rsp.op_errno, out);
+ if (dict) {
+ if (dict_get_int32 (dict, "last_brick_terminated", &pid) == 0) {
+ int status = 0;
+
+ gf_log ("gf_attach", GF_LOG_INFO, "Killing %d", pid);
+ kill (pid, SIGTERM);
+ waitpid (pid, &status, 0);
+ }
+ dict_unref (dict);
+ }
+
rpc_status = req->rpc_status;
done = 1;
- return 0;
+ ret = 0;
+out:
+ return ret;
}
/* copied from gd_syncop_submit_request */
@@ -170,7 +206,6 @@ usage (char *prog)
int
main (int argc, char *argv[])
{
- glfs_t *fs;
struct rpc_clnt *rpc;
dict_t *options;
int ret;
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index d2b39494e51..c4df275077f 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -159,21 +159,31 @@ out:
}
int
-glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
+glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret,
+ gf_boolean_t last_brick)
{
gd1_mgmt_brick_op_rsp rsp = {0,};
dict_t *dict = NULL;
- int ret = 0;
+ int ret = -1;
rsp.op_ret = op_ret;
rsp.op_errno = 0;
rsp.op_errstr = "";
dict = dict_new ();
- if (dict)
+ if (dict) {
+ /* Setting the last_brick_terminated key in the dictionary is
+ * required for the standalone gf_attach utility to work.
+ * The gf_attach utility will receive this dictionary and kill
+ * the process.
+ */
+ if (last_brick) {
+ ret = dict_set_int32 (dict, "last_brick_terminated",
+ getpid());
+ }
ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
&rsp.output.output_len);
-
+ }
if (ret == 0)
ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
@@ -262,6 +272,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
xlator_t *victim = NULL;
xlator_list_t **trav_p = NULL;
gf_boolean_t lockflag = _gf_false;
+ gf_boolean_t last_brick = _gf_false;
ret = xdr_to_generic (req->msg[0], &xlator_req,
(xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -294,17 +305,16 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
* make sure it's down and if it's already down that's
* good enough.
*/
- glusterfs_terminate_response_send (req, 0);
+ glusterfs_terminate_response_send (req, 0, last_brick);
goto err;
}
- glusterfs_terminate_response_send (req, 0);
if ((trav_p == &top->children) && !(*trav_p)->next) {
- gf_log (THIS->name, GF_LOG_INFO,
- "terminating after loss of last child %s",
- xlator_req.name);
- rpc_clnt_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
- kill (getpid(), SIGTERM);
+ last_brick = _gf_true;
+ glusterfs_terminate_response_send (req, 0, last_brick);
+ gf_log (THIS->name, GF_LOG_INFO, "This is last brick of process."
+ "glusterD will kill the process and takes care of "
+ "removal of entries from port map register");
} else {
/*
* This is terribly unsafe without quiescing or shutting
@@ -313,6 +323,7 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
*
* TBD: finish implementing this "detach" code properly
*/
+ glusterfs_terminate_response_send (req, 0, last_brick);
UNLOCK (&ctx->volfile_lock);
lockflag = _gf_true;
gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"