author     Mohammed Rafi KC <rkavunga@redhat.com>      2019-06-24 12:00:20 +0530
committer  Rinku Kothiya <rkothiya@redhat.com>         2019-07-24 10:29:17 +0000
commit     47fcbc4c055a7880d2926e918ae1e1f57c7db20d (patch)
tree       6e2576a1d904aef082229ace4714c99c539428df /xlators/mgmt/glusterd
parent     3c3b6377d6bdea9bffec31da88dd629347617b6d (diff)
glusterd/svc: update pid of mux volumes from the shd process
For a normal volume, we update the pid from the process
itself, either when it daemonizes or at the end of init when
running in no-daemon mode. Along with updating the pid we
also lock the pidfile, to make sure that the process is
running fine.
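
For readers unfamiliar with the idiom, here is a minimal
standalone sketch of "update the pid and lock the file" in
plain POSIX C. The helper name own_pidfile is made up for
illustration; this is not the actual glusterfs code.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write our own pid into the pidfile and keep a write lock
     * on it for the life of the process. Anyone who wants to know
     * whether the daemon is alive can simply try to take the
     * lock: success means the owner is gone. */
    int
    own_pidfile(const char *path)
    {
        char buf[32];
        int len = 0;
        int fd = open(path, O_CREAT | O_RDWR, 0644);

        if (fd < 0)
            return -1;

        /* F_TLOCK fails immediately if another live process
         * already holds the lock */
        if (lockf(fd, F_TLOCK, 0) < 0) {
            close(fd);
            return -1;
        }

        len = snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
        if (ftruncate(fd, 0) < 0 || write(fd, buf, len) != len) {
            close(fd);
            return -1;
        }

        /* keep fd open: closing it would release the lock */
        return fd;
    }

The key point is that the process that owns the pid is the one
that writes and locks the file.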
With brick mux, we were updating the pidfile from glusterd
after an attach/detach request.

There are two problems with this approach.
1) We do not hold a pid lock on any file other than the
   parent process's pidfile.
2) There is a chance of race conditions around attach/detach.
For example, an shd start and a volume stop could race.
Say we are starting an shd and it gets attached to a volume.
While we are trying to link the pid file to the running
process, the file could already have been deleted by the
thread doing the volume stop (see the sketch below).
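
The window is the classic check-then-link race. A hypothetical,
self-contained illustration in plain POSIX C (the function names
are invented; this is not the glusterd code):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Thread A: attach path, linking the shared (parent) pidfile
     * to a per-volume pidfile name. */
    void
    attach_path(const char *parent_pidfile, const char *svc_pidfile)
    {
        if (access(parent_pidfile, F_OK) == 0) {
            /* <-- window: thread B may unlink parent_pidfile right
             * here, so the check above no longer holds */
            if (link(parent_pidfile, svc_pidfile) < 0 && errno == ENOENT)
                fprintf(stderr, "lost the race: pidfile vanished\n");
        }
    }

    /* Thread B: volume-stop path, cleaning up the pidfile. */
    void
    stop_path(const char *parent_pidfile)
    {
        unlink(parent_pidfile);
    }

Letting the shd process update and lock its own pidfile, as this
patch does, closes that window: the owner of the pid becomes the
only writer.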
Backport of: https://review.gluster.org/#/c/glusterfs/+/22935/
>Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
>Updates: bz#1722541
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
Updates: bz#1732668
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')

-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handler.c        |  2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-handshake.c      | 42
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c          |  4
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c | 25
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h |  3
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-shd-svc.c        |  8
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-svc-helper.c     | 57
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-syncop.c         |  2
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c          |  6

9 files changed, 114 insertions, 35 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 0044586cf55..462991776a1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5179,6 +5179,8 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
     brick_req->op = GLUSTERD_BRICK_STATUS;
     brick_req->name = "";
+    brick_req->dict.dict_val = NULL;
+    brick_req->dict.dict_len = 0;
 
     ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
                         brickinfo->path);
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index f27b60513b7..fc7448e7fa0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -202,7 +202,7 @@ out:
 size_t
 build_volfile_path(char *volume_id, char *path, size_t path_len,
-                   char *trusted_str)
+                   char *trusted_str, dict_t *dict)
 {
     struct stat stbuf = {
         0,
@@ -319,11 +319,19 @@ build_volfile_path(char *volume_id, char *path, size_t path_len,
         ret = glusterd_volinfo_find(volid_ptr, &volinfo);
         if (ret == -1) {
-            gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+                   "Couldn't find volinfo for volid=%s", volid_ptr);
             goto out;
         }
 
         glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
+
+        ret = glusterd_svc_set_shd_pidfile(volinfo, dict);
+        if (ret == -1) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                   "Couldn't set pidfile in dict for volid=%s", volid_ptr);
+            goto out;
+        }
         ret = 0;
         goto out;
     }
@@ -898,6 +906,7 @@ __server_getspec(rpcsvc_request_t *req)
     char addrstr[RPCSVC_PEER_STRLEN] = {0};
     peer_info_t *peerinfo = NULL;
     xlator_t *this = NULL;
+    dict_t *dict = NULL;
 
     this = THIS;
     GF_ASSERT(this);
@@ -955,6 +964,12 @@ __server_getspec(rpcsvc_request_t *req)
         goto fail;
     }
 
+    dict = dict_new();
+    if (!dict) {
+        ret = -ENOMEM;
+        goto fail;
+    }
+
     trans = req->trans;
     /* addrstr will be empty for cli socket connections */
     ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr));
@@ -977,12 +992,26 @@ __server_getspec(rpcsvc_request_t *req)
      */
     if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) {
         ret = build_volfile_path(volume, filename, sizeof(filename),
-                                 TRUSTED_PREFIX);
+                                 TRUSTED_PREFIX, dict);
     } else {
-        ret = build_volfile_path(volume, filename, sizeof(filename), NULL);
+        ret = build_volfile_path(volume, filename, sizeof(filename), NULL,
+                                 dict);
     }
 
     if (ret == 0) {
+        if (dict->count > 0) {
+            ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
+                                              &rsp.xdata.xdata_len);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                       "Failed to serialize dict "
+                       "to request buffer");
+                goto fail;
+            }
+            dict->extra_free = rsp.xdata.xdata_val;
+        }
+
         /* to allocate the proper buffer to hold the file data */
         ret = sys_stat(filename, &stbuf);
         if (ret < 0) {
@@ -1024,7 +1053,6 @@ __server_getspec(rpcsvc_request_t *req)
             goto fail;
         }
     }
-    /* convert to XDR */
 fail:
     if (spec_fd >= 0)
@@ -1047,6 +1075,10 @@ fail:
                           (xdrproc_t)xdr_gf_getspec_rsp);
     free(args.key);  // malloced by xdr
     free(rsp.spec);
+
+    if (dict)
+        dict_unref(dict);
+
     if (args.xdata.xdata_val)
         free(args.xdata.xdata_val);
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 9ab8930be26..91533489db5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -646,6 +646,8 @@ glusterd_brick_op_build_payload(glusterd_op_t op,
             break;
     }
 
+    brick_req->dict.dict_len = 0;
+    brick_req->dict.dict_val = NULL;
     ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
                                       &brick_req->input.input_len);
     if (ret)
@@ -714,6 +716,8 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
         goto out;
     }
 
+    brick_req->dict.dict_len = 0;
+    brick_req->dict.dict_val = NULL;
     ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
                                       &brick_req->input.input_len);
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
index 57ceda93eb7..5661e391a9c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
@@ -126,3 +126,28 @@ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
 out:
     return;
 }
+
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+    int ret = -1;
+    glusterd_svc_t *svc = NULL;
+    xlator_t *this = NULL;
+
+    this = THIS;
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+    GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+    svc = &(volinfo->shd.svc);
+
+    ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+               "Failed to set pidfile %s in dict", svc->proc.pidfile);
+        goto out;
+    }
+    ret = 0;
+out:
+    return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
index 59466ec9e3b..1f0984ba857 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
@@ -36,4 +36,7 @@ glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
 int
 glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
 
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict);
+
 #endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
index dbdf356bcb3..aad47203d5f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -258,14 +258,20 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
     gf_boolean_t shd_restart = _gf_false;
 
     conf = THIS->private;
-    volinfo = data;
     GF_VALIDATE_OR_GOTO("glusterd", conf, out);
     GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+    volinfo = data;
     GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
 
     if (volinfo)
         glusterd_volinfo_ref(volinfo);
 
+    if (volinfo->is_snap_volume) {
+        /* healing of a snap volume is not supported yet*/
+        ret = 0;
+        goto out;
+    }
+
     while (conf->restart_shd) {
         synclock_unlock(&conf->big_lock);
         sleep(2);
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
index 70cc539b510..e4924f831b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -517,7 +517,7 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
             /* Take first entry from the process */
             parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
                                         mux_svc);
-            sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
+            glusterd_copy_file(parent_svc->proc.pidfile, svc->proc.pidfile);
             mux_conn = &parent_svc->conn;
             if (volinfo)
                 volinfo->shd.attached = _gf_true;
@@ -621,12 +621,9 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
     glusterd_volinfo_t *volinfo = NULL;
     glusterd_shdsvc_t *shd = NULL;
     glusterd_svc_t *svc = frame->cookie;
-    glusterd_svc_t *parent_svc = NULL;
-    glusterd_svc_proc_t *mux_proc = NULL;
     glusterd_conf_t *conf = NULL;
     int *flag = (int *)frame->local;
     xlator_t *this = THIS;
-    int pid = -1;
     int ret = -1;
     gf_getspec_rsp rsp = {
         0,
@@ -677,27 +674,7 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
     }
 
     if (rsp.op_ret == 0) {
-        pthread_mutex_lock(&conf->attach_lock);
-        {
-            if (!strcmp(svc->name, "glustershd")) {
-                mux_proc = svc->svc_proc;
-                if (mux_proc &&
-                    !gf_is_service_running(svc->proc.pidfile, &pid)) {
-                    /*
-                     * When svc's are restarting, there is a chance that the
-                     * attached svc might not have updated it's pid. Because
-                     * it was at connection stage. So in that case, we need
-                     * to retry the pid file copy.
-                     */
-                    parent_svc = cds_list_entry(mux_proc->svcs.next,
-                                                glusterd_svc_t, mux_svc);
-                    if (parent_svc)
-                        sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
-                }
-            }
-            svc->online = _gf_true;
-        }
-        pthread_mutex_unlock(&conf->attach_lock);
+        svc->online = _gf_true;
         gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
                "svc %s of volume %s attached successfully to pid %d",
                svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
@@ -724,7 +701,7 @@ out:
 
 extern size_t
 build_volfile_path(char *volume_id, char *path, size_t path_len,
-                   char *trusted_str);
+                   char *trusted_str, dict_t *dict);
 
 int
 __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
@@ -749,6 +726,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
     ssize_t req_size = 0;
     call_frame_t *frame = NULL;
     gd1_mgmt_brick_op_req brick_req;
+    dict_t *dict = NULL;
     void *req = &brick_req;
     void *errlbl = &&err;
     struct rpc_clnt_connection *conn;
@@ -774,6 +752,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
     brick_req.name = volfile_id;
     brick_req.input.input_val = NULL;
     brick_req.input.input_len = 0;
+    brick_req.dict.dict_val = NULL;
+    brick_req.dict.dict_len = 0;
 
     frame = create_frame(this, this->ctx->pool);
     if (!frame) {
@@ -781,7 +761,13 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
     }
 
     if (op == GLUSTERD_SVC_ATTACH) {
-        (void)build_volfile_path(volfile_id, path, sizeof(path), NULL);
+        dict = dict_new();
+        if (!dict) {
+            ret = -ENOMEM;
+            goto *errlbl;
+        }
+
+        (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict);
 
         ret = sys_stat(path, &stbuf);
         if (ret < 0) {
@@ -816,6 +802,18 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
             ret = -EIO;
             goto *errlbl;
         }
+        if (dict->count > 0) {
+            ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
+                                              &brick_req.dict.dict_len);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
+                       "Failed to serialize dict "
+                       "to request buffer");
+                goto *errlbl;
+            }
+            dict->extra_free = brick_req.dict.dict_val;
+        }
 
         frame->cookie = svc;
         frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
@@ -860,6 +858,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
     GF_ATOMIC_INC(conf->blockers);
     ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
                           iobref, frame, NULL, 0, NULL, 0, NULL);
+    if (dict)
+        dict_unref(dict);
     GF_FREE(volfile_content);
     if (spec_fd >= 0)
         sys_close(spec_fd);
@@ -872,6 +872,9 @@ maybe_free_iobuf:
         iobuf_unref(iobuf);
     }
 err:
+    if (dict)
+        dict_unref(dict);
+
     GF_FREE(volfile_content);
     if (spec_fd >= 0)
         sys_close(spec_fd);
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 1c85c87a3f0..466f3e32223 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -143,6 +143,8 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req)
     if (!req)
         return;
 
+    if (req->dict.dict_val)
+        GF_FREE(req->dict.dict_val);
     GF_FREE(req->input.input_val);
     GF_FREE(req);
 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index f8b6f8fa096..7cf86f96429 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5817,6 +5817,8 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
     brick_req.name = path;
     brick_req.input.input_val = NULL;
     brick_req.input.input_len = 0;
+    brick_req.dict.dict_val = NULL;
+    brick_req.dict.dict_len = 0;
 
     req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
     iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
@@ -5880,7 +5882,7 @@ err:
 
 extern size_t
 build_volfile_path(char *volume_id, char *path, size_t path_len,
-                   char *trusted_str);
+                   char *trusted_str, dict_t *dict);
 
 static int
 attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
@@ -5925,7 +5927,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo,
         goto out;
     }
 
-    (void)build_volfile_path(full_id, path, sizeof(path), NULL);
+    (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL);
 
     for (tries = 15; tries > 0; --tries) {
         rpc = rpc_clnt_ref(other_brick->rpc);