diff options
author | Avra Sengupta <asengupt@redhat.com> | 2016-02-29 14:43:58 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-03-10 09:24:33 -0800 |
commit | 2bfdc30e0e7fba6f97d8829b2618a1c5907dc404 (patch) | |
tree | 35fefca2eb0502aa42993e1644304aebbbe0c233 | |
parent | 537822d7eb7732f2f65300668355b6d7db448f39 (diff) |
protocol client/server: Fix client-server handshake
Problem:
Currently on a successful connection between protocol
server and client, the protocol client initiates a
CHILD_UP event in the client stack. At this point in
time, only the connection between server and client is
established, and there is no guarantee that the server
side stack is ready to serve requests.
It works fine now, as most server side translators are
not dependent on any other factors, before being able
to serve requests today and hence they are up by the time
the client stack translators receive the CHILD_UP (initiated
by client handshake).
The gap here is exposed when certain server side translators
like NSR-Server for example, have a couple of protocol clients
as their child(connecting them to other bricks), and they
can't really serve requests till a quorum of their children are
up. Hence these translators should defer sending CHILD_UP
till they have enough children up, and the same needs to be
propagated to the client stack translators.
Fix:
Maintain a child_up variable in both the protocol client
and protocol server translators. The protocol server should
update this value based on the CHILD_UP and CHILD_DOWN
events it receives from the translators below it. On receiving
such an event it should forward that event to the client.
The protocol client on receiving such an event should forward
it up the client stack, thereby letting the client translators
correctly know that the server is up and ready to serve.
The clients connecting later(long after a server has initialized
and processed it's CHILD_UP events), will receive a child_up status
as part of the handshake, and based on the status of the server's
child_up, can either propagate a CHILD_UP event or defer it.
Change-Id: I0807141e62118d8de9d9cde57a53a607be44a0e0
BUG: 1312845
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/13549
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-callback.c | 52 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 30 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-messages.h | 1 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.c | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.h | 3 | ||||
-rw-r--r-- | xlators/protocol/server/src/server-handshake.c | 6 | ||||
-rw-r--r-- | xlators/protocol/server/src/server.c | 71 | ||||
-rw-r--r-- | xlators/protocol/server/src/server.h | 2 |
9 files changed, 157 insertions, 12 deletions
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 4dec4634d7c..4058295af0b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -134,6 +134,8 @@ enum gf_cbk_procnum { GF_CBK_EVENT_NOTIFY, GF_CBK_GET_SNAPS, GF_CBK_CACHE_INVALIDATION, + GF_CBK_CHILD_UP, + GF_CBK_CHILD_DOWN, GF_CBK_MAXVALUE, }; diff --git a/xlators/protocol/client/src/client-callback.c b/xlators/protocol/client/src/client-callback.c index 16f5441a778..7ee2113762a 100644 --- a/xlators/protocol/client/src/client-callback.c +++ b/xlators/protocol/client/src/client-callback.c @@ -81,13 +81,53 @@ out: return 0; } +int +client_cbk_child_up (struct rpc_clnt *rpc, void *mydata, void *data) +{ + clnt_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, rpc, out); + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + gf_msg_debug (this->name, 0, "Received CHILD_UP"); + conf->child_up = _gf_true; + + this->notify (this, GF_EVENT_CHILD_UP, NULL); +out: + return 0; +} + +int +client_cbk_child_down (struct rpc_clnt *rpc, void *mydata, void *data) +{ + clnt_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, rpc, out); + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + gf_msg_debug (this->name, 0, "Received CHILD_DOWN"); + conf->child_up = _gf_false; + + this->notify (this, GF_EVENT_CHILD_DOWN, NULL); +out: + return 0; +} + rpcclnt_cb_actor_t gluster_cbk_actors[GF_CBK_MAXVALUE] = { - [GF_CBK_NULL] = {"NULL", GF_CBK_NULL, client_cbk_null }, - [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, client_cbk_fetchspec }, - [GF_CBK_INO_FLUSH] = {"INO_FLUSH", GF_CBK_INO_FLUSH, client_cbk_ino_flush }, - [GF_CBK_CACHE_INVALIDATION] = {"CACHE_INVALIDATION", - GF_CBK_CACHE_INVALIDATION, - client_cbk_cache_invalidation }, + [GF_CBK_NULL] = {"NULL", GF_CBK_NULL, client_cbk_null }, + [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, client_cbk_fetchspec }, + [GF_CBK_INO_FLUSH] = {"INO_FLUSH", GF_CBK_INO_FLUSH, client_cbk_ino_flush }, + [GF_CBK_CACHE_INVALIDATION] = {"CACHE_INVALIDATION", GF_CBK_CACHE_INVALIDATION, client_cbk_cache_invalidation }, + [GF_CBK_CHILD_UP] = {"CHILD_UP", GF_CBK_CHILD_UP, client_cbk_child_up }, + [GF_CBK_CHILD_DOWN] = {"CHILD_DOWN", GF_CBK_CHILD_DOWN, client_cbk_child_down }, }; diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 3b170b8d186..5352e549abf 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -129,13 +129,26 @@ client_notify_parents_child_up (xlator_t *this) clnt_conf_t *conf = NULL; int ret = 0; + GF_VALIDATE_OR_GOTO("client", this, out); conf = this->private; - ret = client_notify_dispatch_uniq (this, GF_EVENT_CHILD_UP, NULL); - if (ret) - gf_msg (this->name, GF_LOG_INFO, 0, - PC_MSG_CHILD_UP_NOTIFY_FAILED, "notify of CHILD_UP " - "failed"); + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->child_up) { + ret = client_notify_dispatch_uniq (this, GF_EVENT_CHILD_UP, + NULL); + if (ret) { + gf_msg (this->name, GF_LOG_INFO, 0, + PC_MSG_CHILD_UP_NOTIFY_FAILED, + "notify of CHILD_UP failed"); + goto out; + } + } else { + gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_STATUS, + "Defering sending CHILD_UP message as the client " + "translators are not yet ready to serve."); + } +out: return 0; } @@ -1157,6 +1170,13 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m goto out; } + ret = dict_get_uint32 (reply, "child_up", &conf->child_up); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED, + "failed to find key 'child_up' in the options"); + goto out; + } + ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver); if (ret) { gf_msg (this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_GET_FAILED, diff --git a/xlators/protocol/client/src/client-messages.h b/xlators/protocol/client/src/client-messages.h index 0fc9d31729c..a4b391b8331 100644 --- a/xlators/protocol/client/src/client-messages.h +++ b/xlators/protocol/client/src/client-messages.h @@ -617,6 +617,7 @@ * @recommendedaction * */ +#define PC_MSG_CHILD_STATUS (GLFS_PC_BASE + 64) /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 29fe44c4974..f1f58eb822e 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2467,6 +2467,8 @@ init (xlator_t *this) pthread_mutex_init (&conf->lock, NULL); INIT_LIST_HEAD (&conf->saved_fds); + conf->child_up = _gf_false; + /* Initialize parameters for lock self healing*/ conf->lk_version = 1; conf->grace_timer = NULL; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 415fabbd7e1..a4d4d9f75a5 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -133,6 +133,9 @@ typedef struct clnt_conf { gf_boolean_t destroy; /* if enabled implies fini was called * on @this xlator instance */ + + gf_boolean_t child_up; /* Set to true, when child is up, and + * false, when child is down */ } clnt_conf_t; typedef struct _client_fd_ctx { diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 293509c5d3f..fe5dfbab516 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -385,6 +385,12 @@ server_setvolume (rpcsvc_request_t *req) goto fail; } + ret = dict_set_int32 (reply, "child_up", conf->child_up); + if (ret < 0) + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_DICT_GET_FAILED, "Failed to set 'child_up' " + "in the reply dict"); + buf = memdup (args.dict.dict_val, args.dict.dict_len); if (buf == NULL) { op_ret = -1; diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index d07b840b3fc..24e31500453 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -958,6 +958,8 @@ init (xlator_t *this) if (ret) conf->conf_dir = CONFDIR; + conf->child_up = _gf_false; + /*ret = dict_get_str (this->options, "statedump-path", &statedump_path); if (!ret) { gf_path_strip_trailing_slashes (statedump_path); @@ -1237,6 +1239,35 @@ out: } int +server_process_child_event (xlator_t *this, int32_t event, void *data, + enum gf_cbk_procnum cbk_procnum) +{ + int ret = -1; + server_conf_t *conf = NULL; + rpc_transport_t *xprt = NULL; + + GF_VALIDATE_OR_GOTO(this->name, data, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + pthread_mutex_lock (&conf->mutex); + { + list_for_each_entry (xprt, &conf->xprt_list, list) { + rpcsvc_callback_submit (conf->rpc, xprt, + &server_cbk_prog, + cbk_procnum, + NULL, 0); + } + } + pthread_mutex_unlock (&conf->mutex); + ret = 0; +out: + return ret; +} + + +int notify (xlator_t *this, int32_t event, void *data, ...) { int ret = -1; @@ -1246,6 +1277,10 @@ notify (xlator_t *this, int32_t event, void *data, ...) server_conf_t *conf = NULL; va_list ap; + GF_VALIDATE_OR_GOTO (THIS->name, this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + dict = data; va_start (ap, data); output = va_arg (ap, dict_t*); @@ -1272,7 +1307,41 @@ notify (xlator_t *this, int32_t event, void *data, ...) conf->parent_up = _gf_true; - /* fall through and notify the event to children */ + default_notify (this, event, data); + break; + } + + case GF_EVENT_CHILD_UP: + { + conf->child_up = _gf_true; + ret = server_process_child_event (this, event, data, + GF_CBK_CHILD_UP); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_SERVER_EVENT_UPCALL_FAILED, + "server_process_child_event failed"); + goto out; + } + + default_notify (this, event, data); + break; + } + + case GF_EVENT_CHILD_DOWN: + { + conf->child_up = _gf_false; + ret = server_process_child_event (this, event, data, + GF_CBK_CHILD_DOWN); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + PS_MSG_SERVER_EVENT_UPCALL_FAILED, + "server_process_child_event failed"); + goto out; + } + + default_notify (this, event, data); + break; + } default: diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index c0e2752cad7..7980d300be7 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -72,6 +72,8 @@ struct server_conf { * in case if volume set options * (say *.allow | *.reject) are * tweeked */ + gf_boolean_t child_up; /* Set to true, when child is up, and + * false, when child is down */ }; typedef struct server_conf server_conf_t; |