summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrishnan Parthasarathi <kparthas@redhat.com>2015-02-05 15:41:35 +0530
committerVijay Bellur <vbellur@redhat.com>2015-02-07 13:25:26 -0800
commitf18a3f30bbeaf3bb067b913082830d7f874555ca (patch)
tree9d283dc48f6158b24acfc4be9176db7b48e50c1c
parenta7f5893c9243c8c563db215352fa7e47f6968e8b (diff)
protocol/client: sequence CHILD_UP, CHILD_DOWN etc notifications
... from all bricks in the volume This patch is important in the context of MT epoll. With MT epoll, notification events from client xlators could reach cluster xlators like afr, dht, ec, stripe etc. in different orders. For e.g, In a distributed replicate volume of 2 bricks, namely Brick1 and Brick2, the following network events are observed by a mount process. - connection to Brick1 is broken. - connection to Brick1 has been restored. - connection to Brick2 is broken. - connection to Brick2 has been restored. Without establishing a total ordering of events, we can't guarantee that cluster xlators like afr, dht perceive them in the same order. While we would expect afr (say) to perceive it as only one of Brick1 and Brick2 going down at any given time, it is possible for the notification of Brick2 going offline to race with the notification of Brick1 coming back online. Change-Id: I78f5a52bfb05593335d0e9ad53ebfff98995593d BUG: 1104462 Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-on: http://review.gluster.org/9591 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--glusterfsd/src/glusterfsd.c2
-rw-r--r--libglusterfs/src/glusterfs.h4
-rw-r--r--xlators/protocol/client/src/client-handshake.c15
-rw-r--r--xlators/protocol/client/src/client.c78
-rw-r--r--xlators/protocol/client/src/client.h2
5 files changed, 77 insertions, 24 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index a46385aa292..c410ffd40d9 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1394,6 +1394,8 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
goto out;
pthread_mutex_init (&(ctx->lock), NULL);
+ pthread_mutex_init (&ctx->notify_lock, NULL);
+ pthread_cond_init (&ctx->notify_cond, NULL);
ctx->clienttable = gf_clienttable_alloc();
if (!ctx->clienttable)
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 9c078e1d5f9..8f993b2086a 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -533,6 +533,10 @@ struct _glusterfs_ctx {
/* Buffer to 'save' backtrace even under OOM-kill like situations*/
char btbuf[GF_BACKTRACE_LEN];
+ pthread_mutex_t notify_lock;
+ pthread_cond_t notify_cond;
+ int notifying;
+
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 42b7ac0745e..531b38eaf83 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -131,12 +131,11 @@ client_notify_parents_child_up (xlator_t *this)
int ret = 0;
conf = this->private;
- ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+ ret = client_notify_dispatch (this, GF_EVENT_CHILD_UP, NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of CHILD_UP failed");
- conf->last_sent_event = GF_EVENT_CHILD_UP;
return 0;
}
@@ -1146,11 +1145,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
op_ret = 0;
}
if (op_errno == ESTALE) {
- ret = default_notify (this, GF_EVENT_VOLFILE_MODIFIED, NULL);
+ ret = client_notify_dispatch (this,
+ GF_EVENT_VOLFILE_MODIFIED,
+ NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of VOLFILE_MODIFIED failed");
- conf->last_sent_event = GF_EVENT_VOLFILE_MODIFIED;
}
goto out;
}
@@ -1223,13 +1223,12 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
out:
if (auth_fail) {
gf_log (this->name, GF_LOG_INFO, "sending AUTH_FAILED event");
- ret = default_notify (this, GF_EVENT_AUTH_FAILED, NULL);
+ ret = client_notify_dispatch (this, GF_EVENT_AUTH_FAILED, NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of AUTH_FAILED failed");
conf->connecting = 0;
conf->connected = 0;
- conf->last_sent_event = GF_EVENT_AUTH_FAILED;
ret = -1;
}
if (-1 == op_ret) {
@@ -1238,11 +1237,11 @@ out:
* tell the parents that i am all ok..
*/
gf_log (this->name, GF_LOG_INFO, "sending CHILD_CONNECTING event");
- ret = default_notify (this, GF_EVENT_CHILD_CONNECTING, NULL);
+ ret = client_notify_dispatch (this, GF_EVENT_CHILD_CONNECTING,
+ NULL);
if (ret)
gf_log (this->name, GF_LOG_INFO,
"notify of CHILD_CONNECTING failed");
- conf->last_sent_event = GF_EVENT_CHILD_CONNECTING;
conf->connecting= 1;
ret = 0;
}
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 999a4a5c836..00d88d6e7a1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -34,6 +34,55 @@ int client_init_rpc (xlator_t *this);
int client_destroy_rpc (xlator_t *this);
int client_mark_fd_bad (xlator_t *this);
+static int
+client_notify_dispatch_uniq (xlator_t *this, int32_t event, void *data, ...)
+{
+ clnt_conf_t *conf = this->private;
+
+ if (conf->last_sent_event == event)
+ return 0;
+
+ return client_notify_dispatch (this, event, data);
+}
+
+int
+client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = -1;
+ glusterfs_ctx_t *ctx = this->ctx;
+ clnt_conf_t *conf = this->private;
+
+ pthread_mutex_lock (&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait (&ctx->notify_cond,
+ &ctx->notify_lock);
+ ctx->notifying = 1;
+ }
+ pthread_mutex_unlock (&ctx->notify_lock);
+
+ /* We assume that all translators in the graph handle notification
+ * events in sequence.
+ * */
+ ret = default_notify (this, event, data);
+
+ /* NB (Even) with MT-epoll and EPOLLET|EPOLLONESHOT we are guaranteed
+ * that there would be atmost one poller thread executing this
+ * notification function. This allows us to update last_sent_event
+ * without explicit synchronization. See epoll(7).
+ */
+ conf->last_sent_event = event;
+
+ pthread_mutex_lock (&ctx->notify_lock);
+ {
+ ctx->notifying = 0;
+ pthread_cond_signal (&ctx->notify_cond);
+ }
+ pthread_mutex_unlock (&ctx->notify_lock);
+
+ return ret;
+}
+
int32_t
client_type_to_gf_type (short l_type)
{
@@ -2169,14 +2218,12 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
"handshake msg returned %d", ret);
} else {
//conf->rpc->connected = 1;
- if (conf->last_sent_event != GF_EVENT_CHILD_UP) {
- ret = default_notify (this, GF_EVENT_CHILD_UP,
- NULL);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "CHILD_UP notify failed");
- conf->last_sent_event = GF_EVENT_CHILD_UP;
- }
+ ret = client_notify_dispatch_uniq (this,
+ GF_EVENT_CHILD_UP,
+ NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "CHILD_UP notify failed");
}
/* Cancel grace timer if set */
@@ -2224,14 +2271,13 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
may get screwed up.. (eg. CHILD_MODIFIED event in
replicate), hence make sure events which are passed
to parent are genuine */
- if (conf->last_sent_event != GF_EVENT_CHILD_DOWN) {
- ret = default_notify (this, GF_EVENT_CHILD_DOWN,
- NULL);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "CHILD_DOWN notify failed");
- conf->last_sent_event = GF_EVENT_CHILD_DOWN;
- }
+ ret = client_notify_dispatch_uniq (this,
+ GF_EVENT_CHILD_DOWN,
+ NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "CHILD_DOWN notify failed");
+
} else {
if (conf->connected)
gf_log (this->name, GF_LOG_DEBUG,
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index af70926b178..1aea1353727 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -262,4 +262,6 @@ int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd,
int64_t remote_fd);
gf_boolean_t
__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx);
+int
+client_notify_dispatch (xlator_t *this, int32_t event, void *data, ...);
#endif /* !_CLIENT_H */