From d6978803395fb1d1635dd454894e26d9feb806d9 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Mon, 11 Oct 2010 07:30:55 +0000 Subject: dht: change behaviour of CHILD_UP/DOWN/CONNECTING event propagation The first CHILD_UP/DOWN/CONNECTING event that dht passes upwards should be sent only after all subvols have reported their status at least once. Signed-off-by: Anand V. Avati Signed-off-by: Vijay Bellur BUG: 1643 (Initial requests after mount ESTALE if DHT subvolumes connect after nfs startup) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1643 --- xlators/cluster/dht/src/dht-common.c | 85 +++++++++++++++++++++++++++++++++++- xlators/cluster/dht/src/dht-common.h | 1 + 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index b7a61be7d47..3d6b0422fdb 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4742,6 +4742,13 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) return -1; } + conf->last_event = GF_CALLOC (cnt, sizeof (int), + gf_dht_mt_char); + if (!conf->last_event) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + return -1; + } return 0; } @@ -4754,12 +4761,24 @@ dht_notify (xlator_t *this, int event, void *data, ...) int i = -1; dht_conf_t *conf = NULL; int ret = -1; + int propagate = 0; + + int had_heard_from_all = 0; + int have_heard_from_all = 0; conf = this->private; if (!conf) return ret; + /* had all subvolumes reported status once till now? */ + had_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) { + had_heard_from_all = 0; + } + } + switch (event) { case GF_EVENT_CHILD_UP: subvol = data; @@ -4783,6 +4802,7 @@ dht_notify (xlator_t *this, int event, void *data, ...) 
LOCK (&conf->subvolume_lock); { conf->subvolume_status[cnt] = 1; + conf->last_event[cnt] = event; } UNLOCK (&conf->subvolume_lock); @@ -4811,13 +4831,76 @@ dht_notify (xlator_t *this, int event, void *data, ...) LOCK (&conf->subvolume_lock); { conf->subvolume_status[cnt] = 0; + conf->last_event[cnt] = event; + } + UNLOCK (&conf->subvolume_lock); + + break; + + case GF_EVENT_CHILD_CONNECTING: + subvol = data; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } + + if (cnt == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_CONNECTING bad subvolume %s", + subvol->name); + break; + } + + LOCK (&conf->subvolume_lock); + { + conf->last_event[cnt] = event; } UNLOCK (&conf->subvolume_lock); break; + default: + propagate = 1; + break; } - ret = default_notify (this, event, data); + + /* have all subvolumes reported status once by now? */ + have_heard_from_all = 1; + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->last_event[i]) + have_heard_from_all = 0; + } + + /* if all subvols have reported status, no need to hide anything + or wait for anything else. Just propagate blindly */ + if (have_heard_from_all) + propagate = 1; + + if (!had_heard_from_all && have_heard_from_all) { + /* This is the first event which completes aggregation + of events from all subvolumes. 
If at least one subvol + had come up, propagate CHILD_UP, but only this time + */ + event = GF_EVENT_CHILD_DOWN; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->last_event[i] == GF_EVENT_CHILD_UP) { + event = GF_EVENT_CHILD_UP; + break; + } + + if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) { + event = GF_EVENT_CHILD_CONNECTING; + /* continue to check other events for CHILD_UP */ + } + } + } + + if (propagate) + ret = default_notify (this, event, data); return ret; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 32e5cf566e8..f0510f868f3 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -144,6 +144,7 @@ struct dht_conf { int subvolume_cnt; xlator_t **subvolumes; char *subvolume_status; + int *last_event; dht_layout_t **file_layouts; dht_layout_t **dir_layouts; dht_layout_t *default_dir_layout; -- cgit