diff options
author | Avra Sengupta <asengupt@redhat.com> | 2016-02-12 14:57:47 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-03-31 10:13:17 -0700 |
commit | b4cbfdac0d35e6896f337b4ae7b75dcf4e714a1a (patch) | |
tree | 290ece66f510129a16ea8dd44dbd5d89a24613ec /xlators/experimental/nsr-server | |
parent | b2a5eed9b17a82ec4b6366b0107fe2271328c16a (diff) |
nsr: Introducing a happy path test case
Write infra for nsr_server to not send a
CHILD_UP before it gets a CHILD_UP from a
quorum of its children. Use the CHILD_UP
received in the nsr client translator from
the server to decide the right time for
starting the I/Os.
Change-Id: I9551638b306bdcbc6bae6aeda00316576ea832fe
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/13623
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators/experimental/nsr-server')
-rw-r--r-- | xlators/experimental/nsr-server/src/all-templates.c | 12 | ||||
-rw-r--r-- | xlators/experimental/nsr-server/src/nsr-internal.h | 2 | ||||
-rw-r--r-- | xlators/experimental/nsr-server/src/nsr.c | 90 |
3 files changed, 97 insertions, 7 deletions
diff --git a/xlators/experimental/nsr-server/src/all-templates.c b/xlators/experimental/nsr-server/src/all-templates.c index 300abea959d..c3819d2af54 100644 --- a/xlators/experimental/nsr-server/src/all-templates.c +++ b/xlators/experimental/nsr-server/src/all-templates.c @@ -83,6 +83,9 @@ nsr_@NAME@ (call_frame_t *frame, xlator_t *this, if (result == _gf_false) { /* Emulate the AFR client-side-quorum behavior. */ + gf_msg (this->name, GF_LOG_ERROR, EROFS, + N_MSG_QUORUM_NOT_MET, "Sufficient number of " + "subvolumes are not up to meet quorum."); op_errno = EROFS; goto err; } @@ -309,6 +312,10 @@ nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this, result = fop_quorum_check (this, (double)priv->n_children, (double)local->successful_acks + 1); if (result == _gf_false) { + gf_msg (this->name, GF_LOG_ERROR, EROFS, + N_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " + "to meet quorum. Failing the operation without trying " + "it on the leader."); STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, @ERROR_ARGS@); } else { @@ -406,8 +413,9 @@ nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, if (result == _gf_false) { op_ret = -1; op_errno = EROFS; - gf_msg_debug (this->name, 0, - "Quorum is not met. The operation has failed."); + gf_msg (this->name, GF_LOG_ERROR, EROFS, + N_MSG_QUORUM_NOT_MET, "Quorum is not met. 
" + "The operation has failed."); } else { #if defined(NSR_CG_NEED_FD) op_ret = local->successful_op_ret; diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h index b8c7fc314b7..d43fbac9a53 100644 --- a/xlators/experimental/nsr-server/src/nsr-internal.h +++ b/xlators/experimental/nsr-server/src/nsr-internal.h @@ -74,6 +74,8 @@ typedef struct { * TBD: re-evaluate how to manage this */ char term_buf[CHANGELOG_ENTRY_SIZE]; + gf_boolean_t child_up; /* To maintain the state of * + * the translator */ } nsr_private_t; typedef struct { diff --git a/xlators/experimental/nsr-server/src/nsr.c b/xlators/experimental/nsr-server/src/nsr.c index 48966ab15a1..0fb618f236e 100644 --- a/xlators/experimental/nsr-server/src/nsr.c +++ b/xlators/experimental/nsr-server/src/nsr.c @@ -860,13 +860,23 @@ nsr_get_child_index (xlator_t *this, xlator_t *kid) int nsr_notify (xlator_t *this, int event, void *data, ...) { - nsr_private_t *priv = this->private; - int index; + nsr_private_t *priv = this->private; + int index = -1; + int ret = -1; + gf_boolean_t result = _gf_false; + gf_boolean_t relevant = _gf_false; switch (event) { case GF_EVENT_CHILD_UP: index = nsr_get_child_index(this, data); if (index >= 0) { + /* Check if the child was previously down + * and it's not a false CHILD_UP + */ + if (!(priv->kid_state & (1 << index))) { + relevant = _gf_true; + } + priv->kid_state |= (1 << index); priv->up_children = nsr_count_up_kids(priv); gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC, @@ -876,27 +886,96 @@ nsr_notify (xlator_t *this, int event, void *data, ...) 
if (!priv->config_leader && (priv->up_children > 1)) { priv->leader = _gf_false; } + + /* If it's not relevant, or we have already * + * sent CHILD_UP just break */ + if (!relevant || priv->child_up) + break; + + /* If it's not a leader, just send the notify up */ + if (!priv->leader) { + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_true; + break; + } + + result = fop_quorum_check (this, + (double)(priv->n_children - 1), + (double)(priv->up_children - 1)); + if (result == _gf_false) { + gf_msg (this->name, GF_LOG_INFO, 0, + N_MSG_GENERIC, "Not enough children " + "are up to meet quorum. Waiting to " + "send CHILD_UP from leader"); + } else { + gf_msg (this->name, GF_LOG_INFO, 0, + N_MSG_GENERIC, "Enough children are up " + "to meet quorum. Sending CHILD_UP " + "from leader"); + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_true; + } } break; case GF_EVENT_CHILD_DOWN: index = nsr_get_child_index(this, data); if (index >= 0) { + /* Check if the child was previously up + * and it's not a false CHILD_DOWN + */ + if (priv->kid_state & (1 << index)) { + relevant = _gf_true; + } priv->kid_state &= ~(1 << index); priv->up_children = nsr_count_up_kids(priv); gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC, "got CHILD_DOWN for %s, now %u kids", ((xlator_t *)data)->name, priv->up_children); - if (!priv->config_leader && (priv->up_children < 2)) { + if (!priv->config_leader && (priv->up_children < 2) + && relevant) { priv->leader = _gf_true; } + + /* If it's not relevant, or we have already * + * sent CHILD_DOWN just break */ + if (!relevant || !priv->child_up) + break; + + /* If it's not a leader, just break coz we shouldn't * + * propagate the failure from the failure till it * + * itself goes down * + */ + if (!priv->leader) { + break; + } + + result = fop_quorum_check (this, + (double)(priv->n_children - 1), + (double)(priv->up_children - 1)); + if (result == _gf_false) { + gf_msg (this->name, GF_LOG_INFO, 0, + 
N_MSG_GENERIC, "Enough children are " + "to down to fail quorum. " + "Sending CHILD_DOWN from leader"); + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_false; + } else { + gf_msg (this->name, GF_LOG_INFO, 0, + N_MSG_GENERIC, "Not enough children " + "are down to fail quorum. Waiting to " + "send CHILD_DOWN from leader"); + } } break; default: - ; + ret = default_notify(this, event, data); } - return default_notify(this, event, data); + return ret; } @@ -995,6 +1074,7 @@ nsr_init (xlator_t *this) GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); priv->leader = priv->config_leader; + priv->child_up = _gf_false; if (pthread_create(&kid, NULL, nsr_flush_thread, this) != 0) { |