summaryrefslogtreecommitdiffstats
path: root/xlators/experimental/nsr-server
diff options
context:
space:
mode:
author: Avra Sengupta <asengupt@redhat.com> 2016-02-12 14:57:47 +0530
committer: Jeff Darcy <jdarcy@redhat.com> 2016-03-31 10:13:17 -0700
commit: b4cbfdac0d35e6896f337b4ae7b75dcf4e714a1a (patch)
tree: 290ece66f510129a16ea8dd44dbd5d89a24613ec /xlators/experimental/nsr-server
parent: b2a5eed9b17a82ec4b6366b0107fe2271328c16a (diff)
nsr: Introducing a happy path test case
Write infra for nsr_server so that it does not send a CHILD_UP before it gets a CHILD_UP from a quorum of its children. Use the CHILD_UP received in the nsr client translator from the server to decide the right time for starting the I/Os.
Change-Id: I9551638b306bdcbc6bae6aeda00316576ea832fe
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/13623
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators/experimental/nsr-server')
-rw-r--r-- xlators/experimental/nsr-server/src/all-templates.c 12
-rw-r--r-- xlators/experimental/nsr-server/src/nsr-internal.h 2
-rw-r--r-- xlators/experimental/nsr-server/src/nsr.c 90
3 files changed, 97 insertions, 7 deletions
diff --git a/xlators/experimental/nsr-server/src/all-templates.c b/xlators/experimental/nsr-server/src/all-templates.c
index 300abea959d..c3819d2af54 100644
--- a/xlators/experimental/nsr-server/src/all-templates.c
+++ b/xlators/experimental/nsr-server/src/all-templates.c
@@ -83,6 +83,9 @@ nsr_@NAME@ (call_frame_t *frame, xlator_t *this,
if (result == _gf_false) {
/* Emulate the AFR client-side-quorum behavior. */
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Sufficient number of "
+ "subvolumes are not up to meet quorum.");
op_errno = EROFS;
goto err;
}
@@ -309,6 +312,10 @@ nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
result = fop_quorum_check (this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Didn't receive enough acks "
+ "to meet quorum. Failing the operation without trying "
+ "it on the leader.");
STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
@ERROR_ARGS@);
} else {
@@ -406,8 +413,9 @@ nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
if (result == _gf_false) {
op_ret = -1;
op_errno = EROFS;
- gf_msg_debug (this->name, 0,
- "Quorum is not met. The operation has failed.");
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Quorum is not met. "
+ "The operation has failed.");
} else {
#if defined(NSR_CG_NEED_FD)
op_ret = local->successful_op_ret;
diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h
index b8c7fc314b7..d43fbac9a53 100644
--- a/xlators/experimental/nsr-server/src/nsr-internal.h
+++ b/xlators/experimental/nsr-server/src/nsr-internal.h
@@ -74,6 +74,8 @@ typedef struct {
* TBD: re-evaluate how to manage this
*/
char term_buf[CHANGELOG_ENTRY_SIZE];
+ gf_boolean_t child_up; /* To maintain the state of *
+ * the translator */
} nsr_private_t;
typedef struct {
diff --git a/xlators/experimental/nsr-server/src/nsr.c b/xlators/experimental/nsr-server/src/nsr.c
index 48966ab15a1..0fb618f236e 100644
--- a/xlators/experimental/nsr-server/src/nsr.c
+++ b/xlators/experimental/nsr-server/src/nsr.c
@@ -860,13 +860,23 @@ nsr_get_child_index (xlator_t *this, xlator_t *kid)
int
nsr_notify (xlator_t *this, int event, void *data, ...)
{
- nsr_private_t *priv = this->private;
- int index;
+ nsr_private_t *priv = this->private;
+ int index = -1;
+ int ret = -1;
+ gf_boolean_t result = _gf_false;
+ gf_boolean_t relevant = _gf_false;
switch (event) {
case GF_EVENT_CHILD_UP:
index = nsr_get_child_index(this, data);
if (index >= 0) {
+ /* Check if the child was previously down
+ * and it's not a false CHILD_UP
+ */
+ if (!(priv->kid_state & (1 << index))) {
+ relevant = _gf_true;
+ }
+
priv->kid_state |= (1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
@@ -876,27 +886,96 @@ nsr_notify (xlator_t *this, int event, void *data, ...)
if (!priv->config_leader && (priv->up_children > 1)) {
priv->leader = _gf_false;
}
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_UP just break */
+ if (!relevant || priv->child_up)
+ break;
+
+ /* If it's not a leader, just send the notify up */
+ if (!priv->leader) {
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1));
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Not enough children "
+ "are up to meet quorum. Waiting to "
+ "send CHILD_UP from leader");
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Enough children are up "
+ "to meet quorum. Sending CHILD_UP "
+ "from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ }
}
break;
case GF_EVENT_CHILD_DOWN:
index = nsr_get_child_index(this, data);
if (index >= 0) {
+ /* Check if the child was previously up
+ * and it's not a false CHILD_DOWN
+ */
+ if (priv->kid_state & (1 << index)) {
+ relevant = _gf_true;
+ }
priv->kid_state &= ~(1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
"got CHILD_DOWN for %s, now %u kids",
((xlator_t *)data)->name,
priv->up_children);
- if (!priv->config_leader && (priv->up_children < 2)) {
+ if (!priv->config_leader && (priv->up_children < 2)
+ && relevant) {
priv->leader = _gf_true;
}
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_DOWN just break */
+ if (!relevant || !priv->child_up)
+ break;
+
+ /* If it's not a leader, just break coz we shouldn't *
+ * propagate the failure from the failure till it *
+ * itself goes down *
+ */
+ if (!priv->leader) {
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1));
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Enough children are "
+ "to down to fail quorum. "
+ "Sending CHILD_DOWN from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_false;
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Not enough children "
+ "are down to fail quorum. Waiting to "
+ "send CHILD_DOWN from leader");
+ }
}
break;
default:
- ;
+ ret = default_notify(this, event, data);
}
- return default_notify(this, event, data);
+ return ret;
}
@@ -995,6 +1074,7 @@ nsr_init (xlator_t *this)
GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);
priv->leader = priv->config_leader;
+ priv->child_up = _gf_false;
if (pthread_create(&kid, NULL, nsr_flush_thread,
this) != 0) {