diff options
| author | Avra Sengupta <asengupt@redhat.com> | 2016-02-12 14:57:47 +0530 | 
|---|---|---|
| committer | Jeff Darcy <jdarcy@redhat.com> | 2016-03-31 10:13:17 -0700 | 
| commit | b4cbfdac0d35e6896f337b4ae7b75dcf4e714a1a (patch) | |
| tree | 290ece66f510129a16ea8dd44dbd5d89a24613ec | |
| parent | b2a5eed9b17a82ec4b6366b0107fe2271328c16a (diff) | |
nsr: Introducing a happy path test case
Add infrastructure so that nsr_server does not send a
CHILD_UP before it has received a CHILD_UP from a
quorum of its children. The nsr client translator
uses the CHILD_UP it receives from the server
to decide the right time for
starting the I/Os.
Change-Id: I9551638b306bdcbc6bae6aeda00316576ea832fe
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/13623
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
| -rwxr-xr-x | tests/basic/nsr/nsr.t | 33 | ||||
| -rw-r--r-- | tests/volume.rc | 18 | ||||
| -rw-r--r-- | xlators/experimental/nsr-client/src/nsrc.c | 110 | ||||
| -rw-r--r-- | xlators/experimental/nsr-client/src/nsrc.h | 3 | ||||
| -rw-r--r-- | xlators/experimental/nsr-server/src/all-templates.c | 12 | ||||
| -rw-r--r-- | xlators/experimental/nsr-server/src/nsr-internal.h | 2 | ||||
| -rw-r--r-- | xlators/experimental/nsr-server/src/nsr.c | 90 | 
7 files changed, 248 insertions, 20 deletions
diff --git a/tests/basic/nsr/nsr.t b/tests/basic/nsr/nsr.t new file mode 100755 index 00000000000..b5a4aaf1058 --- /dev/null +++ b/tests/basic/nsr/nsr.t @@ -0,0 +1,33 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../cluster.rc +. $(dirname $0)/../../snapshot.rc + +cleanup; + +TEST verify_lvm_version; +#Create cluster with 3 nodes +TEST launch_cluster 3; +TEST setup_lvm 3 + +TEST $CLI_1 peer probe $H2; +TEST $CLI_1 peer probe $H3; +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; + +TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3 +TEST $CLI_1 volume set $V0 cluster.nsr on +#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG +TEST $CLI_1 volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0; + +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" nsrc_child_up_status $V0 0 + +echo "file" > $M0/file1 +TEST stat $L1/file1 +TEST stat $L2/file1 +TEST stat $L3/file1 + +cleanup; diff --git a/tests/volume.rc b/tests/volume.rc index e488aa73b1c..71b40b72d66 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -110,6 +110,24 @@ function snap_client_connected_status {           echo "$up"  } +function _nsrc_child_up_status { +        local vol=$1 +        #brick_id is (brick-num in volume info - 1) +        local brick_id=$2 +        local gen_state_dump=$3 +        local fpath=$($gen_state_dump $vol) +        up=$(grep -a -B1 child_$brick_id=$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=') +        rm -f $fpath +        echo "$up" +} + +function nsrc_child_up_status { +        local vol=$1 +        #brick_id is (brick-num in volume info - 1) +        local brick_id=$2 +        _nsrc_child_up_status $vol $brick_id generate_mount_statedump +} +  function _afr_child_up_status {          local vol=$1          #brick_id is (brick-num in volume info - 1) diff --git a/xlators/experimental/nsr-client/src/nsrc.c b/xlators/experimental/nsr-client/src/nsrc.c index 
dd3ad20544e..13f1a2d38c5 100644 --- a/xlators/experimental/nsr-client/src/nsrc.c +++ b/xlators/experimental/nsr-client/src/nsrc.c @@ -18,6 +18,7 @@  #include "xlator.h"  #include "nsr-messages.h"  #include "nsrc.h" +#include "statedump.h"  #define SCAR_LIMIT      20  #define HILITE(x)       ("[1;33m"x"[0m") @@ -168,6 +169,7 @@ int32_t  nsrc_init (xlator_t *this)  {          nsrc_private_t  *priv   = NULL; +        xlator_list_t   *trav   = NULL;          this->local_pool = mem_pool_new (nsrc_local_t, 128);          if (!this->local_pool) { @@ -181,6 +183,10 @@ nsrc_init (xlator_t *this)                  goto err;          } +        for (trav = this->children; trav; trav = trav->next) { +                ++(priv->n_children); +        } +          priv->active = FIRST_CHILD(this);          this->private = priv;          return 0; @@ -198,33 +204,111 @@ nsrc_fini (xlator_t *this)          GF_FREE(this->private);  } +int +nsrc_get_child_index (xlator_t *this, xlator_t *kid) +{ +        xlator_list_t   *trav; +        int             retval = -1; + +        for (trav = this->children; trav; trav = trav->next) { +                ++retval; +                if (trav->xlator == kid) { +                        return retval; +                } +        } + +        return -1; +} + +uint8_t +nsrc_count_up_kids (nsrc_private_t *priv) +{ +        uint8_t         retval  = 0; +        uint8_t         i; + +        for (i = 0; i < priv->n_children; ++i) { +                if (priv->kid_state & (1 << i)) { +                        ++retval; +                } +        } + +        return retval; +} +  int32_t  nsrc_notify (xlator_t *this, int32_t event, void *data, ...)  
{ -        int32_t         ret     = 0; +        int32_t           ret        = 0; +        int32_t           index      = 0; +        nsrc_private_t   *priv       = NULL; + +        GF_VALIDATE_OR_GOTO (THIS->name, this, out); +        priv = this->private; +        GF_VALIDATE_OR_GOTO (this->name, priv, out);          switch (event) { +        case GF_EVENT_CHILD_UP: +                index = nsrc_get_child_index(this, data); +                if (index >= 0) { +                        priv->kid_state |= (1 << index); +                        priv->up_children = nsrc_count_up_kids(priv); +                        gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC, +                                "got CHILD_UP for %s, now %u kids", +                                ((xlator_t *)data)->name, +                                priv->up_children); +                } +                ret = default_notify (this, event, data); +                break;          case GF_EVENT_CHILD_DOWN: -                /* -                 * TBD: handle this properly -                 * -                 * What we really should do is propagate this only if it caused -                 * us to lose quorum, and likewise for GF_EVENT_CHILD_UP only -                 * if it caused us to gain quorum.  However, that requires -                 * tracking child states and for now it's easier to swallow -                 * these unconditionally.  The consequence of failing to do -                 * this is that DHT sees the first GF_EVENT_CHILD_DOWN and gets -                 * confused, so it doesn't call us and doesn't get up-to-date -                 * directory listings etc. 
-                 */ +                index = nsrc_get_child_index(this, data); +                if (index >= 0) { +                        priv->kid_state &= ~(1 << index); +                        priv->up_children = nsrc_count_up_kids(priv); +                        gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC, +                                "got CHILD_DOWN for %s, now %u kids", +                                ((xlator_t *)data)->name, +                                priv->up_children); +                }                  break;          default:                  ret = default_notify (this, event, data);          } +out:          return ret;  } +int +nsrc_priv_dump (xlator_t *this) +{ +        nsrc_private_t     *priv = NULL; +        char                key_prefix[GF_DUMP_MAX_BUF_LEN]; +        xlator_list_t      *trav = NULL; +        int32_t             i    = -1; + +        GF_VALIDATE_OR_GOTO (THIS->name, this, out); +        priv = this->private; +        GF_VALIDATE_OR_GOTO (this->name, priv, out); + +        snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", +                 this->type, this->name); +        gf_proc_dump_add_section(key_prefix); + +        gf_proc_dump_write("up_children", "%u", priv->up_children); + +        for (trav = this->children, i = 0; trav; trav = trav->next, i++) { +                snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i); +                gf_proc_dump_write(key_prefix, "%s", trav->xlator->name); +        } + +out: +        return 0; +} + +struct xlator_dumpops dumpops = { +        .priv       = nsrc_priv_dump, +}; +  class_methods_t class_methods = {          .init           = nsrc_init,          .fini           = nsrc_fini, diff --git a/xlators/experimental/nsr-client/src/nsrc.h b/xlators/experimental/nsr-client/src/nsrc.h index 0c61d7a9fa8..15f0d7c85a0 100644 --- a/xlators/experimental/nsr-client/src/nsrc.h +++ b/xlators/experimental/nsr-client/src/nsrc.h @@ -13,6 +13,9 @@  typedef struct {          
xlator_t        *active; +        uint8_t          up_children; +        uint8_t          n_children; +        uint32_t         kid_state;  } nsrc_private_t;  typedef struct { diff --git a/xlators/experimental/nsr-server/src/all-templates.c b/xlators/experimental/nsr-server/src/all-templates.c index 300abea959d..c3819d2af54 100644 --- a/xlators/experimental/nsr-server/src/all-templates.c +++ b/xlators/experimental/nsr-server/src/all-templates.c @@ -83,6 +83,9 @@ nsr_@NAME@ (call_frame_t *frame, xlator_t *this,                  if (result == _gf_false) {                          /* Emulate the AFR client-side-quorum behavior. */ +                        gf_msg (this->name, GF_LOG_ERROR, EROFS, +                                N_MSG_QUORUM_NOT_MET, "Sufficient number of " +                                "subvolumes are not up to meet quorum.");                          op_errno = EROFS;                          goto err;                  } @@ -309,6 +312,10 @@ nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this,          result = fop_quorum_check (this, (double)priv->n_children,                                     (double)local->successful_acks + 1);          if (result == _gf_false) { +                gf_msg (this->name, GF_LOG_ERROR, EROFS, +                        N_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " +                        "to meet quorum. Failing the operation without trying " +                        "it on the leader.");                  STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,                                       @ERROR_ARGS@);          } else { @@ -406,8 +413,9 @@ nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,                  if (result == _gf_false) {                          op_ret = -1;                          op_errno = EROFS; -                        gf_msg_debug (this->name, 0, -                                      "Quorum is not met. 
The operation has failed."); +                        gf_msg (this->name, GF_LOG_ERROR, EROFS, +                                N_MSG_QUORUM_NOT_MET, "Quorum is not met. " +                                "The operation has failed.");                  } else {  #if defined(NSR_CG_NEED_FD)                          op_ret = local->successful_op_ret; diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h index b8c7fc314b7..d43fbac9a53 100644 --- a/xlators/experimental/nsr-server/src/nsr-internal.h +++ b/xlators/experimental/nsr-server/src/nsr-internal.h @@ -74,6 +74,8 @@ typedef struct {           * TBD: re-evaluate how to manage this           */          char                    term_buf[CHANGELOG_ENTRY_SIZE]; +        gf_boolean_t            child_up; /* To maintain the state of * +                                           * the translator */  } nsr_private_t;  typedef struct { diff --git a/xlators/experimental/nsr-server/src/nsr.c b/xlators/experimental/nsr-server/src/nsr.c index 48966ab15a1..0fb618f236e 100644 --- a/xlators/experimental/nsr-server/src/nsr.c +++ b/xlators/experimental/nsr-server/src/nsr.c @@ -860,13 +860,23 @@ nsr_get_child_index (xlator_t *this, xlator_t *kid)  int  nsr_notify (xlator_t *this, int event, void *data, ...)  
{ -        nsr_private_t   *priv   = this->private; -        int             index; +        nsr_private_t   *priv         = this->private; +        int             index         = -1; +        int             ret           = -1; +        gf_boolean_t    result        = _gf_false; +        gf_boolean_t    relevant      = _gf_false;          switch (event) {          case GF_EVENT_CHILD_UP:                  index = nsr_get_child_index(this, data);                  if (index >= 0) { +                        /* Check if the child was previously down +                         * and it's not a false CHILD_UP +                         */ +                        if (!(priv->kid_state & (1 << index))) { +                                relevant = _gf_true; +                        } +                          priv->kid_state |= (1 << index);                          priv->up_children = nsr_count_up_kids(priv);                          gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC, @@ -876,27 +886,96 @@ nsr_notify (xlator_t *this, int event, void *data, ...)                          
if (!priv->config_leader && (priv->up_children > 1)) {                                  priv->leader = _gf_false;                          } + +                        /* If it's not relevant, or we have already * +                         * sent CHILD_UP just break */ +                        if (!relevant || priv->child_up) +                                break; + +                        /* If it's not a leader, just send the notify up */ +                        if (!priv->leader) { +                                ret = default_notify(this, event, data); +                                if (!ret) +                                        priv->child_up = _gf_true; +                                break; +                        } + +                        result = fop_quorum_check (this, +                                                (double)(priv->n_children - 1), +                                               (double)(priv->up_children - 1)); +                        if (result == _gf_false) { +                                gf_msg (this->name, GF_LOG_INFO, 0, +                                        N_MSG_GENERIC, "Not enough children " +                                        "are up to meet quorum. Waiting to " +                                        "send CHILD_UP from leader"); +                        } else { +                                gf_msg (this->name, GF_LOG_INFO, 0, +                                        N_MSG_GENERIC, "Enough children are up " +                                        "to meet quorum. 
Sending CHILD_UP " +                                        "from leader"); +                                ret = default_notify(this, event, data); +                                if (!ret) +                                        priv->child_up = _gf_true; +                        }                  }                  break;          case GF_EVENT_CHILD_DOWN:                  index = nsr_get_child_index(this, data);                  if (index >= 0) { +                        /* Check if the child was previously up +                         * and it's not a false CHILD_DOWN +                         */ +                        if (priv->kid_state & (1 << index)) { +                                relevant = _gf_true; +                        }                          priv->kid_state &= ~(1 << index);                          priv->up_children = nsr_count_up_kids(priv);                          gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,                                  "got CHILD_DOWN for %s, now %u kids",                                  ((xlator_t *)data)->name,                                  priv->up_children); -                        if (!priv->config_leader && (priv->up_children < 2)) { +                        if (!priv->config_leader && (priv->up_children < 2) +                            && relevant) {                                  priv->leader = _gf_true;                          } + +                        /* If it's not relevant, or we have already * +                         * sent CHILD_DOWN just break */ +                        if (!relevant || !priv->child_up) +                                break; + +                        /* If it's not a leader, just break coz we shouldn't  * +                         * propagate the failure from the failure till it     * +                         * itself goes down                                   * +                         */ +                        if (!priv->leader) { +                  
              break; +                        } + +                        result = fop_quorum_check (this, +                                           (double)(priv->n_children - 1), +                                           (double)(priv->up_children - 1)); +                        if (result == _gf_false) { +                                gf_msg (this->name, GF_LOG_INFO, 0, +                                        N_MSG_GENERIC, "Enough children are " +                                        "to down to fail quorum. " +                                        "Sending CHILD_DOWN from leader"); +                                ret = default_notify(this, event, data); +                                if (!ret) +                                        priv->child_up = _gf_false; +                        } else { +                                gf_msg (this->name, GF_LOG_INFO, 0, +                                        N_MSG_GENERIC, "Not enough children " +                                        "are down to fail quorum. Waiting to " +                                        "send CHILD_DOWN from leader"); +                        }                  }                  break;          default: -                ; +                ret = default_notify(this, event, data);          } -        return default_notify(this, event, data); +        return ret;  } @@ -995,6 +1074,7 @@ nsr_init (xlator_t *this)          GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);          priv->leader = priv->config_leader; +        priv->child_up = _gf_false;          if (pthread_create(&kid, NULL, nsr_flush_thread,                             this) != 0) {  | 
