diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2012-11-16 07:08:30 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-12-11 16:23:53 -0800 | 
| commit | cc9701d4f533bf7337d2925424e2498ab64fdf13 (patch) | |
| tree | 565527862f7f235c7b747a325155a126ad006d89 | |
| parent | 179e7333740fe990cac6fc2e63fbace844b17b8d (diff) | |
cluster/afr: Remember type of split-brain in inode-ctx
Along with this change, fixed the race of setting the
split-brain status in inode-ctx after unwinding the fop from
self-heal in case of back-ground self-heal.
Change-Id: Ifc829300df485f50f139443802e8b6dc7038b4ad
BUG: 873962
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4198
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rwxr-xr-x | tests/bugs/bug-873962.t | 85 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 169 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 7 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 12 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 28 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 15 | 
6 files changed, 199 insertions, 117 deletions
diff --git a/tests/bugs/bug-873962.t b/tests/bugs/bug-873962.t new file mode 100755 index 00000000..c18c71f6 --- /dev/null +++ b/tests/bugs/bug-873962.t @@ -0,0 +1,85 @@ +#!/bin/bash + +#AFR TEST-IDENTIFIER SPLIT-BRAIN +. $(dirname $0)/../include.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +B0_hiphenated=`echo $B0 | tr '/' '-'` +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} + +#Make sure self-heal is not triggered when the bricks are re-started +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume start $V0 +TEST glusterfs -s $H0 --volfile-id=$V0 $M0 +TEST glusterfs -s $H0 --volfile-id=$V0 $M1 +TEST cd $M0 +TEST touch a +TEST touch b +TEST touch c +TEST touch d +echo "1" > b +echo "1" > d +kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-${V0}2.pid` +echo "1" > a +echo "1" > c +TEST setfattr -n trusted.mdata -v abc b +TEST setfattr -n trusted.mdata -v abc d +TEST $CLI volume start $V0 force +sleep 5 +kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-${V0}1.pid` +echo "2" > a +echo "2" > c +TEST setfattr -n trusted.mdata -v def b +TEST setfattr -n trusted.mdata -v def d +TEST $CLI volume start $V0 force +sleep 10 + +#Files are in split-brain, so open should fail +TEST ! cat a; +TEST ! cat $M1/a; +TEST ! cat b; +TEST ! cat $M1/b; + +#Reset split-brain status +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a; +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/b; + +sleep 1 +#The operations should do self-heal and give correct output +EXPECT "2" cat a; +EXPECT "2" cat $M1/a; +EXPECT "def" getfattr -n trusted.mdata --only-values b 2>/dev/null +EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null + +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +sleep 5 #Wait for vol file to be updated + +sleep 1 +#Files are in split-brain, so open should fail +TEST ! cat c +TEST ! cat $M1/c +TEST ! cat d +TEST ! cat $M1/d + +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d + +sleep 1 +#The operations should NOT do self-heal but give correct output +EXPECT "2" cat c +EXPECT "2" cat $M1/c +EXPECT "1" cat d +EXPECT "1" cat $M1/d + +#Check that the self-heal is not triggered. +EXPECT "1" cat $B0/${V0}1/c +EXPECT "abc" getfattr -n trusted.mdata --only-values $B0/${V0}1/d 2>/dev/null +cd +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index c38e5272..ee96944f 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -49,8 +49,7 @@  #include "afr-self-heald.h"  #include "pump.h" -#define AFR_ICTX_OPENDIR_DONE_MASK     0x0000000200000000ULL -#define AFR_ICTX_SPLIT_BRAIN_MASK      0x0000000100000000ULL +#define AFR_ICTX_OPENDIR_DONE_MASK     0x0000000100000000ULL  #define AFR_ICTX_READ_CHILD_MASK       0x00000000FFFFFFFFULL  int @@ -203,59 +202,86 @@ out:          return ret;  } -afr_inode_ctx_t* -afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count) +void +afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)  { -        int             ret  = -1; -        afr_inode_ctx_t *ctx = NULL; -        size_t          size = 0; +        if (!ctx) +                return; +        GF_FREE (ctx->fresh_children); +        GF_FREE (ctx); +} -        GF_ASSERT (child_count > 0); +afr_inode_ctx_t* +__afr_inode_ctx_get (inode_t *inode, xlator_t *this) +{ +        int             ret      = 0; +        uint64_t        ctx_addr = 0; +        afr_inode_ctx_t *ctx     = NULL; +        afr_private_t   *priv    = NULL; -        if (!addr) { -                ctx = GF_CALLOC (1, sizeof (*ctx), -                                 gf_afr_mt_inode_ctx_t); -                if (!ctx) -                        goto out; -                size = sizeof (*ctx->fresh_children); -                ctx->fresh_children = GF_CALLOC (child_count, size, -                                                 gf_afr_mt_int32_t); -                if (!ctx->fresh_children) -                        goto out; -        } else { -                ctx = (afr_inode_ctx_t*) (long) addr; +        priv = this->private; +        ret = __inode_ctx_get (inode, this, &ctx_addr); +        if (ret < 0) +                ctx_addr = 0; +        if (ctx_addr != 0) { +                ctx = (afr_inode_ctx_t*) (long) ctx_addr; +                goto out;          } -        ret = 0; +        ctx = GF_CALLOC (1, sizeof (*ctx), +                         gf_afr_mt_inode_ctx_t); +        if (!ctx) +                goto fail; +        ctx->fresh_children = GF_CALLOC (priv->child_count, +                                         sizeof (*ctx->fresh_children), +                                         gf_afr_mt_int32_t); +        if (!ctx->fresh_children) +                goto fail; +        ret = __inode_ctx_put (inode, this, (uint64_t)ctx); +        if (ret) { +                gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " +                                  "set the inode ctx (%s)", +                                  uuid_utoa (inode->gfid)); +                goto fail; +        } +  out: -        if (ret && ctx) { -                GF_FREE (ctx->fresh_children); -                GF_FREE (ctx); -                ctx = NULL; +        return ctx; + +fail: +        afr_inode_ctx_destroy (ctx); +        return NULL; +} + +afr_inode_ctx_t* +afr_inode_ctx_get (inode_t *inode, xlator_t *this) +{ +        afr_inode_ctx_t *ctx = NULL; + +        LOCK (&inode->lock); +        { +                ctx = __afr_inode_ctx_get (inode, this);          } +        UNLOCK (&inode->lock);          return ctx;  }  void -afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) +afr_inode_get_ctx_params (xlator_t *this, inode_t *inode, +                          afr_inode_params_t *params)  {          GF_ASSERT (inode);          GF_ASSERT (params); -        int             ret = 0;          afr_inode_ctx_t *ctx = NULL;          afr_private_t   *priv = NULL;          int             i = 0; -        uint64_t        ctx_addr   = 0;          int32_t         read_child = -1;          int32_t         *fresh_children = NULL;          priv = this->private;          LOCK (&inode->lock);          { -                ret = __inode_ctx_get (inode, this, &ctx_addr); -                if (ret < 0) -                        goto unlock; -                ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); +                ctx = __afr_inode_ctx_get (inode, this);                  if (!ctx)                          goto unlock;                  switch (params->op) { @@ -274,12 +300,6 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)                          if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)                                  params->u.value = _gf_true;                          break; -                case AFR_INODE_GET_SPLIT_BRAIN: -                        params->u.value = _gf_false; -                        if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK) -                                params->u.value = _gf_true; -                        ; -                        break;                  default:                          GF_ASSERT (0);                          break; @@ -292,11 +312,16 @@ unlock:  gf_boolean_t  afr_is_split_brain (xlator_t *this, inode_t *inode)  { -        afr_inode_params_t params = {0}; +        afr_inode_ctx_t *ctx = NULL; +        gf_boolean_t    spb  = _gf_false; -        params.op = AFR_INODE_GET_SPLIT_BRAIN; -        afr_inode_get_ctx (this, inode, ¶ms); -        return params.u.value; +        ctx = afr_inode_ctx_get (inode, this); +        if (!ctx) +                goto out; +        if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB)) +                spb = _gf_true; +out: +        return spb;  }  gf_boolean_t @@ -305,11 +330,10 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)          afr_inode_params_t params = {0};          params.op = AFR_INODE_GET_OPENDIR_DONE; -        afr_inode_get_ctx (this, inode, ¶ms); +        afr_inode_get_ctx_params (this, inode, ¶ms);          return params.u.value;  } -  int32_t  afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)  { @@ -317,7 +341,7 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)          params.op = AFR_INODE_GET_READ_CTX;          params.u.read_ctx.children = fresh_children; -        afr_inode_get_ctx (this, inode, ¶ms); +        afr_inode_get_ctx_params (this, inode, ¶ms);          return params.u.read_ctx.read_child;  } @@ -379,31 +403,14 @@ afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)  }  void -afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set) -{ -        uint64_t        remaining_mask = 0; -        uint64_t        mask = 0; - -        if (set) { -                remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); -                mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); -                ctx->masks = remaining_mask | mask; -        } else { -                ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); -        } -} - -void -afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) +afr_inode_set_ctx_params (xlator_t *this, inode_t *inode, +                          afr_inode_params_t *params)  {          GF_ASSERT (inode);          GF_ASSERT (params); -        int             ret = 0;          afr_inode_ctx_t *ctx            = NULL;          afr_private_t   *priv           = NULL; -        uint64_t        ctx_addr        = 0; -        gf_boolean_t    set             = _gf_false;          int32_t         read_child      = -1;          int32_t         *fresh_children = NULL;          int32_t         *stale_children = NULL; @@ -411,10 +418,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)          priv = this->private;          LOCK (&inode->lock);          { -                ret = __inode_ctx_get (inode, this, &ctx_addr); -                if (ret < 0) -                        ctx_addr = 0; -                ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); +                ctx = __afr_inode_ctx_get (inode, this);                  if (!ctx)                          goto unlock;                  switch (params->op) { @@ -434,33 +438,26 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)                  case AFR_INODE_SET_OPENDIR_DONE:                          afr_inode_ctx_set_opendir_done (ctx);                          break; -                case AFR_INODE_SET_SPLIT_BRAIN: -                        set = params->u.value; -                        afr_inode_ctx_set_splitbrain (ctx, set); -                        break;                  default:                          GF_ASSERT (0);                          break;                  } -                ret = __inode_ctx_put (inode, this, (uint64_t)ctx); -                if (ret) { -                        gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " -                                          "set the inode ctx (%s)", -                                          uuid_utoa (inode->gfid)); -                }          }  unlock:          UNLOCK (&inode->lock);  }  void -afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb, +                     afr_spb_state_t data_spb)  { -        afr_inode_params_t      params = {0}; +        afr_inode_ctx_t *ctx = NULL; -        params.op = AFR_INODE_SET_SPLIT_BRAIN; -        params.u.value          = set; -        afr_inode_set_ctx (this, inode, ¶ms); +        ctx = afr_inode_ctx_get (inode, this); +        if (mdata_spb != DONT_KNOW) +                ctx->mdata_spb = mdata_spb; +        if (data_spb != DONT_KNOW) +                ctx->data_spb = data_spb;  }  void @@ -469,7 +466,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)          afr_inode_params_t params = {0};          params.op = AFR_INODE_SET_OPENDIR_DONE; -        afr_inode_set_ctx (this, inode, ¶ms); +        afr_inode_set_ctx_params (this, inode, ¶ms);  }  void @@ -488,7 +485,7 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,          params.op = AFR_INODE_SET_READ_CTX;          params.u.read_ctx.read_child     = read_child;          params.u.read_ctx.children = fresh_children; -        afr_inode_set_ctx (this, inode, ¶ms); +        afr_inode_set_ctx_params (this, inode, ¶ms);  }  void @@ -501,7 +498,7 @@ afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,          params.op = AFR_INODE_RM_STALE_CHILDREN;          params.u.read_ctx.children = stale_children; -        afr_inode_set_ctx (this, inode, ¶ms); +        afr_inode_set_ctx_params (this, inode, ¶ms);  }  gf_boolean_t diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 07603c5b..a0dfa59d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2067,19 +2067,16 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          afr_local_t *     orig_frame_local = NULL;          afr_self_heal_t * orig_frame_sh = NULL;          char              sh_type_str[256] = {0,}; -        gf_boolean_t      split_brain = _gf_false;          priv  = this->private;          local = bgsh_frame->local;          sh    = &local->self_heal; -        if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb) { -                split_brain = _gf_true; +        if (local->govinda_gOvinda) { +                afr_set_split_brain (this, sh->inode, SPB, SPB);                  sh->op_failed = 1;          } -        afr_set_split_brain (this, sh->inode, split_brain); -          afr_self_heal_type_str_get (sh, sh_type_str,                                      sizeof(sh_type_str));          if (sh->op_failed) { diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index c90ed7c1..96944368 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -716,12 +716,13 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)                          "split-brain). Please delete the file from all but "                          "the preferred subvolume.", local->loc.path); -                sh->data_spb = _gf_true; +                afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB); +                  afr_sh_data_fail (frame, this);                  return 0;          } -        sh->data_spb = _gf_false; +        afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);          ret = afr_sh_inode_set_read_ctx (sh, this);          if (ret) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -1401,13 +1402,6 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; -        /* Self-heal completion cbk changes inode split-brain status based on -         * govinda_gOvinda, mdata_spb, data_spb value. -         * Initialize data_spb with current split-brain status. -         * If for some reason self-heal fails(locking phase etc), it makes sure -         * we retain the split-brain status before this self-heal started. -         */ -        sh->data_spb = afr_is_split_brain (this, sh->inode);          if (afr_can_start_data_self_heal (sh, priv)) {                  if (IA_ISREG (sh->type)) {                          afr_sh_data_open (frame, this); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 647ee47a..cf8a33ad 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -88,6 +88,19 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)          return 0;  } +int +afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t         *local    = NULL; +        afr_self_heal_t     *sh       = NULL; + +        local    = frame->local; +        sh       = &local->self_heal; + +        sh->op_failed = 1; +        afr_sh_metadata_finish (frame, this); +        return 0; +}  int  afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie, @@ -397,13 +410,12 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,                          "(possible split-brain). Please fix the file on "                          "all backend volumes", local->loc.path); -                sh->mdata_spb = _gf_true; - -                afr_sh_metadata_finish (frame, this); +                afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW); +                afr_sh_metadata_fail (frame, this);                  goto out;          } -        sh->mdata_spb = _gf_false; +        afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);          if (nsources == 0) {                  gf_log (this->name, GF_LOG_TRACE,                          "No self-heal needed for %s", @@ -529,14 +541,6 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; -        /* Self-heal completion cbk changes inode split-brain status based on -         * govinda_gOvinda, mdata_spb, data_spb value. -         * Initialize mdata_spb with current split-brain status. -         * If for some reason self-heal fails(locking phase etc), it makes sure -         * we retain the split-brain status before this self-heal started. -         */ -        sh->mdata_spb = afr_is_split_brain (this, sh->inode); -          if (afr_can_start_metadata_self_heal (sh, priv)) {                  afr_sh_metadata_lock (frame, this);          } else { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index da9cc67c..475e5dda 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -58,10 +58,8 @@ typedef enum {          AFR_INODE_SET_READ_CTX = 1,          AFR_INODE_RM_STALE_CHILDREN,          AFR_INODE_SET_OPENDIR_DONE, -        AFR_INODE_SET_SPLIT_BRAIN,          AFR_INODE_GET_READ_CTX,          AFR_INODE_GET_OPENDIR_DONE, -        AFR_INODE_GET_SPLIT_BRAIN,  } afr_inode_op_t;  typedef struct afr_inode_params_ { @@ -75,9 +73,17 @@ typedef struct afr_inode_params_ {          } u;  } afr_inode_params_t; +typedef enum afr_spb_state { +        DONT_KNOW, +        SPB, +        NO_SPB +} afr_spb_state_t; +  typedef struct afr_inode_ctx_ {          uint64_t masks;          int32_t  *fresh_children;//increasing order of latency +        afr_spb_state_t mdata_spb; +        afr_spb_state_t data_spb;  } afr_inode_ctx_t;  typedef enum { @@ -272,8 +278,6 @@ typedef struct {          int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);          int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);          void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); -        gf_boolean_t    mdata_spb; -        gf_boolean_t    data_spb;          call_frame_t *sh_frame;  } afr_self_heal_t; @@ -833,7 +837,8 @@ gf_boolean_t  afr_is_split_brain (xlator_t *this, inode_t *inode);  void -afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set); +afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb, +                     afr_spb_state_t data_spb);  int  afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,  | 
