diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2017-09-04 16:57:25 +0530 | 
|---|---|---|
| committer | jiffin tony Thottan <jthottan@redhat.com> | 2018-03-08 06:39:57 +0000 | 
| commit | e304d218602f3099dc4ba9bb86fd953cee8a8e59 (patch) | |
| tree | 24fc0ac1a189952ee003ea54156a24f794fc606e | |
| parent | afcb83f6eb11bc5b7a0e6c5c0b7c8f56af871840 (diff) | |
cluster/afr: Fail open on split-brain
Problem:
Append on a file with split-brain succeeds. Open is intercepted by open-behind,
when write comes on the file, open-behind does open+write. Open succeeds
because afr doesn't fail it. Then write succeeds because write-behind
intercepts it. Flush is also intercepted by write-behind, so the application
never gets to know that the write failed.
Fix:
Fail open on split-brain, so that when open-behind does open+write open fails
which leads to write failure. Application will know about this failure.
Change-Id: I4bff1c747c97bb2925d6987f4ced5f1ce75dbc15
BUG: 1544635
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
(cherry picked from commit 786343abca3474ff01aa1017210112d97cbc4843)
| -rw-r--r-- | tests/basic/afr/split-brain-open.t | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 77 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 93 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 11 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 58 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 43 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 5 | 
12 files changed, 246 insertions, 95 deletions
| diff --git a/tests/basic/afr/split-brain-open.t b/tests/basic/afr/split-brain-open.t new file mode 100644 index 00000000000..9b2f2856047 --- /dev/null +++ b/tests/basic/afr/split-brain-open.t @@ -0,0 +1,38 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 + +#Disable self-heal-daemon +TEST $CLI volume heal $V0 disable + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; + +TEST touch $M0/data-split-brain.txt + +#Create data split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 + +`echo "brick1_alive" > $M0/data-split-brain.txt` +TEST [ $? == 0 ]; + +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +`echo "brick0_alive" > $M0/data-split-brain.txt` +TEST [ $? == 0 ]; + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +echo "all-alive" >> $M0/data-split-brain.txt +TEST [ $? != 0 ]; + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9c96056a723..a83484e00d1 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -253,8 +253,9 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                          local->transaction.in_flight_sb = _gf_true;                          metadatamap |= (1 << index);                  } -                if (metadatamap_old != metadatamap) +                if (metadatamap_old != metadatamap) {                          event = 0; +                }                  break;          case AFR_DATA_TRANSACTION: @@ -281,19 +282,71 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,          return ret;  } -int -afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, inode_t *inode) +gf_boolean_t +afr_is_symmetric_error (call_frame_t *frame, xlator_t *this)  { -        int            ret  = -1; +        afr_local_t *local = NULL;          afr_private_t *priv = NULL; +        int op_errno = 0; +        int i_errno = 0; +        gf_boolean_t matching_errors = _gf_true; +        int i = 0; + +        priv = this->private; +        local = frame->local; + +        for (i = 0; i < priv->child_count; i++) { +                if (!local->replies[i].valid) +                        continue; +                if (local->replies[i].op_ret != -1) { +                        /* Operation succeeded on at least one subvol, +                           so it is not a failed-everywhere situation. +                        */ +                        matching_errors = _gf_false; +                        break; +                } +                i_errno = local->replies[i].op_errno; + +                if (i_errno == ENOTCONN) { +                        /* ENOTCONN is not a symmetric error. We do not +                           know if the operation was performed on the +                           backend or not. +                        */ +                        matching_errors = _gf_false; +                        break; +                } + +                if (!op_errno) { +                        op_errno = i_errno; +                } else if (op_errno != i_errno) { +                        /* Mismatching op_errno's */ +                        matching_errors = _gf_false; +                        break; +                } +        } + +        return matching_errors; +} + +int +afr_set_in_flight_sb_status (xlator_t *this, call_frame_t *frame, +                             inode_t *inode) +{ +        int           ret    = -1; +        afr_private_t *priv  = NULL; +        afr_local_t   *local = NULL;          priv = this->private; +        local = frame->local;          /* If this transaction saw no failures, then exit. */          if (AFR_COUNT (local->transaction.failed_subvols,                         priv->child_count) == 0)                  return 0; +        if (afr_is_symmetric_error (frame, this)) +                return 0; +          LOCK (&inode->lock);          {                  ret = __afr_set_in_flight_sb_status (this, local, inode); @@ -546,8 +599,9 @@ afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,                  }          } else {                  /* For files, abort in case of data/metadata split-brain. */ -                if (!data_count || !metadata_count) +                if (!data_count || !metadata_count) {                          return -EIO; +                }          }          if (type == AFR_METADATA_TRANSACTION && readable) @@ -1956,6 +2010,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)                  GF_FREE (local->cont.opendir.checksum);          } +        { /* open */ +                if (local->cont.open.fd) +                        fd_unref (local->cont.open.fd); +        } +          { /* readdirp */                  if (local->cont.readdir.dict)                          dict_unref (local->cont.readdir.dict); @@ -2533,9 +2592,11 @@ afr_lookup_metadata_heal_check (call_frame_t *frame, xlator_t *this)          if (!afr_can_start_metadata_self_heal (frame, this))                  goto out; -        heal = afr_frame_create (this); -        if (!heal) +        heal = afr_frame_create (this, &ret); +        if (!heal) { +                ret = -ret;                  goto out; +        }          ret = synctask_new (this->ctx->env, afr_lookup_sh_metadata_wrap,                              afr_refresh_selfheal_done, heal, frame); @@ -2628,7 +2689,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)  	}  	if (need_heal) { -		heal = afr_frame_create (this); +		heal = afr_frame_create (this, NULL);  		if (!heal)                          goto metadata_heal; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 6651e92f482..97397f986b5 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -131,7 +131,7 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)  		}  	} -        afr_set_in_flight_sb_status (this, local, local->inode); +        afr_set_in_flight_sb_status (this, frame, local->inode);  } diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 7a628350c34..6c625ccf7da 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -66,16 +66,15 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } -  int  afr_open_cbk (call_frame_t *frame, void *cookie,                xlator_t *this, int32_t op_ret, int32_t op_errno,                fd_t *fd, dict_t *xdata)  { -        afr_local_t *  local       = NULL; -        int            call_count  = -1; -        int            child_index = (long) cookie; -	afr_fd_ctx_t  *fd_ctx = NULL; +        afr_local_t   *local           = NULL; +        int           call_count       = -1; +        int           child_index      = (long) cookie; +        afr_fd_ctx_t  *fd_ctx          = NULL;          local = frame->local;  	fd_ctx = local->fd_ctx; @@ -103,24 +102,62 @@ afr_open_cbk (call_frame_t *frame, void *cookie,                                      fd, 0, NULL);                  } else {                          AFR_STACK_UNWIND (open, frame, local->op_ret, -                                          local->op_errno, local->fd, -					  local->xdata_rsp); +                                          local->op_errno, local->cont.open.fd, +                                          local->xdata_rsp);                  }          }          return 0;  } + +int +afr_open_continue (call_frame_t *frame, xlator_t *this, int err) +{ +        afr_local_t   *local     = NULL; +        afr_private_t *priv      = NULL; +        int           call_count = 0; +        int           i          = 0; + +        local  = frame->local; +        priv   = this->private; + +        if (err) { +                AFR_STACK_UNWIND (open, frame, -1, -err, NULL, NULL); +        } else { +                local->call_count = AFR_COUNT (local->child_up, +                                               priv->child_count); +                call_count = local->call_count; + +                for (i = 0; i < priv->child_count; i++) { +                        if (local->child_up[i]) { +                                STACK_WIND_COOKIE (frame, afr_open_cbk, +                                                   (void *)(long)i, +                                                   priv->children[i], +                                                  priv->children[i]->fops->open, +                                                   &local->loc, +                                            (local->cont.open.flags & ~O_TRUNC), +                                                   local->cont.open.fd, +                                                   local->xdata_req); +                                if (!--call_count) +                                        break; +                        } +                } +        } +        return 0; +} +  int  afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,            fd_t *fd, dict_t *xdata)  { -        afr_private_t * priv       = NULL; -        afr_local_t *   local      = NULL; -        int             i          = 0; -        int32_t         call_count = 0; -        int32_t         op_errno   = 0; -	afr_fd_ctx_t   *fd_ctx = NULL; +        afr_private_t *priv            = NULL; +        afr_local_t   *local           = NULL; +        int           spb_choice       = 0; +        int           event_generation = 0; +        int           ret              = 0; +        int32_t       op_errno         = 0; +        afr_fd_ctx_t  *fd_ctx          = NULL;          //We can't let truncation to happen outside transaction. @@ -140,23 +177,27 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          if (!afr_is_consistent_io_possible (local, priv, &op_errno))  		goto out; -        local->fd = fd_ref (fd); +        local->inode = inode_ref (loc->inode); +        loc_copy (&local->loc, loc);  	local->fd_ctx = fd_ctx;  	fd_ctx->flags = flags; - -        call_count = local->call_count; +        if (xdata) +                local->xdata_req = dict_ref (xdata);          local->cont.open.flags = flags; - -        for (i = 0; i < priv->child_count; i++) { -                if (local->child_up[i]) { -                        STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->open, -                                           loc, (flags & ~O_TRUNC), fd, xdata); -                        if (!--call_count) -                                break; -                } +        local->cont.open.fd = fd_ref (fd); + +        ret = afr_inode_get_readable (frame, local->inode, this, +                                      NULL, &event_generation, +                                      AFR_DATA_TRANSACTION); +        if ((ret < 0) && +            (afr_inode_split_brain_choice_get (local->inode, +                                             this, &spb_choice) == 0) && +            spb_choice < 0) { +                afr_inode_refresh (frame, this, local->inode, +                                   local->inode->gfid, afr_open_continue); +        } else { +                afr_open_continue (frame, this, 0);          }  	return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 20e81dd43e0..26d3860b234 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -66,9 +66,9 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,                  goto out;          } -        frame = afr_frame_create (this); +        frame = afr_frame_create (this, &ret);          if (!frame) { -                ret = -ENOMEM; +                ret = -ret;                  goto out;          } @@ -2349,18 +2349,17 @@ afr_inode_find (xlator_t *this, uuid_t gfid)  call_frame_t * -afr_frame_create (xlator_t *this) +afr_frame_create (xlator_t *this, int32_t *op_errno)  {  	call_frame_t *frame    = NULL;  	afr_local_t  *local    = NULL; -	int           op_errno = 0;  	pid_t         pid      = GF_CLIENT_PID_SELF_HEALD;  	frame = create_frame (this, this->ctx->pool);  	if (!frame)  		return NULL; -	local = AFR_FRAME_INIT (frame, op_errno); +	local = AFR_FRAME_INIT (frame, (*op_errno));  	if (!local) {  		STACK_DESTROY (frame->root);  		return NULL; @@ -2490,7 +2489,7 @@ afr_selfheal (xlator_t *this, uuid_t gfid)  	call_frame_t *frame = NULL;          afr_local_t *local = NULL; -	frame = afr_frame_create (this); +	frame = afr_frame_create (this, NULL);  	if (!frame)  		return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 2c254e80aa1..8cf43f2807b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -776,13 +776,37 @@ out:  	return ret;  } +int +afr_selfheal_data_open_cbk (call_frame_t *frame, void *cookie, +                            xlator_t *this, int32_t op_ret, int32_t op_errno, +                            fd_t *fd, dict_t *xdata) +{ +        afr_local_t *local = NULL; +        int         i      = (long) cookie; + +        local = frame->local; + +        local->replies[i].valid = 1; +        local->replies[i].op_ret = op_ret; +        local->replies[i].op_errno = op_errno; + +        syncbarrier_wake (&local->barrier); + +        return 0; +}  int  afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)  { -	int         ret    = 0; -        fd_t       *fd_tmp = NULL; -	loc_t       loc    = {0,}; +        int           ret      = 0; +        fd_t          *fd_tmp  = NULL; +        loc_t         loc      = {0,}; +        call_frame_t  *frame   = NULL; +        afr_local_t   *local   = NULL; +        afr_private_t *priv    = NULL; +        int           i        = 0; + +        priv = this->private;  	fd_tmp = fd_create (inode, 0);  	if (!fd_tmp) @@ -791,7 +815,31 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)  	loc.inode = inode_ref (inode);  	gf_uuid_copy (loc.gfid, inode->gfid); -	ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL, NULL); +        frame = afr_frame_create (this, &ret); +        if (!frame) { +                ret = -ret; +                fd_unref (fd_tmp); +                goto out; +        } +        local = frame->local; + +        AFR_ONLIST (local->child_up, frame, afr_selfheal_data_open_cbk, open, +                    &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL); + +        ret = -ENOTCONN; +        for (i = 0; i < priv->child_count; i++) { +                if (!local->replies[i].valid) +                        continue; + +                if (local->replies[i].op_ret < 0) { +                        ret = -local->replies[i].op_errno; +                        continue; +                } + +                ret = 0; +                break; +        } +  	if (ret < 0) {  		fd_unref (fd_tmp);                  goto out; @@ -802,6 +850,8 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)          *fd = fd_tmp;  out:          loc_wipe (&loc); +        if (frame) +                AFR_STACK_DESTROY (frame);  	return ret;  } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index f23cf8ec6ee..199f8961480 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -486,9 +486,9 @@ afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf)                  goto out;          } -        frame = afr_frame_create (this); +        frame = afr_frame_create (this, &ret);          if (!frame) { -                ret = -ENOMEM; +                ret = -ret;                  goto out;          } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 352d151207e..556d14baf82 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -670,7 +670,7 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname,  	if (!parent)  		goto out; -	frame = afr_frame_create (this); +	frame = afr_frame_create (this, NULL);  	if (!frame)  		goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index a1da4331fb7..188a334384d 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -209,7 +209,7 @@ afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,  		      int subvol, dict_t *xattr, dict_t *xdata);  call_frame_t * -afr_frame_create (xlator_t *this); +afr_frame_create (xlator_t *this, int32_t *op_errno);  inode_t *  afr_inode_find (xlator_t *this, uuid_t gfid); diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 74c9bb67931..19cde88a6d5 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -260,7 +260,7 @@ afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid)          int raw[AFR_NUM_CHANGE_LOGS] = {0};          priv = this->private; -        frame = afr_frame_create (this); +        frame = afr_frame_create (this, NULL);          if (!frame)                  goto out;          inode = afr_inode_find (this, gfid); @@ -457,9 +457,9 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid)  	priv = healer->this->private;  	subvol = priv->children[healer->subvol]; -        frame = afr_frame_create (healer->this); +        frame = afr_frame_create (healer->this, &ret);          if (!frame) { -                ret = -ENOMEM; +                ret = -ret;                  goto out;          } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 35621d98493..791c70f6fcc 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -626,51 +626,10 @@ afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)          return _gf_true;  } -  void  afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)  { -	afr_local_t *local = NULL; -	afr_private_t *priv = NULL; -	int op_errno = 0; -	int i_errno = 0; -	gf_boolean_t matching_errors = _gf_true; -	int i = 0; - -	priv = this->private; -	local = frame->local; - -	for (i = 0; i < priv->child_count; i++) { -		if (!local->replies[i].valid) -			continue; -		if (local->replies[i].op_ret != -1) { -			/* Operation succeeded on at least on subvol, -			   so it is not a failed-everywhere situation. -			*/ -			matching_errors = _gf_false; -			break; -		} -		i_errno = local->replies[i].op_errno; - -		if (i_errno == ENOTCONN) { -			/* ENOTCONN is not a symmetric error. We do not -			   know if the operation was performed on the -			   backend or not. -			*/ -			matching_errors = _gf_false; -			break; -		} - -		if (!op_errno) { -			op_errno = i_errno; -		} else if (op_errno != i_errno) { -			/* Mismatching op_errno's */ -			matching_errors = _gf_false; -			break; -		} -	} - -	if (matching_errors) +	if (afr_is_symmetric_error (frame, this))  		__mark_all_success (frame, this);  } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c4ceb66914f..8cd687398f0 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -519,6 +519,7 @@ typedef struct _afr_local {                  struct {                          int32_t flags; +                        fd_t *fd;                  } open;                  struct { @@ -1213,7 +1214,7 @@ int  afr_get_msg_id (char *op_type);  int -afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, +afr_set_in_flight_sb_status (xlator_t *this, call_frame_t *frame,                               inode_t *inode);  int32_t @@ -1262,4 +1263,6 @@ int  afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,                                       char *buf, const char *default_str,                                       int32_t *serz_len, char delimiter); +gf_boolean_t +afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);  #endif /* __AFR_H__ */ | 
