diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2016-05-27 15:47:07 +0530 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2016-06-10 08:36:21 -0700 | 
| commit | de56d9591ed94fc6f77e6f97ea6bbfaeae8e19fd (patch) | |
| tree | bef1e8d9a4a843f082029cb8e2f741574dfc88b1 | |
| parent | 1a8da06eb3e6adcca730451c5abd1c76efa226dd (diff) | |
cluster/afr: Unwind xdata_rsp even in case of failures
DHT expects GF_PREOP_CHECK_FAILED to be present in xdata_rsp when mkdir fails
because of a stale layout. However, AFR was unwinding a NULL xdata_rsp in case
of failures, which led to mkdir failures just after remove-brick. Unwind
the xdata_rsp even in case of failures to make sure the response from the brick
reaches DHT.
 >BUG: 1340623
 >Change-Id: Idd3f7b95730e8ea987b608e892011ff190e181d1
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/14553
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >Reviewed-by: Ravishankar N <ravishankar@redhat.com>
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
 >Reviewed-by: Anuradha Talur <atalur@redhat.com>
 >Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
BUG: 1342178
Change-Id: Iaacadcad0f76979fb250bd008b8e43f0e7acf642
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/14617
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 29 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 20 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 71 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 8 | 
7 files changed, 120 insertions, 21 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2043e11d1e8..c20c53218f9 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1269,6 +1269,7 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,  int  afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, +                     unsigned char *readables,  		     int *event_p, afr_transaction_type type,                       afr_read_subvol_args_t *args)  { @@ -1305,6 +1306,9 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,  		*subvol_p = subvol;  	if (event_p)  		*event_p = event; +        if (readables) +                memcpy (readables, readable, +                        sizeof (*readables) * priv->child_count);  	return subvol;  } @@ -1434,6 +1438,7 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)          GF_FREE (local->read_attempted);          GF_FREE (local->readable); +        GF_FREE (local->readable2);  	if (local->inode)  		inode_unref (local->inode); @@ -1593,8 +1598,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,          priv = this->private;          if (parent) -                par_read_subvol = afr_data_subvol_get (parent, this, 0, 0, -                                                       NULL); +                par_read_subvol = afr_data_subvol_get (parent, this, NULL, NULL, +                                                       NULL, NULL);          for (i = 0; i < priv->child_count; i++) {                  if (!replies[i].valid) @@ -1633,8 +1638,7 @@ afr_read_subvol_decide (inode_t *inode, xlator_t *this,          int data_subvol  = -1;          int mdata_subvol = -1; -        data_subvol = afr_data_subvol_get (inode, this, -                                           0, 0, args); +        data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args);          mdata_subvol = afr_metadata_subvol_get (inode, this,           
                                       0, 0, args);          if (data_subvol == -1 || mdata_subvol == -1) @@ -1782,7 +1786,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)  			goto cant_interpret;  		} else {                          read_subvol = afr_data_subvol_get (local->inode, this, -                                                           0, 0, &args); +                                                       NULL, NULL, NULL, &args);  		}  	} else {  	cant_interpret: @@ -2409,7 +2413,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req  		return 0;  	} -	afr_read_subvol_get (loc->inode, this, NULL, &event, +	afr_read_subvol_get (loc->inode, this, NULL, NULL, &event,  			     AFR_DATA_TRANSACTION, NULL);  	if (event != local->event_generation) @@ -2560,7 +2564,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)                  }          } -	afr_read_subvol_get (loc->parent, this, NULL, &event, +	afr_read_subvol_get (loc->parent, this, NULL, NULL, &event,  			     AFR_DATA_TRANSACTION, NULL);  	if (event != local->event_generation) @@ -2883,7 +2887,8 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local; -	read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL); +	read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL, +                                           NULL, NULL);          LOCK (&frame->lock);          { @@ -4283,6 +4288,14 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)  		goto out;  	} +        local->readable2 = GF_CALLOC (priv->child_count, sizeof (char), +                                      gf_afr_mt_char); +        if (!local->readable2) { +                if (op_errno) +                        *op_errno = ENOMEM; +                goto out; +        } +  	local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),  				   gf_afr_mt_reply_t);  	if 
(!local->replies) { diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 841c64361cf..2260e5dac26 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -153,7 +153,8 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)           * -1 above due to gen being 0, which is why it is OK to pass NULL for           *  read_subvol_args here.           */ -        entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL); +        entry_read_subvol = afr_data_subvol_get (inode, this, NULL, NULL, +                                                 NULL, NULL);          if (entry_read_subvol != par_read_subvol)                  return -1; diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 55aec7429a7..f3de5352d7e 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -101,18 +101,21 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)  	if (local->inode) {  		afr_replies_interpret (frame, this, local->inode, NULL);  		inode_read_subvol = afr_data_subvol_get (local->inode, this, -							 NULL, NULL, &args); +                                                       NULL, NULL, NULL, &args);  	}  	if (local->parent)  		parent_read_subvol = afr_data_subvol_get (local->parent, this, -							  NULL, NULL, NULL); +                                             NULL, local->readable, NULL, NULL); +  	if (local->parent2)  		parent2_read_subvol = afr_data_subvol_get (local->parent2, this, -							   NULL, NULL, NULL); +                                            NULL, local->readable2, NULL, NULL);  	local->op_ret = -1;  	local->op_errno = afr_final_errno (local, priv); +        afr_pick_error_xdata (local, priv, local->parent, local->readable, +                              local->parent2, local->readable2);  	for (i = 0; i < priv->child_count; i++) {  		if 
(!local->replies[i].valid) @@ -144,6 +147,11 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)  				local->replies[i].preparent2;  			local->cont.dir_fop.postnewparent =  				local->replies[i].postparent2; +                        if (local->xdata_rsp) { +                                dict_unref (local->xdata_rsp); +                                local->xdata_rsp = NULL; +                        } +  			if (local->replies[i].xdata)  				local->xdata_rsp =  					dict_ref (local->replies[i].xdata); @@ -196,6 +204,9 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,  	local->replies[child_index].valid = 1;  	local->replies[child_index].op_ret = op_ret;  	local->replies[child_index].op_errno = op_errno; +        if (xdata) +                local->replies[child_index].xdata = dict_ref (xdata); +  	if (op_ret >= 0) {  		if (poststat) @@ -208,9 +219,6 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index,  			local->replies[child_index].preparent2 = *preparent2;  		if (postparent2)  			local->replies[child_index].postparent2 = *postparent2; -		if (xdata) -			local->replies[child_index].xdata = dict_ref (xdata); -  		if (fd_ctx)  			fd_ctx->opened_on[child_index] = AFR_FD_OPENED;  	} else { diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 36889429657..47320ce9412 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -84,7 +84,7 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)                                                                 &args);  		else  			read_subvol = afr_data_subvol_get (local->inode, this, -							   NULL, NULL, &args); +                                                       NULL, NULL, NULL, &args);  	}  	local->op_ret = -1; @@ -164,8 +164,8 @@ __afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,  			
local->replies[child_index].poststat = *postbuf;  		if (xattr)  			local->replies[child_index].xattr = dict_ref (xattr); -		if (xdata) -			local->replies[child_index].xdata = dict_ref (xdata); +                if (xdata) +                        local->replies[child_index].xdata = dict_ref (xdata);  	} else {  		afr_transaction_fop_failed (frame, this, child_index);  	} diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 22b6997f2f7..9df8eba2192 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -57,6 +57,66 @@ afr_zero_fill_stat (afr_local_t *local)          }  } +/* In case of errors afr needs to choose which xdata from lower xlators it needs + * to unwind with. The way it is done is by checking if there are + * any good subvols which failed. Give preference to errnos other than + * ENOTCONN even if the child is source */ +void +afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, +                      inode_t *inode1, unsigned char *readable1, +                      inode_t *inode2, unsigned char *readable2) +{ +        int     s = -1;/*selection*/ +        int     i = 0; +        unsigned char *readable = NULL; + +        if (local->xdata_rsp) { +                dict_unref (local->xdata_rsp); +                local->xdata_rsp = NULL; +        } + +        readable = alloca0 (priv->child_count * sizeof (*readable)); +        if (inode2 && readable2) {/*rename fop*/ +                AFR_INTERSECT (readable, readable1, readable2, +                               priv->child_count); +        } else { +                memcpy (readable, readable1, +                        sizeof (*readable) * priv->child_count); +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (!local->replies[i].valid) +                        continue; + +                if (local->replies[i].op_ret >= 0) +                        continue; + +      
          if (local->replies[i].op_errno == ENOTCONN) +                        continue; + +                /*Order is important in the following condition*/ +                if ((s < 0) || (!readable[s] && readable[i])) +                        s = i; +        } + +        if (s != -1 && local->replies[s].xdata) { +                local->xdata_rsp = dict_ref (local->replies[s].xdata); +        } else if (s == -1) { +                for (i = 0; i < priv->child_count; i++) { +                        if (!local->replies[i].valid) +                                continue; + +                        if (local->replies[i].op_ret >= 0) +                                continue; + +                        if (!local->replies[i].xdata) +                                continue; +                        local->xdata_rsp = dict_ref (local->replies[i].xdata); +                        break; +                } +        } +} +  gf_boolean_t  afr_needs_changelog_update (afr_local_t *local)  { @@ -747,6 +807,17 @@ afr_handle_quorum (call_frame_t *frame)          local->op_errno = afr_final_errno (local, priv);          if (local->op_errno == 0)                  local->op_errno = afr_quorum_errno (priv); +        switch (local->transaction.type) { +        case AFR_ENTRY_TRANSACTION: +        case AFR_ENTRY_RENAME_TRANSACTION: +                afr_pick_error_xdata (local, priv, local->parent, +                                      local->readable, local->parent2, +                                      local->readable2); +                break; +        default: +                /*TBD*/ +                break; +        }  }  int diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index c58531eff44..ca8fcfefa89 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -55,4 +55,8 @@ gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);  gf_boolean_t 
afr_needs_changelog_update (afr_local_t *local);  void afr_zero_fill_stat (afr_local_t *local); +void +afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, +                      inode_t *inode1, unsigned char *readable1, +                      inode_t *inode2, unsigned char *readable2);  #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 5482dab25b2..c6afbbe20e0 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -426,6 +426,7 @@ typedef struct _afr_local {  	   performed. This is the output of afr_inode_refresh()  	*/  	unsigned char *readable; +	unsigned char *readable2; /*For rename transaction*/  	afr_inode_refresh_cbk_t refreshfn; @@ -860,14 +861,15 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,  				int type);  int  afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, +                     unsigned char *readables,  		     int *event_p, afr_transaction_type type,                       afr_read_subvol_args_t *args); -#define afr_data_subvol_get(i, t, s, e, a) \ -	afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a) +#define afr_data_subvol_get(i, t, s, r, e, a) \ +	afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)  #define afr_metadata_subvol_get(i, t, s, e, a) \ -	afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a) +	afr_read_subvol_get(i, t, s, NULL, e, AFR_METADATA_TRANSACTION, a)  int  afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,  | 
