summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-self-heal-common.c
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2012-04-14 14:33:53 +0530
committerAnand Avati <avati@redhat.com>2012-05-18 20:30:30 -0700
commitfa90243e20eeef91eda5a5bb249bed05066852e3 (patch)
tree7add1693e67dc25e69b8c1f8ed6d6e096c84461f /xlators/cluster/afr/src/afr-self-heal-common.c
parent24c1cbf4f7afd54a506a8265de9d22ce2b2e670f (diff)
cluster/afr: Enforce order in pre/post op
The xattrop order in pre/post op on all the subvols is client-0, client-1... client-n where n is (replica-count - 1). This order can lead to invalid split-brains if the brick dies in the middle of xattrops. Example: transaction completed pre-op, so on all the subvolumes xattrs have '1' changelog. Now post-op is sent to both the subvols. On subvol-0 change-log of client-0 is decremented to 0, before decrementing change-log of client-1 to 0 the brick dies. This change-log status on subvol-0 gives the meaning that a change is done on subvol-0 successfully but on subvol-1 it failed. Which is not what happened. Changes done when the subvol-0 was down will lead to pending change-log on subvol-1 for subvol-0. Which is correct. When the subvol-0 is brought back up, the change-log will be in split-brain state even when it is not a legitimate split-brain. If the brick dies in the middle of xattrops it should remain fool. Pre-op should perform xattrop of the local change-log first and post-op should perform xattrop of the local change-log last. In case of optimistic changelogs txn_changelog should be done last on local if it succeeds, first if it fails. Change-Id: Ib6eeb20cdc49b0b1fd2f454f25a9c8e08388c6e7 BUG: 765194 Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Reviewed-on: http://review.gluster.com/3226 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c38
1 files changed, 29 insertions, 9 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index e0af66952..5d0db8f63 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -847,20 +847,24 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+ priv = this->private;
for (i = 0; i < child_count; i++) {
if (!xattr[i])
continue;
+ local_pending = NULL;
for (j = 0; j < child_count; j++) {
pending = GF_CALLOC (sizeof (int32_t), 3,
gf_afr_mt_int32_t);
@@ -873,12 +877,28 @@ afr_sh_delta_to_xattr (afr_private_t *priv,
pending[k] = hton32 (delta_matrix[i][j]);
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
ret = dict_set_bin (xattr[i], priv->pending_key[j],
pending,
- 3 * sizeof (int32_t));
- if (ret < 0)
- gf_log (THIS->name, GF_LOG_WARNING,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
"Unable to set dict value.");
+ GF_FREE (pending);
+ }
+ }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
}
}
return 0;