diff options
author | Pranith Kumar K <pranithk@gluster.com> | 2012-04-17 15:45:44 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-04-19 00:25:17 -0700 |
commit | c284511c1ff7639ff674bca9260553680a917aa3 (patch) | |
tree | 744bc413c579519109cc5d06c1bcf0b765a61f63 /xlators | |
parent | 64177411f7c032eeb2b65635a9ca4e5767a72b40 (diff) |
cluster/afr: Handle transient parent-entry xactions in lookup
This patch addresses the case when the lookup on an entry is performed
while it is being renamed. The lookup can possibly return 2 different
gfids when the lookup reaches one subvol before the rename and the other
after the rename. In such cases the conflicting entry self-heal is
triggered to resolve the issue, but if there are a lot of entry transactions
going on the parent directory of the entry then the non-blocking
locks could fail, resulting in EIO. To avoid this, lookup asks the
locks xlator whether there are any parent-entrylks on the entry's basename.
If afr finds that there are such locks and the gfids differ, then
it uses the iatt of the file with the latest ctime as the iatt of the entry.
This solution is not foolproof, but it decreases the probability of
hitting the EIO. The correct solution is to take blocking locks on
the parent-entry to find out the correct source. Taking blocking
locks in lookup is not good. One stale entry lock can hang the whole
filesystem. So we chose to go with this for now.
Change-Id: Ibebb6c3074f56f80a96893b6bf5b77941e30d400
BUG: 765551
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/3179
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 101 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
2 files changed, 96 insertions, 6 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 42dd1299d38..bf4c0fb17fb 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -128,6 +128,13 @@ afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this, loc->path, GLUSTERFS_ENTRYLK_COUNT); } + ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s: Unable to set dict value for %s", + loc->path, GLUSTERFS_PARENT_ENTRYLK); + } + ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req); if (ret) { gf_log (this->name, GF_LOG_DEBUG, @@ -1114,6 +1121,7 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this, uint32_t inodelk_count = 0; uint32_t entrylk_count = 0; int ret = -1; + uint32_t parent_entrylk = 0; GF_ASSERT (local); GF_ASSERT (this); @@ -1129,6 +1137,10 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this, &entrylk_count); if (ret == 0) local->entrylk_count += entrylk_count; + ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK, + &parent_entrylk); + if (!ret) + local->cont.lookup.parent_entrylk += parent_entrylk; } static void @@ -1694,20 +1706,15 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, int32_t read_child = -1; int32_t ret = -1; afr_local_t *local = NULL; - afr_private_t *priv = NULL; gf_boolean_t fresh_lookup = _gf_false; local = frame->local; - priv = this->private; fresh_lookup = local->cont.lookup.fresh_lookup; if (local->loc.parent == NULL) fail_conflict = _gf_true; - if (afr_conflicting_iattrs (local->cont.lookup.bufs, - local->cont.lookup.success_children, - priv->child_count, local->loc.path, - this->name)) { + if (afr_lookup_conflicting_entries (local, this)) { if (fail_conflict == _gf_false) ret = 0; goto out; @@ -1739,6 +1746,84 @@ out: return ret; } +int +afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this) +{ + afr_private_t *priv = NULL; + int32_t 
*success_children = NULL; + struct iatt *bufs = NULL; + int i = 0; + int child = 0; + int lsubvol = -1; + + priv = this->private; + success_children = local->cont.lookup.success_children; + bufs = local->cont.lookup.bufs; + for (i = 0; i < priv->child_count; i++) { + child = success_children[i]; + if (child == -1) + break; + if (uuid_is_null (bufs[child].ia_gfid)) + continue; + if (lsubvol < 0) { + lsubvol = child; + } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) { + lsubvol = child; + } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) && + (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) { + lsubvol = child; + } + } + return lsubvol; +} + +void +afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this, + int subvol) +{ + afr_private_t *priv = NULL; + int32_t *success_children = NULL; + struct iatt *bufs = NULL; + int i = 0; + int child = 0; + + priv = this->private; + success_children = local->cont.lookup.success_children; + bufs = local->cont.lookup.bufs; + memcpy (local->fresh_children, success_children, + sizeof (*success_children) * priv->child_count); + for (i = 0; i < priv->child_count; i++) { + child = local->fresh_children[i]; + if (child == -1) + break; + if (child == subvol) + continue; + if (uuid_is_null (bufs[child].ia_gfid) && + (bufs[child].ia_type == bufs[subvol].ia_type)) + continue; + afr_children_rm_child (success_children, child, + priv->child_count); + local->success_count--; + } + afr_reset_children (local->fresh_children, priv->child_count); +} + +void +afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this) +{ + int lsubvol = 0; + + if (!afr_lookup_conflicting_entries (local, this)) + goto out; + + lsubvol = afr_lookup_get_latest_subvol (local, this); + if (lsubvol < 0) + goto out; + afr_lookup_mark_other_entries_stale (local, this, lsubvol); +out: + return; +} + static void afr_lookup_done (call_frame_t *frame, xlator_t *this) { @@ -1757,6 +1842,10 @@ afr_lookup_done (call_frame_t 
*frame, xlator_t *this) if (local->op_ret < 0) goto unwind; + + if (local->cont.lookup.parent_entrylk && local->success_count > 1) + afr_succeed_lookup_on_latest_iatt (local, this); + gfid_miss_count = afr_lookup_gfid_missing_count (local, this); up_children_count = afr_up_children_count (local->child_up, priv->child_count); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 954e9bb319f..2cd29f1c51e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -435,6 +435,7 @@ typedef struct _afr_local { } statfs; struct { + uint32_t parent_entrylk; uuid_t gfid_req; inode_t *inode; struct iatt buf; |