diff options
author | Venkatesh Somyajulu <vsomyaju@redhat.com> | 2014-06-17 15:09:50 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-06-17 05:39:22 -0700 |
commit | 93832829016a0a51a8938c0c89c6bd09b3229c9f (patch) | |
tree | 46bf0526f0585e1046141cd265dfb3afd1844cd5 /xlators/cluster | |
parent | 3a499d170de2c7df06b127b709d27c64cef98886 (diff) |
cluster/dht: Do layout self healing of directory for nameless lookup
Problem: Currently in the nameless lookup code path, if at the
end of the lookup, even if it detects that layout
anamolies are there, layout healing will not be done as
there is no code to heal it.
So there can be race between mkdir and lookup.
Assume mkdir is going on from some other mount point,
Say, M1. Directories are created on some nodes but layout
is not set yet.
Now from M2, nameless lookup goes, lookup will be success
full as the directory is present on some of the nodes, but
it won't heal layout. Now if create goes after lookup fop,
because layout is absent, file creation will fail.
Fix: Included the code of layout self-heal in the nameless
lookup path. At the end of lookup, layout will be computed
as it would have been in the named lookup, but it will be
set to those node only, where directory is present.
So after that if create fop goes, the probabiliy to get the
subvolume with proper hash-range is high now, so reduces
the race window.
Other: Whenever a directory is created, we have to choose a brick
from which we start allocating layout in a circular fashion.
To calculate this starting brick, I have changed the candidate
from name of the directory to gfid of the directory
But to compute where a given file belongs, we will still
use the name of the file. Hash computed from the name of the
file should belong to any one of the directory-hash-range
Calculation of hash for a file is acting as a consumer and the
setting of directory layout based on gfid is acting as a producer,
which are independent from each other.
Change-Id: I3808c55082cd1b5c72d2c77cbbc063f55aa38bee
BUG: 1095888
Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/7493
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 23 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 197 |
4 files changed, 226 insertions, 1 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5c4fe2d4ca4..7eb40faa9a3 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -212,6 +212,17 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) op_errno = ESTALE; goto out; } + + /* For fixing the directory layout, we need to choose + * the subvolume on which layout will be set first. + * Because in nameless lookup, we have gfid only, + * we are dependent on gfid. Therefore if conf-> + * randomize_by_gfid is set, then only we proceed for + * healing layout of directory otherwise we don't heal. + */ + + if (local->inode && conf->randomize_by_gfid) + goto selfheal; } if (local->inode) @@ -227,6 +238,18 @@ out: NULL); return ret; + +selfheal: + + main_frame->local = local; + discover_frame->local = NULL; + FRAME_SU_DO (main_frame, dht_local_t); + uuid_copy (local->loc.gfid, local->gfid); + ret = dht_selfheal_directory_for_nameless_lookup (main_frame, + dht_lookup_selfheal_cbk, + &local->loc, layout); + return ret; + } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index e1e3a5a4e38..ba66306dd13 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -456,6 +456,12 @@ int dht_lookup_everywhere (call_frame_t *f int dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); + +int +dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame, + dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); + int dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, dht_layout_t *layout); diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 82127adf6e5..34892983a55 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -442,7 +442,6 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) layout->list[j].xlator->name)); } - gf_boolean_t dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator) { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 04e58903147..1e2d7438350 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -332,6 +332,144 @@ out: return 0; } +gf_boolean_t +dht_is_subvol_part_of_layout (dht_layout_t *layout, xlator_t *xlator) +{ + int i = 0; + gf_boolean_t ret = _gf_false; + + for (i = 0; i < layout->cnt; i++) { + if (!strcmp (layout->list[i].xlator->name, xlator->name)) { + ret = _gf_true; + break; + + } + } + + return ret; +} + +int +dht_layout_index_from_conf (dht_layout_t *layout, xlator_t *xlator) +{ + int i = -1; + int j = 0; + + for (j = 0; j < layout->cnt; j++) { + if (!strcmp (layout->list[j].xlator->name, xlator->name)) { + i = j; + break; + } + } + + return i; +} + + +static int +dht_selfheal_dir_xattr_for_nameless_lookup (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_local_t *local = NULL; + int missing_xattr = 0; + int i = 0; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *dummy = NULL; + int j = 0; + + local = frame->local; + this = frame->this; + conf = this->private; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) { + /* err != -1 would mean xattr present on the directory + or the directory is non existent. + !layout->list[i].stop would mean layout absent + */ + + continue; + } + missing_xattr++; + } + + /* Also account for subvolumes with no-layout. Used for zero'ing out + the layouts and for setting quota key's if present */ + + /* Send where either the subvol is not part of layout, + * or it is part of the layout but error is non-zero but error + * is not equal to -1 or ENOENT. + */ + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i]) + == _gf_false) { + missing_xattr++; + continue; + } + + j = dht_layout_index_from_conf (layout, conf->subvolumes[i]); + + if ((j != -1) && (layout->list[j].err != -1) && + (layout->list[j].err != 0) && + (layout->list[j].err != ENOENT)) { + missing_xattr++; + } + + } + + + gf_log (this->name, GF_LOG_TRACE, + "%d subvolumes missing xattr for %s", + missing_xattr, loc->path); + + if (missing_xattr == 0) { + dht_selfheal_dir_finish (frame, this, 0); + return 0; + } + + local->call_cnt = missing_xattr; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + + dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL); + + if (--missing_xattr == 0) + break; + } + + dummy = dht_layout_new (this, 1); + if (!dummy) + goto out; + + for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) { + if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i]) + == _gf_false) { + dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, + conf->subvolumes[i]); + missing_xattr--; + continue; + } + + j = dht_layout_index_from_conf (layout, conf->subvolumes[i]); + + if ((j != -1) && (layout->list[j].err != -1) && + (layout->list[j].err != ENOENT) && + (layout->list[j].err != 0)) { + dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, + conf->subvolumes[i]); + missing_xattr--; + } + } + + dht_layout_unref (this, dummy); +out: + return 0; + +} + int dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *statpre, @@ -1030,6 +1168,65 @@ sorry_no_fix: return 0; } +int +dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame, + dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) +{ + dht_local_t *local = NULL; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + dht_layout_anomalies (this, loc, layout, + &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + NULL, &local->selfheal.down, + &local->selfheal.misc, NULL); + + down = local->selfheal.down; + misc = local->selfheal.misc; + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref (this, layout); + + if (down) { + gf_log (this->name, GF_LOG_WARNING, + "%d subvolumes down -- not fixing", down); + ret = 0; + goto sorry_no_fix; + } + + if (misc) { + gf_log (this->name, GF_LOG_WARNING, + "%d subvolumes have unrecoverable errors", misc); + ret = 0; + goto sorry_no_fix; + } + + dht_layout_sort_volname (layout); + ret = dht_selfheal_dir_getafix (frame, loc, layout); + + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "not able to form layout for the directory"); + goto sorry_no_fix; + } + + dht_selfheal_dir_xattr_for_nameless_lookup (frame, &local->loc, layout); + return 0; + +sorry_no_fix: + /* TODO: need to put appropriate local->op_errno */ + dht_selfheal_dir_finish (frame, this, ret); + + return 0; + + +} int dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, |