summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-rebalance.c
diff options
context:
space:
mode:
authorVenkatesh Somyajulu <vsomyaju@redhat.com>2014-09-25 17:24:09 +0530
committerKaleb KEITHLEY <kkeithle@redhat.com>2014-10-20 07:53:56 -0700
commitb3387c83a0d968db7428920f46434f09f4922e53 (patch)
tree54dd3199d302979b86f3f2ab1b46beedc2351e9b /xlators/cluster/dht/src/dht-rebalance.c
parent3042613687d976c4e8938fb411cedc6e6ef559bf (diff)
cluster/dht: Fix races to avoid deletion of linkto
file Explanation of Race between rebalance processes: https://bugzilla.redhat.com/show_bug.cgi?id=1110694#c4 scenario-1: =========== STATE 1: BRICK-1 only one brick Cached File in the system STATE 2: Add brick-2 BRICK-1 BRICK-2 STATE 3: Lookup of File on brick-2 by this node's rebalance will fail because hashed file is not created yet. So dht_lookup_everywhere is about to get called. STATE 4: As part of lookup link file at brick-2 will be created. STATE 5: getxattr to check that cached file belongs to this node is done STATE 6: dht_lookup_everywhere_cbk detects the link created by rebalance-1. It will unlink it. STATE 7: getxattr at the link file with "pathinfo" key will be called will fail as the link file is deleted by rebalance on node-2 Fix: So in the STATE 6, we should avoid the deletion of link file. Every time dht_lookup_everywhere gets called, lookup will be performed on all the nodes. So to avoid STATE 6, if linkto file is found, it is not deleted until valid case is found in dht_lookup_everywhere_done. Case 1: if linkto file points to cached node, and cached file exists, uwind with success. Case 2: if linkto does not point to current cached node, and cached file exists: a) Unlink stale link file b) Create new link file Case 3: Only linkto file exists: Delete linkto file Case 4: Only cached file Create link file (Handled event without patch) Case 5: Neither cached nor hashed file is present Return with ENOENT (handled even without patch) Reviewed-on: http://review.gluster.org/8231 ****************************************************************************** scenario-2: =========== cluster/dht: Modified logic of linkto file deletion on non-hashed Currently whenever dht_lookup_everywhere gets called, if in dht_lookup_everywhere_cbk, a linkto file is found on non-hashed subvolume, file is unlinked. But there are cases when this file is under migration. Under such condition, we should avoid deletion of file. When some other rebalance process changes the layout of parent such that dst_file (w.r.t. migration) falls on non-hashed node, then may be lookup could have found it as linkto file but just before unlink, file is under migration or already migrated In such cased unlink can be avoided. Race: ------- If we have two bricks (brick-1 and brick-2) with initial file "a" under BaseDir which is hashed as well as cached on (brick-1). Assume "a" hashing gives 44. Brick-1 Brick-2 Initial Setup: BaseDir/a BaseDir [1-50] [51-100] Now add new-brick Brick-3. 1. Rebalance-1 on node Node-1 (Brick-1 node) will reset the BaseDir Layout. 2. After that it will perform a) Create linkto file on new-hashed (brick-2) b) Perform file migration. 1.Rebalance-1 Fixes the base-layout: Brick-1 Brick-2 Brick-3 --------- ---------- ------------ BaseDir/a BaseDir BaseDir [1-33] [34-66] [67-100] 2. Only a) is BaseDir/a BaseDir/a(linkto) BaseDir performed Create linktofile Now rebalance 2 on node-2 jumped in and it will perform step 1 and 2-a. After (rebal-2, step-1), it changes the layout of the BaseDir. BaseDir/a BaseDir/a(link) BaseDir [67-100] [1-33] [34-66] For (rebale-2, step-2), It will perform lookup at Brick-3 as w.r.t new layout 44 falls for brick-3. But lookup will fail. So dht_lookup_everywhere gets called. NOTE: On brick-2 by rebalance-1, a linkto file was created. Currently that linkto files gets deleted by rebalance-2 lookup as it is considered as stale linkto file. But with patch if rebalance is already in progress or rebalance is over, linkto file will not be unlinked. If rebalance is in progress fd will be open and if rebalance is over then linkto file wont be set. Reviewed-on: http://review.gluster.org/8345 ******************************************************************************* scenario-3: =========== cluster/dht: Added keys in dht_lookup_everywhere_done Case where both cached (C1) and hashed file are found, but hash does not point to above cached node (C1), then dont unlink if either fd-is-open on hashed or linkto-xattr is not found. Reviewed-on: http://review.gluster.org/8429 BUG: 1139995 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Change-Id: I86d0a21d4c0501c45d837101ced4f96d6fedc5b9 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: susant palai <spalai@redhat.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com> Reviewed-on: http://review.gluster.org/8674 Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c97
1 files changed, 78 insertions, 19 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index d6e34f92036..725e0c8c7b0 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -237,13 +237,15 @@ out:
}
static inline int
-__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd)
+__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, dict_t *dict,
+ fd_t **dst_fd)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ struct iatt check_stbuf = {0,};
this = THIS;
@@ -300,6 +302,46 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
goto out;
}
+ /*Reason of doing lookup after create again:
+ *In the create, there is some time-gap between opening fd at the
+ *server (posix_layer) and binding it in server (incrementing fd count),
+ *so if in that time-gap, if other process sends unlink considering it
+ *as a linkto file, because inode->fd count will be 0, so file will be
+ *unlinked at the backend. And because furthur operations are performed
+ *on fd, so though migration will be done but will end with no file
+ *at the backend.
+ */
+
+
+ ret = syncop_lookup (to, loc, NULL, &check_stbuf, NULL, NULL);
+ if (!ret) {
+ if (uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "file %s exists in %s with different gfid,"
+ "found in lookup after create",
+ loc->path, to->name);
+ ret = -1;
+ fd_unref (fd);
+ goto out;
+ }
+
+ }
+
+ if (-ret == ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: file does not exists"
+ "on %s (%s)", loc->path, to->name, strerror (-ret));
+ ret = -1;
+ fd_unref (fd);
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (to, fd, dict, 0);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+
ret = syncop_ftruncate (to, fd, stbuf->ia_size);
if (ret < 0)
gf_log (this->name, GF_LOG_ERROR,
@@ -650,17 +692,18 @@ int
dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
int flag)
{
- int ret = -1;
- struct iatt new_stbuf = {0,};
- struct iatt stbuf = {0,};
- struct iatt empty_iatt = {0,};
- ia_prot_t src_ia_prot = {0,};
- fd_t *src_fd = NULL;
- fd_t *dst_fd = NULL;
- dict_t *dict = NULL;
- dict_t *xattr = NULL;
- dict_t *xattr_rsp = NULL;
- int file_has_holes = 0;
+ int ret = -1;
+ struct iatt new_stbuf = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
+ ia_prot_t src_ia_prot = {0,};
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ int rcvd_enoent_from_src = 0;
gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -827,15 +870,31 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
}
/* Do a stat and check the gfid before unlink */
+
+ /*
+ * Cached file changes its state from non-linkto to linkto file after
+ * migrating data. If lookup from any other mount-point is performed,
+ * converted-linkto-cached file will be treated as a stale and will be
+ * unlinked. But by this time, file is already migrated. So further
+ * failure because of ENOENT should not be treated as error
+ */
+
ret = syncop_stat (from, loc, &empty_iatt);
if (ret) {
gf_log (this->name, GF_LOG_WARNING,
"%s: failed to do a stat on %s (%s)",
loc->path, from->name, strerror (errno));
- goto out;
+
+ if (-ret != ENOENT) {
+ ret = -1;
+ goto out;
+ }
+
+ rcvd_enoent_from_src = 1;
}
- if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
+ if ((uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) &&
+ (!rcvd_enoent_from_src)) {
/* take out the source from namespace */
ret = syncop_unlink (from, loc);
if (ret) {