cluster/dht: Fix races to avoid deletion of linkto file

Explanation of Race between rebalance processes: https://bugzilla.redhat.com/show_bug.cgi?id=1110694#c4 STATE 1: BRICK-1 only one brick Cached File in the system STATE 2: Add brick-2 BRICK-1 BRICK-2 STATE 3: Lookup of File on brick-2 by this node's rebalance will fail because hashed file is not created yet. So dht_lookup_everywhere is about to get called. STATE 4: As part of lookup link file at brick-2 will be created. STATE 5: getxattr to check that cached file belongs to this node is done STATE 6: dht_lookup_everywhere_cbk detects the link created by rebalance-1. It will unlink it. STATE 7: getxattr at the link file with "pathinfo" key will be called will fail as the link file is deleted by rebalance on node-2 Fix: So in the STATE 6, we should avoid the deletion of link file. Every time dht_lookup_everywhere gets called, lookup will be performed on all the nodes. So to avoid STATE 6, if linkto file is found, it is not deleted until valid case is found in dht_lookup_everywhere_done. Case 1: if linkto file points to cached node, and cached file exists, uwind with success. Case 2: if linkto does not point to current cached node, and cached file exists: a) Unlink stale link file b) Create new link file Case 3: Only linkto file exists: Delete linkto file Case 4: Only cached file Create link file (Handled event without patch) Case 5: Neither cached nor hashed file is present Return with ENOENT (handled even without patch) Change-Id: Ibf53671410d8d613b8e2e7e5d0ec30fc7dcc0298 BUG: 1116150 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Reviewed-on: http://review.gluster.org/8231 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
author: Venkatesh Somyajulu <vsomyaju@redhat.com> 2014-07-15 18:17:19 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2014-07-18 03:11:06 -0700
commit: 74d92e322e3c9f4f70ddfbf9b0e2140922009658 (patch)
tree: b569c431d68676188a2df87711d4a975887b4ba9 /xlators/cluster/dht/src/dht-rebalance.c
parent: 52da727e7564963a8a244fc5cb7028315e458529 (diff)
1 files changed, 57 insertions, 3 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index eca1d52b62b..96bc02075cb 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -344,6 +344,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
         int          ret  = -1;
         fd_t        *fd   = NULL;
         struct iatt  new_stbuf = {0,};
+        struct iatt  check_stbuf= {0,};
         dht_conf_t  *conf = NULL;
 
         this = THIS;
@@ -412,6 +413,43 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
                 goto out;
         }
 
+
+        /*Reason of doing lookup after create again:
+         *In the create, there is some time-gap between opening fd at the
+         *server (posix_layer) and binding it in server (incrementing fd count),
+         *so if in that time-gap, if other process sends unlink considering it
+         *as a linkto file, because inode->fd count will be 0, so file will be
+         *unlinked at the backend. And because furthur operations are performed
+         *on fd, so though migration will be done but will end with no file
+         *at  the backend.
+         */
+
+
+        ret = syncop_lookup (to, loc, NULL, &check_stbuf, NULL, NULL);
+        if (!ret) {
+
+                if (uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                DHT_MSG_GFID_MISMATCH,
+                                "file %s exists in %s with different gfid,"
+                                "found in lookup after create",
+                                loc->path, to->name);
+                        ret = -1;
+                        fd_unref (fd);
+                        goto out;
+                }
+
+        }
+
+        if (-ret == ENOENT) {
+                gf_msg (this->name, GF_LOG_ERROR, 0,
+                        DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exists"
+                        "on %s (%s)", loc->path, to->name, strerror (-ret));
+                ret = -1;
+                fd_unref (fd);
+                goto out;
+        }
+
         ret = syncop_fsetxattr (to, fd, xattr, 0);
         if (ret < 0)
                 gf_msg (this->name, GF_LOG_WARNING, 0,
@@ -831,6 +869,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
         dict_t         *xattr_rsp      = NULL;
         int             file_has_holes = 0;
         dht_conf_t     *conf           = this->private;
+        int            rcvd_enoent_from_src = 0;
 
         gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
                 loc->path, from->name, to->name);
@@ -1043,17 +1082,32 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
         }
 
         /* Do a stat and check the gfid before unlink */
+
+        /*
+         * Cached file changes its state from non-linkto to linkto file after
+         * migrating data. If lookup from any other mount-point is performed,
+         * converted-linkto-cached file will be treated as a stale and will be
+         * unlinked. But by this time, file is already migrated. So further
+         * failure because of ENOENT should  not be treated as error
+         */
+
         ret = syncop_stat (from, loc, &empty_iatt);
         if (ret) {
                 gf_msg (this->name, GF_LOG_WARNING, 0,
                         DHT_MSG_MIGRATE_FILE_FAILED,
                         "%s: failed to do a stat on %s (%s)",
                         loc->path, from->name, strerror (-ret));
-                ret = -1;
-                goto out;
+
+                if (-ret != ENOENT) {
+                        ret = -1;
+                        goto out;
+                }
+
+                rcvd_enoent_from_src = 1;
         }
 
-        if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
+        if ((uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) &&
+            (!rcvd_enoent_from_src)) {
                 /* take out the source from namespace */
                 ret = syncop_unlink (from, loc);
                 if (ret) {
author	Venkatesh Somyajulu <vsomyaju@redhat.com>	2014-07-15 18:17:19 +0530
committer	Vijay Bellur <vbellur@redhat.com>	2014-07-18 03:11:06 -0700
commit	74d92e322e3c9f4f70ddfbf9b0e2140922009658 (patch)
tree	b569c431d68676188a2df87711d4a975887b4ba9 /xlators/cluster/dht/src/dht-rebalance.c
parent	52da727e7564963a8a244fc5cb7028315e458529 (diff)