diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2014-05-23 12:51:28 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2014-06-12 01:10:45 -0700 |
commit | d240958fb36e652a2b910fe79414fb8b934e6158 (patch) | |
tree | 99f5357a7aa527db304ce0487fade3905de0d4c1 /xlators/storage | |
parent | afeaab53f6ca450a3147a6230906a83ee1bebe5a (diff) |
storage/posix: Janitor should guard against dir renames.
Problem:
A directory rename while a brick is down can cause the gfid handle of that
directory to be deleted; it stays missing until the next lookup happens on that directory.
*) Self-heal does not have intelligence to detect renames at the moment. So it
has to delete the directory 'd' using special flags, because it has to perform
'rm -rf' of that directory as it is not empty. Posix xlator implements this by
renaming the directory deleted to 'landfill' directory in '.glusterfs' where
janitor thread will perform actual rm -rf by traversing the directory. Janitor
thread wakes up every 10 minutes to check if there are any directories to be
deleted and deletes them. As part of deleting it also deletes the gfid-handles.
Steps to hit the problem:
1) On a replicate volume create a directory 'd', file in 'd' called 'f' so the
directory 'd' is not empty.
2) Bring one of the bricks down (let's call it brick-a; the other one is brick-b).
3) Rename d to d1
4) When brick-a comes online again, self-heal deletes directory 'd' and creates
directory 'd1' on brick-a for performing self-heal. So on brick-a,
gfid-handle of 'd' pointing to 'd' is deleted and recreated to point to 'd1'.
5) This directory 'd' with all its directory hierarchy (for now just the file
'f') will be under 'landfill' directory.
6) When the janitor thread wakes up, it deletes directory 'd' and the gfid-handle
of 'd' without realizing that the handle is now pointing to 'd1'. Thus 'd1' loses
its gfid-handle.
Fix:
Delete gfid-handle for a directory only when the gfid-handle is stale.
Change-Id: I21265b3bd3852f0967d916aaa21108ae5c9e7373
BUG: 1101143
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/7879
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/storage')
-rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 63 |
1 files changed, 62 insertions, 1 deletions
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 2ddb41a7be2..143267637b5 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -972,6 +972,67 @@ out: return ret; } +static void +del_stale_dir_handle (xlator_t *this, uuid_t gfid) +{ + char newpath[PATH_MAX] = {0, }; + uuid_t gfid_curr = {0, }; + ssize_t size = -1; + gf_boolean_t stale = _gf_false; + char *hpath = NULL; + struct stat stbuf = {0, }; + struct iatt iabuf = {0, }; + + MAKE_HANDLE_GFID_PATH (hpath, this, gfid, NULL); + + /* check that it is valid directory handle */ + size = sys_lstat (hpath, &stbuf); + if (size < 0) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Handle stat failed: " + "%s", hpath, strerror (errno)); + goto out; + } + + iatt_from_stat (&iabuf, &stbuf); + if (iabuf.ia_nlink != 1 || !IA_ISLNK (iabuf.ia_type)) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Handle nlink %d %d", + hpath, iabuf.ia_nlink, IA_ISLNK (iabuf.ia_type)); + goto out; + } + + size = posix_handle_path (this, gfid, NULL, newpath, sizeof (newpath)); + if (size <= 0 && errno == ENOENT) { + gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath, + strerror (ENOENT)); + stale = _gf_true; + goto out; + } + + size = sys_lgetxattr (newpath, GFID_XATTR_KEY, gfid_curr, 16); + if (size < 0 && errno == ENOENT) { + gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath, + strerror (ENOENT)); + stale = _gf_true; + } else if (size == 16 && uuid_compare (gfid, gfid_curr)) { + gf_log (this->name, GF_LOG_DEBUG, "%s: mismatching gfid: %s, " + "at %s", hpath, uuid_utoa (gfid_curr), newpath); + stale = _gf_true; + } + +out: + if (stale) { + size = sys_unlink (hpath); + if (size < 0 && errno != ENOENT) + gf_log (this->name, GF_LOG_ERROR, "%s: Failed to " + "remove handle to %s (%s)", hpath, newpath, + strerror (errno)); + } else if (size == 16) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Fresh handle for " + "%s with gfid %s", hpath, newpath, + 
uuid_utoa (gfid_curr)); + } + return; +} static int janitor_walker (const char *fpath, const struct stat *sb, @@ -1002,7 +1063,7 @@ janitor_walker (const char *fpath, const struct stat *sb, "removing directory %s", fpath); rmdir (fpath); - posix_handle_unset (this, stbuf.ia_gfid, NULL); + del_stale_dir_handle (this, stbuf.ia_gfid); } break; } |