diff options
author | Vikas Gorur <vikas@gluster.com> | 2009-11-12 08:44:16 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2009-11-13 02:13:43 -0800 |
commit | 26fbaa23cafee4643b7604660762656c4a71684b (patch) | |
tree | 90bef7f03cb305dad016439706d46fdd0ed84f62 /xlators/cluster/afr | |
parent | 4717e5279e169a36a870a267039e788cf22602a8 (diff) |
cluster/afr: Ensure directory contents are in sync during opendir.
The problem: If some files on the first subvolume disappeared
without leaving a trace in the entry changelog (this can happen,
for example, when an fsck has deleted files or when a hard drive
is replaced), those files would never be self-healed even though
they would be present on the second subvolume. This is because
readdir is sent only to the first subvolume, and since the files
don't appear in the directory listing, no lookup would ever be
sent on them.
This patch fixes this problem by doing a readdir on all the subvolumes
during the first opendir on a directory inode. If a discrepancy in the
contents is detected, entry self-heal in a special "force merge" mode
is triggered on that directory.
Signed-off-by: Vikas Gorur <vikas@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 249 (Self heal of a file that does not exist on the first subvolume)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=249
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 192 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 9 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 67 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 10 |
5 files changed, 281 insertions, 10 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index fe1f4dadf73..3bfb29e5a0e 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -42,8 +42,173 @@ #include "common-utils.h" #include "compat-errno.h" #include "compat.h" +#include "checksum.h" #include "afr.h" +#include "afr-self-heal.h" + + +int +afr_examine_dir_completion_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; + + afr_set_opendir_done (this, local->fd->inode, 1); + + AFR_STACK_UNWIND (opendir, sh->orig_frame, local->op_ret, + local->op_errno, local->fd); + + return 0; +} + + +gf_boolean_t +__checksums_differ (uint32_t *checksum, int child_count) +{ + int ret = _gf_false; + int i = 0; + + uint32_t cksum; + + cksum = checksum[0]; + + while (i < child_count) { + if (cksum != checksum[i]) { + ret = _gf_true; + break; + } + + cksum = checksum[i]; + i++; + } + + return ret; +} + + +int32_t +afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + gf_dirent_t *entries) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + gf_dirent_t * entry = NULL; + gf_dirent_t * tmp = NULL; + + int child_index = 0; + + uint32_t entry_cksum; + + int call_count = 0; + off_t last_offset = 0; + + priv = this->private; + local = frame->local; + + child_index = (long) cookie; + + if (op_ret == -1) { + local->op_ret = -1; + goto out; + } + + if (op_ret == 0) + goto out; + + list_for_each_entry_safe (entry, tmp, &entries->list, list) { + entry_cksum = gf_rsync_weak_checksum (entry->d_name, + strlen (entry->d_name)); + local->cont.opendir.checksum[child_index] ^= entry_cksum; + } + + list_for_each_entry (entry, &entries->list, list) { + last_offset = entry->d_off; + } + + /* read more entries */ + + STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->readdir, + local->fd, 131072, last_offset); + +out: + if ((op_ret == 0) || (op_ret == -1)) { + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (__checksums_differ (local->cont.opendir.checksum, + priv->child_count)) { + + local->need_entry_self_heal = _gf_true; + local->self_heal.forced_merge = _gf_true; + + local->cont.lookup.buf.st_mode = local->fd->inode->st_mode; + + local->child_count = afr_up_children_count (priv->child_count, + local->child_up); + + gf_log (this->name, GF_LOG_DEBUG, + "checksums of directory %s differ," + " triggering forced merge", + local->loc.path); + + afr_self_heal (frame, this, + afr_examine_dir_completion_cbk); + } else { + afr_set_opendir_done (this, local->fd->inode, 1); + + AFR_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd); + } + } + } + + return 0; +} + + +int +afr_examine_dir (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int i; + int call_count = 0; + + local = frame->local; + priv = this->private; + + local->cont.opendir.checksum = CALLOC (priv->child_count, + sizeof (*local->cont.opendir.checksum)); + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->readdir, + local->fd, 131072, 0); + + if (!--call_count) + break; + } + } + + return 0; +} int32_t @@ -69,8 +234,29 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie, call_count = afr_frame_return (frame); if (call_count == 0) { - AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); + if ((local->op_ret == 0) && + !afr_is_opendir_done (this, fd->inode)) { + + /* + * This is the first opendir on this inode. We need + * to check if the directory's entries are the same + * on all subvolumes. This is needed in addition + * to regular entry self-heal because the readdir + * call is sent only to the first subvolume, and + * thus files that exist only there will never be healed + * otherwise (assuming changelog shows no anamolies). + */ + + gf_log (this->name, GF_LOG_TRACE, + "reading contents of directory %s looking for mismatch", + local->loc.path); + + afr_examine_dir (frame, this); + + } else { + AFR_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd); + } } return 0; @@ -108,6 +294,8 @@ afr_opendir (call_frame_t *frame, xlator_t *this, goto out; } + loc_copy (&local->loc, loc); + frame->local = local; local->fd = fd_ref (fd); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b23eea391be..8142bdf1de1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1382,10 +1382,12 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this) loc_copy (&lc->loc, &l->loc); lc->child_up = memdup (l->child_up, priv->child_count); - lc->xattr_req = dict_copy_with_ref (l->xattr_req, NULL); + if (l->xattr_req) + lc->xattr_req = dict_copy_with_ref (l->xattr_req, NULL); lc->cont.lookup.inode = l->cont.lookup.inode; - lc->cont.lookup.xattr = dict_copy_with_ref (l->cont.lookup.xattr, NULL); + if (l->cont.lookup.xattr) + lc->cont.lookup.xattr = dict_copy_with_ref (l->cont.lookup.xattr, NULL); return lc; } @@ -1502,7 +1504,12 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, sh->background = _gf_true; sh->orig_frame = frame; - sh->completion_cbk = afr_bgsh_completion_cbk; + + if (completion_cbk == NULL) + sh->completion_cbk = afr_bgsh_completion_cbk; + else + sh->completion_cbk = completion_cbk; + sh->unwind = afr_bgsh_unwind; sh->buf = CALLOC (priv->child_count, sizeof (struct stat)); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index d952865a18f..5c47d887e52 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -2317,6 +2317,11 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; + if (sh->forced_merge) { + sh->source = -1; + goto heal; + } + afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr, priv->child_count, AFR_ENTRY_TRANSACTION); @@ -2338,12 +2343,14 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this) priv->child_count); source = afr_sh_select_source (sh->sources, priv->child_count); - sh->source = source; + + sh->source = source; if (sh->background) { sh->unwind (frame, this); } +heal: afr_sh_entry_sync_prepare (frame, this); return 0; diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index c56ce314a07..a0441bfb0bd 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -55,9 +55,9 @@ #include "afr-self-heal.h" - -#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL -#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL +#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL +#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL +#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL uint64_t @@ -115,6 +115,60 @@ out: uint64_t +afr_is_opendir_done (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t opendir_done = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK; + } +unlock: + UNLOCK (&inode->lock); + +out: + return opendir_done; +} + + +void +afr_set_opendir_done (xlator_t *this, inode_t *inode, int32_t opendir_done) +{ + uint64_t ctx = 0; + int ret = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx) + | (opendir_done & AFR_ICTX_OPENDIR_DONE_MASK); + + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); +out: + return; +} + + +uint64_t afr_read_child (xlator_t *this, inode_t *inode) { int ret = 0; @@ -320,6 +374,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* symlink */ FREE (local->cont.symlink.linkpath); } + + { /* opendir */ + if (local->cont.opendir.checksum) + FREE (local->cont.opendir.checksum); + } } @@ -642,7 +701,7 @@ unlock: lookup_buf->st_mode; } - afr_self_heal (frame, this, afr_self_heal_cbk); + afr_self_heal (frame, this, NULL); } else { AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 45a1c986348..bde151866bf 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -72,6 +72,10 @@ typedef struct _afr_private { } afr_private_t; typedef struct { + /* Is this a self-heal triggered to forcibly merge the + directories? */ + gf_boolean_t forced_merge; + /* array of stat's, one for each child */ struct stat *buf; struct stat parentbuf; @@ -273,6 +277,8 @@ typedef struct _afr_local { int success_count; int32_t op_ret; int32_t op_errno; + + uint32_t *checksum; } opendir; struct { @@ -546,7 +552,11 @@ afr_frame_return (call_frame_t *frame); void afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain); +void +afr_set_opendir_done (xlator_t *this, inode_t *inode, int32_t opendir_done); +uint64_t +afr_is_opendir_done (xlator_t *this, inode_t *inode); #define AFR_STACK_UNWIND(fop, frame, params ...) \ do { \ |