summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorVikas Gorur <vikas@gluster.com>2009-11-12 08:44:16 +0000
committerAnand V. Avati <avati@dev.gluster.com>2009-11-13 02:13:43 -0800
commit26fbaa23cafee4643b7604660762656c4a71684b (patch)
tree90bef7f03cb305dad016439706d46fdd0ed84f62 /xlators/cluster/afr
parent4717e5279e169a36a870a267039e788cf22602a8 (diff)
cluster/afr: Ensure directory contents are in sync during opendir.
The problem: If some files on the first subvolume disappeared without leaving a trace in the entry changelog (this can happen, for example, when an fsck has deleted files or when a hard drive is replaced), those files would never be self-healed even though they would be present on the second subvolume. This is because readdir is sent only to the first subvolume, and since the files don't appear in the directory listing, no lookup would ever be sent on them. This patch fixes this problem by doing a readdir on all the subvolumes during the first opendir on a directory inode. If a discrepancy in the contents is detected, entry self-heal in a special "force merge" mode is triggered on that directory. Signed-off-by: Vikas Gorur <vikas@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 249 (Self heal of a file that does not exist on the first subvolume) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=249
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c192
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c13
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c9
-rw-r--r--xlators/cluster/afr/src/afr.c67
-rw-r--r--xlators/cluster/afr/src/afr.h10
5 files changed, 281 insertions, 10 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index fe1f4dadf73..3bfb29e5a0e 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -42,8 +42,173 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "checksum.h"
#include "afr.h"
+#include "afr-self-heal.h"
+
+
+int
+afr_examine_dir_completion_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ afr_set_opendir_done (this, local->fd->inode, 1);
+
+ AFR_STACK_UNWIND (opendir, sh->orig_frame, local->op_ret,
+ local->op_errno, local->fd);
+
+ return 0;
+}
+
+
+gf_boolean_t
+__checksums_differ (uint32_t *checksum, int child_count)
+{
+ int ret = _gf_false;
+ int i = 0;
+
+ uint32_t cksum;
+
+ cksum = checksum[0];
+
+ while (i < child_count) {
+ if (cksum != checksum[i]) {
+ ret = _gf_true;
+ break;
+ }
+
+ cksum = checksum[i];
+ i++;
+ }
+
+ return ret;
+}
+
+
+int32_t
+afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+
+ int child_index = 0;
+
+ uint32_t entry_cksum;
+
+ int call_count = 0;
+ off_t last_offset = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ goto out;
+ }
+
+ if (op_ret == 0)
+ goto out;
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ entry_cksum = gf_rsync_weak_checksum (entry->d_name,
+ strlen (entry->d_name));
+ local->cont.opendir.checksum[child_index] ^= entry_cksum;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ }
+
+ /* read more entries */
+
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readdir,
+ local->fd, 131072, last_offset);
+
+out:
+ if ((op_ret == 0) || (op_ret == -1)) {
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (__checksums_differ (local->cont.opendir.checksum,
+ priv->child_count)) {
+
+ local->need_entry_self_heal = _gf_true;
+ local->self_heal.forced_merge = _gf_true;
+
+ local->cont.lookup.buf.st_mode = local->fd->inode->st_mode;
+
+ local->child_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "checksums of directory %s differ,"
+ " triggering forced merge",
+ local->loc.path);
+
+ afr_self_heal (frame, this,
+ afr_examine_dir_completion_cbk);
+ } else {
+ afr_set_opendir_done (this, local->fd->inode, 1);
+
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd);
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_examine_dir (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+
+ int i;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->cont.opendir.checksum = CALLOC (priv->child_count,
+ sizeof (*local->cont.opendir.checksum));
+
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->readdir,
+ local->fd, 131072, 0);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
int32_t
@@ -69,8 +234,29 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ if ((local->op_ret == 0) &&
+ !afr_is_opendir_done (this, fd->inode)) {
+
+ /*
+ * This is the first opendir on this inode. We need
+ * to check if the directory's entries are the same
+ * on all subvolumes. This is needed in addition
+ * to regular entry self-heal because the readdir
+ * call is sent only to the first subvolume, and
+ * thus files that exist only there will never be healed
+ * otherwise (assuming changelog shows no anamolies).
+ */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "reading contents of directory %s looking for mismatch",
+ local->loc.path);
+
+ afr_examine_dir (frame, this);
+
+ } else {
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd);
+ }
}
return 0;
@@ -108,6 +294,8 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
goto out;
}
+ loc_copy (&local->loc, loc);
+
frame->local = local;
local->fd = fd_ref (fd);
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b23eea391be..8142bdf1de1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1382,10 +1382,12 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
loc_copy (&lc->loc, &l->loc);
lc->child_up = memdup (l->child_up, priv->child_count);
- lc->xattr_req = dict_copy_with_ref (l->xattr_req, NULL);
+ if (l->xattr_req)
+ lc->xattr_req = dict_copy_with_ref (l->xattr_req, NULL);
lc->cont.lookup.inode = l->cont.lookup.inode;
- lc->cont.lookup.xattr = dict_copy_with_ref (l->cont.lookup.xattr, NULL);
+ if (l->cont.lookup.xattr)
+ lc->cont.lookup.xattr = dict_copy_with_ref (l->cont.lookup.xattr, NULL);
return lc;
}
@@ -1502,7 +1504,12 @@ afr_self_heal (call_frame_t *frame, xlator_t *this,
sh->background = _gf_true;
sh->orig_frame = frame;
- sh->completion_cbk = afr_bgsh_completion_cbk;
+
+ if (completion_cbk == NULL)
+ sh->completion_cbk = afr_bgsh_completion_cbk;
+ else
+ sh->completion_cbk = completion_cbk;
+
sh->unwind = afr_bgsh_unwind;
sh->buf = CALLOC (priv->child_count, sizeof (struct stat));
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index d952865a18f..5c47d887e52 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -2317,6 +2317,11 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
+ if (sh->forced_merge) {
+ sh->source = -1;
+ goto heal;
+ }
+
afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
priv->child_count, AFR_ENTRY_TRANSACTION);
@@ -2338,12 +2343,14 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
priv->child_count);
source = afr_sh_select_source (sh->sources, priv->child_count);
- sh->source = source;
+
+ sh->source = source;
if (sh->background) {
sh->unwind (frame, this);
}
+heal:
afr_sh_entry_sync_prepare (frame, this);
return 0;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index c56ce314a07..a0441bfb0bd 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -55,9 +55,9 @@
#include "afr-self-heal.h"
-
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
+#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
uint64_t
@@ -115,6 +115,60 @@ out:
uint64_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+
+ uint64_t ctx = 0;
+ uint64_t opendir_done = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0)
+ goto unlock;
+
+ opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+out:
+ return opendir_done;
+}
+
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode, int32_t opendir_done)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0) {
+ ctx = 0;
+ }
+
+ ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
+ | (opendir_done & AFR_ICTX_OPENDIR_DONE_MASK);
+
+ __inode_ctx_put (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+out:
+ return;
+}
+
+
+uint64_t
afr_read_child (xlator_t *this, inode_t *inode)
{
int ret = 0;
@@ -320,6 +374,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
{ /* symlink */
FREE (local->cont.symlink.linkpath);
}
+
+ { /* opendir */
+ if (local->cont.opendir.checksum)
+ FREE (local->cont.opendir.checksum);
+ }
}
@@ -642,7 +701,7 @@ unlock:
lookup_buf->st_mode;
}
- afr_self_heal (frame, this, afr_self_heal_cbk);
+ afr_self_heal (frame, this, NULL);
} else {
AFR_STACK_UNWIND (lookup, frame, local->op_ret,
local->op_errno,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 45a1c986348..bde151866bf 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -72,6 +72,10 @@ typedef struct _afr_private {
} afr_private_t;
typedef struct {
+ /* Is this a self-heal triggered to forcibly merge the
+ directories? */
+ gf_boolean_t forced_merge;
+
/* array of stat's, one for each child */
struct stat *buf;
struct stat parentbuf;
@@ -273,6 +277,8 @@ typedef struct _afr_local {
int success_count;
int32_t op_ret;
int32_t op_errno;
+
+ uint32_t *checksum;
} opendir;
struct {
@@ -546,7 +552,11 @@ afr_frame_return (call_frame_t *frame);
void
afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain);
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode, int32_t opendir_done);
+uint64_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode);
#define AFR_STACK_UNWIND(fop, frame, params ...) \
do { \