summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorPranith K <pranithk@gluster.com>2011-07-14 06:31:08 +0000
committerAnand Avati <avati@gluster.com>2011-07-17 07:45:29 -0700
commit5dff9a2938c199285662bb5b33d7e3aeda0e3fb6 (patch)
tree20af21a206a8ed7937101005cbb4b9128b5c48fd /xlators
parent84c3d7a83a8c84ca11514202a1bc365026fd1c87 (diff)
cluster/afr: Fix conflict files and gfid self-heal
Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Signed-off-by: Anand Avati <avati@gluster.com> BUG: 2745 (failure to detect split brain) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2745
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c5
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c1172
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h24
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c30
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c104
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c74
7 files changed, 811 insertions, 600 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 894442936b5..d6b358e2be5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1357,6 +1357,11 @@ afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
priv->child_count * sizeof (*sh->parentbufs));
afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ local->cont.lookup.xattr = NULL;
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (sh->xattr[i])
local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 6da666804de..58e979791e7 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -54,6 +54,8 @@ afr_build_parent_loc (loc_t *parent, loc_t *child)
char *tmp = NULL;
if (!child->parent) {
+ //this should never be called with root as the child
+ GF_ASSERT (0);
loc_copy (parent, child);
return;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index b28f9114fc0..d76e6c8de57 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -27,6 +27,21 @@
#include "afr-self-heal.h"
#include "pump.h"
+//intersection[child] is set to 1 if child is present in both set1 and set2, else 0
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count)
+{
+ int i = 0;
+
+ memset (intersection, 0, sizeof (*intersection) * child_count);
+ for (i = 0; i < child_count; i++) {
+ intersection[i] = afr_is_child_present (set1, child_count, i)
+ && afr_is_child_present (set2, child_count,
+ i);
+ }
+}
+
/**
* select_source - select a source and return it
*/
@@ -71,6 +86,14 @@ afr_sh_source_count (int sources[], int child_count)
}
void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
+{
+ sh->op_ret = -1;
+ if (afr_error_more_important (sh->op_errno, op_errno))
+ sh->op_errno = op_errno;
+}
+
+void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
{
afr_private_t * priv = this->private;
@@ -186,31 +209,6 @@ out:
return ret;
}
-
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
typedef enum {
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
@@ -585,6 +583,60 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,
}
int
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ int nsources = -1;
+
+ priv = this->private;
+
+ if (afr_get_children_count (success_children, priv->child_count) == 0)
+ goto out;
+
+ afr_build_pending_matrix (priv->pending_key, pending_matrix,
+ xattr, type, priv->child_count);
+
+ sh_type = afr_self_heal_type_for_transaction (type);
+ if (AFR_SELF_HEAL_INVALID == sh_type)
+ goto out;
+
+ afr_sh_print_pending_matrix (pending_matrix, this);
+
+ nsources = afr_mark_sources (sources, pending_matrix, bufs,
+ priv->child_count, sh_type,
+ success_children, this->name);
+out:
+ return nsources;
+}
+
+/**
+ * mark_sources: Mark all 'source' nodes and return number of source
+ * nodes found
+ *
+ * A node (a row in the pending matrix) belongs to one of
+ * three categories:
+ *
+ * M is the pending matrix.
+ *
+ * 'innocent' - M[i] is all zeroes
+ * 'fool' - M[i] has i'th element = 1 (self-reference)
+ * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
+ *
+ * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
+ * needed.
+ *
+ * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
+ * a split-brain. If one wise node refers to the other but the other doesn't
+ * refer back, the referrer is a source.
+ *
+ * All fools are sinks, unless there are no 'wise' nodes. In that case,
+ * one of the fools is made a source.
+ */
+
+int
afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
int32_t child_count, afr_self_heal_type type,
int32_t *valid_children, const char *xlator_name)
@@ -886,7 +938,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
sh->xattr[i] = NULL;
}
- if (local->govinda_gOvinda) {
+ if (local->govinda_gOvinda || sh->op_failed) {
gf_log (this->name, GF_LOG_INFO,
"split brain found, aborting selfheal of %s",
local->loc.path);
@@ -904,7 +956,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
static int
-sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -918,524 +970,751 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
-
-static int
-sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop)
+static void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent)
{
- afr_local_t *local = NULL;
- loc_t *parent_loc = cookie;
- int call_count = 0;
+ int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ child_index = (long) cookie;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on %s failed: %s",
- local->loc.path, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ sh->buf[child_index] = *buf;
+ sh->parentbuf = *postparent;
+ sh->parentbufs[child_index] = *postparent;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ sh->xattr[child_index] = dict_ref (xattr);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s on subvolume %s => -1 (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ local->self_heal.child_errno[child_index] = op_errno;
+ }
}
+ UNLOCK (&frame->lock);
+ return;
+}
- if (parent_loc) {
- loc_wipe (parent_loc);
- GF_FREE (parent_loc);
+gf_boolean_t
+afr_valid_ia_type (ia_type_t ia_type)
+{
+ switch (ia_type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ case IA_IFDIR:
+ return _gf_true;
+ default:
+ return _gf_false;
}
+ return _gf_false;
+}
- call_count = afr_frame_return (frame);
+void
+afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
+{
+ call_frame_t *impunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
- if (call_count == 0) {
- STACK_DESTROY (frame->root);
+ impunge_frame = copy_frame (frame);
+ if (!impunge_frame) {
+ op_errno = ENOMEM;
+ goto out;
}
- return 0;
-}
+ ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
+ local = frame->local;
+ sh = &local->self_heal;
+ impunge_frame->local = impunge_local;
+ impunge_sh = &impunge_local->self_heal;
+ impunge_sh->sh_frame = frame;
+ impunge_sh->active_source = sh->source;
+ impunge_sh->impunging_entry_mode = st_mode_from_ia (buf->ia_prot,
+ buf->ia_type);
+ impunge_sh->impunge_ret_child = child_index;
+ loc_copy (&impunge_local->loc, &local->loc);
+ sh->impunge_done = impunge_done;
+ impunge_local->call_count = 1;
+ afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf,
+ postparent);
+ return;
+out:
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s",
+ local->loc.path, strerror (op_errno));
+ impunge_done (frame, this, child_index, -1, op_errno);
+}
-static int
-sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- call_frame_t *setattr_frame = NULL;
- int call_count = 0;
- int child_index = 0;
- loc_t *parent_loc = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
+int
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+ int32_t op_ret, int32_t op_errno)
+{
+ int call_count = 0;
+ afr_local_t *local = NULL;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- child_index = (long) cookie;
-
- stbuf.ia_atime = sh->buf[sh->source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[sh->source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec;
+ if (op_ret == -1)
+ gf_log (this->name, GF_LOG_ERROR,
+ "create entry %s failed, on child %d reason, %s",
+ local->loc.path, child, strerror (op_errno));
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_sh_missing_entries_finish (frame, this);
+ return 0;
+}
- stbuf.ia_uid = sh->buf[sh->source].ia_uid;
- stbuf.ia_gid = sh->buf[sh->source].ia_gid;
+static int
+sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int type = 0;
+ afr_private_t *priv = NULL;
+ int enoent_count = 0;
+ int i = 0;
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- if (op_ret == -1) {
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (enoent_count == 0) {
gf_log (this->name, GF_LOG_INFO,
- "%s: failed to mknod on %s (%s)",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
+ /* proceed to next step - metadata self-heal */
+ afr_sh_missing_entries_finish (frame, this);
+ return 0;
}
- if (op_ret == 0) {
- setattr_frame = copy_frame (frame);
+ buf = &sh->buf[sh->source];
+ postparent = &sh->parentbufs[sh->source];
- setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t),
- gf_afr_mt_afr_local_t);
-
- ((afr_local_t *)setattr_frame->local)->call_count = 2;
-
- gf_log (this->name, GF_LOG_TRACE,
- "setattr (%s) on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) 0,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- parent_loc = GF_CALLOC (1, sizeof (*parent_loc),
- gf_afr_mt_loc_t);
- afr_build_parent_loc (parent_loc, &local->loc);
-
- STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk,
- (void *) (long) parent_loc,
- priv->children[child_index],
- priv->children[child_index]->fops->setattr,
- parent_loc, &sh->parentbuf, valid);
+ type = buf->ia_type;
+ if (!afr_valid_ia_type (type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unknown file type: 0%o", local->loc.path, type);
+ local->govinda_gOvinda = 1;
+ afr_sh_missing_entries_finish (frame, this);
+ goto out;
}
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- sh_missing_entries_finish (frame, this);
+ local->call_count = enoent_count;
+ for (i = 0; i < priv->child_count; i++) {
+ //If !child_up errno will be zero
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_call_entry_impunge_recreate (frame, this, i,
+ buf, postparent,
+ afr_sh_create_entry_cbk);
+ enoent_count--;
}
-
+ GF_ASSERT (enoent_count == 0);
+out:
return 0;
}
-
-static int
-sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
- dev_t ia_rdev = 0;
- dict_t *dict = NULL;
- dev_t st_rdev = 0;
+ int32_t op_errno = 0;
+ ia_type_t ia_type = IA_INVAL;
+ int32_t nsources = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
+ if (afr_get_children_count (sh->success_children,
+ priv->child_count) == 0) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ goto out;
+ }
- call_count = enoent_count;
- local->call_count = call_count;
+ if (afr_gfid_missing_count (this->name, sh->success_children,
+ sh->buf, priv->child_count,
+ local->loc.path) ||
+ afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ //this can happen if finding the fresh parent dir failed
+ local->govinda_gOvinda = 1;
+ sh->op_failed = 1;
+ op_errno = EIO;
+ goto out;
+ }
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
- ia_rdev = sh->buf[sh->source].ia_rdev;
- st_rdev = makedev (ia_major (ia_rdev), ia_minor (ia_rdev));
+ //the conflict checks above passed, so the ia_type cannot conflict here
+ ia_type = sh->buf[sh->success_children[0]].ia_type;
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ afr_transaction_type_get (ia_type));
+ if (nsources < 0) {
+ gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "mknod %s mode 0%o device type %"PRId64" on %d subvolumes",
- local->loc.path, st_mode, (uint64_t)st_rdev, enoent_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ sh->source = sh->fresh_children[0];
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ if (sh->gfid_sh_success_cbk)
+ sh->gfid_sh_success_cbk (frame, this);
+ sh_missing_entries_create (frame, this);
+ return;
+out:
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
- ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- local->loc.path);
+static int
+afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, st_mode, st_rdev, dict);
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+ call_count = afr_frame_return (frame);
- if (dict)
- dict_unref (dict);
+ if (call_count == 0)
+ afr_sh_missing_entries_lookup_done (frame, this);
return 0;
}
-
-static int
-sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
+int
+afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+ int32_t op_ret, int32_t op_errno)
{
+ int call_count = 0;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot,
- sh->buf[sh->source].ia_type);
-
- dict = dict_new ();
- if (!dict) {
+ GF_ASSERT (sh->post_remove_call);
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- sh_missing_entries_finish (frame, this);
- return 0;
+ "purge entry %s failed, on child %d reason, %s",
+ local->loc.path, child, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_sh_set_error (sh, EIO);
+ sh->op_failed = 1;
+ }
+ UNLOCK (&frame->lock);
}
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ sh->post_remove_call (frame, this);
+ return 0;
+}
- ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "%s: inode gfid set failed", local->loc.path);
+void
+afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ afr_expunge_done_cbk_t expunge_done)
+{
+ call_frame_t *expunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int32_t op_errno = 0;
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ goto out;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "mkdir %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
+ ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- if (!strcmp (local->loc.path, "/")) {
- /* We shouldn't try to create "/" */
+ local = frame->local;
+ sh = &local->self_heal;
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ loc_copy (&expunge_local->loc, &local->loc);
+ sh->expunge_done = expunge_done;
+ afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf);
+ return;
+out:
+ gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
+ local->loc.path, strerror (op_errno));
+ expunge_done (frame, this, child_index, -1, op_errno);
+}
- sh_missing_entries_finish (frame, this);
+void
+afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
+ int32_t *fresh_children,
+ unsigned int child_count)
+{
+ int i = 0;
- return 0;
- } else {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, st_mode, dict);
- if (!--call_count)
- break;
- }
+ for (i = 0; i < child_count; i++) {
+ if (afr_is_child_present (success_children, child_count, i) &&
+ !afr_is_child_present (fresh_children, child_count, i)) {
+ sh->child_errno[i] = ENOENT;
+ GF_ASSERT (sh->xattr[i]);
+ dict_unref (sh->xattr[i]);
+ sh->xattr[i] = NULL;
}
}
+}
- if (dict)
- dict_unref (dict);
+int
+afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->op_failed) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ if (afr_gfid_missing_count (this->name, sh->fresh_children,
+ sh->buf, priv->child_count,
+ local->loc.path)) {
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_missing_entries_lookup_cbk,
+ _gf_true);
+ } else {
+ //No need to set gfid so goto missing entries lookup done
+ //Behave as if you have done the lookup
+ afr_sh_remove_stale_lookup_info (sh,
+ sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_children_copy (sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_missing_entries_lookup_done (frame, this);
+ }
+ }
return 0;
}
-
-static int
-sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
- const char *link, struct iatt *buf)
+gf_boolean_t
+afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
{
- afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- int i = 0;
- int ret = 0;
- int enoent_count = 0;
- int call_count = 0;
-
- local = frame->local;
sh = &local->self_heal;
- priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ return _gf_false;
+}
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: dict gfid set failed", local->loc.path);
+gf_boolean_t
+afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
+{
+ afr_self_heal_t *sh = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "symlink %s -> %s on %d subvolumes",
- local->loc.path, link, enoent_count);
+ sh = &local->self_heal;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- link, &local->loc, dict);
- if (!--call_count)
- break;
- }
- }
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_children, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- return 0;
+ return _gf_false;
}
-
-static int
-sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *link, struct iatt *sbuf)
+void
+afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t purge_condition (afr_local_t *local,
+ afr_private_t *priv,
+ int child))
{
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- if (op_ret > 0)
- sh_missing_entries_symlink (frame, this, link, sbuf);
- else {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do readlink on %s (%s)",
- local->loc.path, priv->children[sh->source]->name,
- strerror (op_errno));
- sh_missing_entries_finish (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (purge_condition (local, priv, i))
+ call_count++;
}
- return 0;
-}
+ if (call_count == 0) {
+ sh->post_remove_call (frame, this);
+ goto out;
+ }
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!purge_condition (local, priv, i))
+ continue;
+ afr_sh_call_entry_expunge_remove (frame, this,
+ (long) i, &sh->buf[i],
+ afr_sh_remove_entry_cbk);
+ }
+out:
+ return;
+}
-static int
-sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
local = frame->local;
sh = &local->self_heal;
- priv = this->private;
-
- STACK_WIND (frame, sh_missing_entries_readlink_cbk,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readlink,
- &local->loc, 4096);
+ sh->post_remove_call = afr_sh_missing_entries_finish;
- return 0;
+ afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
}
-
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- int type = 0;
- int i = 0;
afr_private_t *priv = NULL;
- int enoent_count = 0;
- int govinda_gOvinda = 0;
+ int i = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ sh->post_remove_call = afr_sh_purge_stale_entries_done;
+
for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
+ if (afr_is_child_present (sh->fresh_children,
+ priv->child_count, i))
continue;
- if (sh->child_errno[i]) {
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
- } else {
- if (type) {
- if (type != sh->buf[i].ia_type) {
- gf_log (this->name, GF_LOG_DEBUG,
- "file %s is not recoverable "
- "automatically!",
- local->loc.path);
-
- govinda_gOvinda = 1;
- }
- } else {
- sh->source = i;
- type = sh->buf[i].ia_type;
- }
- }
- }
+ if ((!local->child_up[i]) || sh->child_errno[i] != 0)
+ continue;
- if (govinda_gOvinda) {
- gf_log (this->name, GF_LOG_ERROR,
- "conflicting filetypes exist for path %s. returning.",
- local->loc.path);
+ GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
+ uuid_is_null (sh->buf[i].ia_gfid));
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
+ (uuid_compare (sh->buf[i].ia_gfid,
+ sh->entrybuf.ia_gfid)))
+ continue;
+
+ afr_fresh_children_add_child (sh->fresh_children,
+ i, priv->child_count);
- if (!type) {
- gf_log (this->name, GF_LOG_ERROR,
- "no source found for %s. all nodes down?. returning.",
- local->loc.path);
- /* subvolumes down and/or file does not exist */
- sh_missing_entries_finish (frame, this);
- return 0;
}
+ afr_sh_purge_entry_common (frame, this,
+ afr_sh_purge_stale_entry_condition);
+}
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- sh_missing_entries_finish (frame, this);
- return 0;
+void
+afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
+ struct iatt *save,
+ unsigned int child_count)
+{
+ int i = 0;
+ int child = 0;
+ gf_boolean_t saved = _gf_false;
+
+ GF_ASSERT (save);
+ //save the first child's iatt that has a non-null gfid, else the last iatt examined
+ for (i = 0; i < child_count; i++) {
+ child = children[i];
+ if (child == -1)
+ break;
+ *save = bufs[child];
+ saved = _gf_true;
+ if (!uuid_is_null (save->ia_gfid))
+ break;
}
+ GF_ASSERT (saved);
+}
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- sh_missing_entries_mknod (frame, this);
- break;
- case IA_IFLNK:
- sh_missing_entries_readlink (frame, this);
- break;
- case IA_IFDIR:
- sh_missing_entries_mkdir (frame, this);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
+void
+afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t fresh_child_enoents = 0;
+ int32_t fresh_parent_count = 0;
+ int32_t op_errno = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (afr_get_children_count (sh->success_children,
+ priv->child_count) == 0) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ goto fail;
+ }
+
+ //fresh_children starts as the intersection of success_children and fresh_parent_dirs;
+ //the remaining success_children are added to it later if they are not stale
+ afr_children_intersection_get (sh->success_children,
+ sh->fresh_parent_dirs,
+ sh->sources, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ memset (sh->sources, 0, sizeof (*sh->sources) * priv->child_count);
+
+ fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
+ priv->child_count);
+ //we need the enoent count of the subvols present in fresh_parent_dirs
+ fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
+ sh->child_errno,
+ priv->child_count, ENOENT);
+ if (fresh_child_enoents == fresh_parent_count) {
+ afr_sh_set_error (sh, ENOENT);
+ sh->op_failed = 1;
+ afr_sh_purge_entry (frame, this);
+ } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ afr_sh_save_child_iatts_from_policy (sh->fresh_children,
+ sh->buf, &sh->entrybuf,
+ priv->child_count);
+ afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_purge_stale_entry (frame, this);
+ } else {
+ op_errno = EIO;
local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
+ goto fail;
}
- return 0;
-}
+ return;
+fail:
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
static int
-sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
- int child_index = 0;
- afr_local_t *local = NULL;
int call_count = 0;
+
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_sh_children_lookup_done (frame, this);
+
+ return 0;
+}
+
+static int
+afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- mode_t st_mode = 0;
+ afr_local_t *local = NULL;
+ int enoent_count = 0;
+ int nsources = 0;
+ int source = -1;
local = frame->local;
+ sh = &local->self_heal;
priv = this->private;
- child_index = (long) cookie;
-
- if (buf)
- st_mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
+ /* If we can't find a fresh parent directory here, we won't know
+ * which subvol is correct without finding an ancestor directory
+ * that has correct xattrs. That could require lookups all the way
+ * up to the root, which we do not want to do. Instead:
+ * - if the parent dirs have conflicting gfids, fail the self-heal
+ * (and thus the lookup) with EIO;
+ * - if there are missing entries, we don't know whether to delete
+ * or create them, so fail with EIO;
+ * - if there are conflicting xattrs, fail with EIO.
+ */
+ if (afr_get_children_count (sh->success_children,
+ priv->child_count) == 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Parent dir lookup failed "
+ "for %s, in missing entry self-heal, continuing with "
+ "the rest of the self-heals", local->loc.path);
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- st_mode);
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (enoent_count > 0) {
+ gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ goto out;
+ }
- local->self_heal.buf[child_index] = *buf;
- local->self_heal.parentbuf = *postparent;
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ if (afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count, sh->parent_loc.path,
+ this->name)) {
+ gf_log (this->name, GF_LOG_INFO, "conflicting stat info for "
+ "parent dirs of %s", local->loc.path);
+ goto out;
+ }
- local->self_heal.child_errno[child_index] = op_errno;
- }
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION);
+ if (nsources < 0) {
+ gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ goto out;
+ }
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (source == -1) {
+ GF_ASSERT (0);
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ goto out;
}
- UNLOCK (&frame->lock);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_parent_dirs, priv->child_count);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_children_lookup_cbk, _gf_false);
+ return 0;
+out:
+ afr_sh_set_error (sh, EIO);
+ sh->op_failed = 1;
+ afr_sh_missing_entries_finish (frame, this);
+ return 0;
+}
+
+int
+afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ int call_count = 0;
+
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
call_count = afr_frame_return (frame);
- if (call_count == 0) {
- sh_missing_entries_create (frame, this);
- }
+ if (call_count == 0)
+ afr_sh_find_fresh_parents (frame, this);
return 0;
}
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+{
+ int i = 0;
-static int
-sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
+ for (i = 0; i < child_count; i++) {
+ memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
+ memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
+ sh->child_errno[i] = 0;
+ }
+ memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
+ sh->success_count = 0;
+ afr_reset_children (sh->success_children, child_count);
+ afr_reset_children (sh->fresh_children, child_count);
+ afr_reset_xattr (sh->xattr, child_count);
+}
+
+/* Calling this function discards the existing afr self-heal state;
+ * see the afr_sh_common_reset call in this function for what is reset.
+ */
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid)
{
afr_local_t *local = NULL;
int i = 0;
int call_count = 0;
afr_private_t *priv = NULL;
dict_t *xattr_req = NULL;
- int ret = -1;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
priv = this->private;
+ sh = &local->self_heal;
call_count = afr_up_children_count (priv->child_count,
local->child_up);
@@ -1445,29 +1724,29 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
xattr_req = dict_new();
if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set value for %s",
- local->loc.path, priv->pending_key[i]);
+ afr_xattr_req_prepare (this, xattr_req, loc->path);
+ if (set_gfid) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s with gfid: %s",
+ local->loc.path, uuid_utoa (sh->sh_gfid_req));
+ GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
+ afr_set_dict_gfid (xattr_req, sh->sh_gfid_req);
}
}
+ afr_sh_common_reset (sh, priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"looking up %s on subvolume %s",
local->loc.path, priv->children[i]->name);
STACK_WIND_COOKIE (frame,
- sh_missing_entries_lookup_cbk,
+ lookup_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
+ loc, xattr_req);
if (!--call_count)
break;
@@ -1483,13 +1762,15 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
int
-afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
@@ -1499,14 +1780,41 @@ afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"Non blocking entrylks done. Proceeding to FOP");
- sh_missing_entries_lookup (frame, this);
+ afr_sh_common_lookup (frame, this, &sh->parent_loc,
+ afr_sh_conflicting_entry_lookup_cbk,
+ _gf_false);
}
return 0;
}
-static int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this)
+int
+afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Non blocking entrylks failed.");
+ afr_sh_missing_entries_done (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks done. Proceeding to FOP");
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_missing_entries_lookup_cbk,
+ _gf_true);
+ }
+
+ return 0;
+}
+
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1521,9 +1829,9 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)
afr_set_lock_number (frame, this);
- int_lock->lk_basename = local->loc.name;
- int_lock->lk_loc = &sh->parent_loc;
- int_lock->lock_cbk = afr_sh_post_nonblocking_entrylk_cbk;
+ int_lock->lk_basename = base_name;
+ int_lock->lk_loc = loc;
+ int_lock->lock_cbk = lock_cbk;
afr_nonblocking_entrylk (frame, this);
@@ -1531,7 +1839,8 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)
}
static int
-afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1547,9 +1856,27 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
"attempting to recreate missing entries for path=%s",
local->loc.path);
+ GF_ASSERT (local->loc.parent);
afr_build_parent_loc (&sh->parent_loc, &local->loc);
- afr_sh_entrylk (frame, this);
+ afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
+ lock_cbk);
+ return 0;
+}
+
+static int
+afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
+{
+ afr_self_heal_parent_entrylk (frame, this,
+ afr_sh_post_nb_entrylk_conflicting_sh_cbk);
+ return 0;
+}
+
+static int
+afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
+{
+ afr_self_heal_parent_entrylk (frame, this,
+ afr_sh_post_nb_entrylk_gfid_sh_cbk);
return 0;
}
@@ -1572,6 +1899,9 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc = &lc->self_heal;
shc->unwind = sh->unwind;
+ shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
+ shc->need_missing_entry_self_heal = sh->need_missing_entry_self_heal;
+ shc->need_gfid_self_heal = sh->need_gfid_self_heal;
shc->need_data_self_heal = sh->need_data_self_heal;
shc->need_metadata_self_heal = sh->need_metadata_self_heal;
shc->need_entry_self_heal = sh->need_entry_self_heal;
@@ -1585,6 +1915,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc->background = sh->background;
shc->type = sh->type;
+ uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
if (l->loc.path)
loc_copy (&lc->loc, &l->loc);
@@ -1648,6 +1979,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
if (local->govinda_gOvinda)
split_brain = _gf_true;
+
afr_set_split_brain (this, sh->inode, split_brain);
afr_self_heal_type_str_get (sh, sh_type_str,
@@ -1764,11 +2096,15 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
}
sh->success_children = afr_fresh_children_create (priv->child_count);
sh->fresh_children = afr_fresh_children_create (priv->child_count);
+ sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);
FRAME_SU_DO (sh_frame, afr_local_t);
- if (local->success_count && local->enoent_count) {
- afr_self_heal_missing_entries (sh_frame, this);
+ if (sh->need_missing_entry_self_heal) {
+ afr_self_heal_conflicting_entries (sh_frame, this);
+ } else if (sh->need_gfid_self_heal) {
+ GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
+ afr_self_heal_gfids (sh_frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
"proceeding to metadata check on %s",
@@ -1784,18 +2120,28 @@ void
afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
size_t size)
{
- GF_ASSERT (str && (size > strlen (" meta-data data entry")));
+ GF_ASSERT (str && (size > strlen (" missing-entry gfid "
+ "meta-data data entry")));
if (self_heal_p->need_metadata_self_heal) {
- snprintf(str, size, " meta-data");
+ snprintf (str, size, " meta-data");
}
if (self_heal_p->need_data_self_heal) {
- snprintf(str + strlen(str), size - strlen(str), " data");
+ snprintf (str + strlen(str), size - strlen(str), " data");
}
if (self_heal_p->need_entry_self_heal) {
- snprintf(str + strlen(str), size - strlen(str), " entry");
+ snprintf (str + strlen(str), size - strlen(str), " entry");
+ }
+
+ if (self_heal_p->need_missing_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str),
+ " missing-entry");
+ }
+
+ if (self_heal_p->need_gfid_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " gfid");
}
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 7f624745510..c600db8255e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -29,6 +29,11 @@ typedef enum {
AFR_SELF_HEAL_INVALID = -1,
} afr_self_heal_type;
+typedef int
+(*afr_lookup_cbk_t) (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent);
int
afr_sh_select_source (int sources[], int child_count);
@@ -71,4 +76,23 @@ afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
afr_self_heal_type
afr_self_heal_type_for_transaction (afr_transaction_type type);
+int
+afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type);
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid);
+int
+afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src, struct iatt *buf);
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk);
+int
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ struct iatt *postparent);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 7179e929d65..6d16b170ff3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -282,7 +282,7 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"finishing data selfheal of %s", local->loc.path);
if (!sh->data_lock_held)
@@ -607,18 +607,11 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- afr_build_pending_matrix (priv->pending_key, sh->pending_matrix,
- sh->xattr, AFR_DATA_TRANSACTION,
- priv->child_count);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
- priv->child_count, AFR_SELF_HEAL_DATA,
- sh->success_children, this->name);
-
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION);
if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"No self-heal needed for %s",
local->loc.path);
@@ -760,7 +753,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
afr_private_t *priv = NULL;
int read_child = -1;
int ret = -1;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
int32_t **pending_matrix = NULL;
int32_t *sources = NULL;
int32_t *success_children = NULL;
@@ -784,16 +776,8 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
if (NULL == sources)
goto out;
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, txn_type, priv->child_count);
-
- sh_type = afr_self_heal_type_for_transaction (txn_type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
-
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name);
+ nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
+ sources, success_children, txn_type);
if (nsources < 0) {
ret = -1;
goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 7e43c427737..4a5e7531d60 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -50,9 +50,6 @@
#include "afr-self-heal-common.h"
int
-afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this);
-
-int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
@@ -2267,16 +2264,10 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
goto heal;
}
- afr_build_pending_matrix (priv->pending_key, sh->pending_matrix,
- sh->xattr, AFR_ENTRY_TRANSACTION,
- priv->child_count);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
- priv->child_count, AFR_SELF_HEAL_ENTRY,
- sh->success_children, this->name);
-
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
@@ -2340,62 +2331,6 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
return 0;
}
-
-
-int
-afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = 0;
- int call_count = 0;
- int i = 0;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value.",
- local->loc.path);
- }
- }
-
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame,
- afr_sh_entry_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
-
int
afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
{
@@ -2416,38 +2351,14 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
"for %s. Proceeding to FOP", local->loc.path);
- afr_sh_entry_lookup(frame, this);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_entry_lookup_cbk, _gf_false);
}
return 0;
}
int
-afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
-
- int_lock->lk_basename = NULL;
- int_lock->lk_loc = &local->loc;
- int_lock->lock_cbk = afr_sh_post_nonblocking_entry_cbk;
-
- afr_nonblocking_entrylk (frame, this);
-
-
- return 0;
-}
-
-
-int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
@@ -2458,7 +2369,8 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
local = frame->local;
if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entry_lock (frame, this);
+ afr_sh_entrylk (frame, this, &local->loc, NULL,
+ afr_sh_post_nonblocking_entry_cbk);
} else {
gf_log (this->name, GF_LOG_TRACE,
"proceeding to completion on %s",
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 5993e9596ba..04c5ef4e11b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -492,16 +492,10 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- afr_build_pending_matrix (priv->pending_key, sh->pending_matrix,
- sh->xattr, AFR_METADATA_TRANSACTION,
- priv->child_count);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
- priv->child_count, AFR_SELF_HEAL_METADATA,
- sh->success_children, this->name);
-
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_METADATA_TRANSACTION);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
@@ -631,63 +625,6 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
-int
-afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- dict_t *xattr_req = NULL;
- int ret = 0;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- }
- }
-
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
-
int
afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
xlator_t *this)
@@ -709,7 +646,8 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
"inodelks done for %s. Proceeding to FOP",
local->loc.path);
- afr_sh_metadata_lookup (frame, this);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_metadata_lookup_cbk, _gf_false);
}
return 0;