diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-entry.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 2787 |
1 files changed, 506 insertions, 2281 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 00f1a9cb91b..9605d69f417 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -8,2399 +8,624 @@ cases as published by the Free Software Foundation. */ -#include <libgen.h> -#include <unistd.h> -#include <fnmatch.h> -#include <sys/time.h> -#include <stdlib.h> -#include <signal.h> #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif -#include "glusterfs.h" -#include "inode.h" #include "afr.h" -#include "dict.h" -#include "xlator.h" -#include "hashfn.h" -#include "logging.h" -#include "stack.h" -#include "list.h" -#include "call-stub.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" +#include "afr-self-heal.h" #include "byte-order.h" - #include "afr-transaction.h" -#include "afr-self-heal.h" -#include "afr-self-heal-common.h" - -#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\ - do {\ - _local = _frame->local;\ - _sh = &_local->self_heal;\ - _sh_frame = _sh->sh_frame;\ - _sh_local = _sh_frame->local;\ - _sh_sh = &_sh_local->self_heal;\ - } while (0); - -int -afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this, - int child_index); -int -afr_sh_entry_done (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - - sh->completion_cbk (frame, this); - - return 0; -} - - -int -afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_internal_lock_t *int_lock = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - - int_lock->lock_cbk = afr_sh_entry_done; - afr_unlock (frame, this); - - return 0; -} - - -int -afr_sh_entry_finish (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - gf_log (this->name, GF_LOG_TRACE, - "finishing entry selfheal of %s", local->loc.path); - - afr_sh_entry_unlock (frame, this); - - return 0; -} - - -int -afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xattr, dict_t *xdata) -{ - long i = 0; - int call_count = 0; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_local_t *orig_local = NULL; - call_frame_t *orig_frame = NULL; - afr_private_t *priv = NULL; - int32_t read_child = -1; - - local = frame->local; - priv = this->private; - sh = &local->self_heal; - i = (long)cookie; - - - afr_children_add_child (sh->fresh_children, i, priv->child_count); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "%s: failed to erase pending xattrs on %s (%s)", - local->loc.path, priv->children[i]->name, - strerror (op_errno)); - } - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - if (sh->source == -1) { - //this happens if the forced merge option is set - read_child = sh->fresh_children[0]; - } else { - read_child = sh->source; - } - afr_inode_set_read_ctx (this, sh->inode, read_child, - sh->fresh_children); - orig_frame = sh->orig_frame; - orig_local = orig_frame->local; - - if (sh->source != -1) { - orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink; - } - - afr_sh_entry_finish (frame, this); - } - - return 0; -} - - -int -afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - - if (sh->entries_skipped) { - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - goto out; - } - afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION, - afr_sh_entry_erase_pending_cbk, - afr_sh_entry_finish); - return 0; -out: - afr_sh_entry_finish (frame, this); - return 0; -} - - - -static int -next_active_source (call_frame_t *frame, xlator_t *this, - int current_active_source) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int source = -1; - int next_active_source = -1; - int i = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - source = sh->source; - - if (source != -1) { - if (current_active_source != source) - next_active_source = source; - goto out; - } - - /* - the next active sink becomes the source for the - 'conservative decision' of merging all entries - */ - - for (i = 0; i < priv->child_count; i++) { - if ((sh->sources[i] == 0) - && (local->child_up[i] == 1) - && (i > current_active_source)) { - - next_active_source = i; - break; - } - } -out: - return next_active_source; -} - static int -next_active_sink (call_frame_t *frame, xlator_t *this, - int current_active_sink) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int next_active_sink = -1; - int i = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - /* - the next active sink becomes the source for the - 'conservative decision' of merging all entries - */ - - for (i = 0; i < priv->child_count; i++) { - if ((sh->sources[i] == 0) - && (local->child_up[i] == 1) - && (i > current_active_sink)) { - - next_active_sink = i; - break; - } - } - - return next_active_sink; -} - -int -afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this); - -int -afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this); +afr_selfheal_entry_delete (call_frame_t *frame, xlator_t *this, inode_t *dir, + const char *name, inode_t *inode, int child, + struct afr_reply *replies) +{ + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = {0, }; + char g[64]; + + priv = this->private; + + subvol = priv->children[child]; + + loc.parent = inode_ref (dir); + uuid_copy (loc.pargfid, dir->gfid); + loc.name = name; + loc.inode = inode_ref (inode); + + if (replies[child].valid && replies[child].op_ret == 0) { + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_log (this->name, GF_LOG_WARNING, + "expunging dir %s/%s (%s) on %s", + uuid_utoa (dir->gfid), name, + uuid_utoa_r (replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir (subvol, &loc, 1); + break; + default: + gf_log (this->name, GF_LOG_WARNING, + "expunging file %s/%s (%s) on %s", + uuid_utoa (dir->gfid), name, + uuid_utoa_r (replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink (subvol, &loc); + break; + } + } + + loc_wipe (&loc); + + return ret; +} + + +int +afr_selfheal_recreate_entry (call_frame_t *frame, xlator_t *this, int dst, + int source, inode_t *dir, const char *name, + inode_t *inode, struct afr_reply *replies) +{ + int ret = 0; + loc_t loc = {0,}; + loc_t srcloc = {0,}; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + struct iatt *iatt = NULL; + char *linkname = NULL; + mode_t mode = 0; + struct iatt newent = {0,}; + + priv = this->private; + + xdata = dict_new(); + if (!xdata) + return -ENOMEM; + + loc.parent = inode_ref (dir); + uuid_copy (loc.pargfid, dir->gfid); + loc.name = name; + loc.inode = inode_ref (inode); + + ret = afr_selfheal_entry_delete (frame, this, dir, name, inode, dst, + replies); + if (ret) + goto out; + + ret = dict_set_static_bin (xdata, "gfid-req", + replies[source].poststat.ia_gfid, 16); + if (ret) + goto out; + + iatt = &replies[source].poststat; + + srcloc.inode = inode_ref (inode); + uuid_copy (srcloc.gfid, iatt->ia_gfid); + + mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type); + + switch (iatt->ia_type) { + case IA_IFDIR: + ret = syncop_mkdir (priv->children[dst], &loc, mode, xdata, 0); + break; + case IA_IFLNK: + ret = syncop_lookup (priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == 0) { + ret = syncop_link (priv->children[dst], &srcloc, &loc); + } else { + ret = syncop_readlink (priv->children[source], &srcloc, + &linkname, 4096); + if (ret <= 0) + goto out; + ret = syncop_symlink (priv->children[dst], &loc, linkname, + xdata, NULL); + } + break; + default: + ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + if (ret) + goto out; + ret = syncop_mknod (priv->children[dst], &loc, mode, + iatt->ia_rdev, xdata, &newent); + if (ret == 0 && iatt->ia_size && !newent.ia_size) { + /* New entry created. Mark @dst pending on all sources */ + ret = 1; + } + break; + } -int -afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this); - -int -afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this, - int active_src); - -int -afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this, - int active_src, int32_t op_ret, - int32_t op_errno) -{ - int call_count = 0; - - call_count = afr_frame_return (frame); - - if (call_count == 0) - afr_sh_entry_expunge_subvol (frame, this, active_src); - - return 0; -} - -int -afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, - dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - call_frame_t *frame = NULL; - int active_src = (long) cookie; - afr_self_heal_t *sh = NULL; - afr_local_t *local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - expunge_sh = &expunge_local->self_heal; - frame = expunge_sh->sh_frame; - local = frame->local; - sh = &local->self_heal; - - if (op_ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "setattr on parent directory of %s on subvolume %s failed: %s", - expunge_local->loc.path, - priv->children[active_src]->name, strerror (op_errno)); - } - - AFR_STACK_DESTROY (expunge_frame); - sh->expunge_done (frame, this, active_src, op_ret, op_errno); - - return 0; -} - - -int -afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - int active_src = 0; - int32_t valid = 0; - - priv = this->private; - expunge_local = expunge_frame->local; - expunge_sh = &expunge_local->self_heal; - - active_src = (long) cookie; - - if (op_ret == 0) { - gf_log (this->name, GF_LOG_DEBUG, - "removed %s on %s", - expunge_local->loc.path, - priv->children[active_src]->name); - } else { - gf_log (this->name, GF_LOG_INFO, - "removing %s on %s failed (%s)", - expunge_local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - } - - valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - - STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk, - (void *) (long) active_src, - priv->children[active_src], - priv->children[active_src]->fops->setattr, - &expunge_sh->parent_loc, - &expunge_sh->parentbuf, - valid, NULL); - - return 0; -} - - -int -afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this, - int active_src) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - - gf_log (this->name, GF_LOG_TRACE, - "expunging file %s on %s", - expunge_local->loc.path, priv->children[active_src]->name); - - STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk, - (void *) (long) active_src, - priv->children[active_src], - priv->children[active_src]->fops->unlink, - &expunge_local->loc, 0, NULL); - - return 0; -} - - - -int -afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this, - int active_src) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - - gf_log (this->name, GF_LOG_DEBUG, - "expunging directory %s on %s", - expunge_local->loc.path, priv->children[active_src]->name); - - STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk, - (void *) (long) active_src, - priv->children[active_src], - priv->children[active_src]->fops->rmdir, - &expunge_local->loc, 1, NULL); - - return 0; -} - - -int -afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, - int active_src, struct iatt *buf, - struct iatt *parentbuf) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - call_frame_t *frame = NULL; - int type = 0; - afr_self_heal_t *sh = NULL; - afr_local_t *local = NULL; - loc_t *loc = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - expunge_sh = &expunge_local->self_heal; - frame = expunge_sh->sh_frame; - local = frame->local; - sh = &local->self_heal; - loc = &expunge_local->loc; - - type = buf->ia_type; - if (loc->parent && uuid_is_null (loc->parent->gfid)) - uuid_copy (loc->pargfid, parentbuf->ia_gfid); - - switch (type) { - case IA_IFSOCK: - case IA_IFREG: - case IA_IFBLK: - case IA_IFCHR: - case IA_IFIFO: - case IA_IFLNK: - afr_sh_entry_expunge_unlink (expunge_frame, this, active_src); - break; - case IA_IFDIR: - afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "%s has unknown file type on %s: 0%o", - expunge_local->loc.path, - priv->children[active_src]->name, type); - goto out; - break; - } - - return 0; -out: - AFR_STACK_DESTROY (expunge_frame); - sh->expunge_done (frame, this, active_src, -1, EINVAL); - - return 0; -} - - -int -afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *x, - struct iatt *postparent) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - call_frame_t *frame = NULL; - int active_src = 0; - afr_self_heal_t *sh = NULL; - afr_local_t *local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - expunge_sh = &expunge_local->self_heal; - frame = expunge_sh->sh_frame; - active_src = (long) cookie; - local = frame->local; - sh = &local->self_heal; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "lookup of %s on %s failed (%s)", - expunge_local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - goto out; - } - - afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf, - postparent); - - return 0; out: - AFR_STACK_DESTROY (expunge_frame); - sh->expunge_done (frame, this, active_src, op_ret, op_errno); - - return 0; + if (xdata) + dict_unref (xdata); + loc_wipe (&loc); + loc_wipe (&srcloc); + return ret; } -int -afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this, - int active_src) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - - gf_log (this->name, GF_LOG_TRACE, - "looking up %s on %s", - expunge_local->loc.path, priv->children[active_src]->name); - - STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk, - (void *) (long) active_src, - priv->children[active_src], - priv->children[active_src]->fops->lookup, - &expunge_local->loc, NULL); - - return 0; -} - -int -afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *x, - struct iatt *postparent) -{ - afr_private_t *priv = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - int source = 0; - call_frame_t *frame = NULL; - int active_src = 0; - int need_expunge = 0; - afr_self_heal_t *sh = NULL; - afr_local_t *local = NULL; - - priv = this->private; - expunge_local = expunge_frame->local; - expunge_sh = &expunge_local->self_heal; - frame = expunge_sh->sh_frame; - active_src = expunge_sh->active_source; - source = (long) cookie; - local = frame->local; - sh = &local->self_heal; - - if (op_ret == -1 && op_errno == ENOENT) - need_expunge = 1; - else if (op_ret == -1) - goto out; - - if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) && - !uuid_is_null (buf->ia_gfid) && - (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) { - char uuidbuf1[64]; - char uuidbuf2[64]; - gf_log (this->name, GF_LOG_DEBUG, - "entry %s found on %s with mismatching gfid (%s/%s)", - expunge_local->loc.path, - priv->children[source]->name, - uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1), - uuid_utoa_r (buf->ia_gfid, uuidbuf2)); - need_expunge = 1; - } - - if (need_expunge) { - gf_log (this->name, GF_LOG_INFO, - "Entry %s is missing on %s and deleting from " - "replica's other bricks", - expunge_local->loc.path, - priv->children[source]->name); - - if (postparent) - expunge_sh->parentbuf = *postparent; - - afr_sh_entry_expunge_purge (expunge_frame, this, active_src); - - return 0; - } - -out: - if (op_ret == 0) { - gf_log (this->name, GF_LOG_TRACE, - "%s exists under %s", - expunge_local->loc.path, - priv->children[source]->name); - } else { - gf_log (this->name, GF_LOG_INFO, - "looking up %s under %s failed (%s)", - expunge_local->loc.path, - priv->children[source]->name, - strerror (op_errno)); - } - - AFR_STACK_DESTROY (expunge_frame); - sh->expunge_done (frame, this, active_src, op_ret, op_errno); - - return 0; -} - -static gf_boolean_t -can_skip_entry_self_heal (char *name, loc_t *parent_loc) -{ - if (strcmp (name, ".") == 0) { - return _gf_true; - } else if (strcmp (name, "..") == 0) { - return _gf_true; - } else if (loc_is_root (parent_loc) && - (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) { - return _gf_true; - } - return _gf_false; -} - -int -afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, - gf_dirent_t *entry) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int ret = -1; - call_frame_t *expunge_frame = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *expunge_sh = NULL; - int active_src = 0; - int source = 0; - int op_errno = 0; - char *name = NULL; - int op_ret = -1; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - active_src = sh->active_source; - source = sh->source; - sh->expunge_done = afr_sh_entry_expunge_entry_done; - - name = entry->d_name; - if (can_skip_entry_self_heal (name, &local->loc)) { - op_ret = 0; - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "inspecting existence of %s under %s", - name, local->loc.path); - - expunge_frame = copy_frame (frame); - if (!expunge_frame) { - op_errno = ENOMEM; - goto out; - } - - AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out); - - expunge_frame->local = expunge_local; - expunge_sh = &expunge_local->self_heal; - expunge_sh->sh_frame = frame; - expunge_sh->active_source = active_src; - expunge_sh->entrybuf = entry->d_stat; - loc_copy (&expunge_sh->parent_loc, &local->loc); - - ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc, - name); - if (ret != 0) { - op_errno = EINVAL; - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "looking up %s on %s", expunge_local->loc.path, - priv->children[source]->name); - - STACK_WIND_COOKIE (expunge_frame, - afr_sh_entry_expunge_entry_cbk, - (void *) (long) source, - priv->children[source], - priv->children[source]->fops->lookup, - &expunge_local->loc, NULL); - - ret = 0; -out: - if (ret == -1) - sh->expunge_done (frame, this, active_src, op_ret, op_errno); - - return 0; -} - - -int -afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - gf_dirent_t *entry = NULL; - off_t last_offset = 0; - int active_src = 0; - int entry_count = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - active_src = sh->active_source; - - if (op_ret <= 0) { - if (op_ret < 0) { - gf_log (this->name, GF_LOG_INFO, - "readdir of %s on subvolume %s failed (%s)", - local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - } else { - gf_log (this->name, GF_LOG_TRACE, - "readdir of %s on subvolume %s complete", - local->loc.path, - priv->children[active_src]->name); - } - - afr_sh_entry_expunge_all (frame, this); - return 0; - } - - list_for_each_entry (entry, &entries->list, list) { - last_offset = entry->d_off; - entry_count++; - } - - gf_log (this->name, GF_LOG_TRACE, - "readdir'ed %d entries from %s", - entry_count, priv->children[active_src]->name); - - sh->offset = last_offset; - local->call_count = entry_count; - - list_for_each_entry (entry, &entries->list, list) { - afr_sh_entry_expunge_entry (frame, this, entry); - } - - return 0; -} - -int -afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this, - int active_src) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk, - priv->children[active_src], - priv->children[active_src]->fops->readdirp, - sh->healing_fd, sh->block_size, sh->offset, NULL); - - return 0; -} - - -int -afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this) +static int +afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode, + int source, struct afr_reply *replies, + unsigned char *sources, unsigned char *newentry) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int active_src = -1; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; + int ret = 0; + int i = 0; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int **changelog = NULL; + int idx = 0; - sh->offset = 0; + priv = this->private; - if (sh->source == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "no active sources for %s to expunge entries", - local->loc.path); - goto out; - } + idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); - active_src = next_active_sink (frame, this, sh->active_source); - sh->active_source = active_src; + uuid_copy (inode->gfid, replies[source].poststat.ia_gfid); - if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - goto out; - } + changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS); - if (active_src == -1) { - /* completed creating missing files on all subvolumes */ - goto out; - } + xattr = dict_new(); + if (!xattr) + return -ENOMEM; - gf_log (this->name, GF_LOG_TRACE, - "expunging entries of %s on %s to other sinks", - local->loc.path, priv->children[active_src]->name); + for (i = 0; i < priv->child_count; i++) { + if (!newentry[i]) + continue; + changelog[i][idx] = hton32(1); + } - afr_sh_entry_expunge_subvol (frame, this, active_src); + afr_set_pending_dict (priv, xattr, changelog); - return 0; -out: - afr_sh_entry_impunge_all (frame, this); - return 0; + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) + continue; + afr_selfheal_post_op (frame, this, inode, i, xattr); + } + dict_unref (xattr); + return ret; } -int -afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - int call_count = 0; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - if (op_ret < 0) - sh->entries_skipped = _gf_true; - call_count = afr_frame_return (frame); - if (call_count == 0) - afr_sh_entry_impunge_subvol (frame, this); - - return 0; +static int +__afr_selfheal_heal_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, int source, + unsigned char *sources, unsigned char *healed_sinks, + unsigned char *locked_on, struct afr_reply *replies) +{ + int ret = 0; + afr_private_t *priv = NULL; + int i = 0; + unsigned char *newentry = NULL; + + priv = this->private; + newentry = alloca0 (priv->child_count); + + if (!replies[source].valid) + return -EIO; + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i]) + continue; + if (replies[source].op_ret == -1 && + replies[source].op_errno == ENOENT) { + ret = afr_selfheal_entry_delete (frame, this, fd->inode, + name, inode, i, replies); + } else { + if (!uuid_compare (replies[i].poststat.ia_gfid, + replies[source].poststat.ia_gfid)) + continue; + + ret = afr_selfheal_recreate_entry (frame, this, i, source, + fd->inode, name, inode, + replies); + if (ret > 0) { + newentry[i] = 1; + ret = 0; + } + } + if (ret < 0) + break; + } + + if (AFR_COUNT (newentry, priv->child_count)) + afr_selfheal_newentry_mark (frame, this, inode, source, replies, + sources, newentry); + return ret; } -void -afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *impunge_local = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_self_heal_t *impunge_sh = NULL; - call_frame_t *frame = NULL; - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - - AFR_STACK_DESTROY (impunge_frame); - sh->impunge_done (frame, this, op_ret, op_errno); -} - -int -afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, - dict_t *xdata) +static int +__afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, unsigned char *sources, + unsigned char *healed_sinks, unsigned char *locked_on, + struct afr_reply *replies) { - int call_count = 0; - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - int child_index = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - child_index = (long) cookie; - - if (op_ret == 0) { - gf_log (this->name, GF_LOG_DEBUG, - "setattr done for %s on %s", - impunge_local->loc.path, - priv->children[child_index]->name); - } else { - gf_log (this->name, GF_LOG_INFO, - "setattr (%s) on %s failed (%s)", - impunge_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - } - - call_count = afr_frame_return (impunge_frame); - if (call_count == 0) { - afr_sh_entry_call_impunge_done (impunge_frame, this, - 0, op_errno); - } - - return 0; -} + int ret = 0; + afr_private_t *priv = NULL; + int i = 0; + int source = -1; -int -afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, - dict_t *xdata) -{ - int call_count = 0; - afr_local_t *setattr_local = NULL; - - setattr_local = setattr_frame->local; - if (op_ret != 0) { - gf_log (this->name, GF_LOG_INFO, - "setattr on parent directory (%s) failed: %s", - setattr_local->loc.path, strerror (op_errno)); - } - - call_count = afr_frame_return (setattr_frame); - if (call_count == 0) - AFR_STACK_DESTROY (setattr_frame); - return 0; -} + priv = this->private; -int -afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_local_t *setattr_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - call_frame_t *setattr_frame = NULL; - int32_t valid = 0; - int32_t op_errno = 0; - int child_index = 0; - int call_count = 0; - int i = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - - gf_log (this->name, GF_LOG_DEBUG, - "setting ownership of %s on %s to %d/%d", - impunge_local->loc.path, - priv->children[child_index]->name, - impunge_sh->entrybuf.ia_uid, - impunge_sh->entrybuf.ia_gid); - - setattr_frame = copy_frame (impunge_frame); - if (!setattr_frame) { - op_errno = ENOMEM; - goto out; - } - AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out); - setattr_local = setattr_frame->local; - call_count = afr_errno_count (NULL, impunge_sh->child_errno, - priv->child_count, 0); - loc_copy (&setattr_local->loc, &impunge_sh->parent_loc); - impunge_local->call_count = call_count; - setattr_local->call_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (impunge_sh->child_errno[i]) - continue; - valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - STACK_WIND_COOKIE (setattr_frame, - afr_sh_entry_impunge_parent_setattr_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->setattr, - &setattr_local->loc, - &impunge_sh->parentbuf, valid, NULL); - - valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; - STACK_WIND_COOKIE (impunge_frame, - afr_sh_entry_impunge_setattr_cbk, - (void *) (long) i, priv->children[i], - priv->children[i]->fops->setattr, - &impunge_local->loc, - &impunge_sh->entrybuf, valid, NULL); - call_count--; - } - GF_ASSERT (!call_count); - return 0; -out: - if (setattr_frame) - AFR_STACK_DESTROY (setattr_frame); - afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno); - return 0; -} + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].op_ret == 0) { + source = i; + break; + } + } -int -afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xattr, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - int child_index = 0; - int call_count = -1; - - priv = this->private; - impunge_local = impunge_frame->local; - - child_index = (long) cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "%s: failed to perform xattrop on %s (%s)", - impunge_local->loc.path, - priv->children[child_index]->name, strerror (op_errno)); - - LOCK (&impunge_frame->lock); - { - impunge_local->op_ret = -1; - impunge_local->op_errno = op_errno; - } - UNLOCK (&impunge_frame->lock); - } - - call_count = afr_frame_return (impunge_frame); - - if (call_count == 0) { - if (impunge_local->op_ret == 0) { - afr_sh_entry_impunge_setattr (impunge_frame, this); - } else { - afr_sh_entry_call_impunge_done (impunge_frame, this, - -1, impunge_local->op_errno); - } - } - return 0; -} + if (source == -1) { + /* entry got deleted in the mean time? */ + return 0; + } -int -afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, - xlator_t *this) -{ - int active_src = 0; - dict_t *xattr = NULL; - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int32_t op_errno = 0; - int32_t call_count = 0; - int32_t i = 0; - - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - active_src = impunge_sh->active_source; - impunge_local->op_ret = 0; - - afr_prepare_new_entry_pending_matrix (impunge_local->pending, - afr_is_errno_unset, - impunge_sh->child_errno, - &impunge_sh->entrybuf, - priv->child_count); - xattr = dict_new (); - if (!xattr) { - op_errno = ENOMEM; - goto out; - } - - afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src, - LOCAL_LAST); - - for (i = 0; i < priv->child_count; i++) { - if ((impunge_sh->child_errno[i] == EEXIST) && - (impunge_local->child_up[i] == 1)) - - call_count++; - } - - impunge_local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - - if ((impunge_sh->child_errno[i] == EEXIST) - && (impunge_local->child_up[i] == 1)) { - - - STACK_WIND_COOKIE (impunge_frame, - afr_sh_entry_impunge_xattrop_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &impunge_local->loc, - GF_XATTROP_ADD_ARRAY, xattr, NULL); - if (!--call_count) - break; - } - } - - if (xattr) - dict_unref (xattr); - return 0; -out: - afr_sh_entry_call_impunge_done (impunge_frame, this, - -1, op_errno); - return 0; -} + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; -int -afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int call_count = 0; - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int child_index = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - - child_index = (long) cookie; - - if (op_ret == -1) { - impunge_sh->child_errno[child_index] = op_errno; - gf_log (this->name, GF_LOG_ERROR, - "creation of %s on %s failed (%s)", - impunge_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - } else { - impunge_sh->child_errno[child_index] = 0; - } - - call_count = afr_frame_return (impunge_frame); - if (call_count == 0) { - if (!afr_errno_count (NULL, impunge_sh->child_errno, - priv->child_count, 0)) { - // new_file creation failed every where - afr_sh_entry_call_impunge_done (impunge_frame, this, - -1, op_errno); - goto out; - } - afr_sh_entry_impunge_perform_xattrop (impunge_frame, this); - } -out: - return 0; -} + if (replies[i].op_errno != ENOENT) + continue; -int -afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int call_count = 0; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - - if (IA_IFLNK == impunge_sh->entrybuf.ia_type) { - //For symlinks impunge is attempted un-conditionally - //So the file can already exist. - if ((op_ret < 0) && (op_errno == EEXIST)) - op_ret = 0; - } - - call_count = afr_frame_return (impunge_frame); - if (call_count == 0) - afr_sh_entry_call_impunge_done (impunge_frame, this, - op_ret, op_errno); - - return 0; -} + ret = afr_selfheal_recreate_entry (frame, this, i, source, + fd->inode, name, inode, + replies); + } -int -afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this, - int child_index) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - loc_t *loc = NULL; - struct iatt *buf = NULL; - loc_t oldloc = {0}; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - loc = &impunge_local->loc; - buf = &impunge_sh->entrybuf; - - oldloc.inode = inode_ref (loc->inode); - uuid_copy (oldloc.gfid, buf->ia_gfid); - gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s", - loc->path, priv->children[child_index]->name); - - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->link, - &oldloc, loc, NULL); - loc_wipe (&oldloc); - - return 0; + return ret; } -int -afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - if (op_ret < 0) { - afr_sh_entry_impunge_create_file (impunge_frame, this, - (long)cookie); - } else { - afr_sh_entry_impunge_hardlink (impunge_frame, this, - (long)cookie); - } - return 0; -} -int -afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame, - xlator_t *this, - int child_index, struct iatt *stbuf) +static int +__afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + char *name, inode_t *inode, int source, + unsigned char *sources, unsigned char *healed_sinks, + unsigned char *locked_on, struct afr_reply *replies) { - afr_private_t *priv = NULL; - call_frame_t *frame = NULL; - afr_local_t *impunge_local = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *impunge_sh = NULL; - afr_self_heal_t *sh = NULL; - loc_t *loc = NULL; - dict_t *xattr_req = NULL; - loc_t oldloc = {0}; - int ret = -1; - - priv = this->private; - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - loc = &impunge_local->loc; - - xattr_req = dict_new (); - if (!xattr_req) - goto out; - oldloc.inode = inode_ref (loc->inode); - uuid_copy (oldloc.gfid, stbuf->ia_gfid); - - STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->lookup, - &oldloc, xattr_req); - ret = 0; -out: - if (xattr_req) - dict_unref (xattr_req); - loc_wipe (&oldloc); - if (ret) - sh->impunge_done (frame, this, -1, ENOMEM); - return 0; -} + int ret = -1; -int -afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this, - int child_index, struct iatt *stbuf) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - dict_t *dict = NULL; - int ret = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - - gf_log (this->name, GF_LOG_DEBUG, - "creating missing file %s on %s", - impunge_local->loc.path, - priv->children[child_index]->name); - - dict = dict_new (); - if (!dict) - gf_log (this->name, GF_LOG_ERROR, "Out of memory"); - - GF_ASSERT (!uuid_is_null (stbuf->ia_gfid)); - ret = afr_set_dict_gfid (dict, stbuf->ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", - impunge_local->loc.path); - - /* - * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY : - * - * Problem: - * While a brick is down in a replica pair, lets say the user creates - * one file(file-A) and a hard link to that file(h-file-A). After the - * brick comes back up, entry self-heal is attempted on parent dir of - * these two files. As part of readdir in self-heal it reads both the - * entries file-A and h-file-A for both of them it does name less lookup - * to check if there are any hardlinks already present in the - * destination brick. It finds that there are no hard links already - * present for files file-A, h-file-A. Self-heal does mknods for both - * file-A and h-file-A. This leads to file-A and h-file-A not being - * hardlinks anymore. - * - * Fix: (More like shrinking of race-window, the race itself is still - * present in posix-mknod). - * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then - * posix_mknod checks if there are already any gfid-links and does - * link() instead of mknod. There still can be a race where two - * posix_mknods same gfid see that - * gfid-link file is not present and proceeds with mknods and result in - * two different files with same gfid. - */ - ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); - if (ret) - gf_log (this->name, GF_LOG_INFO, "%s: %s set failed", - impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY); - - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->mknod, - &impunge_local->loc, - st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type), - makedev (ia_major (stbuf->ia_rdev), - ia_minor (stbuf->ia_rdev)), 0, dict); - - if (dict) - dict_unref (dict); - - return 0; + if (source < 0) + ret = __afr_selfheal_merge_dirent (frame, this, fd, name, inode, + sources, healed_sinks, + locked_on, replies); + else + ret = __afr_selfheal_heal_dirent (frame, this, fd, name, inode, + source, sources, healed_sinks, + locked_on, replies); + return ret; } - -int -afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this, - int child_index, struct iatt *stbuf) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - dict_t *dict = NULL; - - int ret = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return 0; - } - - GF_ASSERT (!uuid_is_null (stbuf->ia_gfid)); - ret = afr_set_dict_gfid (dict, stbuf->ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", - impunge_local->loc.path); - - gf_log (this->name, GF_LOG_DEBUG, - "creating missing directory %s on %s", - impunge_local->loc.path, - priv->children[child_index]->name); - - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->mkdir, - &impunge_local->loc, - st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type), - 0, dict); - - if (dict) - dict_unref (dict); - - return 0; +static int +afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *sources, + unsigned char *healed_sinks, char *name) +{ + afr_private_t *priv = NULL; + int ret = 0; + unsigned char *locked_on = NULL; + struct afr_reply *replies = NULL; + inode_t *inode = NULL; + + priv = this->private; + + locked_on = alloca0 (priv->child_count); + + replies = alloca0 (priv->child_count * sizeof(*replies)); + + ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, + name, locked_on); + { + if (ret < 2) { + ret = -ENOTCONN; + goto unlock; + } + + inode = afr_selfheal_unlocked_lookup_on (frame, fd->inode, name, + replies, locked_on); + if (!inode) { + ret = -ENOMEM; + goto unlock; + } + + ret = __afr_selfheal_entry_dirent (frame, this, fd, name, inode, + source, sources, healed_sinks, + locked_on, replies); + } +unlock: + afr_selfheal_unentrylk (frame, this, fd->inode, this->name, name, + locked_on); + if (inode) + inode_unref (inode); + return ret; } -int -afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, - int child_index, const char *linkname) +static int +afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, fd_t *fd, + int child, int source, unsigned char *sources, + unsigned char *healed_sinks) { - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - dict_t *dict = NULL; - struct iatt *buf = NULL; - int ret = 0; - - priv = this->private; - impunge_local = impunge_frame->local; - - buf = &impunge_local->cont.dir_fop.buf; - - dict = dict_new (); - if (!dict) { - afr_sh_entry_call_impunge_done (impunge_frame, this, - -1, ENOMEM); - goto out; - } - - GF_ASSERT (!uuid_is_null (buf->ia_gfid)); - ret = afr_set_dict_gfid (dict, buf->ia_gfid); - if (ret) - gf_log (this->name, GF_LOG_INFO, - "%s: dict set gfid failed", - impunge_local->loc.path); - - gf_log (this->name, GF_LOG_DEBUG, - "creating missing symlink %s -> %s on %s", - impunge_local->loc.path, linkname, - priv->children[child_index]->name); - - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->symlink, - linkname, &impunge_local->loc, 0, dict); - - if (dict) - dict_unref (dict); -out: - return 0; -} + int ret = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + off_t offset = 0; + call_frame_t *iter_frame = NULL; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + priv = this->private; + subvol = priv->children[child]; -int -afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int child_index = -1; - int call_count = -1; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - - child_index = (long) cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "unlink of %s on %s failed (%s)", - impunge_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - goto out; - } - - afr_sh_entry_impunge_symlink (impunge_frame, this, child_index, - impunge_sh->linkname); - - return 0; -out: - LOCK (&impunge_frame->lock); - { - call_count = --impunge_local->call_count; - } - UNLOCK (&impunge_frame->lock); + INIT_LIST_HEAD (&entries.list); - if (call_count == 0) - afr_sh_entry_call_impunge_done (impunge_frame, this, - op_ret, op_errno); + iter_frame = afr_copy_frame (frame); + if (!iter_frame) + return -ENOMEM; - return 0; -} + while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) { + if (ret > 0) + ret = 0; + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; -int -afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this, - int child_index) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; + if (__is_root_gfid (fd->inode->gfid) && + !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) + continue; - priv = this->private; - impunge_local = impunge_frame->local; + ret = afr_selfheal_entry_dirent (iter_frame, this, fd, + source, sources, + healed_sinks, + entry->d_name); + AFR_STACK_RESET (iter_frame); - gf_log (this->name, GF_LOG_DEBUG, - "unlinking symlink %s with wrong target on %s", - impunge_local->loc.path, - priv->children[child_index]->name); + if (ret) + break; + } - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->unlink, - &impunge_local->loc, 0, NULL); + gf_dirent_free (&entries); + if (ret) + break; + } - return 0; + AFR_STACK_DESTROY (iter_frame); + return ret; } - -int -afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - const char *linkname, struct iatt *sbuf, dict_t *xdata) +static int +afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *sources, + unsigned char *healed_sinks, + struct afr_reply *locked_replies) { - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int child_index = -1; - int call_count = -1; - int active_src = -1; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - active_src = impunge_sh->active_source; - - child_index = (long) cookie; - - if ((op_ret == -1) && (!afr_inode_missing(op_errno))) { - gf_log (this->name, GF_LOG_INFO, - "readlink of %s on %s failed (%s)", - impunge_local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - goto out; - } - - /* symlink doesn't exist on the sink */ - - if ((op_ret == -1) && (afr_inode_missing(op_errno))) { - afr_sh_entry_impunge_symlink (impunge_frame, this, - child_index, impunge_sh->linkname); - return 0; - } - - - /* symlink exists on the sink, so check if targets match */ - - if (strcmp (linkname, impunge_sh->linkname) == 0) { - /* targets match, nothing to do */ - - goto out; - } else { - /* - * Hah! Sneaky wolf in sheep's clothing! - */ - afr_sh_entry_impunge_symlink_unlink (impunge_frame, this, - child_index); - return 0; - } + int i = 0; + afr_private_t *priv = NULL; + int ret = 0; -out: - LOCK (&impunge_frame->lock); - { - call_count = --impunge_local->call_count; - } - UNLOCK (&impunge_frame->lock); + priv = this->private; - if (call_count == 0) - afr_sh_entry_call_impunge_done (impunge_frame, this, - op_ret, op_errno); + gf_log (this->name, GF_LOG_INFO, "performing entry selfheal on %s", + uuid_utoa (fd->inode->gfid)); - return 0; + for (i = 0; i < priv->child_count; i++) { + if (i != source && !healed_sinks[i]) + continue; + ret = afr_selfheal_entry_do_subvol (frame, this, fd, i, source, + sources, healed_sinks); + if (ret) + break; + } + return ret; } -int -afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this, - int child_index) +static int +__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources, + unsigned char *sinks, + unsigned char *locked_on, + struct afr_reply *replies) { - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; + int i = 0; + afr_private_t *priv = NULL; + int source = -1; + int locked_count = 0; + int sources_count = 0; + int sinks_count = 0; - priv = this->private; - impunge_local = impunge_frame->local; + priv = this->private; - gf_log (this->name, GF_LOG_DEBUG, - "checking symlink target of %s on %s", - impunge_local->loc.path, priv->children[child_index]->name); + locked_count = AFR_COUNT (locked_on, priv->child_count); + sources_count = AFR_COUNT (sources, priv->child_count); + sinks_count = AFR_COUNT (sinks, priv->child_count); - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->readlink, - &impunge_local->loc, 4096, NULL); + if (locked_count == sinks_count || !sources_count) { + return -1; + } - return 0; -} - - -int -afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - const char *linkname, struct iatt *sbuf, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int child_index = -1; - int call_count = -1; - int active_src = -1; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - active_src = impunge_sh->active_source; - - child_index = (long) cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_INFO, - "readlink of %s on %s failed (%s)", - impunge_local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - goto out; - } - - impunge_sh->linkname = gf_strdup (linkname); - afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index); - - return 0; + for (i = 0; i < priv->child_count; i++) { + if (sources[i]) { + source = i; + break; + } + } -out: - LOCK (&impunge_frame->lock); - { - call_count = --impunge_local->call_count; - } - UNLOCK (&impunge_frame->lock); - - if (call_count == 0) - afr_sh_entry_call_impunge_done (impunge_frame, this, - op_ret, op_errno); - - return 0; + return source; } -int -afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this, - int child_index, struct iatt *stbuf) +static int +__afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *locked_on, unsigned char *sources, + unsigned char *sinks, unsigned char *healed_sinks, + struct afr_reply *replies, int *source_p) { - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int active_src = -1; - - priv = this->private; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - active_src = impunge_sh->active_source; - impunge_local->cont.dir_fop.buf = *stbuf; - - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk, - (void *) (long) child_index, - priv->children[active_src], - priv->children[active_src]->fops->readlink, - &impunge_local->loc, 4096, NULL); - - return 0; -} + int ret = -1; + int source = -1; + afr_private_t *priv = NULL; + int i = 0; -int -afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, - int child_index) -{ - call_frame_t *frame = NULL; - afr_local_t *impunge_local = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *impunge_sh = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - ia_type_t type = IA_INVAL; - int active_src = 0; - struct iatt *buf = NULL; - - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - active_src = impunge_sh->active_source; - afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf, - &impunge_sh->parentbuf); - - buf = &impunge_sh->entrybuf; - type = buf->ia_type; - - switch (type) { - case IA_IFSOCK: - case IA_IFREG: - case IA_IFBLK: - case IA_IFCHR: - case IA_IFIFO: - case IA_IFLNK: - afr_sh_entry_impunge_check_hardlink (impunge_frame, this, - child_index, buf); - break; - case IA_IFDIR: - afr_sh_entry_impunge_mkdir (impunge_frame, this, - child_index, buf); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "%s has unknown file type on %s: 0%o", - impunge_local->loc.path, - priv->children[active_src]->name, type); - sh->impunge_done (frame, this, -1, EINVAL); - break; - } - - return 0; -} + priv = this->private; -int -afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this, - int child_index) -{ - call_frame_t *frame = NULL; - afr_local_t *impunge_local = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *impunge_sh = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - ia_type_t type = IA_INVAL; - int active_src = 0; - struct iatt *buf = NULL; - - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - active_src = impunge_sh->active_source; - buf = &impunge_sh->entrybuf; - type = buf->ia_type; - - switch (type) { - case IA_IFSOCK: - case IA_IFREG: - case IA_IFBLK: - case IA_IFCHR: - case IA_IFIFO: - afr_sh_entry_impunge_mknod (impunge_frame, this, - child_index, buf); - break; - case IA_IFLNK: - afr_sh_entry_impunge_readlink (impunge_frame, this, - child_index, buf); - break; - default: - gf_log (this->name, GF_LOG_ERROR, - "%s has unknown file type on %s: 0%o", - impunge_local->loc.path, - priv->children[active_src]->name, type); - sh->impunge_done (frame, this, -1, EINVAL); - break; - } - - return 0; -} + ret = afr_selfheal_unlocked_discover (frame, fd->inode, fd->inode->gfid, + replies); + if (ret) + return ret; -gf_boolean_t -afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child, - unsigned int child_count) -{ - gf_boolean_t recreate = _gf_false; + ret = afr_selfheal_find_direction (frame, this, replies, + AFR_ENTRY_TRANSACTION, + locked_on, sources, sinks); + if (ret) + return ret; - GF_ASSERT (impunge_sh->child_errno); + source = __afr_selfheal_entry_finalize_source (this, sources, sinks, + locked_on, replies); + if (source < 0) { + /* If source is < 0 (typically split-brain), we perform a + conservative merge of entries rather than erroring out */ + } + *source_p = source; - if (child == impunge_sh->active_source) - goto out; + for (i = 0; i < priv->child_count; i++) + /* Initialize the healed_sinks[] array optimistically to + the intersection of to-be-healed (i.e sinks[]) and + the list of servers which are up (i.e locked_on[]). - if (IA_IFLNK == impunge_sh->entrybuf.ia_type) { - recreate = _gf_true; - goto out; - } + As we encounter failures in the healing process, we + will unmark the respective servers in the healed_sinks[] + array. + */ + healed_sinks[i] = sinks[i] && locked_on[i]; - if (impunge_sh->child_errno[child] == ENOENT) - recreate = _gf_true; -out: - return recreate; + return ret; } -unsigned int -afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources, - unsigned int child_count) -{ - int count = 0; - int i = 0; - - for (i = 0; i < child_count; i++) { - if (afr_sh_need_recreate (impunge_sh, i, child_count)) - count++; - } - - return count; -} -int -afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame, - xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - unsigned int recreate_count = 0; - int i = 0; - int active_src = 0; - - priv = this->private; - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - active_src = impunge_sh->active_source; - impunge_sh->entrybuf = impunge_sh->buf[active_src]; - impunge_sh->parentbuf = impunge_sh->parentbufs[active_src]; - recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources, - priv->child_count); - if (!recreate_count) { - afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0); - goto out; - } - impunge_local->call_count = recreate_count; - for (i = 0; i < priv->child_count; i++) { - if (!impunge_local->child_up[i]) { - impunge_sh->child_errno[i] = ENOTCONN; - continue; - } - if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) { - impunge_sh->child_errno[i] = EEXIST; - continue; - } - } - for (i = 0; i < priv->child_count; i++) { - if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) - continue; - (void)afr_sh_entry_impunge_create (impunge_frame, this, i); - recreate_count--; - } - GF_ASSERT (!recreate_count); +static int +__afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, + unsigned char *locked_on) +{ + afr_private_t *priv = NULL; + int ret = -1; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *data_lock = NULL; + unsigned char *healed_sinks = NULL; + struct afr_reply *locked_replies = NULL; + int source = -1; + + priv = this->private; + + sources = alloca0 (priv->child_count); + sinks = alloca0 (priv->child_count); + healed_sinks = alloca0 (priv->child_count); + data_lock = alloca0 (priv->child_count); + + locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + + ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL, + data_lock); + { + if (ret < 2) { + ret = -ENOTCONN; + goto unlock; + } + + ret = __afr_selfheal_entry_prepare (frame, this, fd, data_lock, + sources, sinks, healed_sinks, + locked_replies, &source); + } +unlock: + afr_selfheal_unentrylk (frame, this, fd->inode, this->name, NULL, + data_lock); + if (ret < 0) + goto out; + + ret = afr_selfheal_entry_do (frame, this, fd, source, sources, + healed_sinks, locked_replies); + if (ret) + goto out; + + ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, + healed_sinks, AFR_ENTRY_TRANSACTION, + locked_replies, data_lock); out: - return 0; + return ret; } -void -afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_private_t *priv = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - call_frame_t *frame = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - unsigned int gfid_miss_count = 0; - unsigned int children_up_count = 0; - uuid_t gfid = {0}; - int active_src = 0; - - priv = this->private; - AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh, - frame, local, sh); - active_src = impunge_sh->active_source; - - if (op_ret < 0) - goto done; - if (impunge_sh->child_errno[active_src]) { - op_ret = -1; - op_errno = impunge_sh->child_errno[active_src]; - goto done; - } - - gfid_miss_count = afr_gfid_missing_count (this->name, - impunge_sh->success_children, - impunge_sh->buf, priv->child_count, - impunge_local->loc.path); - children_up_count = afr_up_children_count (impunge_local->child_up, - priv->child_count); - if ((gfid_miss_count == children_up_count) && - (children_up_count < priv->child_count)) { - op_ret = -1; - op_errno = ENODATA; - gf_log (this->name, GF_LOG_ERROR, "Not all children are up, " - "gfid should not be assigned in this state for %s", - impunge_local->loc.path); - goto done; - } - - if (gfid_miss_count) { - afr_update_gfid_from_iatts (gfid, impunge_sh->buf, - impunge_sh->success_children, - priv->child_count); - if (uuid_is_null (gfid)) { - sh->entries_skipped = _gf_true; - gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry " - "self-heal because of gfid absence", - impunge_local->loc.path); - goto done; - } - afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc, - afr_sh_entry_common_lookup_done, gfid, - AFR_LOOKUP_FAIL_CONFLICTS | - AFR_LOOKUP_FAIL_MISSING_GFIDS, - NULL); - } else { - afr_sh_entry_call_impunge_recreate (impunge_frame, this); - } - return; -done: - afr_sh_entry_call_impunge_done (impunge_frame, this, - op_ret, op_errno); - return; -} -int -afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, - gf_dirent_t *entry) +static fd_t * +afr_selfheal_data_opendir (xlator_t *this, inode_t *inode) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_self_heal_t *impunge_sh = NULL; - int ret = -1; - call_frame_t *impunge_frame = NULL; - afr_local_t *impunge_local = NULL; - int active_src = 0; - int op_errno = 0; - int op_ret = -1; - - local = frame->local; - sh = &local->self_heal; - - active_src = sh->active_source; - sh->impunge_done = afr_sh_entry_impunge_entry_done; - - if (can_skip_entry_self_heal (entry->d_name, &local->loc)) { - op_ret = 0; - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "inspecting existence of %s under %s", - entry->d_name, local->loc.path); - - ret = afr_impunge_frame_create (frame, this, active_src, - &impunge_frame); - if (ret) { - op_errno = -ret; - goto out; - } - - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc, - entry->d_name); - loc_copy (&impunge_sh->parent_loc, &local->loc); - if (ret != 0) { - op_errno = ENOMEM; - goto out; - } - - afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc, - afr_sh_entry_common_lookup_done, NULL, - AFR_LOOKUP_FAIL_CONFLICTS, NULL); - - op_ret = 0; -out: - if (ret) { - if (impunge_frame) - AFR_STACK_DESTROY (impunge_frame); - sh->impunge_done (frame, this, op_ret, op_errno); - } + loc_t loc = {0,}; + int ret = 0; + fd_t *fd = NULL; - return 0; -} + fd = fd_create (inode, 0); + if (!fd) + return NULL; + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); -int -afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - gf_dirent_t *entry = NULL; - off_t last_offset = 0; - int active_src = 0; - int entry_count = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - active_src = sh->active_source; - - if (op_ret <= 0) { - if (op_ret < 0) { - gf_log (this->name, GF_LOG_INFO, - "readdir of %s on subvolume %s failed (%s)", - local->loc.path, - priv->children[active_src]->name, - strerror (op_errno)); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - } else { - gf_log (this->name, GF_LOG_TRACE, - "readdir of %s on subvolume %s complete", - local->loc.path, - priv->children[active_src]->name); - } - - afr_sh_entry_impunge_all (frame, this); - return 0; - } - - list_for_each_entry (entry, &entries->list, list) { - last_offset = entry->d_off; - entry_count++; - } - - gf_log (this->name, GF_LOG_DEBUG, - "readdir'ed %d entries from %s", - entry_count, priv->children[active_src]->name); - - sh->offset = last_offset; - local->call_count = entry_count; - - list_for_each_entry (entry, &entries->list, list) { - afr_sh_entry_impunge_entry (frame, this, entry); - } - - return 0; -} + ret = syncop_opendir (this, &loc, fd); + if (ret) { + fd_unref (fd); + fd = NULL; + } else { + fd_bind (fd); + } + loc_wipe (&loc); -int -afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int32_t active_src = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - active_src = sh->active_source; - gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd", - local->loc.path, sh->offset); - - STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk, - priv->children[active_src], - priv->children[active_src]->fops->readdirp, - sh->healing_fd, sh->block_size, sh->offset, NULL); - - return 0; + return fd; } -int -afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int active_src = -1; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - sh->offset = 0; - - active_src = next_active_source (frame, this, sh->active_source); - sh->active_source = active_src; - - if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - afr_sh_entry_finish (frame, this); - return 0; - } - - if (active_src == -1) { - /* completed creating missing files on all subvolumes */ - afr_sh_entry_erase_pending (frame, this); - return 0; - } - - gf_log (this->name, GF_LOG_TRACE, - "impunging entries of %s on %s to other sinks", - local->loc.path, priv->children[active_src]->name); - - afr_sh_entry_impunge_subvol (frame, this); - - return 0; -} - int -afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int child_index = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - child_index = (long) cookie; - - /* TODO: some of the open's might fail. - In that case, modify cleanup fn to send flush on those - fd's which are already open */ - - LOCK (&frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "opendir of %s failed on child %s (%s)", - local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - } - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - afr_sh_entry_finish (frame, this); - return 0; - } - gf_log (this->name, GF_LOG_TRACE, - "fd for %s opened, commencing sync", - local->loc.path); - - sh->active_source = -1; - afr_sh_entry_expunge_all (frame, this); - } - - return 0; -} - - -int -afr_sh_entry_open (call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int call_count = 0; - - int source = -1; - int *sources = NULL; + afr_private_t *priv = NULL; + unsigned char *locked_on = NULL; + fd_t *fd = NULL; + int ret = 0; - fd_t *fd = NULL; + priv = this->private; - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t *sh = NULL; + fd = afr_selfheal_data_opendir (this, inode); + if (!fd) + return -EIO; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + locked_on = alloca0 (priv->child_count); - source = local->self_heal.source; - sources = local->self_heal.sources; + ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL, + locked_on); + { + if (ret < 2) { + /* Either less than two subvols available, or another + selfheal (from another server) is in progress. Skip + for now in any case there isn't anything to do. + */ + ret = -ENOTCONN; + goto unlock; + } - sh->block_size = priv->sh_readdir_size; - sh->offset = 0; + ret = __afr_selfheal_entry (frame, this, fd, locked_on); + } +unlock: + afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on); - call_count = sh->active_sinks; - if (source != -1) - call_count++; + if (fd) + fd_unref (fd); - local->call_count = call_count; - - fd = fd_create (local->loc.inode, frame->root->pid); - sh->healing_fd = fd; - - if (source != -1) { - gf_log (this->name, GF_LOG_TRACE, - "opening directory %s on subvolume %s (source)", - local->loc.path, priv->children[source]->name); - - /* open source */ - STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk, - (void *) (long) source, - priv->children[source], - priv->children[source]->fops->opendir, - &local->loc, fd, NULL); - call_count--; - } - - /* open sinks */ - for (i = 0; i < priv->child_count; i++) { - if (sources[i] || !local->child_up[i]) - continue; - - gf_log (this->name, GF_LOG_TRACE, - "opening directory %s on subvolume %s (sink)", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->opendir, - &local->loc, fd, NULL); - - if (!--call_count) - break; - } - - return 0; -} - - -int -afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int source = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - source = sh->source; - - afr_sh_mark_source_sinks (frame, this); - if (source != -1) - sh->success[source] = 1; - - if (sh->active_sinks == 0) { - gf_log (this->name, GF_LOG_TRACE, - "no active sinks for self-heal on dir %s", - local->loc.path); - afr_sh_entry_finish (frame, this); - return 0; - } - if (source == -1 && sh->active_sinks < 2) { - gf_log (this->name, GF_LOG_TRACE, - "cannot sync with 0 sources and 1 sink on dir %s", - local->loc.path); - afr_sh_entry_finish (frame, this); - return 0; - } - - if (source != -1) - gf_log (this->name, GF_LOG_DEBUG, - "self-healing directory %s from subvolume %s to " - "%d other", - local->loc.path, priv->children[source]->name, - sh->active_sinks); - else - gf_log (this->name, GF_LOG_DEBUG, - "no active sources for %s found. " - "merging all entries as a conservative decision", - local->loc.path); - - sh->actual_sh_started = _gf_true; - afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN); - afr_sh_entry_open (frame, this); - - return 0; -} - - -void -afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int source = 0; - int nsources = 0; - int32_t subvol_status = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - if (op_ret < 0) { - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_set_error (sh, op_errno); - afr_sh_entry_finish (frame, this); - goto out; - } - - if (sh->forced_merge) { - sh->source = -1; - goto heal; - } - - nsources = afr_build_sources (this, sh->xattr, sh->buf, - sh->pending_matrix, sh->sources, - sh->success_children, - AFR_ENTRY_TRANSACTION, &subvol_status, - _gf_true); - if ((subvol_status & ALL_FOOLS) || - (subvol_status & SPLIT_BRAIN)) { - gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative " - "merge", local->loc.path); - source = -1; - memset (sh->sources, 0, - sizeof (*sh->sources) * priv->child_count); - } else if (nsources == 0) { - gf_log (this->name, GF_LOG_TRACE, - "No self-heal needed for %s", - local->loc.path); - - afr_sh_entry_finish (frame, this); - return; - } else { - source = afr_sh_select_source (sh->sources, priv->child_count); - } - - sh->source = source; - - afr_reset_children (sh->fresh_children, priv->child_count); - afr_get_fresh_children (sh->success_children, sh->sources, - sh->fresh_children, priv->child_count); - if (sh->source >= 0) - afr_inode_set_read_ctx (this, sh->inode, sh->source, - sh->fresh_children); - -heal: - afr_sh_entry_sync_prepare (frame, this); -out: - return; -} - -int -afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - sh = &local->self_heal; - - if (int_lock->lock_op_ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks " - "failed for %s.", local->loc.path); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_entry_done (frame, this); - } else { - - gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done " - "for %s. Proceeding to FOP", local->loc.path); - afr_sh_common_lookup (frame, this, &local->loc, - afr_sh_entry_fix, NULL, - AFR_LOOKUP_FAIL_CONFLICTS | - AFR_LOOKUP_FAIL_MISSING_GFIDS, - NULL); - } - - return 0; -} - -int -afr_self_heal_entry (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_self_heal_t *sh = NULL; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY; - - if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) { - afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); - afr_sh_entrylk (frame, this, &local->loc, NULL, - afr_sh_post_nonblocking_entry_cbk); - } else { - gf_log (this->name, GF_LOG_TRACE, - "proceeding to completion on %s", - local->loc.path); - afr_sh_entry_done (frame, this); - } - - return 0; + return ret; } |