summary refs log tree commit diff stats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
author Pranith K <pranithk@gluster.com> 2011-07-14 06:30:40 +0000
committer Anand Avati <avati@gluster.com> 2011-07-17 07:45:23 -0700
commit 84c3d7a83a8c84ca11514202a1bc365026fd1c87 (patch)
tree 7d88342b25db32e343c3e700d0c24086122d1f4e /xlators/cluster/afr
parent 10e50f9df6de9efccd66dc8b37c73c52569e3559 (diff)
cluster/afr: Detect conflict/gfid self-heals
Added some helper functions that can be reused.

Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>

BUG: 2745 (failure to detect split brain)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2745
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 621
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 3
-rw-r--r-- xlators/cluster/afr/src/afr-open.c 8
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 7
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 2
-rw-r--r-- xlators/cluster/afr/src/afr.h 52
6 files changed, 532 insertions, 161 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 21f7b4e4356..894442936b5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -60,6 +60,60 @@
#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ dst[i] = src[i];
+}
+
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ path, priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+}
+
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno)
+{
+ int i = 0;
+ int errno_count = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (child_errno[child] == op_errno)
+ errno_count++;
+ }
+ return errno_count;
+}
+
int32_t
afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
{
@@ -346,7 +400,7 @@ afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
}
gf_boolean_t
-afr_is_success_child (int32_t *success_children, int32_t child_count,
+afr_is_child_present (int32_t *success_children, int32_t child_count,
int32_t child)
{
gf_boolean_t success_child = _gf_false;
@@ -375,7 +429,7 @@ afr_is_read_child (int32_t *success_children, int32_t *sources,
GF_ASSERT (success_children);
GF_ASSERT (child_count > 0);
- success_child = afr_is_success_child (success_children, child_count,
+ success_child = afr_is_child_present (success_children, child_count,
child);
if (!success_child)
goto out;
@@ -527,6 +581,23 @@ out:
}
void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+{
+ unsigned int i = 0;
+
+ if (!xattr)
+ goto out;
+ for (i = 0; i < child_count; i++) {
+ if (xattr[i]) {
+ dict_unref (xattr[i]);
+ xattr[i] = NULL;
+ }
+ }
+out:
+ return;
+}
+
+void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
afr_self_heal_t *sh = NULL;
@@ -540,13 +611,14 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->buf)
GF_FREE (sh->buf);
+ if (sh->parentbufs)
+ GF_FREE (sh->parentbufs);
+
+ if (sh->inode)
+ inode_unref (sh->inode);
+
if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
+ afr_reset_xattr (sh->xattr, priv->child_count);
GF_FREE (sh->xattr);
}
@@ -590,6 +662,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->fresh_children)
GF_FREE (sh->fresh_children);
+ if (sh->fresh_parent_dirs)
+ GF_FREE (sh->fresh_parent_dirs);
+
loc_wipe (&sh->parent_loc);
}
@@ -636,7 +711,6 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
void
afr_local_cleanup (afr_local_t *local, xlator_t *this)
{
- int i = 0;
afr_private_t * priv = NULL;
if (!local)
@@ -665,12 +739,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
{ /* lookup */
if (local->cont.lookup.xattrs) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lookup.xattrs[i]) {
- dict_unref (local->cont.lookup.xattrs[i]);
- local->cont.lookup.xattrs[i] = NULL;
- }
- }
+ afr_reset_xattr (local->cont.lookup.xattrs,
+ priv->child_count);
GF_FREE (local->cont.lookup.xattrs);
local->cont.lookup.xattrs = NULL;
}
@@ -813,26 +883,6 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
-
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->govinda_gOvinda && local->cont.lookup.inode) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
- }
-
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
-
- return 0;
-}
-
void
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
@@ -842,13 +892,15 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
dict_t **xattr = NULL;
GF_ASSERT (local);
- GF_ASSERT (local->cont.lookup.read_child >= 0);
buf = &local->cont.lookup.buf;
postparent = &local->cont.lookup.postparent;
xattr = &local->cont.lookup.xattr;
- read_child = local->cont.lookup.read_child;
+ read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
+ NULL);
+ gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
+ read_child);
*xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
*buf = local->cont.lookup.bufs[read_child];
*postparent = local->cont.lookup.postparents[read_child];
@@ -859,8 +911,7 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
}
}
-
- static void
+static void
afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
int child_index, dict_t *xattr)
{
@@ -885,8 +936,8 @@ afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
}
static void
-afr_lookup_detect_self_heal_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
+afr_lookup_set_self_heal_data_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
{
GF_ASSERT (local);
GF_ASSERT (this);
@@ -954,6 +1005,8 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
local->self_heal.need_metadata_self_heal = _gf_true;
local->self_heal.need_data_self_heal = _gf_true;
local->self_heal.need_entry_self_heal = _gf_true;
+ local->self_heal.need_gfid_self_heal = _gf_true;
+ local->self_heal.need_missing_entry_self_heal = _gf_true;
gf_log(this->name, GF_LOG_INFO,
"entries are missing in lookup of %s.",
local->loc.path);
@@ -961,14 +1014,15 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
goto out;
}
- if (local->success_count > 0) {
- if (afr_is_split_brain (this, local->cont.lookup.inode) &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
+ if ((local->success_count > 0) &&
+ afr_is_split_brain (this, local->cont.lookup.inode) &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ local->self_heal.need_data_self_heal = _gf_true;
+ local->self_heal.need_gfid_self_heal = _gf_true;
+ local->self_heal.need_missing_entry_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_WARNING,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
}
out:
@@ -981,31 +1035,39 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
GF_ASSERT (sh);
GF_ASSERT (priv);
- return ((priv->data_self_heal && sh->need_data_self_heal)
+ return (sh->need_gfid_self_heal
+ || sh->need_missing_entry_self_heal
+ || (priv->data_self_heal && sh->need_data_self_heal)
|| (priv->metadata_self_heal && sh->need_metadata_self_heal)
|| (priv->entry_self_heal && sh->need_entry_self_heal));
}
-gf_boolean_t
-afr_is_self_heal_enabled (afr_private_t *priv)
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type)
{
- GF_ASSERT (priv);
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+
+ GF_ASSERT (ia_type != IA_INVAL);
- return (priv->data_self_heal || priv->metadata_self_heal
- || priv->entry_self_heal);
+ if (IA_ISDIR (ia_type)) {
+ type = AFR_ENTRY_TRANSACTION;
+ } else if (IA_ISREG (ia_type)) {
+ type = AFR_DATA_TRANSACTION;
+ }
+ return type;
}
int
afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
int32_t *read_child)
{
+ ia_type_t ia_type = IA_INVAL;
int32_t source = -1;
- ia_type_t ia_type = 0;
int ret = -1;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
dict_t **xattrs = NULL;
int32_t *success_children = NULL;
struct iatt *bufs = NULL;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
GF_ASSERT (local);
GF_ASSERT (this);
@@ -1013,18 +1075,23 @@ afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
bufs = local->cont.lookup.bufs;
success_children = local->cont.lookup.success_children;
+ /*We can take the success_children[0] only because we already
+ *handle the conflicting children other wise, we could select the
+ *read_child based on wrong file type
+ */
ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
+ type = afr_transaction_type_get (ia_type);
xattrs = local->cont.lookup.xattrs;
source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
type);
- if (source < 0)
+ if (source < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
+ "for %s", local->loc.path);
goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
+ source, local->loc.path);
*read_child = source;
ret = 0;
out:
@@ -1041,7 +1108,10 @@ afr_is_self_heal_running (afr_local_t *local)
static void
afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_boolean_t is_background, ia_type_t ia_type,
- int (*unwind) (call_frame_t *frame, xlator_t *this))
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno))
{
afr_local_t *local = NULL;
char sh_type_str[256] = {0,};
@@ -1054,6 +1124,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->self_heal.background = is_background;
local->self_heal.type = ia_type;
local->self_heal.unwind = unwind;
+ local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
afr_self_heal_type_str_get (&local->self_heal,
sh_type_str,
@@ -1066,8 +1137,135 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
afr_self_heal (frame, this, inode);
}
+int
+afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path)
+{
+ int gfid_miss_count = 0;
+ int i = 0;
+ struct iatt *child1 = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if (uuid_is_null (child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
+ " on subvolume %d", path, success_children[i]);
+ gfid_miss_count++;
+ }
+ }
+
+ return gfid_miss_count;
+}
+
+static int
+afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+{
+ int32_t *success_children = NULL;
+ afr_private_t *priv = NULL;
+ struct iatt *bufs = NULL;
+ int miss_count = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ miss_count = afr_gfid_missing_count (this->name, success_children,
+ bufs, priv->child_count,
+ local->loc.path);
+ return miss_count;
+}
+
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name)
+{
+ gf_boolean_t conflicting = _gf_false;
+ int i = 0;
+ struct iatt *child1 = NULL;
+ struct iatt *child2 = NULL;
+ uuid_t *gfid = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
+ gfid = &child1->ia_gfid;
+
+ if (i == 0)
+ continue;
+
+ child2 = &bufs[success_children[i-1]];
+ if (FILETYPE_DIFFERS (child1, child2)) {
+ gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
+ "differs on subvolumes (%d, %d)", path,
+ success_children[i-1], success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ if (!gfid || uuid_is_null (child1->ia_gfid))
+ continue;
+ if (uuid_compare (*gfid, child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
+ " on subvolume %d", path, success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return conflicting;
+}
+
+/* afr_update_gfid_from_iatts: This function should be called only if the
+ * iatts are not conflicting.
+ */
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children, unsigned int child_count)
+{
+ uuid_t *gfid = NULL;
+ int i = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
+ gfid = &bufs[child].ia_gfid;
+ } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
+ if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
+ }
+ if (gfid && (!uuid_is_null (*gfid)))
+ uuid_copy (uuid, *gfid);
+out:
+ return;
+}
+
+static gf_boolean_t
+afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t conflict = _gf_false;
+
+ priv = this->private;
+ conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name);
+ return conflict;
+}
+
static void
-afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)
+afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this)
{
int i = 0;
struct iatt *bufs = NULL;
@@ -1076,8 +1274,20 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)
int32_t child1 = -1;
int32_t child2 = -1;
+ priv = this->private;
afr_detect_self_heal_by_lookup_status (local, this);
+ if (afr_lookup_gfid_missing_count (local, this))
+ local->self_heal.need_gfid_self_heal = _gf_true;
+
+ if (_gf_true == afr_lookup_conflicting_entries (local, this))
+ local->self_heal.need_missing_entry_self_heal = _gf_true;
+ else
+ afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
+ local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count);
+
bufs = local->cont.lookup.bufs;
for (i = 1; i < local->success_count; i++) {
child1 = local->cont.lookup.success_children[i-1];
@@ -1087,12 +1297,75 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)
}
xattr = local->cont.lookup.xattrs;
- priv = this->private;
for (i = 0; i < local->success_count; i++) {
child1 = local->cont.lookup.success_children[i];
- afr_lookup_detect_self_heal_by_xattr (local, this,
- xattr[child1]);
+ afr_lookup_set_self_heal_data_by_xattr (local, this,
+ xattr[child1]);
+ }
+}
+
+int
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ if (afr_error_more_important (local->op_errno, op_errno))
+ local->op_errno = op_errno;
+
+ goto out;
+ } else {
+ local->op_ret = 0;
+ }
+
+ afr_lookup_done_success_action (frame, this, _gf_true);
+out:
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode, &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+//TODO: At the moment only lookup needs this, so not doing any checks, in the
+// future we will have to do fop specific operations
+void
+afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ struct iatt *lookup_bufs = NULL;
+ struct iatt *lookup_parentbufs = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+ local = sh->orig_frame->local;
+ lookup_bufs = local->cont.lookup.bufs;
+ lookup_parentbufs = local->cont.lookup.postparents;
+ priv = this->private;
+
+ memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
+ memcpy (lookup_parentbufs, sh->parentbufs,
+ priv->child_count * sizeof (*sh->parentbufs));
+
+ afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i])
+ local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
}
+
+ afr_reset_children (local->cont.lookup.success_children,
+ priv->child_count);
+ afr_children_copy (local->cont.lookup.success_children,
+ sh->fresh_children, priv->child_count);
}
static void
@@ -1115,20 +1388,14 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
goto out;
}
- if (_gf_false == afr_is_self_heal_enabled (priv)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Self heal is not enabled");
- goto out;
- }
-
- afr_lookup_detect_self_heal (local, this);
+ afr_lookup_set_self_heal_data (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_self_heal_running (local)) {
+ if (afr_is_self_heal_running (local))
goto out;
- }
afr_launch_self_heal (frame, this, local->cont.lookup.inode,
_gf_true, local->cont.lookup.buf.ia_type,
+ afr_post_gfid_sh_success,
afr_self_heal_lookup_unwind);
*sh_launched = _gf_true;
}
@@ -1136,46 +1403,6 @@ out:
return;
}
-static gf_boolean_t
-afr_lookup_split_brain (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- gf_boolean_t symptom = _gf_false;
- struct iatt *bufs = NULL;
- int32_t *success_children = NULL;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- const char *path = NULL;
-
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
- for (i = 1; i < local->success_count; i++) {
- child1 = &bufs[success_children[i-1]];
- child2 = &bufs[success_children[i]];
- /*
- * TODO: gfid self-heal
- * if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) {
- * gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs"
- * " on subvolumes (%d, %d)", local->loc.path,
- * success_children[i-1], success_children[i]);
- * symptom = _gf_true;
- * }
- */
-
- if (FILETYPE_DIFFERS (child1, child2)) {
- path = local->loc.path;
- gf_log (this->name, GF_LOG_WARNING, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- symptom = _gf_true;
- local->govinda_gOvinda = 1;
- }
- if (symptom)
- break;
- }
- return symptom;
-}
-
void
afr_get_fresh_children (int32_t *success_children, int32_t *sources,
int32_t *fresh_children, unsigned int child_count)
@@ -1187,6 +1414,7 @@ afr_get_fresh_children (int32_t *success_children, int32_t *sources,
GF_ASSERT (sources);
GF_ASSERT (fresh_children);
+ afr_reset_children (fresh_children, child_count);
for (i = 0; i < child_count; i++) {
if (success_children[i] == -1)
break;
@@ -1206,7 +1434,6 @@ afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
GF_ASSERT (read_child >= 0);
priv = this->private;
- local->cont.lookup.read_child = read_child;
afr_get_fresh_children (local->cont.lookup.success_children,
local->cont.lookup.sources,
local->fresh_children, priv->child_count);
@@ -1216,6 +1443,60 @@ afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
return 0;
}
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict)
+{
+ int32_t read_child = -1;
+ int32_t ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->loc.parent == NULL)
+ fail_conflict = _gf_true;
+
+ if (afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ if (fail_conflict == _gf_false) {
+ ret = 0;
+ } else {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ }
+ goto out;
+ }
+
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ afr_lookup_build_response_params (local, this);
+ if (afr_is_fresh_lookup (&local->loc, this)) {
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
static void
afr_lookup_done (call_frame_t *frame, xlator_t *this)
{
@@ -1224,44 +1505,44 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
int ret = -1;
gf_boolean_t sh_launched = _gf_false;
- int32_t read_child = -1;
+ int gfid_miss_count = 0;
+ int enotconn_count = 0;
+ int up_children_count = 0;
priv = this->private;
local = frame->local;
if (local->op_ret < 0)
goto unwind;
-
- if (_gf_true == afr_lookup_split_brain (local, this)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unwind;
- }
-
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret) {
+ gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
+ up_children_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+ enotconn_count = priv->child_count - up_children_count;
+ if ((gfid_miss_count == local->success_count) &&
+ (enotconn_count > 0)) {
local->op_ret = -1;
local->op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
+ "LOOKUP on a file without gfid is not allowed when "
+ "some of the children are down", local->loc.path);
goto unwind;
}
- ret = afr_lookup_set_read_ctx (local, this, read_child);
+ ret = afr_lookup_done_success_action (frame, this, _gf_false);
if (ret)
goto unwind;
-
- afr_lookup_build_response_params (local, this);
- if (afr_is_fresh_lookup (&local->loc, this)) {
- afr_update_loc_gfids (&local->loc, &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
- }
+ uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
afr_lookup_perform_self_heal_if_needed (frame, this, &sh_launched);
- if (sh_launched)
+ if (sh_launched) {
unwind = 0;
+ goto unwind;
+ }
+
unwind:
if (unwind) {
AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
+ local->op_errno, local->cont.lookup.inode,
&local->cont.lookup.buf,
local->cont.lookup.xattr,
&local->cont.lookup.postparent);
@@ -1277,8 +1558,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
*
*/
-static gf_boolean_t
-__error_more_important (int32_t old_errno, int32_t new_errno)
+gf_boolean_t
+afr_error_more_important (int32_t old_errno, int32_t new_errno)
{
gf_boolean_t ret = _gf_true;
@@ -1293,6 +1574,28 @@ __error_more_important (int32_t old_errno, int32_t new_errno)
return ret;
}
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+ int32_t op_errno = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (afr_error_more_important (op_errno, child_errno[child]))
+ op_errno = child_errno[child];
+ }
+ return op_errno;
+}
+
static void
afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
{
@@ -1300,7 +1603,7 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
if (op_errno == ENOENT)
local->enoent_count++;
- if (__error_more_important (local->op_errno, op_errno))
+ if (afr_error_more_important (local->op_errno, op_errno))
local->op_errno = op_errno;
if (local->op_errno == ESTALE) {
@@ -1439,7 +1742,6 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
if (NULL == local->fresh_children)
goto out;
- local->cont.lookup.read_child = -1;
ret = 0;
out:
return ret;
@@ -1451,6 +1753,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ void *gfid_req = NULL;
int ret = -1;
int i = 0;
int call_count = 0;
@@ -1521,23 +1824,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
else
local->xattr_req = dict_ref (xattr_req);
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
-
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"%s: Unable to set dict value for %s",
loc->path, GLUSTERFS_INODELK_COUNT);
}
-
ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
@@ -1545,6 +1838,16 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
loc->path, GLUSTERFS_ENTRYLK_COUNT);
}
+ ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get the gfid from dict");
+ } else {
+ uuid_copy (local->cont.lookup.gfid_req, gfid_req);
+ }
+ if (local->loc.parent != NULL)
+ dict_del (xattr_req, "gfid-req");
+
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_lookup_cbk,
@@ -3252,3 +3555,17 @@ afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
fresh_children[i] = child;
}
}
+
+int
+afr_get_children_count (int32_t *fresh_children, unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ count++;
+ }
+ return count;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index ce941f0189e..64b14f88f23 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -50,7 +50,8 @@
#include "afr-self-heal-common.h"
int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno)
{
afr_local_t *local = NULL;
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 4aa587399b5..ab636a5b4b0 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -277,7 +277,8 @@ __unopened_count (int child_count, unsigned int *opened_on, unsigned char *child
int
-afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this)
+afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -418,9 +419,8 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this)
GF_ASSERT (local->loc.path);
/* forcibly trigger missing-entries self-heal */
- local->success_count = 1;
- local->enoent_count = 1;
-
+ sh->need_missing_entry_self_heal = _gf_true;
+ sh->need_gfid_self_heal = _gf_true;
sh->data_lock_held = _gf_true;
sh->need_data_self_heal = _gf_true;
sh->type = local->fd->inode->ia_type;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 16345bee738..b28f9114fc0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1663,7 +1663,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
if (!sh->unwound) {
- sh->unwind (sh->orig_frame, this);
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
}
if (sh->background) {
@@ -1723,7 +1723,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh_local = afr_local_copy (local, this);
sh_frame->local = sh_local;
sh = &sh_local->self_heal;
- sh->inode = inode;
+
+ sh->inode = inode_ref (inode);
sh->orig_frame = frame;
@@ -1731,6 +1732,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt),
gf_afr_mt_iatt);
+ sh->parentbufs = GF_CALLOC (priv->child_count, sizeof (struct iatt),
+ gf_afr_mt_iatt);
sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),
gf_afr_mt_int);
sh->success = GF_CALLOC (priv->child_count, sizeof (int),
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index f9a25797275..7179e929d65 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -699,7 +699,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);
if (sh->background) {
- sh->unwind (sh->orig_frame, this);
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
sh->unwound = _gf_true;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 994ea40a47b..7d5d0547993 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -44,6 +44,9 @@ typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
int child, int32_t op_error,
int32_t op_errno);
+typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
+
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
typedef struct afr_inode_params_ {
uint64_t mask_type;
@@ -128,6 +131,8 @@ typedef struct {
gf_boolean_t need_data_self_heal;
gf_boolean_t need_metadata_self_heal;
gf_boolean_t need_entry_self_heal;
+ gf_boolean_t need_gfid_self_heal;
+ gf_boolean_t need_missing_entry_self_heal;
gf_boolean_t forced_merge; /* Is this a self-heal triggered to
forcibly merge the directories? */
@@ -147,17 +152,21 @@ typedef struct {
self-heal on */
inode_t *inode; /* inode on which the self-heal is
performed on */
+ uuid_t sh_gfid_req; /* gfid self-heal needs to be done
+ with this gfid if it is not null */
/* Function to call to unwind. If self-heal is being done in the
background, this function will be called as soon as possible. */
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+ int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno);
/* End of external interface members */
/* array of stat's, one for each child */
struct iatt *buf;
+ struct iatt *parentbufs;
struct iatt parentbuf;
struct iatt entrybuf;
@@ -174,12 +183,17 @@ typedef struct {
int success_count;
/* array containing the fresh children found in the self-heal process */
int32_t *fresh_children;
+ /* array containing the fresh children found in the parent lookup */
+ int32_t *fresh_parent_dirs;
/* array of errno's, one for each child */
int *child_errno;
int32_t **pending_matrix;
int32_t **delta_matrix;
+ int32_t op_ret;
+ int32_t op_errno;
+
int *sources;
int source;
int active_source;
@@ -197,6 +211,7 @@ typedef struct {
blksize_t block_size;
off_t file_size;
off_t offset;
+ afr_post_remove_call_t post_remove_call;
loc_t parent_loc;
@@ -211,6 +226,7 @@ typedef struct {
int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
call_frame_t *sh_frame;
} afr_self_heal_t;
@@ -376,6 +392,7 @@ typedef struct _afr_local {
} statfs;
struct {
+ uuid_t gfid_req;
inode_t *inode;
struct iatt buf;
struct iatt postparent;
@@ -891,4 +908,37 @@ afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
int32_t child_count);
void
afr_reset_children (int32_t *fresh_children, int32_t child_count);
+gf_boolean_t
+afr_error_more_important (int32_t old_errno, int32_t new_errno);
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno);
+int
+afr_get_children_count (int32_t *fresh_children, unsigned int child_count);
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child);
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count);
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count);
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name);
+int
+afr_gfid_missing_count (const char *xlator_name, int32_t *children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path);
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type);
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count);
#endif /* __AFR_H__ */