From dd80d06145a5911e697b724a44fd4d858e3a9134 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Sat, 13 Sep 2014 12:08:56 +0530 Subject: cluster/afr: Fix spurious metadata self-heals Backport of http://review.gluster.org/8709 - Added logging for metadata and data self-heals which helped in debugging this issue. - Added checks to skip self-heals when no sinks are available to heal BUG: 1145987 Change-Id: Ide03af4f531a1280ec8ad95b627285df4d7bc42d Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/8832 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 7 +++-- xlators/cluster/afr/src/afr-read-txn.c | 3 ++ xlators/cluster/afr/src/afr-self-heal-common.c | 32 ++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-data.c | 38 +++++++++++++----------- xlators/cluster/afr/src/afr-self-heal-metadata.c | 29 ++++++++++++------ xlators/cluster/afr/src/afr-self-heal.h | 4 +++ xlators/cluster/afr/src/afr.h | 2 ++ 7 files changed, 86 insertions(+), 29 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index d1bce6284a8..3e745e2491e 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1198,8 +1198,8 @@ xattr_is_equal (dict_t *this, char *key1, data_t *value1, void *data) * We need to do both because ignoring glusterfs' internal xattrs * happens only in xattr_is_equal(). */ -static gf_boolean_t -dicts_are_equal (dict_t *dict1, dict_t *dict2) +gf_boolean_t +afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2) { int ret = 0; @@ -1596,7 +1596,8 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this) } /*Check if xattrs need heal*/ - if (!dicts_are_equal (replies[first].xdata, replies[i].xdata)) + if (!afr_xattrs_are_equal (replies[first].xdata, + replies[i].xdata)) start = _gf_true; } diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 2585c443e1c..29a926dbd97 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -202,6 +202,9 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, /* very first transaction on this inode */ goto refresh; + gf_log (this->name, GF_LOG_DEBUG, "%s: generation now vs cached: %d, " + "%d", uuid_utoa (inode->gfid), local->event_generation, + event_generation); if (local->event_generation != event_generation) /* servers have disconnected / reconnected, and possibly rebooted, very likely changing the state of freshness diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b104e6b7869..b855f98ddf2 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -384,6 +384,38 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, return 0; } +void +afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, + int source, unsigned char *healed_sinks) +{ + char *status = NULL; + char *sinks_str = NULL; + char *p = NULL; + afr_private_t *priv = NULL; + gf_loglevel_t loglevel = GF_LOG_NONE; + int i = 0; + + priv = this->private; + sinks_str = alloca0 (priv->child_count * 8); + p = sinks_str; + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i]) + continue; + p += sprintf (p, "%d ", i); + } + + if (ret < 0) { + status = "Failed"; + loglevel = GF_LOG_DEBUG; + } else { + status = "Completed"; + loglevel = GF_LOG_INFO; + } + + gf_log (this->name, loglevel, "%s %s selfheal on %s. " + "source=%d sinks=%s", status, type, uuid_utoa (gfid), + source, sinks_str); +} int afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 25e310fd2a3..74088f4bf6d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -300,29 +300,14 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, struct afr_reply *replies) { afr_private_t *priv = NULL; - int i = 0; off_t off = 0; size_t block = 128 * 1024; int type = AFR_SELFHEAL_DATA_FULL; int ret = -1; call_frame_t *iter_frame = NULL; - char *sinks_str = NULL; - char *p = NULL; priv = this->private; - sinks_str = alloca0 (priv->child_count * 8); - p = sinks_str; - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i]) - continue; - p += sprintf (p, "%d ", i); - } - - gf_log (this->name, GF_LOG_INFO, "performing data selfheal on %s. " - "source=%d sinks=%s", - uuid_utoa (fd->inode->gfid), source, sinks_str); - type = afr_data_self_heal_type_get (priv, healed_sinks, source, replies); @@ -331,6 +316,11 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, return -ENOMEM; for (off = 0; off < replies[source].poststat.ia_size; off += block) { + if (AFR_COUNT (healed_sinks, priv->child_count) == 0) { + ret = -ENOTCONN; + goto out; + } + ret = afr_selfheal_data_block (iter_frame, this, fd, source, healed_sinks, off, block, type, replies); @@ -511,6 +501,10 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, data_lock); { if (ret < AFR_SH_MIN_PARTICIPANTS) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping " + "self-heal as only %d number of subvolumes " + "could be locked", uuid_utoa (fd->inode->gfid), + ret); ret = -ENOTCONN; goto unlock; } @@ -559,6 +553,9 @@ out: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, LLONG_MAX - 2, 1, compat_lock); + afr_log_selfheal (fd->inode->gfid, this, ret, "data", source, + healed_sinks); + if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); @@ -605,8 +602,11 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) priv = this->private; fd = afr_selfheal_data_open (this, inode); - if (!fd) - return -EIO; + if (!fd) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Failed to open", + uuid_utoa (inode->gfid)); + return -EIO; + } locked_on = alloca0 (priv->child_count); @@ -614,6 +614,10 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) locked_on); { if (ret < AFR_SH_MIN_PARTICIPANTS) { + gf_log (this->name, GF_LOG_DEBUG, "%s: Skipping " + "self-heal as only %d number of subvolumes " + "could be locked", uuid_utoa (fd->inode->gfid), + ret); /* Either less than two subvols available, or another selfheal (from another server) is in progress. Skip for now in any case there isn't anything to do. diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 2c5f3fd652c..5b9e290e49a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -97,7 +97,6 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, struct iatt first = {0, }; int source = -1; int sources_count = 0; - int ret = 0; priv = this->private; @@ -136,6 +135,10 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, !IA_EQUAL (first, replies[i].poststat, uid) || !IA_EQUAL (first, replies[i].poststat, gid) || !IA_EQUAL (first, replies[i].poststat, prot)) { + gf_log (this->name, GF_LOG_DEBUG, "%s: iatt mismatch " + "for source(%d) vs (%d)", + uuid_utoa (replies[source].poststat.ia_gfid), + source, i); sources[i] = 0; healed_sinks[i] = 1; } @@ -144,14 +147,12 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, for (i =0; i < priv->child_count; i++) { if (!sources[i] || i == source) continue; - if (replies[source].xdata->count != replies[i].xdata->count) { - sources[i] = 0; - healed_sinks[i] = 1; - continue; - } - ret = dict_foreach(replies[source].xdata, xattr_is_equal, - (void*)replies[i].xdata); - if (ret == -1) { + if (!afr_xattrs_are_equal (replies[source].xdata, + replies[i].xdata)) { + gf_log (this->name, GF_LOG_DEBUG, "%s: xattr mismatch " + "for source(%d) vs (%d)", + uuid_utoa (replies[source].poststat.ia_gfid), + source, i); sources[i] = 0; healed_sinks[i] = 1; } @@ -241,6 +242,12 @@ __afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode, goto unlock; source = ret; + + if (AFR_COUNT (healed_sinks, priv->child_count) == 0) { + ret = -ENOTCONN; + goto unlock; + } + ret = __afr_selfheal_metadata_do (frame, this, inode, source, healed_sinks, locked_replies); if (ret) @@ -254,6 +261,10 @@ __afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode, unlock: afr_selfheal_uninodelk (frame, this, inode, this->name, LLONG_MAX -1, 0, data_lock); + + afr_log_selfheal (inode->gfid, this, ret, "metadata", source, + healed_sinks); + if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7936659e5e4..c32ec120a50 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -185,4 +185,8 @@ afr_inode_link (inode_t *inode, struct iatt *iatt); unsigned int afr_success_count (struct afr_reply *replies, unsigned int count); + +void +afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type, + int source, unsigned char *healed_sinks); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 4b894c5c464..2b0ced58f6c 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1006,4 +1006,6 @@ afr_remove_eager_lock_stub (afr_local_t *local); void afr_replies_wipe (struct afr_reply *replies, int count); +gf_boolean_t +afr_xattrs_are_equal (dict_t *dict1, dict_t *dict2); #endif /* __AFR_H__ */ -- cgit