diff options
author | Emmanuel Dreyfus <manu@netbsd.org> | 2014-12-23 06:54:50 +0100 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2014-12-22 23:29:54 -0800 |
commit | 9a6ea92df1007cc6f402ff7cc560e3206fa42e62 (patch) | |
tree | fe31bb641e4e4c3684e81fe14a71534636d0593d /xlators/cluster/afr | |
parent | da9deb54df91dedc51ebe165f3a0be646455cb5b (diff) |
Avoid spurious directory metadata split brain
When directory content is modified, [mc]time is updated. On
Linux, the filesystem does it, while at least on NetBSD, the
kernel file-system independent code does it. This means that
when entries are added while bricks are down, the kernel sends
a SETATTR [mc]time which will cause metadata split brain for
the directory. In this case, clear the split brain by finding
the source with the most recent modification date.
BUG: 1129939
Change-Id: Ic0177e0df753a4748624d0b906834ed54593adb9
Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org>
Reviewed-on: http://review.gluster.org/9291
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 116 |
1 files changed, 115 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 68dc568bc65..87600df3bad 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -99,6 +99,96 @@ out:
         return ret;
 }
 
+static inline uint64_t
+mtime_ns(struct iatt *ia)
+{
+        uint64_t ret;
+
+        ret = (((uint64_t)(ia->ia_mtime)) * 1000000000)
+              + (uint64_t)(ia->ia_mtime_nsec);
+
+        return ret;
+}
+
+/*
+ * When directory content is modified, [mc]time is updated. On
+ * Linux, the filesystem does it, while at least on NetBSD, the
+ * kernel file-system independant code does it. This means that
+ * when entries are added while bricks are down, the kernel sends
+ * a SETATTR [mc]time which will cause metadata split brain for
+ * the directory. In this case, clear the split brain by finding
+ * the source with the most recent modification date.
+ */
+static int
+afr_dirtime_splitbrain_source (call_frame_t *frame, xlator_t *this,
+                               struct afr_reply *replies,
+                               unsigned char *locked_on)
+{
+        afr_private_t *priv = NULL;
+        int source = -1;
+        struct iatt source_ia;
+        struct iatt child_ia;
+        uint64_t mtime = 0;
+        int i;
+        int ret = -1;
+
+        priv = this->private;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!locked_on[i])
+                        continue;
+
+                if (!replies[i].valid)
+                        continue;
+
+                if (replies[i].op_ret != 0)
+                        continue;
+
+                if (mtime_ns(&replies[i].poststat) <= mtime)
+                        continue;
+
+                mtime = mtime_ns(&replies[i].poststat);
+                source = i;
+        }
+
+        if (source == -1)
+                goto out;
+
+        source_ia = replies[source].poststat;
+        if (source_ia.ia_type != IA_IFDIR)
+                goto out;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (i == source)
+                        continue;
+
+                if (!replies[i].valid)
+                        continue;
+
+                if (replies[i].op_ret != 0)
+                        continue;
+
+                child_ia = replies[i].poststat;
+
+                if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+                    !IA_EQUAL(source_ia, child_ia, type) ||
+                    !IA_EQUAL(source_ia, child_ia, prot) ||
+                    !IA_EQUAL(source_ia, child_ia, uid) ||
+                    !IA_EQUAL(source_ia, child_ia, gid) ||
+                    !afr_xattrs_are_equal (replies[source].xdata,
+                                           replies[i].xdata))
+                        goto out;
+        }
+
+        /*
+         * Metadata split brain is just about [amc]time
+         * We return our source.
+         */
+        ret = source;
+out:
+        return ret;
+}
+
 
 /*
  * Look for mismatching uid/gid or mode or user xattrs even if
@@ -122,10 +212,34 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
         sources_count = AFR_COUNT (sources, priv->child_count);
 
         if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
-	    || !sources_count) {
+            || !sources_count) {
+                /* If this is a directory mtime/ctime only split brain
+                   use the most recent */
+                source = afr_dirtime_splitbrain_source (frame, this,
+                                                        replies, locked_on);
+                if (source != -1) {
+                        gf_log (this->name, GF_LOG_NOTICE, "clear time "
+                                "split brain on %s",
+                                uuid_utoa (replies[source].poststat.ia_gfid));
+                        sources[source] = 1;
+
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (i == source)
+                                        continue;
+
+                                if (!locked_on[i])
+                                        continue;
+
+                                healed_sinks[i] = 1;
+                        }
+
+                        return source;
+                }
+
                 if (!priv->metadata_splitbrain_forced_heal) {
                         return -EIO;
                 }
+
                 /* Metadata split brain, select one subvol
                    arbitrarily */
                 for (i = 0; i < priv->child_count; i++) {