diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2013-03-08 15:20:22 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-04-10 00:41:19 -0700 |
commit | 26ab9cfeca978cc5459d1638fb1628051143eab9 (patch) | |
tree | 125da6efabc87c40efe19b155483c339c689fa11 | |
parent | f42426512700fc2ff508a889eb5ffe97bd9b9002 (diff) |
cluster/afr: Preserve mtime in self-heal
Problem:
Data self-heal may choose sink iatt to set mtimes.
This happens because after syncing of data is done
self-heal does one more xattrops/fstat to determine
sources sinks to set the inode-ctx. Since this is done
after data syncing and erase of xattrs, old source and
old sink are now sources, but the mtimes of them differ.
Old code just takes the first source from the list and
update mtimes, which could be sink before the self-heal
started.
Fix:
Set mtime from 'sources before syncing'.
Change-Id: Id769e1b99aa4f041eaee775f64cbf2c57b799723
BUG: 918437
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4658
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-on: http://review.gluster.org/4664
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | tests/bugs/bug-918437-sh-mtime.t | 52 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 38 |
2 files changed, 76 insertions, 14 deletions
diff --git a/tests/bugs/bug-918437-sh-mtime.t b/tests/bugs/bug-918437-sh-mtime.t new file mode 100644 index 00000000000..080956f519a --- /dev/null +++ b/tests/bugs/bug-918437-sh-mtime.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_mtime { + local f=$1 + stat $f | grep Modify | awk '{print $2 $3}' | cut -f1 -d'.' +} +cleanup; + +## Tests if mtime is correct after self-heal. +TEST glusterd +TEST pidof glusterd +TEST mkdir -p $B0/gfs0/brick0{1,2} +TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02 +TEST $CLI volume set $V0 nfs.disable on +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable +# file 'a' is healed from brick02 to brick01 where as file 'b' is healed from +# brick01 to brick02 + +TEST cp -p /etc/passwd $M0/a +TEST cp -p /etc/passwd $M0/b + +#Store mtimes before self-heals +TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a) +TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b) + +TEST $CLI volume stop $V0 +TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a +TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b + +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1 + +find $M0 | xargs stat 1>/dev/null + +TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a) +TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a) +EXPECT $modify_atstamp echo $modify_atstamp1 +EXPECT $modify_atstamp echo $modify_atstamp2 + +TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b) +TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b) +EXPECT $modify_btstamp echo $modify_btstamp1 +EXPECT $modify_btstamp echo $modify_btstamp2 +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 423e1c8049c..70395be862b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -185,29 +185,20 @@ afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } int -afr_sh_data_setattr (call_frame_t *frame, xlator_t *this) +afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf) { afr_local_t *local = NULL; afr_private_t *priv = NULL; afr_self_heal_t *sh = NULL; int i = 0; int call_count = 0; - int source = 0; int32_t valid = 0; - struct iatt stbuf = {0,}; local = frame->local; sh = &local->self_heal; priv = this->private; - source = sh->source; - - valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME); - - stbuf.ia_atime = sh->buf[source].ia_atime; - stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec; - stbuf.ia_mtime = sh->buf[source].ia_mtime; - stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec; + valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME); call_count = afr_set_elem_count_get (sh->success, priv->child_count); @@ -227,7 +218,7 @@ afr_sh_data_setattr (call_frame_t *frame, xlator_t *this) (void *) (long) i, priv->children[i], priv->children[i]->fops->setattr, - &local->loc, &stbuf, valid, NULL); + &local->loc, stbuf, valid, NULL); if (!--call_count) break; @@ -251,7 +242,7 @@ afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie, GF_ASSERT (sh->source == child_index); if (op_ret != -1) { sh->buf[child_index] = *buf; - afr_sh_data_setattr (frame, this); + afr_sh_data_setattr (frame, this, buf); } else { gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set " "time-stamps after self-heal", local->loc.path); @@ -729,6 +720,9 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int nsources = 0; int ret = 0; + int *old_sources = NULL; + int tstamp_source = 0; + int i = 0; local = frame->local; sh = &local->self_heal; @@ -736,6 +730,13 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s", lkowner_utoa (&frame->root->lk_owner)); + if (sh->sync_done) { + //store sources before sync so that mtime can be set using the + //iatt buf from one of them. + old_sources = alloca (priv->child_count*sizeof (*old_sources)); + memcpy (old_sources, sh->sources, + priv->child_count * sizeof (*old_sources)); + } nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->success_children, @@ -778,7 +779,16 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) } if (sh->sync_done) { - afr_sh_data_setattr (frame, this); + /* Perform setattr from one of the old_sources if possible + * Because only they have the correct mtime, the new sources + * (i.e. old sinks) have mtime from last writev in sync. + */ + tstamp_source = sh->source; + for (i = 0; i < priv->child_count; i++) { + if (old_sources[i] && sh->sources[i]) + tstamp_source = i; + } + afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]); } else { if (nsources == 0) { gf_log (this->name, GF_LOG_DEBUG, |