diff options
-rw-r--r-- | tests/bugs/bug-918437-sh-mtime.t | 52 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 39 |
2 files changed, 77 insertions, 14 deletions
diff --git a/tests/bugs/bug-918437-sh-mtime.t b/tests/bugs/bug-918437-sh-mtime.t new file mode 100644 index 00000000000..080956f519a --- /dev/null +++ b/tests/bugs/bug-918437-sh-mtime.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_mtime { + local f=$1 + stat $f | grep Modify | awk '{print $2 $3}' | cut -f1 -d'.' +} +cleanup; + +## Tests if mtime is correct after self-heal. +TEST glusterd +TEST pidof glusterd +TEST mkdir -p $B0/gfs0/brick0{1,2} +TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02 +TEST $CLI volume set $V0 nfs.disable on +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable +# file 'a' is healed from brick02 to brick01 whereas file 'b' is healed from +# brick01 to brick02 + +TEST cp -p /etc/passwd $M0/a +TEST cp -p /etc/passwd $M0/b + +#Store mtimes before self-heals +TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a) +TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b) + +TEST $CLI volume stop $V0 +TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a +TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b + +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1 + +find $M0 | xargs stat 1>/dev/null + +TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a) +TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a) +EXPECT $modify_atstamp echo $modify_atstamp1 +EXPECT $modify_atstamp echo $modify_atstamp2 + +TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b) +TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b) +EXPECT $modify_btstamp echo $modify_btstamp1 +EXPECT $modify_btstamp echo $modify_btstamp2 +cleanup; diff --git 
a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 69e650620e9..cfc00a3728e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -190,29 +190,20 @@ afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } int -afr_sh_data_setattr (call_frame_t *frame, xlator_t *this) +afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf) { afr_local_t *local = NULL; afr_private_t *priv = NULL; afr_self_heal_t *sh = NULL; int i = 0; int call_count = 0; - int source = 0; int32_t valid = 0; - struct iatt stbuf = {0,}; local = frame->local; sh = &local->self_heal; priv = this->private; - source = sh->source; - - valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME); - - stbuf.ia_atime = sh->buf[source].ia_atime; - stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec; - stbuf.ia_mtime = sh->buf[source].ia_mtime; - stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec; + valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME); call_count = afr_set_elem_count_get (sh->success, priv->child_count); @@ -232,7 +223,7 @@ afr_sh_data_setattr (call_frame_t *frame, xlator_t *this) (void *) (long) i, priv->children[i], priv->children[i]->fops->setattr, - &local->loc, &stbuf, valid, NULL); + &local->loc, stbuf, valid, NULL); if (!--call_count) break; @@ -256,7 +247,7 @@ afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie, GF_ASSERT (sh->source == child_index); if (op_ret != -1) { sh->buf[child_index] = *buf; - afr_sh_data_setattr (frame, this); + afr_sh_data_setattr (frame, this, buf); } else { gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set " "time-stamps after self-heal", local->loc.path); @@ -683,6 +674,9 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int nsources = 0; int ret = 0; + int *old_sources = NULL; + int tstamp_source = 0; + int i = 0; local = frame->local; sh = &local->self_heal; 
@@ -690,6 +684,13 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s", lkowner_utoa (&frame->root->lk_owner)); + if (sh->sync_done) { + //store sources before sync so that mtime can be set using the + //iatt buf from one of them. + old_sources = alloca (priv->child_count*sizeof (*old_sources)); + memcpy (old_sources, sh->sources, + priv->child_count * sizeof (*old_sources)); + } nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix, sh->sources, sh->success_children, @@ -720,6 +721,7 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) } afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB); + ret = afr_sh_inode_set_read_ctx (sh, this); if (ret) { gf_log (this->name, GF_LOG_DEBUG, @@ -730,7 +732,16 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this) } if (sh->sync_done) { - afr_sh_data_setattr (frame, this); + /* Perform setattr from one of the old_sources if possible, + * because only they have the correct mtime; the new sources + * (i.e. old sinks) have the mtime from the last writev in sync. + */ + tstamp_source = sh->source; + for (i = 0; i < priv->child_count; i++) { + if (old_sources[i] && sh->sources[i]) + tstamp_source = i; + } + afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]); } else { if (nsources == 0) { gf_log (this->name, GF_LOG_DEBUG, |