diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2018-08-27 11:46:33 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2018-09-21 13:27:00 +0000 | 
| commit | 00dae0e2ea1260a082af33dac5b06ca6aa68ccc3 (patch) | |
| tree | 90198e83596fe1bc164de9f3fdf864f28c7492f8 | |
| parent | bbab048f0a06c34656c2c11c3cec4f2dd9883037 (diff) | |
cluster/afr: Delegate metadata heal with pending xattrs to SHD
Problem:
When metadata-self-heal is triggered on the mount, it blocks
lookup until metadata-self-heal completes. But that can lead
to hangs when lot of clients are accessing a directory which
needs metadata heal and all of them trigger heals waiting
for other clients to complete heal.
Fix:
Only when the heal is needed but the pending xattrs are not set,
trigger metadata heal that could block lookup. This is the only
case where different clients may give different metadata to the
clients without heals, which should be avoided.
Updates bz#1625575
Change-Id: I6089e9fda0770a83fb287941b229c882711f4e66
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
| -rwxr-xr-x | tests/basic/afr/client-side-heal.t | 28 | ||||
| -rw-r--r-- | tests/bugs/glusterfs/bug-906646.t | 10 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 44 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 | 
5 files changed, 72 insertions, 51 deletions
| diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t index eba7dc2b3c4..1e9336184b5 100755 --- a/tests/basic/afr/client-side-heal.t +++ b/tests/basic/afr/client-side-heal.t @@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;  echo "some data" > $M0/datafile  EXPECT 0 echo $?  TEST touch $M0/mdatafile +TEST touch $M0/mdatafile-backend-direct-modify  TEST mkdir $M0/dir  #Kill a brick and perform I/O to have pending heals. @@ -29,6 +30,7 @@ EXPECT 0 echo $?  #pending metadata heal  TEST chmod +x $M0/mdatafile +TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify  #pending entry heal. Also causes pending metadata/data heals on file{1..5}  TEST touch $M0/dir/file{1..5} @@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0  #Medatada heal via explicit lookup must not happen -TEST ls $M0/mdatafile +TEST getfattr -d -m. -e hex $M0/mdatafile +TEST ls $M0/mdatafile-backend-direct-modify -#Inode refresh must not trigger data and entry heals. +TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]] + +#Inode refresh must not trigger data metadata and entry heals.  #To trigger inode refresh for sure, the volume is unmounted and mounted each time.  #Check that data heal does not happen.  EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 @@ -52,7 +57,6 @@ TEST cat $M0/datafile  EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0  TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;  TEST ls $M0/dir -  #No heal must have happened  EXPECT 8 get_pending_heal_count $V0 @@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on  TEST $CLI volume set $V0 cluster.metadata-self-heal on  TEST $CLI volume set $V0 cluster.entry-self-heal on -#Metadata heal is triggered by lookup without need for inode refresh. -TEST ls $M0/mdatafile -EXPECT 7 get_pending_heal_count $V0 - -#Inode refresh must trigger data and entry heals. +#Inode refresh must trigger data metadata and entry heals.  #To trigger inode refresh for sure, the volume is unmounted and mounted each time.  EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0  TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST ls $M0/mdatafile-backend-direct-modify + +TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]] + + +TEST getfattr -d -m. -e hex $M0/mdatafile +EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0 +  TEST cat $M0/datafile  EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0  EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0  TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;  TEST ls $M0/dir -EXPECT 5 get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0  TEST cat  $M0/dir/file1  TEST cat  $M0/dir/file2 @@ -83,5 +91,5 @@ TEST cat  $M0/dir/file3  TEST cat  $M0/dir/file4  TEST cat  $M0/dir/file5 -EXPECT 0 get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0  cleanup; diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t index 45c85d9f67c..37b8fe5c8eb 100644 --- a/tests/bugs/glusterfs/bug-906646.t +++ b/tests/bugs/glusterfs/bug-906646.t @@ -13,7 +13,6 @@ TEST pidof glusterd  TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11  TEST $CLI volume start $V0 -TEST $CLI volume set $V0 cluster.self-heal-daemon off  TEST $CLI volume set $V0 cluster.background-self-heal-count 0  ## Mount FUSE with caching disabled @@ -82,10 +81,15 @@ EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"  # restart the brick process  TEST $CLI volume start $V0 force -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 `expr $brick_id - 1` +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 -cat $pth >/dev/null +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0  # check backends - xattr should not be present anywhere  EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"  EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name" diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1e0c1a7b265..81176c784da 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2692,6 +2692,42 @@ out:          return 0;  } +gf_boolean_t +afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) +{ +        int idx = -1; +        afr_private_t *priv = NULL; +        void *pending_raw = NULL; +        int *pending_int = NULL; +        int i = 0; + +        priv = this->private; +        idx = afr_index_for_transaction_type (type); + +        if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { +                if (pending_raw) { +                        pending_int = pending_raw; + +                        if (ntoh32 (pending_int[idx])) +                                return _gf_true; +                } +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (dict_get_ptr (xdata, priv->pending_key[i], +                                  &pending_raw)) +                        continue; +                if (!pending_raw) +                        continue; +                pending_int = pending_raw; + +                if (ntoh32 (pending_int[idx])) +                        return _gf_true; +        } + +        return _gf_false; +} +  static gf_boolean_t  afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)  { @@ -2718,6 +2754,14 @@ afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)                          continue;                  } +                if (afr_is_pending_set (this, replies[i].xdata, +                                        AFR_METADATA_TRANSACTION)) { +                        /* Let shd do the heal so that lookup is not blocked +                         * on getting metadata lock/doing the heal */ +                        start = _gf_false; +                        break; +                } +                  if (gf_uuid_compare (stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {                          start = _gf_false;                          break; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 3cb69beabff..a1aad6315ac 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2180,44 +2180,6 @@ afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode,  	return 0;  } - -gf_boolean_t -afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) -{ -	int idx = -1; -	afr_private_t *priv = NULL; -	void *pending_raw = NULL; -	int *pending_int = NULL; -	int i = 0; - -	priv = this->private; -	idx = afr_index_for_transaction_type (type); - -	if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { -		if (pending_raw) { -			pending_int = pending_raw; - -			if (ntoh32 (pending_int[idx])) -				return _gf_true; -		} -	} - -	for (i = 0; i < priv->child_count; i++) { -		if (dict_get_ptr (xdata, priv->pending_key[i], -				  &pending_raw)) -			continue; -		if (!pending_raw) -			continue; -		pending_int = pending_raw; - -		if (ntoh32 (pending_int[idx])) -			return _gf_true; -	} - -	return _gf_false; -} - -  gf_boolean_t  afr_is_data_set (xlator_t *this, dict_t *xdata)  { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 68087e0ea20..6fc555148c4 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1229,4 +1229,7 @@ afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);  int  afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode); + +gf_boolean_t +afr_is_pending_set (xlator_t *this, dict_t *xdata, int type);  #endif /* __AFR_H__ */ | 
