From b8106d1127f034ffa88b5dd322c23a10e023b9b6 Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Mon, 1 Feb 2016 11:46:08 +0530 Subject: cluster/afr: Fix heal-info slow response while IO is in progress Now heal-info does an open() on the file being examined so that the client at some point sees open-fd count being > 1 and releases the eager-lock so that heal-info doesn't remain blocked forever until IO completes. Change-Id: Icc478098e2bc7234408728b54d8185102b3540dc BUG: 1297695 Signed-off-by: Krutika Dhananjay Reviewed-on: http://review.gluster.org/13326 Reviewed-by: Ravishankar N Smoke: Gluster Build System Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System --- tests/bugs/replicate/bug-1297695.t | 43 ++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 17 ++++++++++- xlators/cluster/afr/src/afr-self-heal-data.c | 37 ++++++++++++------------ xlators/cluster/afr/src/afr.h | 4 +++ 4 files changed, 82 insertions(+), 19 deletions(-) create mode 100644 tests/bugs/replicate/bug-1297695.t diff --git a/tests/bugs/replicate/bug-1297695.t b/tests/bugs/replicate/bug-1297695.t new file mode 100644 index 00000000000..e0f431684e8 --- /dev/null +++ b/tests/bugs/replicate/bug-1297695.t @@ -0,0 +1,43 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup + +function write_to_file { + dd of=$M0/dir/file if=/dev/urandom bs=1024k count=128 2>&1 >/dev/null +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 + +TEST $CLI volume set $V0 eager-lock on +TEST $CLI volume set $V0 post-op-delay-secs 3 +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.self-heal-daemon off + +TEST $CLI volume start $V0 +TEST $CLI volume profile $V0 start +TEST $CLI volume set $V0 ensure-durability off +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST mkdir $M0/dir +TEST touch $M0/dir/file + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST `echo 'abc' > $M0/dir/file` + +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +write_to_file & +#Test if the MAX [F]INODELK fop latency is of the order of seconds. +EXPECT "^1$" get_pending_heal_count $V0 +inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. 
-f 1 | egrep "[0-9]{7,}") +TEST [ -z $inodelk_max_latency ] +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f33e00a76a0..457f7865cec 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4513,12 +4513,13 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, gf_boolean_t *pflag) { int ret = -1; - afr_private_t *priv = NULL; unsigned char *locked_on = NULL; unsigned char *data_lock = NULL; unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; struct afr_reply *locked_replies = NULL; priv = this->private; @@ -4528,6 +4529,18 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + /* Heal-info does an open() on the file being examined so that the + * current eager-lock holding client, if present, at some point sees + * open-fd count being > 1 and releases the eager-lock so that heal-info + * doesn't remain blocked forever until IO completes. 
+ */ + ret = afr_selfheal_data_open (this, inode, &fd); + if (ret < 0) { + gf_msg_debug (this->name, -ret, "%s: Failed to open", + uuid_utoa (inode->gfid)); + goto out; + } + locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, @@ -4565,6 +4578,8 @@ unlock: out: if (locked_replies) afr_replies_wipe (locked_replies, priv->child_count); + if (fd) + fd_unref (fd); return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 45b4c53dbaf..27d2849c158 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -798,31 +798,32 @@ out: } -static fd_t * -afr_selfheal_data_open (xlator_t *this, inode_t *inode) +int +afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd) { - loc_t loc = {0,}; - int ret = 0; - fd_t *fd = NULL; + int ret = 0; + fd_t *fd_tmp = NULL; + loc_t loc = {0,}; - fd = fd_create (inode, 0); - if (!fd) - return NULL; + fd_tmp = fd_create (inode, 0); + if (!fd_tmp) + return -ENOMEM; loc.inode = inode_ref (inode); gf_uuid_copy (loc.gfid, inode->gfid); - ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd, NULL, NULL); - if (ret) { - fd_unref (fd); - fd = NULL; + ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL, NULL); + if (ret < 0) { + fd_unref (fd_tmp); + goto out; } else { - fd_bind (fd); + fd_bind (fd_tmp); } - loc_wipe (&loc); - - return fd; + *fd = fd_tmp; +out: + loc_wipe (&loc); /* drop the inode ref held by loc on success and failure alike */ + return ret; } int @@ -835,9 +836,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode) priv = this->private; - fd = afr_selfheal_data_open (this, inode); + ret = afr_selfheal_data_open (this, inode, &fd); if (!fd) { - gf_msg_debug (this->name, 0, "%s: Failed to open", + gf_msg_debug (this->name, -ret, "%s: Failed to open", uuid_utoa (inode->gfid)); return -EIO; } diff --git a/xlators/cluster/afr/src/afr.h
b/xlators/cluster/afr/src/afr.h index c78163506e8..8e6d7bde963 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1092,4 +1092,8 @@ afr_get_need_heal (xlator_t *this); void afr_set_need_heal (xlator_t *this, afr_local_t *local); + +int +afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd); + #endif /* __AFR_H__ */ -- cgit