diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2013-07-30 18:29:15 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-08-02 02:25:55 -0700 |
commit | 36b102645ab66d141dcc831e67caa78966d45419 (patch) | |
tree | 4fb743b70123e74f31b8230a9301c0e34589864d | |
parent | c95db3046c672473611d9ac0ab6cd93bd8211347 (diff) |
cluster/afr: Disable eager-lock if open-fd-count > 1
Suppose mount1 holds an eager-lock (full-lock) and, after the eager-lock
is taken, mount2 opens the same file. mount2 will not be able to
perform any data operations until mount1 releases the eager-lock.
To avoid such a scenario, do not enable eager-lock for a transaction
if open-fd-count is > 1. Delaying the changelog post-op (piggybacking) is
also avoided in this situation.
Change-Id: I51b45d6a7c216a78860aff0265a0b8dabc6423a5
BUG: 910217
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/5432
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: venkatesh somyajulu <vsomyaju@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | tests/bugs/bug-921231.t | 31 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 25 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 33 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 36 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 9 |
5 files changed, 129 insertions, 5 deletions
diff --git a/tests/bugs/bug-921231.t b/tests/bugs/bug-921231.t new file mode 100644 index 00000000000..db9cf3b6f06 --- /dev/null +++ b/tests/bugs/bug-921231.t @@ -0,0 +1,31 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +# This test writes to same file with 2 fds and tests that eager-lock is not +# causing extra delay because of post-op-delay-secs +cleanup; + +function write_to_file { + dd of=$M0/1 if=/dev/zero bs=1M count=128 oflag=append 2>&1 >/dev/null +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 +TEST $CLI volume set $V0 eager-lock on +TEST $CLI volume set $V0 post-op-delay-secs 3 +TEST $CLI volume set $V0 client-log-level DEBUG +TEST $CLI volume start $V0 +TEST $CLI volume profile $V0 start +TEST $CLI volume set $V0 ensure-durability off +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +write_to_file & +write_to_file & +wait +#Test if the MAX [F]INODELK fop latency is of the order of seconds. +inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. 
-f 1 | egrep "[0-9]{7,}") +TEST [ -z $inodelk_max_latency ] + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 075f33c618b..79644b74086 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4890,3 +4890,28 @@ afr_is_fd_fixable (fd_t *fd) return _gf_true; } + +void +afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + inode_t *inode = NULL; + afr_inode_ctx_t *ctx = NULL; + + local = frame->local; + + if (local->fd) + inode = local->fd->inode; + else + inode = local->loc.inode; + + if (!inode) + return; + + LOCK (&inode->lock); + { + ctx = __afr_inode_ctx_get (inode, this); + ctx->open_fd_count = local->open_fd_count; + } + UNLOCK (&inode->lock); +} diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index ce4fbf22698..68570f15afe 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -137,6 +137,8 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int child_index = (long) cookie; int call_count = -1; int read_child = 0; + int ret = 0; + uint32_t open_fd_count = 0; local = frame->local; @@ -162,6 +164,17 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret != -1) { + if (xdata) { + ret = dict_get_uint32 (xdata, + GLUSTERFS_OPEN_FD_COUNT, + &open_fd_count); + if ((ret == 0) && + (open_fd_count > local->open_fd_count)) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; + } + } + if ((local->success_count == 0) || (child_index == read_child)) { local->cont.writev.prebuf = *prebuf; @@ -176,8 +189,11 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (call_count == 0) { - if (!local->stable_write) - afr_fd_report_unstable_write (this, local->fd); + if (local->update_open_fd_count) + afr_handle_open_fd_count (frame, 
this); + + if (!local->stable_write) + afr_fd_report_unstable_write (this, local->fd); afr_writev_handle_short_writes (frame, this); /* @@ -206,6 +222,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) afr_private_t *priv = NULL; int i = 0; int call_count = -1; + dict_t *xdata = NULL; + GF_UNUSED int ret = 0; local = frame->local; priv = this->private; @@ -229,6 +247,12 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) return 0; } + xdata = dict_new (); + if (xdata) { + ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT, + sizeof (uint32_t)); + } + for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) { STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, @@ -241,13 +265,16 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this) local->cont.writev.offset, local->cont.writev.flags, local->cont.writev.iobref, - NULL); + xdata); if (!--call_count) break; } } + if (xdata) + dict_unref (xdata); + return 0; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index def00e28834..817645505c6 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1308,6 +1308,34 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) local->delayed_post_op = _gf_true; } +gf_boolean_t +afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this) +{ + afr_inode_ctx_t *ictx = NULL; + + if (!inode) { + /* If false is returned, it may keep on taking eager-lock + * which may lead to starvation, so return true to avoid that. + */ + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode"); + return _gf_true; + } + /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock + * is taken mount2 opened the same file, it won't be able to + * perform any data operations until mount1 releases eager-lock. 
+ * To avoid such scenario do not enable eager-lock for this transaction + * if open-fd-count is > 1 + */ + + ictx = afr_inode_ctx_get (inode, this); + if (!ictx) + return _gf_true; + + if (ictx->open_fd_count > 1) + return _gf_true; + + return _gf_false; +} gf_boolean_t is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) @@ -1322,6 +1350,9 @@ is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) if (!local->delayed_post_op) goto out; + if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this)) + goto out; + res = _gf_true; out: return res; @@ -1753,8 +1784,7 @@ afr_locals_overlap (afr_local_t *local1, afr_local_t *local2) return ((end1 >= start2) && (end2 >= start1)); } - - void +void afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this) { afr_private_t *priv = NULL; @@ -1776,6 +1806,8 @@ afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this) if (!fdctx) return; + if (afr_are_multiple_fds_opened (local->fd->inode, this)) + return; /* * Once full file lock is acquired in eager-lock phase, overlapping * writes do not compete for inode-locks, instead are transferred to the diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c6e6913df29..49d281acae1 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -87,6 +87,7 @@ typedef struct afr_inode_ctx_ { int32_t *fresh_children;//increasing order of latency afr_spb_state_t mdata_spb; afr_spb_state_t data_spb; + uint32_t open_fd_count; } afr_inode_ctx_t; typedef enum { @@ -445,6 +446,8 @@ typedef struct _afr_local { unsigned int call_count; unsigned int success_count; unsigned int enoent_count; + uint32_t open_fd_count; + gf_boolean_t update_open_fd_count; unsigned int unhealable; @@ -1171,4 +1174,10 @@ afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub); int afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count); +void 
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this); + +afr_inode_ctx_t* +afr_inode_ctx_get (inode_t *inode, xlator_t *this); + #endif /* __AFR_H__ */ |