diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2013-02-20 09:53:41 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-05-08 20:52:37 -0700 |
commit | 2c80052dbe5aca895a13597e36add51f796000e0 (patch) | |
tree | 9832b605ced080a650270b3139fec88ff234620a | |
parent | 0762a610296dc0f9445f0c9f9261b449cadb0f0d (diff) |
cluster/afr: Don't queue transactions during open-fd fix
Before Anonymous fds are available, afr had to queue up
transactions if the file is not opened on one of its
subvolumes. This happens until the attempt to open the
file either succeeds or fails. These attempts happen
until the file is successfully opened on the subvolume.
Now client xlator uses anonymous fds to perform the fops
if the fd used for the fop is not 'opened'.
Fops will be successful even when the file is not opened
so there is no need to queue up the transactions anymore in afr.
Open is attempted on the subvolume where it is not
opened independent of the fop.
Change-Id: I6d59293023e2de41c606395028c8980b83faca3f
BUG: 953887
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4868
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rwxr-xr-x | tests/bugs/bug-853258.t | 4 | ||||
-rw-r--r-- | tests/bugs/bug-913051.t | 65 | ||||
-rw-r--r-- | tests/bugs/bug-953887.t | 23 | ||||
-rw-r--r-- | tests/volume.rc | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 32 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 252 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 35 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 196 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 23 |
10 files changed, 258 insertions, 388 deletions
diff --git a/tests/bugs/bug-853258.t b/tests/bugs/bug-853258.t index c702e6f30ea..79cb88099f2 100755 --- a/tests/bugs/bug-853258.t +++ b/tests/bugs/bug-853258.t @@ -20,7 +20,7 @@ EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status'; # Force assignment of initial ranges. TEST $CLI volume rebalance $V0 fix-layout start -EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 +EXPECT_WITHIN 15 "completed" rebalance_status_field $V0 # Get the original values. xattrs="" @@ -32,7 +32,7 @@ done TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 # Force assignment of initial ranges. TEST $CLI volume rebalance $V0 fix-layout start -EXPECT_WITHIN 15 "success:" rebalance_status_field $V0 +EXPECT_WITHIN 15 "completed" rebalance_status_field $V0 for i in $(seq 0 3); do xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)" diff --git a/tests/bugs/bug-913051.t b/tests/bugs/bug-913051.t new file mode 100644 index 00000000000..69e90cf66c2 --- /dev/null +++ b/tests/bugs/bug-913051.t @@ -0,0 +1,65 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../fileio.rc + +cleanup; + +#Test that afr opens the file on the bricks that were offline at the time of +# open after the brick comes online. This tests for writev, readv triggering +# open-fd-fix in afr. +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +TEST $CLI volume start $V0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST mkdir $M0/dir +TEST touch $M0/dir/a +TEST touch $M0/dir/b +echo abc > $M0/dir/b + +TEST wfd=`fd_available` +TEST fd_open $wfd "w" $M0/dir/a +TEST rfd=`fd_available` +TEST fd_open $rfd "r" $M0/dir/b + +TEST $CLI volume start $V0 force +EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0 + +#check that the files are not opned on brick-0 +realpatha=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/a") +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpatha" +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/a + +realpathb=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/b") +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpathb" +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b + +#attempt self-heal so that the files are created on brick-0 + +TEST ls -l $M0/dir/a +TEST ls -l $M0/dir/b + +#trigger writev for attempting open-fd-fix in afr +TEST fd_write $wfd "open sesame" + +#trigger readv for attempting open-fd-fix in afr +TEST fd_cat $rfd + +EXPECT_WITHIN 20 "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/a +EXPECT_WITHIN 20 "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b + +TEST fd_close $wfd +TEST fd_close $rfd +cleanup; diff --git a/tests/bugs/bug-953887.t b/tests/bugs/bug-953887.t new file mode 100644 index 00000000000..d926473efc4 --- /dev/null +++ b/tests/bugs/bug-953887.t @@ -0,0 +1,23 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../fileio.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST touch $M0/10 +TEST fd=`fd_available` +TEST fd_open $fd 'w' $M0/10 +TEST gluster volume add-brick $V0 $H0:$B0/${V0}{2,3} +TEST gluster volume rebalance $V0 start +EXPECT_WITHIN 15 "completed" rebalance_status_field $V0 +TEST cat $M0/10 +TEST fd_write $fd "abc" +EXPECT "abc" echo "$(cat $M0/10)" +cleanup diff --git a/tests/volume.rc b/tests/volume.rc index 9debe2b997f..044333a83b4 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -27,7 +27,7 @@ function volume_option() } function rebalance_status_field { - $CLI volume rebalance $1 status | sed -n '$p' | cut -d' ' -f4 + $CLI volume rebalance $1 status | awk '{print $6}' | sed -n 3p } function remove_brick_status_completed_field { diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index eff49b58a2e..537b9c2062a 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -887,8 +887,6 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->fresh_children); - GF_FREE (local->fd_open_on); - { /* lookup */ if (local->cont.lookup.xattrs) { afr_reset_xattr (local->cont.lookup.xattrs, @@ -2456,7 +2454,6 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd) } pthread_mutex_init (&fd_ctx->delay_lock, NULL); - INIT_LIST_HEAD (&fd_ctx->paused_calls); INIT_LIST_HEAD (&fd_ctx->entries); fd_ctx->call_child = -1; @@ -2575,8 +2572,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) uint64_t ctx = 0; afr_fd_ctx_t *fd_ctx = NULL; int ret = 0; - afr_fd_paused_call_t *paused_call = NULL; - afr_fd_paused_call_t *tmp = NULL; ret = fd_ctx_get (fd, this, &ctx); if (ret < 0) @@ -2592,12 +2587,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) GF_FREE (fd_ctx->locked_on); GF_FREE (fd_ctx->pre_op_piggyback); - list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls, - call_list) { - list_del_init (&paused_call->call_list); - GF_FREE (paused_call); - } - GF_FREE (fd_ctx->lock_piggyback); GF_FREE (fd_ctx->lock_acquired); @@ -3986,14 +3975,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this) if (!local->fresh_children) goto out; - if (local->fd) { - local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on), - priv->child_count, - gf_afr_mt_char); - if (!local->fd_open_on) - goto out; - } - local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op), priv->child_count, gf_afr_mt_char); @@ -4263,3 +4244,16 @@ afr_prepare_new_entry_pending_matrix (int32_t **pending, } } } + +gf_boolean_t +afr_is_fd_fixable (fd_t *fd) +{ + if (!fd || !fd->inode) + return _gf_false; + else if (fd_is_anonymous (fd)) + return _gf_false; + else if (uuid_is_null (fd->inode->gfid)) + return _gf_false; + + return _gf_true; +} diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 1263749b7fd..c798e19328f 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -382,11 +382,8 @@ afr_fstat (call_frame_t *frame, xlator_t *this, local->fd = fd_ref (fd); - ret = afr_open_fd_fix (frame, this, _gf_false); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); + STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child, children[call_child], children[call_child]->fops->fstat, @@ -1856,11 +1853,8 @@ afr_readv (call_frame_t *frame, xlator_t *this, local->cont.readv.offset = offset; local->cont.readv.flags = flags; - ret = afr_open_fd_fix (frame, this, _gf_false); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); + STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) call_child, children[call_child], diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 99fa027d03c..a83223569d0 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -328,153 +328,74 @@ out: return 0; } -static int -afr_prepare_loc (call_frame_t *frame, fd_t *fd) +static void +afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this) { - afr_local_t *local = NULL; - char *name = NULL; - char *path = NULL; - int ret = 0; - - if ((!fd) || (!fd->inode)) - return -1; - - local = frame->local; - ret = inode_path (fd->inode, NULL, (char **)&path); - if (ret <= 0) { - gf_log (frame->this->name, GF_LOG_DEBUG, - "Unable to get path for gfid: %s", - uuid_utoa (fd->inode->gfid)); - return -1; - } - - if (local->loc.path) { - if (strcmp (path, local->loc.path)) - gf_log (frame->this->name, GF_LOG_DEBUG, - "overwriting old loc->path %s with %s", - local->loc.path, path); - GF_FREE ((char *)local->loc.path); - } - local->loc.path = path; - - name = strrchr (local->loc.path, '/'); - if (name) - name++; - local->loc.name = name; - - if (local->loc.inode) { - inode_unref (local->loc.inode); - } - local->loc.inode = inode_ref (fd->inode); - - if (local->loc.parent) { - inode_unref (local->loc.parent); + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + char *reason = NULL; + int32_t op_errno = 0; + int ret = 0; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: " + "fd: %p, inode: %p", fd, + fd ? fd->inode : NULL); + goto out; } - local->loc.parent = inode_parent (local->loc.inode, 0, NULL); - - return 0; -} - -afr_fd_paused_call_t* -afr_paused_call_create (call_frame_t *frame) -{ - afr_local_t *local = NULL; - afr_fd_paused_call_t *paused_call = NULL; + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); local = frame->local; - GF_ASSERT (local->fop_call_continue); - - paused_call = GF_CALLOC (1, sizeof (*paused_call), - gf_afr_fd_paused_call_t); - if (paused_call) { - INIT_LIST_HEAD (&paused_call->call_list); - paused_call->frame = frame; - } - - return paused_call; -} - -static int -afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx) -{ - afr_fd_paused_call_t *paused_call = NULL; - int ret = 0; - - paused_call = afr_paused_call_create (frame); - if (paused_call) - list_add (&paused_call->call_list, &fd_ctx->paused_calls); - else - ret = -ENOMEM; - - return ret; -} + ret = afr_local_init (local, this->private, &op_errno); + if (ret < 0) + goto out; -static void -afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - inode_t *inode = NULL; - char *reason = NULL; + local->loc.inode = inode_ref (fd->inode); + ret = loc_path (&local->loc, NULL); + if (ret < 0) + goto out; - local = frame->local; sh = &local->self_heal; - inode = local->fd->inode; - - sh->do_missing_entry_self_heal = _gf_true; - sh->do_gfid_self_heal = _gf_true; - sh->do_data_self_heal = _gf_true; + sh->do_metadata_self_heal = _gf_true; + if (fd->inode->ia_type == IA_IFREG) + sh->do_data_self_heal = _gf_true; + else if (fd->inode->ia_type == IA_IFDIR) + sh->do_entry_self_heal = _gf_true; reason = "subvolume came online"; - afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type, - reason, NULL, NULL); + afr_launch_self_heal (frame, this, fd->inode, _gf_true, + fd->inode->ia_type, reason, NULL, NULL); + return; +out: + AFR_STACK_DESTROY (frame); } -int -afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop) +void +afr_open_fd_fix (fd_t *fd, xlator_t *this) { - int ret = 0; - int i = 0; - afr_fd_ctx_t *fd_ctx = NULL; - inode_t *inode = NULL; - gf_boolean_t need_self_heal = _gf_false; - int *need_open = NULL; - int need_open_count = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - gf_boolean_t fop_continue = _gf_true; + int ret = 0; + int i = 0; + afr_fd_ctx_t *fd_ctx = NULL; + gf_boolean_t need_self_heal = _gf_false; + int *need_open = NULL; + size_t need_open_count = 0; + afr_private_t *priv = NULL; - local = frame->local; priv = this->private; - GF_ASSERT (local->fd); - - inode = local->fd->inode; - //gfid is not set in rebalance, that case needs to be handled. - if (fd_is_anonymous (local->fd) || - !inode || uuid_is_null (inode->gfid)) { - fop_continue = _gf_true; - goto out; - } - - if (pause_fop) - GF_ASSERT (local->fop_call_continue); - - ret = afr_prepare_loc (frame, local->fd); - if (ret < 0) { - //File does not exist we cant open it. - ret = 0; + if (!afr_is_fd_fixable (fd)) goto out; - } - fd_ctx = afr_fd_ctx_get (local->fd, this); - if (!fd_ctx) { - ret = -EINVAL; + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) goto out; - } - LOCK (&local->fd->lock); + LOCK (&fd->lock); { if (fd_ctx->up_count < priv->up_count) { need_self_heal = _gf_true; @@ -482,55 +403,34 @@ afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop) fd_ctx->down_count = priv->down_count; } + need_open = alloca (priv->child_count * sizeof (*need_open)); for (i = 0; i < priv->child_count; i++) { - if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) && - local->child_up[i]) { - fd_ctx->opened_on[i] = AFR_FD_OPENING; - if (!need_open) - need_open = GF_CALLOC (priv->child_count, - sizeof (*need_open), - gf_afr_mt_int32_t); - need_open[i] = 1; - need_open_count++; - } else if (pause_fop && local->child_up[i] && - (fd_ctx->opened_on[i] == AFR_FD_OPENING)) { - local->fop_paused = _gf_true; - } - } + need_open[i] = 0; + if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED) + continue; + + if (!priv->child_up[i]) + continue; + + fd_ctx->opened_on[i] = AFR_FD_OPENING; - if (local->fop_paused) { - GF_ASSERT (pause_fop); - gf_log (this->name, GF_LOG_INFO, "Pause fd %p", - local->fd); - ret = afr_pause_fd_fop (frame, this, fd_ctx); - if (ret) - goto unlock; - fop_continue = _gf_false; + need_open[i] = 1; + need_open_count++; } } -unlock: - UNLOCK (&local->fd->lock); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s", - local->loc.path); - fop_continue = _gf_false; + UNLOCK (&fd->lock); + if (ret) goto out; - } if (need_self_heal) - afr_trigger_open_fd_self_heal (frame, this); + afr_trigger_open_fd_self_heal (fd, this); if (!need_open_count) goto out; - gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd); - afr_fix_open (frame, this, fd_ctx, need_open_count, need_open); - fop_continue = _gf_false; + afr_fix_open (this, fd, need_open_count, need_open); out: - GF_FREE (need_open); - if (fop_continue && local->fop_call_continue) - local->fop_call_continue (frame, this); - return ret; + return; } int @@ -570,13 +470,10 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, local->cont.writev.iobref = iobref_ref (iobref); local->fd = fd_ref (fd); - local->fop_call_continue = afr_do_writev; - ret = afr_open_fd_fix (frame, this, _gf_true); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); + + afr_do_writev (frame, this); ret = 0; out: @@ -1026,13 +923,10 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, local->cont.ftruncate.offset = offset; local->fd = fd_ref (fd); - local->fop_call_continue = afr_do_ftruncate; - ret = afr_open_fd_fix (frame, this, _gf_true); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); + + afr_do_ftruncate (frame, this); ret = 0; out: @@ -1448,11 +1342,7 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this, local->fd = fd_ref (fd); - ret = afr_open_fd_fix (transaction_frame, this, _gf_false); - if (ret) { - op_errno = -ret; - goto out; - } + afr_open_fd_fix (fd, this); local->transaction.main_frame = frame; local->transaction.start = LLONG_MAX - 1; diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index cb74fc80e56..e091a7939fe 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -148,16 +148,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this) local = frame->local; priv = this->private; - if (local->fd) { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && local->fd_open_on[i]) - ++call_count; - } - } else { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) - ++call_count; - } + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + ++call_count; } return call_count; @@ -1024,8 +1017,6 @@ _is_lock_wind_needed (afr_local_t *local, int child_index) { if (!local->child_up[child_index]) return _gf_false; - else if (local->fd && !local->fd_open_on[child_index]) - return _gf_false; return _gf_true; } @@ -1293,20 +1284,6 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -void -afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx, - size_t child_count) -{ - int i = 0; - - GF_ASSERT (local->fd_open_on); - - memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count); - for (i = 0; i < child_count; i++) - if (fd_ctx->opened_on[i] == AFR_FD_OPENED) - local->fd_open_on[i] = 1; -} - int afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) { @@ -1342,7 +1319,6 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) return -1; } - afr_mark_fd_open_on (local, fd_ctx, priv->child_count); call_count = int_lock->lockee_count * internal_lock_count (frame, this); int_lock->lk_call_count = call_count; int_lock->lk_expected_count = call_count; @@ -1359,7 +1335,7 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { index = i%copies; lockee_no = i/copies; - if (local->child_up[index] && local->fd_open_on[index]) { + if (local->child_up[index]) { AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, AFR_LOCK_OP, int_lock->lockee[lockee_no].basename, @@ -1536,7 +1512,6 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) goto out; } - afr_mark_fd_open_on (local, fd_ctx, priv->child_count); call_count = internal_lock_count (frame, this); int_lock->lk_call_count = call_count; int_lock->lk_expected_count = call_count; @@ -1551,7 +1526,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) /* Send non-blocking inodelk calls only on up children and where the fd has been opened */ for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i] || !local->fd_open_on[i]) + if (!local->child_up[i]) continue; flock_use = &flock; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index c0be197f212..643a5d692df 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -249,189 +249,126 @@ out: return 0; } -//NOTE: this function should be called with holding the lock on -//fd to which fd_ctx belongs -void -afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx, - struct list_head *list) -{ - afr_fd_paused_call_t *paused_call = NULL; - afr_fd_paused_call_t *tmp = NULL; - afr_local_t *call_local = NULL; - afr_private_t *priv = NULL; - int i = 0; - gf_boolean_t call = _gf_false; - - priv = this->private; - list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls, - call_list) { - call = _gf_true; - call_local = paused_call->frame->local; - for (i = 0; i < priv->child_count; i++) { - if (call_local->child_up[i] && - (fd_ctx->opened_on[i] == AFR_FD_OPENING)) - call = _gf_false; - } - - if (call) { - list_del_init (&paused_call->call_list); - list_add (&paused_call->call_list, list); - } - } -} - -void -afr_resume_calls (xlator_t *this, struct list_head *list) -{ - afr_fd_paused_call_t *paused_call = NULL; - afr_fd_paused_call_t *tmp = NULL; - afr_local_t *call_local = NULL; - - list_for_each_entry_safe (paused_call, tmp, list, call_list) { - list_del_init (&paused_call->call_list); - call_local = paused_call->frame->local; - call_local->fop_call_continue (paused_call->frame, this); - GF_FREE (paused_call); - } -} - int afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - int call_count = 0; - int child_index = (long) cookie; - struct list_head paused_calls = {0}; - gf_boolean_t fop_paused = _gf_false; - fd_t *local_fd = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int call_count = 0; + int child_index = (long) cookie; priv = this->private; local = frame->local; - fop_paused = local->fop_paused; if (op_ret >= 0) { - gf_log (this->name, GF_LOG_INFO, "fd for %s opened " + gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened " "successfully on subvolume %s", local->loc.path, priv->children[child_index]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, "Failed to open %s " + "on subvolume %s", local->loc.path, + priv->children[child_index]->name); } - local_fd = fd_ref (local->fd); - call_count = afr_frame_return (frame); - //Note: Do not access any thing using the frame outside call_count 0 - - //Note: No frame locking needed for this block of code - fd_ctx = afr_fd_ctx_get (local_fd, this); + fd_ctx = afr_fd_ctx_get (local->fd, this); if (!fd_ctx) { gf_log (this->name, GF_LOG_WARNING, - "failed to get fd context, %p", local_fd); + "failed to get fd context, %p", local->fd); goto out; } - LOCK (&local_fd->lock); + LOCK (&local->fd->lock); { if (op_ret >= 0) { fd_ctx->opened_on[child_index] = AFR_FD_OPENED; } else { fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED; } - if (call_count == 0) { - INIT_LIST_HEAD (&paused_calls); - afr_get_resumable_calls (this, fd_ctx, &paused_calls); - } } - UNLOCK (&local_fd->lock); + UNLOCK (&local->fd->lock); out: - if (call_count == 0) { - afr_resume_calls (this, &paused_calls); - //If the fop is paused then resume_calls will continue the fop - if (fop_paused) - goto done; - - if (local->fop_call_continue) - local->fop_call_continue (frame, this); - else - AFR_STACK_DESTROY (frame); - } + call_count = afr_frame_return (frame); + if (call_count == 0) + AFR_STACK_DESTROY (frame); -done: - fd_unref (local_fd); return 0; } -int -afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx, - int need_open_count, int *need_open) +void +afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - int i = 0; - call_frame_t *open_frame = NULL; - afr_local_t *open_local = NULL; - int ret = -1; - ia_type_t ia_type = IA_INVAL; - int32_t op_errno = 0; - - GF_ASSERT (fd_ctx); - GF_ASSERT (need_open_count > 0); - GF_ASSERT (need_open); + afr_private_t *priv = NULL; + int i = 0; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = -1; + int32_t op_errno = 0; + afr_fd_ctx_t *fd_ctx = NULL; - local = frame->local; priv = this->private; - if (!local->fop_call_continue) { - open_frame = copy_frame (frame); - if (!open_frame) { - ret = -ENOMEM; - goto out; - } - AFR_LOCAL_ALLOC_OR_GOTO (open_frame->local, out); - open_local = open_frame->local; - ret = afr_local_init (open_local, priv, &op_errno); - if (ret < 0) - goto out; - loc_copy (&open_local->loc, &local->loc); - open_local->fd = fd_ref (local->fd); - } else { - ret = 0; - open_frame = frame; - open_local = local; + + if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count) + goto out; + + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) { + ret = -1; + goto out; } - open_local->call_count = need_open_count; + frame = create_frame (this, this->ctx->pool); + if (!frame) { + ret = -1; + goto out; + } - gf_log (this->name, GF_LOG_DEBUG, "need open count: %d", + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + local->loc.inode = inode_ref (fd->inode); + ret = loc_path (&local->loc, NULL); + if (ret < 0) + goto out; + + local->fd = fd_ref (fd); + local->call_count = need_open_count; + + gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd", need_open_count); - ia_type = open_local->fd->inode->ia_type; - GF_ASSERT (ia_type != IA_INVAL); for (i = 0; i < priv->child_count; i++) { if (!need_open[i]) continue; - if (IA_IFDIR == ia_type) { + + if (IA_IFDIR == fd->inode->ia_type) { gf_log (this->name, GF_LOG_DEBUG, "opening fd for dir %s on subvolume %s", local->loc.path, priv->children[i]->name); - STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk, + STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk, (void*) (long) i, priv->children[i], priv->children[i]->fops->opendir, - &open_local->loc, open_local->fd, + &local->loc, local->fd, NULL); } else { gf_log (this->name, GF_LOG_DEBUG, "opening fd for file %s on subvolume %s", local->loc.path, priv->children[i]->name); - STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk, + STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk, (void *)(long) i, priv->children[i], priv->children[i]->fops->open, - &open_local->loc, + &local->loc, fd_ctx->flags & (~O_TRUNC), - open_local->fd, NULL); + local->fd, NULL); } } @@ -439,8 +376,7 @@ afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx, ret = 0; out: if (op_errno) - ret = -op_errno; - if (ret && open_frame) - AFR_STACK_DESTROY (open_frame); - return ret; + ret = -1; //For handling ALLOC_OR_GOTO + if (ret && frame) + AFR_STACK_DESTROY (frame); } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f4ebf435699..978339017a2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -425,7 +425,6 @@ typedef struct _afr_local { loc_t newloc; fd_t *fd; - unsigned char *fd_open_on; glusterfs_fop_t fop; @@ -449,9 +448,6 @@ typedef struct _afr_local { int optimistic_change_log; gf_boolean_t delayed_post_op; - gf_boolean_t fop_paused; - int (*fop_call_continue) (call_frame_t *frame, xlator_t *this); - /* This struct contains the arguments for the "continuation" (scheme-like) of fops @@ -703,11 +699,6 @@ typedef enum { } afr_fd_open_status_t; typedef struct { - struct list_head call_list; - call_frame_t *frame; -} afr_fd_paused_call_t; - -typedef struct { unsigned int *pre_op_done; afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */ unsigned int *pre_op_piggyback; @@ -726,7 +717,6 @@ typedef struct { struct list_head entries; /* needed for readdir failover */ unsigned char *locked_on; /* which subvolumes locks have been successful */ - struct list_head paused_calls; /* queued calls while fix_open happens */ /* used for delayed-post-op optimization */ pthread_mutex_t delay_lock; @@ -1021,11 +1011,11 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode, int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret, int32_t op_errno, int32_t sh_failed)); -int -afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx, - int need_open_count, int *need_open); -int -afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop); +void +afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open); + +void +afr_open_fd_fix (fd_t *fd, xlator_t *this); int afr_set_elem_count_get (unsigned char *elems, int child_count); @@ -1059,6 +1049,9 @@ afr_is_errno_set (int *child_errno, int child); gf_boolean_t afr_is_errno_unset (int *child_errno, int child); +gf_boolean_t +afr_is_fd_fixable (fd_t *fd); + void afr_prepare_new_entry_pending_matrix (int32_t **pending, gf_boolean_t (*is_pending) (int *, int), |