diff options
| -rw-r--r-- | tests/bugs/bug-921231.t | 31 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 25 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 33 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 36 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 9 | 
5 files changed, 129 insertions, 5 deletions
diff --git a/tests/bugs/bug-921231.t b/tests/bugs/bug-921231.t
new file mode 100644
index 00000000..db9cf3b6
--- /dev/null
+++ b/tests/bugs/bug-921231.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# This test writes to same file with 2 fds and tests that eager-lock is not
+# causing extra delay because of post-op-delay-secs
+cleanup;
+
+function write_to_file {
+        dd of=$M0/1 if=/dev/zero bs=1M count=128 oflag=append >/dev/null 2>&1
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+write_to_file &
+write_to_file &
+wait
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 075f33c6..79644b74 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4890,3 +4890,28 @@ afr_is_fd_fixable (fd_t *fd)
 
         return _gf_true;
 }
+
+/* Record local->open_fd_count in the AFR inode ctx of the inode taken from
+ * local->fd (or local->loc when there is no fd), under inode->lock.
+ * No-op when the inode is missing or __afr_inode_ctx_get() returns NULL.
+ */
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        inode_t         *inode = NULL;
+        afr_inode_ctx_t *ctx   = NULL;
+
+        local = frame->local;
+        inode = local->fd ? local->fd->inode : local->loc.inode;
+        if (!inode)
+                return;
+
+        LOCK (&inode->lock);
+        {
+                ctx = __afr_inode_ctx_get (inode, this);
+                if (ctx)
+                        ctx->open_fd_count = local->open_fd_count;
+        }
+        UNLOCK (&inode->lock);
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index ce4fbf22..68570f15 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -137,6 +137,8 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         int child_index = (long) cookie;
         int call_count  = -1;
         int read_child  = 0;
+        int      ret = 0;
+        uint32_t open_fd_count = 0;
 
         local = frame->local;
 
@@ -162,6 +164,17 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 		}
 
 		if (op_ret != -1) {
+                        if (xdata) {
+                                ret = dict_get_uint32 (xdata,
+                                                       GLUSTERFS_OPEN_FD_COUNT,
+                                                       &open_fd_count);
+                                if ((ret == 0) &&
+                                    (open_fd_count > local->open_fd_count)) {
+                                        local->open_fd_count = open_fd_count;
+                                        local->update_open_fd_count = _gf_true;
+                                }
+                        }
+
 			if ((local->success_count == 0) ||
 			    (child_index == read_child)) {
 				local->cont.writev.prebuf  = *prebuf;
@@ -176,8 +189,11 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
 
         if (call_count == 0) {
-		if (!local->stable_write)
-			afr_fd_report_unstable_write (this, local->fd);
+                if (local->update_open_fd_count)
+                        afr_handle_open_fd_count (frame, this);
+
+                if (!local->stable_write)
+                        afr_fd_report_unstable_write (this, local->fd);
 
                 afr_writev_handle_short_writes (frame, this);
                 /*
@@ -206,6 +222,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
         afr_private_t *priv = NULL;
         int i = 0;
         int call_count = -1;
+        dict_t *xdata = NULL;
+        GF_UNUSED int     ret = 0;
 
         local = frame->local;
         priv = this->private;
@@ -229,6 +247,12 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
 		return 0;
 	}
 
+        xdata = dict_new ();
+        if (xdata) {
+                ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+                                       sizeof (uint32_t));
+        }
+
         for (i = 0; i < priv->child_count; i++) {
                 if (local->transaction.pre_op[i]) {
                         STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
@@ -241,13 +265,16 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
                                            local->cont.writev.offset,
                                            local->cont.writev.flags,
                                            local->cont.writev.iobref,
-                                           NULL);
+                                           xdata);
 
                         if (!--call_count)
                                 break;
                 }
         }
 
+        if (xdata)
+                dict_unref (xdata);
+
         return 0;
 }
 
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index def00e28..81764550 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1308,6 +1308,34 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
                 local->delayed_post_op = _gf_true;
 }
 
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+        afr_inode_ctx_t *ictx = NULL;
+
+        if (!inode) {
+                /* If false is returned, it may keep on taking eager-lock
+                 * which may lead to starvation, so return true to avoid that.
+                 */
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+                return _gf_true;
+        }
+        /* Let's say mount1 has eager-lock(full-lock) and after the eager-lock
+         * is taken mount2 opened the same file, it won't be able to
+         * perform any data operations until mount1 releases eager-lock.
+         * To avoid such scenario do not enable eager-lock for this transaction
+         * if open-fd-count is > 1
+         */
+
+        ictx = afr_inode_ctx_get (inode, this);
+        if (!ictx)
+                return _gf_true;
+
+        if (ictx->open_fd_count > 1)
+                return _gf_true;
+
+        return _gf_false;
+}
 
 gf_boolean_t
 is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
@@ -1322,6 +1350,9 @@ is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
         if (!local->delayed_post_op)
                 goto out;
 
+        if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+                goto out;
+
         res = _gf_true;
 out:
         return res;
@@ -1753,8 +1784,7 @@ afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
         return ((end1 >= start2) && (end2 >= start1));
 }
 
-
-        void
+void
 afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
 {
         afr_private_t *priv = NULL;
@@ -1776,6 +1806,8 @@ afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
         if (!fdctx)
                 return;
 
+        if (afr_are_multiple_fds_opened (local->fd->inode, this))
+                return;
         /*
          * Once full file lock is acquired in eager-lock phase, overlapping
          * writes do not compete for inode-locks, instead are transferred to the
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c6e6913d..49d281ac 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -87,6 +87,7 @@ typedef struct afr_inode_ctx_ {
         int32_t  *fresh_children;//increasing order of latency
         afr_spb_state_t mdata_spb;
         afr_spb_state_t data_spb;
+        uint32_t        open_fd_count;
 } afr_inode_ctx_t;
 
 typedef enum {
@@ -445,6 +446,8 @@ typedef struct _afr_local {
         unsigned int call_count;
         unsigned int success_count;
         unsigned int enoent_count;
+        uint32_t     open_fd_count;
+        gf_boolean_t update_open_fd_count;
 
 
         unsigned int unhealable;
@@ -1171,4 +1174,10 @@ afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
 int
 afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
 
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+
 #endif /* __AFR_H__ */
