diff options
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 142 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 17 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 61 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 768 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 24 | 
6 files changed, 422 insertions, 594 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 13eb57e3ec5..b5f060a8730 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1188,25 +1188,22 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)                          goto unlock;                  } -                fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on), -                                               priv->child_count, -                                               gf_afr_mt_char); -                if (!fd_ctx->opened_on) { +                fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback), +                                                      priv->child_count, +                                                      gf_afr_mt_char); +                if (!fd_ctx->pre_op_piggyback) {                          gf_log (this->name, GF_LOG_ERROR,                                  "Out of memory");                          ret = -ENOMEM;                          goto unlock;                  } -                fd_ctx->child_failed = GF_CALLOC ( -                                         sizeof (*fd_ctx->child_failed), -                                         priv->child_count, -                                         gf_afr_mt_char); - -                if (!fd_ctx->child_failed) { +                fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on), +                                               priv->child_count, +                                               gf_afr_mt_char); +                if (!fd_ctx->opened_on) {                          gf_log (this->name, GF_LOG_ERROR,                                  "Out of memory"); -                          ret = -ENOMEM;                          goto unlock;                  } @@ -1352,73 +1349,6 @@ afr_flush_done (call_frame_t *frame, xlator_t *this)  int -afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                     int32_t op_ret, int32_t op_errno) - -{ -        afr_local_t *local = NULL; - -        int call_count = -1; - -        local = frame->local; - -        LOCK (&frame->lock); -        { -                if (op_ret == 0) -                        local->op_ret = 0; - -                local->op_errno = op_errno; -        } -        UNLOCK (&frame->lock); - -        call_count = afr_frame_return (frame); - -        if (call_count == 0) -                AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno); - -        return 0; -} - - -static int -__no_pre_op_done (xlator_t *this, fd_t *fd) -{ -        int i      = 0; -        int op_ret = 1; - -        int _ret = 0; -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; - -        afr_private_t *priv = NULL; - -        priv = this->private; - -        LOCK (&fd->lock); -        { -                _ret = __fd_ctx_get (fd, this, &ctx); - -                if (_ret < 0) { -                        goto out; -                } - -                fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -                for (i = 0; i < priv->child_count; i++) { -                        if (fd_ctx->pre_op_done[i]) { -                                op_ret = 0; -                                break; -                        } -                } -        } -out: -        UNLOCK (&fd->lock); - -        return op_ret; -} - - -int  afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)  {          afr_private_t * priv  = NULL; @@ -1431,7 +1361,6 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          int op_ret   = -1;          int op_errno = 0; -        int i          = 0;          int call_count = 0;          VALIDATE_OR_GOTO (frame, out); @@ -1450,45 +1379,30 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          call_count = afr_up_children_count (priv->child_count, local->child_up); -        if (__no_pre_op_done (this, fd)) { -                frame->local = local; +        transaction_frame = copy_frame (frame); +        if (!transaction_frame) { +                op_errno = ENOMEM; +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory."); +                goto out; +        } -                for (i = 0; i < priv->child_count; i++) { -                        if (local->child_up[i]) { -                                STACK_WIND_COOKIE (frame, afr_plain_flush_cbk, -                                                   (void *) (long) i, -                                                   priv->children[i], -                                                   priv->children[i]->fops->flush, -                                                   fd); -                                if (!--call_count) -                                        break; -                        } -                } -        } else { -                transaction_frame = copy_frame (frame); -                if (!transaction_frame) { -                        op_errno = ENOMEM; -                        gf_log (this->name, GF_LOG_ERROR, -                                "Out of memory."); -                        goto out; -                } +        transaction_frame->local = local; -                transaction_frame->local = local; +        local->op = GF_FOP_FLUSH; -                local->op = GF_FOP_FLUSH; +        local->transaction.fop    = afr_flush_wind; +        local->transaction.done   = afr_flush_done; +        local->transaction.unwind = afr_flush_unwind; -                local->transaction.fop    = afr_flush_wind; -                local->transaction.done   = afr_flush_done; -                local->transaction.unwind = afr_flush_unwind; +        local->fd                 = fd_ref (fd); -                local->fd                 = fd_ref (fd); +        local->transaction.main_frame = frame; +        local->transaction.start  = 0; +        local->transaction.len    = 0; -                local->transaction.main_frame = frame; -                local->transaction.start  = 0; -                local->transaction.len    = 0; +        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); -                afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION); -        }          op_ret = 0;  out: @@ -1519,10 +1433,10 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)          fd_ctx = (afr_fd_ctx_t *)(long) ctx; -        if (fd_ctx) { -                if (fd_ctx->child_failed) -                        GF_FREE (fd_ctx->child_failed); +        gf_log (this->name, GF_LOG_TRACE, +                "hits=%d, miss=%d", fd_ctx->hit, fd_ctx->miss); +        if (fd_ctx) {                  if (fd_ctx->pre_op_done)                          GF_FREE (fd_ctx->pre_op_done); diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 587e4840008..5a2c40cc467 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -291,14 +291,9 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,          fd_ctx = (afr_fd_ctx_t *)(long) ctx; -        if (fd_ctx->down_count < priv->down_count) { -                local->up_down_flush_cbk = afr_do_writev; -                afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH); - -        } else if (fd_ctx->up_count < priv->up_count) { -                local->up_down_flush_cbk = afr_do_writev; -                afr_up_down_flush (frame, this, fd, AFR_CHILD_UP_FLUSH); - +        if (fd_ctx->up_count < priv->up_count) { +                local->openfd_flush_cbk = afr_do_writev; +                afr_openfd_flush (frame, this, fd);          } else {                  afr_do_writev (frame, this);          } @@ -769,9 +764,9 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,          fd_ctx = (afr_fd_ctx_t *)(long) ctx; -        if (fd_ctx->down_count < priv->down_count) { -                local->up_down_flush_cbk = afr_do_ftruncate; -                afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH); +        if (fd_ctx->up_count < priv->up_count) { +                local->openfd_flush_cbk = afr_do_ftruncate; +                afr_openfd_flush (frame, this, fd);          } else {                  afr_do_ftruncate (frame, this);          } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 749c6bf9a59..ee53d1d7bfb 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -433,7 +433,6 @@ is_afr_lock_transaction (afr_local_t *local)          switch (local->transaction.type) {  	case AFR_DATA_TRANSACTION:  	case AFR_METADATA_TRANSACTION: -	case AFR_FLUSH_TRANSACTION:                  ret = 1;                  break; @@ -878,7 +877,6 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)          switch (local->transaction.type) {  	case AFR_DATA_TRANSACTION:  	case AFR_METADATA_TRANSACTION: -	case AFR_FLUSH_TRANSACTION:                  memcpy (int_lock->inode_locked_nodes,                          int_lock->locked_nodes,                          priv->child_count); @@ -998,7 +996,6 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)  	switch (local->transaction.type) {  	case AFR_DATA_TRANSACTION:  	case AFR_METADATA_TRANSACTION: -	case AFR_FLUSH_TRANSACTION:  		if (local->fd) {                          afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, @@ -1110,7 +1107,6 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)          switch (local->transaction.type) {  	case AFR_DATA_TRANSACTION:  	case AFR_METADATA_TRANSACTION: -	case AFR_FLUSH_TRANSACTION:                  initialize_inodelk_variables (frame, this);                  break; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 69bbb56b8d0..d943213b277 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -216,9 +216,8 @@ out:  int -afr_up_down_flush_open_cbk (call_frame_t *frame, void *cookie, -                            xlator_t *this, int32_t op_ret, int32_t op_errno, -                            fd_t *fd) +afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                        int32_t op_ret, int32_t op_errno, fd_t *fd)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -261,7 +260,7 @@ out:          if (call_count == 0) {                  int_lock->lock_cbk = local->transaction.done; -                local->transaction.post_post_op (frame, this); +                local->transaction.resume (frame, this);          }          return 0; @@ -269,7 +268,7 @@ out:  static int -__unopened_count (int child_count, unsigned char *opened_on, unsigned char *child_up) +__unopened_count (int child_count, unsigned int *opened_on, unsigned char *child_up)  {          int i;          int count = 0; @@ -284,7 +283,7 @@ __unopened_count (int child_count, unsigned char *opened_on, unsigned char *chil  int -afr_up_down_flush_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this)  {          afr_local_t *local  = NULL;          afr_private_t *priv = NULL; @@ -331,7 +330,7 @@ afr_up_down_flush_sh_unwind (call_frame_t *frame, xlator_t *this)                                  "opening fd for %s on subvolume %s",                                  local->loc.path, priv->children[i]->name); -                        STACK_WIND_COOKIE (frame, afr_up_down_flush_open_cbk, +                        STACK_WIND_COOKIE (frame, afr_openfd_sh_open_cbk,                                             (void *)(long) i,                                             priv->children[i],                                             priv->children[i]->fops->open, @@ -345,14 +344,14 @@ afr_up_down_flush_sh_unwind (call_frame_t *frame, xlator_t *this)  out:          if (abandon) -                local->transaction.post_post_op (frame, this); +                local->transaction.resume (frame, this);          return 0;  }  int -afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this) +afr_openfd_sh (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local  = NULL;          afr_self_heal_t *sh = NULL; @@ -375,7 +374,7 @@ afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this)          sh->need_data_self_heal = _gf_true;          sh->type                = local->fd->inode->ia_type;          sh->background          = _gf_false; -        sh->unwind              = afr_up_down_flush_sh_unwind; +        sh->unwind              = afr_openfd_sh_unwind;          afr_self_heal_type_str_get(&local->self_heal,                                     sh_type_str, @@ -391,19 +390,7 @@ afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this)  int -afr_up_down_flush_wind (call_frame_t *frame, xlator_t *this) -{ -	afr_local_t *local = NULL; - -	local = frame->local; - -        local->transaction.resume (frame, this); -	return 0; -} - - -int -afr_up_down_flush_done (call_frame_t *frame, xlator_t *this) +afr_openfd_flush_done (call_frame_t *frame, xlator_t *this)  {          afr_private_t *priv = NULL;  	afr_local_t *local  = NULL; @@ -412,7 +399,6 @@ afr_up_down_flush_done (call_frame_t *frame, xlator_t *this)          afr_fd_ctx_t * fd_ctx = NULL;          int _ret = -1; -        int i    = 0;          priv  = this->private;  	local = frame->local; @@ -429,11 +415,6 @@ afr_up_down_flush_done (call_frame_t *frame, xlator_t *this)                  fd_ctx->down_count = priv->down_count;                  fd_ctx->up_count   = priv->up_count; - -                for (i = 0; i < priv->child_count; i++) { -                        if (local->child_up[i]) -                                fd_ctx->pre_op_done[i] = 0; -                }          }  out:          UNLOCK (&local->fd->lock); @@ -444,15 +425,14 @@ out:                  "The up/down flush is over");          fd_unref (local->fd); -        local->up_down_flush_cbk (frame, this); +        local->openfd_flush_cbk (frame, this);  	return 0;  }  int -afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, -                   afr_flush_type type) +afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)  {  	afr_local_t   * local = NULL; @@ -466,18 +446,8 @@ afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,          local->fd = fd_ref (fd); -        local->transaction.fop          = afr_up_down_flush_wind; -        local->transaction.done         = afr_up_down_flush_done; - -        switch (type) { -        case AFR_CHILD_UP_FLUSH: -                local->transaction.post_post_op = afr_up_down_flush_post_post_op; -                break; - -        case AFR_CHILD_DOWN_FLUSH: -                local->transaction.post_post_op = NULL; -                break; -        } +        local->transaction.fop          = afr_openfd_sh; +        local->transaction.done         = afr_openfd_flush_done;          local->transaction.start  = 0;          local->transaction.len    = 0; @@ -486,8 +456,9 @@ afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,                  "doing up/down flush on fd=%p",                  fd); -        afr_transaction (frame, this, AFR_FLUSH_TRANSACTION); +        afr_transaction (frame, this, AFR_DATA_TRANSACTION);  out:  	return 0;  } + diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index b4615326747..e31f0c1df31 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -33,6 +33,25 @@  #define LOCKED_LOWER    0x2        /* for lower_path of RENAME */ +afr_fd_ctx_t * +afr_fd_ctx_get (fd_t *fd, xlator_t *this) +{ +        uint64_t       ctx = 0; +        afr_fd_ctx_t  *fd_ctx = NULL; +        int            ret = 0; + +        ret = fd_ctx_get (fd, this, &ctx); + +        if (ret < 0) +                goto out; + +        fd_ctx = (afr_fd_ctx_t *)(long) ctx; + +out: +        return fd_ctx; +} + +  static void  afr_pid_save (call_frame_t *frame)  { @@ -82,80 +101,54 @@ __mark_child_dead (int32_t *pending[], int child_count, int child,  static void -__mark_fop_failed_on_fd (fd_t *fd, xlator_t *this, -                         int child_index) +__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)  { -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; - -        int ret = 0; - -        ret = fd_ctx_get (fd, this, &ctx); - -        if (ret < 0) -                goto out; - -        fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -        fd_ctx->child_failed[child_index] = 1; -out: -        return; -} - +        afr_local_t   *local = NULL; +        afr_fd_ctx_t  *fd_ctx = NULL; -static void -__mark_failed_children (int32_t *pending[], int child_count, -                        xlator_t *this, fd_t *fd, afr_transaction_type type) -{ -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; +        local = frame->local; -        int ret = 0; -        int i   = 0; -        int j   = 0; +        if (!local->fd) +                return; -        ret = fd_ctx_get (fd, this, &ctx); +        fd_ctx = afr_fd_ctx_get (local->fd, this); -        if (ret < 0) +        if (!fd_ctx)                  goto out; -        fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -        for (i = 0; i < child_count; i++) { -                j = afr_index_for_transaction_type (type); - -                if (fd_ctx->child_failed[i]) -                        pending[i][j] = 0; +        LOCK (&local->fd->lock); +        { +                if (local->transaction.type == AFR_DATA_TRANSACTION) +                        fd_ctx->pre_op_done[child_index]++;          } - +        UNLOCK (&local->fd->lock);  out:          return;  }  static void -__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index) +__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)  { -        afr_local_t *local = NULL; - -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; -        int ret = 0; +        afr_local_t   *local = NULL; +        afr_fd_ctx_t  *fd_ctx = NULL;          local = frame->local; -        ret = fd_ctx_get (local->fd, this, &ctx); +        if (!local->fd) +                return; -        if (ret < 0) -                goto out; +        fd_ctx = afr_fd_ctx_get (local->fd, this); -        fd_ctx = (afr_fd_ctx_t *)(long) ctx; +        if (!fd_ctx) +                goto out; -        if ((local->op == GF_FOP_WRITE) -            || (local->op == GF_FOP_FTRUNCATE)) { -                fd_ctx->pre_op_done[child_index] = 1; +        LOCK (&local->fd->lock); +        { +                if (local->transaction.type == AFR_DATA_TRANSACTION) +                        fd_ctx->pre_op_done[child_index]--;          } - +        UNLOCK (&local->fd->lock);  out:          return;  } @@ -192,116 +185,6 @@ __mark_all_success (int32_t *pending[], int child_count,  static int -__is_first_write_on_fd (xlator_t *this, fd_t *fd) -{ -        int op_ret     = 0; -        int _ret       = -1; -        int i          = 0; - -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; - -        afr_private_t *priv = NULL; - -        priv = this->private; - -        LOCK (&fd->lock); -        { -                _ret = __fd_ctx_get (fd, this, &ctx); - -                if (_ret < 0) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "could not get fd ctx on fd=%p", -                                fd); -                        goto out; -                } - -                fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -                op_ret = 1; -                for (i = 0; i < priv->child_count; i++) { -                        if (fd_ctx->pre_op_done[i] == 0) -                                continue; - -                        op_ret = 0; -                } -        } -out: -        UNLOCK (&fd->lock); - -        return op_ret; -} - - -static int -__if_fd_pre_op_done (xlator_t *this, fd_t *fd, int child_index) -{ -        int op_ret = 0; -        int _ret   = -1; - -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; - -        LOCK (&fd->lock); -        { -                _ret = __fd_ctx_get (fd, this, &ctx); - -                if (_ret < 0) { -                        goto out; -                } - -                fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -                if (fd_ctx->pre_op_done[child_index]) { -                        op_ret = 1; -                } -                fd_ctx->pre_op_done[child_index] = 0; -        } -out: -        UNLOCK (&fd->lock); - -        return op_ret; -} - - -static int -afr_pre_op_done_count (xlator_t *this, fd_t *fd, unsigned char *child_up) -{ -        int i = 0; -        int count = 0; - -        int _ret = 0; -        uint64_t       ctx; -        afr_fd_ctx_t * fd_ctx = NULL; - -        afr_private_t *priv = NULL; - -        priv = this->private; - -        LOCK (&fd->lock); -        { -                _ret = __fd_ctx_get (fd, this, &ctx); - -                if (_ret < 0) { -                        goto out; -                } - -                fd_ctx = (afr_fd_ctx_t *)(long) ctx; - -                for (i = 0; i < priv->child_count; i++) { -                        if (fd_ctx->pre_op_done[i] && child_up[i]) { -                                count++; -                        } -                } -        } -out: -        UNLOCK (&fd->lock); - -        return count; -} - - -static int  __changelog_enabled (afr_private_t *priv, afr_transaction_type type)  {  	int ret = 0; @@ -325,9 +208,6 @@ __changelog_enabled (afr_private_t *priv, afr_transaction_type type)  			ret = 1;  		break; - -	case AFR_FLUSH_TRANSACTION: -		ret = 1;  	}  	return ret; @@ -339,7 +219,6 @@ __changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)  {  	afr_private_t * priv  = NULL;  	afr_local_t   * local = NULL; -	fd_t *          fd    = NULL;  	int op_ret   = 0; @@ -351,20 +230,10 @@ __changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)  		case GF_FOP_WRITE:  		case GF_FOP_FTRUNCATE: -			/* -			   if it's a data transaction, we write the changelog -			   only on the first write on an fd -			*/ - -			fd = local->fd; -			if (!fd || __is_first_write_on_fd (this, fd)) -				op_ret = 1; - +                        op_ret = 1;  			break;  		case GF_FOP_FLUSH: -			/* only do post-op on flush() */ -  			op_ret = 0;  			break; @@ -395,11 +264,11 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)                  case GF_FOP_WRITE:                  case GF_FOP_FTRUNCATE: -                        op_ret = 0; +                        op_ret = 1;                          break;                  case GF_FOP_FLUSH: -                        op_ret = 1; +                        op_ret = 0;                          break;                  default: @@ -431,13 +300,48 @@ out:  } +static int +afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending, +                        afr_transaction_type type) +{ +        int i; +        int ret = 0; +        int *arr = NULL; +        int index = 0; + +        index = afr_index_for_transaction_type (type); + +        for (i = 0; i < priv->child_count; i++) { +                arr = GF_CALLOC (3 * sizeof (int32_t), priv->child_count, +                                 gf_afr_mt_char); +                if (!arr) { +                        ret = -1; +                        goto out; +                } + +                memcpy (arr, pending[i], 3 * sizeof (int32_t)); + +                arr[index]++; + +                ret = dict_set_bin (xattr, priv->pending_key[i], +                                    arr, 3 * sizeof (int32_t)); +                /* 3 = data+metadata+entry */ + +                if (ret < 0) +                        goto out; +        } + +out: +        return ret; +} + +  int  afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)  {  	int ret = 0;  	switch (type) { -	case AFR_FLUSH_TRANSACTION:  	case AFR_DATA_TRANSACTION:  		ret = priv->child_count;  		break; @@ -464,15 +368,23 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          afr_internal_lock_t *int_lock = NULL;  	afr_private_t       *priv     = NULL;  	afr_local_t         *local    = NULL; +        int                  child_index = 0;  	int call_count = -1; -        int (*post_post_op) (call_frame_t *, xlator_t *); -  	priv     = this->private;  	local    = frame->local;          int_lock = &local->internal_lock; +        child_index = (long) cookie; + +        if (op_ret == 1) { +        } + +        if (op_ret == 0) { +                __mark_pre_op_undone_on_fd (frame, this, child_index); +        } +  	LOCK (&frame->lock);  	{  		call_count = --local->call_count; @@ -480,24 +392,11 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	UNLOCK (&frame->lock);  	if (call_count == 0) { -                if (local->transaction.post_post_op) { -                        post_post_op = local->transaction.post_post_op; - -                        if (afr_lock_server_count (priv, local->transaction.type) == 0) { -                                local->transaction.post_post_op = local->transaction.done; -                        } else { -                                int_lock->lock_cbk = local->transaction.done; -                                local->transaction.post_post_op = afr_unlock; -                        } - -                        post_post_op (frame, this); +                if (afr_lock_server_count (priv, local->transaction.type) == 0) { +                        local->transaction.done (frame, this);                  } else { -                        if (afr_lock_server_count (priv, local->transaction.type) == 0) { -                                local->transaction.done (frame, this); -                        } else { -                                int_lock->lock_cbk = local->transaction.done; -                                afr_unlock (frame, this); -                        } +                        int_lock->lock_cbk = local->transaction.done; +                        afr_unlock (frame, this);                  }  	} @@ -515,7 +414,11 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  	int call_count = 0;  	afr_local_t *  local = NULL; +        afr_fd_ctx_t  *fdctx = NULL;  	dict_t        **xattr = NULL; +        int            piggyback = 0; +        int            index = 0; +        int            nothing_failed = 1;  	local    = frame->local;          int_lock = &local->internal_lock; @@ -523,12 +426,6 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  	__mark_down_children (local->pending, priv->child_count,                                local->child_up, local->transaction.type); -        if (local->op == GF_FOP_FLUSH) { -                __mark_failed_children (local->pending, priv->child_count, -                                        this, local->fd, -                                        local->transaction.type); -        } -          xattr = alloca (priv->child_count * sizeof (*xattr));          memset (xattr, 0, (priv->child_count * sizeof (*xattr)));  	for (i = 0; i < priv->child_count; i++) { @@ -536,18 +433,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)                  dict_ref (xattr[i]);          } -        if (local->op == GF_FOP_FLUSH) { -                call_count = afr_pre_op_done_count (this, local->fd, local->child_up); -        } else { -                call_count = afr_up_children_count (priv->child_count, local->child_up); +        call_count = afr_up_children_count (priv->child_count, local->child_up); -                if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { -                        call_count *= 2; -                } +        if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { +                call_count *= 2;          }  	local->call_count = call_count; +        if (local->fd) +                fdctx = afr_fd_ctx_get (local->fd, this); +  	if (call_count == 0) {  		/* no child is up */                  for (i = 0; i < priv->child_count; i++) { @@ -559,100 +455,134 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  		return 0;  	} +        /* check if something has failed, to handle piggybacking */ +        nothing_failed = 1; +        index = afr_index_for_transaction_type (local->transaction.type); +        for (i = 0; i < priv->child_count; i++) { +                if (local->pending[i][index] == 0) { +                        nothing_failed = 0; +                        break; +                } +        } +  	for (i = 0; i < priv->child_count; i++) { -		if (local->child_up[i]) { -                        ret = afr_set_pending_dict (priv, xattr[i], -                                                    local->pending); - -			if (ret < 0) -				gf_log (this->name, GF_LOG_DEBUG, -					"failed to set pending entry"); - - -			switch (local->transaction.type) { -			case AFR_DATA_TRANSACTION: -			case AFR_METADATA_TRANSACTION: -			{ -				if (local->fd) -					STACK_WIND (frame, afr_changelog_post_op_cbk, -						    priv->children[i], -						    priv->children[i]->fops->fxattrop, -						    local->fd, -						    GF_XATTROP_ADD_ARRAY, xattr[i]); -				else -					STACK_WIND (frame, afr_changelog_post_op_cbk, -						    priv->children[i], -						    priv->children[i]->fops->xattrop, -						    &local->loc, -						    GF_XATTROP_ADD_ARRAY, xattr[i]); -                                call_count--; -			} -			break; +		if (!local->child_up[i]) +                        continue; -			case AFR_FLUSH_TRANSACTION: -			{ -				if (__if_fd_pre_op_done (this, local->fd, i)) { -					STACK_WIND (frame, afr_changelog_post_op_cbk, -						    priv->children[i], -						    priv->children[i]->fops->fxattrop, -						    local->fd, -						    GF_XATTROP_ADD_ARRAY, xattr[i]); -                                        call_count--; +                ret = afr_set_pending_dict (priv, xattr[i], +                                            local->pending); + +                if (ret < 0) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to set pending entry"); + + +                switch (local->transaction.type) { +                case AFR_DATA_TRANSACTION: +                { +                        if (!fdctx) { +                                STACK_WIND (frame, afr_changelog_post_op_cbk, +                                            priv->children[i], +                                            priv->children[i]->fops->xattrop, +                                            &local->loc, +                                            GF_XATTROP_ADD_ARRAY, xattr[i]); +                                break; +                        } + +                        LOCK (&local->fd->lock); +                        { +                                piggyback = 0; +                                if (fdctx->pre_op_piggyback[i]) { +                                        fdctx->pre_op_piggyback[i]--; +                                        piggyback = 1;                                  } -			} -			break; +                        } +                        UNLOCK (&local->fd->lock); -			case AFR_ENTRY_RENAME_TRANSACTION: -			{ -				STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, -						   (void *) (long) i, -						   priv->children[i], -						   priv->children[i]->fops->xattrop, -						   &local->transaction.new_parent_loc, -						   GF_XATTROP_ADD_ARRAY, xattr[i]); +                        if (piggyback && !nothing_failed) +                                ret = afr_set_piggyback_dict (priv, xattr[i], +                                                              local->pending, +                                                              local->transaction.type); -				call_count--; -			} +                        if (nothing_failed && piggyback) { +                                afr_changelog_post_op_cbk (frame, (void *)(long)i, +                                                          this, 1, 0, xattr[i]); +                        } else { +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_post_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->fxattrop, +                                                   local->fd, +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                        } +                } +                break; +                case AFR_METADATA_TRANSACTION: +                { +                        if (local->fd) +                                STACK_WIND (frame, afr_changelog_post_op_cbk, +                                            priv->children[i], +                                            priv->children[i]->fops->fxattrop, +                                            local->fd, +                                            GF_XATTROP_ADD_ARRAY, xattr[i]); +                        else +                                STACK_WIND (frame, afr_changelog_post_op_cbk, +                                            priv->children[i], +                                            priv->children[i]->fops->xattrop, +                                            &local->loc, +                                            GF_XATTROP_ADD_ARRAY, xattr[i]); +                } +                break; -			/* -			   set it again because previous stack_wind -			   might have already returned (think of case -			   where subvolume is posix) and would have -			   used the dict as placeholder for return -			   value -			*/ - -			ret = afr_set_pending_dict (priv, xattr[i], -                                                    local->pending); - -			if (ret < 0) -				gf_log (this->name, GF_LOG_DEBUG, -					"failed to set pending entry"); - -			/* fall through */ - -			case AFR_ENTRY_TRANSACTION: -			{ -				if (local->fd) -					STACK_WIND (frame, afr_changelog_post_op_cbk, -						    priv->children[i], -						    priv->children[i]->fops->fxattrop, -						    local->fd, -						    GF_XATTROP_ADD_ARRAY, xattr[i]); -				else -					STACK_WIND (frame, afr_changelog_post_op_cbk, -						    priv->children[i], -						    priv->children[i]->fops->xattrop, -						    &local->transaction.parent_loc, -						    GF_XATTROP_ADD_ARRAY, xattr[i]); -                                call_count--; -			} -			break; -			} +                case AFR_ENTRY_RENAME_TRANSACTION: +                { +                        STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, +                                           (void *) (long) i, +                                           priv->children[i], +                                           priv->children[i]->fops->xattrop, +                                           &local->transaction.new_parent_loc, +                                           GF_XATTROP_ADD_ARRAY, xattr[i]); +                        call_count--; +                } -			if (!call_count) -				break; -		} +                /* +                  set it again because previous stack_wind +                  might have already returned (think of case +                  where subvolume is posix) and would have +                  used the dict as placeholder for return +                  value +                */ + +                ret = afr_set_pending_dict (priv, xattr[i], +                                            local->pending); + +                if (ret < 0) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to set pending entry"); + +                /* fall through */ + +                case AFR_ENTRY_TRANSACTION: +                { +                        if (local->fd) +                                STACK_WIND (frame, afr_changelog_post_op_cbk, +                                            priv->children[i], +                                            priv->children[i]->fops->fxattrop, +                                            local->fd, +                                            GF_XATTROP_ADD_ARRAY, xattr[i]); +                        else +                                STACK_WIND (frame, afr_changelog_post_op_cbk, +                                            priv->children[i], +                                            priv->children[i]->fops->xattrop, +                                            &local->transaction.parent_loc, +                                            GF_XATTROP_ADD_ARRAY, xattr[i]); +                } +                break; +                } + +                if (!--call_count) +                        break;  	}          for (i = 0; i < priv->child_count; i++) { @@ -679,6 +609,10 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	LOCK (&frame->lock);  	{ +                if (op_ret == 1) { +                        /* special op_ret for piggyback */ +                } +                  if (op_ret == 0) {                          __mark_pre_op_done_on_fd (frame, this, child_index);                  } @@ -731,8 +665,10 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  	int ret = 0;  	int call_count = 0;  	dict_t **xattr = NULL; +        afr_fd_ctx_t *fdctx = NULL;  	afr_local_t *local = NULL; +        int          piggyback = 0;  	local = frame->local; @@ -768,97 +704,136 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  	__mark_all_pending (local->pending, priv->child_count,                              local->transaction.type); +        if (local->fd) +                fdctx = afr_fd_ctx_get (local->fd, this); +  	for (i = 0; i < priv->child_count; i++) { -		if (local->child_up[i]) { -			ret = afr_set_pending_dict (priv, xattr[i], -                                                    local->pending); - -			if (ret < 0) -				gf_log (this->name, GF_LOG_DEBUG, -					"failed to set pending entry"); - - -			switch (local->transaction.type) { -			case AFR_DATA_TRANSACTION: -			case AFR_METADATA_TRANSACTION: -			case AFR_FLUSH_TRANSACTION: -			{ -				if (local->fd) -					STACK_WIND_COOKIE (frame, -							   afr_changelog_pre_op_cbk, -							   (void *) (long) i, -							   priv->children[i], -							   priv->children[i]->fops->fxattrop, -							   local->fd, -							   GF_XATTROP_ADD_ARRAY, xattr[i]); -				else -					STACK_WIND_COOKIE (frame, -							   afr_changelog_pre_op_cbk, -							   (void *) (long) i, -							   priv->children[i], -							   priv->children[i]->fops->xattrop, -							   &(local->loc), -							   GF_XATTROP_ADD_ARRAY, xattr[i]); -			} -			break; +                if (!local->child_up[i]) +                        continue; +                ret = afr_set_pending_dict (priv, xattr[i], +                                            local->pending); -			case AFR_ENTRY_RENAME_TRANSACTION: -			{ -				STACK_WIND_COOKIE (frame, -						   afr_changelog_pre_op_cbk, -						   (void *) (long) i, -						   priv->children[i], -						   priv->children[i]->fops->xattrop, -						   &local->transaction.new_parent_loc, -						   GF_XATTROP_ADD_ARRAY, xattr[i]); - -				call_count--; -			} +                if (ret < 0) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to set pending entry"); + + +                switch (local->transaction.type) { +                case AFR_DATA_TRANSACTION: +                { +                        if (!fdctx) { +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->xattrop, +                                                   &(local->loc), +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                                break; +                        } + +                        LOCK (&local->fd->lock); +                        { +                                piggyback = 0; +                                if (fdctx->pre_op_done[i]) { +                                        fdctx->pre_op_piggyback[i]++; +                                        piggyback = 1; +                                        fdctx->hit++; +                                } else { +                                        fdctx->miss++; +                                } +                        } +                        UNLOCK (&local->fd->lock); + +                        if (piggyback) +                                afr_changelog_pre_op_cbk (frame, (void *)(long)i, +                                                          this, 1, 0, xattr[i]); +                        else +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->fxattrop, +                                                   local->fd, +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                } +                break; +                case AFR_METADATA_TRANSACTION: +                { +                        if (local->fd) +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->fxattrop, +                                                   local->fd, +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                        else +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->xattrop, +                                                   &(local->loc), +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                } +                break; +                case AFR_ENTRY_RENAME_TRANSACTION: +                { +                        STACK_WIND_COOKIE (frame, +                                           afr_changelog_pre_op_cbk, +                                           (void *) (long) i, +                                           priv->children[i], +                                           priv->children[i]->fops->xattrop, +                                           &local->transaction.new_parent_loc, +                                           GF_XATTROP_ADD_ARRAY, xattr[i]); + +                        call_count--; +                } -			/* -			   set it again because previous stack_wind -			   might have already returned (think of case -			   where subvolume is posix) and would have -			   used the dict as placeholder for return -			   value -			*/ - -			ret = afr_set_pending_dict (priv, xattr[i], -                                                    local->pending); - -			if (ret < 0) -				gf_log (this->name, GF_LOG_DEBUG, -					"failed to set pending entry"); - -			/* fall through */ - -			case AFR_ENTRY_TRANSACTION: -			{ -				if (local->fd) -					STACK_WIND_COOKIE (frame, -							   afr_changelog_pre_op_cbk, -							   (void *) (long) i, -							   priv->children[i], -							   priv->children[i]->fops->fxattrop, -							   local->fd, -							   GF_XATTROP_ADD_ARRAY, xattr[i]); -				else -					STACK_WIND_COOKIE (frame, -							   afr_changelog_pre_op_cbk, -							   (void *) (long) i, -							   priv->children[i], -							   priv->children[i]->fops->xattrop, -							   &local->transaction.parent_loc, -							   GF_XATTROP_ADD_ARRAY, xattr[i]); -			} -			break; -			} +                /* +                  set it again because previous stack_wind +                  might have already returned (think of case +                  where subvolume is posix) and would have +                  used the dict as placeholder for return +                  value +                */ -			if (!--call_count) -				break; -		} +                ret = afr_set_pending_dict (priv, xattr[i], +                                            local->pending); + +                if (ret < 0) +                        gf_log (this->name, GF_LOG_DEBUG, +                                "failed to set pending entry"); + +                /* fall through */ + +                case AFR_ENTRY_TRANSACTION: +                { +                        if (local->fd) +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->fxattrop, +                                                   local->fd, +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                        else +                                STACK_WIND_COOKIE (frame, +                                                   afr_changelog_pre_op_cbk, +                                                   (void *) (long) i, +                                                   priv->children[i], +                                                   priv->children[i]->fops->xattrop, +                                                   &local->transaction.parent_loc, +                                                   GF_XATTROP_ADD_ARRAY, xattr[i]); +                } +                break; +                } + +                if (!--call_count) +                        break;  	}          for (i = 0; i < priv->child_count; i++) { @@ -1038,7 +1013,6 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)  	switch (local->transaction.type) {  	case AFR_DATA_TRANSACTION:  	case AFR_METADATA_TRANSACTION: -	case AFR_FLUSH_TRANSACTION:                  afr_set_transaction_flock (local);                  int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk; @@ -1098,7 +1072,7 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)                  afr_changelog_pre_op (frame, this);          } else {                  __mark_all_success (local->pending, priv->child_count, -                                            local->transaction.type); +                                    local->transaction.type);                  afr_pid_restore (frame); @@ -1148,15 +1122,8 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index  	local = frame->local;  	priv  = this->private; -        switch (local->op) { -        case GF_FOP_WRITE: -                __mark_fop_failed_on_fd (local->fd, this, child_index); -                break; -        default: -                __mark_child_dead (local->pending, priv->child_count, -                                   child_index, local->transaction.type); -                break; -        } +        __mark_child_dead (local->pending, priv->child_count, +                           child_index, local->transaction.type);  } @@ -1175,16 +1142,7 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)  	local->transaction.type   = type;  	if (afr_lock_server_count (priv, local->transaction.type) == 0) { -		if (__changelog_needed_pre_op (frame, this)) { -			afr_changelog_pre_op (frame, this); -		} else { -                        __mark_all_success (local->pending, priv->child_count, -                                            local->transaction.type); - -                        afr_pid_restore (frame); - -			local->transaction.fop (frame, this); -		} +                afr_internal_lock_finish (frame, this);  	} else {  		afr_lock (frame, this);  	} diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index db762c11ec5..749264a8d65 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -174,7 +174,6 @@ typedef enum {  	AFR_METADATA_TRANSACTION,      /* chmod, chown, ... */  	AFR_ENTRY_TRANSACTION,         /* create, rmdir, ... */  	AFR_ENTRY_RENAME_TRANSACTION,  /* rename */ -	AFR_FLUSH_TRANSACTION,         /* flush */  } afr_transaction_type;  typedef enum { @@ -217,7 +216,6 @@ afr_index_for_transaction_type (afr_transaction_type type)          switch (type) {          case AFR_DATA_TRANSACTION: -        case AFR_FLUSH_TRANSACTION:                  return 0;          case AFR_METADATA_TRANSACTION: @@ -232,11 +230,6 @@ afr_index_for_transaction_type (afr_transaction_type type)  } -typedef enum { -        AFR_CHILD_UP_FLUSH, -        AFR_CHILD_DOWN_FLUSH, -} afr_flush_type; -  typedef struct {          loc_t *lk_loc;          struct flock lk_flock; @@ -309,7 +302,7 @@ typedef struct _afr_local {          dict_t  *dict; -        int (*up_down_flush_cbk) (call_frame_t *, xlator_t *); +        int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this);  	/*  	   This struct contains the arguments for the "continuation" @@ -606,7 +599,6 @@ typedef struct _afr_local {  		int (*unwind) (call_frame_t *frame, xlator_t *this);                  /* post-op hook */ -                int (*post_post_op) (call_frame_t *frame, xlator_t *this);  	} transaction;  	afr_self_heal_t self_heal; @@ -614,15 +606,17 @@ typedef struct _afr_local {  typedef struct { -        unsigned char *pre_op_done; -        unsigned char *opened_on;     /* which subvolumes the fd is open on */ -        unsigned char *child_failed; +        unsigned int *pre_op_done; +        unsigned int *opened_on;     /* which subvolumes the fd is open on */ +        unsigned int *pre_op_piggyback;          int flags;          int32_t wbflags;          uint64_t up_count;   /* number of CHILD_UPs this fd has seen */          uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */          int32_t last_tried; + +        int  hit, miss;          gf_boolean_t failed_over;          struct list_head entries; /* needed for readdir failover */  } afr_fd_ctx_t; @@ -729,9 +723,6 @@ int  afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,            fd_t *fd, int32_t wbflags); -int -afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, afr_flush_type type); -  void  afr_set_opendir_done (xlator_t *this, inode_t *inode); @@ -744,6 +735,9 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);  int  afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd); +int +afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd); +  #define AFR_STACK_UNWIND(fop, frame, params ...)        \  	do {						\  		afr_local_t *__local = NULL;		\  | 
