diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-lk-common.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 1249 |
1 files changed, 582 insertions, 667 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 2fe134990..060d78f35 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #include "dict.h" @@ -31,8 +22,69 @@ #define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */ #define LOCKED_LOWER 0x2 /* for lower path */ +#define AFR_TRACE_INODELK_IN(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->inodelk_trace) \ + break; \ + afr_trace_inodelk_in (frame, this, params); \ + } while (0); + +#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->inodelk_trace) \ + break; \ + afr_trace_inodelk_out (frame, this, params); \ + } while (0); + +#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->entrylk_trace) \ + break; \ + afr_trace_entrylk_in (frame, this, params); \ + } while (0); + +#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->entrylk_trace) \ + break; \ + afr_trace_entrylk_out (frame, this, params); \ + } while (0); + int -afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); +afr_entry_lockee_cmp (const void *l1, const void *l2) +{ + const afr_entry_lockee_t *r1 = l1; + const afr_entry_lockee_t *r2 = l2; + int ret = 0; + uuid_t gfid1 = {0}; + uuid_t gfid2 = {0}; + + loc_gfid ((loc_t*)&r1->loc, gfid1); + loc_gfid ((loc_t*)&r2->loc, gfid2); + ret = uuid_compare (gfid1, gfid2); + /*Entrylks with NULL basename are the 'smallest'*/ + if (ret == 0) { + if (!r1->basename) + return -1; + if (!r2->basename) + return 1; + ret = strcmp (r1->basename, r2->basename); + } + + if (ret <= 0) + return -1; + else + return 1; +} + +int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); + +static int +afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this); static uint64_t afr_lock_number = 1; @@ -57,13 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this) } void -afr_set_lk_owner (call_frame_t *frame, xlator_t *this) +afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner) { gf_log (this->name, GF_LOG_TRACE, "Setting lk-owner=%llu", - (unsigned long long) (unsigned long)frame->root); + (unsigned long long) (unsigned long)lk_owner); - set_lk_owner_from_ptr (&frame->root->lk_owner, frame->root); + set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner); } static int @@ -99,16 +151,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this) local = frame->local; priv = this->private; - if (local->fd) { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && local->fd_open_on[i]) - ++call_count; - } - } else { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) - ++call_count; - } + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + ++call_count; } return call_count; @@ -242,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type, } static void -afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, +afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, afr_lock_op_type_t lk_op_type, struct gf_flock *flock, int op_ret, int op_errno, int32_t child_index) { - xlator_t *this = NULL; afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; - afr_private_t *priv = NULL; char lockee[256]; char lock_call_type_str[256]; char verdict[16]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; - priv = this->private; - - if (!priv->inodelk_trace) { - return; - } afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); @@ -271,37 +309,29 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, afr_print_verdict (op_ret, op_errno, verdict); gf_log (this->name, GF_LOG_INFO, - "[%s %s] [%s] Lockee={%s} Number={%llu}", + "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY", - verdict, - lockee, + verdict, lkowner_utoa (&frame->root->lk_owner), lockee, (unsigned long long) int_lock->lock_number); } static void -afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, +afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, afr_lock_op_type_t lk_op_type, struct gf_flock *flock, int32_t cmd, int32_t child_index) { - xlator_t *this = NULL; afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; char lock[256]; char lockee[256]; char lock_call_type_str[256]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; - priv = this->private; - - if (!priv->inodelk_trace) { - return; - } afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner); afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); @@ -318,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, } static void -afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, +afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, afr_lock_op_type_t lk_op_type, const char *basename, - int32_t child_index) + int32_t cookie) { - xlator_t *this = NULL; afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; afr_private_t *priv = NULL; + int child_index = 0; + int lockee_no = 0; char lock[256]; char lockee[256]; char lock_call_type_str[256]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; priv = this->private; @@ -339,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, if (!priv->entrylk_trace) { return; } + lockee_no = cookie / priv->child_count; + child_index = cookie % priv->child_count; afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner); - afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd, + child_index); afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); gf_log (this->name, GF_LOG_INFO, - "[%s %s] Lock={%s} Lockee={%s} Number={%llu}", + "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST", lock, lockee, - (unsigned long long) int_lock->lock_number); + (unsigned long long) int_lock->lock_number, + cookie); } static void -afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, - afr_lock_op_type_t lk_op_type, const char *basename, int op_ret, - int op_errno, int32_t child_index) +afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, const char *basename, + int op_ret, int op_errno, int32_t cookie) { - xlator_t *this = NULL; afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; + int lockee_no = 0; + int child_index = 0; char lock[256]; char lockee[256]; char lock_call_type_str[256]; char verdict[16]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; priv = this->private; @@ -376,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, if (!priv->entrylk_trace) { return; } + lockee_no = cookie / priv->child_count; + child_index = cookie % priv->child_count; - afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner); + afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd, + child_index); afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); afr_print_verdict (op_ret, op_errno, verdict); gf_log (this->name, GF_LOG_INFO, - "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}", + "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY", verdict, lock, lockee, - (unsigned long long) int_lock->lock_number); + (unsigned long long) int_lock->lock_number, + cookie); } @@ -442,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local) return ret; } +int +afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local, + loc_t *loc, char *basename, int child_count) +{ + int ret = -1; + + loc_copy (&lockee->loc, loc); + lockee->basename = (basename)? gf_strdup (basename): NULL; + if (basename && !lockee->basename) + goto out; + + lockee->locked_count = 0; + lockee->locked_nodes = GF_CALLOC (child_count, + sizeof (*lockee->locked_nodes), + gf_afr_mt_afr_node_character); + + if (!lockee->locked_nodes) + goto out; + + ret = 0; +out: + return ret; + +} + +void +afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock) +{ + int i = 0; + + for (i = 0; i < int_lock->lockee_count; i++) { + loc_wipe (&int_lock->lockee[i].loc); + if (int_lock->lockee[i].basename) + GF_FREE (int_lock->lockee[i].basename); + if (int_lock->lockee[i].locked_nodes) + GF_FREE (int_lock->lockee[i].locked_nodes); + } + + return; +} + static int initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) { @@ -459,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) int_lock->lock_op_ret = -1; int_lock->lock_op_errno = 0; - for (i = 0; i < priv->child_count; i++) { - int_lock->entry_locked_nodes[i] = 0; + for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) { + if (!int_lock->lockee[i].locked_nodes) + break; + int_lock->lockee[i].locked_count = 0; + memset (int_lock->lockee[i].locked_nodes, 0, + sizeof (*int_lock->lockee[i].locked_nodes) * + priv->child_count); } return 0; @@ -472,19 +559,23 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; afr_private_t *priv = NULL; - int i = 0; + afr_inodelk_t *inodelk = NULL; priv = this->private; local = frame->local; int_lock = &local->internal_lock; - int_lock->inodelk_lock_count = 0; - int_lock->lock_op_ret = -1; - int_lock->lock_op_errno = 0; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - for (i = 0; i < priv->child_count; i++) { - int_lock->inode_locked_nodes[i] = 0; - } + inodelk->lock_count = 0; + int_lock->lk_attempted_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; + + memset (inodelk->locked_nodes, 0, + sizeof (*inodelk->locked_nodes) * priv->child_count); + memset (int_lock->locked_nodes, 0, + sizeof (*int_lock->locked_nodes) * priv->child_count); return 0; } @@ -494,7 +585,7 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2) { int ret = 0; - ret = strcmp (l1->path, l2->path); + ret = uuid_compare (l1->inode->gfid, l2->inode->gfid); if (ret == 0) ret = strcmp (b1, b2); @@ -506,6 +597,18 @@ lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2) } int +afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock) +{ + int call_count = 0; + int i = 0; + + for (i = 0; i < int_lock->lockee_count; i++) + call_count += int_lock->lockee[i].locked_count; + + return call_count; +} + +int afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) { @@ -523,7 +626,7 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) /* FIXME: What if UNLOCK fails */ static int32_t afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; @@ -549,33 +652,37 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, static int32_t afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; int32_t child_index = (long)cookie; + afr_private_t *priv = NULL; local = frame->local; int_lock = &local->internal_lock; - afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION, AFR_UNLOCK_OP, NULL, op_ret, op_errno, child_index); + priv = this->private; + if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { - gf_log (this->name, GF_LOG_ERROR, - "%s: unlock failed on %d, reason: %s", - local->loc.path, child_index, strerror (op_errno)); + gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s " + "with lock owner %s", local->loc.path, + priv->children[child_index]->name, + lkowner_utoa (&frame->root->lk_owner)); } - int_lock->inode_locked_nodes[child_index] &= LOCKED_NO; - - if (op_ret == 1) { + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + inodelk->locked_nodes[child_index] &= LOCKED_NO; + if (local->transaction.eager_lock) local->transaction.eager_lock[child_index] = 0; - } - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; @@ -585,9 +692,12 @@ static int afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; struct gf_flock flock = {0,}; + struct gf_flock full_flock = {0,}; + struct gf_flock *flock_use = NULL; int call_count = 0; int i = 0; int piggyback = 0; @@ -598,15 +708,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) int_lock = &local->internal_lock; priv = this->private; - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; - flock.l_type = F_UNLCK; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - gf_log (this->name, GF_LOG_DEBUG, "attempting data unlock range %"PRIu64 - " %"PRIu64" by %s", flock.l_start, flock.l_len, - lkowner_utoa (&frame->root->lk_owner)); + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = F_UNLCK; - call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes, + full_flock.l_type = F_UNLCK; + call_count = afr_locked_nodes_count (inodelk->locked_nodes, priv->child_count); int_lock->lk_call_count = call_count; @@ -622,11 +731,11 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) fd_ctx = afr_fd_ctx_get (local->fd, this); for (i = 0; i < priv->child_count; i++) { - if ((int_lock->inode_locked_nodes[i] & LOCKED_YES) - != LOCKED_YES) + if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES) continue; if (local->fd) { + flock_use = &flock; if (!local->transaction.eager_lock[i]) { goto wind; } @@ -638,43 +747,48 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) if (fd_ctx->lock_piggyback[i]) { fd_ctx->lock_piggyback[i]--; piggyback = 1; + } else { + fd_ctx->lock_acquired[i]--; } } UNLOCK (&local->fd->lock); if (piggyback) { afr_unlock_inodelk_cbk (frame, (void *) (long) i, - this, 1, 0); + this, 1, 0, NULL); if (!--call_count) break; continue; } - fd_ctx->lock_acquired[i]--; + flock_use = &full_flock; wind: - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, - AFR_UNLOCK_OP, &flock, F_SETLK, i); + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, flock_use, F_SETLK, + i); STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, (void *) (long)i, priv->children[i], priv->children[i]->fops->finodelk, - this->name, local->fd, - F_SETLK, &flock); + int_lock->domain, local->fd, + F_SETLK, flock_use, NULL); if (!--call_count) break; } else { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, AFR_UNLOCK_OP, &flock, F_SETLK, i); STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, (void *) (long)i, priv->children[i], priv->children[i]->fops->inodelk, - this->name, &local->loc, - F_SETLK, &flock); + int_lock->domain, &local->loc, + F_SETLK, &flock, NULL); if (!--call_count) break; @@ -686,24 +800,34 @@ out: static int32_t afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; - int32_t child_index = (long)cookie; + afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + int32_t child_index = 0; + int lockee_no = 0; + + priv = this->private; + lockee_no = (int)((long) cookie) / priv->child_count; + child_index = (int) ((long) cookie) % priv->child_count; local = frame->local; + int_lock = &local->internal_lock; - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, - AFR_UNLOCK_OP, NULL, op_ret, - op_errno, child_index); + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_UNLOCK_OP, + int_lock->lockee[lockee_no].basename, op_ret, + op_errno, (int) ((long)cookie)); - if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { + if (op_ret < 0) { gf_log (this->name, GF_LOG_ERROR, "%s: unlock failed on %d, reason: %s", local->loc.path, child_index, strerror (op_errno)); } - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO; + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL); return 0; } @@ -711,24 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, static int afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - const char *basename = NULL; - loc_t *loc = NULL; - int call_count = 0; - int i = -1; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int index = 0; + int lockee_no = 0; + int copies = 0; + int i = -1; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + copies = priv->child_count; - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; + call_count = afr_lockee_locked_nodes_count (int_lock); - call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes, - priv->child_count); int_lock->lk_call_count = call_count; if (!call_count){ @@ -738,18 +860,23 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) goto out; } - for (i = 0; i < priv->child_count; i++) { - if (int_lock->entry_locked_nodes[i] & LOCKED_YES) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_UNLOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) { + lockee_no = i / copies; + index = i % copies; + if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_UNLOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - loc, basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + priv->children[index], + priv->children[index]->fops->entrylk, + int_lock->domain, + &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL); if (!--call_count) break; @@ -763,15 +890,22 @@ out: static int32_t afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int cky = (long) cookie; + int child_index = 0; + int lockee_no = 0; + priv = this->private; local = frame->local; int_lock = &local->internal_lock; + child_index = ((int)cky) % priv->child_count; + lockee_no = ((int)cky) / priv->child_count; + LOCK (&frame->lock); { if (op_ret == -1) { @@ -787,6 +921,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; int_lock->lock_op_errno = op_errno; } + + int_lock->lk_attempted_count++; } UNLOCK (&frame->lock); @@ -795,10 +931,17 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_unlock (frame, this); } else { if (op_ret == 0) { - int_lock->locked_nodes[child_index] |= LOCKED_YES; - int_lock->lock_count++; + if (local->transaction.type == AFR_ENTRY_TRANSACTION || + local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; + } else { + int_lock->locked_nodes[child_index] |= LOCKED_YES; + int_lock->lock_count++; + } } - afr_lock_blocking (frame, this, child_index + 1); + afr_lock_blocking (frame, this, cky + 1); } return 0; @@ -806,98 +949,26 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, static int32_t afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION, AFR_LOCK_OP, NULL, op_ret, op_errno, (long) cookie); - afr_lock_cbk (frame, cookie, this, op_ret, op_errno); + afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; } static int32_t -afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *higher_name = NULL; - int child_index = (long) cookie; - - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. " - "please load features/locks xlator on server"); - - local->op_ret = op_ret; - } - - local->op_errno = op_errno; - } - } - UNLOCK (&frame->lock); - - if (op_ret != 0) { - afr_unlock (frame, this); - goto out; - } else { - int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER; - int_lock->lock_count++; - } - - /* The lower path has been locked. Now lock the higher path */ - - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, higher_name, child_index); - - - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, higher, higher_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - -out: - return 0; -} - -static int32_t afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, AFR_LOCK_OP, NULL, op_ret, op_errno, (long)cookie); - afr_lock_cbk (frame, cookie, this, op_ret, op_errno); + afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; } @@ -905,6 +976,7 @@ static int afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -915,18 +987,16 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: - memcpy (int_lock->inode_locked_nodes, - int_lock->locked_nodes, - priv->child_count); - int_lock->inodelk_lock_count = int_lock->lock_count; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + memcpy (inodelk->locked_nodes, int_lock->locked_nodes, + sizeof (*inodelk->locked_nodes) * priv->child_count); + inodelk->lock_count = int_lock->lock_count; break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: - memcpy (int_lock->entry_locked_nodes, - int_lock->locked_nodes, - priv->child_count); - int_lock->entrylk_lock_count = int_lock->lock_count; + /*entrylk_count is being used in both non-blocking and blocking + * modes */ break; } @@ -934,25 +1004,67 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) } +static inline gf_boolean_t +afr_is_entrylk (afr_internal_lock_t *int_lock, + afr_transaction_type trans_type) +{ + gf_boolean_t is_entrylk = _gf_false; + + if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) && + int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) { + + is_entrylk = _gf_true; + + } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) && + (trans_type == AFR_ENTRY_TRANSACTION || + trans_type == AFR_ENTRY_RENAME_TRANSACTION)) { + + is_entrylk = _gf_true; + + } else { + is_entrylk = _gf_false; + } + + return is_entrylk; +} + +static gf_boolean_t +_is_lock_wind_needed (afr_local_t *local, int child_index) +{ + if (!local->child_up[child_index]) + return _gf_false; + + return _gf_true; +} + int -afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) +afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; - loc_t *lower = NULL; - const char *lower_name = NULL; struct gf_flock flock = {0,}; uint64_t ctx = 0; int ret = 0; + int child_index = 0; + int lockee_no = 0; + gf_boolean_t is_entrylk = _gf_false; - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + child_index = cookie % priv->child_count; + lockee_no = cookie / priv->child_count; + is_entrylk = afr_is_entrylk (int_lock, local->transaction.type); - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; - flock.l_type = int_lock->lk_flock.l_type; + + if (!is_entrylk) { + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = inodelk->flock.l_type; + } if (local->fd) { ret = fd_ctx_get (local->fd, this, &ctx); @@ -971,42 +1083,26 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) return 0; } - - /* skip over children that or down - or don't have the fd open */ - - while ((child_index < priv->child_count) - && (!local->child_up[child_index] || - !local->fd_open_on[child_index])) - - child_index++; - } else { - /* skip over children that are down */ - while ((child_index < priv->child_count) - && !local->child_up[child_index]) - child_index++; } - if ((child_index == priv->child_count) && - int_lock->lock_count == 0) { - - gf_log (this->name, GF_LOG_INFO, - "unable to lock on even one child"); - - local->op_ret = -1; - int_lock->lock_op_ret = -1; + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { + if ((is_entrylk && int_lock->entrylk_lock_count == 0) || + (!is_entrylk && int_lock->lock_count == 0)) { + gf_log (this->name, GF_LOG_INFO, + "unable to lock on even one child"); - afr_copy_locked_nodes (frame, this); + local->op_ret = -1; + int_lock->lock_op_ret = -1; - afr_unlock(frame, this); + afr_copy_locked_nodes (frame, this); - return 0; + afr_unlock(frame, this); + return 0; + } } - if ((child_index == priv->child_count) - || (int_lock->lock_count == int_lock->lk_expected_count)) { - + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { /* we're done locking */ gf_log (this->name, GF_LOG_DEBUG, @@ -1019,12 +1115,18 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) return 0; } + if (!_is_lock_wind_needed (local, child_index)) { + afr_lock_blocking (frame, this, cookie + 1); + return 0; + } + switch (local->transaction.type) { case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: if (local->fd) { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, AFR_LOCK_OP, &flock, F_SETLKW, child_index); @@ -1032,11 +1134,12 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) (void *) (long) child_index, priv->children[child_index], priv->children[child_index]->fops->finodelk, - this->name, local->fd, - F_SETLKW, &flock); + int_lock->domain, local->fd, + F_SETLKW, &flock, NULL); } else { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, AFR_LOCK_OP, &flock, F_SETLKW, child_index); @@ -1044,63 +1147,44 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) (void *) (long) child_index, priv->children[child_index], priv->children[child_index]->fops->inodelk, - this->name, &local->loc, - F_SETLKW, &flock); + int_lock->domain, &local->loc, + F_SETLKW, &flock, NULL); } break; case AFR_ENTRY_RENAME_TRANSACTION: - { - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, lower_name, child_index); - - - STACK_WIND_COOKIE (frame, afr_lock_lower_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, lower, lower_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - - break; - } - case AFR_ENTRY_TRANSACTION: + /*Accounting for child_index increments on 'down' + *and 'fd-less' children */ + if (local->fd) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, local->transaction.basename, - child_index); + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + cookie); STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) child_index, + (void *) (long) cookie, priv->children[child_index], priv->children[child_index]->fops->fentrylk, - this->name, local->fd, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); + int_lock->domain, local->fd, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); } else { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + AFR_TRACE_ENTRYLK_IN (frame, this, + AFR_ENTRYLK_TRANSACTION, AFR_LOCK_OP, local->transaction.basename, child_index); STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) child_index, + (void *) (long) cookie, priv->children[child_index], priv->children[child_index]->fops->entrylk, - this->name, - &local->transaction.parent_loc, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); + int_lock->domain, + &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); } break; @@ -1128,11 +1212,12 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this) break; case AFR_ENTRY_RENAME_TRANSACTION: + case AFR_ENTRY_TRANSACTION: up_count = afr_up_children_count (local->child_up, priv->child_count); - int_lock->lk_expected_count = 2 * up_count; - //fallthrough - case AFR_ENTRY_TRANSACTION: + int_lock->lk_call_count = int_lock->lk_expected_count + = (int_lock->lockee_count * + up_count); initialize_entrylk_variables (frame, this); break; } @@ -1144,42 +1229,55 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this) static int32_t afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; int call_count = 0; int child_index = (long) cookie; + int copies = 0; + int index = 0; + int lockee_no = 0; + afr_private_t *priv = NULL; + + priv = this->private; + + copies = priv->child_count; + index = child_index % copies; + lockee_no = child_index / copies; local = frame->local; int_lock = &local->internal_lock; - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, NULL, op_ret, + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, op_ret, op_errno, (long) cookie); - LOCK (&frame->lock); - { - call_count = --int_lock->lk_call_count; - } - UNLOCK (&frame->lock); - - if (op_ret < 0 ) { - if (op_errno == ENOSYS) { + LOCK (&frame->lock); + { + if (op_ret < 0 ) { + if (op_errno == ENOSYS) { /* return ENOTSUP */ - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. " - "please load features/locks xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. " + "please load features/locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else if (op_ret == 0) { + int_lock->lockee[lockee_no].locked_nodes[index] |= \ + LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; + } - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else if (op_ret == 0) { - int_lock->entry_locked_nodes[child_index] |= LOCKED_YES; - int_lock->entrylk_lock_count++; + call_count = --int_lock->lk_call_count; } + UNLOCK (&frame->lock); if (call_count == 0) { gf_log (this->name, GF_LOG_TRACE, @@ -1206,42 +1304,26 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } -void -afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx, - size_t child_count) -{ - int i = 0; - - GF_ASSERT (local->fd_open_on); - - memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count); - for (i = 0; i < child_count; i++) - if (fd_ctx->opened_on[i] == AFR_FD_OPENED) - local->fd_open_on[i] = 1; -} - int afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - const char *basename = NULL; - loc_t *loc = NULL; - int32_t call_count = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int copies = 0; + int index = 0; + int lockee_no = 0; + int32_t call_count = 0; int i = 0; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + copies = priv->child_count; initialize_entrylk_variables (frame, this); - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; - if (local->fd) { fd_ctx = afr_fd_ctx_get (local->fd, this); if (!fd_ctx) { @@ -1254,11 +1336,11 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) local->op_errno = EINVAL; int_lock->lock_op_errno = EINVAL; + afr_unlock (frame, this); return -1; } - afr_mark_fd_open_on (local, fd_ctx, priv->child_count); - call_count = internal_lock_count (frame, this); + call_count = int_lock->lockee_count * internal_lock_count (frame, this); int_lock->lk_call_count = call_count; int_lock->lk_expected_count = call_count; @@ -1271,42 +1353,52 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) /* Send non-blocking entrylk calls only on up children and where the fd has been opened */ - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && local->fd_open_on[i]) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_LOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { + index = i%copies; + lockee_no = i/copies; + if (local->child_up[index]) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fentrylk, + priv->children[index], + priv->children[index]->fops->fentrylk, this->name, local->fd, - basename, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, + NULL); + if (!--call_count) + break; } } } else { - GF_ASSERT (loc); - - call_count = internal_lock_count (frame, this); + call_count = int_lock->lockee_count * internal_lock_count (frame, this); int_lock->lk_call_count = call_count; int_lock->lk_expected_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_LOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { + index = i%copies; + lockee_no = i/copies; + if (local->child_up[index]) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, loc, basename, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + priv->children[index], + priv->children[index]->fops->entrylk, + this->name, &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, + NULL); if (!--call_count) break; - } } } @@ -1316,70 +1408,69 @@ out: int32_t afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; int call_count = 0; int child_index = (long) cookie; afr_fd_ctx_t *fd_ctx = NULL; - afr_private_t *priv = NULL; - priv = this->private; local = frame->local; int_lock = &local->internal_lock; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION, + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION, AFR_LOCK_OP, NULL, op_ret, op_errno, (long) cookie); + if (local->fd) + fd_ctx = afr_fd_ctx_get (local->fd, this); + LOCK (&frame->lock); { + if (op_ret < 0) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. " + "please load features/locks xlator on " + "server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + if (local->transaction.eager_lock) + local->transaction.eager_lock[child_index] = 0; + } else { + inodelk->locked_nodes[child_index] |= LOCKED_YES; + inodelk->lock_count++; + + if (local->transaction.eager_lock && + local->transaction.eager_lock[child_index] && + local->fd) { + /* piggybacked */ + if (op_ret == 1) { + /* piggybacked */ + } else if (op_ret == 0) { + /* lock acquired from server */ + fd_ctx->lock_acquired[child_index]++; + } + } + } + call_count = --int_lock->lk_call_count; } UNLOCK (&frame->lock); - if (op_ret < 0) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. " - "please load features/locks xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else { - int_lock->inode_locked_nodes[child_index] - |= LOCKED_YES; - int_lock->inodelk_lock_count++; - - if (priv->eager_lock && local->fd) { - fd_ctx = afr_fd_ctx_get (local->fd, this); - local->transaction.eager_lock[child_index] = 1; - /* piggybacked */ - - if (op_ret == 1) { - /* piggybacked */ - } else if (op_ret == 0) { - /* lock acquired from server */ - LOCK (&local->fd->lock); - { - fd_ctx->lock_acquired[child_index]++; - } - UNLOCK (&local->fd->lock); - } - } - } - if (call_count == 0) { gf_log (this->name, GF_LOG_TRACE, "Last inode locking reply received"); /* all locks successful. Proceed to call FOP */ - if (int_lock->inodelk_lock_count == - int_lock->lk_expected_count) { + if (inodelk->lock_count == int_lock->lk_expected_count) { gf_log (this->name, GF_LOG_TRACE, "All servers locked. Calling the cbk"); int_lock->lock_op_ret = 0; @@ -1403,30 +1494,29 @@ int afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; afr_fd_ctx_t *fd_ctx = NULL; - int32_t call_count = 0; - int i = 0; - int ret = 0; - struct gf_flock flock = {0,}; - struct gf_flock full_flock = {0,}; - struct gf_flock *flock_use = &flock; - int piggyback = 0; + int32_t call_count = 0; + int i = 0; + int ret = 0; + struct gf_flock flock = {0,}; + struct gf_flock full_flock = {0,}; + struct gf_flock *flock_use = NULL; + int piggyback = 0; local = frame->local; int_lock = &local->internal_lock; priv = this->private; - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; - flock.l_type = int_lock->lk_flock.l_type; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - gf_log (this->name, GF_LOG_DEBUG, "attempting data lock range %"PRIu64 - " %"PRIu64" by %s", flock.l_start, flock.l_len, - lkowner_utoa (&frame->root->lk_owner)); + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = inodelk->flock.l_type; - full_flock.l_type = int_lock->lk_flock.l_type; + full_flock.l_type = inodelk->flock.l_type; initialize_inodelk_variables (frame, this); @@ -1442,11 +1532,11 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) local->op_errno = EINVAL; int_lock->lock_op_errno = EINVAL; + afr_unlock (frame, this); ret = -1; goto out; } - afr_mark_fd_open_on (local, fd_ctx, priv->child_count); call_count = internal_lock_count (frame, this); int_lock->lk_call_count = call_count; int_lock->lk_expected_count = call_count; @@ -1461,14 +1551,18 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) /* Send non-blocking inodelk calls only on up children and where the fd has been opened */ for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i] || !local->fd_open_on[i]) + if (!local->child_up[i]) continue; - if (!priv->eager_lock) + flock_use = &flock; + if (!local->transaction.eager_lock_on) { goto wind; + } - flock_use = &full_flock; piggyback = 0; + local->transaction.eager_lock[i] = 1; + + afr_set_delayed_post_op (frame, this); LOCK (&local->fd->lock); { @@ -1482,21 +1576,23 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) if (piggyback) { /* (op_ret == 1) => indicate piggybacked lock */ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i, - this, 1, 0); + this, 1, 0, NULL); if (!--call_count) break; continue; } + flock_use = &full_flock; wind: - afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_NB_TRANSACTION, AFR_LOCK_OP, flock_use, F_SETLK, i); STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, (void *) (long) i, priv->children[i], priv->children[i]->fops->finodelk, - this->name, local->fd, - F_SETLK, flock_use); + int_lock->domain, local->fd, + F_SETLK, flock_use, NULL); if (!--call_count) break; @@ -1509,15 +1605,16 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) for (i = 0; i < priv->child_count; i++) { if (!local->child_up[i]) continue; - afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_NB_TRANSACTION, AFR_LOCK_OP, &flock, F_SETLK, i); STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, (void *) (long) i, priv->children[i], priv->children[i]->fops->inodelk, - this->name, &local->loc, - F_SETLK, &flock); + int_lock->domain, &local->loc, + F_SETLK, &flock, NULL); if (!--call_count) break; @@ -1527,200 +1624,6 @@ out: return ret; } -static int -__is_lower_locked (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int count = 0; - int i = 0; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) - count++; - } - - return count; - -} - -static int -__is_higher_locked (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int count = 0; - int i = 0; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (int_lock->locked_nodes[i] & LOCKED_YES) - count++; - } - - return count; - -} - -static int -afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - const char *basename = NULL; - loc_t *loc = NULL; - int call_count = 0; - int i = -1; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; - - call_count = __is_lower_locked (frame, this); - int_lock->lk_call_count = call_count; - - if (!call_count){ - gf_log (this->name, GF_LOG_TRACE, - "No internal locks unlocked"); - int_lock->lock_cbk (frame, this); - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_UNLOCK_OP, basename, i); - - STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - loc, basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - - if (!--call_count) - break; - - } - } - -out: - return 0; - -} - - -static int -afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - local->transaction.done (frame, this); - return 0; -} - -static int -afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *higher_name = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - if (__is_higher_locked (frame, this)) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking higher"); - int_lock->lk_basename = higher_name; - int_lock->lk_loc = higher; - int_lock->lock_cbk = afr_post_unlock_higher_cbk; - - afr_unlock_entrylk (frame, this); - } else - local->transaction.done (frame, this); - - return 0; -} - -static int -afr_rename_unlock (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - const char *lower_name = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - if (__is_lower_locked (frame, this)) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking lower"); - int_lock->lk_basename = lower_name; - int_lock->lk_loc = lower; - int_lock->lock_cbk = afr_post_unlock_lower_cbk; - - afr_unlock_lower_entrylk (frame, this); - } else - afr_post_unlock_lower_cbk (frame, this); - - return 0; -} - -static int -afr_rename_transaction (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - return (local->transaction.type == - AFR_ENTRY_RENAME_TRANSACTION); - -} - int32_t afr_unlock (call_frame_t *frame, xlator_t *this) { @@ -1732,10 +1635,8 @@ afr_unlock (call_frame_t *frame, xlator_t *this) if (is_afr_lock_transaction (local)) afr_unlock_inodelk (frame, this); else - if (!afr_rename_transaction (frame, this)) - afr_unlock_entrylk (frame, this); - else - afr_rename_unlock (frame, this); + afr_unlock_entrylk (frame, this); + } else { if (is_afr_lock_selfheal (local)) afr_unlock_inodelk (frame, this); @@ -1904,10 +1805,12 @@ out: int32_t afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock); + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata); int32_t afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -1931,7 +1834,7 @@ afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (void *) (long) source_child, priv->children[source_child], priv->children[source_child]->fops->lk, - local->fd, F_GETLK_FD, &flock); + local->fd, F_GETLK_FD, &flock, NULL); return 0; @@ -1959,7 +1862,7 @@ afr_recover_lock (call_frame_t *frame, xlator_t *this, (void *) (long) lock_recovery_child, priv->children[lock_recovery_child], priv->children[lock_recovery_child]->fops->lk, - local->fd, F_SETLK, flock); + local->fd, F_SETLK, flock, NULL); return 0; } @@ -1977,7 +1880,8 @@ is_afr_lock_eol (struct gf_flock *lock) int32_t afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { if (op_ret) { gf_log (this->name, GF_LOG_INFO, @@ -2037,7 +1941,7 @@ afr_lock_recovery (call_frame_t *frame, xlator_t *this) (void *) (long) source_child, priv->children[source_child], priv->children[source_child]->fops->lk, - local->fd, F_GETLK_FD, &flock); + local->fd, F_GETLK_FD, &flock, NULL); out: return ret; @@ -2065,7 +1969,8 @@ out: int32_t afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) { int32_t child_index = (long )cookie; int ret = 0; @@ -2137,8 +2042,7 @@ afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this) (void *)(long) child_index, priv->children[child_index], priv->children[child_index]->fops->open, - &loc, fdctx->flags, local->fd, - fdctx->wbflags); + &loc, fdctx->flags, local->fd, NULL); return 0; } @@ -2187,7 +2091,7 @@ afr_attempt_lock_recovery (xlator_t *this, int32_t child_index) goto out; } - ALLOC_OR_GOTO (frame->local, afr_local_t, out); + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); local = frame->local; ret = afr_local_init (local, priv, &op_errno); if (ret < 0) { @@ -2233,27 +2137,38 @@ out: return ret; } -void -afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, +int +afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom, unsigned int child_count) { - afr_local_t *dst_local = NULL; - afr_local_t *src_local = NULL; - afr_internal_lock_t *dst_lock = NULL; - afr_internal_lock_t *src_lock = NULL; + afr_local_t *dst_local = NULL; + afr_local_t *src_local = NULL; + afr_internal_lock_t *dst_lock = NULL; + afr_internal_lock_t *src_lock = NULL; + afr_inodelk_t *dst_inodelk = NULL; + afr_inodelk_t *src_inodelk = NULL; + int ret = -1; - dst_local = dst->local; - dst_lock = &dst_local->internal_lock; src_local = src->local; src_lock = &src_local->internal_lock; - if (src_lock->inode_locked_nodes) { - memcpy (dst_lock->inode_locked_nodes, - src_lock->inode_locked_nodes, - sizeof (*dst_lock->inode_locked_nodes) * child_count); - memset (src_lock->inode_locked_nodes, 0, - sizeof (*src_lock->inode_locked_nodes) * child_count); - } - - dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count; - src_lock->inodelk_lock_count = 0; + src_inodelk = afr_get_inodelk (src_lock, dom); + dst_local = dst->local; + dst_lock = &dst_local->internal_lock; + dst_inodelk = afr_get_inodelk (dst_lock, dom); + if (!dst_inodelk || !src_inodelk) + goto out; + if (src_inodelk->locked_nodes) { + memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes, + sizeof (*dst_inodelk->locked_nodes) * child_count); + memset (src_inodelk->locked_nodes, 0, + sizeof (*src_inodelk->locked_nodes) * child_count); + } + + dst_lock->transaction_lk_type = src_lock->transaction_lk_type; + dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type; + dst_inodelk->lock_count = src_inodelk->lock_count; + src_inodelk->lock_count = 0; + ret = 0; +out: + return ret; } |
