diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-lk-common.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 1985 |
1 files changed, 1251 insertions, 734 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index dc850d38f..060d78f35 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #include "dict.h" @@ -31,8 +22,69 @@ #define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */ #define LOCKED_LOWER 0x2 /* for lower path */ +#define AFR_TRACE_INODELK_IN(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->inodelk_trace) \ + break; \ + afr_trace_inodelk_in (frame, this, params); \ + } while (0); + +#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->inodelk_trace) \ + break; \ + afr_trace_inodelk_out (frame, this, params); \ + } while (0); + +#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) 
\ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->entrylk_trace) \ + break; \ + afr_trace_entrylk_in (frame, this, params); \ + } while (0); + +#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \ + do { \ + afr_private_t *_priv = this->private; \ + if (!_priv->entrylk_trace) \ + break; \ + afr_trace_entrylk_out (frame, this, params); \ + } while (0); + int -afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); +afr_entry_lockee_cmp (const void *l1, const void *l2) +{ + const afr_entry_lockee_t *r1 = l1; + const afr_entry_lockee_t *r2 = l2; + int ret = 0; + uuid_t gfid1 = {0}; + uuid_t gfid2 = {0}; + + loc_gfid ((loc_t*)&r1->loc, gfid1); + loc_gfid ((loc_t*)&r2->loc, gfid2); + ret = uuid_compare (gfid1, gfid2); + /*Entrylks with NULL basename are the 'smallest'*/ + if (ret == 0) { + if (!r1->basename) + return -1; + if (!r2->basename) + return 1; + ret = strcmp (r1->basename, r2->basename); + } + + if (ret <= 0) + return -1; + else + return 1; +} + +int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); + +static int +afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this); static uint64_t afr_lock_number = 1; @@ -57,14 +109,13 @@ afr_set_lock_number (call_frame_t *frame, xlator_t *this) } void -afr_set_lk_owner (call_frame_t *frame, xlator_t *this) +afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner) { - if (!frame->root->lk_owner) { - gf_log (this->name, GF_LOG_TRACE, - "Setting lk-owner=%llu", - (unsigned long long) frame->root); - frame->root->lk_owner = (uint64_t) frame->root; - } + gf_log (this->name, GF_LOG_TRACE, + "Setting lk-owner=%llu", + (unsigned long long) (unsigned long)lk_owner); + + set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner); } static int @@ -90,29 +141,19 @@ is_afr_lock_selfheal (afr_local_t *local) } int32_t -internal_lock_count (call_frame_t *frame, xlator_t *this, - afr_fd_ctx_t *fd_ctx) +internal_lock_count (call_frame_t *frame, xlator_t *this) 
{ afr_local_t *local = NULL; afr_private_t *priv = NULL; - int32_t call_count = 0; int i = 0; local = frame->local; priv = this->private; - if (fd_ctx) { - GF_ASSERT (local->fd); - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && fd_ctx->opened_on[i]) - ++call_count; - } - } else { - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) - ++call_count; - } + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + ++call_count; } return call_count; @@ -120,7 +161,7 @@ internal_lock_count (call_frame_t *frame, xlator_t *this, static void afr_print_inodelk (char *str, int size, int cmd, - struct flock *flock, uint64_t owner) + struct gf_flock *flock, gf_lkowner_t *owner) { char *cmd_str = NULL; char *type_str = NULL; @@ -168,11 +209,11 @@ afr_print_inodelk (char *str, int size, int cmd, } snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, " - "start=%llu, len=%llu, pid=%llu, lk-owner=%llu", + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", cmd_str, type_str, (unsigned long long) flock->l_start, (unsigned long long) flock->l_len, (unsigned long long) flock->l_pid, - (unsigned long long) owner); + lkowner_utoa (owner)); } @@ -188,11 +229,11 @@ afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd, void afr_print_entrylk (char *str, int size, const char *basename, - uint64_t owner) + gf_lkowner_t *owner) { - snprintf (str, size, "Basename=%s, lk-owner=%llu", + snprintf (str, size, "Basename=%s, lk-owner=%s", basename ? 
basename : "<nul>", - (unsigned long long)owner); + lkowner_utoa (owner)); } static void @@ -246,27 +287,20 @@ afr_set_lock_call_type (afr_lock_call_type_t lock_call_type, } static void -afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, - afr_lock_op_type_t lk_op_type, struct flock *flock, +afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, struct gf_flock *flock, int op_ret, int op_errno, int32_t child_index) { - xlator_t *this = NULL; afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; - afr_private_t *priv = NULL; char lockee[256]; char lock_call_type_str[256]; char verdict[16]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; - priv = this->private; - - if (!priv->inodelk_trace) { - return; - } afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); @@ -274,45 +308,37 @@ afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, afr_print_verdict (op_ret, op_errno, verdict); - gf_log (this->name, GF_LOG_NORMAL, - "[%s %s] [%s] Lockee={%s} Number={%llu}", + gf_log (this->name, GF_LOG_INFO, + "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? 
"LOCK REPLY" : "UNLOCK REPLY", - verdict, - lockee, + verdict, lkowner_utoa (&frame->root->lk_owner), lockee, (unsigned long long) int_lock->lock_number); } static void -afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, - afr_lock_op_type_t lk_op_type, struct flock *flock, +afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, struct gf_flock *flock, int32_t cmd, int32_t child_index) { - xlator_t *this = NULL; afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; char lock[256]; char lockee[256]; char lock_call_type_str[256]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; - priv = this->private; - if (!priv->inodelk_trace) { - return; - } - - afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner); + afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner); afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); - gf_log (this->name, GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_INFO, "[%s %s] Lock={%s} Lockee={%s} Number={%llu}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? 
"LOCK REQUEST" : "UNLOCK REQUEST", @@ -322,20 +348,21 @@ afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, } static void -afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, +afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, afr_lock_op_type_t lk_op_type, const char *basename, - int32_t child_index) + int32_t cookie) { - xlator_t *this = NULL; afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; afr_private_t *priv = NULL; + int child_index = 0; + int lockee_no = 0; char lock[256]; char lockee[256]; char lock_call_type_str[256]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; priv = this->private; @@ -343,36 +370,41 @@ afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, if (!priv->entrylk_trace) { return; } + lockee_no = cookie / priv->child_count; + child_index = cookie % priv->child_count; - afr_print_entrylk (lock, 256, basename, frame->root->lk_owner); - afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner); + afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd, + child_index); afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); - gf_log (this->name, GF_LOG_NORMAL, - "[%s %s] Lock={%s} Lockee={%s} Number={%llu}", + gf_log (this->name, GF_LOG_INFO, + "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? 
"LOCK REQUEST" : "UNLOCK REQUEST", lock, lockee, - (unsigned long long) int_lock->lock_number); + (unsigned long long) int_lock->lock_number, + cookie); } static void -afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, - afr_lock_op_type_t lk_op_type, const char *basename, int op_ret, - int op_errno, int32_t child_index) +afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this, + afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, const char *basename, + int op_ret, int op_errno, int32_t cookie) { - xlator_t *this = NULL; afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; + int lockee_no = 0; + int child_index = 0; char lock[256]; char lockee[256]; char lock_call_type_str[256]; char verdict[16]; - this = THIS; local = frame->local; int_lock = &local->internal_lock; priv = this->private; @@ -380,20 +412,25 @@ afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, if (!priv->entrylk_trace) { return; } + lockee_no = cookie / priv->child_count; + child_index = cookie % priv->child_count; - afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner); + afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd, + child_index); afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); afr_print_verdict (op_ret, op_errno, verdict); - gf_log (this->name, GF_LOG_NORMAL, - "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}", + gf_log (this->name, GF_LOG_INFO, + "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}", lock_call_type_str, lk_op_type == AFR_LOCK_OP ? 
"LOCK REPLY" : "UNLOCK REPLY", verdict, lock, lockee, - (unsigned long long) int_lock->lock_number); + (unsigned long long) int_lock->lock_number, + cookie); } @@ -431,9 +468,8 @@ is_afr_lock_transaction (afr_local_t *local) int ret = 0; switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - case AFR_FLUSH_TRANSACTION: + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: ret = 1; break; @@ -447,6 +483,47 @@ is_afr_lock_transaction (afr_local_t *local) return ret; } +int +afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local, + loc_t *loc, char *basename, int child_count) +{ + int ret = -1; + + loc_copy (&lockee->loc, loc); + lockee->basename = (basename)? gf_strdup (basename): NULL; + if (basename && !lockee->basename) + goto out; + + lockee->locked_count = 0; + lockee->locked_nodes = GF_CALLOC (child_count, + sizeof (*lockee->locked_nodes), + gf_afr_mt_afr_node_character); + + if (!lockee->locked_nodes) + goto out; + + ret = 0; +out: + return ret; + +} + +void +afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock) +{ + int i = 0; + + for (i = 0; i < int_lock->lockee_count; i++) { + loc_wipe (&int_lock->lockee[i].loc); + if (int_lock->lockee[i].basename) + GF_FREE (int_lock->lockee[i].basename); + if (int_lock->lockee[i].locked_nodes) + GF_FREE (int_lock->lockee[i].locked_nodes); + } + + return; +} + static int initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) { @@ -464,8 +541,13 @@ initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) int_lock->lock_op_ret = -1; int_lock->lock_op_errno = 0; - for (i = 0; i < priv->child_count; i++) { - int_lock->entry_locked_nodes[i] = 0; + for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) { + if (!int_lock->lockee[i].locked_nodes) + break; + int_lock->lockee[i].locked_count = 0; + memset (int_lock->lockee[i].locked_nodes, 0, + sizeof (*int_lock->lockee[i].locked_nodes) * + priv->child_count); } return 0; @@ -477,20 +559,23 @@ 
initialize_inodelk_variables (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; afr_private_t *priv = NULL; - - int i = 0; + afr_inodelk_t *inodelk = NULL; priv = this->private; local = frame->local; int_lock = &local->internal_lock; - int_lock->inodelk_lock_count = 0; - int_lock->lock_op_ret = -1; - int_lock->lock_op_errno = 0; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - for (i = 0; i < priv->child_count; i++) { - int_lock->inode_locked_nodes[i] = 0; - } + inodelk->lock_count = 0; + int_lock->lk_attempted_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; + + memset (inodelk->locked_nodes, 0, + sizeof (*inodelk->locked_nodes) * priv->child_count); + memset (int_lock->locked_nodes, 0, + sizeof (*int_lock->locked_nodes) * priv->child_count); return 0; } @@ -498,24 +583,36 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this) loc_t * lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2) { - int ret = 0; + int ret = 0; - ret = strcmp (l1->path, l2->path); + ret = uuid_compare (l1->inode->gfid, l2->inode->gfid); - if (ret == 0) - ret = strcmp (b1, b2); + if (ret == 0) + ret = strcmp (b1, b2); - if (ret <= 0) - return l1; - else - return l2; + if (ret <= 0) + return l1; + else + return l2; +} + +int +afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock) +{ + int call_count = 0; + int i = 0; + + for (i = 0; i < int_lock->lockee_count; i++) + call_count += int_lock->lockee[i].locked_count; + + return call_count; } int afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) { - int i; + int i = 0; int call_count = 0; for (i = 0; i < child_count; i++) { @@ -529,44 +626,63 @@ afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) /* FIXME: What if UNLOCK fails */ static int32_t afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, 
dict_t *xdata) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; - int call_count = 0; + int call_count = 0; - local = frame->local; + local = frame->local; int_lock = &local->internal_lock; - LOCK (&frame->lock); - { - call_count = --int_lock->lk_call_count; - } - UNLOCK (&frame->lock); + LOCK (&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK (&frame->lock); - if (call_count == 0) { + if (call_count == 0) { gf_log (this->name, GF_LOG_TRACE, "All internal locks unlocked"); int_lock->lock_cbk (frame, this); } - return 0; + return 0; } static int32_t afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; + int32_t child_index = (long)cookie; + afr_private_t *priv = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION, AFR_UNLOCK_OP, NULL, op_ret, - op_errno, (long) cookie); + op_errno, child_index); + + priv = this->private; if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { - gf_log (this->name, GF_LOG_TRACE, - "Unlock failed for some reason"); + gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s " + "with lock owner %s", local->loc.path, + priv->children[child_index]->name, + lkowner_utoa (&frame->root->lk_owner)); } - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + inodelk->locked_nodes[child_index] &= LOCKED_NO; + if (local->transaction.eager_lock) + local->transaction.eager_lock[child_index] = 0; + + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; @@ -576,22 +692,30 @@ static int afr_unlock_inodelk (call_frame_t 
*frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; - - struct flock flock; + struct gf_flock flock = {0,}; + struct gf_flock full_flock = {0,}; + struct gf_flock *flock_use = NULL; int call_count = 0; int i = 0; + int piggyback = 0; + afr_fd_ctx_t *fd_ctx = NULL; + local = frame->local; int_lock = &local->internal_lock; priv = this->private; - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; flock.l_type = F_UNLCK; - call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes, + full_flock.l_type = F_UNLCK; + call_count = afr_locked_nodes_count (inodelk->locked_nodes, priv->child_count); int_lock->lk_call_count = call_count; @@ -603,46 +727,107 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) goto out; } + if (local->fd) + fd_ctx = afr_fd_ctx_get (local->fd, this); + for (i = 0; i < priv->child_count; i++) { - if (int_lock->inode_locked_nodes[i] & LOCKED_YES) { - if (local->fd) { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, - AFR_UNLOCK_OP, &flock, F_SETLK, i); - - STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, - (void *) (long)i, - priv->children[i], - priv->children[i]->fops->finodelk, - this->name, local->fd, - F_SETLK, &flock); - } else { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, - AFR_UNLOCK_OP, &flock, F_SETLK, i); - - STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, - (void *) (long)i, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, &local->loc, - F_SETLK, &flock); + if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES) + continue; + + if (local->fd) { + flock_use = &flock; + if (!local->transaction.eager_lock[i]) { + goto wind; } + piggyback = 0; + + LOCK (&local->fd->lock); + { + if (fd_ctx->lock_piggyback[i]) 
{ + fd_ctx->lock_piggyback[i]--; + piggyback = 1; + } else { + fd_ctx->lock_acquired[i]--; + } + } + UNLOCK (&local->fd->lock); + + if (piggyback) { + afr_unlock_inodelk_cbk (frame, (void *) (long) i, + this, 1, 0, NULL); + if (!--call_count) + break; + continue; + } + + flock_use = &full_flock; + wind: + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, flock_use, F_SETLK, + i); + + STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, + (void *) (long)i, + priv->children[i], + priv->children[i]->fops->finodelk, + int_lock->domain, local->fd, + F_SETLK, flock_use, NULL); + + if (!--call_count) + break; + + } else { + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, &flock, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, + (void *) (long)i, + priv->children[i], + priv->children[i]->fops->inodelk, + int_lock->domain, &local->loc, + F_SETLK, &flock, NULL); + + if (!--call_count) + break; } } - out: return 0; } static int32_t afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, - AFR_UNLOCK_OP, NULL, op_ret, - op_errno, (long) cookie); + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + int32_t child_index = 0; + int lockee_no = 0; + + priv = this->private; + lockee_no = (int)((long) cookie) / priv->child_count; + child_index = (int) ((long) cookie) % priv->child_count; - afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + local = frame->local; + int_lock = &local->internal_lock; + + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_UNLOCK_OP, + int_lock->lockee[lockee_no].basename, op_ret, + op_errno, (int) ((long)cookie)); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: unlock failed on %d, reason: %s", + local->loc.path, child_index, 
strerror (op_errno)); + } + + int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO; + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL); return 0; } @@ -650,25 +835,22 @@ afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, static int afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - const char *basename = NULL; - loc_t *loc = NULL; - - int call_count = 0; - int i = -1; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int index = 0; + int lockee_no = 0; + int copies = 0; + int i = -1; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + copies = priv->child_count; - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; + call_count = afr_lockee_locked_nodes_count (int_lock); - call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes, - priv->child_count); int_lock->lk_call_count = call_count; if (!call_count){ @@ -678,18 +860,26 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) goto out; } - for (i = 0; i < priv->child_count; i++) { - if (int_lock->entry_locked_nodes[i] & LOCKED_YES) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_UNLOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) { + lockee_no = i / copies; + index = i % copies; + if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_UNLOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - loc, basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + priv->children[index], + priv->children[index]->fops->entrylk, + int_lock->domain, + 
&int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL); + + if (!--call_count) + break; } } @@ -700,155 +890,85 @@ out: static int32_t afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - int done = 0; - int child_index = (long) cookie; - - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. " - "please load features/posix-locks xlator on server"); - local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - done = 1; - } - - local->child_up[child_index] = 0; - local->op_errno = op_errno; - int_lock->lock_op_errno = op_errno; - } - } - UNLOCK (&frame->lock); - - if ((op_ret == -1) && - (op_errno == ENOSYS)) { - afr_unlock (frame, this); - } else { - if (op_ret == 0) { - int_lock->locked_nodes[child_index] - |= LOCKED_YES; - int_lock->lock_count++; - } - afr_lock_blocking (frame, this, child_index + 1); - } - - return 0; -} - -static int32_t -afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, - AFR_LOCK_OP, NULL, op_ret, - op_errno, (long) cookie); - - afr_lock_cbk (frame, cookie, this, op_ret, op_errno); - return 0; - -} - -static int32_t -afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *lower_name = NULL; - const char *higher_name = NULL; - - int child_index = (long) 
cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int cky = (long) cookie; + int child_index = 0; + int lockee_no = 0; priv = this->private; local = frame->local; int_lock = &local->internal_lock; + child_index = ((int)cky) % priv->child_count; + lockee_no = ((int)cky) / priv->child_count; + LOCK (&frame->lock); { if (op_ret == -1) { if (op_errno == ENOSYS) { /* return ENOTSUP */ - gf_log (this->name, GF_LOG_ERROR, "subvolume does not support locking. " - "please load features/posix-locks xlator on server"); - - local->op_ret = op_ret; + "please load features/locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; } - local->child_up[child_index] = 0; - local->op_errno = op_errno; + local->op_errno = op_errno; + int_lock->lock_op_errno = op_errno; } + + int_lock->lk_attempted_count++; } UNLOCK (&frame->lock); - if (op_ret != 0) { + if ((op_ret == -1) && + (op_errno == ENOSYS)) { afr_unlock (frame, this); - goto out; } else { - int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER; - int_lock->lock_count++; + if (op_ret == 0) { + if (local->transaction.type == AFR_ENTRY_TRANSACTION || + local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; + } else { + int_lock->locked_nodes[child_index] |= LOCKED_YES; + int_lock->lock_count++; + } + } + afr_lock_blocking (frame, this, cky + 1); } - /* The lower path has been locked. Now lock the higher path */ - - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? 
- &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, higher_name, child_index); - + return 0; +} - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, higher, higher_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); +static int32_t +afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION, + AFR_LOCK_OP, NULL, op_ret, + op_errno, (long) cookie); -out: + afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; + } static int32_t afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, AFR_LOCK_OP, NULL, op_ret, op_errno, (long)cookie); - afr_lock_cbk (frame, cookie, this, op_ret, op_errno); + afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); return 0; } @@ -856,6 +976,7 @@ static int afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -864,21 +985,18 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) int_lock = &local->internal_lock; switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - case AFR_FLUSH_TRANSACTION: - memcpy (int_lock->inode_locked_nodes, - int_lock->locked_nodes, - priv->child_count); - int_lock->inodelk_lock_count = int_lock->lock_count; + case 
AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + memcpy (inodelk->locked_nodes, int_lock->locked_nodes, + sizeof (*inodelk->locked_nodes) * priv->child_count); + inodelk->lock_count = int_lock->lock_count; break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: - memcpy (int_lock->entry_locked_nodes, - int_lock->locked_nodes, - priv->child_count); - int_lock->entrylk_lock_count = int_lock->lock_count; + /*entrylk_count is being used in both non-blocking and blocking + * modes */ break; } @@ -886,42 +1004,78 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) } +static inline gf_boolean_t +afr_is_entrylk (afr_internal_lock_t *int_lock, + afr_transaction_type trans_type) +{ + gf_boolean_t is_entrylk = _gf_false; + + if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) && + int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) { + + is_entrylk = _gf_true; + + } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) && + (trans_type == AFR_ENTRY_TRANSACTION || + trans_type == AFR_ENTRY_RENAME_TRANSACTION)) { + + is_entrylk = _gf_true; + + } else { + is_entrylk = _gf_false; + } + + return is_entrylk; +} + +static gf_boolean_t +_is_lock_wind_needed (afr_local_t *local, int child_index) +{ + if (!local->child_up[child_index]) + return _gf_false; + + return _gf_true; +} + int -afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) +afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *lower_name = NULL; - const char *higher_name = NULL; - - struct flock flock; - uint64_t ctx; + afr_private_t *priv = NULL; + struct gf_flock flock = {0,}; + uint64_t ctx = 0; int ret = 0; - - local = frame->local; - int_lock = 
&local->internal_lock; - priv = this->private; - - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; - flock.l_type = int_lock->lk_flock.l_type; + int child_index = 0; + int lockee_no = 0; + gf_boolean_t is_entrylk = _gf_false; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + child_index = cookie % priv->child_count; + lockee_no = cookie / priv->child_count; + is_entrylk = afr_is_entrylk (int_lock, local->transaction.type); + + + if (!is_entrylk) { + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = inodelk->flock.l_type; + } if (local->fd) { ret = fd_ctx_get (local->fd, this, &ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_INFO, "unable to get fd ctx for fd=%p", local->fd); local->op_ret = -1; int_lock->lock_op_ret = -1; - local->op_errno = EINVAL; - int_lock->lock_op_errno = EINVAL; afr_copy_locked_nodes (frame, this); @@ -929,49 +1083,27 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) return 0; } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - /* skip over children that or down - or don't have the fd open */ - - while ((child_index < priv->child_count) - && (!local->child_up[child_index] - || !fd_ctx->opened_on[child_index])) - - child_index++; - } else { - /* skip over children that are down */ - while ((child_index < priv->child_count) - && !local->child_up[child_index]) - child_index++; } - if ((child_index == priv->child_count) && - int_lock->lock_count == 0) { + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { + if ((is_entrylk && int_lock->entrylk_lock_count == 0) || + (!is_entrylk && int_lock->lock_count == 0)) { + gf_log (this->name, GF_LOG_INFO, + "unable to lock on even one child"); - gf_log (this->name, GF_LOG_DEBUG, - "unable to lock on even one child"); - - local->op_ret = -1; - 
int_lock->lock_op_ret = -1; - local->op_errno = EAGAIN; - int_lock->lock_op_errno = EAGAIN; - - afr_copy_locked_nodes (frame, this); - - afr_unlock(frame, this); + local->op_ret = -1; + int_lock->lock_op_ret = -1; - return 0; + afr_copy_locked_nodes (frame, this); - } + afr_unlock(frame, this); - if ((child_index == priv->child_count) - || (int_lock->lock_count == - afr_up_children_count (priv->child_count, - local->child_up))) { + return 0; + } + } - /* we're done locking */ + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { + /* we're done locking */ gf_log (this->name, GF_LOG_DEBUG, "we're done locking"); @@ -980,108 +1112,85 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) int_lock->lock_op_ret = 0; int_lock->lock_cbk (frame, this); - return 0; - } + return 0; + } - switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - case AFR_FLUSH_TRANSACTION: + if (!_is_lock_wind_needed (local, child_index)) { + afr_lock_blocking (frame, this, cookie + 1); + return 0; + } + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: - if (local->fd) { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + if (local->fd) { + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, AFR_LOCK_OP, &flock, F_SETLKW, child_index); - STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->finodelk, - this->name, local->fd, - F_SETLKW, &flock); + STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->finodelk, + int_lock->domain, local->fd, + F_SETLKW, &flock, NULL); - } else { - afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + } else { + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_TRANSACTION, AFR_LOCK_OP, &flock, F_SETLKW, child_index); - 
STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->inodelk, - this->name, &local->loc, - F_SETLKW, &flock); - } - - break; - - case AFR_ENTRY_RENAME_TRANSACTION: - { - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, lower_name, child_index); - - - STACK_WIND_COOKIE (frame, afr_lock_lower_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, lower, lower_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - - break; - } + STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->inodelk, + int_lock->domain, &local->loc, + F_SETLKW, &flock, NULL); + } - case AFR_ENTRY_TRANSACTION: - if (local->fd) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, local->transaction.basename, - child_index); + break; - STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->fentrylk, - this->name, local->fd, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - } else { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + case AFR_ENTRY_RENAME_TRANSACTION: + case AFR_ENTRY_TRANSACTION: + /*Accounting for child_index increments on 'down' + *and 
'fd-less' children */ + + if (local->fd) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + cookie); + + STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, + (void *) (long) cookie, + priv->children[child_index], + priv->children[child_index]->fops->fentrylk, + int_lock->domain, local->fd, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } else { + AFR_TRACE_ENTRYLK_IN (frame, this, + AFR_ENTRYLK_TRANSACTION, AFR_LOCK_OP, local->transaction.basename, child_index); - STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, - &local->transaction.parent_loc, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - } - - break; - } - - return 0; + STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, + (void *) (long) cookie, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + int_lock->domain, + &int_lock->lockee[lockee_no].loc, + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + } + break; + } + return 0; } int32_t @@ -1090,20 +1199,25 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this) afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; + int up_count = 0; priv = this->private; local = frame->local; int_lock = &local->internal_lock; switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - case AFR_FLUSH_TRANSACTION: + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: initialize_inodelk_variables (frame, this); break; case AFR_ENTRY_RENAME_TRANSACTION: case AFR_ENTRY_TRANSACTION: + up_count = afr_up_children_count (local->child_up, + priv->child_count); + int_lock->lk_call_count = int_lock->lk_expected_count + = (int_lock->lockee_count * + up_count); initialize_entrylk_variables (frame, this); 
break; } @@ -1115,60 +1229,68 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this) static int32_t afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - + afr_local_t *local = NULL; int call_count = 0; - int child_index = (long) cookie; + int child_index = (long) cookie; + int copies = 0; + int index = 0; + int lockee_no = 0; + afr_private_t *priv = NULL; + + priv = this->private; - local = frame->local; + copies = priv->child_count; + index = child_index % copies; + lockee_no = child_index / copies; + + local = frame->local; int_lock = &local->internal_lock; - priv = this->private; - afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, - AFR_LOCK_OP, NULL, op_ret, + AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, op_ret, op_errno, (long) cookie); - LOCK (&frame->lock); - { - call_count = --int_lock->lk_call_count; - } - UNLOCK (&frame->lock); - - if (op_ret < 0 ) { + LOCK (&frame->lock); + { + if (op_ret < 0 ) { if (op_errno == ENOSYS) { - /* return ENOTSUP */ + /* return ENOTSUP */ gf_log (this->name, GF_LOG_ERROR, "subvolume does not support locking. 
" - "please load features/posix-locks xlator on server"); + "please load features/locks xlator on server"); local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; + int_lock->lock_op_ret = op_ret; - local->child_up[child_index] = 0; - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else if (op_ret == 0) { - int_lock->entry_locked_nodes[child_index] - |= LOCKED_YES; - int_lock->entrylk_lock_count++; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else if (op_ret == 0) { + int_lock->lockee[lockee_no].locked_nodes[index] |= \ + LOCKED_YES; + int_lock->lockee[lockee_no].locked_count++; + int_lock->entrylk_lock_count++; + } + + call_count = --int_lock->lk_call_count; } + UNLOCK (&frame->lock); if (call_count == 0) { gf_log (this->name, GF_LOG_TRACE, "Last locking reply received"); - /* all locks successfull. Proceed to call FOP */ + /* all locks successful. Proceed to call FOP */ if (int_lock->entrylk_lock_count == - afr_up_children_count (priv->child_count, local->child_up)) { + int_lock->lk_expected_count) { gf_log (this->name, GF_LOG_TRACE, "All servers locked. Calling the cbk"); int_lock->lock_op_ret = 0; int_lock->lock_cbk (frame, this); } - /* Not all locks were successfull. Unlock and try locking + /* Not all locks were successful. 
Unlock and try locking again, this time with serially blocking locks */ else { gf_log (this->name, GF_LOG_TRACE, @@ -1185,33 +1307,27 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_fd_ctx_t *fd_ctx = NULL; - const char *basename = NULL; - loc_t *loc = NULL; - - int32_t call_count = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int copies = 0; + int index = 0; + int lockee_no = 0; + int32_t call_count = 0; int i = 0; - uint64_t ctx; - int ret = 0; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + copies = priv->child_count; initialize_entrylk_variables (frame, this); - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; - if (local->fd) { - ret = fd_ctx_get (local->fd, this, &ctx); - - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + fd_ctx = afr_fd_ctx_get (local->fd, this); + if (!fd_ctx) { + gf_log (this->name, GF_LOG_INFO, "unable to get fd ctx for fd=%p", local->fd); @@ -1220,109 +1336,147 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) local->op_errno = EINVAL; int_lock->lock_op_errno = EINVAL; + afr_unlock (frame, this); return -1; } - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - call_count = internal_lock_count (frame, this, fd_ctx); + call_count = int_lock->lockee_count * internal_lock_count (frame, this); int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; + + if (!call_count) { + gf_log (this->name, GF_LOG_INFO, + "fd not open on any subvolumes. 
aborting."); + afr_unlock (frame, this); + goto out; + } /* Send non-blocking entrylk calls only on up children and where the fd has been opened */ - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && fd_ctx->opened_on[i]) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_LOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { + index = i%copies; + lockee_no = i/copies; + if (local->child_up[index]) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fentrylk, + priv->children[index], + priv->children[index]->fops->fentrylk, this->name, local->fd, - basename, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, + NULL); + if (!--call_count) + break; } } } else { - GF_ASSERT (loc); - - call_count = internal_lock_count (frame, this, NULL); + call_count = int_lock->lockee_count * internal_lock_count (frame, this); int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_LOCK_OP, basename, i); + for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) { + index = i%copies; + lockee_no = i/copies; + if (local->child_up[index]) { + AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, + int_lock->lockee[lockee_no].basename, + i); STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, loc, basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); + priv->children[index], + priv->children[index]->fops->entrylk, + this->name, &int_lock->lockee[lockee_no].loc, + 
int_lock->lockee[lockee_no].basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, + NULL); + + if (!--call_count) + break; } } } - +out: return 0; } int32_t afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - + afr_inodelk_t *inodelk = NULL; + afr_local_t *local = NULL; int call_count = 0; - int child_index = (long) cookie; + int child_index = (long) cookie; + afr_fd_ctx_t *fd_ctx = NULL; + - local = frame->local; + local = frame->local; int_lock = &local->internal_lock; - priv = this->private; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); - afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION, + AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION, AFR_LOCK_OP, NULL, op_ret, op_errno, (long) cookie); + if (local->fd) + fd_ctx = afr_fd_ctx_get (local->fd, this); + LOCK (&frame->lock); { - call_count = --int_lock->lk_call_count; - } - UNLOCK (&frame->lock); - - if (op_ret < 0 ) { + if (op_ret < 0) { if (op_errno == ENOSYS) { /* return ENOTSUP */ gf_log (this->name, GF_LOG_ERROR, "subvolume does not support locking. 
" - "please load features/posix-locks xlator on server"); + "please load features/locks xlator on " + "server"); local->op_ret = op_ret; - int_lock->lock_op_ret = op_ret; - local->child_up[child_index] = 0; - int_lock->lock_op_errno = op_errno; - local->op_errno = op_errno; - } - } else if (op_ret == 0) { - int_lock->inode_locked_nodes[child_index] - |= LOCKED_YES; - int_lock->inodelk_lock_count++; + int_lock->lock_op_ret = op_ret; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + if (local->transaction.eager_lock) + local->transaction.eager_lock[child_index] = 0; + } else { + inodelk->locked_nodes[child_index] |= LOCKED_YES; + inodelk->lock_count++; + + if (local->transaction.eager_lock && + local->transaction.eager_lock[child_index] && + local->fd) { + /* piggybacked */ + if (op_ret == 1) { + /* piggybacked */ + } else if (op_ret == 0) { + /* lock acquired from server */ + fd_ctx->lock_acquired[child_index]++; + } + } + } + + call_count = --int_lock->lk_call_count; } + UNLOCK (&frame->lock); if (call_count == 0) { gf_log (this->name, GF_LOG_TRACE, "Last inode locking reply received"); - /* all locks successfull. Proceed to call FOP */ - if (int_lock->inodelk_lock_count == - afr_up_children_count (priv->child_count, local->child_up)) { + /* all locks successful. Proceed to call FOP */ + if (inodelk->lock_count == int_lock->lk_expected_count) { gf_log (this->name, GF_LOG_TRACE, "All servers locked. Calling the cbk"); int_lock->lock_op_ret = 0; int_lock->lock_cbk (frame, this); } - /* Not all locks were successfull. Unlock and try locking + /* Not all locks were successful. 
Unlock and try locking again, this time with serially blocking locks */ else { gf_log (this->name, GF_LOG_TRACE, @@ -1340,31 +1494,36 @@ int afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; afr_fd_ctx_t *fd_ctx = NULL; - - int32_t call_count = 0; - uint64_t ctx = 0; - int i = 0; - int ret = 0; - struct flock flock; + int32_t call_count = 0; + int i = 0; + int ret = 0; + struct gf_flock flock = {0,}; + struct gf_flock full_flock = {0,}; + struct gf_flock *flock_use = NULL; + int piggyback = 0; local = frame->local; int_lock = &local->internal_lock; priv = this->private; - flock.l_start = int_lock->lk_flock.l_start; - flock.l_len = int_lock->lk_flock.l_len; - flock.l_type = int_lock->lk_flock.l_type; + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = inodelk->flock.l_type; + + full_flock.l_type = inodelk->flock.l_type; initialize_inodelk_variables (frame, this); if (local->fd) { - ret = fd_ctx_get (local->fd, this, &ctx); - - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, + fd_ctx = afr_fd_ctx_get (local->fd, this); + if (!fd_ctx) { + gf_log (this->name, GF_LOG_INFO, "unable to get fd ctx for fd=%p", local->fd); @@ -1373,285 +1532,643 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) local->op_errno = EINVAL; int_lock->lock_op_errno = EINVAL; + afr_unlock (frame, this); ret = -1; goto out; } - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - call_count = internal_lock_count (frame, this, fd_ctx); + call_count = internal_lock_count (frame, this); int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; + + if (!call_count) { + gf_log (this->name, GF_LOG_INFO, + "fd not open on any subvolumes. 
aborting."); + afr_unlock (frame, this); + goto out; + } /* Send non-blocking inodelk calls only on up children and where the fd has been opened */ for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i] && fd_ctx->opened_on[i]) { - afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, - AFR_LOCK_OP, &flock, F_SETLK, i); + if (!local->child_up[i]) + continue; - STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->finodelk, - this->name, local->fd, - F_SETLK, &flock); + flock_use = &flock; + if (!local->transaction.eager_lock_on) { + goto wind; + } + + piggyback = 0; + local->transaction.eager_lock[i] = 1; + afr_set_delayed_post_op (frame, this); + + LOCK (&local->fd->lock); + { + if (fd_ctx->lock_acquired[i]) { + fd_ctx->lock_piggyback[i]++; + piggyback = 1; + } + } + UNLOCK (&local->fd->lock); + + if (piggyback) { + /* (op_ret == 1) => indicate piggybacked lock */ + afr_nonblocking_inodelk_cbk (frame, (void *) (long) i, + this, 1, 0, NULL); + if (!--call_count) + break; + continue; } + flock_use = &full_flock; + wind: + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_NB_TRANSACTION, + AFR_LOCK_OP, flock_use, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->finodelk, + int_lock->domain, local->fd, + F_SETLK, flock_use, NULL); + + if (!--call_count) + break; } } else { - call_count = internal_lock_count (frame, this, NULL); + call_count = internal_lock_count (frame, this); int_lock->lk_call_count = call_count; + int_lock->lk_expected_count = call_count; for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, - AFR_LOCK_OP, &flock, F_SETLK, i); + if (!local->child_up[i]) + continue; + AFR_TRACE_INODELK_IN (frame, this, + AFR_INODELK_NB_TRANSACTION, + AFR_LOCK_OP, &flock, F_SETLK, i); - STACK_WIND_COOKIE (frame, 
afr_nonblocking_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, &local->loc, - F_SETLK, &flock); - } + STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + int_lock->domain, &local->loc, + F_SETLK, &flock, NULL); + + if (!--call_count) + break; } } - out: return ret; } -static int -__is_lower_locked (call_frame_t *frame, xlator_t *this) +int32_t +afr_unlock (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + afr_local_t *local = NULL; - int count = 0; - int i = 0; + local = frame->local; - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; + if (transaction_lk_op (local)) { + if (is_afr_lock_transaction (local)) + afr_unlock_inodelk (frame, this); + else + afr_unlock_entrylk (frame, this); - for (i = 0; i < priv->child_count; i++) { - if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) - count++; + } else { + if (is_afr_lock_selfheal (local)) + afr_unlock_inodelk (frame, this); + else + afr_unlock_entrylk (frame, this); } - return count; - + return 0; } -static int -__is_higher_locked (call_frame_t *frame, xlator_t *this) +int +afr_mark_locked_nodes (xlator_t *this, fd_t *fd, + unsigned char *locked_nodes) { - afr_internal_lock_t *int_lock = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; - int count = 0; - int i = 0; + priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; + ret = afr_fd_ctx_set (this, fd); + if (ret) + goto out; - for (i = 0; i < priv->child_count; i++) { - if (int_lock->locked_nodes[i] & LOCKED_YES) - count++; + ret = fd_ctx_get (fd, this, &tmp); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "failed to get the fd ctx"); + goto 
out; } + fdctx = (afr_fd_ctx_t *) (long) tmp; - return count; + GF_ASSERT (fdctx->locked_on); + memcpy (fdctx->locked_on, locked_nodes, + priv->child_count); + +out: + return ret; } static int -afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this) +__is_fd_saved (xlator_t *this, fd_t *fd) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - const char *basename = NULL; - loc_t *loc = NULL; + afr_locked_fd_t *locked_fd = NULL; + afr_private_t *priv = NULL; + int found = 0; - int call_count = 0; - int i = -1; + priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; - priv = this->private; + list_for_each_entry (locked_fd, &priv->saved_fds, list) { + if (locked_fd->fd == fd) { + found = 1; + break; + } + } - basename = int_lock->lk_basename; - if (int_lock->lk_loc) - loc = int_lock->lk_loc; + return found; +} - call_count = __is_lower_locked (frame, this); - int_lock->lk_call_count = call_count; +static int +__afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ + afr_private_t *priv = NULL; + afr_locked_fd_t *locked_fd = NULL; + int ret = 0; - if (!call_count){ - gf_log (this->name, GF_LOG_TRACE, - "No internal locks unlocked"); - int_lock->lock_cbk (frame, this); + priv = this->private; + + locked_fd = GF_CALLOC (1, sizeof (*locked_fd), + gf_afr_mt_locked_fd); + if (!locked_fd) { + ret = -1; goto out; } - for (i = 0; i < priv->child_count; i++) { - if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) { - afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, - AFR_UNLOCK_OP, basename, i); + locked_fd->fd = fd; + INIT_LIST_HEAD (&locked_fd->list); - STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - loc, basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + list_add_tail (&locked_fd->list, &priv->saved_fds); + +out: + return ret; +} + +int +afr_save_locked_fd (xlator_t *this, fd_t *fd) +{ + 
afr_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + + pthread_mutex_lock (&priv->mutex); + { + if (__is_fd_saved (this, fd)) { + gf_log (this->name, GF_LOG_DEBUG, + "fd=%p already saved", fd); + goto unlock; } + ret = __afr_save_locked_fd (this, fd); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "fd=%p could not be saved", fd); + goto unlock; + } } +unlock: + pthread_mutex_unlock (&priv->mutex); + + return ret; +} + +static int +afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_locked_fd_t *locked_fd = NULL; + + local = frame->local; + + locked_fd = local->locked_fd; + + STACK_DESTROY (frame->root); + afr_local_cleanup (local, this); + + afr_save_locked_fd (this, locked_fd->fd); -out: return 0; } - static int -afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this) +afr_get_source_lock_recovery (xlator_t *this, fd_t *fd) { - afr_local_t *local = NULL; + afr_fd_ctx_t *fdctx = NULL; + afr_private_t *priv = NULL; + uint64_t tmp = 0; + int i = 0; + int source_child = -1; + int ret = 0; + + priv = this->private; + + ret = fd_ctx_get (fd, this, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + + for (i = 0; i < priv->child_count; i++) { + if (fdctx->locked_on[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "Found lock recovery source=%d", i); + source_child = i; + break; + } + } + +out: + return source_child; + +} + +int32_t +afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata); +int32_t +afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t source_child = 0; + struct gf_flock flock = {0,}; local = frame->local; + priv = this->private; + + if (op_ret) { + gf_log (this->name, GF_LOG_INFO, + "lock recovery 
failed"); + goto cleanup; + } + + source_child = local->source_child; + + memcpy (&flock, lock, sizeof (*lock)); + + STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk, + (void *) (long) source_child, + priv->children[source_child], + priv->children[source_child]->fops->lk, + local->fd, F_GETLK_FD, &flock, NULL); + + return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); + return 0; +} + +int +afr_recover_lock (call_frame_t *frame, xlator_t *this, + struct gf_flock *flock) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t lock_recovery_child = 0; + + priv = this->private; + local = frame->local; + + lock_recovery_child = local->lock_recovery_child; + + frame->root->lk_owner = flock->l_owner; + + STACK_WIND_COOKIE (frame, afr_recover_lock_cbk, + (void *) (long) lock_recovery_child, + priv->children[lock_recovery_child], + priv->children[lock_recovery_child]->fops->lk, + local->fd, F_SETLK, flock, NULL); - local->transaction.done (frame, this); return 0; } static int -afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this) +is_afr_lock_eol (struct gf_flock *lock) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *lower_name = NULL; - const char *higher_name = NULL; + int ret = 0; - local = frame->local; - int_lock = &local->internal_lock; + if ((lock->l_type == GF_LK_EOL)) + ret = 1; - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); + return ret; +} - lower_name = (lower == &local->transaction.parent_loc ? 
- local->transaction.basename : - local->transaction.new_basename); +int32_t +afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) +{ + if (op_ret) { + gf_log (this->name, GF_LOG_INFO, + "Failed to get locks on fd"); + goto cleanup; + } - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); + gf_log (this->name, GF_LOG_DEBUG, + "Got a lock on fd"); - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); + if (is_afr_lock_eol (lock)) { + gf_log (this->name, GF_LOG_INFO, + "Reached EOL on locks on fd"); + goto cleanup; + } - if (__is_higher_locked (frame, this)) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking higher"); - int_lock->lk_basename = higher_name; - int_lock->lk_loc = higher; - int_lock->lock_cbk = afr_post_unlock_higher_cbk; + afr_recover_lock (frame, this, lock); - afr_unlock_entrylk (frame, this); - } else - local->transaction.done (frame, this); + return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); return 0; } static int -afr_rename_unlock (call_frame_t *frame, xlator_t *this) +afr_lock_recovery (call_frame_t *frame, xlator_t *this) { - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - loc_t *lower = NULL; - loc_t *higher = NULL; - const char *lower_name = NULL; - const char *higher_name = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + int ret = 0; + int32_t source_child = 0; + struct gf_flock flock = {0,}; - local = frame->local; - int_lock = &local->internal_lock; + priv = this->private; + local = frame->local; + + fd = local->fd; - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); + source_child = afr_get_source_lock_recovery (this, 
fd); + if (source_child < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not recover locks due to lock " + "split brain"); + ret = -1; + goto out; + } + + local->source_child = source_child; - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); + /* the flock can be zero filled as we're querying incrementally + the locks held on the fd. + */ + STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk, + (void *) (long) source_child, + priv->children[source_child], + priv->children[source_child]->fops->lk, + local->fd, F_GETLK_FD, &flock, NULL); - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); +out: + return ret; +} - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); +static int +afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index) +{ + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; - if (__is_lower_locked (frame, this)) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking lower"); - int_lock->lk_basename = lower_name; - int_lock->lk_loc = lower; - int_lock->lock_cbk = afr_post_unlock_lower_cbk; + ret = fd_ctx_get (fd, this, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + + fdctx->opened_on[child_index] = AFR_FD_OPENED; + +out: + return ret; +} + +int32_t +afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) +{ + int32_t child_index = (long )cookie; + int ret = 0; + + if (op_ret) { + gf_log (this->name, GF_LOG_INFO, + "Reopen during lock-recovery failed"); + goto cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Open succeeded => proceed to recover locks"); + + ret = afr_lock_recovery (frame, this); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "Lock recovery failed"); + goto cleanup; + } - 
afr_unlock_lower_entrylk (frame, this); - } else - afr_post_unlock_lower_cbk (frame, this); + ret = afr_mark_fd_opened (this, fd, child_index); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "Marking fd open failed"); + goto cleanup; + } return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); + return 0; } static int -afr_rename_transaction (call_frame_t *frame, xlator_t *this) +afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + uint64_t tmp = 0; + afr_fd_ctx_t *fdctx = NULL; + loc_t loc = {0,}; + int32_t child_index = 0; + int ret = 0; + priv = this->private; local = frame->local; - return (local->transaction.type == - AFR_ENTRY_RENAME_TRANSACTION); + GF_ASSERT (local && local->fd); + + ret = fd_ctx_get (local->fd, this, &tmp); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get the context of fd", + uuid_utoa (local->fd->inode->gfid)); + fdctx = (afr_fd_ctx_t *) (long) tmp; + /* TODO: instead we should return from the function */ + GF_ASSERT (fdctx); + + child_index = local->lock_recovery_child; + inode_path (local->fd->inode, NULL, (char **)&loc.path); + loc.name = strrchr (loc.path, '/'); + loc.inode = inode_ref (local->fd->inode); + loc.parent = inode_parent (local->fd->inode, 0, NULL); + + + STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk, + (void *)(long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->open, + &loc, fdctx->flags, local->fd, NULL); + + return 0; } -int32_t -afr_unlock (call_frame_t *frame, xlator_t *this) +static int +is_fd_opened (fd_t *fd, int32_t child_index) { - afr_local_t *local = NULL; + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; + + ret = fd_ctx_get (fd, THIS, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + if (fdctx->opened_on[child_index] == AFR_FD_OPENED) + ret = 1; + +out: + return ret; +} + +int 
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index) +{ /* Kick off lock recovery on child_index for every entry queued on priv->saved_fds. The queue is spliced onto a local list under priv->mutex; each entry is then unlinked and handed to afr_lock_recovery_preopen (fd not yet marked opened on the child) or directly to afr_lock_recovery. Returns -1 when the queue is empty, when create_frame fails or when afr_local_init reports an error; otherwise returns the non-negative value afr_local_init left in ret. The frame is destroyed here only on the error path (ret < 0). */ + call_frame_t *frame = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_locked_fd_t *locked_fd = NULL; + afr_locked_fd_t *tmp = NULL; + int ret = -1; + struct list_head locks_list = {0,}; + int32_t op_errno = 0; + + + priv = this->private; + /* NOTE(review): saved_fds is inspected here without priv->mutex, while the splice further down takes it; an empty queue also returns the initial ret of -1. */ + if (list_empty (&priv->saved_fds)) + goto out; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + ret = -1; + goto out; + } + /* presumably allocates frame->local and jumps to out on failure (macro body not visible here); ret is still -1 at this point */ + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); local = frame->local; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) { + ret = -1; + goto out; + } - if (transaction_lk_op (local)) { - if (is_afr_lock_transaction (local)) - afr_unlock_inodelk (frame, this); - else - if (!afr_rename_transaction (frame, this)) - afr_unlock_entrylk (frame, this); - else - afr_rename_unlock (frame, this); - } else { - if (is_afr_lock_selfheal (local)) - afr_unlock_inodelk (frame, this); - else - afr_unlock_entrylk (frame, this); + frame->local = local; + + INIT_LIST_HEAD (&locks_list); + /* detach the whole queue under the lock so the entries can be walked afterwards without holding priv->mutex */ + pthread_mutex_lock (&priv->mutex); + { + list_splice_init (&priv->saved_fds, &locks_list); } + pthread_mutex_unlock (&priv->mutex); - return 0; /* NOTE(review): the same frame and local are reused for every entry, so local->fd, local->locked_fd and the reference taken by fd_ref are overwritten on each iteration, and the return values of the two recovery calls are ignored; confirm this is safe when more than one fd is queued. */ + list_for_each_entry_safe (locked_fd, tmp, + &locks_list, list) { + + list_del_init (&locked_fd->list); + + local->fd = fd_ref (locked_fd->fd); + local->lock_recovery_child = child_index; + local->locked_fd = locked_fd; + + if (!is_fd_opened (locked_fd->fd, child_index)) { + gf_log (this->name, GF_LOG_DEBUG, + "attempting open before lock " + "recovery"); + afr_lock_recovery_preopen (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "attempting lock recovery " + "without a preopen"); + afr_lock_recovery (frame, this); + } + } + +out: + if ((ret < 0) && frame) + AFR_STACK_DESTROY (frame); + return ret; +} + /* Hand the inodelk state for lock domain dom over from frame src to frame dst: child_count entries of locked_nodes and the lock_count are copied to dst and zeroed on src, and the transaction and selfheal lock types are copied across. Returns 0 on success, -1 when afr_get_inodelk yields NULL for either frame. */ +int +afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom, + unsigned int child_count) +{ + afr_local_t *dst_local = NULL; + afr_local_t 
*src_local = NULL; + afr_internal_lock_t *dst_lock = NULL; + afr_internal_lock_t *src_lock = NULL; + afr_inodelk_t *dst_inodelk = NULL; + afr_inodelk_t *src_inodelk = NULL; + int ret = -1; + /* Look up the inodelk state for domain dom on both frames; if either lookup yields NULL nothing is transferred and -1 is returned. */ + src_local = src->local; + src_lock = &src_local->internal_lock; + src_inodelk = afr_get_inodelk (src_lock, dom); + dst_local = dst->local; + dst_lock = &dst_local->internal_lock; + dst_inodelk = afr_get_inodelk (dst_lock, dom); + if (!dst_inodelk || !src_inodelk) + goto out; /* Move the first child_count locked_nodes entries from src to dst and clear them on src. NOTE(review): only the src array is NULL-checked; dst_inodelk->locked_nodes is assumed to be allocated with at least child_count entries, so confirm against the callers. */ + if (src_inodelk->locked_nodes) { + memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes, + sizeof (*dst_inodelk->locked_nodes) * child_count); + memset (src_inodelk->locked_nodes, 0, + sizeof (*src_inodelk->locked_nodes) * child_count); + } + /* Copy both lock-type fields to dst, then pass the lock count over and reset it to 0 on src. */ + dst_lock->transaction_lk_type = src_lock->transaction_lk_type; + dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type; + dst_inodelk->lock_count = src_inodelk->lock_count; + src_inodelk->lock_count = 0; + ret = 0; +out: + return ret; } |
