From 63f9e2a49573a0b539c0082fd0c08c1b4d4db983 Mon Sep 17 00:00:00 2001 From: Pavan Sondur Date: Sun, 22 Aug 2010 14:08:43 +0000 Subject: cluster/afr: Use 2 phase locking for transactions and self heal. Signed-off-by: Pavan Vilas Sondur Signed-off-by: Anand V. Avati BUG: 960 () URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=960 --- xlators/cluster/afr/src/Makefile.am | 2 +- xlators/cluster/afr/src/afr-common.c | 39 +- xlators/cluster/afr/src/afr-lk-common.c | 1657 ++++++++++++++++++++++ xlators/cluster/afr/src/afr-open.c | 16 +- xlators/cluster/afr/src/afr-self-heal-common.c | 167 ++- xlators/cluster/afr/src/afr-self-heal-data.c | 203 +-- xlators/cluster/afr/src/afr-self-heal-entry.c | 224 +-- xlators/cluster/afr/src/afr-self-heal-metadata.c | 183 +-- xlators/cluster/afr/src/afr-transaction.c | 668 +++------ xlators/cluster/afr/src/afr.c | 33 + xlators/cluster/afr/src/afr.h | 122 +- 11 files changed, 2333 insertions(+), 981 deletions(-) create mode 100644 xlators/cluster/afr/src/afr-lk-common.c diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index 699b3da77be..4e4b4c75260 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = afr.la pump.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c +afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr_la_LDFLAGS = -module -avoidversion afr_la_SOURCES = $(afr_common_source) afr.c diff --git a/xlators/cluster/afr/src/afr-common.c 
b/xlators/cluster/afr/src/afr-common.c index 103a683fa36..865b2a8a198 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -60,17 +60,6 @@ #define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL #define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL -void -afr_set_lk_owner (call_frame_t *frame, xlator_t *this) -{ - if (!frame->root->lk_owner) { - gf_log (this->name, GF_LOG_TRACE, - "Setting lk-owner=%llu", - (unsigned long long) frame->root); - frame->root->lk_owner = (uint64_t) frame->root; - } -} - uint64_t afr_is_split_brain (xlator_t *this, inode_t *inode) { @@ -318,7 +307,19 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->pending); - GF_FREE (local->transaction.locked_nodes); + if (local->internal_lock.locked_nodes) + GF_FREE (local->internal_lock.locked_nodes); + + if (local->internal_lock.inode_locked_nodes) + GF_FREE (local->internal_lock.inode_locked_nodes); + + if (local->internal_lock.entry_locked_nodes) + GF_FREE (local->internal_lock.entry_locked_nodes); + + if (local->internal_lock.lower_locked_nodes) + GF_FREE (local->internal_lock.lower_locked_nodes); + + GF_FREE (local->transaction.child_errno); GF_FREE (local->child_errno); @@ -451,20 +452,6 @@ afr_up_children_count (int child_count, unsigned char *child_up) } -int -afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) -{ - int ret = 0; - int i; - - for (i = 0; i < child_count; i++) - if (locked_nodes[i]) - ret++; - - return ret; -} - - ino64_t afr_itransform (ino64_t ino, int child_count, int child_index) { diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c new file mode 100644 index 00000000000..dc850d38fc0 --- /dev/null +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -0,0 +1,1657 @@ +/* + Copyright (c) 2007-2009 Gluster, Inc. + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include "dict.h" +#include "byte-order.h" +#include "common-utils.h" + +#include "afr.h" +#include "afr-transaction.h" + +#include + + +#define LOCKED_NO 0x0 /* no lock held */ +#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */ +#define LOCKED_LOWER 0x2 /* for lower path */ + +int +afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index); + +static uint64_t afr_lock_number = 1; + +static uint64_t +get_afr_lock_number () +{ + return (++afr_lock_number); +} + +int +afr_set_lock_number (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + int_lock->lock_number = get_afr_lock_number (); + + return 0; +} + +void +afr_set_lk_owner (call_frame_t *frame, xlator_t *this) +{ + if (!frame->root->lk_owner) { + gf_log (this->name, GF_LOG_TRACE, + "Setting lk-owner=%llu", + (unsigned long long) frame->root); + frame->root->lk_owner = (uint64_t) frame->root; + } +} + +static int +is_afr_lock_selfheal (afr_local_t *local) +{ + afr_internal_lock_t *int_lock = NULL; + int ret = -1; + + int_lock = &local->internal_lock; + + switch (int_lock->selfheal_lk_type) { + case AFR_DATA_SELF_HEAL_LK: + case AFR_METADATA_SELF_HEAL_LK: + ret = 1; + break; + case AFR_ENTRY_SELF_HEAL_LK: + ret = 0; + break; + } + + return ret; + +} + +int32_t 
+internal_lock_count (call_frame_t *frame, xlator_t *this, + afr_fd_ctx_t *fd_ctx) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int32_t call_count = 0; + int i = 0; + + local = frame->local; + priv = this->private; + + if (fd_ctx) { + GF_ASSERT (local->fd); + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i] && fd_ctx->opened_on[i]) + ++call_count; + } + } else { + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) + ++call_count; + } + } + + return call_count; +} + +static void +afr_print_inodelk (char *str, int size, int cmd, + struct flock *flock, uint64_t owner) +{ + char *cmd_str = NULL; + char *type_str = NULL; + + switch (cmd) { +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + cmd_str = "GETLK"; + break; + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + cmd_str = "SETLK"; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + cmd_str = "SETLKW"; + break; + + default: + cmd_str = ""; + break; + } + + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } + + snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, " + "start=%llu, len=%llu, pid=%llu, lk-owner=%llu", + cmd_str, type_str, (unsigned long long) flock->l_start, + (unsigned long long) flock->l_len, + (unsigned long long) flock->l_pid, + (unsigned long long) owner); + +} + +static void +afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd, + int child_index) +{ + snprintf (str, size, "path=%s, fd=%p, child=%d", + loc->path ? loc->path : "", + fd ? fd : NULL, + child_index); +} + +void +afr_print_entrylk (char *str, int size, const char *basename, + uint64_t owner) +{ + snprintf (str, size, "Basename=%s, lk-owner=%llu", + basename ? 
basename : "", + (unsigned long long)owner); +} + +static void +afr_print_verdict (int op_ret, int op_errno, char *str) +{ + if (op_ret < 0) { + if (op_errno == EAGAIN) + strcpy (str, "EAGAIN"); + else + strcpy (str, "FAILED"); + } + else + strcpy (str, "GRANTED"); +} + +static void +afr_set_lock_call_type (afr_lock_call_type_t lock_call_type, + char *lock_call_type_str, + afr_internal_lock_t *int_lock) +{ + switch (lock_call_type) { + case AFR_INODELK_TRANSACTION: + if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) + strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION"); + else + strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL"); + break; + case AFR_INODELK_NB_TRANSACTION: + if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) + strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION"); + else + strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL"); + break; + case AFR_ENTRYLK_TRANSACTION: + if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) + strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION"); + else + strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL"); + break; + case AFR_ENTRYLK_NB_TRANSACTION: + if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) + strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION"); + else + strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL"); + break; + default: + strcpy (lock_call_type_str, "UNKNOWN"); + break; + } + +} + +static void +afr_trace_inodelk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, struct flock *flock, + int op_ret, int op_errno, int32_t child_index) +{ + xlator_t *this = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + char lockee[256]; + char lock_call_type_str[256]; + char verdict[16]; + + this = THIS; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + if (!priv->inodelk_trace) { + return; + } + + afr_print_lockee (lockee, 256, 
&local->loc, local->fd, child_index); + + afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); + + afr_print_verdict (op_ret, op_errno, verdict); + + gf_log (this->name, GF_LOG_NORMAL, + "[%s %s] [%s] Lockee={%s} Number={%llu}", + lock_call_type_str, + lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY", + verdict, + lockee, + (unsigned long long) int_lock->lock_number); + +} + +static void +afr_trace_inodelk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, struct flock *flock, + int32_t cmd, int32_t child_index) +{ + xlator_t *this = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + + char lock[256]; + char lockee[256]; + char lock_call_type_str[256]; + + this = THIS; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + if (!priv->inodelk_trace) { + return; + } + + afr_print_inodelk (lock, 256, cmd, flock, frame->root->lk_owner); + afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + + afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); + + gf_log (this->name, GF_LOG_NORMAL, + "[%s %s] Lock={%s} Lockee={%s} Number={%llu}", + lock_call_type_str, + lk_op_type == AFR_LOCK_OP ? 
"LOCK REQUEST" : "UNLOCK REQUEST", + lock, lockee, + (unsigned long long) int_lock->lock_number); + +} + +static void +afr_trace_entrylk_in (call_frame_t *frame, afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, const char *basename, + int32_t child_index) +{ + xlator_t *this = NULL; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + + char lock[256]; + char lockee[256]; + char lock_call_type_str[256]; + + this = THIS; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + if (!priv->entrylk_trace) { + return; + } + + afr_print_entrylk (lock, 256, basename, frame->root->lk_owner); + afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + + afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); + + gf_log (this->name, GF_LOG_NORMAL, + "[%s %s] Lock={%s} Lockee={%s} Number={%llu}", + lock_call_type_str, + lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST", + lock, lockee, + (unsigned long long) int_lock->lock_number); +} + +static void +afr_trace_entrylk_out (call_frame_t *frame, afr_lock_call_type_t lock_call_type, + afr_lock_op_type_t lk_op_type, const char *basename, int op_ret, + int op_errno, int32_t child_index) +{ + xlator_t *this = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + char lock[256]; + char lockee[256]; + char lock_call_type_str[256]; + char verdict[16]; + + this = THIS; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + if (!priv->entrylk_trace) { + return; + } + + afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index); + + afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock); + + afr_print_verdict (op_ret, op_errno, verdict); + + gf_log (this->name, GF_LOG_NORMAL, + "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}", + lock_call_type_str, + lk_op_type == AFR_LOCK_OP ? 
"LOCK REPLY" : "UNLOCK REPLY", + verdict, + lock, lockee, + (unsigned long long) int_lock->lock_number); + +} + +static int +transaction_lk_op (afr_local_t *local) +{ + afr_internal_lock_t *int_lock = NULL; + int ret = -1; + + int_lock = &local->internal_lock; + + if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) { + gf_log (THIS->name, GF_LOG_DEBUG, + "lk op is for a transaction"); + ret = 1; + } + else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) { + gf_log (THIS->name, GF_LOG_DEBUG, + "lk op is for a self heal"); + + ret = 0; + } + + if (ret == -1) + gf_log (THIS->name, GF_LOG_DEBUG, + "lk op is not set"); + + return ret; + +} + +static int +is_afr_lock_transaction (afr_local_t *local) +{ + int ret = 0; + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + ret = 1; + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + case AFR_ENTRY_TRANSACTION: + ret = 0; + break; + + } + + return ret; +} + +static int +initialize_entrylk_variables (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + + int i = 0; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + int_lock->entrylk_lock_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; + + for (i = 0; i < priv->child_count; i++) { + int_lock->entry_locked_nodes[i] = 0; + } + + return 0; +} + +static int +initialize_inodelk_variables (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + + int i = 0; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + int_lock->inodelk_lock_count = 0; + int_lock->lock_op_ret = -1; + int_lock->lock_op_errno = 0; + + for (i = 0; i < priv->child_count; i++) { + int_lock->inode_locked_nodes[i] = 0; + } + + return 0; +} + +loc_t * +lower_path 
(loc_t *l1, const char *b1, loc_t *l2, const char *b2) +{ + int ret = 0; + + ret = strcmp (l1->path, l2->path); + + if (ret == 0) + ret = strcmp (b1, b2); + + if (ret <= 0) + return l1; + else + return l2; +} + +int +afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) + +{ + int i; + int call_count = 0; + + for (i = 0; i < child_count; i++) { + if (locked_nodes[i] & LOCKED_YES) + call_count++; + } + + return call_count; +} + +/* FIXME: What if UNLOCK fails */ +static int32_t +afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + int call_count = 0; + + local = frame->local; + int_lock = &local->internal_lock; + + LOCK (&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + gf_log (this->name, GF_LOG_TRACE, + "All internal locks unlocked"); + int_lock->lock_cbk (frame, this); + } + + return 0; +} + +static int32_t +afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, NULL, op_ret, + op_errno, (long) cookie); + + if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { + gf_log (this->name, GF_LOG_TRACE, + "Unlock failed for some reason"); + } + + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + + return 0; + +} + +/* Send an F_UNLCK inodelk/finodelk to every child marked LOCKED_YES in + inode_locked_nodes. If no child holds a lock, lock_cbk is invoked + directly; otherwise it is invoked once the last unlock reply arrives. */ +static int +afr_unlock_inodelk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + /* Zero-initialize: only l_start, l_len and l_type are assigned + below, but the whole struct is traced and wound to children. */ + struct flock flock = {0}; + int call_count = 0; + int i = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + flock.l_start = int_lock->lk_flock.l_start; + flock.l_len = int_lock->lk_flock.l_len; + flock.l_type = F_UNLCK; + + call_count = afr_locked_nodes_count 
(int_lock->inode_locked_nodes, + priv->child_count); + + int_lock->lk_call_count = call_count; + + if (!call_count) { + gf_log (this->name, GF_LOG_TRACE, + "No internal locks unlocked"); + int_lock->lock_cbk (frame, this); + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (int_lock->inode_locked_nodes[i] & LOCKED_YES) { + if (local->fd) { + afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, &flock, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, + (void *) (long)i, + priv->children[i], + priv->children[i]->fops->finodelk, + this->name, local->fd, + F_SETLK, &flock); + } else { + afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_UNLOCK_OP, &flock, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, + (void *) (long)i, + priv->children[i], + priv->children[i]->fops->inodelk, + this->name, &local->loc, + F_SETLK, &flock); + } + + } + } + +out: + return 0; +} + +static int32_t +afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, + AFR_UNLOCK_OP, NULL, op_ret, + op_errno, (long) cookie); + + afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno); + + return 0; +} + +static int +afr_unlock_entrylk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + const char *basename = NULL; + loc_t *loc = NULL; + + int call_count = 0; + int i = -1; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + basename = int_lock->lk_basename; + if (int_lock->lk_loc) + loc = int_lock->lk_loc; + + call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes, + priv->child_count); + int_lock->lk_call_count = call_count; + + if (!call_count){ + gf_log (this->name, GF_LOG_TRACE, + "No internal locks unlocked"); + int_lock->lock_cbk (frame, this); + goto out; + } + + 
for (i = 0; i < priv->child_count; i++) { + if (int_lock->entry_locked_nodes[i] & LOCKED_YES) { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, + AFR_UNLOCK_OP, basename, i); + + STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + this->name, + loc, basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + } + } + +out: + return 0; + +} + +static int32_t +afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int done = 0; + int child_index = (long) cookie; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. " + "please load features/posix-locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + done = 1; + } + + local->child_up[child_index] = 0; + local->op_errno = op_errno; + int_lock->lock_op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((op_ret == -1) && + (op_errno == ENOSYS)) { + afr_unlock (frame, this); + } else { + if (op_ret == 0) { + int_lock->locked_nodes[child_index] + |= LOCKED_YES; + int_lock->lock_count++; + } + afr_lock_blocking (frame, this, child_index + 1); + } + + return 0; +} + +static int32_t +afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION, + AFR_LOCK_OP, NULL, op_ret, + op_errno, (long) cookie); + + afr_lock_cbk (frame, cookie, this, op_ret, op_errno); + return 0; + +} + +static int32_t +afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_internal_lock_t *int_lock = NULL; + 
afr_private_t *priv = NULL; + afr_local_t *local = NULL; + loc_t *lower = NULL; + loc_t *higher = NULL; + const char *lower_name = NULL; + const char *higher_name = NULL; + + int child_index = (long) cookie; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. " + "please load features/posix-locks xlator on server"); + + local->op_ret = op_ret; + } + + local->child_up[child_index] = 0; + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (op_ret != 0) { + afr_unlock (frame, this); + goto out; + } else { + int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER; + int_lock->lock_count++; + } + + /* The lower path has been locked. Now lock the higher path */ + + lower = lower_path (&local->transaction.parent_loc, + local->transaction.basename, + &local->transaction.new_parent_loc, + local->transaction.new_basename); + + lower_name = (lower == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + higher = (lower == &local->transaction.parent_loc ? + &local->transaction.new_parent_loc : + &local->transaction.parent_loc); + + higher_name = (higher == &local->transaction.parent_loc ? 
+ local->transaction.basename : + local->transaction.new_basename); + + afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, higher_name, child_index); + + + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + this->name, higher, higher_name, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + +out: + return 0; +} + +static int32_t +afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, NULL, op_ret, + op_errno, (long)cookie); + + afr_lock_cbk (frame, cookie, this, op_ret, op_errno); + return 0; +} + +static int +afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + memcpy (int_lock->inode_locked_nodes, + int_lock->locked_nodes, + priv->child_count); + int_lock->inodelk_lock_count = int_lock->lock_count; + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + case AFR_ENTRY_TRANSACTION: + memcpy (int_lock->entry_locked_nodes, + int_lock->locked_nodes, + priv->child_count); + int_lock->entrylk_lock_count = int_lock->lock_count; + break; + } + + return 0; + +} + +int +afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + loc_t *lower = NULL; + loc_t *higher = NULL; + const char *lower_name = NULL; + const char *higher_name = NULL; + + struct flock flock; + uint64_t ctx; + int ret = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + 
flock.l_start = int_lock->lk_flock.l_start; + flock.l_len = int_lock->lk_flock.l_len; + flock.l_type = int_lock->lk_flock.l_type; + + if (local->fd) { + ret = fd_ctx_get (local->fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "unable to get fd ctx for fd=%p", + local->fd); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EINVAL; + int_lock->lock_op_errno = EINVAL; + + afr_copy_locked_nodes (frame, this); + + afr_unlock (frame, this); + + return 0; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + /* skip over children that or down + or don't have the fd open */ + + while ((child_index < priv->child_count) + && (!local->child_up[child_index] + || !fd_ctx->opened_on[child_index])) + + child_index++; + } else { + /* skip over children that are down */ + while ((child_index < priv->child_count) + && !local->child_up[child_index]) + child_index++; + } + + if ((child_index == priv->child_count) && + int_lock->lock_count == 0) { + + gf_log (this->name, GF_LOG_DEBUG, + "unable to lock on even one child"); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EAGAIN; + int_lock->lock_op_errno = EAGAIN; + + afr_copy_locked_nodes (frame, this); + + afr_unlock(frame, this); + + return 0; + + } + + if ((child_index == priv->child_count) + || (int_lock->lock_count == + afr_up_children_count (priv->child_count, + local->child_up))) { + + /* we're done locking */ + + gf_log (this->name, GF_LOG_DEBUG, + "we're done locking"); + + afr_copy_locked_nodes (frame, this); + + int_lock->lock_op_ret = 0; + int_lock->lock_cbk (frame, this); + return 0; + } + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + + if (local->fd) { + afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_LOCK_OP, &flock, F_SETLKW, + child_index); + + STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, + (void *) (long) child_index, + 
priv->children[child_index], + priv->children[child_index]->fops->finodelk, + this->name, local->fd, + F_SETLKW, &flock); + + } else { + afr_trace_inodelk_in (frame, AFR_INODELK_TRANSACTION, + AFR_LOCK_OP, &flock, F_SETLKW, + child_index); + + STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->inodelk, + this->name, &local->loc, + F_SETLKW, &flock); + } + + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + { + lower = lower_path (&local->transaction.parent_loc, + local->transaction.basename, + &local->transaction.new_parent_loc, + local->transaction.new_basename); + + lower_name = (lower == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + higher = (lower == &local->transaction.parent_loc ? + &local->transaction.new_parent_loc : + &local->transaction.parent_loc); + + higher_name = (higher == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, lower_name, child_index); + + + STACK_WIND_COOKIE (frame, afr_lock_lower_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + this->name, lower, lower_name, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + + break; + } + + case AFR_ENTRY_TRANSACTION: + if (local->fd) { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, local->transaction.basename, + child_index); + + STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->fentrylk, + this->name, local->fd, + local->transaction.basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + } else { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, local->transaction.basename, + child_index); + + STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk, 
+ (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + this->name, + &local->transaction.parent_loc, + local->transaction.basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + } + + break; + } + + return 0; + + +} + +int32_t +afr_blocking_lock (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + initialize_inodelk_variables (frame, this); + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + case AFR_ENTRY_TRANSACTION: + initialize_entrylk_variables (frame, this); + break; + } + + afr_lock_blocking (frame, this, 0); + + return 0; +} + +static int32_t +afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = 0; + int child_index = (long) cookie; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION, + AFR_LOCK_OP, NULL, op_ret, + op_errno, (long) cookie); + + LOCK (&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK (&frame->lock); + + if (op_ret < 0 ) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. 
" + "please load features/posix-locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + + local->child_up[child_index] = 0; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else if (op_ret == 0) { + int_lock->entry_locked_nodes[child_index] + |= LOCKED_YES; + int_lock->entrylk_lock_count++; + } + + if (call_count == 0) { + gf_log (this->name, GF_LOG_TRACE, + "Last locking reply received"); + /* all locks successfull. Proceed to call FOP */ + if (int_lock->entrylk_lock_count == + afr_up_children_count (priv->child_count, local->child_up)) { + gf_log (this->name, GF_LOG_TRACE, + "All servers locked. Calling the cbk"); + int_lock->lock_op_ret = 0; + int_lock->lock_cbk (frame, this); + } + /* Not all locks were successfull. Unlock and try locking + again, this time with serially blocking locks */ + else { + gf_log (this->name, GF_LOG_TRACE, + "%d servers locked. Trying again with blocking calls", + int_lock->lock_count); + + afr_unlock(frame, this); + } + } + + return 0; +} + +int +afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + const char *basename = NULL; + loc_t *loc = NULL; + + int32_t call_count = 0; + int i = 0; + uint64_t ctx; + int ret = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + initialize_entrylk_variables (frame, this); + + basename = int_lock->lk_basename; + if (int_lock->lk_loc) + loc = int_lock->lk_loc; + + if (local->fd) { + ret = fd_ctx_get (local->fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "unable to get fd ctx for fd=%p", + local->fd); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EINVAL; + int_lock->lock_op_errno = EINVAL; + + return -1; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + call_count = internal_lock_count (frame, this, 
fd_ctx); + int_lock->lk_call_count = call_count; + + /* Send non-blocking entrylk calls only on up children + and where the fd has been opened */ + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i] && fd_ctx->opened_on[i]) { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, basename, i); + + STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fentrylk, + this->name, local->fd, + basename, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + } + } + } else { + GF_ASSERT (loc); + + call_count = internal_lock_count (frame, this, NULL); + int_lock->lk_call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, + AFR_LOCK_OP, basename, i); + + STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + this->name, loc, basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + } + } + } + + return 0; +} + +int32_t +afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = 0; + int child_index = (long) cookie; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + afr_trace_inodelk_out (frame, AFR_INODELK_NB_TRANSACTION, + AFR_LOCK_OP, NULL, op_ret, + op_errno, (long) cookie); + + LOCK (&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK (&frame->lock); + + if (op_ret < 0 ) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. 
" + "please load features/posix-locks xlator on server"); + local->op_ret = op_ret; + int_lock->lock_op_ret = op_ret; + local->child_up[child_index] = 0; + int_lock->lock_op_errno = op_errno; + local->op_errno = op_errno; + } + } else if (op_ret == 0) { + int_lock->inode_locked_nodes[child_index] + |= LOCKED_YES; + int_lock->inodelk_lock_count++; + } + + if (call_count == 0) { + gf_log (this->name, GF_LOG_TRACE, + "Last inode locking reply received"); + /* all locks successfull. Proceed to call FOP */ + if (int_lock->inodelk_lock_count == + afr_up_children_count (priv->child_count, local->child_up)) { + gf_log (this->name, GF_LOG_TRACE, + "All servers locked. Calling the cbk"); + int_lock->lock_op_ret = 0; + int_lock->lock_cbk (frame, this); + } + /* Not all locks were successfull. Unlock and try locking + again, this time with serially blocking locks */ + else { + gf_log (this->name, GF_LOG_TRACE, + "%d servers locked. Trying again with blocking calls", + int_lock->lock_count); + + afr_unlock(frame, this); + } + } + + return 0; +} + +int +afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + + int32_t call_count = 0; + uint64_t ctx = 0; + int i = 0; + int ret = 0; + struct flock flock; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + flock.l_start = int_lock->lk_flock.l_start; + flock.l_len = int_lock->lk_flock.l_len; + flock.l_type = int_lock->lk_flock.l_type; + + initialize_inodelk_variables (frame, this); + + if (local->fd) { + ret = fd_ctx_get (local->fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "unable to get fd ctx for fd=%p", + local->fd); + + local->op_ret = -1; + int_lock->lock_op_ret = -1; + local->op_errno = EINVAL; + int_lock->lock_op_errno = EINVAL; + + ret = -1; + goto out; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + call_count = 
internal_lock_count (frame, this, fd_ctx); + int_lock->lk_call_count = call_count; + + /* Send non-blocking inodelk calls only on up children + and where the fd has been opened */ + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i] && fd_ctx->opened_on[i]) { + afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, + AFR_LOCK_OP, &flock, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->finodelk, + this->name, local->fd, + F_SETLK, &flock); + + } + } + } else { + call_count = internal_lock_count (frame, this, NULL); + int_lock->lk_call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION, + AFR_LOCK_OP, &flock, F_SETLK, i); + + STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + this->name, &local->loc, + F_SETLK, &flock); + } + } + } + +out: + return ret; +} + +static int +__is_lower_locked (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int count = 0; + int i = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) + count++; + } + + return count; + +} + +static int +__is_higher_locked (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int count = 0; + int i = 0; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (int_lock->locked_nodes[i] & LOCKED_YES) + count++; + } + + return count; + +} + +static int +afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this) +{ + 
afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + const char *basename = NULL; + loc_t *loc = NULL; + + int call_count = 0; + int i = -1; + + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; + + basename = int_lock->lk_basename; + if (int_lock->lk_loc) + loc = int_lock->lk_loc; + + call_count = __is_lower_locked (frame, this); + int_lock->lk_call_count = call_count; + + if (!call_count){ + gf_log (this->name, GF_LOG_TRACE, + "No internal locks unlocked"); + int_lock->lock_cbk (frame, this); + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) { + afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION, + AFR_UNLOCK_OP, basename, i); + + STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + this->name, + loc, basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + } + + } + +out: + return 0; + +} + + +static int +afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.done (frame, this); + return 0; +} + +static int +afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + loc_t *lower = NULL; + loc_t *higher = NULL; + const char *lower_name = NULL; + const char *higher_name = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + lower = lower_path (&local->transaction.parent_loc, + local->transaction.basename, + &local->transaction.new_parent_loc, + local->transaction.new_basename); + + lower_name = (lower == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + higher = (lower == &local->transaction.parent_loc ? 
+ &local->transaction.new_parent_loc : + &local->transaction.parent_loc); + + higher_name = (higher == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + if (__is_higher_locked (frame, this)) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking higher"); + int_lock->lk_basename = higher_name; + int_lock->lk_loc = higher; + int_lock->lock_cbk = afr_post_unlock_higher_cbk; + + afr_unlock_entrylk (frame, this); + } else + local->transaction.done (frame, this); + + return 0; +} + +static int +afr_rename_unlock (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + loc_t *lower = NULL; + loc_t *higher = NULL; + const char *lower_name = NULL; + const char *higher_name = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + lower = lower_path (&local->transaction.parent_loc, + local->transaction.basename, + &local->transaction.new_parent_loc, + local->transaction.new_basename); + + lower_name = (lower == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + higher = (lower == &local->transaction.parent_loc ? + &local->transaction.new_parent_loc : + &local->transaction.parent_loc); + + higher_name = (higher == &local->transaction.parent_loc ? 
+ local->transaction.basename : + local->transaction.new_basename); + + + if (__is_lower_locked (frame, this)) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking lower"); + int_lock->lk_basename = lower_name; + int_lock->lk_loc = lower; + int_lock->lock_cbk = afr_post_unlock_lower_cbk; + + afr_unlock_lower_entrylk (frame, this); + } else + afr_post_unlock_lower_cbk (frame, this); + + return 0; +} + +static int +afr_rename_transaction (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + return (local->transaction.type == + AFR_ENTRY_RENAME_TRANSACTION); + +} + +int32_t +afr_unlock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (transaction_lk_op (local)) { + if (is_afr_lock_transaction (local)) + afr_unlock_inodelk (frame, this); + else + if (!afr_rename_transaction (frame, this)) + afr_unlock_entrylk (frame, this); + else + afr_rename_unlock (frame, this); + } else { + if (is_afr_lock_selfheal (local)) + afr_unlock_inodelk (frame, this); + else + afr_unlock_entrylk (frame, this); + } + + return 0; +} diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index bc8786155c3..c8603be33ac 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -217,8 +217,9 @@ afr_up_down_flush_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd) { - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; int ret = 0; @@ -228,8 +229,9 @@ afr_up_down_flush_open_cbk (call_frame_t *frame, void *cookie, int call_count = 0; int child_index = (long) cookie; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; LOCK (&frame->lock); { @@ -255,6 +257,7 @@ out: call_count = afr_frame_return (frame); if 
(call_count == 0) { + int_lock->lock_cbk = local->transaction.done; local->transaction.post_post_op (frame, this); } @@ -434,6 +437,9 @@ out: afr_local_transaction_cleanup (local, this); + gf_log (this->name, GF_LOG_TRACE, + "The up/down flush is over"); + local->up_down_flush_cbk (frame, this); return 0; @@ -454,7 +460,7 @@ afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, local->op = GF_FOP_FLUSH; -// local->fd = fd_ref (local->fd); + local->fd = fd_ref (local->fd); local->transaction.fop = afr_up_down_flush_wind; local->transaction.done = afr_up_down_flush_done; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 61fe89dfe42..6a665b66700 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1358,66 +1358,92 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) } -static int -sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + +int +afr_sh_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + local = frame->local; + int_lock = &local->internal_lock; - local = frame->local; - sh = &local->self_heal; + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks failed."); + afr_sh_missing_entries_done (frame, this); + } else { - LOCK (&frame->lock); - { - if (op_ret == -1) { - sh->op_failed = 1; + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks done. 
Proceeding to FOP"); + sh_missing_entries_lookup (frame, this); + } - sh->locked_nodes[child_index] = 0; - gf_log (this->name, GF_LOG_DEBUG, - "locking inode of %s on child %d failed: %s", - local->loc.path, child_index, - strerror (op_errno)); - } else { - sh->locked_nodes[child_index] = 1; - gf_log (this->name, GF_LOG_TRACE, - "inode of %s on child %d locked", - local->loc.path, child_index); - } - } - UNLOCK (&frame->lock); + return 0; +} - call_count = afr_frame_return (frame); +int +afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - if (call_count == 0) { - if (sh->op_failed == 1) { - sh_missing_entries_finish (frame, this); - return 0; - } + local = frame->local; + int_lock = &local->internal_lock; - sh_missing_entries_lookup (frame, this); - } + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_sh_post_blocking_entrylk_cbk; + afr_blocking_lock (frame, this); + } else { - return 0; + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks done. 
Proceeding to FOP"); + sh_missing_entries_lookup (frame, this); + } + + return 0; } +static int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; + + int_lock->transaction_lk_type = AFR_SELFHEAL_LK; + int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK; + + afr_set_lock_number (frame, this); + + int_lock->lk_basename = local->loc.name; + int_lock->lk_loc = &sh->parent_loc; + int_lock->lock_cbk = afr_sh_post_nonblocking_entrylk_cbk; + + afr_nonblocking_entrylk (frame, this); + + return 0; +} static int afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; + priv = this->private; gf_log (this->name, GF_LOG_TRACE, "attempting to recreate missing entries for path=%s", @@ -1425,29 +1451,10 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) afr_build_parent_loc (&sh->parent_loc, &local->loc); - call_count = afr_up_children_count (priv->child_count, - local->child_up); - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, sh_missing_entries_lk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - &sh->parent_loc, local->loc.name, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); - if (!--call_count) - break; - } - } - + afr_sh_entrylk (frame, this); return 0; } - afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this) { 
afr_private_t *priv = NULL; @@ -1490,6 +1497,38 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this) lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode); if (l->cont.lookup.xattr) lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr); + if (l->internal_lock.inode_locked_nodes) + lc->internal_lock.inode_locked_nodes = + memdup (l->internal_lock.inode_locked_nodes, + priv->child_count); + else + lc->internal_lock.inode_locked_nodes = + GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes), + priv->child_count, + gf_afr_mt_char); + if (l->internal_lock.entry_locked_nodes) + lc->internal_lock.entry_locked_nodes = + memdup (l->internal_lock.entry_locked_nodes, + priv->child_count); + else + lc->internal_lock.entry_locked_nodes = + GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes), + priv->child_count, + gf_afr_mt_char); + if (l->internal_lock.locked_nodes) + lc->internal_lock.locked_nodes = + memdup (l->internal_lock.locked_nodes, + priv->child_count); + else + lc->internal_lock.locked_nodes = + GF_CALLOC (sizeof (*l->internal_lock.locked_nodes), + priv->child_count, + gf_afr_mt_char); + + lc->internal_lock.inodelk_lock_count = + l->internal_lock.inodelk_lock_count; + lc->internal_lock.entrylk_lock_count = + l->internal_lock.entrylk_lock_count; return lc; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 366cac81721..8e87bc55545 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -56,7 +56,6 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; - int i = 0; local = frame->local; sh = &local->self_heal; @@ -73,8 +72,8 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this) sh->healing_fd = NULL; } - for (i = 0; i < priv->child_count; i++) - sh->locked_nodes[i] = 0; +/* for (i = 0; i < priv->child_count; i++) */ +/* sh->locked_nodes[i] 
= 0; */ gf_log (this->name, GF_LOG_TRACE, "self heal of %s completed", @@ -268,58 +267,18 @@ afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int afr_sh_data_unlock (call_frame_t *frame, xlator_t *this) { - struct flock flock; - int i = 0; - int call_count = 0; + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_self_heal_t *sh = NULL; - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t * sh = NULL; - - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - if (sh->data_lock_held) { - /* not our job to unlock, proceed to close */ - - afr_sh_data_close (frame, this); - return 0; - } + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; - for (i = 0; i < priv->child_count; i++) { - if (sh->locked_nodes[i]) - call_count++; - } + GF_ASSERT (!sh->data_lock_held); - if (call_count == 0) { - afr_sh_data_close (frame, this); - return 0; - } - - local->call_count = call_count; - - flock.l_start = 0; - flock.l_len = 0; - flock.l_type = F_UNLCK; - - for (i = 0; i < priv->child_count; i++) { - if (sh->locked_nodes[i]) { - gf_log (this->name, GF_LOG_TRACE, - "unlocking %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, - &local->loc, F_SETLK, &flock); - if (!--call_count) - break; - } - } + int_lock->lock_cbk = afr_sh_data_close; + afr_unlock (frame, this); return 0; } @@ -329,13 +288,18 @@ int afr_sh_data_finish (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; local = frame->local; + sh = &local->self_heal; gf_log (this->name, GF_LOG_TRACE, "finishing data selfheal of %s", local->loc.path); - afr_sh_data_unlock (frame, this); + if (!sh->data_lock_held) + afr_sh_data_unlock (frame, this); + else + afr_sh_data_close (frame, this); return 0; } @@ 
-388,7 +352,7 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) priv->child_count, AFR_DATA_TRANSACTION); erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count, - gf_afr_mt_dict_t); + gf_afr_mt_dict_t); for (i = 0; i < priv->child_count; i++) { if (sh->xattr[i]) { @@ -772,8 +736,8 @@ afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr) gf_afr_mt_int32_t); for (i = 0; i < priv->child_count; i++) { sh->pending_matrix[i] = GF_CALLOC (sizeof (int32_t), - priv->child_count, - gf_afr_mt_int32_t); + priv->child_count, + gf_afr_mt_int32_t); } sh->sources = GF_CALLOC (priv->child_count, sizeof (*sh->sources), @@ -958,96 +922,79 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this) int -afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, int child_index); +afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this); int -afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int child_index = (long) cookie; - - /* TODO: what if lock fails? 
*/ - - local = frame->local; - sh = &local->self_heal; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - LOCK (&frame->lock); - { - if (op_ret == -1) { - sh->locked_nodes[child_index] = 0; + local = frame->local; + int_lock = &local->internal_lock; - gf_log (this->name, GF_LOG_DEBUG, - "locking of %s on child %d failed: %s", - local->loc.path, child_index, - strerror (op_errno)); - } else { - sh->locked_nodes[child_index] = 1; - sh->lock_count++; - - gf_log (this->name, GF_LOG_TRACE, - "inode of %s on child %d locked", - local->loc.path, child_index); - } - } - UNLOCK (&frame->lock); + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks failed."); + afr_sh_data_done (frame, this); + } else { - afr_sh_data_lock_rec (frame, this, child_index + 1); + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks done. Proceeding to FOP"); + afr_sh_data_fxattrop (frame, this); + } - return 0; + return 0; } - int -afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) +afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) { - struct flock flock; - int i = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t * sh = NULL; + local = frame->local; + int_lock = &local->internal_lock; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk; + afr_blocking_lock (frame, this); + } else { - flock.l_start = 0; - flock.l_len = 0; - flock.l_type = F_WRLCK; + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks done. 
Proceeding to FOP"); + afr_sh_data_fxattrop (frame, this); + } - /* skip over children that are down */ - while ((child_index < priv->child_count) - && !local->child_up[child_index]) - child_index++; + return 0; +} - if ((child_index == priv->child_count) && - sh->lock_count == 0) { +int +afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; - gf_log (this->name, GF_LOG_DEBUG, - "unable to lock on even one child"); + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; - afr_sh_data_done (frame, this); - return 0; - } + int_lock->transaction_lk_type = AFR_SELFHEAL_LK; + int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK; - if ((child_index == priv->child_count) - || (sh->lock_count == afr_lock_server_count (priv, AFR_DATA_TRANSACTION))) { - afr_sh_data_fxattrop (frame, this); - return 0; - } + afr_set_lock_number (frame, this); + + int_lock->lk_flock.l_start = 0; + int_lock->lk_flock.l_len = 0; + int_lock->lk_flock.l_type = F_WRLCK; + int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk; - gf_log (this->name, GF_LOG_TRACE, - "locking %s on subvolume %s", - local->loc.path, priv->children[i]->name); + afr_nonblocking_inodelk (frame, this); - STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk, - (void *) (long) child_index, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, - &local->loc, F_SETLKW, &flock); return 0; } @@ -1060,7 +1007,6 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this) afr_private_t * priv = NULL; afr_self_heal_t * sh = NULL; - int i = 0; local = frame->local; sh = &local->self_heal; @@ -1074,10 +1020,7 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this) return 0; } - for (i = 0; i < priv->child_count; i++) - sh->locked_nodes[i] = 0; - - return afr_sh_data_lock_rec (frame, this, 0); + return afr_sh_data_lock_rec (frame, this); } diff --git 
a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 719221175bc..bf53fdb6767 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -49,7 +49,8 @@ #include "afr-self-heal.h" #include "afr-self-heal-common.h" - +int +afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this); int afr_sh_entry_done (call_frame_t *frame, xlator_t *this) @@ -57,7 +58,6 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; afr_self_heal_t *sh = NULL; afr_private_t *priv = NULL; - int i = 0; local = frame->local; sh = &local->self_heal; @@ -71,9 +71,9 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this) fd_unref (sh->healing_fd); sh->healing_fd = NULL; - for (i = 0; i < priv->child_count; i++) { - sh->locked_nodes[i] = 0; - } +/* for (i = 0; i < priv->child_count; i++) { */ +/* sh->locked_nodes[i] = 0; */ +/* } */ gf_log (this->name, GF_LOG_TRACE, "self heal of %s completed", @@ -85,91 +85,19 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this) } -int -afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int call_count = 0; - int child_index = (long) cookie; - - /* TODO: what if lock fails? 
*/ - - local = frame->local; - sh = &local->self_heal; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "unlocking inode of %s on child %d failed: %s", - local->loc.path, child_index, - strerror (op_errno)); - } else { - gf_log (this->name, GF_LOG_TRACE, - "unlocked inode of %s on child %d", - local->loc.path, child_index); - } - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - afr_sh_entry_done (frame, this); - } - - return 0; -} - - int afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this) { - int i = 0; - int call_count = 0; - - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t * sh = NULL; - - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (sh->locked_nodes[i]) - call_count++; - } - - if (call_count == 0) { - afr_sh_entry_done (frame, this); - return 0; - } + afr_local_t *local = NULL; + afr_internal_lock_t *int_lock = NULL; - local->call_count = call_count; + local = frame->local; + int_lock = &local->internal_lock; - for (i = 0; i < priv->child_count; i++) { - if (sh->locked_nodes[i]) { - gf_log (this->name, GF_LOG_TRACE, - "unlocking %s on subvolume %s", - local->loc.path, priv->children[i]->name); + int_lock->lock_cbk = afr_sh_entry_done; + afr_unlock (frame, this); - STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - &local->loc, NULL, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - if (!--call_count) - break; - } - } - - return 0; + return 0; } @@ -376,8 +304,8 @@ build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name) if (strcmp (parent->path, "/") == 0) ret = gf_asprintf ((char **)&child->path, "/%s", name); else - ret = gf_asprintf ((char **)&child->path, "%s/%s", - parent->path, name); + ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path, + 
name); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, @@ -546,8 +474,8 @@ make_trash_path (const char *path) char *c = NULL; char *tp = NULL; - tp = GF_CALLOC (strlen ("/" GF_REPLICATE_TRASH_DIR) + strlen (path) + 1, - sizeof (char), gf_afr_mt_char); + tp = GF_CALLOC (strlen ("/" GF_REPLICATE_TRASH_DIR) + strlen (path) + 1, sizeof (char), + gf_afr_mt_char); strcpy (tp, GF_REPLICATE_TRASH_DIR); strcat (tp, path); @@ -1338,7 +1266,8 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, parentbuf = impunge_sh->parentbuf; setattr_frame = copy_frame (impunge_frame); - parent_loc = GF_CALLOC (1, sizeof (*parent_loc), gf_afr_mt_loc_t); + parent_loc = GF_CALLOC (1, sizeof (*parent_loc), + gf_afr_mt_loc_t); afr_build_parent_loc (parent_loc, &impunge_local->loc); STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk, @@ -2437,93 +2366,76 @@ afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) return 0; } - - int -afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +afr_sh_post_blocking_entry_cbk (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - /* TODO: what if lock fails? 
*/ - - local = frame->local; - sh = &local->self_heal; + local = frame->local; + int_lock = &local->internal_lock; - LOCK (&frame->lock); - { - if (op_ret == -1) { - sh->op_failed = 1; + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks failed."); + afr_sh_entry_done (frame, this); + } else { - sh->locked_nodes[child_index] = 0; - gf_log (this->name, GF_LOG_DEBUG, - "locking inode of %s on child %d failed: %s", - local->loc.path, child_index, - strerror (op_errno)); - } else { - sh->locked_nodes[child_index] = 1; - gf_log (this->name, GF_LOG_TRACE, - "inode of %s on child %d locked", - local->loc.path, child_index); - } - } - UNLOCK (&frame->lock); + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks done. Proceeding to FOP"); + afr_sh_entry_lookup(frame, this); + } - call_count = afr_frame_return (frame); + return 0; +} - if (call_count == 0) { - if (sh->op_failed == 1) { - afr_sh_entry_finish (frame, this); - return 0; - } +int +afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - afr_sh_entry_lookup (frame, this); - } + local = frame->local; + int_lock = &local->internal_lock; - return 0; -} + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_sh_post_blocking_entry_cbk; + afr_blocking_lock (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks done. 
Proceeding to FOP"); + afr_sh_entry_lookup(frame, this); + } + + return 0; +} int afr_sh_entry_lock (call_frame_t *frame, xlator_t *this) { - int i = 0; - int call_count = 0; - - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t * sh = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + int_lock->transaction_lk_type = AFR_SELFHEAL_LK; + int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK; - call_count = afr_up_children_count (priv->child_count, - local->child_up); + afr_set_lock_number (frame, this); - local->call_count = call_count; + int_lock->lk_basename = NULL; + int_lock->lk_loc = &local->loc; + int_lock->lock_cbk = afr_sh_post_nonblocking_entry_cbk; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - gf_log (this->name, GF_LOG_TRACE, - "locking %s on subvolume %s", - local->loc.path, priv->children[i]->name); + afr_nonblocking_entrylk (frame, this); - STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - &local->loc, NULL, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); - if (!--call_count) - break; - } - } return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 4501595b7a4..54972c52be0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -65,9 +65,9 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this) memset (sh->buf, 0, sizeof (struct stat) * priv->child_count); memset (sh->success, 0, sizeof (int) * priv->child_count); - for (i = 0; i < priv->child_count; i++) { - sh->locked_nodes[i] = 1; - } +/* for (i = 0; i < priv->child_count; i++) { */ +/* sh->locked_nodes[i] = 1; */ 
+/* } */ for (i = 0; i < priv->child_count; i++) { if (sh->xattr[i]) @@ -125,54 +125,25 @@ afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int -afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this) +afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; - struct flock flock = {0, }; - + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; - for (i = 0; i < priv->child_count; i++) { - if (sh->locked_nodes[i]) - call_count++; - } - - if (call_count == 0) { - afr_sh_metadata_done (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - flock.l_start = 0; - flock.l_len = 0; - flock.l_type = F_UNLCK; - - if (sh->locked_nodes[i]) { - gf_log (this->name, GF_LOG_TRACE, - "unlocking %s on subvolume %s", - local->loc.path, priv->children[i]->name); + int_lock->lock_cbk = afr_sh_metadata_done; + afr_unlock (frame, this); - STACK_WIND (frame, afr_sh_metadata_unlck_cbk, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, - &local->loc, F_SETLK, &flock); + return 0; +} - if (!--call_count) - break; - } - } +int +afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this) +{ + afr_sh_inode_unlock (frame, this); return 0; } @@ -699,97 +670,77 @@ afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) return 0; } - int -afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +afr_sh_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = 
NULL; + afr_local_t *local = NULL; - /* TODO: what if lock fails? */ - - local = frame->local; - sh = &local->self_heal; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; - LOCK (&frame->lock); - { - if (op_ret == -1) { - sh->op_failed = 1; + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks failed."); + afr_sh_metadata_done (frame, this); + } else { - sh->locked_nodes[child_index] = 0; - gf_log (this->name, GF_LOG_DEBUG, - "locking of %s on child %d failed: %s", - local->loc.path, child_index, - strerror (op_errno)); - } else { - sh->locked_nodes[child_index] = 1; - gf_log (this->name, GF_LOG_TRACE, - "inode of %s on child %d locked", - local->loc.path, child_index); - } - } - UNLOCK (&frame->lock); + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks done. Proceeding to FOP"); + afr_sh_metadata_lookup (frame, this); + } - call_count = afr_frame_return (frame); + return 0; +} - if (call_count == 0) { - if (sh->op_failed) { - afr_sh_metadata_finish (frame, this); - return 0; - } +int +afr_sh_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - afr_sh_metadata_lookup (frame, this); - } + local = frame->local; + int_lock = &local->internal_lock; - return 0; + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_sh_post_blocking_inodelk_cbk; + afr_blocking_lock (frame, this); + } else { + + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks done. 
Proceeding to FOP"); + afr_sh_metadata_lookup (frame, this); + } + + return 0; } int afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int i = 0; - int call_count = 0; - struct flock flock = {0, }; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + local = frame->local; + int_lock = &local->internal_lock; + sh = &local->self_heal; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + int_lock->transaction_lk_type = AFR_SELFHEAL_LK; + int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK; - call_count = afr_up_children_count (priv->child_count, - local->child_up); - local->call_count = call_count; + afr_set_lock_number (frame, this); - for (i = 0; i < priv->child_count; i++) { - flock.l_start = 0; - flock.l_len = 0; - flock.l_type = F_WRLCK; + int_lock->lk_flock.l_start = 0; + int_lock->lk_flock.l_len = 0; + int_lock->lk_flock.l_type = F_WRLCK; + int_lock->lock_cbk = afr_sh_post_nonblocking_inodelk_cbk; - if (local->child_up[i]) { - gf_log (this->name, GF_LOG_TRACE, - "locking %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, - &local->loc, F_SETLK, &flock); - - if (!--call_count) - break; - } - } + afr_nonblocking_inodelk (frame, this); return 0; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 51d1455f455..a382c12a876 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -439,245 +439,39 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type) switch (type) { case AFR_FLUSH_TRANSACTION: case AFR_DATA_TRANSACTION: - ret = priv->data_lock_server_count; + ret = priv->child_count; break; case 
AFR_METADATA_TRANSACTION: - ret = priv->metadata_lock_server_count; + ret = priv->child_count; break; case AFR_ENTRY_TRANSACTION: case AFR_ENTRY_RENAME_TRANSACTION: - ret = priv->entry_lock_server_count; + ret = priv->child_count; break; } return ret; } - -/* {{{ unlock */ - -static int -afr_transaction_locked_nodes_count (afr_local_t *local, int child_count) -{ - int i; - int call_count = 0; - - for (i = 0; i < child_count; i++) { - if (local->transaction.locked_nodes[i] & LOCKED_YES) - call_count++; - - if ((local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) - && (local->transaction.locked_nodes[i] & LOCKED_LOWER)) { - call_count++; - } - } - - return call_count; -} - - -static loc_t * -lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2) -{ - int ret = 0; - - ret = strcmp (l1->path, l2->path); - - if (ret == 0) - ret = strcmp (b1, b2); - - if (ret <= 0) - return l1; - else - return l2; -} - - -int32_t -afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local; - int call_count = 0; - - local = frame->local; - - LOCK (&frame->lock); - { - call_count = --local->call_count; - } - UNLOCK (&frame->lock); - - if (call_count == 0) { - local->transaction.done (frame, this); - } - - return 0; -} - - -int -afr_unlock (call_frame_t *frame, xlator_t *this) -{ - struct flock flock; - - int i = 0; - int call_count = 0; - - afr_local_t *local = NULL; - afr_private_t * priv = this->private; - - loc_t * lower = NULL; - loc_t * higher = NULL; - - const char *lower_name = NULL; - const char *higher_name = NULL; - - local = frame->local; - - /* - pid has been restored to saved_pid in the fop, - so set it back to frame->root - */ - - frame->root->pid = (long) frame->root; - - call_count = afr_transaction_locked_nodes_count (local, - priv->child_count); - - if (call_count == 0) { - local->transaction.done (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < 
priv->child_count; i++) { - flock.l_start = local->transaction.start; - flock.l_len = local->transaction.len; - flock.l_type = F_UNLCK; - - switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: - case AFR_METADATA_TRANSACTION: - case AFR_FLUSH_TRANSACTION: - - if (local->transaction.locked_nodes[i] & LOCKED_YES) { - if (local->fd) { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->finodelk, - this->name, local->fd, - F_SETLK, &flock); - } else { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->inodelk, - this->name, &local->loc, - F_SETLK, &flock); - } - - call_count--; - } - - break; - - case AFR_ENTRY_RENAME_TRANSACTION: - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? 
- local->transaction.basename : - local->transaction.new_basename); - - if (local->transaction.locked_nodes[i] & LOCKED_LOWER) { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - lower, lower_name, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - - call_count--; - } - - if (call_count && - local->transaction.locked_nodes[i] & LOCKED_YES) { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - higher, higher_name, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - - call_count--; - } - - break; - - case AFR_ENTRY_TRANSACTION: - if (local->transaction.locked_nodes[i] & LOCKED_YES) { - if (local->fd) { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->fentrylk, - this->name, local->fd, - local->transaction.basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - } else { - STACK_WIND (frame, afr_unlock_common_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - this->name, - &local->transaction.parent_loc, - local->transaction.basename, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK); - - } - - call_count--; - } - - break; - } - - if (!call_count) - break; - } - - return 0; -} - -/* }}} */ - - /* {{{ pending */ int32_t afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xattr) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; int call_count = -1; int (*post_post_op) (call_frame_t *, xlator_t *); - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; LOCK (&frame->lock); { @@ -692,6 +486,7 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (afr_lock_server_count (priv, local->transaction.type) == 0) { local->transaction.post_post_op = 
local->transaction.done; } else { + int_lock->lock_cbk = local->transaction.done; local->transaction.post_post_op = afr_unlock; } @@ -700,6 +495,7 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (afr_lock_server_count (priv, local->transaction.type) == 0) { local->transaction.done (frame, this); } else { + int_lock->lock_cbk = local->transaction.done; afr_unlock (frame, this); } } @@ -713,7 +509,7 @@ int afr_changelog_post_op (call_frame_t *frame, xlator_t *this) { afr_private_t * priv = this->private; - + afr_internal_lock_t *int_lock = NULL; int ret = 0; int i = 0; int call_count = 0; @@ -721,7 +517,8 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) afr_local_t * local = NULL; dict_t **xattr = NULL; - local = frame->local; + local = frame->local; + int_lock = &local->internal_lock; __mark_down_children (local->pending, priv->child_count, local->child_up, local->transaction.type); @@ -756,7 +553,8 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) for (i = 0; i < priv->child_count; i++) { dict_unref (xattr[i]); } - + + int_lock->lock_cbk = local->transaction.done; afr_unlock (frame, this); return 0; } @@ -929,7 +727,6 @@ int afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) { afr_private_t * priv = this->private; - int i = 0; int ret = 0; int call_count = 0; @@ -959,7 +756,9 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) for (i = 0; i < priv->child_count; i++) { dict_unref (xattr[i]); } - + + local->internal_lock.lock_cbk = + local->transaction.done; afr_unlock (frame, this); return 0; } @@ -1069,328 +868,248 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) return 0; } -/* }}} */ - -/* {{{ lock */ - -static -int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index); - -int32_t -afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int +afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) { - 
afr_local_t * local = NULL; - afr_private_t * priv = NULL; - int done = 0; - int child_index = (long) cookie; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. " - "please load features/posix-locks xlator on server"); - local->op_ret = op_ret; - done = 1; - } - - local->child_up[child_index] = 0; - local->op_errno = op_errno; - } - } - UNLOCK (&frame->lock); - - if ((op_ret == -1) && - (op_errno == ENOSYS)) { - afr_unlock (frame, this); + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks failed."); + local->transaction.done (frame, this); } else { - if (op_ret == 0) { - local->transaction.locked_nodes[child_index] - |= LOCKED_YES; - local->transaction.lock_count++; - } - afr_lock_rec (frame, this, child_index + 1); + + gf_log (this->name, GF_LOG_DEBUG, + "Blocking inodelks done. Proceeding to FOP"); + afr_internal_lock_finish (frame, this); } - return 0; + return 0; } - -int32_t -afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int +afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int child_index = (long) cookie; - - loc_t * lower = NULL; - loc_t * higher = NULL; - - const char *lower_name = NULL; - const char *higher_name = NULL; - - priv = this->private; - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOSYS) { - /* return ENOTSUP */ - - gf_log (this->name, GF_LOG_ERROR, - "subvolume does not support locking. 
" - "please load features/posix-locks xlator on server"); - - local->op_ret = op_ret; - } - - local->child_up[child_index] = 0; - local->op_errno = op_errno; - } - } - UNLOCK (&frame->lock); - - if (op_ret != 0) { - afr_unlock (frame, this); - goto out; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + + local = frame->local; + int_lock = &local->internal_lock; + + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_post_blocking_inodelk_cbk; + afr_blocking_lock (frame, this); } else { - local->transaction.locked_nodes[child_index] |= LOCKED_LOWER; - } - - /* The lower path has been locked. Now lock the higher path */ - - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); - - higher_name = (higher == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); - - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, higher, higher_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking inodelks done. 
Proceeding to FOP"); + afr_internal_lock_finish (frame, this); + } -out: return 0; } - -static -int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) +int +afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - struct flock flock; - - int ret = 0; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - loc_t * lower = NULL; - loc_t * higher = NULL; + local = frame->local; + int_lock = &local->internal_lock; - const char *lower_name = NULL; - const char *higher_name = NULL; - - local = frame->local; - priv = this->private; - - flock.l_start = local->transaction.start; - flock.l_len = local->transaction.len; - flock.l_type = F_WRLCK; + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks failed."); + local->transaction.done (frame, this); + } else { - if (local->fd) { - ret = fd_ctx_get (local->fd, this, &ctx); + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks done. Proceeding to FOP"); + afr_internal_lock_finish (frame, this); + } - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "unable to get fd ctx for fd=%p", - local->fd); + return 0; +} - local->op_ret = -1; - local->op_errno = EINVAL; +int +afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - afr_unlock (frame, this); + local = frame->local; + int_lock = &local->internal_lock; + + /* Initiate blocking locks if non-blocking has failed */ + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks failed. Proceeding to blocking"); + int_lock->lock_cbk = afr_post_blocking_entrylk_cbk; + afr_blocking_lock (frame, this); + } else { - return 0; - } + gf_log (this->name, GF_LOG_DEBUG, + "Non blocking entrylks done. 
Proceeding to FOP"); + afr_internal_lock_finish (frame, this); + } - fd_ctx = (afr_fd_ctx_t *)(long) ctx; + return 0; +} - /* skip over children that or down - or don't have the fd open */ +int +afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - while ((child_index < priv->child_count) - && (!local->child_up[child_index] - || !fd_ctx->opened_on[child_index])) + local = frame->local; + int_lock = &local->internal_lock; - child_index++; + if (int_lock->lock_op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks failed."); + local->transaction.done (frame, this); } else { - /* skip over children that are down */ - while ((child_index < priv->child_count) - && !local->child_up[child_index]) - child_index++; + + gf_log (this->name, GF_LOG_DEBUG, + "Blocking entrylks done. Proceeding to FOP"); + afr_internal_lock_finish (frame, this); } + return 0; +} + +int afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - if ((child_index == priv->child_count) && - local->transaction.lock_count == 0) { + local = frame->local; + int_lock = &local->internal_lock; - gf_log (this->name, GF_LOG_DEBUG, - "unable to lock on even one child"); + GF_ASSERT (!int_lock->higher_locked); - local->op_ret = -1; - local->op_errno = EAGAIN; + int_lock->lock_cbk = afr_post_blocking_rename_cbk; + afr_blocking_lock (frame, this); - afr_unlock (frame, this); - - return 0; + return 0; +} - } +int +afr_set_transaction_flock (afr_local_t *local) +{ + afr_internal_lock_t *int_lock = NULL; - if ((child_index == priv->child_count) - || (local->transaction.lock_count == - afr_lock_server_count (priv, local->transaction.type))) { + int_lock = &local->internal_lock; - /* we're done locking */ + int_lock->lk_flock.l_len = local->transaction.len; + int_lock->lk_flock.l_start = local->transaction.start; + 
int_lock->lk_flock.l_type = F_WRLCK; - if (__changelog_needed_pre_op (frame, this)) { - afr_changelog_pre_op (frame, this); - } else { - __mark_all_success (local->pending, priv->child_count, - local->transaction.type); + return 0; +} - afr_pid_restore (frame); +int +afr_lock_rec (call_frame_t *frame, xlator_t *this) +{ + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; - local->transaction.fop (frame, this); - } + local = frame->local; + int_lock = &local->internal_lock; - return 0; - } + int_lock->transaction_lk_type = AFR_TRANSACTION_LK; switch (local->transaction.type) { - case AFR_DATA_TRANSACTION: + case AFR_DATA_TRANSACTION: case AFR_METADATA_TRANSACTION: case AFR_FLUSH_TRANSACTION: + afr_set_transaction_flock (local); - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->finodelk, - this->name, local->fd, - F_SETLKW, &flock); - - } else { - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->inodelk, - this->name, &local->loc, - F_SETLKW, &flock); - } - - break; - - case AFR_ENTRY_RENAME_TRANSACTION: - { - lower = lower_path (&local->transaction.parent_loc, - local->transaction.basename, - &local->transaction.new_parent_loc, - local->transaction.new_basename); - - lower_name = (lower == &local->transaction.parent_loc ? - local->transaction.basename : - local->transaction.new_basename); + int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk; - higher = (lower == &local->transaction.parent_loc ? - &local->transaction.new_parent_loc : - &local->transaction.parent_loc); + afr_nonblocking_inodelk (frame, this); + break; - higher_name = (higher == &local->transaction.parent_loc ? 
- local->transaction.basename : - local->transaction.new_basename); + case AFR_ENTRY_RENAME_TRANSACTION: - STACK_WIND_COOKIE (frame, afr_lock_lower_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, lower, lower_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK); + int_lock->lock_cbk = afr_post_blocking_rename_cbk; + afr_blocking_lock (frame, this); + break; - break; - } - case AFR_ENTRY_TRANSACTION: - if (local->fd) { - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->fentrylk, - this->name, local->fd, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - } else { - STACK_WIND_COOKIE (frame, afr_lock_cbk, - (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->entrylk, - this->name, - &local->transaction.parent_loc, - local->transaction.basename, - ENTRYLK_LOCK, ENTRYLK_WRLCK); - } - - break; - } + int_lock->lk_basename = local->transaction.basename; + if (&local->transaction.parent_loc) + int_lock->lk_loc = &local->transaction.parent_loc; + else + GF_ASSERT (local->fd); + + int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk; + afr_nonblocking_entrylk (frame, this); + break; + } - return 0; + return 0; } -int32_t afr_lock (call_frame_t *frame, xlator_t *this) +int32_t +afr_lock (call_frame_t *frame, xlator_t *this) { afr_pid_save (frame); frame->root->pid = (long) frame->root; + afr_set_lk_owner (frame, this); - return afr_lock_rec (frame, this, 0); + + afr_set_lock_number (frame, this); + + return afr_lock_rec (frame, this); } /* }}} */ +int +afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (__changelog_needed_pre_op (frame, this)) { + afr_changelog_pre_op (frame, this); + } else { + __mark_all_success (local->pending, 
priv->child_count, + local->transaction.type); + + afr_pid_restore (frame); + + local->transaction.fop (frame, this); + } + + return 0; +} + int32_t afr_transaction_resume (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; - local = frame->local; - priv = this->private; + local = frame->local; + int_lock = &local->internal_lock; + priv = this->private; if (__changelog_needed_post_op (frame, this)) { afr_changelog_post_op (frame, this); @@ -1398,6 +1117,7 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this) if (afr_lock_server_count (priv, local->transaction.type) == 0) { local->transaction.done (frame, this); } else { + int_lock->lock_cbk = local->transaction.done; afr_unlock (frame, this); } } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 629e1875e6e..9d06cd0343a 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -93,6 +93,8 @@ init (xlator_t *this) char * algo = NULL; char * change_log = NULL; char * strict_readdir = NULL; + char * inodelk_trace = NULL; + char * entrylk_trace = NULL; int32_t background_count = 0; int32_t lock_server_count = 1; @@ -250,6 +252,37 @@ init (xlator_t *this) /* Locking options */ + priv->inodelk_trace = 0; + priv->entrylk_trace = 0; + + dict_ret = dict_get_str (this->options, "inodelk-trace", + &inodelk_trace); + if (dict_ret == 0) { + ret = gf_string2boolean (inodelk_trace, &priv->inodelk_trace); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid 'option inodelk-trace %s' ", + inodelk_trace); + + priv->inodelk_trace = 0; + } + } + + + dict_ret = dict_get_str (this->options, "entrylk-trace", + &entrylk_trace); + if (dict_ret == 0) { + ret = gf_string2boolean (entrylk_trace, &priv->entrylk_trace); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid 'option entrylk-trace %s' ", + 
entrylk_trace); + + priv->entrylk_trace = 0; + } + } + + priv->data_lock_server_count = 1; priv->metadata_lock_server_count = 0; priv->entry_lock_server_count = 1; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index ea7b3ba8955..6df8c697ca6 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -73,6 +73,9 @@ typedef struct _afr_private { unsigned int metadata_lock_server_count; unsigned int entry_lock_server_count; + gf_boolean_t inodelk_trace; + gf_boolean_t entrylk_trace; + gf_boolean_t strict_readdir; unsigned int wait_count; /* # of servers to wait for success */ @@ -134,7 +137,7 @@ typedef struct { int active_source; int active_sinks; int *success; - int *locked_nodes; + unsigned char *locked_nodes; int lock_count; mode_t impunging_entry_mode; @@ -173,6 +176,32 @@ typedef enum { AFR_FLUSH_TRANSACTION, /* flush */ } afr_transaction_type; +typedef enum { + AFR_TRANSACTION_LK, + AFR_SELFHEAL_LK, +} transaction_lk_type_t; + +typedef enum { + AFR_LOCK_OP, + AFR_UNLOCK_OP, +} afr_lock_op_type_t; + +typedef enum { + AFR_DATA_SELF_HEAL_LK, + AFR_METADATA_SELF_HEAL_LK, + AFR_ENTRY_SELF_HEAL_LK, +}selfheal_lk_type_t; + +typedef enum { + AFR_INODELK_TRANSACTION, + AFR_INODELK_NB_TRANSACTION, + AFR_ENTRYLK_TRANSACTION, + AFR_ENTRYLK_NB_TRANSACTION, + AFR_INODELK_SELFHEAL, + AFR_INODELK_NB_SELFHEAL, + AFR_ENTRYLK_SELFHEAL, + AFR_ENTRYLK_NB_SELFHEAL, +} afr_lock_call_type_t; /* xattr format: trusted.afr.volume = [x y z] @@ -207,6 +236,37 @@ typedef enum { AFR_CHILD_DOWN_FLUSH, } afr_flush_type; +typedef struct { + loc_t *lk_loc; + struct flock lk_flock; + + const char *lk_basename; + const char *lower_basename; + const char *higher_basename; + char lower_locked; + char higher_locked; + + unsigned char *locked_nodes; + unsigned char *lower_locked_nodes; + unsigned char *inode_locked_nodes; + unsigned char *entry_locked_nodes; + + selfheal_lk_type_t selfheal_lk_type; + transaction_lk_type_t transaction_lk_type; + + 
int32_t lock_count; + int32_t inodelk_lock_count; + int32_t entrylk_lock_count; + + uint64_t lock_number; + int32_t lk_call_count; + + int32_t lock_op_ret; + int32_t lock_op_errno; + + int (*lock_cbk) (call_frame_t *, xlator_t *); + +} afr_internal_lock_t; typedef struct _afr_local { unsigned int call_count; @@ -244,6 +304,8 @@ typedef struct _afr_local { int32_t inodelk_count; int32_t entrylk_count; + afr_internal_lock_t internal_lock; + dict_t *dict; int (*up_down_flush_cbk) (call_frame_t *, xlator_t *); @@ -513,9 +575,6 @@ typedef struct _afr_local { struct { off_t start, len; - unsigned char *locked_nodes; - int lock_count; - char *basename; char *new_basename; @@ -597,6 +656,29 @@ afr_notify (xlator_t *this, int32_t event, void afr_set_lk_owner (call_frame_t *frame, xlator_t *this); +int +afr_set_lock_number (call_frame_t *frame, xlator_t *this); + + +loc_t * +lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2); + +int32_t +afr_unlock (call_frame_t *frame, xlator_t *this); + +int +afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this); + +int +afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this); + +int +afr_blocking_lock (call_frame_t *frame, xlator_t *this); + +int +afr_internal_lock_finish (call_frame_t *frame, xlator_t *this); + + int pump_start (call_frame_t *frame, xlator_t *this); int @@ -716,6 +798,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) local->op_ret = -1; local->op_errno = EUCLEAN; + local->internal_lock.lock_op_ret = -1; + local->internal_lock.lock_op_errno = EUCLEAN; + + return 0; } @@ -764,7 +850,7 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) local->pending = GF_CALLOC (sizeof (*local->pending), priv->child_count, gf_afr_mt_int32_t); - + if (!local->pending) { return -ENOMEM; } @@ -776,15 +862,33 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) if (!local->pending[i]) return -ENOMEM; } - - local->transaction.locked_nodes = GF_CALLOC (sizeof 
(*local->transaction.locked_nodes), - priv->child_count, - gf_afr_mt_char); + + local->internal_lock.inode_locked_nodes = + GF_CALLOC (sizeof (*local->internal_lock.inode_locked_nodes), + priv->child_count, + gf_afr_mt_char); + + local->internal_lock.entry_locked_nodes = + GF_CALLOC (sizeof (*local->internal_lock.entry_locked_nodes), + priv->child_count, + gf_afr_mt_char); + + local->internal_lock.locked_nodes = + GF_CALLOC (sizeof (*local->internal_lock.locked_nodes), + priv->child_count, + gf_afr_mt_char); + + local->internal_lock.lower_locked_nodes + = GF_CALLOC (sizeof (*local->internal_lock.lower_locked_nodes), + priv->child_count, + gf_afr_mt_char); local->transaction.child_errno = GF_CALLOC (sizeof (*local->transaction.child_errno), priv->child_count, gf_afr_mt_int32_t); + local->internal_lock.transaction_lk_type = AFR_TRANSACTION_LK; + return 0; } -- cgit