summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2013-07-03 21:21:48 +0530
committerVijay Bellur <vbellur@redhat.com>2013-07-03 21:28:11 -0700
commitc2abf3a6e39c5a5832a165757483bc0ae23cdb63 (patch)
treea9050545d09f522ed1ef0856a48dd5d8ce2b7904
parentb64a7f8497d9752f4da5a42aeeeebbd7727c442b (diff)
cluster/afr: Refactor inodelk to handle multiple domains
- afr_local_copy should not be memduping locked nodes, that would mean that lock is taken in self-heal on those nodes even before it actually takes the lock. So removed memdup code. Even entry lock related copying (lockee info) is also not necessary for self-heal functionality, so removing that as well. Since it is not local_copy anymore changed its name. - My editor changed tabs to spaces. Change-Id: I8dfb92cb8338e9a967c06907a8e29a8404782d61 BUG: 967717 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/5099 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c35
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c134
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c89
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h6
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c44
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c9
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c565
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h3
-rw-r--r--xlators/cluster/afr/src/afr.c2
-rw-r--r--xlators/cluster/afr/src/afr.h20
11 files changed, 500 insertions, 428 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6c6964deceb..fbf39ed4e1c 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -826,7 +826,8 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
void
afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
{
- afr_private_t * priv = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
priv = this->private;
@@ -836,7 +837,9 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->internal_lock.locked_nodes);
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
GF_FREE (local->internal_lock.lower_locked_nodes);
@@ -4480,11 +4483,6 @@ afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
{
int ret = -ENOMEM;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
-
lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->locked_nodes)
@@ -4543,6 +4541,21 @@ out:
}
int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
int child_up_count = 0;
@@ -4555,6 +4568,14 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (ret < 0)
goto out;
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
ret = -ENOMEM;
child_up_count = afr_up_children_count (local->child_up,
priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 812609819f5..116e1ef6983 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -559,19 +559,20 @@ initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
- int i = 0;
+ afr_inodelk_t *inodelk = NULL;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ inodelk->lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
return 0;
}
@@ -652,6 +653,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
local = frame->local;
@@ -668,7 +670,8 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
@@ -682,6 +685,7 @@ static int
afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
@@ -697,12 +701,14 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
flock.l_type = F_UNLCK;
full_flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
priv->child_count);
int_lock->lk_call_count = call_count;
@@ -718,8 +724,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
fd_ctx = afr_fd_ctx_get (local->fd, this);
for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
continue;
if (local->fd) {
@@ -760,7 +765,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLK, flock_use, NULL);
if (!--call_count)
@@ -775,7 +780,7 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long)i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLK, &flock, NULL);
if (!--call_count)
@@ -861,7 +866,7 @@ afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[index],
priv->children[index]->fops->entrylk,
- this->name,
+ int_lock->domain,
&int_lock->lockee[lockee_no].loc,
int_lock->lockee[lockee_no].basename,
ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
@@ -964,6 +969,7 @@ static int
afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -974,10 +980,10 @@ afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
break;
case AFR_ENTRY_RENAME_TRANSACTION:
@@ -1028,6 +1034,7 @@ int
afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
struct gf_flock flock = {0,};
@@ -1042,10 +1049,15 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
priv = this->private;
child_index = cookie % priv->child_count;
lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
if (local->fd) {
ret = fd_ctx_get (local->fd, this, &ctx);
@@ -1067,8 +1079,6 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
}
if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
- is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
-
if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
(!is_entrylk && int_lock->lock_count == 0)) {
gf_log (this->name, GF_LOG_INFO,
@@ -1117,7 +1127,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLKW, &flock, NULL);
} else {
@@ -1130,7 +1140,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLKW, &flock, NULL);
}
@@ -1151,7 +1161,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
(void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
} else {
@@ -1164,7 +1174,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
(void *) (long) cookie,
priv->children[child_index],
priv->children[child_index]->fops->entrylk,
- this->name,
+ int_lock->domain,
&int_lock->lockee[lockee_no].loc,
int_lock->lockee[lockee_no].basename,
ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
@@ -1393,6 +1403,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
int call_count = 0;
int child_index = (long) cookie;
@@ -1401,6 +1412,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, NULL, op_ret,
@@ -1426,9 +1438,8 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->transaction.eager_lock)
local->transaction.eager_lock[child_index] = 0;
} else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
if (local->transaction.eager_lock &&
local->transaction.eager_lock[child_index] &&
@@ -1451,8 +1462,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_TRACE,
"Last inode locking reply received");
/* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1476,6 +1486,7 @@ int
afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
@@ -1491,11 +1502,13 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- full_flock.l_type = int_lock->lk_flock.l_type;
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ full_flock.l_type = inodelk->flock.l_type;
initialize_inodelk_variables (frame, this);
@@ -1569,7 +1582,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->finodelk,
- this->name, local->fd,
+ int_lock->domain, local->fd,
F_SETLK, flock_use, NULL);
if (!--call_count)
@@ -1591,7 +1604,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->inodelk,
- this->name, &local->loc,
+ int_lock->domain, &local->loc,
F_SETLK, &flock, NULL);
if (!--call_count)
@@ -2115,29 +2128,38 @@ out:
return ret;
}
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
src_local = src->local;
src_lock = &src_local->internal_lock;
- if (src_lock->inode_locked_nodes) {
- memcpy (dst_lock->inode_locked_nodes,
- src_lock->inode_locked_nodes,
- sizeof (*dst_lock->inode_locked_nodes) * child_count);
- memset (src_lock->inode_locked_nodes, 0,
- sizeof (*src_lock->inode_locked_nodes) * child_count);
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
}
dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
- dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count;
- src_lock->inodelk_lock_count = 0;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 1d577cfb5ab..83846f152d2 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -143,7 +143,7 @@ sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
}
if (loop_sh && loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
+ afr_sh_data_unlock (loop_frame, this, this->name,
sh_destroy_frame);
} else {
sh_destroy_frame (loop_frame, this);
@@ -214,7 +214,7 @@ sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
goto out;
//We want the frame to have same lk_owner as sh_frame
//so that locks translator allows conflicting locks
- new_loop_local = afr_local_copy (local, this);
+ new_loop_local = afr_self_heal_local_init (local, this);
if (!new_loop_local)
goto out;
new_loop_frame->local = new_loop_local;
@@ -273,7 +273,7 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
new_loop_sh->offset = offset;
new_loop_sh->block_size = sh->block_size;
afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- _gf_true, sh_loop_lock_success, sh_loop_lock_failure);
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
return 0;
out:
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
@@ -754,14 +754,15 @@ out:
return sh_priv;
}
-void
-afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count)
{
afr_local_t *dst_local = NULL;
afr_self_heal_t *dst_sh = NULL;
afr_local_t *src_local = NULL;
afr_self_heal_t *src_sh = NULL;
+ int ret = -1;
dst_local = dst->local;
dst_sh = &dst_local->self_heal;
@@ -769,9 +770,12 @@ afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
src_sh = &src_local->self_heal;
GF_ASSERT (src_sh->data_lock_held);
GF_ASSERT (!dst_sh->data_lock_held);
- afr_lk_transfer_datalock (dst, src, child_count);
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret)
+ return ret;
src_sh->data_lock_held = _gf_false;
dst_sh->data_lock_held = _gf_true;
+ return 0;
}
int
@@ -793,7 +797,10 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
if (ret)
goto out;
- afr_sh_transfer_lock (first_loop_frame, sh_frame, priv->child_count);
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
sh->private = afr_sh_priv_init ();
if (!sh->private) {
ret = -1;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f0915b01d2e..14283163372 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1997,6 +1997,7 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
int_lock->lk_basename = base_name;
int_lock->lk_loc = loc;
int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
int_lock->lockee_count = 0;
afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
@@ -2057,13 +2058,13 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
}
afr_local_t*
-afr_local_copy (afr_local_t *l, xlator_t *this)
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
priv = this->private;
@@ -2088,11 +2089,19 @@ afr_local_copy (afr_local_t *l, xlator_t *this)
shc->type = sh->type;
uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
lc->child_up = memdup (l->child_up,
sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
+
if (l->xattr_req)
lc->xattr_req = dict_ref (l->xattr_req);
@@ -2100,59 +2109,25 @@ afr_local_copy (afr_local_t *l, xlator_t *this)
lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
if (l->cont.lookup.xattr)
lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
-
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
-
- for (i = 0; i < l->internal_lock.lockee_count; i++) {
- loc_copy (&lc->internal_lock.lockee[i].loc,
- &l->internal_lock.lockee[i].loc);
-
- lc->internal_lock.lockee[i].locked_count =
- l->internal_lock.lockee[i].locked_count;
-
- if (l->internal_lock.lockee[i].basename)
- lc->internal_lock.lockee[i].basename =
- gf_strdup (l->internal_lock.lockee[i].basename);
-
- if (l->internal_lock.lockee[i].locked_nodes) {
- lc->internal_lock.lockee[i].locked_nodes =
- memdup (l->internal_lock.lockee[i].locked_nodes,
- sizeof (*lc->internal_lock.lockee[i].locked_nodes) *
- priv->child_count);
- } else {
- lc->internal_lock.lockee[i].locked_nodes =
- GF_CALLOC (priv->child_count,
- sizeof (*lc->internal_lock.lockee[i].locked_nodes),
- gf_afr_mt_char);
- }
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
}
- lc->internal_lock.lockee_count = l->internal_lock.lockee_count;
-
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
return lc;
}
@@ -2241,7 +2216,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
afr_set_lk_owner (sh_frame, this, sh_frame->root);
afr_set_low_priority (sh_frame);
- sh_local = afr_local_copy (local, this);
+ sh_local = afr_self_heal_local_init (local, this);
if (!sh_local)
goto out;
sh_frame->local = sh_local;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index cc67e23fe95..9be1fdff924 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -91,13 +91,13 @@ int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
int child_index);
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
afr_lock_cbk_t lock_cbk);
afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
int
afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len, gf_boolean_t block,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
afr_lock_cbk_t success_handler,
afr_lock_cbk_t failure_handler);
void
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 9c2f3d53c83..3c2726b8df6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -283,23 +283,33 @@ afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
//Fun fact, lock_cbk is being used for both lock & unlock
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
afr_lock_cbk_t lock_cbk)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
afr_self_heal_t *sh = NULL;
+ int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
sh = &local->self_heal;
- GF_ASSERT (sh->data_lock_held);
-
- sh->data_lock_held = _gf_false;
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = dom;
afr_unlock (frame, this);
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
return 0;
}
@@ -316,7 +326,7 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
"finishing data selfheal of %s", local->loc.path);
if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_data_close);
else
afr_sh_data_close (frame, this);
@@ -337,7 +347,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_data_close);
else
afr_sh_data_close (frame, this);
return 0;
@@ -380,7 +390,7 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
goto out;
}
GF_ASSERT (sh->old_loop_frame);
- afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
afr_post_sh_big_lock_success,
afr_post_sh_big_lock_failure);
}
@@ -1237,9 +1247,11 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
@@ -1250,11 +1262,14 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t le
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
+
afr_nonblocking_inodelk (frame, this);
return 0;
@@ -1298,7 +1313,7 @@ afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
int
afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
off_t start, off_t len, gf_boolean_t block,
- afr_lock_cbk_t success_handler,
+ char *dom, afr_lock_cbk_t success_handler,
afr_lock_cbk_t failure_handler)
{
afr_local_t * local = NULL;
@@ -1310,7 +1325,7 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
sh->data_lock_success_handler = success_handler;
sh->data_lock_failure_handler = failure_handler;
sh->data_lock_block = block;
- return afr_sh_data_lock_rec (frame, this, start, len);
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
}
int
@@ -1371,7 +1386,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
block = sh->unwind ? _gf_true : _gf_false;
- afr_sh_data_lock (frame, this, 0, 0, block,
+ afr_sh_data_lock (frame, this, 0, 0, block, this->name,
afr_sh_data_big_lock_success,
afr_sh_data_fail);
}
@@ -1493,6 +1508,7 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
afr_sh_data_open (frame, this);
} else {
afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
afr_sh_non_reg_lock_success,
afr_sh_data_fail);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 1f663b692fc..6d629ce9813 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -579,19 +579,22 @@ int
afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
afr_local_t *local = NULL;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = LLONG_MAX - 1;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
afr_nonblocking_inodelk (frame, this);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index d307d5b3eee..d6d420910cf 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -79,7 +79,7 @@ afr_save_lk_owner (call_frame_t *frame)
local = frame->local;
- local->saved_lk_owner = frame->root->lk_owner;
+ local->saved_lk_owner = frame->root->lk_owner;
}
@@ -90,10 +90,9 @@ afr_restore_lk_owner (call_frame_t *frame)
local = frame->local;
- frame->root->lk_owner = local->saved_lk_owner;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
static void
__mark_all_pending (int32_t *pending[], int child_count,
afr_transaction_type type)
@@ -443,14 +442,30 @@ out:
return;
}
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
+{
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
+
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
+
unsigned char*
afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
{
unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
switch (type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
break;
case AFR_ENTRY_TRANSACTION:
@@ -553,22 +568,22 @@ afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
gf_boolean_t
afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int index = -1;
- int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int index = -1;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
+ for (i = 0; i < priv->child_count; i++) {
if (local->pending[i][index] == 0)
- return _gf_false;
+ return _gf_false;
}
- return _gf_true;
+ return _gf_true;
}
@@ -619,7 +634,7 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
- nothing_failed = afr_txn_nothing_failed (frame, this);
+ nothing_failed = afr_txn_nothing_failed (frame, this);
afr_compute_txn_changelog (local , priv);
@@ -645,14 +660,14 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
break;
}
- /* local->transaction.postop_piggybacked[] was
- precomputed in is_piggyback_postop() when called from
- afr_changelog_post_op_safe()
- */
+ /* local->transaction.postop_piggybacked[] was
+ precomputed in is_piggyback_postop() when called from
+ afr_changelog_post_op_safe()
+ */
- piggyback = 0;
- if (local->transaction.postop_piggybacked[i])
- piggyback = 1;
+ piggyback = 0;
+ if (local->transaction.postop_piggybacked[i])
+ piggyback = 1;
afr_set_postop_dict (local, this, xattr[i],
piggyback, i);
@@ -906,7 +921,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);
- afr_set_delayed_post_op (frame, this);
+ afr_set_delayed_post_op (frame, this);
if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
@@ -1179,12 +1194,14 @@ int
afr_set_transaction_flock (afr_local_t *local)
{
afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
return 0;
}
@@ -1199,6 +1216,7 @@ afr_lock_rec (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
@@ -1259,69 +1277,69 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
void
afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- /* call this function from any of the related optimizations
- which benefit from delaying post op are enabled, namely:
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
- - changelog piggybacking
- - eager locking
- */
+ - changelog piggybacking
+ - eager locking
+ */
- priv = this->private;
- if (!priv)
- return;
+ priv = this->private;
+ if (!priv)
+ return;
- if (!priv->post_op_delay_secs)
- return;
+ if (!priv->post_op_delay_secs)
+ return;
local = frame->local;
if (!local->transaction.eager_lock_on)
return;
- if (!local)
- return;
+ if (!local)
+ return;
- if (!local->fd)
- return;
+ if (!local->fd)
+ return;
- if (local->op == GF_FOP_WRITE)
- local->delayed_post_op = _gf_true;
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
}
gf_boolean_t
is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- gf_boolean_t res = _gf_false;
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
- local = frame->local;
- if (!local)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- if (!local->delayed_post_op)
- goto out;
+ if (!local->delayed_post_op)
+ goto out;
- res = _gf_true;
+ res = _gf_true;
out:
- return res;
+ return res;
}
void
afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub);
+ call_stub_t *stub);
void
afr_delayed_changelog_wake_up_cbk (void *data)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
- fd = data;
+ fd = data;
- afr_delayed_changelog_wake_up (THIS, fd);
+ afr_delayed_changelog_wake_up (THIS, fd);
}
@@ -1332,39 +1350,39 @@ afr_delayed_changelog_wake_up_cbk (void *data)
static gf_boolean_t
is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
{
- afr_fd_ctx_t *fdctx = NULL;
- afr_local_t *local = NULL;
- gf_boolean_t piggyback = _gf_true;
- afr_private_t *priv = NULL;
- int i = 0;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t piggyback = _gf_true;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = frame->this->private;
- local = frame->local;
- fdctx = afr_fd_ctx_get (fd, frame->this);
+ priv = frame->this->private;
+ local = frame->local;
+ fdctx = afr_fd_ctx_get (fd, frame->this);
- LOCK(&fd->lock);
- {
- piggyback = _gf_true;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- local->transaction.postop_piggybacked[i] = 1;
- } else {
- /* For at least _one_ subvolume we cannot
- piggyback on the changelog, and have to
- perform a hard POST-OP and therefore fsync
- if necesssary
- */
- piggyback = _gf_false;
+ LOCK(&fd->lock);
+ {
+ piggyback = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+ if (fdctx->pre_op_piggyback[i]) {
+ fdctx->pre_op_piggyback[i]--;
+ local->transaction.postop_piggybacked[i] = 1;
+ } else {
+ /* For at least _one_ subvolume we cannot
+ piggyback on the changelog, and have to
+ perform a hard POST-OP and therefore fsync
+ if necesssary
+ */
+ piggyback = _gf_false;
GF_ASSERT (fdctx->pre_op_done[i]);
fdctx->pre_op_done[i]--;
- }
- }
- }
- UNLOCK(&fd->lock);
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
if (!afr_txn_nothing_failed (frame, frame->this)) {
/* something failed in this transaction,
@@ -1381,117 +1399,117 @@ is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
int
afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
{
- afr_fd_ctx_t *fdctx = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
- fdctx = afr_fd_ctx_get (fd, this);
+ fdctx = afr_fd_ctx_get (fd, this);
- LOCK(&fd->lock);
- {
- fdctx->witnessed_unstable_write = _gf_true;
- }
- UNLOCK(&fd->lock);
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&fd->lock);
- return 0;
+ return 0;
}
/* TEST and CLEAR operation */
gf_boolean_t
afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
{
- afr_fd_ctx_t *fdctx = NULL;
- gf_boolean_t witness = _gf_false;
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
fdctx = afr_fd_ctx_get (fd, this);
if (!fdctx)
return _gf_true;
- LOCK(&fd->lock);
- {
- if (fdctx->witnessed_unstable_write) {
- witness = _gf_true;
- fdctx->witnessed_unstable_write = _gf_false;
- }
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK (&fd->lock);
- return witness;
+ return witness;
}
int
afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *pre,
- struct iatt *post, dict_t *xdata)
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
int child_index = (long) cookie;
- int call_count = -1;
- afr_local_t *local = NULL;
+ int call_count = -1;
+ afr_local_t *local = NULL;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- if (afr_fop_failed (op_ret, op_errno)) {
- /* Failure of fsync() is as good as failure of previous
- write(). So treat it like one.
- */
- gf_log (this->name, GF_LOG_WARNING,
- "fsync(%s) failed on subvolume %s. Transaction was %s",
- uuid_utoa (local->fd->inode->gfid),
- priv->children[child_index]->name,
- gf_fop_list[local->op]);
-
- afr_transaction_fop_failed (frame, this, child_index);
- }
+ if (afr_fop_failed (op_ret, op_errno)) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
call_count = afr_frame_return (frame);
if (call_count == 0)
- afr_changelog_post_op_now (frame, this);
+ afr_changelog_post_op_now (frame, this);
- return 0;
+ return 0;
}
int
afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
priv->child_count);
- if (!call_count) {
- /* will go straight to unlock */
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
- local->call_count = call_count;
+ local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
- STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->fsync, local->fd,
- 1, NULL);
- if (!--call_count)
- break;
- }
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, NULL);
+ if (!--call_count)
+ break;
+ }
- return 0;
+ return 0;
}
-int
+ int
afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
@@ -1500,55 +1518,55 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
- if (is_piggyback_post_op (frame, local->fd)) {
- /* just detected that this post-op is about to
- be optimized away as a new write() has
- already piggybacked on this frame's changelog.
- */
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ if (is_piggyback_post_op (frame, local->fd)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
- /* Calling afr_changelog_post_op_now() now will result in
- issuing ->[f]xattrop().
-
- Performing a hard POST-OP (->[f]xattrop() FOP) is a more
- responsible operation that what it might appear on the surface.
-
- The changelog of a file (in the xattr of the file on the server)
- stores information (pending count) about the state of the file
- on the OTHER server. This changelog is blindly trusted, and must
- therefore be updated in such a way it remains trustworthy. This
- implies that decrementing the pending count (essentially "clearing
- the dirty flag") must be done STRICTLY after we are sure that the
- operation on the other server has reached stable storage.
-
- While the backend filesystem on that server will eventually flush
- it to stable storage, we (being in userspace) have no mechanism
- to get notified when the write became "stable".
-
- This means we need take matter into our own hands and issue an
- fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
- and get an acknowledgement for it. And we need to wait for the
- fsync() acknowledgement before initiating the hard POST-OP.
-
- However if the FD itself was opened in O_SYNC or O_DSYNC then
- we are already guaranteed that the writes were made stable as
- part of the FOP itself. The same holds true for NFS stable
- writes which happen on an anonymous FD with O_DSYNC or O_SYNC
- flag set in the writev() @flags param. For all other write types,
- mark a flag in the fdctx whenever an unstable write is witnessed.
- */
-
- if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
- afr_changelog_post_op_now (frame, this);
- return 0;
- }
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
/* Check whether users want durability and perform fsync/post-op
* accordingly.
@@ -1560,13 +1578,13 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
afr_changelog_post_op_now (frame, this);
}
- return 0;
+ return 0;
}
-void
+ void
afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub)
+ call_stub_t *stub)
{
afr_fd_ctx_t *fd_ctx = NULL;
call_frame_t *prev_frame = NULL;
@@ -1611,17 +1629,17 @@ out:
}
-void
+ void
afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (is_afr_delayed_changelog_post_op_needed (frame, this))
- afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
- else
- afr_changelog_post_op_safe (frame, this);
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
}
@@ -1632,22 +1650,22 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
The @stub gets saved in @local and gets resumed in
afr_local_cleanup()
-*/
-void
+ */
+ void
afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
{
- afr_delayed_changelog_post_op (this, NULL, fd, stub);
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
}
-void
+ void
afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
{
- afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
}
-int
+ int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
@@ -1658,18 +1676,18 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- if (local->transaction.eager_lock_on) {
- /* We don't need to retain "local" in the
- fd list anymore, writes to all subvols
- are finished by now */
- LOCK (&local->fd->lock);
- {
- list_del_init (&local->transaction.eager_locked);
- }
- UNLOCK (&local->fd->lock);
- }
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+ }
- afr_restore_lk_owner (frame);
+ afr_restore_lk_owner (frame);
if (__fop_changelog_needed (frame, this)) {
afr_changelog_post_op (frame, this);
@@ -1690,7 +1708,7 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
* afr_transaction_fop_failed - inform that an fop failed
*/
-void
+ void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
{
afr_local_t * local = NULL;
@@ -1700,54 +1718,54 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index
priv = this->private;
__mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+ child_index, local->transaction.type);
}
-static gf_boolean_t
+ static gf_boolean_t
afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
{
- uint64_t start1 = local1->transaction.start;
- uint64_t start2 = local2->transaction.start;
- uint64_t end1 = 0;
- uint64_t end2 = 0;
-
- if (local1->transaction.len)
- end1 = start1 + local1->transaction.len - 1;
- else
- end1 = ULLONG_MAX;
-
- if (local2->transaction.len)
- end2 = start2 + local2->transaction.len - 1;
- else
- end2 = ULLONG_MAX;
-
- return ((end1 >= start2) && (end2 >= start1));
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
}
-void
+ void
afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- afr_local_t *each = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
- priv = this->private;
+ priv = this->private;
- if (!local->fd)
- return;
+ if (!local->fd)
+ return;
- if (local->transaction.type != AFR_DATA_TRANSACTION)
- return;
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
- if (!priv->eager_lock)
- return;
+ if (!priv->eager_lock)
+ return;
- fdctx = afr_fd_ctx_get (local->fd, this);
- if (!fdctx)
- return;
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
/*
* Once full file lock is acquired in eager-lock phase, overlapping
@@ -1766,22 +1784,22 @@ afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
* This check makes sure the locks are not transferred for
* overlapping writes.
*/
- LOCK (&local->fd->lock);
- {
- list_for_each_entry (each, &fdctx->eager_locked,
- transaction.eager_locked) {
- if (afr_locals_overlap (each, local)) {
- local->transaction.eager_lock_on = _gf_false;
- goto unlock;
- }
- }
-
- local->transaction.eager_lock_on = _gf_true;
- list_add_tail (&local->transaction.eager_locked,
- &fdctx->eager_locked);
- }
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
+ }
unlock:
- UNLOCK (&local->fd->lock);
+ UNLOCK (&local->fd->lock);
}
@@ -1800,11 +1818,10 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.type = type;
ret = afr_transaction_local_init (local, this);
- if (ret < 0) {
+ if (ret < 0)
goto out;
- }
- afr_transaction_eager_lock_init (local, this);
+ afr_transaction_eager_lock_init (local, this);
if (local->fd && local->transaction.eager_lock_on)
afr_set_lk_owner (frame, this, local->fd);
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 55e8bbcca08..108131276ea 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -23,6 +23,9 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 9a13fbdacd9..4dd1b1be395 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -380,8 +380,6 @@ init (xlator_t *this)
AFR_XATTR_PREFIX,
trav->xlator->name);
if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
ret = -ENOMEM;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e72398a62bc..940f84189d5 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -30,6 +30,7 @@
#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 2
struct _pump_private;
@@ -385,12 +386,19 @@ int
afr_entry_lockee_cmp (const void *l1, const void *l2);
typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
+
+typedef struct {
loc_t *lk_loc;
- struct gf_flock lk_flock;
int lockee_count;
afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
@@ -399,13 +407,11 @@ typedef struct {
unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
selfheal_lk_type_t selfheal_lk_type;
transaction_lk_type_t transaction_lk_type;
int32_t lock_count;
- int32_t inodelk_lock_count;
int32_t entrylk_lock_count;
uint64_t lock_number;
@@ -416,6 +422,7 @@ typedef struct {
int32_t lock_op_ret;
int32_t lock_op_errno;
afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
} afr_internal_lock_t;
typedef struct _afr_locked_fd {
@@ -869,8 +876,8 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this);
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
unsigned int child_count);
int pump_start (call_frame_t *frame, xlator_t *this);
@@ -1158,4 +1165,7 @@ afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
void
afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
#endif /* __AFR_H__ */