summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2011-08-20 15:48:27 +0530
committerVijay Bellur <vijay@gluster.com>2011-08-20 06:42:55 -0700
commit1af420c700fbc49b65cf7faceb3270e81cd991ce (patch)
treeee0dcfe62b4965191424b3121a4dd126e81260b8 /xlators/cluster/afr
parent2ebacdfdd3c39bf2d3139cb7d811356758a2350a (diff)
cluster/afr: Perform self-heal without locking the whole file
Change-Id: I206571c77f2d7b3c9f9d7bb82a936366fd99ce5c BUG: 3182 Reviewed-on: http://review.gluster.com/141 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-common.c98
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c13
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c58
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c15
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c302
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c122
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h1
-rw-r--r--xlators/cluster/afr/src/afr-open.c499
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c1114
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h20
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c84
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h29
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c709
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c44
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c66
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c55
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.h73
-rw-r--r--xlators/cluster/afr/src/pump.c2
19 files changed, 1580 insertions, 1726 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 94335bd0298..19c5a83d2da 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -706,7 +706,7 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->locked_nodes)
GF_FREE (sh->locked_nodes);
- if (sh->healing_fd && !sh->healing_fd_opened) {
+ if (sh->healing_fd) {
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
}
@@ -724,6 +724,14 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (sh->fresh_parent_dirs);
loc_wipe (&sh->parent_loc);
+
+ if (sh->checksum)
+ GF_FREE (sh->checksum);
+
+ if (sh->write_needed)
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
}
@@ -795,6 +803,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->fresh_children)
GF_FREE (local->fresh_children);
+ if (local->fd_open_on)
+ GF_FREE (local->fd_open_on);
+
{ /* lookup */
if (local->cont.lookup.xattrs) {
afr_reset_xattr (local->cont.lookup.xattrs,
@@ -897,23 +908,34 @@ afr_frame_return (call_frame_t *frame)
return call_count;
}
-
-/**
- * up_children_count - return the number of children that are up
- */
-
int
-afr_up_children_count (int child_count, unsigned char *child_up)
+afr_set_elem_count_get (unsigned char *elems, int child_count)
{
int i = 0;
int ret = 0;
for (i = 0; i < child_count; i++)
- if (child_up[i])
+ if (elems[i])
ret++;
return ret;
}
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+{
+ return afr_set_elem_count_get (child_up, child_count);
+}
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
+}
+
gf_boolean_t
afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
{
@@ -1172,7 +1194,7 @@ afr_is_transaction_running (afr_local_t *local)
return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
}
-static void
+void
afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_boolean_t is_background, ia_type_t ia_type,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
@@ -1186,6 +1208,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
GF_ASSERT (frame);
GF_ASSERT (this);
GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
local = frame->local;
local->self_heal.background = is_background;
@@ -1444,7 +1467,7 @@ static void
afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
gf_boolean_t *sh_launched)
{
- size_t up_count = 0;
+ unsigned int up_count = 0;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -1453,7 +1476,7 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
priv = this->private;
local = frame->local;
- up_count = afr_up_children_count (priv->child_count, local->child_up);
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
if (up_count == 1) {
gf_log (this->name, GF_LOG_DEBUG,
"Only 1 child up - do not attempt to detect self heal");
@@ -1591,8 +1614,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
if (local->op_ret < 0)
goto unwind;
gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
enotconn_count = priv->child_count - up_children_count;
if ((gfid_miss_count == local->success_count) &&
(enotconn_count > 0)) {
@@ -1871,7 +1894,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
if (loc->parent)
local->cont.lookup.parent_ino = loc->parent->ino;
- local->child_up = memdup (priv->child_up, priv->child_count);
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
if (NULL == local->child_up) {
op_errno = ENOMEM;
goto out;
@@ -1883,8 +1907,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
call_count = local->call_count;
if (local->call_count == 0) {
@@ -1994,7 +2018,7 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
priv->child_count,
- gf_afr_mt_char);
+ gf_afr_mt_int32_t);
if (!fd_ctx->opened_on) {
ret = -ENOMEM;
goto unlock;
@@ -2011,12 +2035,13 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto unlock;
}
+ INIT_LIST_HEAD (&fd_ctx->paused_calls);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+
ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
if (ret)
gf_log (this->name, GF_LOG_DEBUG,
"failed to set fd ctx (%p)", fd);
-
- INIT_LIST_HEAD (&fd_ctx->entries);
}
unlock:
UNLOCK (&fd->lock);
@@ -2108,7 +2133,7 @@ afr_flush_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -2174,7 +2199,7 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto out;
}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -2196,6 +2221,12 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->transaction.start = 0;
local->transaction.len = 0;
+ ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
+ if (ret) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto out;
+ }
afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
@@ -3474,8 +3505,8 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
{
local->op_ret = -1;
local->op_errno = EUCLEAN;
- local->call_count = afr_up_children_count (priv->child_count,
- priv->child_up);
+ local->call_count = afr_up_children_count (priv->child_up,
+ priv->child_count);
if (local->call_count == 0) {
gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
return -ENOTCONN;
@@ -3531,19 +3562,22 @@ out:
}
int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
{
- int i;
- int child_up_count = 0;
- int ret = -ENOMEM;
+ int i = 0;
+ int child_up_count = 0;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+ priv = this->private;
ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
AFR_TRANSACTION_LK);
if (ret < 0)
goto out;
ret = -ENOMEM;
- child_up_count = afr_up_children_count (priv->child_count, local->child_up);
+ child_up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
if (priv->optimistic_change_log && child_up_count == priv->child_count)
local->optimistic_change_log = 1;
@@ -3567,6 +3601,14 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
if (!local->fresh_children)
goto out;
+ if (local->fd) {
+ local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!local->fd_open_on)
+ goto out;
+ }
+
for (i = 0; i < priv->child_count; i++) {
local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
3, /* data + metadata + entry */
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 645da2a6c57..ec3639ff73b 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -164,9 +164,6 @@ out:
sh->need_entry_self_heal = _gf_true;
sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
afr_self_heal_type_str_get(&local->self_heal,
sh_type_str,
@@ -177,7 +174,9 @@ out:
" forced merge option set",
sh_type_str, local->loc.path);
- afr_self_heal (frame, this, local->fd->inode);
+ afr_launch_self_heal (frame, this, local->fd->inode,
+ _gf_false, local->fd->inode->ia_type,
+ NULL, afr_examine_dir_sh_unwind);
} else {
afr_set_opendir_done (this, local->fd->inode);
@@ -205,7 +204,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
sizeof (*local->cont.opendir.checksum),
gf_afr_mt_int32_t);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -240,8 +239,8 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
priv = this->private;
local = frame->local;
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
LOCK (&frame->lock);
{
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 21287f8b8c2..1c25d160689 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -166,7 +166,7 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child_index] = 1;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.create.flags;
if (local->success_count == 0)
@@ -212,13 +212,16 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -228,7 +231,7 @@ afr_create_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -442,13 +445,16 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -458,7 +464,7 @@ afr_mknod_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->mknod,
@@ -667,13 +673,16 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -683,7 +692,7 @@ afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -894,13 +903,16 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -910,7 +922,7 @@ afr_link_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
priv->children[i],
priv->children[i]->fops->link,
@@ -1117,13 +1129,16 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1133,7 +1148,7 @@ afr_symlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1338,13 +1353,16 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1354,7 +1372,7 @@ afr_rename_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1543,13 +1561,16 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1559,7 +1580,7 @@ afr_unlink_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1741,13 +1762,16 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1757,7 +1781,7 @@ afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->entry_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
(void *) (long) i,
priv->children[i],
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index f8157482758..1258afe09fc 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -44,9 +44,6 @@
#include "compat-errno.h"
#include "compat.h"
-#include "afr.h"
-
-
/**
* Common algorithm for inode read calls:
*
@@ -399,6 +396,12 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
local->cont.fstat.ino = fd->inode->ino;
local->fd = fd_ref (fd);
+ op_ret = afr_open_fd_fix (frame, this, _gf_false);
+ if (op_ret) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
children[call_child],
children[call_child]->fops->fstat,
@@ -1036,6 +1039,12 @@ afr_readv (call_frame_t *frame, xlator_t *this,
local->cont.readv.size = size;
local->cont.readv.offset = offset;
+ op_ret = afr_open_fd_fix (frame, this, _gf_false);
+ if (op_ret) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
STACK_WIND_COOKIE (frame, afr_readv_cbk,
(void *) (long) call_child,
children[call_child],
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c292b7493f6..faf3db40041 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -46,6 +46,7 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
/* {{{ writev */
@@ -125,19 +126,21 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
int
afr_writev_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int i = 0;
int call_count = -1;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -147,7 +150,7 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -188,10 +191,10 @@ afr_writev_done (call_frame_t *frame, xlator_t *this)
int
afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
local = frame->local;
@@ -235,6 +238,202 @@ out:
return 0;
}
+static int
+afr_prepare_loc (call_frame_t *frame, fd_t *fd)
+{
+ afr_local_t *local = NULL;
+ char *name = NULL;
+ char *path = NULL;
+ int ret = 0;
+
+ if ((!fd) || (!fd->inode))
+ return -1;
+
+ local = frame->local;
+ ret = inode_path (fd->inode, NULL, (char **)&path);
+ if (ret <= 0) {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "Unable to get path for gfid: %s",
+ uuid_utoa (fd->inode->gfid));
+ return -1;
+ }
+
+ if (local->loc.path) {
+ if (strcmp (path, local->loc.path))
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "overwriting old loc->path %s with %s",
+ local->loc.path, path);
+ GF_FREE ((char *)local->loc.path);
+ }
+ local->loc.path = path;
+
+ name = strrchr (local->loc.path, '/');
+ if (name)
+ name++;
+ local->loc.name = name;
+
+ if (local->loc.inode) {
+ inode_unref (local->loc.inode);
+ }
+ local->loc.inode = inode_ref (fd->inode);
+
+ if (local->loc.parent) {
+ inode_unref (local->loc.parent);
+ }
+
+ local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
+
+ return 0;
+}
+
+afr_fd_paused_call_t*
+afr_paused_call_create (call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_fd_paused_call_t *paused_call = NULL;
+
+ local = frame->local;
+ GF_ASSERT (local->fop_call_continue);
+
+ paused_call = GF_CALLOC (1, sizeof (*paused_call),
+ gf_afr_fd_paused_call_t);
+ if (paused_call) {
+ INIT_LIST_HEAD (&paused_call->call_list);
+ paused_call->frame = frame;
+ }
+
+ return paused_call;
+}
+
+static int
+afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
+{
+ afr_fd_paused_call_t *paused_call = NULL;
+ int ret = 0;
+
+ paused_call = afr_paused_call_create (frame);
+ if (paused_call)
+ list_add (&paused_call->call_list, &fd_ctx->paused_calls);
+ else
+ ret = -ENOMEM;
+
+ return ret;
+}
+
+static void
+afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char sh_type_str[256] = {0};
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->need_missing_entry_self_heal = _gf_true;
+ sh->need_gfid_self_heal = _gf_true;
+ sh->need_data_self_heal = _gf_true;
+ afr_self_heal_type_str_get(&local->self_heal, sh_type_str,
+ sizeof(sh_type_str));
+ gf_log (this->name, GF_LOG_INFO, "%s self-heal triggered. "
+ "path: %s, reason: Replicate up down flush, data lock "
+ "is held", sh_type_str, local->loc.path);
+
+ afr_launch_self_heal (frame, this, local->fd->inode, _gf_true,
+ local->fd->inode->ia_type, NULL, NULL);
+}
+
+int
+afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
+{
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ int need_open_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t fop_continue = _gf_true;
+ gf_boolean_t queue_fop = _gf_false;
+
+ local = frame->local;
+ priv = this->private;
+
+ GF_ASSERT (local->fd);
+ if (pause_fop)
+ GF_ASSERT (local->fop_call_continue);
+
+ ret = afr_prepare_loc (frame, local->fd);
+ if (ret < 0) {
+ //File does not exist we cant open it.
+ ret = 0;
+ goto out;
+ }
+
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->up_count < priv->up_count) {
+ need_self_heal = _gf_true;
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
+ local->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ if (!need_open)
+ need_open = GF_CALLOC (priv->child_count,
+ sizeof (*need_open),
+ gf_afr_mt_int32_t);
+ need_open[i] = 1;
+ need_open_count++;
+ } else if (pause_fop && local->child_up[i] &&
+ (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
+ queue_fop = _gf_true;
+ }
+ }
+
+ if (queue_fop) {
+ GF_ASSERT (pause_fop);
+ gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
+ local->fd);
+ ret = afr_pause_fd_fop (frame, this, fd_ctx);
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&local->fd->lock);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
+ local->loc.path);
+ fop_continue = _gf_false;
+ goto out;
+ }
+
+ if (need_self_heal)
+ afr_trigger_open_fd_self_heal (frame, this);
+
+ if (!need_open_count)
+ goto out;
+
+ gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
+ afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
+ fop_continue = _gf_false;
+out:
+ if (need_open)
+ GF_FREE (need_open);
+ if (fop_continue && local->fop_call_continue)
+ local->fop_call_continue (frame, this);
+ return ret;
+}
int
afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
@@ -246,8 +445,6 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int op_ret = -1;
int op_errno = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -272,21 +469,14 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->cont.writev.iobref = iobref_ref (iobref);
local->fd = fd_ref (fd);
+ local->fop_call_continue = afr_do_writev;
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
+ ret = afr_open_fd_fix (frame, this, _gf_true);
+ if (ret) {
+ op_errno = -ret;
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_writev;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_writev (frame, this);
- }
-
op_ret = 0;
out:
if (op_ret == -1) {
@@ -395,13 +585,16 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -411,7 +604,7 @@ afr_truncate_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -602,13 +795,16 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -618,7 +814,7 @@ afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -702,8 +898,6 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
int ret = -1;
int op_ret = -1;
int op_errno = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -725,21 +919,14 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
local->cont.ftruncate.ino = fd->inode->ino;
local->fd = fd_ref (fd);
+ local->fop_call_continue = afr_do_ftruncate;
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
+ ret = afr_open_fd_fix (frame, this, _gf_true);
+ if (ret) {
+ op_errno = -ret;
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->up_count < priv->up_count) {
- local->openfd_flush_cbk = afr_do_ftruncate;
- afr_openfd_flush (frame, this, fd);
- } else {
- afr_do_ftruncate (frame, this);
- }
-
op_ret = 0;
out:
if (op_ret == -1) {
@@ -849,13 +1036,16 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -865,7 +1055,7 @@ afr_setattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1056,13 +1246,16 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1072,7 +1265,7 @@ afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1104,7 +1297,6 @@ afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
afr_fsetattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, struct iatt *buf, int32_t valid)
@@ -1124,10 +1316,12 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
+ op_errno = ENOMEM;
goto out;
}
ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame->local = local;
ret = AFR_LOCAL_INIT (local, priv);
if (ret < 0) {
@@ -1135,12 +1329,9 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- transaction_frame->local = local;
-
local->op_ret = -1;
local->cont.fsetattr.ino = fd->inode->ino;
-
local->cont.fsetattr.in_buf = *buf;
local->cont.fsetattr.valid = valid;
@@ -1150,6 +1341,13 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->fd = fd_ref (fd);
+ op_ret = afr_open_fd_fix (frame, this, _gf_false);
+ if (ret) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
local->transaction.len = 0;
@@ -1242,13 +1440,16 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1258,7 +1459,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
(void *) (long) i,
priv->children[i],
@@ -1424,13 +1625,16 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
int call_count = -1;
int i = 0;
local = frame->local;
priv = this->private;
+ int_lock = &local->internal_lock;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);
@@ -1440,7 +1644,7 @@ afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
+ if (local->child_up[i] && int_lock->inode_locked_nodes[i]) {
STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
(void *) (long) i,
priv->children[i],
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 17651add96d..2168ee26f69 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -88,8 +88,7 @@ is_afr_lock_selfheal (afr_local_t *local)
}
int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this,
- afr_fd_ctx_t *fd_ctx)
+internal_lock_count (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
@@ -99,10 +98,9 @@ internal_lock_count (call_frame_t *frame, xlator_t *this,
local = frame->local;
priv = this->private;
- if (fd_ctx) {
- GF_ASSERT (local->fd);
+ if (local->fd) {
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i])
+ if (local->child_up[i] && local->fd_open_on[i])
++call_count;
}
} else {
@@ -552,20 +550,24 @@ static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = (long)cookie;
local = frame->local;
+ int_lock = &local->internal_lock;
afr_trace_inodelk_out (frame, AFR_INODELK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ op_errno, child_index);
if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed %s",
- local->loc.path, strerror (op_errno));
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
}
+ int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
return 0;
@@ -590,6 +592,9 @@ afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
flock.l_len = int_lock->lk_flock.l_len;
flock.l_type = F_UNLCK;
+ gf_log (this->name, GF_LOG_DEBUG, "attempting data unlock range %"PRIu64
+ " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
+ frame->root->lk_owner);
call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
priv->child_count);
@@ -646,9 +651,22 @@ static int32_t
afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = (long)cookie;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
afr_trace_entrylk_out (frame, AFR_ENTRYLK_TRANSACTION,
AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ op_errno, child_index);
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
+ }
afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno);
@@ -747,8 +765,7 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
afr_unlock (frame, this);
} else {
if (op_ret == 0) {
- int_lock->locked_nodes[child_index]
- |= LOCKED_YES;
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
int_lock->lock_count++;
}
afr_lock_blocking (frame, this, child_index + 1);
@@ -940,8 +957,8 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
or don't have the fd open */
while ((child_index < priv->child_count)
- && (!local->child_up[child_index]
- || !fd_ctx->opened_on[child_index]))
+ && (!local->child_up[child_index] ||
+ !local->fd_open_on[child_index]))
child_index++;
} else {
@@ -969,9 +986,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
}
if ((child_index == priv->child_count)
- || (int_lock->lock_count ==
- afr_up_children_count (priv->child_count,
- local->child_up))) {
+ || (int_lock->lock_count == int_lock->lk_expected_count)) {
/* we're done locking */
@@ -1081,8 +1096,6 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
}
return 0;
-
-
}
int32_t
@@ -1091,6 +1104,7 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int up_count = 0;
priv = this->private;
local = frame->local;
@@ -1103,6 +1117,10 @@ afr_blocking_lock (call_frame_t *frame, xlator_t *this)
break;
case AFR_ENTRY_RENAME_TRANSACTION:
+ up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ int_lock->lk_expected_count = 2 * up_count;
+ //fallthrough
case AFR_ENTRY_TRANSACTION:
initialize_entrylk_variables (frame, this);
break;
@@ -1151,8 +1169,7 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
}
} else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index]
- |= LOCKED_YES;
+ int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
int_lock->entrylk_lock_count++;
}
@@ -1161,7 +1178,7 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"Last locking reply received");
/* all locks successfull. Proceed to call FOP */
if (int_lock->entrylk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1181,6 +1198,20 @@ afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+void
+afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
+ size_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT (local->fd_open_on);
+
+ memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
+ for (i = 0; i < child_count; i++)
+ if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ local->fd_open_on[i] = 1;
+}
+
int
afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
{
@@ -1192,8 +1223,6 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
loc_t *loc = NULL;
int32_t call_count = 0;
int i = 0;
- uint64_t ctx = 0;
- int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
@@ -1206,9 +1235,8 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
loc = int_lock->lk_loc;
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1221,10 +1249,10 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
return -1;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
gf_log (this->name, GF_LOG_INFO,
@@ -1236,7 +1264,7 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking entrylk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
+ if (local->child_up[i] && local->fd_open_on[i]) {
afr_trace_entrylk_in (frame, AFR_ENTRYLK_NB_TRANSACTION,
AFR_LOCK_OP, basename, i);
@@ -1252,8 +1280,9 @@ afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
} else {
GF_ASSERT (loc);
- call_count = internal_lock_count (frame, this, NULL);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -1314,8 +1343,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
}
} else if (op_ret == 0) {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
+ int_lock->inode_locked_nodes[child_index] |= LOCKED_YES;
int_lock->inodelk_lock_count++;
}
@@ -1324,7 +1352,7 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"Last inode locking reply received");
/* all locks successfull. Proceed to call FOP */
if (int_lock->inodelk_lock_count ==
- afr_up_children_count (priv->child_count, local->child_up)) {
+ int_lock->lk_expected_count) {
gf_log (this->name, GF_LOG_TRACE,
"All servers locked. Calling the cbk");
int_lock->lock_op_ret = 0;
@@ -1352,7 +1380,6 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
int32_t call_count = 0;
- uint64_t ctx = 0;
int i = 0;
int ret = 0;
struct gf_flock flock = {0,};
@@ -1365,12 +1392,14 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
flock.l_len = int_lock->lk_flock.l_len;
flock.l_type = int_lock->lk_flock.l_type;
+ gf_log (this->name, GF_LOG_DEBUG, "attempting data lock range %"PRIu64
+ " %"PRIu64" by %"PRIu64, flock.l_start, flock.l_len,
+ frame->root->lk_owner);
initialize_inodelk_variables (frame, this);
if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_INFO,
"unable to get fd ctx for fd=%p",
local->fd);
@@ -1384,10 +1413,10 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- call_count = internal_lock_count (frame, this, fd_ctx);
+ afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
if (!call_count) {
gf_log (this->name, GF_LOG_INFO,
@@ -1399,7 +1428,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
/* Send non-blocking inodelk calls only on up children
and where the fd has been opened */
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && fd_ctx->opened_on[i]) {
+ if (local->child_up[i] && local->fd_open_on[i]) {
afr_trace_inodelk_in (frame, AFR_INODELK_NB_TRANSACTION,
AFR_LOCK_OP, &flock, F_SETLK, i);
@@ -1417,8 +1446,9 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
}
} else {
- call_count = internal_lock_count (frame, this, NULL);
+ call_count = internal_lock_count (frame, this);
int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -1989,7 +2019,7 @@ afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- fdctx->opened_on[child_index] = 1;
+ fdctx->opened_on[child_index] = AFR_FD_OPENED;
out:
return ret;
@@ -2083,7 +2113,7 @@ is_fd_opened (fd_t *fd, int32_t child_index)
fdctx = (afr_fd_ctx_t *) (long) tmp;
- if (fdctx->opened_on[child_index])
+ if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
ret = 1;
out:
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 98e86574031..d5a988708b8 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -43,6 +43,7 @@ enum gf_afr_mem_types_ {
gf_afr_mt_pump_priv,
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 306f5a85af0..02d8f3ded3b 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -116,7 +116,7 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child_index] = 1;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
fd_ctx->flags = local->cont.open.flags;
fd_ctx->wbflags = local->cont.open.wbflags;
}
@@ -141,7 +141,6 @@ unlock:
return 0;
}
-
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, int32_t wbflags)
@@ -180,7 +179,6 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
frame->local = local;
call_count = local->call_count;
-
loc_copy (&local->loc, loc);
local->cont.open.flags = flags;
@@ -209,446 +207,165 @@ out:
return 0;
}
-
-int
-afr_openfd_sh_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+//NOTE: this function should be called with holding the lock on
+//fd to which fd_ctx belongs
+void
+afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
+ struct list_head *list)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- uint64_t ctx = 0;
- int ret = 0;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
+ afr_fd_paused_call_t *paused_call = NULL;
+ afr_fd_paused_call_t *tmp = NULL;
+ afr_local_t *call_local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t call = _gf_false;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ priv = this->private;
+ list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
+ call_list) {
+ call = _gf_true;
+ call_local = paused_call->frame->local;
+ for (i = 0; i < priv->child_count; i++) {
+ if (call_local->child_up[i] &&
+ (fd_ctx->opened_on[i] == AFR_FD_OPENING))
+ call = _gf_false;
}
- }
-out:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- int_lock->lock_cbk = local->transaction.done;
- local->transaction.resume (frame, this);
+ if (call) {
+ list_del_init (&paused_call->call_list);
+ list_add (&paused_call->call_list, list);
+ }
}
-
- return 0;
}
-
-static int
-__unopened_count (int child_count, unsigned int *opened_on, unsigned char *child_up)
+void
+afr_resume_calls (xlator_t *this, struct list_head *list)
{
- int i = 0;
- int count = 0;
-
- for (i = 0; i < child_count; i++) {
- if (!opened_on[i] && child_up[i])
- count++;
+ afr_fd_paused_call_t *paused_call = NULL;
+ afr_fd_paused_call_t *tmp = NULL;
+ afr_local_t *call_local = NULL;
+
+ list_for_each_entry_safe (paused_call, tmp, list, call_list) {
+ list_del_init (&paused_call->call_list);
+ call_local = paused_call->frame->local;
+ call_local->fop_call_continue (paused_call->frame, this);
+ GF_FREE (paused_call);
}
-
- return count;
}
-
int
-afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int abandon = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ struct list_head paused_calls = {0};
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
+ call_count = afr_frame_return (frame);
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
+ //Note: No frame locking needed for this block of code
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- local->fd, local->loc.path);
- abandon = 1;
+ "failed to get fd context, %p", local->fd);
goto out;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
LOCK (&local->fd->lock);
{
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- for (i = 0; i < priv->child_count; i++) {
- fd_ctx->pre_op_done[i] = 0;
- fd_ctx->pre_op_piggyback[i] = 0;
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ //Change open status from OPENING to NOT OPENED.
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
- }
- UNLOCK (&local->fd->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd not open on any subvolume %p (%s)",
- local->fd, local->loc.path);
- abandon = 1;
- goto out;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening fd for %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_openfd_sh_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, local->fd,
- fd_ctx->wbflags);
-
- if (!--call_count)
- break;
+ if (call_count == 0) {
+ INIT_LIST_HEAD (&paused_calls);
+ afr_get_resumable_calls (this, fd_ctx, &paused_calls);
}
}
-
+ UNLOCK (&local->fd->lock);
out:
- if (abandon)
- local->transaction.resume (frame, this);
-
- return 0;
-}
-
-
-static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
-{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
+ if (call_count == 0) {
+ afr_resume_calls (this, &paused_calls);
+ if (local->fop_call_continue)
+ local->fop_call_continue (frame, this);
+ else
+ AFR_STACK_DESTROY (frame);
}
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
return 0;
}
-
int
-afr_openfd_sh (call_frame_t *frame, xlator_t *this)
+afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
+ int need_open_count, int *need_open)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- char sh_type_str[256] = {0,};
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *open_frame = NULL;
+ afr_local_t *open_local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+
+ GF_ASSERT (fd_ctx);
+ GF_ASSERT (need_open_count > 0);
+ GF_ASSERT (need_open);
local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (local->loc.path);
- /* forcibly trigger missing-entries self-heal */
-
- sh->need_missing_entry_self_heal = _gf_true;
- sh->need_gfid_self_heal = _gf_true;
- sh->data_lock_held = _gf_true;
- sh->need_data_self_heal = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_openfd_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_INFO, "%s self-heal triggered. "
- "path: %s, reason: Replicate up down flush, data lock is held",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this, local->fd->inode);
-
- return 0;
-}
-
-
-int
-afr_openfd_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- int _ret = -1;
-
priv = this->private;
- local = frame->local;
-
- LOCK (&local->fd->lock);
- {
- _ret = __fd_ctx_get (local->fd, this, &ctx);
- if (_ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- local->fd, local->loc.path);
+ if (!local->fop_call_continue) {
+ open_frame = copy_frame (frame);
+ if (!open_frame) {
+ ret = -ENOMEM;
goto out;
}
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->down_count = priv->down_count;
- fd_ctx->up_count = priv->up_count;
- }
-out:
- UNLOCK (&local->fd->lock);
-
- afr_local_transaction_cleanup (local, this);
-
- gf_log (this->name, GF_LOG_TRACE,
- "The up/down flush is over");
-
- fd_unref (local->fd);
- local->openfd_flush_cbk (frame, this);
-
- return 0;
-}
-
-
-
-int
-afr_openfd_xaction (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t * local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- local = frame->local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_openfd_sh;
- local->transaction.done = afr_openfd_flush_done;
-
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- gf_log (this->name, GF_LOG_TRACE,
- "doing up/down flush on fd=%p", fd);
-
- afr_transaction (frame, this, AFR_DATA_TRANSACTION);
-
-out:
- return 0;
-}
-
-
-
-int
-afr_openfd_xaction_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- fd, local->loc.path);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = 1;
-
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened successfully on subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ ALLOC_OR_GOTO (open_local, afr_local_t, out);
+ ret = AFR_LOCAL_INIT (open_local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
+ loc_copy (&open_local->loc, &local->loc);
+ open_local->fd = fd_ref (local->fd);
+ } else {
+ ret = 0;
+ open_frame = frame;
+ open_local = local;
}
-out:
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_openfd_xaction (frame, this, local->fd);
- }
-
- return 0;
-}
-
-
-int
-afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int no_open = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
-
- /*
- * If the file is already deleted while the fd is open, no need to
- * perform the openfd flush, call the flush_cbk and get out.
- */
- ret = afr_prepare_loc (frame, fd);
- if (ret < 0) {
- local->openfd_flush_cbk (frame, this);
- goto out;
- }
+ open_local->call_count = need_open_count;
- /*
- * Some subvolumes might have come up on which we never
- * opened this fd in the first place. Re-open fd's on those
- * subvolumes now.
- */
-
- local->fd = fd_ref (fd);
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context %p (%s)",
- fd, local->loc.path);
- no_open = 1;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- LOCK (&local->fd->lock);
- {
- call_count = __unopened_count (priv->child_count,
- fd_ctx->opened_on,
- local->child_up);
- }
- UNLOCK (&local->fd->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "fd not open on any subvolume %p (%s)",
- fd, local->loc.path);
- no_open = 1;
- goto out;
- }
-
- local->call_count = call_count;
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
+ need_open_count);
for (i = 0; i < priv->child_count; i++) {
- if (!fd_ctx->opened_on[i] && local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
+ if (need_open[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
"opening fd for %s on subvolume %s",
local->loc.path, priv->children[i]->name);
- STACK_WIND_COOKIE (frame, afr_openfd_xaction_open_cbk,
+ STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
(void *)(long) i,
priv->children[i],
priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags, fd,
- fd_ctx->wbflags);
+ &open_local->loc, fd_ctx->flags,
+ open_local->fd, fd_ctx->wbflags);
- if (!--call_count)
- break;
}
}
-
out:
- if (no_open)
- afr_openfd_xaction (frame, this, fd);
-
- return 0;
+ if (ret && open_frame)
+ AFR_STACK_DESTROY (open_frame);
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
index 04b388fe052..1c7cdf41819 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -44,279 +44,249 @@
This file contains the various self-heal algorithms
*/
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame)
+ goto out;
-/*
- The "full" algorithm. Copies the entire file from
- source to sinks.
-*/
-
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
static void
-sh_full_private_cleanup (call_frame_t *frame, xlator_t *this)
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
local = frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
-
if (sh_priv)
GF_FREE (sh_priv);
}
-
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call);
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
+{
+ int writes = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
+ }
+
+ return writes;
+}
+
static int
-sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this)
+sh_loop_driver_done (call_frame_t *frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
- sh_full_private_cleanup (frame, this);
+ sh_private_cleanup (frame, this);
if (sh->op_failed) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
gf_log (this->name, GF_LOG_INFO,
- "full self-heal aborting on %s",
+ "self-heal aborting on %s",
local->loc.path);
local->self_heal.algo_abort_cbk (frame, this);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "full self-heal completed on %s",
- local->loc.path);
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) {
+ gf_log (this->name, GF_LOG_INFO, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
+ }
+ sh->old_loop_frame = last_loop_frame;
local->self_heal.algo_completion_cbk (frame, this);
}
+
return 0;
}
-static int
-sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset)
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
{
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- AFR_STACK_DESTROY (rw_frame);
+ if (!loop_frame)
+ goto out;
- sh_full_loop_driver (sh_frame, this, _gf_false);
+ loop_local = loop_frame->local;
+ if (loop_local) {
+ loop_sh = &loop_local->self_heal;
+ }
+ if (loop_sh && loop_sh->loop_completion_cbk) {
+ if (loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this,
+ loop_sh->loop_completion_cbk);
+ } else {
+ loop_sh->loop_completion_cbk (loop_frame, this);
+ }
+ } else {
+ //default loop_completion_cbk destroys the loop_frame
+ if (loop_sh && !loop_sh->loop_completion_cbk)
+ GF_ASSERT (!loop_sh->data_lock_held);
+ sh_destroy_frame (loop_frame, this);
+ }
+out:
return 0;
}
-
static int
-sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
-
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- rw_sh->offset - op_ret);
-
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
- sh->op_failed = 1;
- }
- }
- UNLOCK (&sh_frame->lock);
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- call_count = afr_frame_return (rw_frame);
-
- if (call_count == 0) {
- sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret);
- }
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true;
+ loop_sh->sh_data_algo_start (loop_frame, this);
return 0;
}
-
static int
-sh_full_read_cbk (call_frame_t *rw_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref)
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- off_t offset = (long) cookie;
-
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- call_count = sh->active_sinks;
-
- rw_local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, offset);
-
- if (op_ret <= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "read from %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[sh->source]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- sh_full_loop_return (rw_frame, this, offset);
- return 0;
- }
-
- rw_sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "block has all 0 filled");
- sh_full_loop_return (rw_frame, this, offset);
- goto out;
- }
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
-
- STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
-
-out:
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
return 0;
}
-
static int
-sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- afr_self_heal_t *sh = NULL;
- call_frame_t *rw_frame = NULL;
- int32_t op_errno = 0;
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
+ GF_ASSERT (sh_frame);
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
-
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- rw_sh->offset = offset;
- rw_sh->sh_frame = frame;
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_oner as sh_frame
+ new_loop_local = afr_local_copy (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
- STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk,
- (void *) (long) offset,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- offset);
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
+ goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN,
+ gf_afr_mt_uint8_t);
+ if (!new_loop_sh->checksum)
+ goto out;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->loop_completion_cbk = sh_destroy_frame;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ sh_loop_lock_success, sh_loop_lock_failure);
return 0;
-
out:
sh->op_failed = 1;
-
- sh_full_loop_driver (frame, this, _gf_false);
-
+ if (new_loop_frame) {
+ new_loop_frame->local = new_loop_local;
+ }
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
return 0;
}
-
static int
-sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call)
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
gf_boolean_t is_driver_done = _gf_false;
blksize_t block_size = 0;
- off_t offset = 0;
int loop = 0;
+ off_t offset = 0;
priv = this->private;
- local = frame->local;
+ local = sh_frame->local;
sh = &local->self_heal;
sh_priv = sh->private;
@@ -324,23 +294,18 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
{
if (_gf_false == is_first_call)
sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((sh->op_failed == 0) &&
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) && (0 == sh->op_failed) &&
(sh_priv->loops_running < priv->data_self_heal_window_size)
&& (sh_priv->offset < sh->file_size)) {
loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh->block_size;
+ sh_priv->offset += block_size;
sh_priv->loops_running++;
if (_gf_false == is_first_call)
break;
-
}
if (0 == sh_priv->loops_running) {
is_driver_done = _gf_true;
@@ -348,274 +313,130 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_
}
UNLOCK (&sh_priv->lock);
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done && !sh->op_failed)
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
while (loop--) {
if (sh->op_failed) {
// op failed in other loop, stop spawning more loops
- sh_full_loop_driver (frame, this, _gf_false);
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
} else {
- sh_full_read_write (frame, this, offset);
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
offset += block_size;
}
}
+driver_done:
if (is_driver_done) {
- sh_full_loop_driver_done (frame, this);
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
}
-
- return 0;
-}
-
-
-int
-afr_sh_algo_full (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_full_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-
- sh->private = sh_priv;
-
- local->call_count = 0;
-
- sh_full_loop_driver (frame, this, _gf_true);
-out:
return 0;
}
-
-/*
- * The "diff" algorithm. Copies only those blocks whose checksums
- * don't match with those of source.
- */
-
-
-static void
-sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
-
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
-
- GF_FREE (sh_priv->loops[i]);
- }
- }
-
- if (sh_priv) {
- if (sh_priv->loops)
- GF_FREE (sh_priv->loops);
-
- GF_FREE (sh_priv);
- }
-
-
-}
-
-
-static uint32_t
-__make_cookie (int loop_index, int child_index)
-{
- uint32_t ret = ((loop_index << 16) | child_index);
- return ret;
-}
-
-
-static int
-__loop_index (uint32_t cookie)
-{
- return ((cookie & 0xFFFF0000) >> 16);
-}
-
-
-static int
-__child_index (uint32_t cookie)
-{
- return (cookie & 0x0000FFFF);
-}
-
-
-static void
-sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count)
-{
- loop_state->active = _gf_false;
-// loop_state->offset = 0;
-
- memset (loop_state->write_needed,
- 0, sizeof (*loop_state->write_needed) * child_count);
-
- memset (loop_state->checksum,
- 0, MD5_DIGEST_LEN * child_count);
-}
-
-
static int
-sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
-
- sh_diff_private_cleanup (frame, this);
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
-
- local->self_heal.algo_completion_cbk (frame, this);
- }
-
- return 0;
-}
-
-static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state);
-
-static int
-sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this,
- struct sh_diff_loop_state *loop_state)
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- gf_log (this->name, GF_LOG_TRACE,
- "loop for offset %"PRId64" returned", loop_state->offset);
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
- AFR_STACK_DESTROY (rw_frame);
+ if (op_ret == -1) {
+ sh->op_failed = 1;
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
- sh_diff_loop_driver (sh_frame, this, _gf_false, loop_state);
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
return 0;
}
-
static int
-sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
struct iatt *postbuf)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
afr_local_t * sh_local = NULL;
afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
int call_count = 0;
int child_index = 0;
- int loop_index = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
gf_log (this->name, GF_LOG_TRACE,
"wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index,
- loop_state->offset);
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
- LOCK (&sh_frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
- sh->op_failed = 1;
- }
+ sh->op_failed = 1;
+ afr_sh_set_error (loop_sh, op_errno);
}
- UNLOCK (&sh_frame->lock);
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
}
return 0;
@@ -623,74 +444,71 @@ sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
static int
-sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie,
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count, struct iatt *buf,
struct iobref *iobref)
{
afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- uint32_t wcookie = 0;
int i = 0;
int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
- priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_index = __loop_index ((uint32_t) (long) cookie);
- loop_state = sh_priv->loops[loop_index];
-
- call_count = sh_diff_number_of_writes_needed (loop_state->write_needed,
- priv->child_count);
-
- rw_local->call_count = call_count;
gf_log (this->name, GF_LOG_TRACE,
"read %d bytes of data from %s, offset %"PRId64"",
- op_ret, sh_local->loc.path, loop_state->offset);
-
- if ((op_ret <= 0) ||
- (call_count == 0)) {
- sh_diff_loop_return (rw_frame, this, loop_state);
+ op_ret, loop_local->loc.path, loop_sh->offset);
- return 0;
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ sh->op_failed = 1;
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
}
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) {
gf_log (this->name, GF_LOG_DEBUG, "0 filled block");
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
goto out;
- }
}
- for (i = 0; i < priv->child_count; i++) {
- if (loop_state->write_needed[i]) {
- wcookie = __make_cookie (loop_index, i);
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ GF_ASSERT (call_count > 0);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk,
- (void *) (long) wcookie,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count,
- loop_state->offset, iobref);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, iobref);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
}
out:
@@ -699,100 +517,77 @@ out:
static int
-sh_diff_read (call_frame_t *rw_frame, xlator_t *this,
- int loop_index)
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- struct sh_diff_loop_state *loop_state = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- uint32_t cookie = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
-
- sh_frame = rw_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- sh_priv = sh->private;
-
- loop_state = sh_priv->loops[loop_index];
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- cookie = __make_cookie (loop_index, sh->source);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh_priv->block_size,
- loop_state->offset);
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset);
return 0;
}
static int
-sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
uint32_t weak_checksum, uint8_t *strong_checksum)
{
- afr_private_t * priv = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t *rw_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- int loop_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
int child_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
int call_count = 0;
int i = 0;
int write_needed = 0;
priv = this->private;
- rw_local = rw_frame->local;
- rw_sh = &rw_local->self_heal;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- sh_frame = rw_sh->sh_frame;
+ sh_frame = loop_sh->sh_frame;
sh_local = sh_frame->local;
sh = &sh_local->self_heal;
sh_priv = sh->private;
- child_index = __child_index ((uint32_t) (long) cookie);
- loop_index = __loop_index ((uint32_t) (long) cookie);
-
- loop_state = sh_priv->loops[loop_index];
+ child_index = (long) cookie;
if (op_ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"checksum on %s failed on subvolume %s (%s)",
sh_local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
-
sh->op_failed = 1;
} else {
- memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN,
- strong_checksum,
- MD5_DIGEST_LEN);
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN,
+ strong_checksum, MD5_DIGEST_LEN);
}
- call_count = afr_frame_return (rw_frame);
+ call_count = afr_frame_return (loop_frame);
if (call_count == 0) {
for (i = 0; i < priv->child_count; i++) {
if (sh->sources[i] || !sh_local->child_up[i])
continue;
- if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN),
- loop_state->checksum + (sh->source * MD5_DIGEST_LEN),
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LEN),
MD5_DIGEST_LEN)) {
/*
Checksums differ, so this block
@@ -802,9 +597,9 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"checksum on subvolume %s at offset %"
PRId64" differs from that on source",
- priv->children[i]->name, loop_state->offset);
+ priv->children[i]->name, loop_sh->offset);
- write_needed = loop_state->write_needed[i] = 1;
+ write_needed = loop_sh->write_needed[i] = 1;
}
}
@@ -817,271 +612,130 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this,
UNLOCK (&sh_priv->lock);
if (write_needed && !sh->op_failed) {
- sh_diff_read (rw_frame, this, loop_index);
+ sh_loop_read (loop_frame, this);
} else {
- sh->offset += sh_priv->block_size;
-
- sh_diff_loop_return (rw_frame, this, loop_state);
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
}
}
return 0;
}
-
-static int
-sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max)
-{
- int i = 0;
-
- LOCK (&sh_priv->lock);
- {
- for (i = 0; i < max; i++) {
- if (sh_priv->loops[i]->active == _gf_false) {
- sh_priv->loops[i]->active = _gf_true;
- break;
- }
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (i == max) {
- gf_log ("[sh-diff]", GF_LOG_ERROR,
- "no free loops found! This shouldn't happen. Please"
- " report this to gluster-devel@nongnu.org");
- }
-
- return i;
-}
-
-
static int
-sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset)
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_local_t * rw_local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_self_heal_t * rw_sh = NULL;
- afr_sh_algo_diff_private_t * sh_priv = NULL;
- call_frame_t *rw_frame = NULL;
- uint32_t cookie = 0;
- int loop_index = 0;
- struct sh_diff_loop_state *loop_state = NULL;
- int32_t op_errno = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
-
- rw_frame = copy_frame (frame);
- if (!rw_frame)
- goto out;
-
- ALLOC_OR_GOTO (rw_local, afr_local_t, out);
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ afr_sh_algo_private_t *loop_sh_priv = NULL;
+ int call_count = 0;
+ int i = 0;
- rw_frame->local = rw_local;
- rw_sh = &rw_local->self_heal;
-
- rw_sh->offset = sh->offset;
- rw_sh->sh_frame = frame;
-
- call_count = sh->active_sinks + 1; /* sinks and source */
-
- rw_local->call_count = call_count;
-
- loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size);
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- loop_state = sh_priv->loops[loop_index];
- loop_state->offset = offset;
+ loop_sh_priv = loop_sh->private;
- /* we need to send both the loop index and child index,
- so squeeze them both into a 32-bit number */
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
- cookie = __make_cookie (loop_index, sh->source);
+ loop_local->call_count = call_count;
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
- priv->children[sh->source],
- priv->children[sh->source]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size);
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
continue;
- cookie = __make_cookie (loop_index, i);
-
- STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk,
- (void *) (long) cookie,
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
priv->children[i],
priv->children[i]->fops->rchecksum,
- sh->healing_fd,
- offset, sh_priv->block_size);
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size);
if (!--call_count)
break;
}
return 0;
-
-out:
- sh->op_failed = 1;
-
- sh_diff_loop_driver (frame, this, _gf_false, loop_state);
-
- return 0;
}
-
static int
-sh_diff_loop_driver (call_frame_t *frame, xlator_t *this,
- gf_boolean_t is_first_call,
- struct sh_diff_loop_state *loop_state)
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- char sh_type_str[256] = {0,};
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str));
-
- LOCK (&sh_priv->lock);
- {
- if (loop_state)
- sh_diff_loop_state_reset (loop_state, priv->child_count);
- if (_gf_false == is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh_priv->block_size;
- while ((0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- gf_log (this->name, GF_LOG_TRACE,
- "spawning a loop for offset %"PRId64,
- sh_priv->offset);
-
- sh_priv->offset += sh_priv->block_size;
- sh_priv->loops_running++;
-
- if (_gf_false == is_first_call)
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- sh_diff_loop_driver (frame, this, _gf_false, NULL);
- } else {
- sh_diff_checksum (frame, this, offset);
- offset += block_size;
- }
- }
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
- if (is_driver_done) {
- sh_diff_loop_driver_done (frame, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
}
+ sh_loop_read (loop_frame, this);
return 0;
}
+static int
+sh_do_nothing (call_frame_t *frame, xlator_t *this)
+{
+ return 0;
+}
int
-afr_sh_algo_diff (call_frame_t *frame, xlator_t *this)
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_diff_private_t *sh_priv = NULL;
- int i = 0;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
gf_afr_mt_afr_private_t);
if (!sh_priv)
- goto err;
-
- sh_priv->block_size = this->ctx->page_size;
-
- sh->private = sh_priv;
+ goto out;
LOCK_INIT (&sh_priv->lock);
- local->call_count = 0;
-
- sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size,
- sizeof (*sh_priv->loops),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops)
- goto err;
-
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]),
- gf_afr_mt_sh_diff_loop_state);
- if (!sh_priv->loops[i])
- goto err;
-
- sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count,
- MD5_DIGEST_LEN, gf_afr_mt_uint8_t);
- if (!sh_priv->loops[i]->checksum)
- goto err;
-
- sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*sh_priv->loops[i]->write_needed),
- gf_afr_mt_char);
- if (!sh_priv->loops[i]->write_needed)
- goto err;
-
- }
+ sh->private = sh_priv;
+ sh->sh_data_algo_start = sh_data_algo_start;
- sh_diff_loop_driver (frame, this, _gf_true, NULL);
+ sh_local->call_count = 0;
+ sh->loop_completion_cbk = sh_do_nothing;
+ sh_loop_driver (sh_frame, this, _gf_true, sh_frame);
+out:
return 0;
-err:
- if (sh_priv) {
- if (sh_priv->loops) {
- for (i = 0; i < priv->data_self_heal_window_size; i++) {
- if (sh_priv->loops[i]->write_needed)
- GF_FREE (sh_priv->loops[i]->write_needed);
- if (sh_priv->loops[i]->checksum)
- GF_FREE (sh_priv->loops[i]->checksum);
- if (sh_priv->loops[i])
- GF_FREE (sh_priv->loops[i]);
- }
-
- GF_FREE (sh_priv->loops);
- }
+}
- GF_FREE (sh_priv);
- }
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
return 0;
}
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
+ return 0;
+}
struct afr_sh_algorithm afr_self_heal_algorithms[] = {
{.name = "full", .fn = afr_sh_algo_full},
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
index 2790dbc6a52..04d8e8a6ce7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -30,31 +30,13 @@ struct afr_sh_algorithm {
};
extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-} afr_sh_algo_full_private_t;
-
-struct sh_diff_loop_state {
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- gf_boolean_t active;
-};
-
typedef struct {
- size_t block_size;
-
gf_lock_t lock;
unsigned int loops_running;
off_t offset;
int32_t total_blocks;
int32_t diff_blocks;
-
- struct sh_diff_loop_state **loops;
-} afr_sh_algo_diff_private_t;
+} afr_sh_algo_private_t;
#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index f66bdff8446..0846184c29f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -57,6 +57,29 @@ afr_sh_select_source (int sources[], int child_count)
return -1;
}
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int active_sinks = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] == 0 && local->child_up[i] == 1) {
+ active_sinks++;
+ sh->success[i] = 1;
+ } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
+ sh->success[i] = 1;
+ }
+ }
+ sh->active_sinks = active_sinks;
+}
/**
* sink_count - return number of sinks in sources array
@@ -112,7 +135,7 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
}
sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"pending_matrix: %s", buf);
}
@@ -718,7 +741,7 @@ out:
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
/* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
@@ -970,12 +993,13 @@ afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
return 0;
}
-static void
+void
afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent)
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc)
{
int child_index = 0;
afr_local_t *local = NULL;
@@ -991,15 +1015,13 @@ afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
{
if (op_ret == 0) {
sh->buf[child_index] = *buf;
- sh->parentbuf = *postparent;
sh->parentbufs[child_index] = *postparent;
sh->success_children[sh->success_count] = child_index;
sh->success_count++;
sh->xattr[child_index] = dict_ref (xattr);
} else {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
+ gf_log (this->name, GF_LOG_ERROR, "path %s on subvolume"
+ " %s => -1 (%s)", loc->path,
priv->children[child_index]->name,
strerror (op_errno));
local->self_heal.child_errno[child_index] = op_errno;
@@ -1201,6 +1223,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)
if (sh->gfid_sh_success_cbk)
sh->gfid_sh_success_cbk (frame, this);
+ sh->type = sh->buf[sh->source].ia_type;
sh_missing_entries_create (frame, this);
return;
out:
@@ -1227,7 +1250,7 @@ afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
op_errno, inode, buf, xattr,
- postparent);
+ postparent, &local->loc);
call_count = afr_frame_return (frame);
if (call_count == 0)
@@ -1417,6 +1440,8 @@ afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
for (i = 0; i < priv->child_count; i++) {
if (!purge_condition (local, priv, i))
continue;
+ gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
+ "on %d", local->loc.path, i);
afr_sh_call_entry_expunge_remove (frame, this,
(long) i, &sh->buf[i],
afr_sh_remove_entry_cbk);
@@ -1536,6 +1561,8 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this)
sh->child_errno,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
+ gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
+ local->loc.path);
afr_sh_set_error (sh, ENOENT);
sh->op_failed = 1;
afr_sh_purge_entry (frame, this);
@@ -1570,10 +1597,13 @@ afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postparent)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
op_errno, inode, buf, xattr,
- postparent);
+ postparent, &local->loc);
call_count = afr_frame_return (frame);
if (call_count == 0)
@@ -1669,10 +1699,15 @@ afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie,
dict_t *xattr, struct iatt *postparent)
{
int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
op_errno, inode, buf, xattr,
- postparent);
+ postparent, &sh->parent_loc);
call_count = afr_frame_return (frame);
if (call_count == 0)
@@ -1716,8 +1751,7 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
priv = this->private;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -1728,7 +1762,7 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (set_gfid) {
gf_log (this->name, GF_LOG_DEBUG,
"looking up %s with gfid: %s",
- local->loc.path, uuid_utoa (sh->sh_gfid_req));
+ loc->path, uuid_utoa (sh->sh_gfid_req));
GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
afr_set_dict_gfid (xattr_req, sh->sh_gfid_req);
}
@@ -1739,7 +1773,7 @@ afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (local->child_up[i]) {
gf_log (this->name, GF_LOG_DEBUG,
"looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ loc->path, priv->children[i]->name);
STACK_WIND_COOKIE (frame,
lookup_cbk,
@@ -1906,12 +1940,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
shc->need_metadata_self_heal = sh->need_metadata_self_heal;
shc->need_entry_self_heal = sh->need_entry_self_heal;
shc->forced_merge = sh->forced_merge;
- shc->healing_fd_opened = sh->healing_fd_opened;
shc->data_lock_held = sh->data_lock_held;
- if (sh->healing_fd && !sh->healing_fd_opened)
- shc->healing_fd = fd_ref (sh->healing_fd);
- else
- shc->healing_fd = sh->healing_fd;
shc->background = sh->background;
shc->type = sh->type;
@@ -1919,7 +1948,8 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
if (l->loc.path)
loc_copy (&lc->loc, &l->loc);
- lc->child_up = memdup (l->child_up, priv->child_count);
+ lc->child_up = memdup (l->child_up,
+ sizeof (*lc->child_up) * priv->child_count);
if (l->xattr_req)
lc->xattr_req = dict_ref (l->xattr_req);
@@ -1930,7 +1960,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
if (l->internal_lock.inode_locked_nodes)
lc->internal_lock.inode_locked_nodes =
memdup (l->internal_lock.inode_locked_nodes,
- priv->child_count);
+ sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
else
lc->internal_lock.inode_locked_nodes =
GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
@@ -1939,7 +1969,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
if (l->internal_lock.entry_locked_nodes)
lc->internal_lock.entry_locked_nodes =
memdup (l->internal_lock.entry_locked_nodes,
- priv->child_count);
+ sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
else
lc->internal_lock.entry_locked_nodes =
GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
@@ -1948,7 +1978,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
if (l->internal_lock.locked_nodes)
lc->internal_lock.locked_nodes =
memdup (l->internal_lock.locked_nodes,
- priv->child_count);
+ sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
else
lc->internal_lock.locked_nodes =
GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
@@ -1994,7 +2024,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
- if (!sh->unwound) {
+ if (!sh->unwound && sh->unwind) {
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
}
@@ -2068,8 +2098,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
gf_afr_mt_iatt);
sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),
gf_afr_mt_int);
- sh->success = GF_CALLOC (priv->child_count, sizeof (int),
- gf_afr_mt_int);
+ sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
+ gf_afr_mt_char);
sh->xattr = GF_CALLOC (priv->child_count, sizeof (dict_t *),
gf_afr_mt_dict_t);
sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index 043ebea2da6..3df5f0a0aa6 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -53,7 +53,7 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type);
int
@@ -82,6 +82,15 @@ afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
int32_t *success_children, afr_transaction_type type);
void
afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc);
+
int
afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid);
@@ -95,4 +104,22 @@ int
afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
int child_index, struct iatt *buf,
struct iatt *postparent);
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk);
+afr_local_t *
+afr_local_copy (afr_local_t *l, xlator_t *this);
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler);
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
+typedef int
+(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index dcaad9c8b47..5db2d94f5cb 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -50,6 +50,18 @@
#include "afr-self-heal-algorithm.h"
+extern int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
+
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
+
int
afr_sh_data_done (call_frame_t *frame, xlator_t *this)
{
@@ -61,20 +73,6 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- /*
- TODO: cleanup sh->*
- */
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- /* unref only if we created the fd ourselves */
-
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- /* for (i = 0; i < priv->child_count; i++) */
- /* sh->locked_nodes[i] = 0; */
-
sh->completion_cbk (frame, this);
return 0;
@@ -97,7 +95,7 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
- "flush or setattr failed on %s on subvolume %s: %s",
+ "flush failed on %s on subvolume %s: %s",
local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
}
@@ -113,18 +111,6 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
-
- return 0;
-}
-
-
int
afr_sh_data_close (call_frame_t *frame, xlator_t *this)
{
@@ -134,8 +120,6 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
int i = 0;
int call_count = 0;
int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
local = frame->local;
sh = &local->self_heal;
@@ -143,30 +127,11 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
source = sh->source;
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- if (sh->healing_fd_opened) {
- /* not our job to close the fd */
-
- afr_sh_data_done (frame, this);
- return 0;
- }
-
- if (!sh->healing_fd) {
- afr_sh_data_done (frame, this);
- return 0;
- }
-
- call_count = (sh->active_sinks + 1) * 2;
+ call_count = (sh->active_sinks + 1);
local->call_count = call_count;
/* closed source */
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"closing fd of %s on %s",
local->loc.path, priv->children[sh->source]->name);
@@ -177,14 +142,6 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
sh->healing_fd);
call_count--;
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->setattr,
- &local->loc, &stbuf, valid);
-
- call_count--;
-
if (call_count == 0)
return 0;
@@ -192,7 +149,7 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
if (sh->sources[i] || !local->child_up[i])
continue;
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"closing fd of %s on %s",
local->loc.path, priv->children[i]->name);
@@ -202,14 +159,6 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->flush,
sh->healing_fd);
- call_count--;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid);
-
if (!--call_count)
break;
}
@@ -217,28 +166,27 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
return 0;
}
-
int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
+
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
+ priv = this->private;
LOCK (&frame->lock);
{
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
+ "setattr failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
}
}
UNLOCK (&frame->lock);
@@ -246,15 +194,114 @@ afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_sh_data_close (frame, this);
+ afr_sh_data_finish (frame, this);
}
return 0;
}
+int
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
+ int source = 0;
+ int32_t valid = 0;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+
+ valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
+
+ stbuf.ia_atime = sh->buf[source].ia_atime;
+ stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
+ stbuf.ia_mtime = sh->buf[source].ia_mtime;
+ stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
+
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
+ local->call_count = call_count;
+
+ if (call_count == 0) {
+ GF_ASSERT (0);
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc, &stbuf, valid);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ GF_ASSERT (sh->source == child_index);
+ if (op_ret != -1)
+ sh->buf[child_index] = *buf;
+ afr_sh_data_setattr (frame, this);
+
+ return 0;
+}
+
+/*
+ * If there are any writes after the self-heal is triggered then the
+ * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
+ * stat on the source and then set the [am]times
+ */
+int
+afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
+ (void *) (long) sh->source,
+ priv->children[sh->source],
+ priv->children[sh->source]->fops->fstat,
+ sh->healing_fd);
+ return 0;
+}
+
+//Fun fact, lock_cbk is being used for both lock & unlock
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -264,15 +311,15 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
sh = &local->self_heal;
- GF_ASSERT (!sh->data_lock_held);
+ GF_ASSERT (sh->data_lock_held);
- int_lock->lock_cbk = afr_sh_data_close;
+ sh->data_lock_held = _gf_false;
+ int_lock->lock_cbk = lock_cbk;
afr_unlock (frame, this);
return 0;
}
-
int
afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
@@ -285,44 +332,52 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG,
"finishing data selfheal of %s", local->loc.path);
- if (!sh->data_lock_held)
- afr_sh_data_unlock (frame, this);
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, afr_sh_data_close);
else
afr_sh_data_close (frame, this);
return 0;
}
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finishing failed data selfheal of %s", local->loc.path);
+
+ sh->op_failed = 1;
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, afr_sh_data_close);
+ else
+ afr_sh_data_close (frame, this);
+ return 0;
+}
int
afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xattr)
{
- afr_local_t *local = NULL;
int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
- afr_children_add_child (sh->fresh_children, i, priv->child_count);
call_count = afr_frame_return (frame);
if (call_count == 0) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- afr_sh_data_finish (frame, this);
+ afr_sh_data_lock (frame, this, 0, 0,
+ afr_post_sh_big_lock_success,
+ afr_post_sh_big_lock_failure);
}
return 0;
}
-
int
afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
{
@@ -339,6 +394,9 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
priv->child_count, AFR_DATA_TRANSACTION);
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %"PRIu64,
+ frame->root->lk_owner);
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
gf_afr_mt_dict_t);
@@ -355,12 +413,13 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
priv->child_count, AFR_DATA_TRANSACTION);
+ GF_ASSERT (call_count);
local->call_count = call_count;
for (i = 0; i < priv->child_count; i++) {
if (!erase_xattr[i])
continue;
- gf_log (this->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_DEBUG,
"erasing pending flags from %s on %s",
local->loc.path, priv->children[i]->name);
@@ -385,85 +444,6 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
}
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_INFO,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
-
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sources = sh->sources;
- call_count = sh->active_sinks;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-
static struct afr_sh_algorithm *
sh_algo_from_name (xlator_t *this, char *name)
{
@@ -549,64 +529,138 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
struct afr_sh_algorithm *sh_algo = NULL;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
+ sh->algo_completion_cbk = afr_sh_data_erase_pending;
+ sh->algo_abort_cbk = afr_sh_data_fail;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
+ sh_algo = afr_sh_data_pick_algo (frame, this);
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
+ sh_algo->fn (frame, this);
+
+ return 0;
+}
+
+int
+afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int call_count = 0;
+ int child_index = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1)
+ gf_log (this->name, GF_LOG_INFO,
+ "ftruncate of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ftruncate of %s on subvolume %s completed",
+ local->loc.path,
+ priv->children[child_index]->name);
}
- sh->active_sinks = active_sinks;
+ UNLOCK (&frame->lock);
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
+ call_count = afr_frame_return (frame);
- sh->algo_completion_cbk = afr_sh_data_trim_sinks;
- sh->algo_abort_cbk = afr_sh_data_finish;
+ if (call_count == 0)
+ afr_sh_data_sync_prepare (frame, this);
- sh_algo = afr_sh_data_pick_algo (frame, this);
+ return 0;
+}
- sh_algo->fn (frame, this);
+
+int
+afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int *sources = NULL;
+ int call_count = 0;
+ int i = 0;
+
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sources = sh->sources;
+ call_count = sh->active_sinks;
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] || !local->child_up[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->ftruncate,
+ sh->healing_fd, sh->file_size);
+
+ if (!--call_count)
+ break;
+ }
return 0;
}
+int
+afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (sh->source < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+out:
+ return ret;
+}
int
afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_local_t * orig_local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
int nsources = 0;
int source = 0;
int i = 0;
+ int ret = 0;
local = frame->local;
sh = &local->self_heal;
priv = this->private;
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %"PRIu64,
+ frame->root->lk_owner);
nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
sh->sources, sh->success_children,
AFR_DATA_TRANSACTION);
@@ -643,30 +697,26 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
local->govinda_gOvinda = 1;
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"No active sources found.");
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
- sh->source = source;
- sh->block_size = 65536; /* TODO: make it configurable or use macro */
+ source = sh->source;
+ sh->block_size = this->ctx->page_size;
sh->file_size = sh->buf[source].ia_size;
if (FILE_HAS_HOLES (&sh->buf[source]))
sh->file_has_holes = 1;
- orig_local = sh->orig_frame->local;
- orig_local->cont.lookup.buf.ia_size = sh->buf[source].ia_size;
-
/* detect changes not visible through pending flags -- JIC */
for (i = 0; i < priv->child_count; i++) {
if (i == source || sh->child_errno[i])
@@ -676,27 +726,25 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
sh->sources[i] = 0;
}
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
- /*
- quick-read might have read the file, so send xattr from
- the source subvolume (http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=815)
- */
-
- dict_unref (orig_local->cont.lookup.xattr);
- if (orig_local->cont.lookup.xattrs)
- orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);
-
- if (sh->background) {
+ if (sh->background && sh->unwind) {
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
sh->unwound = _gf_true;
}
- afr_sh_data_sync_prepare (frame, this);
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no active sinks for performing self-heal on file %s",
+ local->loc.path);
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing file %s from subvolume %s to %d other",
+ local->loc.path, priv->children[sh->source]->name,
+ sh->active_sinks);
+ afr_sh_data_trim_sinks (frame, this);
return 0;
}
@@ -855,8 +903,8 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
local->call_count = call_count;
@@ -878,16 +926,14 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
return 0;
}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+void
+afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
- int call_count = -1;
int child_index = (long) cookie;
local = frame->local;
@@ -903,12 +949,55 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
priv->children[child_index]->name);
sh->xattr[child_index] = dict_ref (xattr);
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
}
}
UNLOCK (&frame->lock);
+}
+
+int
+afr_post_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr)
+{
+ int call_count = -1;
+ int ret = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+ local = frame->local;
+ sh = &local->self_heal;
call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ (void) afr_build_sources (this, sh->xattr, NULL,
+ sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION);
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret)
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_set_timestamps (frame, this);
+ }
+ return 0;
+}
+
+int
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr)
+{
+ int call_count = -1;
+
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+
+ call_count = afr_frame_return (frame);
if (call_count == 0) {
afr_sh_data_fstat (frame, this);
}
@@ -918,7 +1007,8 @@ afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
+afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this,
+ afr_fxattrop_cbk_t fxattrop_cbk)
{
afr_self_heal_t *sh = NULL;
afr_local_t *local = NULL;
@@ -933,8 +1023,8 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
local->call_count = call_count;
@@ -963,9 +1053,12 @@ afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
}
}
+ afr_reset_xattr (sh->xattr, priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
+ STACK_WIND_COOKIE (frame, fxattrop_cbk,
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->fxattrop,
@@ -992,7 +1085,45 @@ out:
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this);
+afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this, afr_sh_data_fxattrop_cbk);
+ return 0;
+}
+
+int
+afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
+ "failed for %s. by %"PRIu64,
+ local->loc.path, frame->root->lk_owner);
+ sh->data_lock_failure_handler (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
+ "done for %s by %"PRIu64". Proceding to self-heal",
+ local->loc.path, frame->root->lk_owner);
+ sh->data_lock_success_handler (frame, this);
+ }
+
+ return 0;
+}
int
afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
@@ -1006,22 +1137,24 @@ afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking data inodelks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_data_done (frame, this);
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "failed for %s. by %"PRIu64,
+ local->loc.path, frame->root->lk_owner);
+ int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
} else {
gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s. Proceeding to FOP", local->loc.path);
- afr_sh_data_fxattrop (frame, this);
+ "done for %s by %"PRIu64". Proceeding to self-heal",
+ local->loc.path, frame->root->lk_owner);
+ sh->data_lock_success_handler (frame, this);
}
return 0;
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -1036,8 +1169,8 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
afr_set_lock_number (frame, this);
- int_lock->lk_flock.l_start = 0;
- int_lock->lk_flock.l_len = 0;
+ int_lock->lk_flock.l_start = start;
+ int_lock->lk_flock.l_len = len;
int_lock->lk_flock.l_type = F_WRLCK;
int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
@@ -1046,9 +1179,45 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this)
return 0;
}
+int
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this, afr_post_sh_data_fxattrop_cbk);
+ return 0;
+}
int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_set_timestamps (frame, this);
+ return 0;
+}
+
+
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler)
{
afr_local_t * local = NULL;
afr_private_t * priv = NULL;
@@ -1059,18 +1228,11 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->data_lock_held) {
- /* caller has held the lock already,
- so skip locking */
-
- afr_sh_data_fxattrop (frame, this);
- return 0;
- }
-
- return afr_sh_data_lock_rec (frame, this);
+ sh->data_lock_success_handler = success_handler;
+ sh->data_lock_failure_handler = failure_handler;
+ return afr_sh_data_lock_rec (frame, this, start, len);
}
-
int
afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd)
@@ -1113,7 +1275,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0) {
if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
+ afr_sh_data_fail (frame, this);
return 0;
}
@@ -1121,7 +1283,9 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"fd for %s opened, commencing sync",
local->loc.path);
- afr_sh_data_lock (frame, this);
+ afr_sh_data_lock (frame, this, 0, 0,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
}
return 0;
@@ -1142,14 +1306,7 @@ afr_sh_data_open (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (sh->healing_fd_opened) {
- /* caller has opened the fd for us already, so skip open */
-
- afr_sh_data_lock (frame, this);
- return 0;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
fd = fd_create (local->loc.inode, frame->root->pid);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 9e80cb3d5a5..ddca2619db8 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -60,18 +60,10 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- /*
- TODO: cleanup sh->*
- */
-
if (sh->healing_fd)
fd_unref (sh->healing_fd);
sh->healing_fd = NULL;
- /* for (i = 0; i < priv->child_count; i++) { */
- /* sh->locked_nodes[i] = 0; */
- /* } */
-
sh->completion_cbk (frame, this);
return 0;
@@ -2192,9 +2184,7 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -2202,37 +2192,31 @@ afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
+ afr_sh_mark_source_sinks (frame, this);
if (source != -1)
sh->success[source] = 1;
- if (active_sinks == 0) {
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_TRACE,
"no active sinks for self-heal on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- if (source == -1 && active_sinks < 2) {
+ if (source == -1 && sh->active_sinks < 2) {
gf_log (this->name, GF_LOG_TRACE,
"cannot sync with 0 sources and 1 sink on dir %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
if (source != -1)
gf_log (this->name, GF_LOG_DEBUG,
"self-healing directory %s from subvolume %s to "
"%d other",
local->loc.path, priv->children[source]->name,
- active_sinks);
+ sh->active_sinks);
else
gf_log (this->name, GF_LOG_DEBUG,
"no active sources for %s found. "
@@ -2302,25 +2286,13 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
inode_t *inode, struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
+ int call_count = 0;
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- }
- }
- UNLOCK (&frame->lock);
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent, &local->loc);
call_count = afr_frame_return (frame);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 5445132ab8c..fb92cd999ed 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -55,7 +55,6 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -63,18 +62,9 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
memset (sh->buf, 0, sizeof (struct iatt) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-
-/* for (i = 0; i < priv->child_count; i++) { */
-/* sh->locked_nodes[i] = 1; */
-/* } */
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
+ memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
+ afr_reset_xattr (sh->xattr, priv->child_count);
if (local->govinda_gOvinda) {
gf_log (this->name, GF_LOG_INFO,
"split-brain detected, aborting selfheal of %s",
@@ -438,9 +428,7 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
afr_private_t *priv = NULL;
- int active_sinks = 0;
int source = 0;
- int i = 0;
local = frame->local;
sh = &local->self_heal;
@@ -448,26 +436,19 @@ afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
source = sh->source;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
gf_log (this->name, GF_LOG_DEBUG,
"no active sinks for performing self-heal on file %s",
local->loc.path);
afr_sh_metadata_finish (frame, this);
return 0;
}
- sh->active_sinks = active_sinks;
gf_log (this->name, GF_LOG_TRACE,
"syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
priv->children[source],
@@ -558,8 +539,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
if ((!IA_ISREG (sh->buf[source].ia_type)) &&
(!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children,
- priv->child_count);
+ afr_reset_children (sh->fresh_children, priv->child_count);
afr_get_fresh_children (sh->success_children, sh->sources,
sh->fresh_children, priv->child_count);
afr_inode_set_read_ctx (this, sh->inode, sh->source,
@@ -579,43 +559,17 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postparent)
{
afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
int call_count = 0;
int child_index = 0;
local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
child_index = (long) cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->ia_type);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent, &local->loc);
call_count = afr_frame_return (frame);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fc030433b69..6b0e9a7cfe5 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -322,7 +322,7 @@ afr_set_piggyback_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
memcpy (arr, pending[i], pending_xattr_size);
- arr[index]++;
+ arr[index] = hton32 (ntoh32(arr[index]) + 1);
ret = dict_set_bin (xattr, priv->pending_key[i],
arr, pending_xattr_size);
@@ -468,6 +468,32 @@ out:
return;
}
+int
+afr_fxattrop_call_count (afr_transaction_type type, afr_internal_lock_t *int_lock,
+ unsigned int child_count)
+{
+ int call_count = 0;
+
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ call_count = afr_locked_children_count (int_lock->inode_locked_nodes,
+ child_count);
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ call_count = afr_locked_children_count (int_lock->entry_locked_nodes,
+ child_count);
+ break;
+ }
+
+ if (type == AFR_ENTRY_RENAME_TRANSACTION) {
+ call_count *= 2;
+ }
+ return call_count;
+}
+
int
afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
@@ -503,12 +529,8 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
dict_ref (xattr[i]);
}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
-
+ call_count = afr_fxattrop_call_count (local->transaction.type, int_lock,
+ priv->child_count);
local->call_count = call_count;
if (local->fd)
@@ -546,6 +568,8 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!local->child_up[i])
continue;
+ if (local->fd && !local->fd_open_on[i])
+ continue;
ret = afr_set_pending_dict (priv, xattr[i],
local->pending);
@@ -750,7 +774,6 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
@@ -762,8 +785,10 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
afr_fd_ctx_t *fdctx = NULL;
afr_local_t *local = NULL;
int piggyback = 0;
+ afr_internal_lock_t *int_lock = NULL;
local = frame->local;
+ int_lock = &local->internal_lock;
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
@@ -773,13 +798,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
dict_ref (xattr[i]);
}
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
-
+ call_count = afr_fxattrop_call_count (local->transaction.type, int_lock,
+ priv->child_count);
if (call_count == 0) {
/* no child is up */
for (i = 0; i < priv->child_count; i++) {
@@ -803,6 +823,9 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
for (i = 0; i < priv->child_count; i++) {
if (!local->child_up[i])
continue;
+ if (local->fd && !local->fd_open_on[i])
+ continue;
+
ret = afr_set_pending_dict (priv, xattr[i],
local->pending);
@@ -1246,7 +1269,7 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local = frame->local;
priv = this->private;
- afr_transaction_local_init (local, priv);
+ afr_transaction_local_init (local, this);
local->transaction.resume = afr_transaction_resume;
local->transaction.type = type;
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 4b4428cc59e..10f274feccc 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -30,4 +30,6 @@ afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 236a24a6057..a392dbefa97 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -29,6 +29,7 @@
#include "call-stub.h"
#include "compat-errno.h"
#include "afr-mem-types.h"
+#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
@@ -147,17 +148,8 @@ typedef struct {
gf_boolean_t forced_merge; /* Is this a self-heal triggered to
forcibly merge the directories? */
- gf_boolean_t healing_fd_opened; /* true if caller has already
- opened fd */
-
- gf_boolean_t data_lock_held; /* true if caller has already
- acquired 0-0 lock */
-
- fd_t *healing_fd; /* set if callers has opened fd */
-
gf_boolean_t background; /* do self-heal in background
if possible */
-
ia_type_t type; /* st_mode of the entry we're doing
self-heal on */
inode_t *inode; /* inode on which the self-heal is
@@ -208,7 +200,7 @@ typedef struct {
int source;
int active_source;
int active_sinks;
- int *success;
+ unsigned char *success;
unsigned char *locked_nodes;
int lock_count;
@@ -217,24 +209,31 @@ typedef struct {
int op_failed;
+ gf_boolean_t data_lock_held;
+ gf_boolean_t eof_reached;
+ fd_t *healing_fd;
int file_has_holes;
blksize_t block_size;
off_t file_size;
off_t offset;
+ unsigned char *write_needed;
+ uint8_t *checksum;
afr_post_remove_call_t post_remove_call;
loc_t parent_loc;
call_frame_t *orig_frame;
+ call_frame_t *old_loop_frame;
gf_boolean_t unwound;
- /* private data for the particular self-heal algorithm */
- void *private;
-
- int (*flush_self_heal_cbk) (call_frame_t *frame, xlator_t *this);
+ afr_sh_algo_private_t *private;
+ afr_lock_cbk_t data_lock_success_handler;
+ afr_lock_cbk_t data_lock_failure_handler;
int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
+ afr_lock_cbk_t loop_completion_cbk;
int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
@@ -327,12 +326,11 @@ typedef struct {
uint64_t lock_number;
int32_t lk_call_count;
+ int32_t lk_expected_count;
int32_t lock_op_ret;
int32_t lock_op_errno;
-
- int (*lock_cbk) (call_frame_t *, xlator_t *);
-
+ afr_lock_cbk_t lock_cbk;
} afr_internal_lock_t;
typedef struct _afr_locked_fd {
@@ -365,6 +363,7 @@ typedef struct _afr_local {
loc_t newloc;
fd_t *fd;
+ int32_t *fd_open_on;
glusterfs_fop_t fop;
@@ -387,7 +386,7 @@ typedef struct _afr_local {
dict_t *dict;
int optimistic_change_log;
- int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
/*
This struct contains the arguments for the "continuation"
@@ -685,10 +684,20 @@ typedef struct _afr_local {
struct marker_str marker;
} afr_local_t;
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ struct list_head call_list;
+ call_frame_t *frame;
+} afr_fd_paused_call_t;
typedef struct {
unsigned int *pre_op_done;
- unsigned int *opened_on; /* which subvolumes the fd is open on */
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
unsigned int *pre_op_piggyback;
int flags;
@@ -703,6 +712,7 @@ typedef struct {
struct list_head entries; /* needed for readdir failover */
unsigned char *locked_on; /* which subvolumes locks have been successful */
+ struct list_head paused_calls; /* queued calls while fix_open happens */
} afr_fd_ctx_t;
@@ -790,8 +800,11 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
void
afr_build_parent_loc (loc_t *parent, loc_t *child);
-int
-afr_up_children_count (int child_count, unsigned char *child_up);
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count);
gf_boolean_t
afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
@@ -831,7 +844,7 @@ int
afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
int
-afr_openfd_flush (call_frame_t *frame, xlator_t *this, fd_t *fd);
+afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
#define AFR_STACK_UNWIND(fop, frame, params ...) \
do { \
@@ -872,7 +885,7 @@ AFR_BASENAME (const char *str)
}
int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv);
+afr_transaction_local_init (afr_local_t *local, xlator_t *this);
int32_t
afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
@@ -957,4 +970,18 @@ afr_resultant_errno_get (int32_t *children,
void
afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *stale_children);
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t is_background, ia_type_t ia_type,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno));
+int
+afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
+ int need_open_count, int *need_open);
+int
+afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 769e03b14ea..d27ecd21526 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1654,7 +1654,7 @@ afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
if (call_count == 0) {
local->transaction.resume (frame, this);