summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-dir-read.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/afr/src/afr-dir-read.c')
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c474
1 files changed, 136 insertions, 338 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index b2a001a19..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -49,9 +40,9 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
-
int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed)
{
afr_local_t *local = NULL;
@@ -60,7 +51,7 @@ afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this)
afr_set_opendir_done (this, local->fd->inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -70,15 +61,19 @@ gf_boolean_t
__checksums_differ (uint32_t *checksum, int child_count,
unsigned char *child_up)
{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
-
- cksum = checksum[0];
+ int ret = _gf_false;
+ int i = 0;
+ uint32_t cksum = 0;
+ gf_boolean_t activate_check = _gf_false;
for (i = 0; i < child_count; i++) {
if (!child_up[i])
continue;
+ if (_gf_false == activate_check) {
+ cksum = checksum[i];
+ activate_check = _gf_true;
+ continue;
+ }
if (cksum != checksum[i]) {
ret = _gf_true;
@@ -95,22 +90,24 @@ __checksums_differ (uint32_t *checksum, int child_count,
int32_t
afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, dict_t *xdata)
{
afr_private_t * priv = NULL;
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
int child_index = 0;
uint32_t entry_cksum = 0;
int call_count = 0;
off_t last_offset = 0;
- char sh_type_str[256] = {0,};
+ inode_t *inode = NULL;
priv = this->private;
local = frame->local;
sh = &local->self_heal;
+ inode = local->fd->inode;
child_index = (long) cookie;
@@ -131,7 +128,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
}
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum (entry->d_name,
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
strlen (entry->d_name));
local->cont.opendir.checksum[child_index] ^= entry_cksum;
}
@@ -146,7 +143,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
(void *) (long) child_index,
priv->children[child_index],
priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset);
+ local->fd, 131072, last_offset, NULL);
return 0;
@@ -158,27 +155,18 @@ out:
priv->child_count,
local->child_up)) {
- sh->need_entry_self_heal = _gf_true;
+ sh->do_entry_self_heal = _gf_true;
sh->forced_merge = _gf_true;
- sh->type = local->fd->inode->ia_type;
- sh->background = _gf_false;
- sh->unwind = afr_examine_dir_sh_unwind;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
- gf_log (this->name, GF_LOG_INFO,
- "%s self-heal triggered. path: %s, "
- "reason: checksums of directory differ,"
- " forced merge option set",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this);
+
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
} else {
- afr_set_opendir_done (this, local->fd->inode);
+ afr_set_opendir_done (this, inode);
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
}
}
@@ -201,7 +189,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
sizeof (*local->cont.opendir.checksum),
gf_afr_mt_int32_t);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
local->call_count = call_count;
@@ -211,7 +199,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->readdir,
- local->fd, 131072, 0);
+ local->fd, 131072, 0, NULL);
if (!--call_count)
break;
@@ -225,27 +213,37 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this)
int32_t
afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int32_t up_children_count = 0;
int ret = -1;
int call_count = -1;
+ int32_t child_index = 0;
priv = this->private;
local = frame->local;
+ child_index = (long) cookie;
- up_children_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
LOCK (&frame->lock);
{
- if (op_ret >= 0)
+ if (op_ret >= 0) {
local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
local->op_errno = op_errno;
}
+unlock:
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
@@ -254,17 +252,8 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
if (local->op_ret != 0)
goto out;
- ret = afr_fd_ctx_set (this, local->fd);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set fd ctx for fd %p",
- local->fd);
- goto out;
- }
if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
+ up_children_count > 1 && priv->entry_self_heal) {
/*
* This is the first opendir on this inode. We need
@@ -273,7 +262,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
* to regular entry self-heal because the readdir
* call is sent only to the first subvolume, and
* thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anamolies).
+ * otherwise (assuming changelog shows no anomalies).
*/
gf_log (this->name, GF_LOG_TRACE,
@@ -292,7 +281,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
out:
AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd);
+ local->op_errno, local->fd, NULL);
return 0;
}
@@ -308,7 +297,6 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
int i = 0;
int ret = -1;
int call_count = -1;
- int32_t op_ret = -1;
int32_t op_errno = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -319,37 +307,36 @@ afr_opendir (call_frame_t *frame, xlator_t *this,
child_count = priv->child_count;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
goto out;
- }
loc_copy (&local->loc, loc);
- frame->local = local;
local->fd = fd_ref (fd);
call_count = local->call_count;
for (i = 0; i < child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
if (!--call_count)
break;
}
}
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
return 0;
}
@@ -371,85 +358,6 @@ struct entry_name {
struct list_head list;
};
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
-{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
-
-out:
- return ret;
-}
-
-
-static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
-{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
-
- list_add (&n->list, &fd_ctx->entries);
- }
-}
-
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
-{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
-
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
-
- return offset;
-}
-
-
static void
afr_forget_entries (fd_t *fd)
{
@@ -475,176 +383,70 @@ afr_forget_entries (fd_t *fd)
}
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
gf_dirent_t * entry = NULL;
gf_dirent_t * tmp = NULL;
- int child_index = -1;
-
- priv = this->private;
- local = frame->local;
- child_index = (long) cookie;
-
- if (op_ret == -1)
- goto out;
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
list_del_init (&entry->list);
GF_FREE (entry);
}
}
-
-out:
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries);
-
- return 0;
}
-
int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- ino_t inum = 0;
- int call_child = 0;
- int ret = 0;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- int child_index = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ if (op_ret == -1)
+ goto out;
local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- child_index = (long) cookie;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (child_went_down (op_ret, op_errno)) {
- if (all_tried (child_index, priv->child_count)) {
- gf_log (this->name, GF_LOG_INFO,
- "all options tried going out");
- goto out;
- }
-
- call_child = ++child_index;
-
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, local->fd,
- local->cont.readdir.size, 0);
- return 0;
- }
- }
-
- if (op_ret != -1) {
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- inum = afr_itransform (entry->d_ino, priv->child_count,
- child_index);
- entry->d_ino = inum;
- inum = afr_itransform (entry->d_stat.ia_ino,
- priv->child_count, child_index);
- entry->d_stat.ia_ino = inum;
-
- if ((local->fd->inode == local->fd->inode->table->root)
- && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
- list_del_init (&entry->list);
- GF_FREE (entry);
- }
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- afr_remember_entries (entries, local->fd);
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that'll make the application stop reading. So
- try to get more entries */
+int32_t
+afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[child_index]->name);
+ if (op_ret == -1)
+ goto out;
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) child_index,
- children[child_index],
- children[child_index]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries);
-
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
return 0;
}
-
int32_t
afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop)
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -653,86 +455,82 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
if (ret < 0) {
op_errno = -ret;
goto out;
}
- frame->local = local;
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_INFO,
- "no child is up");
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
goto out;
}
local->fd = fd_ref (fd);
local->cont.readdir.size = size;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
-
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
-
- fd_ctx->last_tried = call_child;
- }
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
if (whichop == GF_FOP_READDIR)
STACK_WIND_COOKIE (frame, afr_readdir_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdir, fd,
- size, offset);
+ size, offset, dict);
else
STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
(void *) (long) call_child,
children[call_child],
children[call_child]->fops->readdirp, fd,
- size, offset);
+ size, offset, dict);
- op_ret = 0;
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP);
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
return 0;
}