diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-dir-read.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 398 |
1 files changed, 94 insertions, 304 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f2e6760cf..689dd84e6 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -51,7 +42,7 @@ int afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, int32_t sh_failed) { afr_local_t *local = NULL; @@ -60,7 +51,7 @@ afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, afr_set_opendir_done (this, local->fd->inode); AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); + local->op_errno, local->fd, NULL); return 0; } @@ -99,7 +90,7 @@ __checksums_differ (uint32_t *checksum, int child_count, int32_t afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries) + gf_dirent_t *entries, dict_t *xdata) { afr_private_t * priv = NULL; afr_local_t * local = NULL; @@ -137,7 +128,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, } list_for_each_entry_safe (entry, tmp, &entries->list, list) { - entry_cksum = gf_rsync_weak_checksum (entry->d_name, + entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name, strlen (entry->d_name)); local->cont.opendir.checksum[child_index] ^= entry_cksum; } @@ -152,7 +143,7 @@ afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie, (void *) (long) child_index, priv->children[child_index], priv->children[child_index]->fops->readdir, - local->fd, 131072, last_offset); + local->fd, 131072, last_offset, NULL); return 0; @@ -175,7 +166,7 @@ out: afr_set_opendir_done (this, inode); AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); + local->op_errno, local->fd, NULL); } } @@ -208,7 +199,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this) (void *) (long) i, priv->children[i], priv->children[i]->fops->readdir, - local->fd, 131072, 0); + local->fd, 131072, 0, NULL); if (!--call_count) break; @@ -222,7 +213,7 @@ afr_examine_dir (call_frame_t *frame, xlator_t *this) int32_t afr_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd) + fd_t *fd, dict_t *xdata) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -242,8 +233,7 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie, { if (op_ret >= 0) { local->op_ret = op_ret; - ret = afr_child_fd_ctx_set (this, fd, child_index, - 0, 0); + ret = afr_child_fd_ctx_set (this, fd, child_index, 0); if (ret) { local->op_ret = -1; local->op_errno = -ret; @@ -263,7 +253,7 @@ unlock: goto out; if (!afr_is_opendir_done (this, local->fd->inode) && - up_children_count > 1) { + up_children_count > 1 && priv->entry_self_heal) { /* * This is the first opendir on this inode. We need @@ -272,7 +262,7 @@ unlock: * to regular entry self-heal because the readdir * call is sent only to the first subvolume, and * thus files that exist only there will never be healed - * otherwise (assuming changelog shows no anamolies). + * otherwise (assuming changelog shows no anomalies). */ gf_log (this->name, GF_LOG_TRACE, @@ -291,7 +281,7 @@ unlock: out: AFR_STACK_UNWIND (opendir, frame, local->op_ret, - local->op_errno, local->fd); + local->op_errno, local->fd, NULL); return 0; } @@ -307,7 +297,6 @@ afr_opendir (call_frame_t *frame, xlator_t *this, int i = 0; int ret = -1; int call_count = -1; - int32_t op_ret = -1; int32_t op_errno = 0; VALIDATE_OR_GOTO (frame, out); @@ -318,16 +307,15 @@ afr_opendir (call_frame_t *frame, xlator_t *this, child_count = priv->child_count; - ALLOC_OR_GOTO (local, afr_local_t, out); - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) goto out; - } loc_copy (&local->loc, loc); - frame->local = local; local->fd = fd_ref (fd); call_count = local->call_count; @@ -338,18 +326,17 @@ afr_opendir (call_frame_t *frame, xlator_t *this, (void*) (long) i, priv->children[i], priv->children[i]->fops->opendir, - loc, fd); + loc, fd, NULL); if (!--call_count) break; } } - op_ret = 0; + ret = 0; out: - if (op_ret == -1) { - AFR_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd); - } + if (ret < 0) + AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL); return 0; } @@ -371,85 +358,6 @@ struct entry_name { struct list_head list; }; - -static gf_boolean_t -remembered_name (const char *name, struct list_head *entries) -{ - struct entry_name *e = NULL; - gf_boolean_t ret = _gf_false; - - list_for_each_entry (e, entries, list) { - if (!strcmp (name, e->name)) { - ret = _gf_true; - goto out; - } - } - -out: - return ret; -} - - -static void -afr_remember_entries (gf_dirent_t *entries, fd_t *fd) -{ - struct entry_name *n = NULL; - gf_dirent_t *entry = NULL; - int ret = 0; - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - ret = fd_ctx_get (fd, THIS, &ctx); - if (ret < 0) { - gf_log (THIS->name, GF_LOG_INFO, - "could not get fd ctx for fd=%p", fd); - return; - } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - list_for_each_entry (entry, &entries->list, list) { - n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name); - n->name = gf_strdup (entry->d_name); - INIT_LIST_HEAD (&n->list); - - list_add (&n->list, &fd_ctx->entries); - } -} - - -static off_t -afr_filter_entries (gf_dirent_t *entries, fd_t *fd) -{ - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - int ret = 0; - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - off_t offset = 0; - - ret = fd_ctx_get (fd, THIS, &ctx); - if (ret < 0) { - gf_log (THIS->name, GF_LOG_INFO, - "could not get fd ctx for fd=%p", fd); - return -1; - } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - list_for_each_entry_safe (entry, tmp, &entries->list, list) { - offset = entry->d_off; - - if (remembered_name (entry->d_name, &fd_ctx->entries)) { - list_del (&entry->list); - GF_FREE (entry); - } - } - - return offset; -} - - static void afr_forget_entries (fd_t *fd) { @@ -475,174 +383,70 @@ afr_forget_entries (fd_t *fd) } } - -int32_t -afr_readdir_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries) +static void +afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd) { - afr_local_t * local = NULL; gf_dirent_t * entry = NULL; gf_dirent_t * tmp = NULL; - local = frame->local; - - if (op_ret == -1) - goto out; - list_for_each_entry_safe (entry, tmp, &entries->list, list) { - if ((local->fd->inode == local->fd->inode->table->root) - && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (__is_root_gfid (fd->inode->gfid) && + !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { list_del_init (&entry->list); GF_FREE (entry); } } - -out: - AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries); - - return 0; } - int32_t -afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) +afr_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + gf_dirent_t *entries, dict_t *xdata) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int32_t next_call_child = -1; - int ret = 0; - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - int32_t *last_index = NULL; - int32_t read_child = -1; - int32_t *fresh_children = NULL; - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - off_t offset = 0; - int32_t call_child = -1; + afr_local_t *local = NULL; - priv = this->private; - children = priv->children; + if (op_ret == -1) + goto out; local = frame->local; + afr_readdir_filter_trash_dir (entries, local->fd); - read_child = (long) cookie; - last_index = &local->cont.readdir.last_index; - fresh_children = local->fresh_children; - - /* the value of the last_index changes if afr_next_call_child is - * called. So to find the call_child of this callback use last_index - * before the next_call_child call. - */ - if (*last_index == -1) - call_child = read_child; - else - call_child = fresh_children[*last_index]; - - if (priv->strict_readdir) { - ret = fd_ctx_get (local->fd, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_INFO, - "could not get fd ctx for fd=%p", local->fd); - op_ret = -1; - op_errno = -ret; - goto out; - } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - if (op_ret == -1) { - next_call_child = afr_next_call_child (fresh_children, - local->child_up, - priv->child_count, - last_index, - read_child); - if (next_call_child < 0) - goto out; - gf_log (this->name, GF_LOG_TRACE, - "starting readdir afresh on child %d, offset %"PRId64, - next_call_child, (uint64_t) 0); - - fd_ctx->failed_over = _gf_true; - - STACK_WIND_COOKIE (frame, afr_readdirp_cbk, - (void *) (long) read_child, - children[next_call_child], - children[next_call_child]->fops->readdirp, - local->fd, - local->cont.readdir.size, 0); - return 0; - } - } - - if (op_ret != -1) { - list_for_each_entry_safe (entry, tmp, &entries->list, list) { - if ((local->fd->inode == local->fd->inode->table->root) - && !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) { - list_del_init (&entry->list); - GF_FREE (entry); - } - } - } - - if (priv->strict_readdir) { - if (fd_ctx->failed_over) { - if (list_empty (&entries->list)) { - gf_log (this->name, GF_LOG_DEBUG, - "no entries found"); - goto out; - } - - offset = afr_filter_entries (entries, local->fd); +out: + AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL); + return 0; +} - afr_remember_entries (entries, local->fd); - if (list_empty (&entries->list)) { - /* All the entries we got were duplicate. We - shouldn't send an empty list now, because - that'll make the application stop reading. So - try to get more entries */ +int32_t +afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + afr_local_t *local = NULL; - gf_log (this->name, GF_LOG_TRACE, - "trying to fetch non-duplicate entries " - "from offset %"PRId64", child %s", - offset, children[call_child]->name); + if (op_ret == -1) + goto out; - STACK_WIND_COOKIE (frame, afr_readdirp_cbk, - (void *) (long) read_child, - children[call_child], - children[call_child]->fops->readdirp, - local->fd, local->cont.readdir.size, offset); - return 0; - } - } else { - afr_remember_entries (entries, local->fd); - } - } + local = frame->local; + afr_readdir_filter_trash_dir (entries, local->fd); out: - AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries); - + AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL); return 0; } int32_t afr_do_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, int whichop) + fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - int ret = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - uint64_t read_child = 0; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int ret = -1; + int32_t op_errno = 0; + uint64_t read_child = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -651,14 +455,12 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, priv = this->private; children = priv->children; - ALLOC_OR_GOTO (local, afr_local_t, out); - frame->local = local; + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) goto out; - } local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { @@ -668,79 +470,67 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); - op_ret = afr_get_call_child (this, local->child_up, read_child, - local->fresh_children, - &call_child, - &local->cont.readdir.last_index); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; + ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.readdir.last_index); + if (ret < 0) { + op_errno = -ret; goto out; } - local->fd = fd_ref (fd); - local->cont.readdir.size = size; - - if (priv->strict_readdir) { - ret = fd_ctx_get (fd, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_INFO, - "could not get fd ctx for fd=%p", fd); - op_errno = -ret; - goto out; - } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - if (fd_ctx->last_tried != call_child) { - gf_log (this->name, GF_LOG_TRACE, - "first up child has changed from %d to %d, " - "restarting readdir from offset 0", - fd_ctx->last_tried, call_child); - - fd_ctx->failed_over = _gf_true; - offset = 0; - } + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) { + op_errno = EBADF; + goto out; + } - fd_ctx->last_tried = call_child; + if ((offset == 0) || (fd_ctx->call_child == -1)) { + fd_ctx->call_child = call_child; + } else if ((priv->readdir_failover == _gf_false) && + (call_child != fd_ctx->call_child)) { + op_errno = EBADF; + goto out; } + local->fd = fd_ref (fd); + local->cont.readdir.size = size; + local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL; + if (whichop == GF_FOP_READDIR) STACK_WIND_COOKIE (frame, afr_readdir_cbk, (void *) (long) call_child, children[call_child], children[call_child]->fops->readdir, fd, - size, offset); + size, offset, dict); else STACK_WIND_COOKIE (frame, afr_readdirp_cbk, (void *) (long) call_child, children[call_child], children[call_child]->fops->readdirp, fd, - size, offset); + size, offset, dict); - op_ret = 0; + return 0; out: - if (op_ret == -1) { - AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, NULL); - } + AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); return 0; } int32_t afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, dict_t *xdata) { - afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR); + afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata); return 0; } int32_t afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, dict_t *dict) { - afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP); + afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict); return 0; } |
