summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-open.c
diff options
context:
space:
mode:
author Vikas Gorur <vikas@gluster.com> 2009-11-24 08:45:09 +0000
committer Anand V. Avati <avati@dev.gluster.com> 2009-11-24 06:40:08 -0800
commit 74612a456ad1602f8038fae79fee654eb427602a (patch)
tree a8c57ae1b5919688faa00985aad3677e0df9ea1b /xlators/cluster/afr/src/afr-open.c
parent 218959e0597b16755a98b19786ed6a42cd15cbc4 (diff)
cluster/afr: Do self-heal on reopened fds.
This patch brings in partial support for self-heal of open fds. The precondition is that the fd should have been opened successfully during the initial open() (or create()), and we assume that protocol/client has successfully reopened the fd when the subvolume comes back up. It works by doing an "up/down flush" (a dummy flush transaction to do post-op wherever necessary) and then triggering data self-heal on the file in the post-post-op hook of the dummy flush transaction. This ensures that any writes that come in during self-heal will wait until self-heal completes. The up/down flush is also done when a subvolume goes down, so that post-op is done on all subvolumes where pre-op was done. Signed-off-by: Vikas Gorur <vikas@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170
Diffstat (limited to 'xlators/cluster/afr/src/afr-open.c')
-rw-r--r-- xlators/cluster/afr/src/afr-open.c 356
1 files changed, 356 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
new file mode 100644
index 000000000..945f5cddf
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -0,0 +1,356 @@
+/*
+ Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+
+#include "afr-self-heal.h"
+
+
+/*
+ * Completion callback for the ftruncate that afr_open_cbk winds when the
+ * caller's open flags contained O_TRUNC.
+ *
+ * Registers the AFR fd context on local->fd and then unwinds the open fop
+ * with the status accumulated in the frame's local. A negative return from
+ * afr_fd_ctx_set is treated as a negated errno and turns the open into a
+ * failure. The ftruncate result itself (op_ret, op_errno, prebuf, postbuf)
+ * is not consulted here.
+ *
+ * Always returns 0.
+ */
+int
+afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct stat *prebuf,
+                        struct stat *postbuf)
+{
+        afr_local_t *open_local  = NULL;
+        int          ctx_set_ret = 0;
+
+        open_local = frame->local;
+
+        ctx_set_ret = afr_fd_ctx_set (this, open_local->fd);
+        if (ctx_set_ret < 0) {
+                open_local->op_ret   = -1;
+                open_local->op_errno = -ctx_set_ret;
+        }
+
+        AFR_STACK_UNWIND (open, frame, open_local->op_ret,
+                          open_local->op_errno, open_local->fd);
+
+        return 0;
+}
+
+
+/*
+ * Per-child callback for the open calls wound by afr_open.
+ *
+ * Under the frame lock it records the child's outcome in the frame's local:
+ * a failure (op_ret == -1) stores op_errno, a success stores op_ret and
+ * bumps success_count. When afr_frame_return reports that this was the last
+ * outstanding reply (0), the open is finished in one of two ways:
+ *
+ *  - if the caller asked for O_TRUNC and at least one child succeeded, an
+ *    ftruncate to length 0 is wound through this translator and
+ *    afr_open_ftruncate_cbk completes the open;
+ *  - otherwise the AFR fd context is set and the open is unwound here.
+ *
+ * The final step always operates on local->fd (the reference taken in
+ * afr_open) rather than on the `fd` argument of whichever child happened to
+ * reply last, so it does not depend on what a failing child passed back.
+ * This matches afr_open_ftruncate_cbk and the unwind, which already use
+ * local->fd.
+ *
+ * Always returns 0.
+ */
+int
+afr_open_cbk (call_frame_t *frame, void *cookie,
+              xlator_t *this, int32_t op_ret, int32_t op_errno,
+              fd_t *fd)
+{
+        afr_local_t *local      = NULL;
+        int          ret        = 0;
+        int          call_count = -1;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                }
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                        local->success_count++;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                if ((local->cont.open.flags & O_TRUNC)
+                    && (local->op_ret >= 0)) {
+                        /* O_TRUNC was stripped from the flags sent to the
+                           children in afr_open; truncate through this
+                           translator instead. */
+                        STACK_WIND (frame, afr_open_ftruncate_cbk,
+                                    this, this->fops->ftruncate,
+                                    local->fd, 0);
+                } else {
+                        ret = afr_fd_ctx_set (this, local->fd);
+
+                        if (ret < 0) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "could not set fd ctx for fd=%p",
+                                        local->fd);
+
+                                /* negative return is a negated errno */
+                                local->op_ret   = -1;
+                                local->op_errno = -ret;
+                        }
+
+                        AFR_STACK_UNWIND (open, frame, local->op_ret,
+                                          local->op_errno, local->fd);
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * open fop entry point: winds open to every child marked up in the local's
+ * child_up array.
+ *
+ * frame   - call frame of the open request
+ * this    - this translator
+ * loc     - location being opened
+ * flags   - caller's open flags; O_TRUNC is masked out of the flags sent to
+ *           the children and remembered in local->cont.open.flags so that
+ *           afr_open_cbk can act on it once all children have replied
+ * fd      - fd being opened; a reference is stored in local->fd
+ * wbflags - passed through to the children unchanged
+ *
+ * If the inode is in split-brain the open fails with EIO. Allocation or
+ * AFR_LOCAL_INIT failures unwind the open with op_ret -1 and the matching
+ * errno. Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+          fd_t *fd, int32_t wbflags)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int i   = 0;
+        int ret = -1;
+
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+        int32_t wind_flags = flags & (~O_TRUNC);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        priv = this->private;
+
+        if (afr_is_split_brain (this, loc->inode)) {
+                /* self-heal failed */
+                op_errno = EIO;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before initialising it, so that a
+           failing AFR_LOCAL_INIT does not leave it orphaned: the error
+           unwind below can only release what the frame points at.
+           NOTE(review): relies on AFR_STACK_UNWIND / frame teardown
+           releasing frame->local - confirm against afr.h. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        call_count = local->call_count;
+
+        local->cont.open.flags = flags;
+        local->fd              = fd_ref (fd);
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->open,
+                                           loc, wind_flags, fd, wbflags);
+
+                        /* stop once every expected call has been wound */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (open, frame, op_ret, op_errno, fd);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Self-heal completion hook used by the up/down flush: invokes the
+ * post_post_op handler stored in the transaction of the frame's local.
+ *
+ * Always returns 0.
+ */
+int
+afr_up_down_flush_sh_completion_cbk (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *flush_local = frame->local;
+
+        flush_local->transaction.post_post_op (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Post-post-op hook of the up/down flush transaction (installed by
+ * afr_up_down_flush for AFR_CHILD_UP_FLUSH).
+ *
+ * Fills in the self-heal and lookup fields of the frame's local from
+ * local->fd's inode, marks data self-heal as needed, and starts
+ * afr_self_heal with afr_up_down_flush_sh_completion_cbk as its
+ * completion callback. Always returns 0.
+ */
+int
+afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->calling_fop = GF_FOP_FLUSH;
+
+// sh->healing_fd = local->fd;
+
+// sh->healing_fd_opened = _gf_true;
+
+ local->cont.lookup.inode = local->fd->inode;
+
+ /* Build local->loc from the fd's inode.
+    NOTE(review): the result of inode_path is not checked; if it leaves
+    loc.path NULL the strrchr below dereferences a null pointer. */
+ inode_path (local->fd->inode, NULL, (char **)&local->loc.path);
+ /* loc.name ends up pointing at the last '/' in the path, not at the
+    character after it (NULL if the path has no '/'). */
+ local->loc.name = strrchr (local->loc.path, '/');
+ local->loc.inode = inode_ref (local->fd->inode);
+ local->loc.parent = inode_parent (local->fd->inode, 0, NULL);
+
+ /* presumably tells self-heal the data lock is already held by the
+    enclosing flush transaction - TODO confirm in afr-self-heal */
+ sh->data_lock_held = _gf_true;
+
+ local->need_data_self_heal = _gf_true;
+ local->cont.lookup.buf.st_mode = local->fd->inode->st_mode;
+ local->child_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+
+ /* the same completion callback is stored in sh and passed to
+    afr_self_heal below */
+ sh->flush_self_heal_cbk = afr_up_down_flush_sh_completion_cbk;
+
+ afr_self_heal (frame, this, afr_up_down_flush_sh_completion_cbk,
+ _gf_false);
+
+ return 0;
+}
+
+
+/*
+ * "fop" stage of the up/down flush transaction. The flush is a dummy: no
+ * call is wound to any child, the transaction is simply resumed via
+ * local->transaction.resume.
+ *
+ * Always returns 0.
+ */
+int
+afr_up_down_flush_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        local->transaction.resume (frame, this);
+        return 0;
+}
+
+
+/*
+ * "done" stage of the up/down flush transaction.
+ *
+ * With local->fd's lock held, looks up this translator's fd context. If
+ * the lookup succeeds, the context's down_count and up_count are brought
+ * in line with the translator-wide counters and pre_op_done is cleared for
+ * every child marked up in local->child_up. If the lookup fails, the
+ * context is left untouched.
+ *
+ * In both cases local->up_down_flush_cbk is invoked afterwards, outside
+ * the lock. Always returns 0.
+ */
+int
+afr_up_down_flush_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *conf        = this->private;
+        afr_local_t   *flush_local = frame->local;
+        afr_fd_ctx_t  *fdctx       = NULL;
+        uint64_t       raw_ctx     = 0;
+        int            child       = 0;
+
+        LOCK (&flush_local->fd->lock);
+        {
+                if (__fd_ctx_get (flush_local->fd, this, &raw_ctx) >= 0) {
+                        fdctx = (afr_fd_ctx_t *)(long) raw_ctx;
+
+                        fdctx->down_count = conf->down_count;
+                        fdctx->up_count   = conf->up_count;
+
+                        for (child = 0; child < conf->child_count; child++) {
+                                if (!flush_local->child_up[child])
+                                        continue;
+
+                                fdctx->pre_op_done[child] = 0;
+                        }
+                }
+        }
+        UNLOCK (&flush_local->fd->lock);
+
+        flush_local->up_down_flush_cbk (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Start an "up/down flush": a dummy flush transaction run on the frame's
+ * local.
+ *
+ * frame - call frame whose local carries the transaction state; frame,
+ *         this, this->private and frame->local must all be non-NULL
+ * this  - this translator
+ * fd    - fd the flush is for; it is only used in the trace log here.
+ *         NOTE(review): the extra reference is taken on local->fd, not on
+ *         this argument - confirm callers always set local->fd to the
+ *         same fd and that the additional ref is released.
+ * type  - AFR_CHILD_UP_FLUSH installs afr_up_down_flush_post_post_op as
+ *         the transaction's post_post_op hook; AFR_CHILD_DOWN_FLUSH
+ *         clears the hook. Any other value leaves it untouched.
+ *
+ * The transaction uses afr_up_down_flush_wind as its fop stage and
+ * afr_up_down_flush_done as its done stage, over the range start 0 / len 0,
+ * and is launched as an AFR_FLUSH_TRANSACTION.
+ *
+ * Always returns 0, including when argument validation fails.
+ */
+int
+afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   afr_flush_type type)
+{
+        afr_local_t *local = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* local is dereferenced unconditionally below */
+        VALIDATE_OR_GOTO (frame->local, out);
+
+        local = frame->local;
+
+        local->op = GF_FOP_FLUSH;
+
+        local->fd = fd_ref (local->fd);
+
+        local->transaction.fop  = afr_up_down_flush_wind;
+        local->transaction.done = afr_up_down_flush_done;
+
+        switch (type) {
+        case AFR_CHILD_UP_FLUSH:
+                local->transaction.post_post_op = afr_up_down_flush_post_post_op;
+                break;
+
+        case AFR_CHILD_DOWN_FLUSH:
+                local->transaction.post_post_op = NULL;
+                break;
+
+        default:
+                /* unknown flush type: keep whatever hook is already set */
+                break;
+        }
+
+        local->transaction.start = 0;
+        local->transaction.len   = 0;
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "doing up/down flush on fd=%p",
+                fd);
+
+        afr_transaction (frame, this, AFR_FLUSH_TRANSACTION);
+
+out:
+        return 0;
+}