diff options
author | Pavan Sondur <pavan@gluster.com> | 2010-08-06 05:31:45 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-08-06 04:09:07 -0700 |
commit | acdeed002d30209e0a058c2df0346d4f16c08994 (patch) | |
tree | 9c6acda8d92494952f4a80134303b9d2d1c3e1ac | |
parent | 453cb4bf0b70c876eb468def34054095cfd66359 (diff) |
add pump xlator and changes for replace-brick
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 1235 (Bug for all pump/migrate commits)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1235
25 files changed, 5461 insertions, 2617 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 6c96d0836c8..ddb376bcbdc 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -442,8 +442,8 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount, if (!strcasecmp ("start", op)) { replace_op = GF_REPLACE_OP_START; - } else if (!strcasecmp ("stop", op)) { - replace_op = GF_REPLACE_OP_STOP; + } else if (!strcasecmp ("commit", op)) { + replace_op = GF_REPLACE_OP_COMMIT; } else if (!strcasecmp ("pause", op)) { replace_op = GF_REPLACE_OP_PAUSE; } else if (!strcasecmp ("abort", op)) { diff --git a/cli/src/cli.h b/cli/src/cli.h index c31b4631ab8..7b2de667358 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -37,6 +37,14 @@ enum argp_option_keys { ARGP_PORT_KEY = 'p', }; +typedef enum replace_brick_cmd { + REPLACE_BRICK_START, + REPLACE_BRICK_PAUSE, + REPLACE_BRICK_ABORT, + REPLACE_BRICK_STATUS, + REPLACE_BRICK_COMMIT, +} replace_brick_cmd_t; + struct cli_state; struct cli_cmd_word; struct cli_cmd_tree; @@ -116,6 +124,13 @@ struct cli_local { struct { char *volname; } defrag_vol; + + struct { + char *volume; + replace_brick_cmd_t op; + char *src_brick; + char *dst_brick; + } replace_brick; } u; }; diff --git a/cli/src/cli3_1-cops.c b/cli/src/cli3_1-cops.c index cfede807651..929f490c22e 100644 --- a/cli/src/cli3_1-cops.c +++ b/cli/src/cli3_1-cops.c @@ -31,6 +31,7 @@ #include "cli1-xdr.h" #include "cli1.h" #include "protocol-common.h" +#include "cli-mem-types.h" extern rpc_clnt_prog_t *cli_rpc_prog; extern int cli_op_ret; @@ -650,27 +651,133 @@ out: } +static int +replace_brick_mount (char *volname) +{ + char cmd_str[8192] = {0,}; + + gf_log ("", GF_LOG_DEBUG, + "creating directory"); + + snprintf (cmd_str, 4096, "mkdir -p /tmp/mnt"); + system (cmd_str); + + gf_log ("", GF_LOG_DEBUG, + "creating maintenance mount"); + + snprintf (cmd_str, 4096, "glusterfs -f /tmp/replace_brick.vol /tmp/mnt -l /tmp/pav_log -LTRACE"); + + system (cmd_str); + + return 0; +} + int gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { gf1_cli_replace_brick_rsp rsp = {0,}; int ret = 0; + cli_local_t *local = NULL; + call_frame_t *frame = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *replace_brick_op = NULL; + char status_msg[8192] = {0,}; + char cmd_str[8192] = {0,}; if (-1 == req->rpc_status) { goto out; } + frame = (call_frame_t *) myframe; + local = frame->local; + ret = gf_xdr_to_cli_replace_brick_rsp (*iov, &rsp); if (ret < 0) { gf_log ("", GF_LOG_ERROR, "error"); goto out; } + switch (local->u.replace_brick.op) { + case REPLACE_BRICK_START: + + replace_brick_op = "Replace brick start operation"; + + replace_brick_mount (local->u.replace_brick.volume); + + gf_log ("", GF_LOG_DEBUG, + "sending setxattr"); + + snprintf (cmd_str, 4096, "sleep 2; stat /tmp/mnt/ >/dev/null;setfattr -n trusted.glusterfs.pump.start /tmp/mnt/"); + + system (cmd_str); + + gf_log ("", GF_LOG_DEBUG, + "umounting"); + + snprintf (cmd_str, 4096, "umount -f /tmp/mnt 2>/dev/null"); + + ret = system (cmd_str); + + system ("rmdir /tmp/mnt"); + break; + + case REPLACE_BRICK_STATUS: + + replace_brick_op = "Replace brick status operation"; + + snprintf (cmd_str, 4096, "mkdir -p /tmp/mnt"); + system (cmd_str); + + snprintf (cmd_str, 4096, "glusterfs -f /tmp/replace_brick.vol /tmp/mnt -l /tmp/pav_log -LTRACE"); + system (cmd_str); + + gf_log ("", GF_LOG_DEBUG, + "sending getxattr"); + + ret = getxattr ("/tmp/mnt/", "trusted.glusterfs.pump.status", status_msg, 8192); + fprintf (stdout, "%s\n", status_msg); + + gf_log ("", GF_LOG_DEBUG, + "umounting"); + + snprintf (cmd_str, 4096, "umount -f /tmp/mnt 2>/dev/null"); + + ret = system (cmd_str); + + system ("rmdir /tmp/mnt"); + break; + + case REPLACE_BRICK_COMMIT: + + replace_brick_op = "Replace brick commit operation"; + + src_brick = local->u.replace_brick.src_brick; + dst_brick = local->u.replace_brick.dst_brick; + + snprintf (cmd_str, 4096, "gluster volume add-brick %s %s >/dev/null", + local->u.replace_brick.volume, dst_brick); + + ret = system (cmd_str); + + snprintf (cmd_str, 4096, "gluster volume remove-brick %s %s >/dev/null", + local->u.replace_brick.volume, src_brick); + + ret = system (cmd_str); + + break; + + default: + break; + } + gf_log ("cli", GF_LOG_NORMAL, "Received resp to replace brick"); - cli_out ("Replace Brick %s", (rsp.op_ret) ? "unsuccessful": - "successful"); + cli_out ("%s %s", + replace_brick_op ? replace_brick_op : "Unknown operation", + (rsp.op_ret) ? "unsuccessful": + "successful"); ret = rsp.op_ret; @@ -1183,25 +1290,76 @@ gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this, gf1_cli_replace_brick_req req = {0,}; int ret = 0; dict_t *dict = NULL; + cli_local_t *local = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; if (!frame || !this || !data) { ret = -1; goto out; } - dict = data; + local = GF_CALLOC (1, sizeof (*local), cli_mt_cli_local_t); + + dict = data; + + ret = dict_get_int32 (dict, "operation", (int32_t *)&req.op); + + if (ret) + goto out; + + switch (req.op) { + case GF_REPLACE_OP_START: + local->u.replace_brick.op = REPLACE_BRICK_START; + break; + case GF_REPLACE_OP_PAUSE: + local->u.replace_brick.op = REPLACE_BRICK_PAUSE; + break; + case GF_REPLACE_OP_ABORT: + local->u.replace_brick.op = REPLACE_BRICK_ABORT; + break; + case GF_REPLACE_OP_STATUS: + local->u.replace_brick.op = REPLACE_BRICK_STATUS; + break; + case GF_REPLACE_OP_COMMIT: + local->u.replace_brick.op = REPLACE_BRICK_COMMIT; + break; + + default: + break; + } + ret = dict_get_str (dict, "volname", &req.volname); if (ret) goto out; - ret = dict_get_int32 (dict, "operation", (int32_t *)&req.op); + ret = dict_get_str (dict, "src-brick", &src_brick); - if (ret) + if (ret) { + goto out; + } + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + + if (ret) { goto out; + } + + local->u.replace_brick.volume = strdup (req.volname); + local->u.replace_brick.src_brick = strdup (src_brick); + local->u.replace_brick.dst_brick = strdup (dst_brick); + frame->local = local; - if (GF_REPLACE_OP_START == req.op) { + + ret = dict_allocate_and_serialize (dict, + &req.bricks.bricks_val, + (size_t *)&req.bricks.bricks_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict"); + goto out; } ret = cli_cmd_submit (&req, frame, cli_rpc_prog, @@ -1216,10 +1374,6 @@ out: GF_FREE (req.bricks.bricks_val); } - if (req.bricks.bricks_val) { - GF_FREE (req.bricks.bricks_val); - } - return ret; } diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index beb5d9db4a1..cfece659171 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -24,6 +24,20 @@ #include "syncop.h" +call_frame_t * +syncop_create_frame () +{ + struct synctask *task = NULL; + struct call_frame_t *frame = NULL; + + task = synctask_get (); + + if (task) { + frame = task->opaque; + } + + return (call_frame_t *)frame; +} void synctask_yield (struct synctask *task) @@ -95,6 +109,8 @@ synctask_wrap (struct synctask *task) */ task->complete = 1; synctask_wake (task); + + synctask_yield (task); } @@ -105,7 +121,7 @@ synctask_destroy (struct synctask *task) return; if (task->stack) - FREE (task); + FREE (task->stack); FREE (task); } @@ -305,7 +321,181 @@ syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req, return args.op_ret; } +static gf_dirent_t * +entry_copy (gf_dirent_t *source) +{ + gf_dirent_t *sink = NULL; + + sink = gf_dirent_for_name (source->d_name); + + sink->d_off = source->d_off; + sink->d_ino = source->d_ino; + sink->d_type = source->d_type; + + return sink; +} + +int32_t +syncop_readdirp_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + struct syncargs *args = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + + int count = 0; + + args = cookie; + + INIT_LIST_HEAD (&args->entries.list); + + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (op_ret >= 0) { + list_for_each_entry (entry, &entries->list, list) { + tmp = entry_copy (entry); + gf_log (this->name, GF_LOG_TRACE, + "adding entry=%s, count=%d", + tmp->d_name, count); + list_add_tail (&tmp->list, &(args->entries.list)); + count++; + } + } + + __wake (args); + + return 0; + +} + +int +syncop_readdirp (xlator_t *subvol, + fd_t *fd, + size_t size, + off_t off, + gf_dirent_t *entries) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp, + fd, size, off); + + if (entries) + list_splice_init (&args.entries.list, &entries->list); + + errno = args.op_errno; + return args.op_ret; + +} + +int32_t +syncop_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + __wake (args); + + return 0; +} + +int +syncop_opendir (xlator_t *subvol, + loc_t *loc, + fd_t *fd) +{ + struct syncargs args = {0, }; + SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir, + loc, fd); + + errno = args.op_errno; + return args.op_ret; + +} + + +int +syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + __wake (args); + + return 0; +} + + +int +syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr, + loc, dict, flags); + + errno = args.op_errno; + return args.op_ret; +} + +int +syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct statvfs *buf) + +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (op_ret == 0) { + args->statvfs_buf = *buf; + } + + __wake (args); + + return 0; +} + + +int +syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf) + +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs, + loc); + + if (buf) + *buf = args.statvfs_buf; + + errno = args.op_errno; + return args.op_ret; +} int syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index ce364b07301..13b07ed31fd 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -76,6 +76,8 @@ struct syncargs { struct iatt iatt1; struct iatt iatt2; dict_t *xattr; + gf_dirent_t entries; + struct statvfs statvfs_buf; /* do not touch */ pthread_mutex_t mutex; @@ -134,10 +136,10 @@ struct syncargs { #define SYNCOP(subvol, stb, cbk, op, params ...) do { \ call_frame_t *frame = NULL; \ \ - frame = create_frame (THIS, THIS->ctx->pool); \ + frame = syncop_create_frame (); \ \ __yawn (stb); \ - STACK_WIND_COOKIE (frame, (void *)stb, cbk, subvol, op, params);\ + STACK_WIND_COOKIE (frame, cbk, (void *)stb, subvol, op, params);\ __yield (stb); \ } while (0) @@ -157,8 +159,23 @@ int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req, /* out */ struct iatt *iatt, dict_t **xattr_rsp, struct iatt *parent); +int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off, + /* out */ + gf_dirent_t *entries); + +int +syncop_opendir (xlator_t *subvol, + loc_t *loc, + fd_t *fd); + int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, /* out */ struct iatt *preop, struct iatt *postop); +int +syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf); + +int +syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags); + #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index e373cc42400..ef79c354fcc 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -76,6 +76,7 @@ typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data, #include "iatt.h" + struct _loc { const char *path; const char *name; diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h index 51fce3e9296..2f71df4a891 100644 --- a/rpc/xdr/src/cli1-xdr.h +++ b/rpc/xdr/src/cli1-xdr.h @@ -24,7 +24,7 @@ typedef enum gf1_cluster_type gf1_cluster_type; enum gf1_cli_replace_op { GF_REPLACE_OP_NONE = 0, GF_REPLACE_OP_START = 0 + 1, - GF_REPLACE_OP_STOP = 0 + 2, + GF_REPLACE_OP_COMMIT = 0 + 2, GF_REPLACE_OP_PAUSE = 0 + 3, GF_REPLACE_OP_ABORT = 0 + 4, GF_REPLACE_OP_STATUS = 0 + 5, diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index ece459ca772..699b3da77be 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -1,12 +1,17 @@ -xlator_LTLIBRARIES = afr.la +xlator_LTLIBRARIES = afr.la pump.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -afr_la_LDFLAGS = -module -avoidversion +afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c -afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c +afr_la_LDFLAGS = -module -avoidversion +afr_la_SOURCES = $(afr_common_source) afr.c afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h afr-mem-types.h +pump_la_LDFLAGS = -module -avoidversion +pump_la_SOURCES = $(afr_common_source) pump.c +pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) @@ -15,6 +20,7 @@ CLEANFILES = uninstall-local: rm -f $(DESTDIR)$(xlatordir)/replicate.so + rm -f $(DESTDIR)$(xlatordir)/pump.so install-data-hook: - ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
\ No newline at end of file + ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c new file mode 100644 index 00000000000..aeadceddb6d --- /dev/null +++ b/xlators/cluster/afr/src/afr-common.c @@ -0,0 +1,2642 @@ +/* + Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "statedump.h" + +#include "fd.h" + +#include "afr-inode-read.h" +#include "afr-inode-write.h" +#include "afr-dir-read.h" +#include "afr-dir-write.h" +#include "afr-transaction.h" +#include "afr-self-heal.h" +#include "afr-self-heal-common.h" +#include "pump.h" + +#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL +#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL +#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL + +void +afr_set_lk_owner (call_frame_t *frame, xlator_t *this) +{ + if (!frame->root->lk_owner) { + gf_log (this->name, GF_LOG_TRACE, + "Setting lk-owner=%llu", + (unsigned long long) frame->root); + frame->root->lk_owner = (uint64_t) frame->root; + } +} + +uint64_t +afr_is_split_brain (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t split_brain = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK; + } +unlock: + UNLOCK (&inode->lock); + +out: + return split_brain; +} + + +void +afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +{ + uint64_t ctx = 0; + int ret = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + if (set) { + ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx) + | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); + } else { + ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx); + } + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); +out: + return; +} + + +uint64_t +afr_is_opendir_done (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t opendir_done = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK; + } +unlock: + UNLOCK (&inode->lock); + +out: + return opendir_done; +} + + +void +afr_set_opendir_done (xlator_t *this, inode_t *inode) +{ + uint64_t ctx = 0; + int ret = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx) + | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); + + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); +out: + return; +} + + +uint64_t +afr_read_child (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t read_child = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + read_child = ctx & AFR_ICTX_READ_CHILD_MASK; + } +unlock: + UNLOCK (&inode->lock); + +out: + return read_child; +} + + +void +afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child) +{ + uint64_t ctx = 0; + int ret = 0; + + VALIDATE_OR_GOTO (inode, out); + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx) + | (AFR_ICTX_READ_CHILD_MASK & read_child); + + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); + +out: + return; +} + + +/** + * afr_local_cleanup - cleanup everything in frame->local + */ + +void +afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) +{ + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + + + sh = &local->self_heal; + priv = this->private; + + if (sh->buf) + GF_FREE (sh->buf); + + if (sh->xattr) { + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) { + dict_unref (sh->xattr[i]); + sh->xattr[i] = NULL; + } + } + GF_FREE (sh->xattr); + } + + if (sh->child_errno) + GF_FREE (sh->child_errno); + + if (sh->pending_matrix) { + for (i = 0; i < priv->child_count; i++) { + GF_FREE (sh->pending_matrix[i]); + } + GF_FREE (sh->pending_matrix); + } + + if (sh->delta_matrix) { + for (i = 0; i < priv->child_count; i++) { + GF_FREE (sh->delta_matrix[i]); + } + GF_FREE (sh->delta_matrix); + } + + if (sh->sources) + GF_FREE (sh->sources); + + if (sh->success) + GF_FREE (sh->success); + + if (sh->locked_nodes) + GF_FREE (sh->locked_nodes); + + if (sh->healing_fd && !sh->healing_fd_opened) { + fd_unref (sh->healing_fd); + sh->healing_fd = NULL; + } + + if (sh->linkname) + GF_FREE ((char *)sh->linkname); + + loc_wipe (&sh->parent_loc); +} + + +void +afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) +{ + int i = 0; + afr_private_t * priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (local->pending && local->pending[i]) + GF_FREE (local->pending[i]); + } + + GF_FREE (local->pending); + + GF_FREE (local->transaction.locked_nodes); + GF_FREE (local->transaction.child_errno); + GF_FREE (local->child_errno); + + GF_FREE (local->transaction.basename); + GF_FREE (local->transaction.new_basename); + + loc_wipe (&local->transaction.parent_loc); + loc_wipe (&local->transaction.new_parent_loc); +} + + +void +afr_local_cleanup (afr_local_t *local, xlator_t *this) +{ + int i; + afr_private_t * priv = NULL; + + if (!local) + return; + + afr_local_sh_cleanup (local, this); + + afr_local_transaction_cleanup (local, this); + + priv = this->private; + + loc_wipe (&local->loc); + loc_wipe (&local->newloc); + + if (local->fd) + fd_unref (local->fd); + + if (local->xattr_req) + dict_unref (local->xattr_req); + + GF_FREE (local->child_up); + + { /* lookup */ + if (local->cont.lookup.xattrs) { + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lookup.xattrs[i]) { + dict_unref (local->cont.lookup.xattrs[i]); + local->cont.lookup.xattrs[i] = NULL; + } + } + GF_FREE (local->cont.lookup.xattrs); + local->cont.lookup.xattrs = NULL; + } + + if (local->cont.lookup.xattr) { + dict_unref (local->cont.lookup.xattr); + } + + if (local->cont.lookup.inode) { + inode_unref (local->cont.lookup.inode); + } + } + + { /* getxattr */ + if (local->cont.getxattr.name) + GF_FREE (local->cont.getxattr.name); + } + + { /* lk */ + if (local->cont.lk.locked_nodes) + GF_FREE (local->cont.lk.locked_nodes); + } + + { /* create */ + if (local->cont.create.fd) + fd_unref (local->cont.create.fd); + } + + { /* writev */ + GF_FREE (local->cont.writev.vector); + } + + { /* setxattr */ + if (local->cont.setxattr.dict) + dict_unref (local->cont.setxattr.dict); + } + + { /* removexattr */ + GF_FREE (local->cont.removexattr.name); + } + + { /* symlink */ + GF_FREE (local->cont.symlink.linkpath); + } + + { /* opendir */ + if (local->cont.opendir.checksum) + GF_FREE (local->cont.opendir.checksum); + } +} + + +int +afr_frame_return (call_frame_t *frame) +{ + afr_local_t *local = NULL; + int call_count = 0; + + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK (&frame->lock); + + return call_count; +} + + +/** + * up_children_count - return the number of children that are up + */ + +int +afr_up_children_count (int child_count, unsigned char *child_up) +{ + int i = 0; + int ret = 0; + + for (i = 0; i < child_count; i++) + if (child_up[i]) + ret++; + return ret; +} + + +int +afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) +{ + int ret = 0; + int i; + + for (i = 0; i < child_count; i++) + if (locked_nodes[i]) + ret++; + + return ret; +} + + +ino64_t +afr_itransform (ino64_t ino, int child_count, int child_index) +{ + ino64_t scaled_ino = -1; + + if (ino == ((uint64_t) -1)) { + scaled_ino = ((uint64_t) -1); + goto out; + } + + scaled_ino = (ino * child_count) + child_index; + +out: + return scaled_ino; +} + + +int +afr_deitransform_orig (ino64_t ino, int child_count) +{ + int index = -1; + + index = ino % child_count; + + return index; +} + + +int +afr_deitransform (ino64_t ino, int child_count) +{ + return 0; +} + + +int +afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (local->govinda_gOvinda) { + afr_set_split_brain (this, local->cont.lookup.inode, _gf_true); + } + + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, + local->cont.lookup.inode, + &local->cont.lookup.buf, + local->cont.lookup.xattr, + &local->cont.lookup.postparent); + + return 0; +} + + +static void +afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this, + int child_index, dict_t *xattr) +{ + uint32_t open_fd_count = 0; + uint32_t inodelk_count = 0; + uint32_t entrylk_count = 0; + + int ret = 0; + + if (afr_sh_has_metadata_pending (xattr, child_index, this)) { + local->self_heal.need_metadata_self_heal = _gf_true; + gf_log(this->name, GF_LOG_DEBUG, + "metadata self-heal is pending for %s.", + local->loc.path); + } + + if (afr_sh_has_entry_pending (xattr, child_index, this)) { + local->self_heal.need_entry_self_heal = _gf_true; + gf_log(this->name, GF_LOG_DEBUG, + "entry self-heal is pending for %s.", local->loc.path); + } + + if (afr_sh_has_data_pending (xattr, child_index, this)) { + local->self_heal.need_data_self_heal = _gf_true; + gf_log(this->name, GF_LOG_DEBUG, + "data self-heal is pending for %s.", local->loc.path); + } + + ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT, + &open_fd_count); + if (ret == 0) + local->open_fd_count += open_fd_count; + + ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT, + &inodelk_count); + if (ret == 0) + local->inodelk_count += inodelk_count; + + ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT, + &entrylk_count); + if (ret == 0) + local->entrylk_count += entrylk_count; +} + + +static void +afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local, + struct iatt *buf, struct iatt *lookup_buf) +{ + if (FILETYPE_DIFFERS (buf, lookup_buf)) { + /* mismatching filetypes with same name + */ + + gf_log (this->name, GF_LOG_NORMAL, + "filetype differs for %s ", local->loc.path); + + local->govinda_gOvinda = 1; + } + + if (PERMISSION_DIFFERS (buf, lookup_buf)) { + /* mismatching permissions */ + gf_log (this->name, GF_LOG_NORMAL, + "permissions differ for %s ", local->loc.path); + local->self_heal.need_metadata_self_heal = _gf_true; + } + + if (OWNERSHIP_DIFFERS (buf, lookup_buf)) { + /* mismatching permissions */ + local->self_heal.need_metadata_self_heal = _gf_true; + gf_log (this->name, GF_LOG_NORMAL, + "ownership differs for %s ", local->loc.path); + } + + if (SIZE_DIFFERS (buf, lookup_buf) + && IA_ISREG (buf->ia_type)) { + gf_log (this->name, GF_LOG_NORMAL, + "size differs for %s ", local->loc.path); + local->self_heal.need_data_self_heal = _gf_true; + } + +} + + +static void +afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf) +{ + int unwind = 1; + int source = -1; + char sh_type_str[256] = {0,}; + + afr_local_t *local = NULL; + + local = frame->local; + + local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino; + + if (local->cont.lookup.ino) { + local->cont.lookup.buf.ia_ino = local->cont.lookup.ino; + local->cont.lookup.buf.ia_gen = local->cont.lookup.gen; + } + + if (local->op_ret == 0) { + /* KLUDGE: assuming DHT will not itransform in + revalidate */ + if (local->cont.lookup.inode->ino) { + local->cont.lookup.buf.ia_ino = + local->cont.lookup.inode->ino; + local->cont.lookup.buf.ia_gen = + local->cont.lookup.inode->generation; + } + } + + if (local->success_count && local->enoent_count) { + local->self_heal.need_metadata_self_heal = _gf_true; + local->self_heal.need_data_self_heal = _gf_true; + local->self_heal.need_entry_self_heal = _gf_true; + gf_log(this->name, GF_LOG_NORMAL, + "entries are missing in lookup of %s.", + local->loc.path); + } + + if (local->success_count) { + /* check for split-brain case in previous lookup */ + if (afr_is_split_brain (this, + local->cont.lookup.inode)) { + local->self_heal.need_data_self_heal = _gf_true; + gf_log(this->name, GF_LOG_NORMAL, + "split brain detected during lookup of " + "%s.", local->loc.path); + } + } + + if ((local->self_heal.need_metadata_self_heal + || local->self_heal.need_data_self_heal + || local->self_heal.need_entry_self_heal) + && ((!local->cont.lookup.is_revalidate) + || (local->op_ret != -1))) { + + if (local->open_fd_count + || local->inodelk_count + || local->entrylk_count) { + + /* Someone else is doing self-heal on this file. + So just make a best effort to set the read-subvolume + and return */ + + if (IA_ISREG (local->cont.lookup.inode->ia_type)) { + source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs); + + if (source >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + source); + } + } + } else { + if (!local->cont.lookup.inode->ia_type) { + /* fix for RT #602 */ + local->cont.lookup.inode->ia_type = + lookup_buf->ia_type; + } + + local->self_heal.background = _gf_true; + local->self_heal.type = local->cont.lookup.buf.ia_type; + local->self_heal.unwind = afr_self_heal_lookup_unwind; + + unwind = 0; + + afr_self_heal_type_str_get(&local->self_heal, + sh_type_str, + sizeof(sh_type_str)); + + gf_log (this->name, GF_LOG_NORMAL, "background %s " + "self-heal triggered. path: %s", + sh_type_str, local->loc.path); + + afr_self_heal (frame, this); + } + } + + if (unwind) { + AFR_STACK_UNWIND (lookup, frame, local->op_ret, + local->op_errno, + local->cont.lookup.inode, + &local->cont.lookup.buf, + local->cont.lookup.xattr, + &local->cont.lookup.postparent); + } +} + + +/* + * During a lookup, some errors are more "important" than + * others in that they must be given higher priority while + * returning to the user. + * + * The hierarchy is ESTALE > ENOENT > others + * + */ + +static gf_boolean_t +__error_more_important (int32_t old_errno, int32_t new_errno) +{ + gf_boolean_t ret = _gf_true; + + /* Nothing should ever overwrite ESTALE */ + if (old_errno == ESTALE) + ret = _gf_false; + + /* Nothing should overwrite ENOENT, except ESTALE */ + else if ((old_errno == ENOENT) && (new_errno != ESTALE)) + ret = _gf_false; + + return ret; +} + + +int +afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xattr, + struct iatt *postparent) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + struct iatt * lookup_buf = NULL; + + int call_count = -1; + int child_index = -1; + int first_up_child = -1; + + child_index = (long) cookie; + priv = this->private; + + LOCK (&frame->lock); + { + local = frame->local; + + lookup_buf = &local->cont.lookup.buf; + + if (op_ret == -1) { + if (op_errno == ENOENT) + local->enoent_count++; + + if (__error_more_important (local->op_errno, op_errno)) + local->op_errno = op_errno; + + if (local->op_errno == ESTALE) { + local->op_ret = -1; + } + + goto unlock; + } + + afr_lookup_collect_xattr (local, this, child_index, xattr); + + first_up_child = afr_first_up_child (priv); + + if (child_index == first_up_child) { + local->cont.lookup.ino = + afr_itransform (buf->ia_ino, + priv->child_count, + first_up_child); + local->cont.lookup.gen = buf->ia_gen; + } + + if (local->success_count == 0) { + if (local->op_errno != ESTALE) + local->op_ret = op_ret; + + local->cont.lookup.inode = inode_ref (inode); + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; + + if (priv->first_lookup && inode->ino == 1) { + gf_log (this->name, GF_LOG_TRACE, + "added root inode"); + priv->root_inode = inode_ref (inode); + priv->first_lookup = 0; + priv->child_up[1] = 0; + + LOCK (&priv->lock); + { + priv->down_count++; + } + UNLOCK (&priv->lock); + + } + + *lookup_buf = *buf; + + lookup_buf->ia_ino = afr_itransform (buf->ia_ino, + priv->child_count, + child_index); + if (priv->read_child >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); + } else { + afr_set_read_child (this, + local->cont.lookup.inode, + child_index); + } + + } else { + afr_lookup_self_heal_check (this, local, buf, lookup_buf); + + if (child_index == local->read_child_index) { + /* + lookup has succeeded on the read child. + So use its inode number + */ + if (local->cont.lookup.xattr) + dict_unref (local->cont.lookup.xattr); + + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; + + *lookup_buf = *buf; + + if (priv->read_child >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); + } else { + afr_set_read_child (this, + local->cont.lookup.inode, + local->read_child_index); + } + } + + } + + local->success_count++; + } +unlock: + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_lookup_done (frame, this, lookup_buf); + } + + return 0; +} + + +int +afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xattr, + struct iatt *postparent) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + struct iatt * lookup_buf = NULL; + + int call_count = -1; + int child_index = -1; + int first_up_child = -1; + + child_index = (long) cookie; + priv = this->private; + + LOCK (&frame->lock); + { + local = frame->local; + + lookup_buf = &local->cont.lookup.buf; + + if (op_ret == -1) { + if (op_errno == ENOENT) + local->enoent_count++; + + if (__error_more_important (local->op_errno, op_errno)) + local->op_errno = op_errno; + + if (local->op_errno == ESTALE) { + local->op_ret = -1; + } + + goto unlock; + } + + afr_lookup_collect_xattr (local, this, child_index, xattr); + + first_up_child = afr_first_up_child (priv); + + if (child_index == first_up_child) { + local->cont.lookup.ino = + afr_itransform (buf->ia_ino, + priv->child_count, + first_up_child); + local->cont.lookup.gen = buf->ia_gen; + } + + /* in case of revalidate, we need to send stat of the + * child whose stat was sent during the first lookup. + * (so that time stamp does not vary with revalidate. + * in case it is down, stat of the fist success will + * be replied */ + + /* inode number should be preserved across revalidates */ + + if (local->success_count == 0) { + if (local->op_errno != ESTALE) + local->op_ret = op_ret; + + local->cont.lookup.inode = inode_ref (inode); + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; + + *lookup_buf = *buf; + + lookup_buf->ia_ino = afr_itransform (buf->ia_ino, + priv->child_count, + child_index); + + if (priv->read_child >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); + } else { + afr_set_read_child (this, + local->cont.lookup.inode, + child_index); + } + + } else { + afr_lookup_self_heal_check (this, local, buf, lookup_buf); + + if (child_index == local->read_child_index) { + + /* + lookup has succeeded on the read child. + So use its inode number + */ + + if (local->cont.lookup.xattr) + dict_unref (local->cont.lookup.xattr); + + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; + + *lookup_buf = *buf; + + if (priv->read_child >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); + } else { + afr_set_read_child (this, + local->cont.lookup.inode, + local->read_child_index); + } + } + + } + + local->success_count++; + } +unlock: + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_lookup_done (frame, this, lookup_buf); + } + + return 0; +} + + +int +afr_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int ret = -1; + int i = 0; + + fop_lookup_cbk_t callback; + + int call_count = 0; + + uint64_t ctx; + + int32_t op_errno = 0; + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + local->op_ret = -1; + + frame->local = local; + + if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) { + op_errno = ENOENT; + goto out; + } + + loc_copy (&local->loc, loc); + + ret = inode_ctx_get (loc->inode, this, &ctx); + if (ret == 0) { + /* lookup is a revalidate */ + + callback = afr_revalidate_lookup_cbk; + + local->cont.lookup.is_revalidate = _gf_true; + local->read_child_index = afr_read_child (this, + loc->inode); + } else { + callback = afr_fresh_lookup_cbk; + + LOCK (&priv->read_child_lock); + { + local->read_child_index = (++priv->read_child_rr) + % (priv->child_count); + } + UNLOCK (&priv->read_child_lock); + } + + if (loc->parent) + local->cont.lookup.parent_ino = loc->parent->ino; + + local->child_up = memdup (priv->child_up, priv->child_count); + + local->cont.lookup.xattrs = GF_CALLOC (priv->child_count, + sizeof (*local->cont.lookup.xattr), + gf_afr_mt_dict_t); + + local->call_count = afr_up_children_count (priv->child_count, + local->child_up); + call_count = local->call_count; + + if (local->call_count == 0) { + ret = -1; + op_errno = ENOTCONN; + goto out; + } + + /* By default assume ENOTCONN. On success it will be set to 0. */ + local->op_errno = ENOTCONN; + + if (xattr_req == NULL) + local->xattr_req = dict_new (); + else + local->xattr_req = dict_ref (xattr_req); + + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], + 3 * sizeof(int32_t)); + + /* 3 = data+metadata+entry */ + } + + ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0); + ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0); + ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, callback, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + loc, local->xattr_req); + if (!--call_count) + break; + } + } + + ret = 0; +out: + if (ret == -1) + AFR_STACK_UNWIND (lookup, frame, -1, op_errno, + NULL, NULL, NULL, NULL); + + return 0; +} + + +/* {{{ open */ + +int +afr_fd_ctx_set (xlator_t *this, fd_t *fd) +{ + afr_private_t * priv = NULL; + + int op_ret = 0; + int ret = 0; + + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + + LOCK (&fd->lock); + { + ret = __fd_ctx_get (fd, this, &ctx); + + if (ret == 0) + goto unlock; + + fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t), + gf_afr_mt_afr_fd_ctx_t); + if (!fd_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + + op_ret = -ENOMEM; + goto unlock; + } + + fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done), + priv->child_count, + gf_afr_mt_char); + if (!fd_ctx->pre_op_done) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_ret = -ENOMEM; + goto unlock; + } + + fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on), + priv->child_count, + gf_afr_mt_char); + if (!fd_ctx->opened_on) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_ret = -ENOMEM; + goto unlock; + } + + fd_ctx->child_failed = GF_CALLOC ( + sizeof (*fd_ctx->child_failed), + priv->child_count, + gf_afr_mt_char); + + if (!fd_ctx->child_failed) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + + op_ret = -ENOMEM; + goto unlock; + } + + fd_ctx->up_count = priv->up_count; + fd_ctx->down_count = priv->down_count; + + ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx); + if (ret < 0) { + op_ret = ret; + } + + INIT_LIST_HEAD (&fd_ctx->entries); + } +unlock: + UNLOCK (&fd->lock); +out: + return ret; +} + +/* {{{ flush */ + +int +afr_flush_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (flush, main_frame, + local->op_ret, local->op_errno); + } + + return 0; +} + + +int +afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (afr_fop_failed (op_ret, op_errno)) + afr_transaction_fop_failed (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + afr_flush_unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_flush_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int i = 0; + int call_count = -1; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_flush_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->flush, + local->fd); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_flush_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno); + + return 0; +} + + +static int +__no_pre_op_done (xlator_t *this, fd_t *fd) +{ + int i = 0; + int op_ret = 1; + + int _ret = 0; + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + afr_private_t *priv = NULL; + + priv = this->private; + + LOCK (&fd->lock); + { + _ret = __fd_ctx_get (fd, this, &ctx); + + if (_ret < 0) { + goto out; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + for (i = 0; i < priv->child_count; i++) { + if (fd_ctx->pre_op_done[i]) { + op_ret = 0; + break; + } + } + } +out: + UNLOCK (&fd->lock); + + return op_ret; +} + + +int +afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + int i = 0; + int call_count = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (__no_pre_op_done (this, fd)) { + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_plain_flush_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->flush, + fd); + if (!--call_count) + break; + } + } + } else { + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + goto out; + } + + transaction_frame->local = local; + + local->op = GF_FOP_FLUSH; + + local->transaction.fop = afr_flush_wind; + local->transaction.done = afr_flush_done; + local->transaction.unwind = afr_flush_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + + afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION); + } + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + + AFR_STACK_UNWIND (flush, frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ + + +int +afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) +{ + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + int ret = 0; + + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) + goto out; + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + if (fd_ctx) { + if (fd_ctx->child_failed) + GF_FREE (fd_ctx->child_failed); + + if (fd_ctx->pre_op_done) + GF_FREE (fd_ctx->pre_op_done); + + if (fd_ctx->opened_on) + GF_FREE (fd_ctx->opened_on); + + GF_FREE (fd_ctx); + } + +out: + return 0; +} + + +int +afr_release (xlator_t *this, fd_t *fd) +{ + afr_cleanup_fd_ctx (this, fd); + + return 0; +} + + +/* {{{ fsync */ + +int +afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + int child_index = (long) cookie; + int read_child = 0; + + local = frame->local; + + read_child = afr_read_child (this, local->fd->inode); + + LOCK (&frame->lock); + { + if (child_index == read_child) { + local->read_child_returned = _gf_true; + } + + if (op_ret == 0) { + local->op_ret = 0; + + if (local->success_count == 0) { + local->cont.fsync.prebuf = *prebuf; + local->cont.fsync.postbuf = *postbuf; + } + + if (child_index == read_child) { + local->cont.fsync.prebuf = *prebuf; + local->cont.fsync.postbuf = *postbuf; + } + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino; + local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino; + + AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, + &local->cont.fsync.prebuf, + &local->cont.fsync.postbuf); + } + + return 0; +} + + +int +afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + local->fd = fd_ref (fd); + local->cont.fsync.ino = fd->inode->ino; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_fsync_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fsync, + fd, datasync); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL); + } + return 0; +} + +/* }}} */ + +/* {{{ fsync */ + +int32_t +afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret, + local->op_errno); + + return 0; +} + + +int32_t +afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fsyncdir_cbk, + priv->children[i], + priv->children[i]->fops->fsyncdir, + fd, datasync); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno); + } + return 0; +} + +/* }}} */ + +/* {{{ xattrop */ + +int32_t +afr_xattrop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xattr) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno, + xattr); + + return 0; +} + + +int32_t +afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_xattrop_cbk, + priv->children[i], + priv->children[i]->fops->xattrop, + loc, optype, xattr); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL); + } + return 0; +} + +/* }}} */ + +/* {{{ fxattrop */ + +int32_t +afr_fxattrop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xattr) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno, + xattr); + + return 0; +} + + +int32_t +afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fxattrop_cbk, + priv->children[i], + priv->children[i]->fops->fxattrop, + fd, optype, xattr); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL); + } + return 0; +} + +/* }}} */ + + +int32_t +afr_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (inodelk, frame, local->op_ret, + local->op_errno); + + return 0; +} + + +int32_t +afr_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_inodelk_cbk, + priv->children[i], + priv->children[i]->fops->inodelk, + volume, loc, cmd, flock); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (finodelk, frame, local->op_ret, + local->op_errno); + + return 0; +} + + +int32_t +afr_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_finodelk_cbk, + priv->children[i], + priv->children[i]->fops->finodelk, + volume, fd, cmd, flock); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (entrylk, frame, local->op_ret, + local->op_errno); + + return 0; +} + + +int32_t +afr_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_entrylk_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + volume, loc, basename, cmd, type); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno); + } + return 0; +} + + + +int32_t +afr_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (fentrylk, frame, local->op_ret, + local->op_errno); + + return 0; +} + + +int32_t +afr_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fentrylk_cbk, + priv->children[i], + priv->children[i]->fops->fentrylk, + volume, fd, basename, cmd, type); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); + } + return 0; +} + +int32_t +afr_statfs_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct statvfs *statvfs) +{ + afr_local_t *local = NULL; + + int call_count = 0; + + LOCK (&frame->lock); + { + local = frame->local; + + if (op_ret == 0) { + local->op_ret = op_ret; + + if (local->cont.statfs.buf_set) { + if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail) + local->cont.statfs.buf = *statvfs; + } else { + local->cont.statfs.buf = *statvfs; + local->cont.statfs.buf_set = 1; + } + } + + if (op_ret == -1) + local->op_errno = op_errno; + + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, + &local->cont.statfs.buf); + + return 0; +} + + +int32_t +afr_statfs (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + afr_private_t * priv = NULL; + int child_count = 0; + afr_local_t * local = NULL; + int i = 0; + + int ret = -1; + int call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + child_count = priv->child_count; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + call_count = local->call_count; + + for (i = 0; i < child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_statfs_cbk, + priv->children[i], + priv->children[i]->fops->statfs, + loc); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL); + } + return 0; +} + + +int32_t +afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + afr_local_t * local = NULL; + + int call_count = -1; + + local = frame->local; + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, + lock); + + return 0; +} + + +int32_t +afr_lk_unlock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int i; + int call_count = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes, + priv->child_count); + + if (call_count == 0) { + AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, + &local->cont.lk.flock); + return 0; + } + + local->call_count = call_count; + + local->cont.lk.flock.l_type = F_UNLCK; + + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lk.locked_nodes[i]) { + STACK_WIND (frame, afr_lk_unlock_cbk, + priv->children[i], + priv->children[i]->fops->lk, + local->fd, F_SETLK, + &local->cont.lk.flock); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int32_t +afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + call_count = --local->call_count; + + if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) { + local->op_ret = -1; + local->op_errno = op_errno; + + afr_lk_unlock (frame, this); + return 0; + } + + if (op_ret == 0) { + local->op_ret = 0; + local->op_errno = 0; + local->cont.lk.locked_nodes[child_index] = 1; + local->cont.lk.flock = *lock; + } + + child_index++; + + if (child_index < priv->child_count) { + STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->lk, + local->fd, local->cont.lk.cmd, + &local->cont.lk.flock); + } else if (local->op_ret == -1) { + /* all nodes have gone down */ + + AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, &local->cont.lk.flock); + } else { + /* locking has succeeded on all nodes that are up */ + + AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, + &local->cont.lk.flock); + } + + return 0; +} + + +int +afr_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, + struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int i = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + AFR_LOCAL_INIT (local, priv); + + frame->local = local; + + local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count, + sizeof (*local->cont.lk.locked_nodes), + gf_afr_mt_char); + + if (!local->cont.lk.locked_nodes) { + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + op_errno = ENOMEM; + goto out; + } + + local->fd = fd_ref (fd); + local->cont.lk.cmd = cmd; + local->cont.lk.flock = *flock; + + STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0, + priv->children[i], + priv->children[i]->fops->lk, + fd, cmd, flock); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL); + } + return 0; +} + +int +afr_priv_dump (xlator_t *this) +{ + afr_private_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + + + assert(this); + priv = this->private; + + assert(priv); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section(key_prefix); + gf_proc_dump_build_key(key, key_prefix, "child_count"); + gf_proc_dump_write(key, "%u", priv->child_count); + gf_proc_dump_build_key(key, key_prefix, "read_child_rr"); + gf_proc_dump_write(key, "%u", priv->read_child_rr); + for (i = 0; i < priv->child_count; i++) { + gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i); + gf_proc_dump_write(key, "%d", priv->child_up[i]); + gf_proc_dump_build_key(key, key_prefix, + "pending_key[%d]", i); + gf_proc_dump_write(key, "%s", priv->pending_key[i]); + } + gf_proc_dump_build_key(key, key_prefix, "data_self_heal"); + gf_proc_dump_write(key, "%d", priv->data_self_heal); + gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal"); + gf_proc_dump_write(key, "%d", priv->metadata_self_heal); + gf_proc_dump_build_key(key, key_prefix, "entry_self_heal"); + gf_proc_dump_write(key, "%d", priv->entry_self_heal); + gf_proc_dump_build_key(key, key_prefix, "data_change_log"); + gf_proc_dump_write(key, "%d", priv->data_change_log); + gf_proc_dump_build_key(key, key_prefix, "metadata_change_log"); + gf_proc_dump_write(key, "%d", priv->metadata_change_log); + gf_proc_dump_build_key(key, key_prefix, "entry_change_log"); + gf_proc_dump_write(key, "%d", priv->entry_change_log); + gf_proc_dump_build_key(key, key_prefix, "read_child"); + gf_proc_dump_write(key, "%d", priv->read_child); + gf_proc_dump_build_key(key, key_prefix, "favorite_child"); + gf_proc_dump_write(key, "%u", priv->favorite_child); + gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count"); + gf_proc_dump_write(key, "%u", priv->data_lock_server_count); + gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count"); + gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count); + gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count"); + gf_proc_dump_write(key, "%u", priv->entry_lock_server_count); + gf_proc_dump_build_key(key, key_prefix, "wait_count"); + gf_proc_dump_write(key, "%u", priv->wait_count); + + return 0; +} + + +/** + * find_child_index - find the child's index in the array of subvolumes + * @this: AFR + * @child: child + */ + +static int +find_child_index (xlator_t *this, xlator_t *child) +{ + afr_private_t *priv = NULL; + + int i = -1; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if ((xlator_t *) child == priv->children[i]) + break; + } + + return i; +} + +int32_t +afr_notify (xlator_t *this, int32_t event, + void *data, ...) +{ + afr_private_t * priv = NULL; + unsigned char * child_up = NULL; + + int i = -1; + int up_children = 0; + + priv = this->private; + + if (!priv) + return 0; + + child_up = priv->child_up; + + switch (event) { + case GF_EVENT_CHILD_UP: + i = find_child_index (this, data); + + child_up[i] = 1; + + LOCK (&priv->lock); + { + priv->up_count++; + } + UNLOCK (&priv->lock); + + /* + if all the children were down, and one child came up, + send notify to parent + */ + + for (i = 0; i < priv->child_count; i++) + if (child_up[i]) + up_children++; + + if (up_children == 1) { + gf_log (this->name, GF_LOG_NORMAL, + "Subvolume '%s' came back up; " + "going online.", ((xlator_t *)data)->name); + + default_notify (this, event, data); + } + + break; + + case GF_EVENT_CHILD_DOWN: + i = find_child_index (this, data); + + child_up[i] = 0; + + LOCK (&priv->lock); + { + priv->down_count++; + } + UNLOCK (&priv->lock); + + /* + if all children are down, and this was the last to go down, + send notify to parent + */ + + for (i = 0; i < priv->child_count; i++) + if (child_up[i]) + up_children++; + + if (up_children == 0) { + gf_log (this->name, GF_LOG_ERROR, + "All subvolumes are down. Going offline " + "until atleast one of them comes back up."); + + default_notify (this, event, data); + } + + break; + + default: + default_notify (this, event, data); + } + + return 0; + +} diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 344bdf74ece..6125ac0dd05 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -228,11 +228,14 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd) { + afr_private_t *priv = NULL; afr_local_t * local = NULL; int call_count = -1; int ret = 0; + priv = this->private; + LOCK (&frame->lock); { local = frame->local; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index ef72fb19779..74f3d9ddb0e 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -656,7 +656,6 @@ out: return 0; } - int32_t afr_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name) @@ -671,6 +670,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; + VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (this->private, out); @@ -690,6 +690,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, op_errno = ENODATA; goto out; } + } read_child = afr_read_child (this, loc->inode); diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index eff01da8610..f02b5fe1e6d 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -47,7 +47,6 @@ #include "afr.h" #include "afr-transaction.h" - /* {{{ writev */ int @@ -1366,11 +1365,10 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this) local->transaction.unwind (frame, this); AFR_STACK_DESTROY (frame); - + return 0; } - int afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags) @@ -1390,13 +1388,6 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, priv = this->private; - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - ALLOC_OR_GOTO (local, afr_local_t, out); ret = AFR_LOCAL_INIT (local, priv); @@ -1405,6 +1396,13 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, goto out; } + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + goto out; + } + transaction_frame->local = local; local->op_ret = -1; diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index 27117c1848c..3720d85e61b 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -40,6 +40,7 @@ enum gf_afr_mem_types_ { gf_afr_mt_uint8_t, gf_afr_mt_loc_t, gf_afr_mt_entry_name, + gf_afr_mt_pump_priv, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index e3e484aabbe..61fe89dfe42 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -25,7 +25,7 @@ #include "afr-transaction.h" #include "afr-self-heal-common.h" #include "afr-self-heal.h" - +#include "pump.h" /** * select_source - select a source and return it @@ -1536,7 +1536,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) return 0; } - int afr_self_heal (call_frame_t *frame, xlator_t *this) { @@ -1548,9 +1547,12 @@ afr_self_heal (call_frame_t *frame, xlator_t *this) call_frame_t *sh_frame = NULL; afr_local_t *sh_local = NULL; + local = frame->local; priv = this->private; + afr_set_lk_owner (frame, this); + if (local->self_heal.background) { LOCK (&priv->lock); { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 2d689e389bc..51d1455f455 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1376,7 +1376,7 @@ int32_t afr_lock (call_frame_t *frame, xlator_t *this) afr_pid_save (frame); frame->root->pid = (long) frame->root; - + afr_set_lk_owner (frame, this); return afr_lock_rec (frame, this, 0); } diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index b795ea1d17f..629e1875e6e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -28,2591 +28,17 @@ #define _CONFIG_H #include "config.h" #endif - -#include "glusterfs.h" -#include "afr.h" -#include "dict.h" -#include "xlator.h" -#include "hashfn.h" -#include "logging.h" -#include "stack.h" -#include "list.h" -#include "call-stub.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "statedump.h" - -#include "fd.h" - -#include "afr-inode-read.h" -#include "afr-inode-write.h" -#include "afr-dir-read.h" -#include "afr-dir-write.h" -#include "afr-transaction.h" -#include "afr-self-heal.h" -#include "afr-self-heal-common.h" - -#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL -#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL -#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL - - -uint64_t -afr_is_split_brain (xlator_t *this, inode_t *inode) -{ - int ret = 0; - - uint64_t ctx = 0; - uint64_t split_brain = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) - goto unlock; - - split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK; - } -unlock: - UNLOCK (&inode->lock); - -out: - return split_brain; -} - - -void -afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) -{ - uint64_t ctx = 0; - int ret = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) { - ctx = 0; - } - - if (set) { - ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx) - | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); - } else { - ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx); - } - __inode_ctx_put (inode, this, ctx); - } - UNLOCK (&inode->lock); -out: - return; -} - - -uint64_t -afr_is_opendir_done (xlator_t *this, inode_t *inode) -{ - int ret = 0; - - uint64_t ctx = 0; - uint64_t opendir_done = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) - goto unlock; - - opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK; - } -unlock: - UNLOCK (&inode->lock); - -out: - return opendir_done; -} - - -void -afr_set_opendir_done (xlator_t *this, inode_t *inode) -{ - uint64_t ctx = 0; - int ret = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) { - ctx = 0; - } - - ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx) - | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); - - __inode_ctx_put (inode, this, ctx); - } - UNLOCK (&inode->lock); -out: - return; -} - - -uint64_t -afr_read_child (xlator_t *this, inode_t *inode) -{ - int ret = 0; - - uint64_t ctx = 0; - uint64_t read_child = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) - goto unlock; - - read_child = ctx & AFR_ICTX_READ_CHILD_MASK; - } -unlock: - UNLOCK (&inode->lock); - -out: - return read_child; -} - - -void -afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child) -{ - uint64_t ctx = 0; - int ret = 0; - - VALIDATE_OR_GOTO (inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) { - ctx = 0; - } - - ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx) - | (AFR_ICTX_READ_CHILD_MASK & read_child); - - __inode_ctx_put (inode, this, ctx); - } - UNLOCK (&inode->lock); - -out: - return; -} - - -/** - * afr_local_cleanup - cleanup everything in frame->local - */ - -void -afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) -{ - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int i = 0; - - - sh = &local->self_heal; - priv = this->private; - - if (sh->buf) - GF_FREE (sh->buf); - - if (sh->xattr) { - for (i = 0; i < priv->child_count; i++) { - if (sh->xattr[i]) { - dict_unref (sh->xattr[i]); - sh->xattr[i] = NULL; - } - } - GF_FREE (sh->xattr); - } - - if (sh->child_errno) - GF_FREE (sh->child_errno); - - if (sh->pending_matrix) { - for (i = 0; i < priv->child_count; i++) { - GF_FREE (sh->pending_matrix[i]); - } - GF_FREE (sh->pending_matrix); - } - - if (sh->delta_matrix) { - for (i = 0; i < priv->child_count; i++) { - GF_FREE (sh->delta_matrix[i]); - } - GF_FREE (sh->delta_matrix); - } - - if (sh->sources) - GF_FREE (sh->sources); - - if (sh->success) - GF_FREE (sh->success); - - if (sh->locked_nodes) - GF_FREE (sh->locked_nodes); - - if (sh->healing_fd && !sh->healing_fd_opened) { - fd_unref (sh->healing_fd); - sh->healing_fd = NULL; - } - - if (sh->linkname) - GF_FREE ((char *)sh->linkname); - - loc_wipe (&sh->parent_loc); -} - - -void -afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) -{ - int i = 0; - afr_private_t * priv = NULL; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (local->pending && local->pending[i]) - GF_FREE (local->pending[i]); - } - - GF_FREE (local->pending); - - GF_FREE (local->transaction.locked_nodes); - GF_FREE (local->transaction.child_errno); - GF_FREE (local->child_errno); - - GF_FREE (local->transaction.basename); - GF_FREE (local->transaction.new_basename); - - loc_wipe (&local->transaction.parent_loc); - loc_wipe (&local->transaction.new_parent_loc); -} - - -void -afr_local_cleanup (afr_local_t *local, xlator_t *this) -{ - int i; - afr_private_t * priv = NULL; - - if (!local) - return; - - afr_local_sh_cleanup (local, this); - - afr_local_transaction_cleanup (local, this); - - priv = this->private; - - loc_wipe (&local->loc); - loc_wipe (&local->newloc); - - if (local->fd) - fd_unref (local->fd); - - if (local->xattr_req) - dict_unref (local->xattr_req); - - GF_FREE (local->child_up); - - { /* lookup */ - if (local->cont.lookup.xattrs) { - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lookup.xattrs[i]) { - dict_unref (local->cont.lookup.xattrs[i]); - local->cont.lookup.xattrs[i] = NULL; - } - } - GF_FREE (local->cont.lookup.xattrs); - local->cont.lookup.xattrs = NULL; - } - - if (local->cont.lookup.xattr) { - dict_unref (local->cont.lookup.xattr); - } - - if (local->cont.lookup.inode) { - inode_unref (local->cont.lookup.inode); - } - } - - { /* getxattr */ - if (local->cont.getxattr.name) - GF_FREE (local->cont.getxattr.name); - } - - { /* lk */ - if (local->cont.lk.locked_nodes) - GF_FREE (local->cont.lk.locked_nodes); - } - - { /* create */ - if (local->cont.create.fd) - fd_unref (local->cont.create.fd); - } - - { /* writev */ - GF_FREE (local->cont.writev.vector); - } - - { /* setxattr */ - if (local->cont.setxattr.dict) - dict_unref (local->cont.setxattr.dict); - } - - { /* removexattr */ - GF_FREE (local->cont.removexattr.name); - } - - { /* symlink */ - GF_FREE (local->cont.symlink.linkpath); - } - - { /* opendir */ - if (local->cont.opendir.checksum) - GF_FREE (local->cont.opendir.checksum); - } -} - - -int -afr_frame_return (call_frame_t *frame) -{ - afr_local_t *local = NULL; - int call_count = 0; - - local = frame->local; - - LOCK (&frame->lock); - { - call_count = --local->call_count; - } - UNLOCK (&frame->lock); - - return call_count; -} - - -/** - * up_children_count - return the number of children that are up - */ - -int -afr_up_children_count (int child_count, unsigned char *child_up) -{ - int i = 0; - int ret = 0; - - for (i = 0; i < child_count; i++) - if (child_up[i]) - ret++; - return ret; -} - - -int -afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) -{ - int ret = 0; - int i; - - for (i = 0; i < child_count; i++) - if (locked_nodes[i]) - ret++; - - return ret; -} - - -ino64_t -afr_itransform (ino64_t ino, int child_count, int child_index) -{ - ino64_t scaled_ino = -1; - - if (ino == ((uint64_t) -1)) { - scaled_ino = ((uint64_t) -1); - goto out; - } - - scaled_ino = (ino * child_count) + child_index; - -out: - return scaled_ino; -} - - -int -afr_deitransform_orig (ino64_t ino, int child_count) -{ - int index = -1; - - index = ino % child_count; - - return index; -} - - -int -afr_deitransform (ino64_t ino, int child_count) -{ - return 0; -} - - -int -afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - if (local->govinda_gOvinda) { - afr_set_split_brain (this, local->cont.lookup.inode, _gf_true); - } - - AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, - local->cont.lookup.inode, - &local->cont.lookup.buf, - local->cont.lookup.xattr, - &local->cont.lookup.postparent); - - return 0; -} - - -static void -afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this, - int child_index, dict_t *xattr) -{ - uint32_t open_fd_count = 0; - uint32_t inodelk_count = 0; - uint32_t entrylk_count = 0; - - int ret = 0; - - if (afr_sh_has_metadata_pending (xattr, child_index, this)) { - local->self_heal.need_metadata_self_heal = _gf_true; - gf_log(this->name, GF_LOG_DEBUG, - "metadata self-heal is pending for %s.", - local->loc.path); - } - - if (afr_sh_has_entry_pending (xattr, child_index, this)) { - local->self_heal.need_entry_self_heal = _gf_true; - gf_log(this->name, GF_LOG_DEBUG, - "entry self-heal is pending for %s.", local->loc.path); - } - - if (afr_sh_has_data_pending (xattr, child_index, this)) { - local->self_heal.need_data_self_heal = _gf_true; - gf_log(this->name, GF_LOG_DEBUG, - "data self-heal is pending for %s.", local->loc.path); - } - - ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT, - &open_fd_count); - if (ret == 0) - local->open_fd_count += open_fd_count; - - ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT, - &inodelk_count); - if (ret == 0) - local->inodelk_count += inodelk_count; - - ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT, - &entrylk_count); - if (ret == 0) - local->entrylk_count += entrylk_count; -} - - -static void -afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local, - struct iatt *buf, struct iatt *lookup_buf) -{ - if (FILETYPE_DIFFERS (buf, lookup_buf)) { - /* mismatching filetypes with same name - */ - - gf_log (this->name, GF_LOG_NORMAL, - "filetype differs for %s ", local->loc.path); - - local->govinda_gOvinda = 1; - } - - if (PERMISSION_DIFFERS (buf, lookup_buf)) { - /* mismatching permissions */ - gf_log (this->name, GF_LOG_NORMAL, - "permissions differ for %s ", local->loc.path); - local->self_heal.need_metadata_self_heal = _gf_true; - } - - if (OWNERSHIP_DIFFERS (buf, lookup_buf)) { - /* mismatching permissions */ - local->self_heal.need_metadata_self_heal = _gf_true; - gf_log (this->name, GF_LOG_NORMAL, - "ownership differs for %s ", local->loc.path); - } - - if (SIZE_DIFFERS (buf, lookup_buf) - && IA_ISREG (buf->ia_type)) { - gf_log (this->name, GF_LOG_NORMAL, - "size differs for %s ", local->loc.path); - local->self_heal.need_data_self_heal = _gf_true; - } - -} - - -static void -afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf) -{ - int unwind = 1; - int source = -1; - char sh_type_str[256] = {0,}; - - afr_local_t *local = NULL; - - local = frame->local; - - local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino; - - if (local->cont.lookup.ino) { - local->cont.lookup.buf.ia_ino = local->cont.lookup.ino; - local->cont.lookup.buf.ia_gen = local->cont.lookup.gen; - } - - if (local->op_ret == 0) { - /* KLUDGE: assuming DHT will not itransform in - revalidate */ - if (local->cont.lookup.inode->ino) { - local->cont.lookup.buf.ia_ino = - local->cont.lookup.inode->ino; - local->cont.lookup.buf.ia_gen = - local->cont.lookup.inode->generation; - } - } - - if (local->success_count && local->enoent_count) { - local->self_heal.need_metadata_self_heal = _gf_true; - local->self_heal.need_data_self_heal = _gf_true; - local->self_heal.need_entry_self_heal = _gf_true; - gf_log(this->name, GF_LOG_NORMAL, - "entries are missing in lookup of %s.", - local->loc.path); - } - - if (local->success_count) { - /* check for split-brain case in previous lookup */ - if (afr_is_split_brain (this, - local->cont.lookup.inode)) { - local->self_heal.need_data_self_heal = _gf_true; - gf_log(this->name, GF_LOG_NORMAL, - "split brain detected during lookup of " - "%s.", local->loc.path); - } - } - - if ((local->self_heal.need_metadata_self_heal - || local->self_heal.need_data_self_heal - || local->self_heal.need_entry_self_heal) - && ((!local->cont.lookup.is_revalidate) - || (local->op_ret != -1))) { - - if (local->open_fd_count - || local->inodelk_count - || local->entrylk_count) { - - /* Someone else is doing self-heal on this file. - So just make a best effort to set the read-subvolume - and return */ - - if (IA_ISREG (local->cont.lookup.inode->ia_type)) { - source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs); - - if (source >= 0) { - afr_set_read_child (this, - local->cont.lookup.inode, - source); - } - } - } else { - if (!local->cont.lookup.inode->ia_type) { - /* fix for RT #602 */ - local->cont.lookup.inode->ia_type = - lookup_buf->ia_type; - } - - local->self_heal.background = _gf_true; - local->self_heal.type = local->cont.lookup.buf.ia_type; - local->self_heal.unwind = afr_self_heal_lookup_unwind; - - unwind = 0; - - afr_self_heal_type_str_get(&local->self_heal, - sh_type_str, - sizeof(sh_type_str)); - - gf_log (this->name, GF_LOG_NORMAL, "background %s " - "self-heal triggered. path: %s", - sh_type_str, local->loc.path); - - afr_self_heal (frame, this); - } - } - - if (unwind) { - AFR_STACK_UNWIND (lookup, frame, local->op_ret, - local->op_errno, - local->cont.lookup.inode, - &local->cont.lookup.buf, - local->cont.lookup.xattr, - &local->cont.lookup.postparent); - } -} - - -/* - * During a lookup, some errors are more "important" than - * others in that they must be given higher priority while - * returning to the user. - * - * The hierarchy is ESTALE > ENOENT > others - * - */ - -static gf_boolean_t -__error_more_important (int32_t old_errno, int32_t new_errno) -{ - gf_boolean_t ret = _gf_true; - - /* Nothing should ever overwrite ESTALE */ - if (old_errno == ESTALE) - ret = _gf_false; - - /* Nothing should overwrite ENOENT, except ESTALE */ - else if ((old_errno == ENOENT) && (new_errno != ESTALE)) - ret = _gf_false; - - return ret; -} - - -int -afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - struct iatt * lookup_buf = NULL; - - int call_count = -1; - int child_index = -1; - int first_up_child = -1; - - child_index = (long) cookie; - priv = this->private; - - LOCK (&frame->lock); - { - local = frame->local; - - lookup_buf = &local->cont.lookup.buf; - - if (op_ret == -1) { - if (op_errno == ENOENT) - local->enoent_count++; - - if (__error_more_important (local->op_errno, op_errno)) - local->op_errno = op_errno; - - if (local->op_errno == ESTALE) { - local->op_ret = -1; - } - - goto unlock; - } - - afr_lookup_collect_xattr (local, this, child_index, xattr); - - first_up_child = afr_first_up_child (priv); - - if (child_index == first_up_child) { - local->cont.lookup.ino = - afr_itransform (buf->ia_ino, - priv->child_count, - first_up_child); - local->cont.lookup.gen = buf->ia_gen; - } - - if (local->success_count == 0) { - if (local->op_errno != ESTALE) - local->op_ret = op_ret; - - local->cont.lookup.inode = inode_ref (inode); - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.xattrs[child_index] = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; - - *lookup_buf = *buf; - - lookup_buf->ia_ino = afr_itransform (buf->ia_ino, - priv->child_count, - child_index); - if (priv->read_child >= 0) { - afr_set_read_child (this, - local->cont.lookup.inode, - priv->read_child); - } else { - afr_set_read_child (this, - local->cont.lookup.inode, - child_index); - } - - } else { - afr_lookup_self_heal_check (this, local, buf, lookup_buf); - - if (child_index == local->read_child_index) { - /* - lookup has succeeded on the read child. - So use its inode number - */ - if (local->cont.lookup.xattr) - dict_unref (local->cont.lookup.xattr); - - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.xattrs[child_index] = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; - - *lookup_buf = *buf; - - if (priv->read_child >= 0) { - afr_set_read_child (this, - local->cont.lookup.inode, - priv->read_child); - } else { - afr_set_read_child (this, - local->cont.lookup.inode, - local->read_child_index); - } - } - - } - - local->success_count++; - } -unlock: - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - afr_lookup_done (frame, this, lookup_buf); - } - - return 0; -} - - -int -afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - struct iatt * lookup_buf = NULL; - - int call_count = -1; - int child_index = -1; - int first_up_child = -1; - - child_index = (long) cookie; - priv = this->private; - - LOCK (&frame->lock); - { - local = frame->local; - - lookup_buf = &local->cont.lookup.buf; - - if (op_ret == -1) { - if (op_errno == ENOENT) - local->enoent_count++; - - if (__error_more_important (local->op_errno, op_errno)) - local->op_errno = op_errno; - - if (local->op_errno == ESTALE) { - local->op_ret = -1; - } - - goto unlock; - } - - afr_lookup_collect_xattr (local, this, child_index, xattr); - - first_up_child = afr_first_up_child (priv); - - if (child_index == first_up_child) { - local->cont.lookup.ino = - afr_itransform (buf->ia_ino, - priv->child_count, - first_up_child); - local->cont.lookup.gen = buf->ia_gen; - } - - /* in case of revalidate, we need to send stat of the - * child whose stat was sent during the first lookup. - * (so that time stamp does not vary with revalidate. - * in case it is down, stat of the fist success will - * be replied */ - - /* inode number should be preserved across revalidates */ - - if (local->success_count == 0) { - if (local->op_errno != ESTALE) - local->op_ret = op_ret; - - local->cont.lookup.inode = inode_ref (inode); - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.xattrs[child_index] = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; - - *lookup_buf = *buf; - - lookup_buf->ia_ino = afr_itransform (buf->ia_ino, - priv->child_count, - child_index); - - if (priv->read_child >= 0) { - afr_set_read_child (this, - local->cont.lookup.inode, - priv->read_child); - } else { - afr_set_read_child (this, - local->cont.lookup.inode, - child_index); - } - - } else { - afr_lookup_self_heal_check (this, local, buf, lookup_buf); - - if (child_index == local->read_child_index) { - - /* - lookup has succeeded on the read child. - So use its inode number - */ - - if (local->cont.lookup.xattr) - dict_unref (local->cont.lookup.xattr); - - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.xattrs[child_index] = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; - - *lookup_buf = *buf; - - if (priv->read_child >= 0) { - afr_set_read_child (this, - local->cont.lookup.inode, - priv->read_child); - } else { - afr_set_read_child (this, - local->cont.lookup.inode, - local->read_child_index); - } - } - - } - - local->success_count++; - } -unlock: - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - afr_lookup_done (frame, this, lookup_buf); - } - - return 0; -} - - -int -afr_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int ret = -1; - int i = 0; - - fop_lookup_cbk_t callback; - - int call_count = 0; - - uint64_t ctx; - - int32_t op_errno = 0; - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - local->op_ret = -1; - - frame->local = local; - - if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) { - op_errno = ENOENT; - goto out; - } - - loc_copy (&local->loc, loc); - - ret = inode_ctx_get (loc->inode, this, &ctx); - if (ret == 0) { - /* lookup is a revalidate */ - - callback = afr_revalidate_lookup_cbk; - - local->cont.lookup.is_revalidate = _gf_true; - local->read_child_index = afr_read_child (this, - loc->inode); - } else { - callback = afr_fresh_lookup_cbk; - - LOCK (&priv->read_child_lock); - { - local->read_child_index = (++priv->read_child_rr) - % (priv->child_count); - } - UNLOCK (&priv->read_child_lock); - } - - if (loc->parent) - local->cont.lookup.parent_ino = loc->parent->ino; - - local->child_up = memdup (priv->child_up, priv->child_count); - - local->cont.lookup.xattrs = GF_CALLOC (priv->child_count, - sizeof (*local->cont.lookup.xattr), - gf_afr_mt_dict_t); - - local->call_count = afr_up_children_count (priv->child_count, - local->child_up); - call_count = local->call_count; - - if (local->call_count == 0) { - ret = -1; - op_errno = ENOTCONN; - goto out; - } - - /* By default assume ENOTCONN. On success it will be set to 0. */ - local->op_errno = ENOTCONN; - - if (xattr_req == NULL) - local->xattr_req = dict_new (); - else - local->xattr_req = dict_ref (xattr_req); - - for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], - 3 * sizeof(int32_t)); - - /* 3 = data+metadata+entry */ - } - - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0); - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0); - ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0); - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, callback, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - loc, local->xattr_req); - if (!--call_count) - break; - } - } - - ret = 0; -out: - if (ret == -1) - AFR_STACK_UNWIND (lookup, frame, -1, op_errno, - NULL, NULL, NULL, NULL); - - return 0; -} - - -/* {{{ open */ - -int -afr_fd_ctx_set (xlator_t *this, fd_t *fd) -{ - afr_private_t * priv = NULL; - - int op_ret = 0; - int ret = 0; - - uint64_t ctx; - afr_fd_ctx_t * fd_ctx = NULL; - - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - - LOCK (&fd->lock); - { - ret = __fd_ctx_get (fd, this, &ctx); - - if (ret == 0) - goto unlock; - - fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t), - gf_afr_mt_afr_fd_ctx_t); - if (!fd_ctx) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - - op_ret = -ENOMEM; - goto unlock; - } - - fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done), - priv->child_count, - gf_afr_mt_char); - if (!fd_ctx->pre_op_done) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_ret = -ENOMEM; - goto unlock; - } - - fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on), - priv->child_count, - gf_afr_mt_char); - if (!fd_ctx->opened_on) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - op_ret = -ENOMEM; - goto unlock; - } - - fd_ctx->child_failed = GF_CALLOC ( - sizeof (*fd_ctx->child_failed), - priv->child_count, - gf_afr_mt_char); - - if (!fd_ctx->child_failed) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - - op_ret = -ENOMEM; - goto unlock; - } - - fd_ctx->up_count = priv->up_count; - fd_ctx->down_count = priv->down_count; - - ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx); - if (ret < 0) { - op_ret = ret; - } - - INIT_LIST_HEAD (&fd_ctx->entries); - } -unlock: - UNLOCK (&fd->lock); -out: - return ret; -} - -/* {{{ flush */ - -int -afr_flush_unwind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - call_frame_t *main_frame = NULL; - - local = frame->local; - priv = this->private; - - LOCK (&frame->lock); - { - if (local->transaction.main_frame) - main_frame = local->transaction.main_frame; - local->transaction.main_frame = NULL; - } - UNLOCK (&frame->lock); - - if (main_frame) { - AFR_STACK_UNWIND (flush, main_frame, - local->op_ret, local->op_errno); - } - - return 0; -} - - -int -afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - - int call_count = -1; - int child_index = (long) cookie; - int need_unwind = 0; - - local = frame->local; - priv = this->private; - - LOCK (&frame->lock); - { - if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_fop_failed (frame, this, child_index); - - if (op_ret != -1) { - if (local->success_count == 0) { - local->op_ret = op_ret; - } - local->success_count++; - - if (local->success_count == priv->wait_count) { - need_unwind = 1; - } - } - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - if (need_unwind) - afr_flush_unwind (frame, this); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->transaction.resume (frame, this); - } - - return 0; -} - - -int -afr_flush_wind (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - int i = 0; - int call_count = -1; - - local = frame->local; - priv = this->private; - - call_count = afr_up_children_count (priv->child_count, local->child_up); - - if (call_count == 0) { - local->transaction.resume (frame, this); - return 0; - } - - local->call_count = call_count; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_flush_wind_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->flush, - local->fd); - - if (!--call_count) - break; - } - } - - return 0; -} - - -int -afr_flush_done (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - - local = frame->local; - - local->transaction.unwind (frame, this); - - AFR_STACK_DESTROY (frame); - - return 0; -} - - -int -afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) - -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno); - - return 0; -} - - -static int -__no_pre_op_done (xlator_t *this, fd_t *fd) -{ - int i = 0; - int op_ret = 1; - - int _ret = 0; - uint64_t ctx; - afr_fd_ctx_t * fd_ctx = NULL; - - afr_private_t *priv = NULL; - - priv = this->private; - - LOCK (&fd->lock); - { - _ret = __fd_ctx_get (fd, this, &ctx); - - if (_ret < 0) { - goto out; - } - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - for (i = 0; i < priv->child_count; i++) { - if (fd_ctx->pre_op_done[i]) { - op_ret = 0; - break; - } - } - } -out: - UNLOCK (&fd->lock); - - return op_ret; -} - - -int -afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - - call_frame_t * transaction_frame = NULL; - - int ret = -1; - - int op_ret = -1; - int op_errno = 0; - - int i = 0; - int call_count = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = afr_up_children_count (priv->child_count, local->child_up); - - if (__no_pre_op_done (this, fd)) { - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_plain_flush_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->flush, - fd); - if (!--call_count) - break; - } - } - } else { - transaction_frame = copy_frame (frame); - if (!transaction_frame) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "Out of memory."); - goto out; - } - - transaction_frame->local = local; - - local->op = GF_FOP_FLUSH; - - local->transaction.fop = afr_flush_wind; - local->transaction.done = afr_flush_done; - local->transaction.unwind = afr_flush_unwind; - - local->fd = fd_ref (fd); - - local->transaction.main_frame = frame; - local->transaction.start = 0; - local->transaction.len = 0; - - afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION); - } - - op_ret = 0; -out: - if (op_ret == -1) { - if (transaction_frame) - AFR_STACK_DESTROY (transaction_frame); - - AFR_STACK_UNWIND (flush, frame, op_ret, op_errno); - } - - return 0; -} - -/* }}} */ - - -int -afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) -{ - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - int ret = 0; - - ret = fd_ctx_get (fd, this, &ctx); - - if (ret < 0) - goto out; - - fd_ctx = (afr_fd_ctx_t *)(long) ctx; - - if (fd_ctx) { - if (fd_ctx->child_failed) - GF_FREE (fd_ctx->child_failed); - - if (fd_ctx->pre_op_done) - GF_FREE (fd_ctx->pre_op_done); - - if (fd_ctx->opened_on) - GF_FREE (fd_ctx->opened_on); - - GF_FREE (fd_ctx); - } - -out: - return 0; -} - - -int -afr_release (xlator_t *this, fd_t *fd) -{ - afr_cleanup_fd_ctx (this, fd); - - return 0; -} - - -/* {{{ fsync */ - -int -afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - afr_local_t *local = NULL; - - int call_count = -1; - - int child_index = (long) cookie; - int read_child = 0; - - local = frame->local; - - read_child = afr_read_child (this, local->fd->inode); - - LOCK (&frame->lock); - { - if (child_index == read_child) { - local->read_child_returned = _gf_true; - } - - if (op_ret == 0) { - local->op_ret = 0; - - if (local->success_count == 0) { - local->cont.fsync.prebuf = *prebuf; - local->cont.fsync.postbuf = *postbuf; - } - - if (child_index == read_child) { - local->cont.fsync.prebuf = *prebuf; - local->cont.fsync.postbuf = *postbuf; - } - - local->success_count++; - } - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino; - local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino; - - AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, - &local->cont.fsync.prebuf, - &local->cont.fsync.postbuf); - } - - return 0; -} - - -int -afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t datasync) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - local->fd = fd_ref (fd); - local->cont.fsync.ino = fd->inode->ino; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_fsync_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fsync, - fd, datasync); - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL); - } - return 0; -} - -/* }}} */ - -/* {{{ fsync */ - -int32_t -afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret, - local->op_errno); - - return 0; -} - - -int32_t -afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t datasync) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fsyncdir_cbk, - priv->children[i], - priv->children[i]->fops->fsyncdir, - fd, datasync); - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno); - } - return 0; -} - -/* }}} */ - -/* {{{ xattrop */ - -int32_t -afr_xattrop_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xattr) -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno, - xattr); - - return 0; -} - - -int32_t -afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t optype, dict_t *xattr) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_xattrop_cbk, - priv->children[i], - priv->children[i]->fops->xattrop, - loc, optype, xattr); - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL); - } - return 0; -} - -/* }}} */ - -/* {{{ fxattrop */ - -int32_t -afr_fxattrop_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *xattr) -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno, - xattr); - - return 0; -} - - -int32_t -afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fxattrop_cbk, - priv->children[i], - priv->children[i]->fops->fxattrop, - fd, optype, xattr); - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL); - } - return 0; -} - -/* }}} */ - - -int32_t -afr_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno); - - return 0; -} - - -int32_t -afr_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct flock *flock) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_inodelk_cbk, - priv->children[i], - priv->children[i]->fops->inodelk, - volume, loc, cmd, flock); - - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno); - } - return 0; -} - - -int32_t -afr_finodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (finodelk, frame, local->op_ret, - local->op_errno); - - return 0; -} - - -int32_t -afr_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct flock *flock) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_finodelk_cbk, - priv->children[i], - priv->children[i]->fops->finodelk, - volume, fd, cmd, flock); - - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - } - return 0; -} - - -int32_t -afr_entrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (entrylk, frame, local->op_ret, - local->op_errno); - - return 0; -} - - -int32_t -afr_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, - const char *basename, entrylk_cmd cmd, entrylk_type type) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_entrylk_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - volume, loc, basename, cmd, type); - - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno); - } - return 0; -} - - - -int32_t -afr_fentrylk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - afr_local_t *local = NULL; - - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (fentrylk, frame, local->op_ret, - local->op_errno); - - return 0; -} - - -int32_t -afr_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, - const char *basename, entrylk_cmd cmd, entrylk_type type) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int ret = -1; - - int i = 0; - int32_t call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - call_count = local->call_count; - frame->local = local; - - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fentrylk_cbk, - priv->children[i], - priv->children[i]->fops->fentrylk, - volume, fd, basename, cmd, type); - - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - } - return 0; -} - -int32_t -afr_statfs_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct statvfs *statvfs) -{ - afr_local_t *local = NULL; - - int call_count = 0; - - LOCK (&frame->lock); - { - local = frame->local; - - if (op_ret == 0) { - local->op_ret = op_ret; - - if (local->cont.statfs.buf_set) { - if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail) - local->cont.statfs.buf = *statvfs; - } else { - local->cont.statfs.buf = *statvfs; - local->cont.statfs.buf_set = 1; - } - } - - if (op_ret == -1) - local->op_errno = op_errno; - - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno, - &local->cont.statfs.buf); - - return 0; -} - - -int32_t -afr_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc) -{ - afr_private_t * priv = NULL; - int child_count = 0; - afr_local_t * local = NULL; - int i = 0; - - int ret = -1; - int call_count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - child_count = priv->child_count; - - ALLOC_OR_GOTO (local, afr_local_t, out); - - ret = AFR_LOCAL_INIT (local, priv); - if (ret < 0) { - op_errno = -ret; - goto out; - } - - frame->local = local; - call_count = local->call_count; - - for (i = 0; i < child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_statfs_cbk, - priv->children[i], - priv->children[i]->fops->statfs, - loc); - if (!--call_count) - break; - } - } - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL); - } - return 0; -} - - -int32_t -afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct flock *lock) -{ - afr_local_t * local = NULL; - - int call_count = -1; - - local = frame->local; - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - lock); - - return 0; -} - - -int32_t -afr_lk_unlock (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - - int i; - int call_count = 0; - - local = frame->local; - priv = this->private; - - call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes, - priv->child_count); - - if (call_count == 0) { - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - &local->cont.lk.flock); - return 0; - } - - local->call_count = call_count; - - local->cont.lk.flock.l_type = F_UNLCK; - - for (i = 0; i < priv->child_count; i++) { - if (local->cont.lk.locked_nodes[i]) { - STACK_WIND (frame, afr_lk_unlock_cbk, - priv->children[i], - priv->children[i]->fops->lk, - local->fd, F_SETLK, - &local->cont.lk.flock); - - if (!--call_count) - break; - } - } - - return 0; -} - - -int32_t -afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct flock *lock) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - - int call_count = -1; - int child_index = -1; - - local = frame->local; - priv = this->private; - - child_index = (long) cookie; - - call_count = --local->call_count; - - if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) { - local->op_ret = -1; - local->op_errno = op_errno; - - afr_lk_unlock (frame, this); - return 0; - } - - if (op_ret == 0) { - local->op_ret = 0; - local->op_errno = 0; - local->cont.lk.locked_nodes[child_index] = 1; - local->cont.lk.flock = *lock; - } - - child_index++; - - if (child_index < priv->child_count) { - STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index, - priv->children[child_index], - priv->children[child_index]->fops->lk, - local->fd, local->cont.lk.cmd, - &local->cont.lk.flock); - } else if (local->op_ret == -1) { - /* all nodes have gone down */ - - AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, &local->cont.lk.flock); - } else { - /* locking has succeeded on all nodes that are up */ - - AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - &local->cont.lk.flock); - } - - return 0; -} - - -int -afr_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, - struct flock *flock) -{ - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - - int i = 0; - - int32_t op_ret = -1; - int32_t op_errno = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - ALLOC_OR_GOTO (local, afr_local_t, out); - AFR_LOCAL_INIT (local, priv); - - frame->local = local; - - local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count, - sizeof (*local->cont.lk.locked_nodes), - gf_afr_mt_char); - - if (!local->cont.lk.locked_nodes) { - gf_log (this->name, GF_LOG_ERROR, "Out of memory"); - op_errno = ENOMEM; - goto out; - } - - local->fd = fd_ref (fd); - local->cont.lk.cmd = cmd; - local->cont.lk.flock = *flock; - - STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0, - priv->children[i], - priv->children[i]->fops->lk, - fd, cmd, flock); - - op_ret = 0; -out: - if (op_ret == -1) { - AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL); - } - return 0; -} - -int -afr_priv_dump (xlator_t *this) -{ - afr_private_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - - - assert(this); - priv = this->private; - - assert(priv); - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); - gf_proc_dump_add_section(key_prefix); - gf_proc_dump_build_key(key, key_prefix, "child_count"); - gf_proc_dump_write(key, "%u", priv->child_count); - gf_proc_dump_build_key(key, key_prefix, "read_child_rr"); - gf_proc_dump_write(key, "%u", priv->read_child_rr); - for (i = 0; i < priv->child_count; i++) { - gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i); - gf_proc_dump_write(key, "%d", priv->child_up[i]); - gf_proc_dump_build_key(key, key_prefix, - "pending_key[%d]", i); - gf_proc_dump_write(key, "%s", priv->pending_key[i]); - } - gf_proc_dump_build_key(key, key_prefix, "data_self_heal"); - gf_proc_dump_write(key, "%d", priv->data_self_heal); - gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal"); - gf_proc_dump_write(key, "%d", priv->metadata_self_heal); - gf_proc_dump_build_key(key, key_prefix, "entry_self_heal"); - gf_proc_dump_write(key, "%d", priv->entry_self_heal); - gf_proc_dump_build_key(key, key_prefix, "data_change_log"); - gf_proc_dump_write(key, "%d", priv->data_change_log); - gf_proc_dump_build_key(key, key_prefix, "metadata_change_log"); - gf_proc_dump_write(key, "%d", priv->metadata_change_log); - gf_proc_dump_build_key(key, key_prefix, "entry_change_log"); - gf_proc_dump_write(key, "%d", priv->entry_change_log); - gf_proc_dump_build_key(key, key_prefix, "read_child"); - gf_proc_dump_write(key, "%d", priv->read_child); - gf_proc_dump_build_key(key, key_prefix, "favorite_child"); - gf_proc_dump_write(key, "%u", priv->favorite_child); - gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count"); - gf_proc_dump_write(key, "%u", priv->data_lock_server_count); - gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count"); - gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count); - gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count"); - gf_proc_dump_write(key, "%u", priv->entry_lock_server_count); - gf_proc_dump_build_key(key, key_prefix, "wait_count"); - gf_proc_dump_write(key, "%u", priv->wait_count); - - return 0; -} - - -/** - * find_child_index - find the child's index in the array of subvolumes - * @this: AFR - * @child: child - */ - -static int -find_child_index (xlator_t *this, xlator_t *child) -{ - afr_private_t *priv = NULL; - - int i = -1; - - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if ((xlator_t *) child == priv->children[i]) - break; - } - - return i; -} - +#include "afr-common.c" int32_t notify (xlator_t *this, int32_t event, void *data, ...) { - afr_private_t * priv = NULL; - unsigned char * child_up = NULL; - - int i = -1; - int up_children = 0; - - priv = this->private; - - if (!priv) - return 0; - - child_up = priv->child_up; - - switch (event) { - case GF_EVENT_CHILD_UP: - i = find_child_index (this, data); - - child_up[i] = 1; - - LOCK (&priv->lock); - { - priv->up_count++; - } - UNLOCK (&priv->lock); - - /* - if all the children were down, and one child came up, - send notify to parent - */ - - for (i = 0; i < priv->child_count; i++) - if (child_up[i]) - up_children++; - - if (up_children == 1) { - gf_log (this->name, GF_LOG_NORMAL, - "Subvolume '%s' came back up; " - "going online.", ((xlator_t *)data)->name); - - default_notify (this, event, data); - } + int ret = -1; - break; + ret = afr_notify (this, event, data); - case GF_EVENT_CHILD_DOWN: - i = find_child_index (this, data); - - child_up[i] = 0; - - LOCK (&priv->lock); - { - priv->down_count++; - } - UNLOCK (&priv->lock); - - /* - if all children are down, and this was the last to go down, - send notify to parent - */ - - for (i = 0; i < priv->child_count; i++) - if (child_up[i]) - up_children++; - - if (up_children == 0) { - gf_log (this->name, GF_LOG_ERROR, - "All subvolumes are down. Going offline " - "until atleast one of them comes back up."); - - default_notify (this, event, data); - } - - break; - - default: - default_notify (this, event, data); - } - - return 0; + return ret; } int32_t @@ -2624,7 +50,7 @@ mem_acct_init (xlator_t *this) return ret; ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1); - + if (ret != 0) { gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" "failed"); @@ -2727,7 +153,7 @@ init (xlator_t *this) "Defaulting to data-self-heal as 'on'", self_heal); priv->data_self_heal = 1; - } + } } priv->data_self_heal_algorithm = ""; @@ -2953,6 +379,10 @@ init (xlator_t *this) i++; } + LOCK_INIT (&priv->root_inode_lk); + priv->first_lookup = 1; + priv->root_inode = NULL; + ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 125f5c2a2f4..3fa987ee83d 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -32,6 +32,8 @@ #define AFR_XATTR_PREFIX "trusted.afr" +struct _pump_private; + typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ @@ -41,6 +43,10 @@ typedef struct _afr_private { xlator_t **children; + gf_lock_t root_inode_lk; + int first_lookup; + inode_t *root_inode; + unsigned char *child_up; char **pending_key; @@ -73,6 +79,9 @@ typedef struct _afr_private { uint64_t up_count; /* number of CHILD_UPs we have seen */ uint64_t down_count; /* number of CHILD_DOWNs we have seen */ + + struct _pump_private *pump_private; /* Set if we are loaded as pump */ + gf_boolean_t pump_loaded; } afr_private_t; typedef struct { @@ -205,6 +214,7 @@ typedef struct _afr_local { unsigned int success_count; unsigned int enoent_count; + unsigned int govinda_gOvinda; unsigned int read_child_index; @@ -576,6 +586,18 @@ typedef struct { #define all_tried(i, count) ((i) == (count) - 1) int +pump_command_reply (call_frame_t *frame, xlator_t *this); + +int32_t +afr_notify (xlator_t *this, int32_t event, + void *data, ...); + +void +afr_set_lk_owner (call_frame_t *frame, xlator_t *this); + +int pump_start (call_frame_t *frame, xlator_t *this); + +int afr_fd_ctx_set (xlator_t *this, fd_t *fd); uint64_t diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c new file mode 100644 index 00000000000..e69c028450b --- /dev/null +++ b/xlators/cluster/afr/src/pump.c @@ -0,0 +1,1817 @@ +/* + Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <sys/time.h> +#include <stdlib.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "afr-common.c" + +pump_state_t +pump_get_state () +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + pump_state_t ret; + + this = THIS; + priv = this->private; + pump_priv = priv->pump_private; + + LOCK (&pump_priv->pump_state_lock); + { + ret = pump_priv->pump_state; + } + UNLOCK (&pump_priv->pump_state_lock); + + return ret; +} + +int +pump_change_state (xlator_t *this, pump_state_t state) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + pump_state_t state_old; + pump_state_t state_new; + + unsigned char * child_up = NULL; + int i = 0; + + + priv = this->private; + pump_priv = priv->pump_private; + + child_up = priv->child_up; + + assert (pump_priv); + + LOCK (&pump_priv->pump_state_lock); + { + state_old = pump_priv->pump_state; + state_new = state; + + pump_priv->pump_state = state; + + switch (pump_priv->pump_state) { + case PUMP_STATE_RESUME: + case PUMP_STATE_RUNNING: + case PUMP_STATE_PAUSE: + { + priv->pump_loaded = _gf_true; + i = 1; + + child_up[i] = 1; + + LOCK (&priv->lock); + { + priv->up_count++; + } + UNLOCK (&priv->lock); + + break; + } + case PUMP_STATE_ABORT: + { + priv->pump_loaded = _gf_false; + i = 1; + + child_up[i] = 0; + + LOCK (&priv->lock); + { + priv->down_count++; + } + UNLOCK (&priv->lock); + + LOCK (&pump_priv->resume_path_lock); + { + pump_priv->number_files_pumped = 0; + } + UNLOCK (&pump_priv->resume_path_lock); + + + break; + } + + } + } + UNLOCK (&pump_priv->pump_state_lock); + + gf_log (this->name, GF_LOG_DEBUG, + "Pump changing state from %d to %d", + state_old, + state_new); + + return 0; +} + +static int +pump_set_resume_path (xlator_t *this, const char *path) +{ + int ret = 0; + + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + priv = this->private; + pump_priv = priv->pump_private; + + assert (pump_priv); + + LOCK (&pump_priv->resume_path_lock); + { + pump_priv->resume_path = strdup (path); + if (!pump_priv->resume_path) + ret = -1; + } + UNLOCK (&pump_priv->resume_path_lock); + + return ret; +} + +static void +build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) +{ + child->path = path; + child->name = name; + + child->parent = inode_ref (parent->inode); + child->inode = inode_new (parent->inode->table); +} + +static char * +build_file_path (loc_t *loc, gf_dirent_t *entry) +{ + xlator_t *this = NULL; + char *file_path = NULL; + int pathlen = 0; + int total_size = 0; + + this = THIS; + + pathlen = STRLEN_0 (loc->path); + + if (IS_ROOT_PATH (loc->path)) { + total_size = pathlen + entry->d_len; + file_path = GF_CALLOC (1, total_size, gf_afr_mt_char); + if (!file_path) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + return NULL; + } + + gf_log (this->name, GF_LOG_TRACE, + "constructing file path of size=%d" + "pathlen=%d, d_len=%d", + total_size, pathlen, + entry->d_len); + + snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name); + + } else { + total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ + file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); + if (!file_path) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + return NULL; + } + + gf_log (this->name, GF_LOG_TRACE, + "constructing file path of size=%d" + "pathlen=%d, d_len=%d", + total_size, pathlen, + entry->d_len); + + snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name); + } + + gf_log (this->name, GF_LOG_TRACE, + "path=%s and d_name=%s", loc->path, entry->d_name); + gf_log (this->name, GF_LOG_TRACE, + "constructed file_path=%s of size=%d", file_path, total_size); + + return file_path; +} + +static int +pump_check_and_update_status (xlator_t *this) +{ + pump_state_t state; + int ret = -1; + + state = pump_get_state (); + + switch (state) { + + case PUMP_STATE_RESUME: + case PUMP_STATE_RUNNING: + { + ret = 0; + break; + } + case PUMP_STATE_PAUSE: + case PUMP_STATE_ABORT: + { + ret = -1; + break; + } + default: + { + gf_log (this->name, GF_LOG_DEBUG, + "Unknown pump state"); + ret = -1; + break; + } + + } + + return ret; +} + +static const char * +pump_get_resume_path (xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + const char *resume_path = NULL; + + priv = this->private; + pump_priv = priv->pump_private; + + resume_path = pump_priv->resume_path; + + return resume_path; +} + +static int +pump_update_resume_state (xlator_t *this, const char *path) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + pump_state_t state; + const char *resume_path = NULL; + + priv = this->private; + pump_priv = priv->pump_private; + + state = pump_get_state (); + + if (state == PUMP_STATE_RESUME) { + resume_path = pump_get_resume_path (this); + if (strcmp (resume_path, "/") == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Reached the resume path (/). Proceeding to change state" + " to running"); + pump_change_state (this, PUMP_STATE_RUNNING); + } else if (strcmp (resume_path, path) == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Reached the resume path. Proceeding to change state" + " to running"); + pump_change_state (this, PUMP_STATE_RUNNING); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Not yet hit the resume path:res-path=%s,path=%s", + resume_path, path); + } + } + + return 0; +} + +static gf_boolean_t +is_pump_traversal_allowed (xlator_t *this, const char *path) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + pump_state_t state; + const char *resume_path = NULL; + gf_boolean_t ret = _gf_true; + + priv = this->private; + pump_priv = priv->pump_private; + + state = pump_get_state (); + + if (state == PUMP_STATE_RESUME) { + resume_path = pump_get_resume_path (this); + if (strstr (resume_path, path)) { + gf_log (this->name, GF_LOG_DEBUG, + "On the right path to resumption path"); + ret = _gf_true; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Not the right path to resuming=> ignoring traverse"); + ret = _gf_false; + } + } + + return ret; +} + +static int +pump_update_file_stats (xlator_t *this, long source_blocks, + long sink_blocks) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + priv = this->private; + pump_priv = priv->pump_private; + + LOCK (&pump_priv->resume_path_lock); + { + pump_priv->source_blocks = source_blocks; + pump_priv->sink_blocks = sink_blocks; + } + UNLOCK (&pump_priv->resume_path_lock); + + return 0; +} + +static int +pump_save_file_stats (xlator_t *this) +{ + afr_private_t *priv = NULL; + struct statvfs source_buf = {0, }; + struct statvfs sink_buf = {0, }; + loc_t loc; + int ret = -1; + + priv = this->private; + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &loc); + + ret = syncop_statfs (PUMP_SOURCE_CHILD (this), + &loc, &source_buf); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "source statfs failed"); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "source statfs succeeded"); + } + + + ret = syncop_statfs (PUMP_SOURCE_CHILD (this), + &loc, &sink_buf); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "sink statfs failed"); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "sink statfs succeeded"); + } + + pump_update_file_stats (this, + source_buf.f_blocks, + sink_buf.f_blocks); + + return 0; + +} +static int +pump_save_path (xlator_t *this, const char *path) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + pump_state_t state; + dict_t *dict = NULL; + loc_t loc; + int dict_ret = 0; + int ret = -1; + + state = pump_get_state (); + if (state != PUMP_STATE_RUNNING) + return 0; + + priv = this->private; + pump_priv = priv->pump_private; + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &loc); + + dict = dict_new (); + dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path); + +// ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0); + ret = 0; + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr failed - could not save path=%s", path); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr succeeded - saved path=%s", path); + gf_log (this->name, GF_LOG_DEBUG, + "Saving path for status info"); + + LOCK (&pump_priv->resume_path_lock); + { + pump_priv->number_files_pumped++; + + strncpy (pump_priv->current_file, path, + PATH_MAX); + } + UNLOCK (&pump_priv->resume_path_lock); + + } + + dict_unref (dict); + + return 0; +} + +static int +gf_pump_traverse_directory (loc_t *loc) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + + off_t offset = 0; + loc_t entry_loc; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + gf_dirent_t entries; + + struct iatt iatt, parent; + dict_t *xattr_rsp; + + int source = 0; + + char *file_path = NULL; + int ret = 0; + + INIT_LIST_HEAD (&entries.list); + this = THIS; + priv = this->private; + + assert (loc->inode); + + fd = fd_create (loc->inode, PUMP_PID); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create fd for %s", loc->path); + goto out; + } + + ret = syncop_opendir (priv->children[source], loc, fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "opendir failed on %s", loc->path); + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "pump opendir on %s returned=%d", + loc->path, ret); + + while (syncop_readdirp (priv->children[source], fd, 131072, offset, &entries)) { + + if (list_empty (&entries.list)) { + gf_log (this->name, GF_LOG_TRACE, + "no more entries in directory"); + goto out; + } + + list_for_each_entry_safe (entry, tmp, &entries.list, list) { + gf_log (this->name, GF_LOG_DEBUG, + "found readdir entry=%s", entry->d_name); + + file_path = build_file_path (loc, entry); + if (!file_path) { + gf_log (this->name, GF_LOG_DEBUG, + "file path construction failed"); + goto out; + } + + build_child_loc (loc, &entry_loc, file_path, entry->d_name); + + ret = syncop_lookup (this, &entry_loc, NULL, + &iatt, &xattr_rsp, &parent); + + entry_loc.ino = iatt.ia_ino; + entry_loc.inode->ino = iatt.ia_ino; + + gf_log (this->name, GF_LOG_DEBUG, + "lookup %s => %"PRId64, + entry_loc.path, + iatt.ia_ino); + + pump_update_resume_state (this, entry_loc.path); + + if (!IS_ENTRY_CWD(entry->d_name) && + !IS_ENTRY_PARENT (entry->d_name)) { + pump_save_path (this, entry_loc.path); + pump_save_file_stats (this); + } + + ret = pump_check_and_update_status (this); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Pump beginning to exit out"); + goto out; + } + + gf_log (this->name, GF_LOG_TRACE, + "type of file=%d, IFDIR=%d", + iatt.ia_type, IA_IFDIR); + + if (IA_ISDIR (iatt.ia_type) && !IS_ENTRY_CWD(entry->d_name) && + !IS_ENTRY_PARENT (entry->d_name)) { + if (is_pump_traversal_allowed (this, entry_loc.path)) { + gf_log (this->name, GF_LOG_TRACE, + "entering dir=%s", + entry->d_name); + gf_pump_traverse_directory (&entry_loc); + } + } + + offset = entry->d_off; + loc_wipe (&entry_loc); + } + + gf_dirent_free (&entries); + gf_log (this->name, GF_LOG_TRACE, + "offset incremented to %d", + (int32_t ) offset); + + } + +out: + return 0; + +} + +void +build_root_loc (inode_t *inode, loc_t *loc) +{ + loc->path = "/"; + loc->name = ""; + loc->inode = inode; + loc->ino = 1; + loc->inode->ino = 1; + +} + +static int +pump_update_resume_path (xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + const char *resume_path = NULL; + + priv = this->private; + pump_priv = priv->pump_private; + + resume_path = pump_get_resume_path (this); + + if (resume_path) { + gf_log (this->name, GF_LOG_DEBUG, + "Found a path to resume from: %s", + resume_path); + + }else { + gf_log (this->name, GF_LOG_DEBUG, + "Did not find a path=> setting to '/'"); + pump_set_resume_path (this, "/"); + } + + pump_change_state (this, PUMP_STATE_RESUME); + + return 0; +} + +static int +pump_complete_migration (xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + dict_t *dict = NULL; + pump_state_t state; + loc_t loc; + int dict_ret = 0; + int ret = -1; + + priv = this->private; + pump_priv = priv->pump_private; + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &loc); + + dict = dict_new (); + + state = pump_get_state (); + if (state == PUMP_STATE_RUNNING) { + gf_log (this->name, GF_LOG_DEBUG, + "Pump finished pumping"); + + pump_priv->pump_finished = _gf_true; + + dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon"); + + ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr failed - while notifying source complete"); + } + dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon"); + + ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr failed - while notifying sink complete"); + } + } + + return 0; +} + +static int +pump_task (void *data) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + + + loc_t loc; + struct iatt iatt, parent; + dict_t *xattr_rsp; + + int ret = -1; + + this = THIS; + priv = this->private; + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &loc); + + ret = syncop_lookup (this, &loc, NULL, + &iatt, &xattr_rsp, &parent); + + gf_log (this->name, GF_LOG_TRACE, + "lookup: ino=%"PRId64", path=%s", + loc.ino, + loc.path); + + ret = pump_check_and_update_status (this); + if (ret < 0) { + goto out; + } + + pump_update_resume_path (this); + + gf_pump_traverse_directory (&loc); + + pump_complete_migration (this); +out: + return 0; +} + + +static int +pump_task_completion (int ret, void *data) +{ + xlator_t *this = NULL; + call_frame_t *frame = NULL; + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + this = THIS; + + frame = (call_frame_t *) data; + + priv = this->private; + pump_priv = priv->pump_private; + + inode_unref (priv->root_inode); + + gf_log (this->name, GF_LOG_DEBUG, + "Pump xlator exiting"); + return 0; +} + +int +pump_start (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + call_frame_t *pump_frame = NULL; + + int ret = -1; + + priv = this->private; + pump_priv = priv->pump_private; + + pump_frame = copy_frame (frame); + + if (!pump_frame->root->lk_owner) + pump_frame->root->lk_owner = PUMP_LK_OWNER; + + ret = synctask_new (pump_priv->env, pump_task, pump_task_completion, + pump_frame); + + if (ret != -1) { + gf_log (this->name, GF_LOG_TRACE, + "setting pump as started"); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "starting pump failed"); + pump_change_state (this, PUMP_STATE_ABORT); + } + + return ret; +} + +gf_boolean_t +is_pump_loaded (xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + gf_boolean_t ret; + + priv = this->private; + pump_priv = priv->pump_private; + + if (priv->pump_loaded) { + gf_log (this->name, GF_LOG_DEBUG, + "Pump is already started"); + ret = _gf_true; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Pump is not started"); + ret = _gf_false; + } + + return ret; + +} + +int32_t +pump_cmd_start_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + afr_local_t *local = NULL; + char *path = NULL; + + pump_state_t state; + int ret = 0; + int dict_ret = -1; + + local = frame->local; + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "getxattr failed - changing pump state to RUNNING with '/'"); + path = "/"; + } else { + gf_log (this->name, GF_LOG_TRACE, + "getxattr succeeded"); + + dict_ret = dict_get_str (dict, PUMP_PATH, &path); + if (dict_ret < 0) + path = "/"; + } + + state = pump_get_state (); + if ((state == PUMP_STATE_RUNNING) || + (state == PUMP_STATE_RESUME)) { + gf_log (this->name, GF_LOG_ERROR, + "Pump is already started"); + ret = -1; + goto out; + } + + pump_set_resume_path (this, path); + pump_change_state (this, PUMP_STATE_RUNNING); + + ret = pump_start (frame, this); + +out: + local->op_ret = ret; + pump_command_reply (frame, this); + return 0; +} + +int +pump_execute_status (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + pump_private_t *pump_priv = NULL; + + uint64_t number_files = 0; + + char filename[PATH_MAX]; + char *dict_str = NULL; + + int32_t op_ret = 0; + int32_t op_errno = 0; + + dict_t *dict = NULL; + int ret = -1; + + priv = this->private; + pump_priv = priv->pump_private; + + LOCK (&pump_priv->resume_path_lock); + { + number_files = pump_priv->number_files_pumped; + strncpy (filename, pump_priv->current_file, PATH_MAX); + } + UNLOCK (&pump_priv->resume_path_lock); + + dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char); + if (!dict_str) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (pump_priv->pump_finished) { + snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ", + number_files); + } else { + snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ", + number_files, filename); + } + + dict = dict_new (); + + ret = dict_set_str (dict, PUMP_CMD_STATUS, dict_str); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_str returned negative value"); + } + + op_ret = 0; + +out: + + AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict); + + dict_unref (dict); + GF_FREE (dict_str); + + return 0; +} + +int +pump_execute_pause (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + pump_change_state (this, PUMP_STATE_PAUSE); + + local->op_ret = 0; + pump_command_reply (frame, this); + + return 0; +} + +int +pump_execute_start (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = 0; + loc_t loc; + + priv = this->private; + local = frame->local; + + if (!priv->root_inode) { + gf_log (this->name, GF_LOG_NORMAL, + "Pump xlator cannot be started without an initial lookup"); + ret = -1; + goto out; + } + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &loc); + + STACK_WIND (frame, + pump_cmd_start_getxattr_cbk, + PUMP_SOURCE_CHILD(this), + PUMP_SOURCE_CHILD(this)->fops->getxattr, + &loc, + PUMP_PATH); + + ret = 0; + +out: + if (ret < 0) { + local->op_ret = ret; + pump_command_reply (frame, this); + } + + return 0; +} + +int32_t +pump_cmd_abort_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Aborting pump failed. Please remove xattr" + PUMP_PATH "of the source child's '/'"); + local->op_ret = -1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "remove xattr succeeded"); + local->op_ret = 0; + } + + pump_change_state (this, PUMP_STATE_ABORT); + + pump_command_reply (frame, this); + return 0; +} + +int +pump_execute_abort (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + pump_private_t *pump_priv = NULL; + loc_t root_loc; + int ret = 0; + + priv = this->private; + local = frame->local; + pump_priv = priv->pump_private; + + if (!priv->pump_loaded) { + gf_log (this->name, GF_LOG_ERROR, + "Trying to abort pump xlator which is not loaded"); + ret = -1; + goto out; + } + + assert (priv->root_inode); + + build_root_loc (priv->root_inode, &root_loc); + + STACK_WIND (frame, + pump_cmd_abort_removexattr_cbk, + PUMP_SOURCE_CHILD (this), + PUMP_SOURCE_CHILD (this)->fops->removexattr, + &root_loc, + PUMP_PATH); + + ret = 0; + +out: + if (ret < 0) { + local->op_ret = ret; + pump_command_reply (frame, this); + } + + return 0; +} + +gf_boolean_t +pump_command_status (xlator_t *this, dict_t *dict) +{ + char *cmd = NULL; + int dict_ret = -1; + int ret = _gf_true; + + dict_ret = dict_get_str (dict, PUMP_CMD_STATUS, &cmd); + if (dict_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Not a pump status command"); + ret = _gf_false; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Hit a pump command - status"); + ret = _gf_true; + +out: + return ret; + +} + +gf_boolean_t +pump_command_pause (xlator_t *this, dict_t *dict) +{ + char *cmd = NULL; + int dict_ret = -1; + int ret = _gf_true; + + dict_ret = dict_get_str (dict, PUMP_CMD_PAUSE, &cmd); + if (dict_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Not a pump pause command"); + ret = _gf_false; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Hit a pump command - pause"); + ret = _gf_true; + +out: + return ret; + +} + +gf_boolean_t +pump_command_abort (xlator_t *this, dict_t *dict) +{ + char *cmd = NULL; + int dict_ret = -1; + int ret = _gf_true; + + dict_ret = dict_get_str (dict, PUMP_CMD_ABORT, &cmd); + if (dict_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Not a pump abort command"); + ret = _gf_false; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Hit a pump command - abort"); + ret = _gf_true; + +out: + return ret; + +} + +gf_boolean_t +pump_command_start (xlator_t *this, dict_t *dict) +{ + char *cmd = NULL; + int dict_ret = -1; + int ret = _gf_true; + + dict_ret = dict_get_str (dict, PUMP_CMD_START, &cmd); + if (dict_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "Not a pump start command"); + ret = _gf_false; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Hit a pump command - start"); + ret = _gf_true; + +out: + return ret; + +} + +struct _xattr_key { + char *key; + struct list_head list; +}; + +static void +__gather_xattr_keys (dict_t *dict, char *key, data_t *value, + void *data) +{ + struct list_head * list = data; + struct _xattr_key * xkey = NULL; + + if (!strncmp (key, AFR_XATTR_PREFIX, + strlen (AFR_XATTR_PREFIX))) { + + xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key); + if (!xkey) + return; + + xkey->key = key; + INIT_LIST_HEAD (&xkey->list); + + list_add_tail (&xkey->list, list); + } +} + +static void +__filter_xattrs (dict_t *dict) +{ + struct list_head keys; + + struct _xattr_key *key; + struct _xattr_key *tmp; + + INIT_LIST_HEAD (&keys); + + dict_foreach (dict, __gather_xattr_keys, + (void *) &keys); + + list_for_each_entry_safe (key, tmp, &keys, list) { + dict_del (dict, key->key); + + list_del_init (&key->list); + + GF_FREE (key); + } +} + +int32_t +pump_getxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + int read_child = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + read_child = (long) cookie; + + if (op_ret == -1) { + retry: + last_tried = local->cont.getxattr.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.getxattr.last_tried; + + if (this_try == read_child) { + goto retry; + } + + unwind = 0; + STACK_WIND_COOKIE (frame, pump_getxattr_cbk, + (void *) (long) read_child, + children[this_try], + children[this_try]->fops->getxattr, + &local->loc, + local->cont.getxattr.name); + } + +out: + if (unwind) { + if (op_ret >= 0 && dict) + __filter_xattrs (dict); + + AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict); + } + + return 0; +} + +int32_t +pump_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t * local = NULL; + + int read_child = -1; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + frame->local = local; + + if (name) { + if (!strncmp (name, AFR_XATTR_PREFIX, + strlen (AFR_XATTR_PREFIX))) { + + op_errno = ENODATA; + goto out; + } + + if (!strcmp (name, PUMP_CMD_STATUS)) { + gf_log (this->name, GF_LOG_DEBUG, + "Hit pump command - status"); + pump_execute_status (frame, this); + op_ret = 0; + goto out; + } + } + + read_child = afr_read_child (this, loc->inode); + + if (read_child >= 0) { + call_child = read_child; + + local->cont.getxattr.last_tried = -1; + } else { + call_child = afr_first_up_child (priv); + + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_DEBUG, + "no child is up"); + goto out; + } + + local->cont.getxattr.last_tried = call_child; + } + + loc_copy (&local->loc, loc); + if (name) + local->cont.getxattr.name = gf_strdup (name); + + STACK_WIND_COOKIE (frame, pump_getxattr_cbk, + (void *) (long) call_child, + children[call_child], children[call_child]->fops->getxattr, + loc, name); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL); + } + return 0; +} + +static int +afr_setxattr_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (setxattr, main_frame, + local->op_ret, local->op_errno) + } + return 0; +} + +static int +afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + local->success_count++; + + if (local->success_count == priv->child_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + +static int +afr_setxattr_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->setxattr, + &local->loc, + local->cont.setxattr.dict, + local->cont.setxattr.flags); + + if (!--call_count) + break; + } + } + + return 0; +} + + +static int +afr_setxattr_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +int32_t +pump_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int +pump_command_reply (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) + gf_log (this->name, GF_LOG_NORMAL, + "Command failed"); + else + gf_log (this->name, GF_LOG_NORMAL, + "Command succeeded"); + + AFR_STACK_UNWIND (setxattr, + frame, + local->op_ret, + local->op_errno); + + return 0; +} + +int +pump_parse_command (call_frame_t *frame, xlator_t *this, + afr_local_t *local, dict_t *dict) +{ + + int ret = -1; + + if (pump_command_start (this, dict)) { + frame->local = local; + ret = pump_execute_start (frame, this); + + } else if (pump_command_pause (this, dict)) { + frame->local = local; + ret = pump_execute_pause (frame, this); + + } else if (pump_command_abort (this, dict)) { + frame->local = local; + ret = pump_execute_abort (frame, this); + } + return ret; +} + +int +pump_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int32_t flags) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + ret = pump_parse_command (frame, this, + local, dict); + if (ret >= 0) { + op_ret = 0; + goto out; + } + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + goto out; + } + + transaction_frame->local = local; + + local->op_ret = -1; + + local->cont.setxattr.dict = dict_ref (dict); + local->cont.setxattr.flags = flags; + + local->transaction.fop = afr_setxattr_wind; + local->transaction.done = afr_setxattr_done; + local->transaction.unwind = afr_setxattr_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = LLONG_MAX - 1; + local->transaction.len = 0; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno); + } + + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +int32_t +notify (xlator_t *this, int32_t event, + void *data, ...) +{ + int ret = -1; + + switch (event) { + case GF_EVENT_CHILD_DOWN: + pump_change_state (this, PUMP_STATE_ABORT); + break; + } + + ret = afr_notify (this, event, data); + + return ret; +} + +int32_t +init (xlator_t *this) +{ + afr_private_t * priv = NULL; + pump_private_t *pump_priv = NULL; + int child_count = 0; + xlator_list_t * trav = NULL; + int i = 0; + int ret = -1; + int op_errno = 0; + + int source_child = 0; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "pump translator needs a source and sink" + "subvolumes defined."); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling."); + } + + ALLOC_OR_GOTO (this->private, afr_private_t, out); + + priv = this->private; + + priv->read_child = source_child; + priv->favorite_child = source_child; + priv->background_self_heal_count = 0; + + priv->data_self_heal = 1; + priv->metadata_self_heal = 1; + priv->entry_self_heal = 1; + + priv->data_self_heal_algorithm = ""; + + priv->data_self_heal_window_size = 16; + + priv->data_change_log = 1; + priv->metadata_change_log = 1; + priv->entry_change_log = 1; + + /* Locking options */ + + /* Lock server count infact does not matter. Locks are held + on all subvolumes, in this case being the source + and the sink. + */ + + priv->data_lock_server_count = 2; + priv->metadata_lock_server_count = 2; + priv->entry_lock_server_count = 2; + + priv->strict_readdir = _gf_false; + + trav = this->children; + while (trav) { + child_count++; + trav = trav->next; + } + + priv->wait_count = 1; + + if (child_count != 2) { + gf_log (this->name, GF_LOG_ERROR, + "There should be exactly 2 children - one source " + "and one sink"); + return -1; + } + priv->child_count = child_count; + + LOCK_INIT (&priv->lock); + LOCK_INIT (&priv->read_child_lock); + + priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, + gf_afr_mt_char); + if (!priv->child_up) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_errno = ENOMEM; + goto out; + } + + priv->children = GF_CALLOC (sizeof (xlator_t *), child_count, + gf_afr_mt_xlator_t); + if (!priv->children) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_errno = ENOMEM; + goto out; + } + + priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key), + child_count, + gf_afr_mt_char); + if (!priv->pending_key) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_errno = ENOMEM; + goto out; + } + + trav = this->children; + i = 0; + while (i < child_count) { + priv->children[i] = trav->xlator; + + ret = asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + trav->xlator->name); + if (-1 == ret) { + gf_log (this->name, GF_LOG_ERROR, + "asprintf failed to set pending key"); + op_errno = ENOMEM; + goto out; + } + + trav = trav->next; + i++; + } + + priv->first_lookup = 1; + priv->root_inode = NULL; + + pump_priv = GF_CALLOC (1, sizeof (*pump_priv), + gf_afr_mt_pump_priv); + if (!pump_priv) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_errno = ENOMEM; + goto out; + } + + LOCK_INIT (&pump_priv->resume_path_lock); + LOCK_INIT (&pump_priv->pump_state_lock); + + pump_priv->resume_path = GF_CALLOC (1, PATH_MAX, + gf_afr_mt_char); + if (!pump_priv->resume_path) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + pump_priv->env = syncenv_new (0); + if (!pump_priv->env) { + gf_log (this->name, GF_LOG_ERROR, + "Could not create new sync-environment"); + ret = -1; + goto out; + } + + priv->pump_private = pump_priv; + + pump_change_state (this, PUMP_STATE_ABORT); + + ret = 0; +out: + return ret; +} + +int +fini (xlator_t *this) +{ + return 0; +} + + +struct xlator_fops fops = { + .lookup = afr_lookup, + .open = afr_open, + .lk = afr_lk, + .flush = afr_flush, + .statfs = afr_statfs, + .fsync = afr_fsync, + .fsyncdir = afr_fsyncdir, + .xattrop = afr_xattrop, + .fxattrop = afr_fxattrop, + .inodelk = afr_inodelk, + .finodelk = afr_finodelk, + .entrylk = afr_entrylk, + .fentrylk = afr_fentrylk, + + /* inode read */ + .access = afr_access, + .stat = afr_stat, + .fstat = afr_fstat, + .readlink = afr_readlink, + .getxattr = pump_getxattr, + .readv = afr_readv, + + /* inode write */ + .writev = afr_writev, + .truncate = afr_truncate, + .ftruncate = afr_ftruncate, + .setxattr = pump_setxattr, + .setattr = afr_setattr, + .fsetattr = afr_fsetattr, + .removexattr = afr_removexattr, + + /* dir read */ + .opendir = afr_opendir, + .readdir = afr_readdir, + .readdirp = afr_readdirp, + + /* dir write */ + .create = afr_create, + .mknod = afr_mknod, + .mkdir = afr_mkdir, + .unlink = afr_unlink, + .rmdir = afr_rmdir, + .link = afr_link, + .symlink = afr_symlink, + .rename = afr_rename, +}; + +struct xlator_dumpops dumpops = { + .priv = afr_priv_dump, +}; + + +struct xlator_cbks cbks = { + .release = afr_release, + .releasedir = afr_releasedir, +}; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h new file mode 100644 index 00000000000..15799002b18 --- /dev/null +++ b/xlators/cluster/afr/src/pump.h @@ -0,0 +1,96 @@ +/* + Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __PUMP_H__ +#define __PUMP_H__ + +#include "syncop.h" + +#define PUMP_PID 696969 +#define PUMP_LK_OWNER 696969 + +#define IS_ROOT_PATH(path) (!strcmp (path, "/")) +#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) +#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) + +#define PUMP_CMD_START "trusted.glusterfs.pump.start" +#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort" +#define PUMP_CMD_PAUSE "trusted.glusterfs.pump.pause" +#define PUMP_CMD_STATUS "trusted.glusterfs.pump.status" + +#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete" +#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete" + +#define PUMP_PATH "trusted.glusterfs.pump-path" + +#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator) +#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator) + +typedef enum { + PUMP_STATE_RUNNING, + PUMP_STATE_RESUME, + PUMP_STATE_PAUSE, + PUMP_STATE_ABORT, +} pump_state_t; + +typedef struct _pump_private { + struct syncenv *env; + const char *resume_path; + gf_lock_t resume_path_lock; + gf_lock_t pump_state_lock; + pump_state_t pump_state; + long source_blocks; + long sink_blocks; + char current_file[PATH_MAX]; + uint64_t number_files_pumped; + gf_boolean_t pump_finished; +} pump_private_t; + +void +build_root_loc (inode_t *inode, loc_t *loc); +int pump_start (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +pump_command_start (xlator_t *this, dict_t *dict); + +int +pump_execute_start (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +pump_command_pause (xlator_t *this, dict_t *dict); + +int +pump_execute_pause (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +pump_command_abort (xlator_t *this, dict_t *dict); + +int +pump_execute_abort (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +pump_command_status (xlator_t *this, dict_t *dict); + +int +pump_execute_status (call_frame_t *frame, xlator_t *this); + +gf_boolean_t +is_pump_loaded (xlator_t *this); + +#endif /* __PUMP_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9aa4801e99b..a8b334c3dd9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -848,6 +848,44 @@ out: } int +glusterd_handle_replace_brick (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_replace_brick_req cli_req = {0,}; + dict_t *dict = NULL; + + GF_ASSERT (req); + + if (!gf_xdr_to_cli_replace_brick_req (req->msg[0], &cli_req)) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log ("glusterd", GF_LOG_NORMAL, "Received replace brick req"); + + if (cli_req.bricks.bricks_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.bricks.bricks_val, + cli_req.bricks.bricks_len, + &dict); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + ret = glusterd_replace_brick (req, dict); + +out: + return ret; +} + +int glusterd_handle_remove_brick (rpcsvc_request_t *req) { int32_t ret = -1; @@ -1657,6 +1695,23 @@ glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict) } int32_t +glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict) +{ + int32_t ret = -1; + + GF_ASSERT (req); + GF_ASSERT (dict); + + glusterd_op_set_op (GD_OP_REPLACE_BRICK); + + glusterd_op_set_ctx (GD_OP_REPLACE_BRICK, dict); + + ret = glusterd_op_txn_begin (); + + return ret; +} + +int32_t glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict) { int32_t ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 31e2a9e50f2..8f6861417be 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -84,6 +84,7 @@ glusterd_op_get_len (glusterd_op_t op) case GD_OP_START_BRICK: break; + case GD_OP_REPLACE_BRICK: case GD_OP_ADD_BRICK: { dict_t *dict = glusterd_op_get_ctx (op); @@ -210,6 +211,20 @@ glusterd_op_build_payload (glusterd_op_t op, gd1_mgmt_stage_op_req **req) } break; + case GD_OP_REPLACE_BRICK: + { + dict_t *dict = NULL; + dict = glusterd_op_get_ctx (op); + GF_ASSERT (dict); + ret = dict_allocate_and_serialize (dict, + &stage_req->buf.buf_val, + (size_t *)&stage_req->buf.buf_len); + if (ret) { + goto out; + } + } + break; + case GD_OP_REMOVE_BRICK: { dict_t *dict = NULL; @@ -533,6 +548,55 @@ out: } static int +glusterd_op_stage_replace_brick (gd1_mgmt_stage_op_req *req) +{ + int ret = 0; + dict_t *dict = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + + GF_ASSERT (req); + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + goto out; + } + /* Need to do a little more error checking and validation */ + ret = dict_get_str (dict, "src-brick", &src_brick); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "dest brick=%s", dst_brick); + + ret = 0; + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static int glusterd_op_stage_remove_brick (gd1_mgmt_stage_op_req *req) { int ret = 0; @@ -828,6 +892,310 @@ out: } static int +replace_brick_start_dst_brick (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *dst_brickinfo) +{ glusterd_conf_t *priv = NULL; + char cmd_str[8192] = {0,}; + char filename[PATH_MAX]; + FILE *file = NULL; + int ret; + + priv = THIS->private; + + snprintf (filename, PATH_MAX, "%s/vols/%s/replace_dst_brick.vol", + priv->workdir, volinfo->volname); + + + file = fopen (filename, "a+"); + if (!file) { + gf_log ("", GF_LOG_DEBUG, + "Open of volfile failed"); + return -1; + } + + truncate (filename, 0); + + fprintf (file, "volume src-posix\n"); + fprintf (file, "type storage/posix\n"); + fprintf (file, "option directory %s\n", + dst_brickinfo->path); + fprintf (file, "end-volume\n"); + fprintf (file, "volume %s\n", + dst_brickinfo->path); + fprintf (file, "type features/locks\n"); + fprintf (file, "subvolumes src-posix\n"); + fprintf (file, "end-volume\n"); + fprintf (file, "volume src-server\n"); + fprintf (file, "type protocol/server\n"); + fprintf (file, "option auth.addr.%s.allow *\n", + dst_brickinfo->path); + fprintf (file, "option listen-port 34034\n"); + fprintf (file, "subvolumes %s\n", + dst_brickinfo->path); + fprintf (file, "end-volume\n"); + + gf_log ("", GF_LOG_DEBUG, + "starting dst brick"); + + snprintf (cmd_str, 4096, "glusterfs -f %s/vols/%s/replace_dst_brick.vol", + priv->workdir, volinfo->volname); + + ret = system (cmd_str); + + + fclose (file); + + return 0; +} + +static int +replace_brick_spawn_brick (glusterd_volinfo_t *volinfo, dict_t *dict, + glusterd_brickinfo_t *dst_brickinfo) + +{ + + replace_brick_start_dst_brick (volinfo, dst_brickinfo); + + return 0; +} + +static int +replace_brick_generate_volfile (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + glusterd_conf_t *priv = NULL; + FILE *file = NULL; + char filename[PATH_MAX]; + int ret; + + priv = THIS->private; + + gf_log ("", GF_LOG_DEBUG, + "Creating volfile"); + + snprintf (filename, PATH_MAX, "%s/vols/%s/replace_brick.vol", + priv->workdir, volinfo->volname); + + file = fopen (filename, "a+"); + if (!file) { + gf_log ("", GF_LOG_DEBUG, + "Open of volfile failed"); + return -1; + } + + ret = truncate (filename, 0); + ret = unlink ("/tmp/replace_brick.vol"); + + fprintf (file, "volume client/protocol\n"); + fprintf (file, "type protocol/client\n"); + fprintf (file, "option remote-host %s\n", + src_brickinfo->hostname); + fprintf (file, "option remote-subvolume %s\n", + src_brickinfo->path); + fprintf (file, "option remote-port 34034\n"); + fprintf (file, "echo end-volume\n"); + + ret = symlink(filename, "/tmp/replace_brick.vol"); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, + "symlink call failed"); + return -1; + } + + fclose (file); + + return 0; +} + +static int +replace_brick_start_source_brick (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src_brickinfo, + glusterd_brickinfo_t *dst_brickinfo) +{ glusterd_conf_t *priv = NULL; + char filename[PATH_MAX]; + FILE *file = NULL; + char cmd_str[8192] = {0,}; + int ret; + + priv = THIS->private; + + gf_log ("", GF_LOG_DEBUG, + "Creating volfile"); + + snprintf (filename, PATH_MAX, "%s/vols/%s/replace_source_brick.vol", + priv->workdir, volinfo->volname); + + file = fopen (filename, "a+"); + if (!file) { + gf_log ("", GF_LOG_DEBUG, + "Open of volfile failed"); + return -1; + } + + ret = truncate (filename, 0); + + fprintf (file, "volume src-posix\n"); + fprintf (file, "type storage/posix\n"); + fprintf (file, "option directory %s\n", + src_brickinfo->path); + fprintf (file, "end-volume\n"); + fprintf (file, "volume locks\n"); + fprintf (file, "type features/locks\n"); + fprintf (file, "subvolumes src-posix\n"); + fprintf (file, "end-volume\n"); + fprintf (file, "volume remote-client\n"); + fprintf (file, "type protocol/client\n"); + fprintf (file, "option remote-host %s\n", + dst_brickinfo->hostname); + fprintf (file, "option remote-port 34034\n"); + fprintf (file, "option remote-subvolume %s\n", + dst_brickinfo->path); + fprintf (file, "end-volume\n"); + fprintf (file, "volume %s\n", + src_brickinfo->path); + fprintf (file, "type cluster/pump\n"); + fprintf (file, "subvolumes locks remote-client\n"); + fprintf (file, "end-volume\n"); + fprintf (file, "volume src-server\n"); + fprintf (file, "type protocol/server\n"); + fprintf (file, "option auth.addr.%s.allow *\n", + src_brickinfo->path); + fprintf (file, "option listen-port 34034\n"); + fprintf (file, "subvolumes %s\n", + src_brickinfo->path); + fprintf (file, "end-volume\n"); + + + gf_log ("", GF_LOG_DEBUG, + "starting source brick"); + + snprintf (cmd_str, 4096, "glusterfs -f %s/vols/%s/replace_source_brick.vol -l /tmp/b_log -LTRACE", + priv->workdir, volinfo->volname); + + ret = system (cmd_str); + + fclose (file); + + return 0; +} + +static int +replace_brick_mount (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src_brickinfo, + glusterd_brickinfo_t *dst_brickinfo, gf1_cli_replace_op op) +{ + + gf_log ("", GF_LOG_DEBUG, + "starting source brick"); + + replace_brick_start_source_brick (volinfo, src_brickinfo, + dst_brickinfo); + + return 0; +} + +static int +glusterd_op_replace_brick (gd1_mgmt_stage_op_req *req) +{ + int ret = 0; + dict_t *dict = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + + glusterd_brickinfo_t *src_brickinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + goto out; + } + + /* Need to do a little more error checking and validation */ + ret = dict_get_str (dict, "src-brick", &src_brick); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "dst brick=%s", dst_brick); + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + str = strdup (src_brick); + + ret = glusterd_brickinfo_get (str, volinfo, + &src_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + goto out; + } + + ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo"); + goto out; + } + + replace_brick_generate_volfile (volinfo, src_brickinfo); + + if (!glusterd_is_local_addr (src_brickinfo->hostname)) { + gf_log ("", GF_LOG_NORMAL, + "I AM THE SOURCE HOST"); + replace_brick_mount (volinfo, src_brickinfo, dst_brickinfo, + req->op); + } else if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { + gf_log ("", GF_LOG_NORMAL, + "I AM THE DESTINATION HOST"); + replace_brick_spawn_brick (volinfo, dict, dst_brickinfo); + } + + ret = 0; + +out: + return ret; +} + +static int glusterd_op_remove_brick (gd1_mgmt_stage_op_req *req) { int ret = 0; @@ -1609,6 +1977,11 @@ glusterd_op_stage_validate (gd1_mgmt_stage_op_req *req) ret = glusterd_op_stage_add_brick (req); break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_stage_replace_brick (req); + break; + + case GD_OP_REMOVE_BRICK: ret = glusterd_op_stage_remove_brick (req); break; @@ -1652,6 +2025,10 @@ glusterd_op_commit_perform (gd1_mgmt_stage_op_req *req) ret = glusterd_op_add_brick (req); break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_replace_brick (req); + break; + case GD_OP_REMOVE_BRICK: ret = glusterd_op_remove_brick (req); break; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 6b32bd3a0d8..5ef0ce1e5c1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -73,7 +73,7 @@ glusterd_unset_lock_owner (uuid_t owner) return 0; } -static int32_t +int32_t glusterd_is_local_addr (char *hostname) { int32_t ret = -1; @@ -598,11 +598,14 @@ glusterd_brickinfo_from_brick (char *brick, glusterd_brickinfo_t *new_brickinfo = NULL; char *hostname = NULL; char *path = NULL; + char *tmp = NULL; GF_ASSERT (brick); GF_ASSERT (brickinfo); - hostname = strtok (brick, ":"); + tmp = strdup (brick); + + hostname = strtok (tmp, ":"); path = strtok (NULL, ":"); GF_ASSERT (hostname); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index fc8b33ab1b9..dd97e67a68c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -115,4 +115,6 @@ glusterd_is_cli_op_req (int32_t op); int32_t glusterd_brickinfo_get (char *brick, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t **brickinfo); +int32_t +glusterd_is_local_addr (char *hostname); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 9f0986bb62f..368c0a6847a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -249,6 +249,13 @@ glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict); int glusterd_handle_add_brick (rpcsvc_request_t *req); + +int32_t +glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict); + +int +glusterd_handle_replace_brick (rpcsvc_request_t *req); + int glusterd_handle_remove_brick (rpcsvc_request_t *req); diff --git a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c b/xlators/mgmt/glusterd/src/glusterd3_1-mops.c index 7169121d574..86d09194e11 100644 --- a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c +++ b/xlators/mgmt/glusterd/src/glusterd3_1-mops.c @@ -1164,6 +1164,10 @@ glusterd_handle_rpc_msg (rpcsvc_request_t *req) ret = glusterd_handle_add_brick (req); break; + case GD_MGMT_CLI_REPLACE_BRICK: + ret = glusterd_handle_replace_brick (req); + break; + case GD_MGMT_CLI_REMOVE_BRICK: ret = glusterd_handle_remove_brick (req); break; @@ -1203,6 +1207,7 @@ rpcsvc_actor_t glusterd1_mgmt_actors[] = { [GD_MGMT_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GD_MGMT_CLI_DELETE_VOLUME, glusterd_handle_rpc_msg, NULL, NULL}, [GD_MGMT_CLI_GET_VOLUME] = { "GET_VOLUME", GD_MGMT_CLI_GET_VOLUME, glusterd_handle_rpc_msg, NULL, NULL}, [GD_MGMT_CLI_ADD_BRICK] = { "ADD_BRICK", GD_MGMT_CLI_ADD_BRICK, glusterd_handle_rpc_msg, NULL, NULL}, + [GD_MGMT_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GD_MGMT_CLI_REPLACE_BRICK, glusterd_handle_rpc_msg, NULL, NULL}, [GD_MGMT_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GD_MGMT_CLI_REMOVE_BRICK, glusterd_handle_rpc_msg, NULL, NULL}, }; |