summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavan Sondur <pavan@gluster.com>2010-08-06 05:31:45 +0000
committerAnand V. Avati <avati@dev.gluster.com>2010-08-06 04:09:07 -0700
commitacdeed002d30209e0a058c2df0346d4f16c08994 (patch)
tree9c6acda8d92494952f4a80134303b9d2d1c3e1ac
parent453cb4bf0b70c876eb468def34054095cfd66359 (diff)
add pump xlator and changes for replace-brick
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com> Signed-off-by: Anand V. Avati <avati@dev.gluster.com> BUG: 1235 (Bug for all pump/migrate commits) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1235
-rw-r--r--cli/src/cli-cmd-parser.c4
-rw-r--r--cli/src/cli.h15
-rw-r--r--cli/src/cli3_1-cops.c174
-rw-r--r--libglusterfs/src/syncop.c192
-rw-r--r--libglusterfs/src/syncop.h21
-rw-r--r--libglusterfs/src/xlator.h1
-rw-r--r--rpc/xdr/src/cli1-xdr.h2
-rw-r--r--xlators/cluster/afr/src/Makefile.am16
-rw-r--r--xlators/cluster/afr/src/afr-common.c2642
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c3
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c3
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c18
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h1
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c2
-rw-r--r--xlators/cluster/afr/src/afr.c2590
-rw-r--r--xlators/cluster/afr/src/afr.h22
-rw-r--r--xlators/cluster/afr/src/pump.c1817
-rw-r--r--xlators/cluster/afr/src/pump.h96
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c55
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c377
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd3_1-mops.c5
25 files changed, 5461 insertions, 2617 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 6c96d0836..ddb376bcb 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -442,8 +442,8 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
if (!strcasecmp ("start", op)) {
replace_op = GF_REPLACE_OP_START;
- } else if (!strcasecmp ("stop", op)) {
- replace_op = GF_REPLACE_OP_STOP;
+ } else if (!strcasecmp ("commit", op)) {
+ replace_op = GF_REPLACE_OP_COMMIT;
} else if (!strcasecmp ("pause", op)) {
replace_op = GF_REPLACE_OP_PAUSE;
} else if (!strcasecmp ("abort", op)) {
diff --git a/cli/src/cli.h b/cli/src/cli.h
index c31b4631a..7b2de6673 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -37,6 +37,14 @@ enum argp_option_keys {
ARGP_PORT_KEY = 'p',
};
+typedef enum replace_brick_cmd {
+ REPLACE_BRICK_START,
+ REPLACE_BRICK_PAUSE,
+ REPLACE_BRICK_ABORT,
+ REPLACE_BRICK_STATUS,
+ REPLACE_BRICK_COMMIT,
+} replace_brick_cmd_t;
+
struct cli_state;
struct cli_cmd_word;
struct cli_cmd_tree;
@@ -116,6 +124,13 @@ struct cli_local {
struct {
char *volname;
} defrag_vol;
+
+ struct {
+ char *volume;
+ replace_brick_cmd_t op;
+ char *src_brick;
+ char *dst_brick;
+ } replace_brick;
} u;
};
diff --git a/cli/src/cli3_1-cops.c b/cli/src/cli3_1-cops.c
index cfede8076..929f490c2 100644
--- a/cli/src/cli3_1-cops.c
+++ b/cli/src/cli3_1-cops.c
@@ -31,6 +31,7 @@
#include "cli1-xdr.h"
#include "cli1.h"
#include "protocol-common.h"
+#include "cli-mem-types.h"
extern rpc_clnt_prog_t *cli_rpc_prog;
extern int cli_op_ret;
@@ -650,27 +651,133 @@ out:
}
+static int
+replace_brick_mount (char *volname)
+{
+ char cmd_str[8192] = {0,};
+
+ gf_log ("", GF_LOG_DEBUG,
+ "creating directory");
+
+ snprintf (cmd_str, 4096, "mkdir -p /tmp/mnt");
+ system (cmd_str);
+
+ gf_log ("", GF_LOG_DEBUG,
+ "creating maintenance mount");
+
+ snprintf (cmd_str, 4096, "glusterfs -f /tmp/replace_brick.vol /tmp/mnt -l /tmp/pav_log -LTRACE");
+
+ system (cmd_str);
+
+ return 0;
+}
+
int
gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_replace_brick_rsp rsp = {0,};
int ret = 0;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *replace_brick_op = NULL;
+ char status_msg[8192] = {0,};
+ char cmd_str[8192] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
+ frame = (call_frame_t *) myframe;
+ local = frame->local;
+
ret = gf_xdr_to_cli_replace_brick_rsp (*iov, &rsp);
if (ret < 0) {
gf_log ("", GF_LOG_ERROR, "error");
goto out;
}
+ switch (local->u.replace_brick.op) {
+ case REPLACE_BRICK_START:
+
+ replace_brick_op = "Replace brick start operation";
+
+ replace_brick_mount (local->u.replace_brick.volume);
+
+ gf_log ("", GF_LOG_DEBUG,
+ "sending setxattr");
+
+ snprintf (cmd_str, 4096, "sleep 2; stat /tmp/mnt/ >/dev/null;setfattr -n trusted.glusterfs.pump.start /tmp/mnt/");
+
+ system (cmd_str);
+
+ gf_log ("", GF_LOG_DEBUG,
+ "umounting");
+
+ snprintf (cmd_str, 4096, "umount -f /tmp/mnt 2>/dev/null");
+
+ ret = system (cmd_str);
+
+ system ("rmdir /tmp/mnt");
+ break;
+
+ case REPLACE_BRICK_STATUS:
+
+ replace_brick_op = "Replace brick status operation";
+
+ snprintf (cmd_str, 4096, "mkdir -p /tmp/mnt");
+ system (cmd_str);
+
+ snprintf (cmd_str, 4096, "glusterfs -f /tmp/replace_brick.vol /tmp/mnt -l /tmp/pav_log -LTRACE");
+ system (cmd_str);
+
+ gf_log ("", GF_LOG_DEBUG,
+ "sending getxattr");
+
+ ret = getxattr ("/tmp/mnt/", "trusted.glusterfs.pump.status", status_msg, 8192);
+ fprintf (stdout, "%s\n", status_msg);
+
+ gf_log ("", GF_LOG_DEBUG,
+ "umounting");
+
+ snprintf (cmd_str, 4096, "umount -f /tmp/mnt 2>/dev/null");
+
+ ret = system (cmd_str);
+
+ system ("rmdir /tmp/mnt");
+ break;
+
+ case REPLACE_BRICK_COMMIT:
+
+ replace_brick_op = "Replace brick commit operation";
+
+ src_brick = local->u.replace_brick.src_brick;
+ dst_brick = local->u.replace_brick.dst_brick;
+
+ snprintf (cmd_str, 4096, "gluster volume add-brick %s %s >/dev/null",
+ local->u.replace_brick.volume, dst_brick);
+
+ ret = system (cmd_str);
+
+ snprintf (cmd_str, 4096, "gluster volume remove-brick %s %s >/dev/null",
+ local->u.replace_brick.volume, src_brick);
+
+ ret = system (cmd_str);
+
+ break;
+
+ default:
+ break;
+ }
+
gf_log ("cli", GF_LOG_NORMAL, "Received resp to replace brick");
- cli_out ("Replace Brick %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ cli_out ("%s %s",
+ replace_brick_op ? replace_brick_op : "Unknown operation",
+ (rsp.op_ret) ? "unsuccessful":
+ "successful");
ret = rsp.op_ret;
@@ -1183,25 +1290,76 @@ gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
gf1_cli_replace_brick_req req = {0,};
int ret = 0;
dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- dict = data;
+ local = GF_CALLOC (1, sizeof (*local), cli_mt_cli_local_t);
+
+ dict = data;
+
+ ret = dict_get_int32 (dict, "operation", (int32_t *)&req.op);
+
+ if (ret)
+ goto out;
+
+ switch (req.op) {
+ case GF_REPLACE_OP_START:
+ local->u.replace_brick.op = REPLACE_BRICK_START;
+ break;
+ case GF_REPLACE_OP_PAUSE:
+ local->u.replace_brick.op = REPLACE_BRICK_PAUSE;
+ break;
+ case GF_REPLACE_OP_ABORT:
+ local->u.replace_brick.op = REPLACE_BRICK_ABORT;
+ break;
+ case GF_REPLACE_OP_STATUS:
+ local->u.replace_brick.op = REPLACE_BRICK_STATUS;
+ break;
+ case GF_REPLACE_OP_COMMIT:
+ local->u.replace_brick.op = REPLACE_BRICK_COMMIT;
+ break;
+
+ default:
+ break;
+ }
+
ret = dict_get_str (dict, "volname", &req.volname);
if (ret)
goto out;
- ret = dict_get_int32 (dict, "operation", (int32_t *)&req.op);
+ ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret)
+ if (ret) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+
+ if (ret) {
goto out;
+ }
+
+ local->u.replace_brick.volume = strdup (req.volname);
+ local->u.replace_brick.src_brick = strdup (src_brick);
+ local->u.replace_brick.dst_brick = strdup (dst_brick);
+ frame->local = local;
- if (GF_REPLACE_OP_START == req.op) {
+
+ ret = dict_allocate_and_serialize (dict,
+ &req.bricks.bricks_val,
+ (size_t *)&req.bricks.bricks_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
}
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
@@ -1216,10 +1374,6 @@ out:
GF_FREE (req.bricks.bricks_val);
}
- if (req.bricks.bricks_val) {
- GF_FREE (req.bricks.bricks_val);
- }
-
return ret;
}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index beb5d9db4..cfece6591 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -24,6 +24,20 @@
#include "syncop.h"
+call_frame_t *
+syncop_create_frame ()
+{
+ struct synctask *task = NULL;
+ struct call_frame_t *frame = NULL;
+
+ task = synctask_get ();
+
+ if (task) {
+ frame = task->opaque;
+ }
+
+ return (call_frame_t *)frame;
+}
void
synctask_yield (struct synctask *task)
@@ -95,6 +109,8 @@ synctask_wrap (struct synctask *task)
*/
task->complete = 1;
synctask_wake (task);
+
+ synctask_yield (task);
}
@@ -105,7 +121,7 @@ synctask_destroy (struct synctask *task)
return;
if (task->stack)
- FREE (task);
+ FREE (task->stack);
FREE (task);
}
@@ -305,7 +321,181 @@ syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
return args.op_ret;
}
+static gf_dirent_t *
+entry_copy (gf_dirent_t *source)
+{
+ gf_dirent_t *sink = NULL;
+
+ sink = gf_dirent_for_name (source->d_name);
+
+ sink->d_off = source->d_off;
+ sink->d_ino = source->d_ino;
+ sink->d_type = source->d_type;
+
+ return sink;
+}
+
+int32_t
+syncop_readdirp_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ int count = 0;
+
+ args = cookie;
+
+ INIT_LIST_HEAD (&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ tmp = entry_copy (entry);
+ gf_log (this->name, GF_LOG_TRACE,
+ "adding entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail (&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_readdirp (xlator_t *subvol,
+ fd_t *fd,
+ size_t size,
+ off_t off,
+ gf_dirent_t *entries)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp,
+ fd, size, off);
+
+ if (entries)
+ list_splice_init (&args.entries.list, &entries->list);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int32_t
+syncop_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_opendir (xlator_t *subvol,
+ loc_t *loc,
+ fd_t *fd)
+{
+ struct syncargs args = {0, };
+ SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir,
+ loc, fd);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+
+int
+syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr,
+ loc, dict, flags);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct statvfs *buf)
+
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret == 0) {
+ args->statvfs_buf = *buf;
+ }
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf)
+
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs,
+ loc);
+
+ if (buf)
+ *buf = args.statvfs_buf;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
int
syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
index ce364b073..13b07ed31 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/syncop.h
@@ -76,6 +76,8 @@ struct syncargs {
struct iatt iatt1;
struct iatt iatt2;
dict_t *xattr;
+ gf_dirent_t entries;
+ struct statvfs statvfs_buf;
/* do not touch */
pthread_mutex_t mutex;
@@ -134,10 +136,10 @@ struct syncargs {
#define SYNCOP(subvol, stb, cbk, op, params ...) do { \
call_frame_t *frame = NULL; \
\
- frame = create_frame (THIS, THIS->ctx->pool); \
+ frame = syncop_create_frame (); \
\
__yawn (stb); \
- STACK_WIND_COOKIE (frame, (void *)stb, cbk, subvol, op, params);\
+ STACK_WIND_COOKIE (frame, cbk, (void *)stb, subvol, op, params);\
__yield (stb); \
} while (0)
@@ -157,8 +159,23 @@ int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
/* out */
struct iatt *iatt, dict_t **xattr_rsp, struct iatt *parent);
+int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ /* out */
+ gf_dirent_t *entries);
+
+int
+syncop_opendir (xlator_t *subvol,
+ loc_t *loc,
+ fd_t *fd);
+
int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
/* out */
struct iatt *preop, struct iatt *postop);
+int
+syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);
+
+int
+syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);
+
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index e373cc424..ef79c354f 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -76,6 +76,7 @@ typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data,
#include "iatt.h"
+
struct _loc {
const char *path;
const char *name;
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
index 51fce3e92..2f71df4a8 100644
--- a/rpc/xdr/src/cli1-xdr.h
+++ b/rpc/xdr/src/cli1-xdr.h
@@ -24,7 +24,7 @@ typedef enum gf1_cluster_type gf1_cluster_type;
enum gf1_cli_replace_op {
GF_REPLACE_OP_NONE = 0,
GF_REPLACE_OP_START = 0 + 1,
- GF_REPLACE_OP_STOP = 0 + 2,
+ GF_REPLACE_OP_COMMIT = 0 + 2,
GF_REPLACE_OP_PAUSE = 0 + 3,
GF_REPLACE_OP_ABORT = 0 + 4,
GF_REPLACE_OP_STATUS = 0 + 5,
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index ece459ca7..699b3da77 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,12 +1,17 @@
-xlator_LTLIBRARIES = afr.la
+xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_la_LDFLAGS = -module -avoidversion
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c
+afr_la_LDFLAGS = -module -avoidversion
+afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h afr-mem-types.h
+pump_la_LDFLAGS = -module -avoidversion
+pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS)
@@ -15,6 +20,7 @@ CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
+ rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
- ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so \ No newline at end of file
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
new file mode 100644
index 000000000..aeadceddb
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -0,0 +1,2642 @@
+/*
+ Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "pump.h"
+
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
+#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
+#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+
+void
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame->root->lk_owner) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Setting lk-owner=%llu",
+ (unsigned long long) frame->root);
+ frame->root->lk_owner = (uint64_t) frame->root;
+ }
+}
+
+uint64_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+
+ uint64_t ctx = 0;
+ uint64_t split_brain = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0)
+ goto unlock;
+
+ split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+out:
+ return split_brain;
+}
+
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0) {
+ ctx = 0;
+ }
+
+ if (set) {
+ ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
+ | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
+ } else {
+ ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
+ }
+ __inode_ctx_put (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+out:
+ return;
+}
+
+
+uint64_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+
+ uint64_t ctx = 0;
+ uint64_t opendir_done = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0)
+ goto unlock;
+
+ opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+out:
+ return opendir_done;
+}
+
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0) {
+ ctx = 0;
+ }
+
+ ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
+ | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+
+ __inode_ctx_put (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+out:
+ return;
+}
+
+
+uint64_t
+afr_read_child (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+
+ uint64_t ctx = 0;
+ uint64_t read_child = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0)
+ goto unlock;
+
+ read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+out:
+ return read_child;
+}
+
+
+void
+afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (inode, out);
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx);
+
+ if (ret < 0) {
+ ctx = 0;
+ }
+
+ ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
+ | (AFR_ICTX_READ_CHILD_MASK & read_child);
+
+ __inode_ctx_put (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+
+out:
+ return;
+}
+
+
+/**
+ * afr_local_cleanup - cleanup everything in frame->local
+ */
+
+void
+afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->buf)
+ GF_FREE (sh->buf);
+
+ if (sh->xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ dict_unref (sh->xattr[i]);
+ sh->xattr[i] = NULL;
+ }
+ }
+ GF_FREE (sh->xattr);
+ }
+
+ if (sh->child_errno)
+ GF_FREE (sh->child_errno);
+
+ if (sh->pending_matrix) {
+ for (i = 0; i < priv->child_count; i++) {
+ GF_FREE (sh->pending_matrix[i]);
+ }
+ GF_FREE (sh->pending_matrix);
+ }
+
+ if (sh->delta_matrix) {
+ for (i = 0; i < priv->child_count; i++) {
+ GF_FREE (sh->delta_matrix[i]);
+ }
+ GF_FREE (sh->delta_matrix);
+ }
+
+ if (sh->sources)
+ GF_FREE (sh->sources);
+
+ if (sh->success)
+ GF_FREE (sh->success);
+
+ if (sh->locked_nodes)
+ GF_FREE (sh->locked_nodes);
+
+ if (sh->healing_fd && !sh->healing_fd_opened) {
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+ }
+
+ if (sh->linkname)
+ GF_FREE ((char *)sh->linkname);
+
+ loc_wipe (&sh->parent_loc);
+}
+
+
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t * priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->pending && local->pending[i])
+ GF_FREE (local->pending[i]);
+ }
+
+ GF_FREE (local->pending);
+
+ GF_FREE (local->transaction.locked_nodes);
+ GF_FREE (local->transaction.child_errno);
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->transaction.basename);
+ GF_FREE (local->transaction.new_basename);
+
+ loc_wipe (&local->transaction.parent_loc);
+ loc_wipe (&local->transaction.new_parent_loc);
+}
+
+
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+ int i;
+ afr_private_t * priv = NULL;
+
+ if (!local)
+ return;
+
+ afr_local_sh_cleanup (local, this);
+
+ afr_local_transaction_cleanup (local, this);
+
+ priv = this->private;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ GF_FREE (local->child_up);
+
+ { /* lookup */
+ if (local->cont.lookup.xattrs) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lookup.xattrs[i]) {
+ dict_unref (local->cont.lookup.xattrs[i]);
+ local->cont.lookup.xattrs[i] = NULL;
+ }
+ }
+ GF_FREE (local->cont.lookup.xattrs);
+ local->cont.lookup.xattrs = NULL;
+ }
+
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ }
+
+ if (local->cont.lookup.inode) {
+ inode_unref (local->cont.lookup.inode);
+ }
+ }
+
+ { /* getxattr */
+ if (local->cont.getxattr.name)
+ GF_FREE (local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ if (local->cont.lk.locked_nodes)
+ GF_FREE (local->cont.lk.locked_nodes);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref (local->cont.create.fd);
+ }
+
+ { /* writev */
+ GF_FREE (local->cont.writev.vector);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref (local->cont.setxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE (local->cont.removexattr.name);
+ }
+
+ { /* symlink */
+ GF_FREE (local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ if (local->cont.opendir.checksum)
+ GF_FREE (local->cont.opendir.checksum);
+ }
+}
+
+
+int
+afr_frame_return (call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ return call_count;
+}
+
+
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+int
+afr_up_children_count (int child_count, unsigned char *child_up)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (child_up[i])
+ ret++;
+ return ret;
+}
+
+
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+{
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < child_count; i++)
+ if (locked_nodes[i])
+ ret++;
+
+ return ret;
+}
+
+
+ino64_t
+afr_itransform (ino64_t ino, int child_count, int child_index)
+{
+ ino64_t scaled_ino = -1;
+
+ if (ino == ((uint64_t) -1)) {
+ scaled_ino = ((uint64_t) -1);
+ goto out;
+ }
+
+ scaled_ino = (ino * child_count) + child_index;
+
+out:
+ return scaled_ino;
+}
+
+
+int
+afr_deitransform_orig (ino64_t ino, int child_count)
+{
+ int index = -1;
+
+ index = ino % child_count;
+
+ return index;
+}
+
+
+int
+afr_deitransform (ino64_t ino, int child_count)
+{
+ return 0;
+}
+
+
+int
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->govinda_gOvinda) {
+ afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
+ }
+
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+
+static void
+afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
+ int child_index, dict_t *xattr)
+{
+ uint32_t open_fd_count = 0;
+ uint32_t inodelk_count = 0;
+ uint32_t entrylk_count = 0;
+
+ int ret = 0;
+
+ if (afr_sh_has_metadata_pending (xattr, child_index, this)) {
+ local->self_heal.need_metadata_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ }
+
+ if (afr_sh_has_entry_pending (xattr, child_index, this)) {
+ local->self_heal.need_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.", local->loc.path);
+ }
+
+ if (afr_sh_has_data_pending (xattr, child_index, this)) {
+ local->self_heal.need_data_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.", local->loc.path);
+ }
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if (ret == 0)
+ local->open_fd_count += open_fd_count;
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
+ &inodelk_count);
+ if (ret == 0)
+ local->inodelk_count += inodelk_count;
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
+ &entrylk_count);
+ if (ret == 0)
+ local->entrylk_count += entrylk_count;
+}
+
+
+static void
+afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local,
+ struct iatt *buf, struct iatt *lookup_buf)
+{
+ if (FILETYPE_DIFFERS (buf, lookup_buf)) {
+ /* mismatching filetypes with same name
+ */
+
+ gf_log (this->name, GF_LOG_NORMAL,
+ "filetype differs for %s ", local->loc.path);
+
+ local->govinda_gOvinda = 1;
+ }
+
+ if (PERMISSION_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ gf_log (this->name, GF_LOG_NORMAL,
+ "permissions differ for %s ", local->loc.path);
+ local->self_heal.need_metadata_self_heal = _gf_true;
+ }
+
+ if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ local->self_heal.need_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_NORMAL,
+ "ownership differs for %s ", local->loc.path);
+ }
+
+ if (SIZE_DIFFERS (buf, lookup_buf)
+ && IA_ISREG (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_NORMAL,
+ "size differs for %s ", local->loc.path);
+ local->self_heal.need_data_self_heal = _gf_true;
+ }
+
+}
+
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf)
+{
+ int unwind = 1;
+ int source = -1;
+ char sh_type_str[256] = {0,};
+
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino;
+
+ if (local->cont.lookup.ino) {
+ local->cont.lookup.buf.ia_ino = local->cont.lookup.ino;
+ local->cont.lookup.buf.ia_gen = local->cont.lookup.gen;
+ }
+
+ if (local->op_ret == 0) {
+ /* KLUDGE: assuming DHT will not itransform in
+ revalidate */
+ if (local->cont.lookup.inode->ino) {
+ local->cont.lookup.buf.ia_ino =
+ local->cont.lookup.inode->ino;
+ local->cont.lookup.buf.ia_gen =
+ local->cont.lookup.inode->generation;
+ }
+ }
+
+ if (local->success_count && local->enoent_count) {
+ local->self_heal.need_metadata_self_heal = _gf_true;
+ local->self_heal.need_data_self_heal = _gf_true;
+ local->self_heal.need_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_NORMAL,
+ "entries are missing in lookup of %s.",
+ local->loc.path);
+ }
+
+ if (local->success_count) {
+ /* check for split-brain case in previous lookup */
+ if (afr_is_split_brain (this,
+ local->cont.lookup.inode)) {
+ local->self_heal.need_data_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_NORMAL,
+ "split brain detected during lookup of "
+ "%s.", local->loc.path);
+ }
+ }
+
+ if ((local->self_heal.need_metadata_self_heal
+ || local->self_heal.need_data_self_heal
+ || local->self_heal.need_entry_self_heal)
+ && ((!local->cont.lookup.is_revalidate)
+ || (local->op_ret != -1))) {
+
+ if (local->open_fd_count
+ || local->inodelk_count
+ || local->entrylk_count) {
+
+ /* Someone else is doing self-heal on this file.
+ So just make a best effort to set the read-subvolume
+ and return */
+
+ if (IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs);
+
+ if (source >= 0) {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ source);
+ }
+ }
+ } else {
+ if (!local->cont.lookup.inode->ia_type) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->ia_type =
+ lookup_buf->ia_type;
+ }
+
+ local->self_heal.background = _gf_true;
+ local->self_heal.type = local->cont.lookup.buf.ia_type;
+ local->self_heal.unwind = afr_self_heal_lookup_unwind;
+
+ unwind = 0;
+
+ afr_self_heal_type_str_get(&local->self_heal,
+ sh_type_str,
+ sizeof(sh_type_str));
+
+ gf_log (this->name, GF_LOG_NORMAL, "background %s "
+ "self-heal triggered. path: %s",
+ sh_type_str, local->loc.path);
+
+ afr_self_heal (frame, this);
+ }
+ }
+
+ if (unwind) {
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno,
+ local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+ }
+}
+
+
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ESTALE > ENOENT > others
+ *
+ */
+
+static gf_boolean_t
+__error_more_important (int32_t old_errno, int32_t new_errno)
+{
+ gf_boolean_t ret = _gf_true;
+
+ /* Nothing should ever overwrite ESTALE */
+ if (old_errno == ESTALE)
+ ret = _gf_false;
+
+ /* Nothing should overwrite ENOENT, except ESTALE */
+ else if ((old_errno == ENOENT) && (new_errno != ESTALE))
+ ret = _gf_false;
+
+ return ret;
+}
+
+
+int
+afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ struct iatt * lookup_buf = NULL;
+
+ int call_count = -1;
+ int child_index = -1;
+ int first_up_child = -1;
+
+ child_index = (long) cookie;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ lookup_buf = &local->cont.lookup.buf;
+
+ if (op_ret == -1) {
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ if (__error_more_important (local->op_errno, op_errno))
+ local->op_errno = op_errno;
+
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+
+ goto unlock;
+ }
+
+ afr_lookup_collect_xattr (local, this, child_index, xattr);
+
+ first_up_child = afr_first_up_child (priv);
+
+ if (child_index == first_up_child) {
+ local->cont.lookup.ino =
+ afr_itransform (buf->ia_ino,
+ priv->child_count,
+ first_up_child);
+ local->cont.lookup.gen = buf->ia_gen;
+ }
+
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE)
+ local->op_ret = op_ret;
+
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.xattr = dict_ref (xattr);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparent = *postparent;
+
+ if (priv->first_lookup && inode->ino == 1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "added root inode");
+ priv->root_inode = inode_ref (inode);
+ priv->first_lookup = 0;
+ priv->child_up[1] = 0;
+
+ LOCK (&priv->lock);
+ {
+ priv->down_count++;
+ }
+ UNLOCK (&priv->lock);
+
+ }
+
+ *lookup_buf = *buf;
+
+ lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
+ priv->child_count,
+ child_index);
+ if (priv->read_child >= 0) {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ priv->read_child);
+ } else {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ child_index);
+ }
+
+ } else {
+ afr_lookup_self_heal_check (this, local, buf, lookup_buf);
+
+ if (child_index == local->read_child_index) {
+ /*
+ lookup has succeeded on the read child.
+ So use its inode number
+ */
+ if (local->cont.lookup.xattr)
+ dict_unref (local->cont.lookup.xattr);
+
+ local->cont.lookup.xattr = dict_ref (xattr);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparent = *postparent;
+
+ *lookup_buf = *buf;
+
+ if (priv->read_child >= 0) {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ priv->read_child);
+ } else {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ local->read_child_index);
+ }
+ }
+
+ }
+
+ local->success_count++;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ afr_lookup_done (frame, this, lookup_buf);
+ }
+
+ return 0;
+}
+
+
+int
+afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ struct iatt * lookup_buf = NULL;
+
+ int call_count = -1;
+ int child_index = -1;
+ int first_up_child = -1;
+
+ child_index = (long) cookie;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ lookup_buf = &local->cont.lookup.buf;
+
+ if (op_ret == -1) {
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ if (__error_more_important (local->op_errno, op_errno))
+ local->op_errno = op_errno;
+
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+
+ goto unlock;
+ }
+
+ afr_lookup_collect_xattr (local, this, child_index, xattr);
+
+ first_up_child = afr_first_up_child (priv);
+
+ if (child_index == first_up_child) {
+ local->cont.lookup.ino =
+ afr_itransform (buf->ia_ino,
+ priv->child_count,
+ first_up_child);
+ local->cont.lookup.gen = buf->ia_gen;
+ }
+
+ /* in case of revalidate, we need to send stat of the
+ * child whose stat was sent during the first lookup.
+ * (so that time stamp does not vary with revalidate.
+ * in case it is down, stat of the fist success will
+ * be replied */
+
+ /* inode number should be preserved across revalidates */
+
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE)
+ local->op_ret = op_ret;
+
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.xattr = dict_ref (xattr);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparent = *postparent;
+
+ *lookup_buf = *buf;
+
+ lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
+ priv->child_count,
+ child_index);
+
+ if (priv->read_child >= 0) {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ priv->read_child);
+ } else {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ child_index);
+ }
+
+ } else {
+ afr_lookup_self_heal_check (this, local, buf, lookup_buf);
+
+ if (child_index == local->read_child_index) {
+
+ /*
+ lookup has succeeded on the read child.
+ So use its inode number
+ */
+
+ if (local->cont.lookup.xattr)
+ dict_unref (local->cont.lookup.xattr);
+
+ local->cont.lookup.xattr = dict_ref (xattr);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparent = *postparent;
+
+ *lookup_buf = *buf;
+
+ if (priv->read_child >= 0) {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ priv->read_child);
+ } else {
+ afr_set_read_child (this,
+ local->cont.lookup.inode,
+ local->read_child_index);
+ }
+ }
+
+ }
+
+ local->success_count++;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ afr_lookup_done (frame, this, lookup_buf);
+ }
+
+ return 0;
+}
+
+
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+
+ fop_lookup_cbk_t callback;
+
+ int call_count = 0;
+
+ uint64_t ctx;
+
+ int32_t op_errno = 0;
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ local->op_ret = -1;
+
+ frame->local = local;
+
+ if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ loc_copy (&local->loc, loc);
+
+ ret = inode_ctx_get (loc->inode, this, &ctx);
+ if (ret == 0) {
+ /* lookup is a revalidate */
+
+ callback = afr_revalidate_lookup_cbk;
+
+ local->cont.lookup.is_revalidate = _gf_true;
+ local->read_child_index = afr_read_child (this,
+ loc->inode);
+ } else {
+ callback = afr_fresh_lookup_cbk;
+
+ LOCK (&priv->read_child_lock);
+ {
+ local->read_child_index = (++priv->read_child_rr)
+ % (priv->child_count);
+ }
+ UNLOCK (&priv->read_child_lock);
+ }
+
+ if (loc->parent)
+ local->cont.lookup.parent_ino = loc->parent->ino;
+
+ local->child_up = memdup (priv->child_up, priv->child_count);
+
+ local->cont.lookup.xattrs = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.lookup.xattr),
+ gf_afr_mt_dict_t);
+
+ local->call_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+ call_count = local->call_count;
+
+ if (local->call_count == 0) {
+ ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* By default assume ENOTCONN. On success it will be set to 0. */
+ local->op_errno = ENOTCONN;
+
+ if (xattr_req == NULL)
+ local->xattr_req = dict_new ();
+ else
+ local->xattr_req = dict_ref (xattr_req);
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+
+ /* 3 = data+metadata+entry */
+ }
+
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, callback, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret == -1)
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* {{{ open */
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+
+ int op_ret = 0;
+ int ret = 0;
+
+ uint64_t ctx;
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ priv = this->private;
+
+ LOCK (&fd->lock);
+ {
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret == 0)
+ goto unlock;
+
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+
+ op_ret = -ENOMEM;
+ goto unlock;
+ }
+
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_ret = -ENOMEM;
+ goto unlock;
+ }
+
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->opened_on) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_ret = -ENOMEM;
+ goto unlock;
+ }
+
+ fd_ctx->child_failed = GF_CALLOC (
+ sizeof (*fd_ctx->child_failed),
+ priv->child_count,
+ gf_afr_mt_char);
+
+ if (!fd_ctx->child_failed) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+
+ op_ret = -ENOMEM;
+ goto unlock;
+ }
+
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret < 0) {
+ op_ret = ret;
+ }
+
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ }
+unlock:
+ UNLOCK (&fd->lock);
+out:
+ return ret;
+}
+
+/* {{{ flush */
+
+int
+afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (flush, main_frame,
+ local->op_ret, local->op_errno);
+ }
+
+ return 0;
+}
+
+
+int
+afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ afr_flush_unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_flush_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ int i = 0;
+ int call_count = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ local->fd);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int
+afr_flush_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno);
+
+ return 0;
+}
+
+
+static int
+__no_pre_op_done (xlator_t *this, fd_t *fd)
+{
+ int i = 0;
+ int op_ret = 1;
+
+ int _ret = 0;
+ uint64_t ctx;
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ LOCK (&fd->lock);
+ {
+ _ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (_ret < 0) {
+ goto out;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->pre_op_done[i]) {
+ op_ret = 0;
+ break;
+ }
+ }
+ }
+out:
+ UNLOCK (&fd->lock);
+
+ return op_ret;
+}
+
+
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+
+ call_frame_t * transaction_frame = NULL;
+
+ int ret = -1;
+
+ int op_ret = -1;
+ int op_errno = 0;
+
+ int i = 0;
+ int call_count = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
+
+ if (__no_pre_op_done (this, fd)) {
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_plain_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ fd);
+ if (!--call_count)
+ break;
+ }
+ }
+ } else {
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op = GF_FOP_FLUSH;
+
+ local->transaction.fop = afr_flush_wind;
+ local->transaction.done = afr_flush_done;
+ local->transaction.unwind = afr_flush_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+
+ afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION);
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+{
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &ctx);
+
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ if (fd_ctx) {
+ if (fd_ctx->child_failed)
+ GF_FREE (fd_ctx->child_failed);
+
+ if (fd_ctx->pre_op_done)
+ GF_FREE (fd_ctx->pre_op_done);
+
+ if (fd_ctx->opened_on)
+ GF_FREE (fd_ctx->opened_on);
+
+ GF_FREE (fd_ctx);
+ }
+
+out:
+ return 0;
+}
+
+
+int
+afr_release (xlator_t *this, fd_t *fd)
+{
+ afr_cleanup_fd_ctx (this, fd);
+
+ return 0;
+}
+
+
+/* {{{ fsync */
+
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ int child_index = (long) cookie;
+ int read_child = 0;
+
+ local = frame->local;
+
+ read_child = afr_read_child (this, local->fd->inode);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (local->success_count == 0) {
+ local->cont.fsync.prebuf = *prebuf;
+ local->cont.fsync.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.fsync.prebuf = *prebuf;
+ local->cont.fsync.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino;
+ local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino;
+
+ AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
+ &local->cont.fsync.prebuf,
+ &local->cont.fsync.postbuf);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ local->fd = fd_ref (fd);
+ local->cont.fsync.ino = fd->inode->ino;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsync_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsync,
+ fd, datasync);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
+ }
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fsync */
+
+int32_t
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int32_t
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fsyncdir_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fsyncdir,
+ fd, datasync);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ xattrop */
+
+int32_t
+afr_xattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
+ xattr);
+
+ return 0;
+}
+
+
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_xattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ loc, optype, xattr);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fxattrop */
+
+int32_t
+afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
+ xattr);
+
+ return 0;
+}
+
+
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fxattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ fd, optype, xattr);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+/* }}} */
+
+
+int32_t
+afr_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_inodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ volume, loc, cmd, flock);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_finodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ volume, fd, cmd, flock);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int32_t
+afr_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_entrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ volume, loc, basename, cmd, type);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+
+
+int32_t
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+
+int32_t
+afr_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = -1;
+
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ call_count = local->call_count;
+ frame->local = local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fentrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fentrylk,
+ volume, fd, basename, cmd, type);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
+ }
+ return 0;
+}
+
+int32_t
+afr_statfs_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct statvfs *statvfs)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = 0;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == 0) {
+ local->op_ret = op_ret;
+
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
+ local->cont.statfs.buf = *statvfs;
+ } else {
+ local->cont.statfs.buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ }
+ }
+
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf);
+
+ return 0;
+}
+
+
+int32_t
+afr_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc)
+{
+ afr_private_t * priv = NULL;
+ int child_count = 0;
+ afr_local_t * local = NULL;
+ int i = 0;
+
+ int ret = -1;
+ int call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ frame->local = local;
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_statfs_cbk,
+ priv->children[i],
+ priv->children[i]->fops->statfs,
+ loc);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+ afr_local_t * local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ lock);
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int i;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+ priv->child_count);
+
+ if (call_count == 0) {
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.flock);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->cont.lk.flock.l_type = F_UNLCK;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND (frame, afr_lk_unlock_cbk,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ local->fd, F_SETLK,
+ &local->cont.lk.flock);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ int call_count = -1;
+ int child_index = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ call_count = --local->call_count;
+
+ if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_lk_unlock (frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk,
+ local->fd, local->cont.lk.cmd,
+ &local->cont.lk.flock);
+ } else if (local->op_ret == -1) {
+ /* all nodes have gone down */
+
+ AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, &local->cont.lk.flock);
+ } else {
+ /* locking has succeeded on all nodes that are up */
+
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.flock);
+ }
+
+ return 0;
+}
+
+
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd,
+ struct flock *flock)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int i = 0;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_INIT (local, priv);
+
+ frame->local = local;
+
+ local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.flock = *flock;
+
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ fd, cmd, flock);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+afr_priv_dump (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+
+ assert(this);
+ priv = this->private;
+
+ assert(priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_build_key(key, key_prefix, "child_count");
+ gf_proc_dump_write(key, "%u", priv->child_count);
+ gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
+ gf_proc_dump_write(key, "%u", priv->read_child_rr);
+ for (i = 0; i < priv->child_count; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ gf_proc_dump_build_key(key, key_prefix,
+ "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ }
+ gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
+ gf_proc_dump_write(key, "%d", priv->data_self_heal);
+ gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
+ gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
+ gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
+ gf_proc_dump_write(key, "%d", priv->entry_self_heal);
+ gf_proc_dump_build_key(key, key_prefix, "data_change_log");
+ gf_proc_dump_write(key, "%d", priv->data_change_log);
+ gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
+ gf_proc_dump_write(key, "%d", priv->metadata_change_log);
+ gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
+ gf_proc_dump_write(key, "%d", priv->entry_change_log);
+ gf_proc_dump_build_key(key, key_prefix, "read_child");
+ gf_proc_dump_write(key, "%d", priv->read_child);
+ gf_proc_dump_build_key(key, key_prefix, "favorite_child");
+ gf_proc_dump_write(key, "%u", priv->favorite_child);
+ gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
+ gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
+ gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
+ gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
+ gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
+ gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
+ gf_proc_dump_build_key(key, key_prefix, "wait_count");
+ gf_proc_dump_write(key, "%u", priv->wait_count);
+
+ return 0;
+}
+
+
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+
+ int i = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((xlator_t *) child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, ...)
+{
+ afr_private_t * priv = NULL;
+ unsigned char * child_up = NULL;
+
+ int i = -1;
+ int up_children = 0;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
+
+ child_up = priv->child_up;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ i = find_child_index (this, data);
+
+ child_up[i] = 1;
+
+ LOCK (&priv->lock);
+ {
+ priv->up_count++;
+ }
+ UNLOCK (&priv->lock);
+
+ /*
+ if all the children were down, and one child came up,
+ send notify to parent
+ */
+
+ for (i = 0; i < priv->child_count; i++)
+ if (child_up[i])
+ up_children++;
+
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_NORMAL,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+
+ default_notify (this, event, data);
+ }
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ i = find_child_index (this, data);
+
+ child_up[i] = 0;
+
+ LOCK (&priv->lock);
+ {
+ priv->down_count++;
+ }
+ UNLOCK (&priv->lock);
+
+ /*
+ if all children are down, and this was the last to go down,
+ send notify to parent
+ */
+
+ for (i = 0; i < priv->child_count; i++)
+ if (child_up[i])
+ up_children++;
+
+ if (up_children == 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+
+ default_notify (this, event, data);
+ }
+
+ break;
+
+ default:
+ default_notify (this, event, data);
+ }
+
+ return 0;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 344bdf74e..6125ac0dd 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -228,11 +228,14 @@ afr_opendir_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd)
{
+ afr_private_t *priv = NULL;
afr_local_t * local = NULL;
int call_count = -1;
int ret = 0;
+ priv = this->private;
+
LOCK (&frame->lock);
{
local = frame->local;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index ef72fb197..74f3d9ddb 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -656,7 +656,6 @@ out:
return 0;
}
-
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name)
@@ -671,6 +670,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
+
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (this->private, out);
@@ -690,6 +690,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
op_errno = ENODATA;
goto out;
}
+
}
read_child = afr_read_child (this, loc->inode);
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index eff01da86..f02b5fe1e 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -47,7 +47,6 @@
#include "afr.h"
#include "afr-transaction.h"
-
/* {{{ writev */
int
@@ -1366,11 +1365,10 @@ afr_setxattr_done (call_frame_t *frame, xlator_t *this)
local->transaction.unwind (frame, this);
AFR_STACK_DESTROY (frame);
-
+
return 0;
}
-
int
afr_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *dict, int32_t flags)
@@ -1390,13 +1388,6 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
ALLOC_OR_GOTO (local, afr_local_t, out);
ret = AFR_LOCAL_INIT (local, priv);
@@ -1405,6 +1396,13 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
goto out;
}
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ goto out;
+ }
+
transaction_frame->local = local;
local->op_ret = -1;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 27117c184..3720d85e6 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -40,6 +40,7 @@ enum gf_afr_mem_types_ {
gf_afr_mt_uint8_t,
gf_afr_mt_loc_t,
gf_afr_mt_entry_name,
+ gf_afr_mt_pump_priv,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index e3e484aab..61fe89dfe 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -25,7 +25,7 @@
#include "afr-transaction.h"
#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-
+#include "pump.h"
/**
* select_source - select a source and return it
@@ -1536,7 +1536,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
return 0;
}
-
int
afr_self_heal (call_frame_t *frame, xlator_t *this)
{
@@ -1548,9 +1547,12 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)
call_frame_t *sh_frame = NULL;
afr_local_t *sh_local = NULL;
+
local = frame->local;
priv = this->private;
+ afr_set_lk_owner (frame, this);
+
if (local->self_heal.background) {
LOCK (&priv->lock);
{
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 2d689e389..51d1455f4 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1376,7 +1376,7 @@ int32_t afr_lock (call_frame_t *frame, xlator_t *this)
afr_pid_save (frame);
frame->root->pid = (long) frame->root;
-
+ afr_set_lk_owner (frame, this);
return afr_lock_rec (frame, this, 0);
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index b795ea1d1..629e1875e 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -28,2591 +28,17 @@
#define _CONFIG_H
#include "config.h"
#endif
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
-
-
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
- }
-unlock:
- UNLOCK (&inode->lock);
-
-out:
- return split_brain;
-}
-
-
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
-{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- if (set) {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- } else {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
- }
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-out:
- return;
-}
-
-
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t opendir_done = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
- }
-unlock:
- UNLOCK (&inode->lock);
-
-out:
- return opendir_done;
-}
-
-
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
-{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
-
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-out:
- return;
-}
-
-
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
- }
-unlock:
- UNLOCK (&inode->lock);
-
-out:
- return read_child;
-}
-
-
-void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
-{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
- | (AFR_ICTX_READ_CHILD_MASK & read_child);
-
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-
-out:
- return;
-}
-
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
- */
-
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->buf)
- GF_FREE (sh->buf);
-
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- GF_FREE (sh->xattr);
- }
-
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
-
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->pending_matrix[i]);
- }
- GF_FREE (sh->pending_matrix);
- }
-
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- GF_FREE (sh->delta_matrix[i]);
- }
- GF_FREE (sh->delta_matrix);
- }
-
- if (sh->sources)
- GF_FREE (sh->sources);
-
- if (sh->success)
- GF_FREE (sh->success);
-
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
-
- if (sh->healing_fd && !sh->healing_fd_opened) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
-
- loc_wipe (&sh->parent_loc);
-}
-
-
-void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t * priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- GF_FREE (local->pending[i]);
- }
-
- GF_FREE (local->pending);
-
- GF_FREE (local->transaction.locked_nodes);
- GF_FREE (local->transaction.child_errno);
- GF_FREE (local->child_errno);
-
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
-
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
-}
-
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
-{
- int i;
- afr_private_t * priv = NULL;
-
- if (!local)
- return;
-
- afr_local_sh_cleanup (local, this);
-
- afr_local_transaction_cleanup (local, this);
-
- priv = this->private;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
-
- GF_FREE (local->child_up);
-
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lookup.xattrs[i]) {
- dict_unref (local->cont.lookup.xattrs[i]);
- local->cont.lookup.xattrs[i] = NULL;
- }
- }
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
-
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
-
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
- }
-
- { /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
- }
-
- { /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
- }
-
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- }
-
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- }
-
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
-
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
-
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
- }
-
- { /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
- }
-}
-
-
-int
-afr_frame_return (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- return call_count;
-}
-
-
-/**
- * up_children_count - return the number of children that are up
- */
-
-int
-afr_up_children_count (int child_count, unsigned char *child_up)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (child_up[i])
- ret++;
- return ret;
-}
-
-
-int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
-{
- int ret = 0;
- int i;
-
- for (i = 0; i < child_count; i++)
- if (locked_nodes[i])
- ret++;
-
- return ret;
-}
-
-
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
-{
- ino64_t scaled_ino = -1;
-
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
-
- scaled_ino = (ino * child_count) + child_index;
-
-out:
- return scaled_ino;
-}
-
-
-int
-afr_deitransform_orig (ino64_t ino, int child_count)
-{
- int index = -1;
-
- index = ino % child_count;
-
- return index;
-}
-
-
-int
-afr_deitransform (ino64_t ino, int child_count)
-{
- return 0;
-}
-
-
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, _gf_true);
- }
-
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
-
- return 0;
-}
-
-
-static void
-afr_lookup_collect_xattr (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t open_fd_count = 0;
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
-
- int ret = 0;
-
- if (afr_sh_has_metadata_pending (xattr, child_index, this)) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
-
- if (afr_sh_has_entry_pending (xattr, child_index, this)) {
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
-
- if (afr_sh_has_data_pending (xattr, child_index, this)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
- }
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- if (ret == 0)
- local->open_fd_count += open_fd_count;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
-}
-
-
-static void
-afr_lookup_self_heal_check (xlator_t *this, afr_local_t *local,
- struct iatt *buf, struct iatt *lookup_buf)
-{
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- */
-
- gf_log (this->name, GF_LOG_NORMAL,
- "filetype differs for %s ", local->loc.path);
-
- local->govinda_gOvinda = 1;
- }
-
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_NORMAL,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.need_metadata_self_heal = _gf_true;
- }
-
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.need_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_NORMAL,
- "ownership differs for %s ", local->loc.path);
- }
-
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_NORMAL,
- "size differs for %s ", local->loc.path);
- local->self_heal.need_data_self_heal = _gf_true;
- }
-
-}
-
-
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this, struct iatt *lookup_buf)
-{
- int unwind = 1;
- int source = -1;
- char sh_type_str[256] = {0,};
-
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->cont.lookup.postparent.ia_ino = local->cont.lookup.parent_ino;
-
- if (local->cont.lookup.ino) {
- local->cont.lookup.buf.ia_ino = local->cont.lookup.ino;
- local->cont.lookup.buf.ia_gen = local->cont.lookup.gen;
- }
-
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino) {
- local->cont.lookup.buf.ia_ino =
- local->cont.lookup.inode->ino;
- local->cont.lookup.buf.ia_gen =
- local->cont.lookup.inode->generation;
- }
- }
-
- if (local->success_count && local->enoent_count) {
- local->self_heal.need_metadata_self_heal = _gf_true;
- local->self_heal.need_data_self_heal = _gf_true;
- local->self_heal.need_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_NORMAL,
- "entries are missing in lookup of %s.",
- local->loc.path);
- }
-
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this,
- local->cont.lookup.inode)) {
- local->self_heal.need_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_NORMAL,
- "split brain detected during lookup of "
- "%s.", local->loc.path);
- }
- }
-
- if ((local->self_heal.need_metadata_self_heal
- || local->self_heal.need_data_self_heal
- || local->self_heal.need_entry_self_heal)
- && ((!local->cont.lookup.is_revalidate)
- || (local->op_ret != -1))) {
-
- if (local->open_fd_count
- || local->inodelk_count
- || local->entrylk_count) {
-
- /* Someone else is doing self-heal on this file.
- So just make a best effort to set the read-subvolume
- and return */
-
- if (IA_ISREG (local->cont.lookup.inode->ia_type)) {
- source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs);
-
- if (source >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- source);
- }
- }
- } else {
- if (!local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type =
- lookup_buf->ia_type;
- }
-
- local->self_heal.background = _gf_true;
- local->self_heal.type = local->cont.lookup.buf.ia_type;
- local->self_heal.unwind = afr_self_heal_lookup_unwind;
-
- unwind = 0;
-
- afr_self_heal_type_str_get(&local->self_heal,
- sh_type_str,
- sizeof(sh_type_str));
-
- gf_log (this->name, GF_LOG_NORMAL, "background %s "
- "self-heal triggered. path: %s",
- sh_type_str, local->loc.path);
-
- afr_self_heal (frame, this);
- }
- }
-
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
-
-
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
-
-static gf_boolean_t
-__error_more_important (int32_t old_errno, int32_t new_errno)
-{
- gf_boolean_t ret = _gf_true;
-
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
-
- /* Nothing should overwrite ENOENT, except ESTALE */
- else if ((old_errno == ENOENT) && (new_errno != ESTALE))
- ret = _gf_false;
-
- return ret;
-}
-
-
-int
-afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- child_index = (long) cookie;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- lookup_buf = &local->cont.lookup.buf;
-
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-
- goto unlock;
- }
-
- afr_lookup_collect_xattr (local, this, child_index, xattr);
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- local->cont.lookup.gen = buf->ia_gen;
- }
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
-
- if (child_index == local->read_child_index) {
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
-
- return 0;
-}
-
-
-int
-afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct iatt * lookup_buf = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- child_index = (long) cookie;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- lookup_buf = &local->cont.lookup.buf;
-
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (__error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
-
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
-
- goto unlock;
- }
-
- afr_lookup_collect_xattr (local, this, child_index, xattr);
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->ia_ino,
- priv->child_count,
- first_up_child);
- local->cont.lookup.gen = buf->ia_gen;
- }
-
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
-
- /* inode number should be preserved across revalidates */
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- lookup_buf->ia_ino = afr_itransform (buf->ia_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- afr_lookup_self_heal_check (this, local, buf, lookup_buf);
-
- if (child_index == local->read_child_index) {
-
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
-
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.xattr = dict_ref (xattr);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparent = *postparent;
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_lookup_done (frame, this, lookup_buf);
- }
-
- return 0;
-}
-
-
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
-
- fop_lookup_cbk_t callback;
-
- int call_count = 0;
-
- uint64_t ctx;
-
- int32_t op_errno = 0;
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- local->op_ret = -1;
-
- frame->local = local;
-
- if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
- op_errno = ENOENT;
- goto out;
- }
-
- loc_copy (&local->loc, loc);
-
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
-
- callback = afr_revalidate_lookup_cbk;
-
- local->cont.lookup.is_revalidate = _gf_true;
- local->read_child_index = afr_read_child (this,
- loc->inode);
- } else {
- callback = afr_fresh_lookup_cbk;
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
- }
-
- if (loc->parent)
- local->cont.lookup.parent_ino = loc->parent->ino;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
-
- local->cont.lookup.xattrs = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
-
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- call_count = local->call_count;
-
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
- }
-
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
-
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
-
- /* 3 = data+metadata+entry */
- }
-
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, callback, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- ret = 0;
-out:
- if (ret == -1)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-
-/* {{{ open */
-
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
-
- int op_ret = 0;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
-
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto unlock;
-
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->opened_on) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->child_failed = GF_CALLOC (
- sizeof (*fd_ctx->child_failed),
- priv->child_count,
- gf_afr_mt_char);
-
- if (!fd_ctx->child_failed) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto unlock;
- }
-
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
-
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret < 0) {
- op_ret = ret;
- }
-
- INIT_LIST_HEAD (&fd_ctx->entries);
- }
-unlock:
- UNLOCK (&fd->lock);
-out:
- return ret;
-}
-
-/* {{{ flush */
-
-int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-
-int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_plain_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (flush, frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-static int
-__no_pre_op_done (xlator_t *this, fd_t *fd)
-{
- int i = 0;
- int op_ret = 1;
-
- int _ret = 0;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
-
- if (_ret < 0) {
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fd_ctx->pre_op_done[i]) {
- op_ret = 0;
- break;
- }
- }
- }
-out:
- UNLOCK (&fd->lock);
-
- return op_ret;
-}
-
-
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int i = 0;
- int call_count = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (__no_pre_op_done (this, fd)) {
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_plain_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- fd);
- if (!--call_count)
- break;
- }
- }
- } else {
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION);
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-/* }}} */
-
-
-int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
-{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
- goto out;
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx) {
- if (fd_ctx->child_failed)
- GF_FREE (fd_ctx->child_failed);
-
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
-
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
-
- GF_FREE (fd_ctx);
- }
-
-out:
- return 0;
-}
-
-
-int
-afr_release (xlator_t *this, fd_t *fd)
-{
- afr_cleanup_fd_ctx (this, fd);
-
- return 0;
-}
-
-
-/* {{{ fsync */
-
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
-
- read_child = afr_read_child (this, local->fd->inode);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->cont.fsync.prebuf.ia_ino = local->cont.fsync.ino;
- local->cont.fsync.postbuf.ia_ino = local->cont.fsync.ino;
-
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf);
- }
-
- return 0;
-}
-
-
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- local->fd = fd_ref (fd);
- local->cont.fsync.ino = fd->inode->ino;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fsync */
-
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ xattrop */
-
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (xattrop, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fxattrop */
-
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-/* }}} */
-
-
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno);
- }
- return 0;
-}
-
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
-{
- afr_local_t *local = NULL;
-
- int call_count = 0;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
-
- return 0;
-}
-
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
-
- int ret = -1;
- int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (statfs, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t * local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock);
-
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- return 0;
- }
-
- local->call_count = call_count;
-
- local->cont.lk.flock.l_type = F_UNLCK;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- call_count = --local->call_count;
-
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- afr_lk_unlock (frame, this);
- return 0;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.flock = *lock;
- }
-
- child_index++;
-
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.flock);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
-
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, &local->cont.lk.flock);
- } else {
- /* locking has succeeded on all nodes that are up */
-
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- }
-
- return 0;
-}
-
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int i = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
-
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
-
- if (!local->cont.lk.locked_nodes) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-int
-afr_priv_dump (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
-
- assert(this);
- priv = this->private;
-
- assert(priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
- }
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%u", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
-
- return 0;
-}
-
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
-{
- afr_private_t *priv = NULL;
-
- int i = -1;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
- }
-
- return i;
-}
-
+#include "afr-common.c"
int32_t
notify (xlator_t *this, int32_t event,
void *data, ...)
{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
-
- int i = -1;
- int up_children = 0;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- child_up = priv->child_up;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
-
- child_up[i] = 1;
-
- LOCK (&priv->lock);
- {
- priv->up_count++;
- }
- UNLOCK (&priv->lock);
-
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
-
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
-
- default_notify (this, event, data);
- }
+ int ret = -1;
- break;
+ ret = afr_notify (this, event, data);
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
-
- child_up[i] = 0;
-
- LOCK (&priv->lock);
- {
- priv->down_count++;
- }
- UNLOCK (&priv->lock);
-
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
-
- if (up_children == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
-
- default_notify (this, event, data);
- }
-
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
+ return ret;
}
int32_t
@@ -2624,7 +50,7 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-
+
if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
"failed");
@@ -2727,7 +153,7 @@ init (xlator_t *this)
"Defaulting to data-self-heal as 'on'",
self_heal);
priv->data_self_heal = 1;
- }
+ }
}
priv->data_self_heal_algorithm = "";
@@ -2953,6 +379,10 @@ init (xlator_t *this)
i++;
}
+ LOCK_INIT (&priv->root_inode_lk);
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 125f5c2a2..3fa987ee8 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -32,6 +32,8 @@
#define AFR_XATTR_PREFIX "trusted.afr"
+struct _pump_private;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -41,6 +43,10 @@ typedef struct _afr_private {
xlator_t **children;
+ gf_lock_t root_inode_lk;
+ int first_lookup;
+ inode_t *root_inode;
+
unsigned char *child_up;
char **pending_key;
@@ -73,6 +79,9 @@ typedef struct _afr_private {
uint64_t up_count; /* number of CHILD_UPs we have seen */
uint64_t down_count; /* number of CHILD_DOWNs we have seen */
+
+ struct _pump_private *pump_private; /* Set if we are loaded as pump */
+ gf_boolean_t pump_loaded;
} afr_private_t;
typedef struct {
@@ -205,6 +214,7 @@ typedef struct _afr_local {
unsigned int success_count;
unsigned int enoent_count;
+
unsigned int govinda_gOvinda;
unsigned int read_child_index;
@@ -576,6 +586,18 @@ typedef struct {
#define all_tried(i, count) ((i) == (count) - 1)
int
+pump_command_reply (call_frame_t *frame, xlator_t *this);
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, ...);
+
+void
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this);
+
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
uint64_t
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
new file mode 100644
index 000000000..e69c02845
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.c
@@ -0,0 +1,1817 @@
+/*
+ Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <stdlib.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "afr-common.c"
+
+pump_state_t
+pump_get_state ()
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t ret;
+
+ this = THIS;
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ ret = pump_priv->pump_state;
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ return ret;
+}
+
+int
+pump_change_state (xlator_t *this, pump_state_t state)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t state_old;
+ pump_state_t state_new;
+
+ unsigned char * child_up = NULL;
+ int i = 0;
+
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ child_up = priv->child_up;
+
+ assert (pump_priv);
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ state_old = pump_priv->pump_state;
+ state_new = state;
+
+ pump_priv->pump_state = state;
+
+ switch (pump_priv->pump_state) {
+ case PUMP_STATE_RESUME:
+ case PUMP_STATE_RUNNING:
+ case PUMP_STATE_PAUSE:
+ {
+ priv->pump_loaded = _gf_true;
+ i = 1;
+
+ child_up[i] = 1;
+
+ LOCK (&priv->lock);
+ {
+ priv->up_count++;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+ }
+ case PUMP_STATE_ABORT:
+ {
+ priv->pump_loaded = _gf_false;
+ i = 1;
+
+ child_up[i] = 0;
+
+ LOCK (&priv->lock);
+ {
+ priv->down_count++;
+ }
+ UNLOCK (&priv->lock);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped = 0;
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+
+ break;
+ }
+
+ }
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump changing state from %d to %d",
+ state_old,
+ state_new);
+
+ return 0;
+}
+
+static int
+pump_set_resume_path (xlator_t *this, const char *path)
+{
+ int ret = 0;
+
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ assert (pump_priv);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->resume_path = strdup (path);
+ if (!pump_priv->resume_path)
+ ret = -1;
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return ret;
+}
+
+static void
+build_child_loc (loc_t *parent, loc_t *child, char *path, char *name)
+{
+ child->path = path;
+ child->name = name;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+}
+
+static char *
+build_file_path (loc_t *loc, gf_dirent_t *entry)
+{
+ xlator_t *this = NULL;
+ char *file_path = NULL;
+ int pathlen = 0;
+ int total_size = 0;
+
+ this = THIS;
+
+ pathlen = STRLEN_0 (loc->path);
+
+ if (IS_ROOT_PATH (loc->path)) {
+ total_size = pathlen + entry->d_len;
+ file_path = GF_CALLOC (1, total_size, gf_afr_mt_char);
+ if (!file_path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ return NULL;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "constructing file path of size=%d"
+ "pathlen=%d, d_len=%d",
+ total_size, pathlen,
+ entry->d_len);
+
+ snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name);
+
+ } else {
+ total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */
+ file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char);
+ if (!file_path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ return NULL;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "constructing file path of size=%d"
+ "pathlen=%d, d_len=%d",
+ total_size, pathlen,
+ entry->d_len);
+
+ snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name);
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "path=%s and d_name=%s", loc->path, entry->d_name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "constructed file_path=%s of size=%d", file_path, total_size);
+
+ return file_path;
+}
+
+static int
+pump_check_and_update_status (xlator_t *this)
+{
+ pump_state_t state;
+ int ret = -1;
+
+ state = pump_get_state ();
+
+ switch (state) {
+
+ case PUMP_STATE_RESUME:
+ case PUMP_STATE_RUNNING:
+ {
+ ret = 0;
+ break;
+ }
+ case PUMP_STATE_PAUSE:
+ case PUMP_STATE_ABORT:
+ {
+ ret = -1;
+ break;
+ }
+ default:
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unknown pump state");
+ ret = -1;
+ break;
+ }
+
+ }
+
+ return ret;
+}
+
+static const char *
+pump_get_resume_path (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ const char *resume_path = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ resume_path = pump_priv->resume_path;
+
+ return resume_path;
+}
+
+static int
+pump_update_resume_state (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t state;
+ const char *resume_path = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strcmp (resume_path, "/") == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path (/). Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else if (strcmp (resume_path, path) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path. Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not yet hit the resume path:res-path=%s,path=%s",
+ resume_path, path);
+ }
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+is_pump_traversal_allowed (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t state;
+ const char *resume_path = NULL;
+ gf_boolean_t ret = _gf_true;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strstr (resume_path, path)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "On the right path to resumption path");
+ ret = _gf_true;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not the right path to resuming=> ignoring traverse");
+ ret = _gf_false;
+ }
+ }
+
+ return ret;
+}
+
+static int
+pump_update_file_stats (xlator_t *this, long source_blocks,
+ long sink_blocks)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->source_blocks = source_blocks;
+ pump_priv->sink_blocks = sink_blocks;
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return 0;
+}
+
+static int
+pump_save_file_stats (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ struct statvfs source_buf = {0, };
+ struct statvfs sink_buf = {0, };
+ loc_t loc;
+ int ret = -1;
+
+ priv = this->private;
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &loc);
+
+ ret = syncop_statfs (PUMP_SOURCE_CHILD (this),
+ &loc, &source_buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "source statfs failed");
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "source statfs succeeded");
+ }
+
+
+ ret = syncop_statfs (PUMP_SOURCE_CHILD (this),
+ &loc, &sink_buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "sink statfs failed");
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "sink statfs succeeded");
+ }
+
+ pump_update_file_stats (this,
+ source_buf.f_blocks,
+ sink_buf.f_blocks);
+
+ return 0;
+
+}
+static int
+pump_save_path (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ pump_state_t state;
+ dict_t *dict = NULL;
+ loc_t loc;
+ int dict_ret = 0;
+ int ret = -1;
+
+ state = pump_get_state ();
+ if (state != PUMP_STATE_RUNNING)
+ return 0;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &loc);
+
+ dict = dict_new ();
+ dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+
+// ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+ ret = 0;
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - could not save path=%s", path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr succeeded - saved path=%s", path);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Saving path for status info");
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped++;
+
+ strncpy (pump_priv->current_file, path,
+ PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ }
+
+ dict_unref (dict);
+
+ return 0;
+}
+
+static int
+gf_pump_traverse_directory (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+
+ off_t offset = 0;
+ loc_t entry_loc;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp;
+
+ int source = 0;
+
+ char *file_path = NULL;
+ int ret = 0;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+ priv = this->private;
+
+ assert (loc->inode);
+
+ fd = fd_create (loc->inode, PUMP_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", loc->path);
+ goto out;
+ }
+
+ ret = syncop_opendir (priv->children[source], loc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opendir failed on %s", loc->path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "pump opendir on %s returned=%d",
+ loc->path, ret);
+
+ while (syncop_readdirp (priv->children[source], fd, 131072, offset, &entries)) {
+
+ if (list_empty (&entries.list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no more entries in directory");
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found readdir entry=%s", entry->d_name);
+
+ file_path = build_file_path (loc, entry);
+ if (!file_path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "file path construction failed");
+ goto out;
+ }
+
+ build_child_loc (loc, &entry_loc, file_path, entry->d_name);
+
+ ret = syncop_lookup (this, &entry_loc, NULL,
+ &iatt, &xattr_rsp, &parent);
+
+ entry_loc.ino = iatt.ia_ino;
+ entry_loc.inode->ino = iatt.ia_ino;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ if (!IS_ENTRY_CWD(entry->d_name) &&
+ !IS_ENTRY_PARENT (entry->d_name)) {
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this);
+ }
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "type of file=%d, IFDIR=%d",
+ iatt.ia_type, IA_IFDIR);
+
+ if (IA_ISDIR (iatt.ia_type) && !IS_ENTRY_CWD(entry->d_name) &&
+ !IS_ENTRY_PARENT (entry->d_name)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "entering dir=%s",
+ entry->d_name);
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
+
+ offset = entry->d_off;
+ loc_wipe (&entry_loc);
+ }
+
+ gf_dirent_free (&entries);
+ gf_log (this->name, GF_LOG_TRACE,
+ "offset incremented to %d",
+ (int32_t ) offset);
+
+ }
+
+out:
+ return 0;
+
+}
+
+void
+build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->name = "";
+ loc->inode = inode;
+ loc->ino = 1;
+ loc->inode->ino = 1;
+
+}
+
+static int
+pump_update_resume_path (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ const char *resume_path = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ resume_path = pump_get_resume_path (this);
+
+ if (resume_path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Found a path to resume from: %s",
+ resume_path);
+
+ }else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Did not find a path=> setting to '/'");
+ pump_set_resume_path (this, "/");
+ }
+
+ pump_change_state (this, PUMP_STATE_RESUME);
+
+ return 0;
+}
+
+static int
+pump_complete_migration (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ dict_t *dict = NULL;
+ pump_state_t state;
+ loc_t loc;
+ int dict_ret = 0;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &loc);
+
+ dict = dict_new ();
+
+ state = pump_get_state ();
+ if (state == PUMP_STATE_RUNNING) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump finished pumping");
+
+ pump_priv->pump_finished = _gf_true;
+
+ dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
+
+ ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying source complete");
+ }
+ dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
+
+ ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying sink complete");
+ }
+ }
+
+ return 0;
+}
+
+static int
+pump_task (void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+
+
+ loc_t loc;
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp;
+
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &loc);
+
+ ret = syncop_lookup (this, &loc, NULL,
+ &iatt, &xattr_rsp, &parent);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "lookup: ino=%"PRId64", path=%s",
+ loc.ino,
+ loc.path);
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ goto out;
+ }
+
+ pump_update_resume_path (this);
+
+ gf_pump_traverse_directory (&loc);
+
+ pump_complete_migration (this);
+out:
+ return 0;
+}
+
+
+static int
+pump_task_completion (int ret, void *data)
+{
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ this = THIS;
+
+ frame = (call_frame_t *) data;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ inode_unref (priv->root_inode);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump xlator exiting");
+ return 0;
+}
+
+int
+pump_start (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ call_frame_t *pump_frame = NULL;
+
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ pump_frame = copy_frame (frame);
+
+ if (!pump_frame->root->lk_owner)
+ pump_frame->root->lk_owner = PUMP_LK_OWNER;
+
+ ret = synctask_new (pump_priv->env, pump_task, pump_task_completion,
+ pump_frame);
+
+ if (ret != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "setting pump as started");
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "starting pump failed");
+ pump_change_state (this, PUMP_STATE_ABORT);
+ }
+
+ return ret;
+}
+
+gf_boolean_t
+is_pump_loaded (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ gf_boolean_t ret;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ if (priv->pump_loaded) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump is already started");
+ ret = _gf_true;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump is not started");
+ ret = _gf_false;
+ }
+
+ return ret;
+
+}
+
+int32_t
+pump_cmd_start_getxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict)
+{
+ afr_local_t *local = NULL;
+ char *path = NULL;
+
+ pump_state_t state;
+ int ret = 0;
+ int dict_ret = -1;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr failed - changing pump state to RUNNING with '/'");
+ path = "/";
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "getxattr succeeded");
+
+ dict_ret = dict_get_str (dict, PUMP_PATH, &path);
+ if (dict_ret < 0)
+ path = "/";
+ }
+
+ state = pump_get_state ();
+ if ((state == PUMP_STATE_RUNNING) ||
+ (state == PUMP_STATE_RESUME)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pump is already started");
+ ret = -1;
+ goto out;
+ }
+
+ pump_set_resume_path (this, path);
+ pump_change_state (this, PUMP_STATE_RUNNING);
+
+ ret = pump_start (frame, this);
+
+out:
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ return 0;
+}
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ uint64_t number_files = 0;
+
+ char filename[PATH_MAX];
+ char *dict_str = NULL;
+
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ number_files = pump_priv->number_files_pumped;
+ strncpy (filename, pump_priv->current_file, PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
+ if (!dict_str) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (pump_priv->pump_finished) {
+ snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
+ number_files);
+ } else {
+ snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
+ number_files, filename);
+ }
+
+ dict = dict_new ();
+
+ ret = dict_set_str (dict, PUMP_CMD_STATUS, dict_str);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_str returned negative value");
+ }
+
+ op_ret = 0;
+
+out:
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+
+ dict_unref (dict);
+ GF_FREE (dict_str);
+
+ return 0;
+}
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ pump_change_state (this, PUMP_STATE_PAUSE);
+
+ local->op_ret = 0;
+ pump_command_reply (frame, this);
+
+ return 0;
+}
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = 0;
+ loc_t loc;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!priv->root_inode) {
+ gf_log (this->name, GF_LOG_NORMAL,
+ "Pump xlator cannot be started without an initial lookup");
+ ret = -1;
+ goto out;
+ }
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &loc);
+
+ STACK_WIND (frame,
+ pump_cmd_start_getxattr_cbk,
+ PUMP_SOURCE_CHILD(this),
+ PUMP_SOURCE_CHILD(this)->fops->getxattr,
+ &loc,
+ PUMP_PATH);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+
+ return 0;
+}
+
+int32_t
+pump_cmd_abort_removexattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Aborting pump failed. Please remove xattr"
+ PUMP_PATH "of the source child's '/'");
+ local->op_ret = -1;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "remove xattr succeeded");
+ local->op_ret = 0;
+ }
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ pump_command_reply (frame, this);
+ return 0;
+}
+
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ pump_private_t *pump_priv = NULL;
+ loc_t root_loc;
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+ pump_priv = priv->pump_private;
+
+ if (!priv->pump_loaded) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Trying to abort pump xlator which is not loaded");
+ ret = -1;
+ goto out;
+ }
+
+ assert (priv->root_inode);
+
+ build_root_loc (priv->root_inode, &root_loc);
+
+ STACK_WIND (frame,
+ pump_cmd_abort_removexattr_cbk,
+ PUMP_SOURCE_CHILD (this),
+ PUMP_SOURCE_CHILD (this)->fops->removexattr,
+ &root_loc,
+ PUMP_PATH);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, PUMP_CMD_STATUS, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump status command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - status");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, PUMP_CMD_PAUSE, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump pause command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - pause");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, PUMP_CMD_ABORT, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump abort command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - abort");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, PUMP_CMD_START, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump start command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - start");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+struct _xattr_key {
+ char *key;
+ struct list_head list;
+};
+
+static void
+__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ struct list_head * list = data;
+ struct _xattr_key * xkey = NULL;
+
+ if (!strncmp (key, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return;
+
+ xkey->key = key;
+ INIT_LIST_HEAD (&xkey->list);
+
+ list_add_tail (&xkey->list, list);
+ }
+}
+
+static void
+__filter_xattrs (dict_t *dict)
+{
+ struct list_head keys;
+
+ struct _xattr_key *key;
+ struct _xattr_key *tmp;
+
+ INIT_LIST_HEAD (&keys);
+
+ dict_foreach (dict, __gather_xattr_keys,
+ (void *) &keys);
+
+ list_for_each_entry_safe (key, tmp, &keys, list) {
+ dict_del (dict, key->key);
+
+ list_del_init (&key->list);
+
+ GF_FREE (key);
+ }
+}
+
+int32_t
+pump_getxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+ int read_child = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.getxattr.last_tried;
+
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.getxattr.last_tried;
+
+ if (this_try == read_child) {
+ goto retry;
+ }
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) read_child,
+ children[this_try],
+ children[this_try]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict);
+ }
+
+ return 0;
+}
+
+int32_t
+pump_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t * local = NULL;
+
+ int read_child = -1;
+
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+ frame->local = local;
+
+ if (name) {
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (!strcmp (name, PUMP_CMD_STATUS)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit pump command - status");
+ pump_execute_status (frame, this);
+ op_ret = 0;
+ goto out;
+ }
+ }
+
+ read_child = afr_read_child (this, loc->inode);
+
+ if (read_child >= 0) {
+ call_child = read_child;
+
+ local->cont.getxattr.last_tried = -1;
+ } else {
+ call_child = afr_first_up_child (priv);
+
+ if (call_child == -1) {
+ op_errno = ENOTCONN;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no child is up");
+ goto out;
+ }
+
+ local->cont.getxattr.last_tried = call_child;
+ }
+
+ loc_copy (&local->loc, loc);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->getxattr,
+ loc, name);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, NULL);
+ }
+ return 0;
+}
+
+static int
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (setxattr, main_frame,
+ local->op_ret, local->op_errno)
+ }
+ return 0;
+}
+
+static int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc,
+ local->cont.setxattr.dict,
+ local->cont.setxattr.flags);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int32_t
+pump_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ STACK_UNWIND (frame,
+ op_ret,
+ op_errno);
+ return 0;
+}
+
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0)
+ gf_log (this->name, GF_LOG_NORMAL,
+ "Command failed");
+ else
+ gf_log (this->name, GF_LOG_NORMAL,
+ "Command succeeded");
+
+ AFR_STACK_UNWIND (setxattr,
+ frame,
+ local->op_ret,
+ local->op_errno);
+
+ return 0;
+}
+
+int
+pump_parse_command (call_frame_t *frame, xlator_t *this,
+ afr_local_t *local, dict_t *dict)
+{
+
+ int ret = -1;
+
+ if (pump_command_start (this, dict)) {
+ frame->local = local;
+ ret = pump_execute_start (frame, this);
+
+ } else if (pump_command_pause (this, dict)) {
+ frame->local = local;
+ ret = pump_execute_pause (frame, this);
+
+ } else if (pump_command_abort (this, dict)) {
+ frame->local = local;
+ ret = pump_execute_abort (frame, this);
+ }
+ return ret;
+}
+
+int
+pump_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+
+ int ret = -1;
+
+ int op_ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = pump_parse_command (frame, this,
+ local, dict);
+ if (ret >= 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+
+ local->transaction.fop = afr_setxattr_wind;
+ local->transaction.done = afr_setxattr_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno);
+ }
+
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
+{
+ int ret = -1;
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ pump_change_state (this, PUMP_STATE_ABORT);
+ break;
+ }
+
+ ret = afr_notify (this, event, data);
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ int child_count = 0;
+ xlator_list_t * trav = NULL;
+ int i = 0;
+ int ret = -1;
+ int op_errno = 0;
+
+ int source_child = 0;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pump translator needs a source and sink"
+ "subvolumes defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
+ }
+
+ ALLOC_OR_GOTO (this->private, afr_private_t, out);
+
+ priv = this->private;
+
+ priv->read_child = source_child;
+ priv->favorite_child = source_child;
+ priv->background_self_heal_count = 0;
+
+ priv->data_self_heal = 1;
+ priv->metadata_self_heal = 1;
+ priv->entry_self_heal = 1;
+
+ priv->data_self_heal_algorithm = "";
+
+ priv->data_self_heal_window_size = 16;
+
+ priv->data_change_log = 1;
+ priv->metadata_change_log = 1;
+ priv->entry_change_log = 1;
+
+ /* Locking options */
+
+ /* Lock server count infact does not matter. Locks are held
+ on all subvolumes, in this case being the source
+ and the sink.
+ */
+
+ priv->data_lock_server_count = 2;
+ priv->metadata_lock_server_count = 2;
+ priv->entry_lock_server_count = 2;
+
+ priv->strict_readdir = _gf_false;
+
+ trav = this->children;
+ while (trav) {
+ child_count++;
+ trav = trav->next;
+ }
+
+ priv->wait_count = 1;
+
+ if (child_count != 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "There should be exactly 2 children - one source "
+ "and one sink");
+ return -1;
+ }
+ priv->child_count = child_count;
+
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+
+ ret = asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed to set pending key");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = trav->next;
+ i++;
+ }
+
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
+ pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
+ gf_afr_mt_pump_priv);
+ if (!pump_priv) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK_INIT (&pump_priv->resume_path_lock);
+ LOCK_INIT (&pump_priv->pump_state_lock);
+
+ pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
+ gf_afr_mt_char);
+ if (!pump_priv->resume_path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ pump_priv->env = syncenv_new (0);
+ if (!pump_priv->env) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not create new sync-environment");
+ ret = -1;
+ goto out;
+ }
+
+ priv->pump_private = pump_priv;
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+fini (xlator_t *this)
+{
+ return 0;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = afr_lookup,
+ .open = afr_open,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsync = afr_fsync,
+ .fsyncdir = afr_fsyncdir,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = pump_getxattr,
+ .readv = afr_readv,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = pump_setxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+
+ /* dir read */
+ .opendir = afr_opendir,
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = afr_priv_dump,
+};
+
+
+struct xlator_cbks cbks = {
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
new file mode 100644
index 000000000..15799002b
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.h
@@ -0,0 +1,96 @@
+/*
+ Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __PUMP_H__
+#define __PUMP_H__
+
+#include "syncop.h"
+
+#define PUMP_PID 696969
+#define PUMP_LK_OWNER 696969
+
+#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
+#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
+#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
+
+#define PUMP_CMD_START "trusted.glusterfs.pump.start"
+#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort"
+#define PUMP_CMD_PAUSE "trusted.glusterfs.pump.pause"
+#define PUMP_CMD_STATUS "trusted.glusterfs.pump.status"
+
+#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
+#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
+
+#define PUMP_PATH "trusted.glusterfs.pump-path"
+
+#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
+#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
+
+typedef enum {
+ PUMP_STATE_RUNNING,
+ PUMP_STATE_RESUME,
+ PUMP_STATE_PAUSE,
+ PUMP_STATE_ABORT,
+} pump_state_t;
+
+typedef struct _pump_private {
+ struct syncenv *env;
+ const char *resume_path;
+ gf_lock_t resume_path_lock;
+ gf_lock_t pump_state_lock;
+ pump_state_t pump_state;
+ long source_blocks;
+ long sink_blocks;
+ char current_file[PATH_MAX];
+ uint64_t number_files_pumped;
+ gf_boolean_t pump_finished;
+} pump_private_t;
+
+void
+build_root_loc (inode_t *inode, loc_t *loc);
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+is_pump_loaded (xlator_t *this);
+
+#endif /* __PUMP_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 9aa4801e9..a8b334c3d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -848,6 +848,44 @@ out:
}
int
+glusterd_handle_replace_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_replace_brick_req cli_req = {0,};
+ dict_t *dict = NULL;
+
+ GF_ASSERT (req);
+
+ if (!gf_xdr_to_cli_replace_brick_req (req->msg[0], &cli_req)) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_NORMAL, "Received replace brick req");
+
+ if (cli_req.bricks.bricks_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.bricks.bricks_val,
+ cli_req.bricks.bricks_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ ret = glusterd_replace_brick (req, dict);
+
+out:
+ return ret;
+}
+
+int
glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
int32_t ret = -1;
@@ -1657,6 +1695,23 @@ glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict)
}
int32_t
+glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+
+ glusterd_op_set_op (GD_OP_REPLACE_BRICK);
+
+ glusterd_op_set_ctx (GD_OP_REPLACE_BRICK, dict);
+
+ ret = glusterd_op_txn_begin ();
+
+ return ret;
+}
+
+int32_t
glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict)
{
int32_t ret = -1;
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 31e2a9e50..8f6861417 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -84,6 +84,7 @@ glusterd_op_get_len (glusterd_op_t op)
case GD_OP_START_BRICK:
break;
+ case GD_OP_REPLACE_BRICK:
case GD_OP_ADD_BRICK:
{
dict_t *dict = glusterd_op_get_ctx (op);
@@ -210,6 +211,20 @@ glusterd_op_build_payload (glusterd_op_t op, gd1_mgmt_stage_op_req **req)
}
break;
+ case GD_OP_REPLACE_BRICK:
+ {
+ dict_t *dict = NULL;
+ dict = glusterd_op_get_ctx (op);
+ GF_ASSERT (dict);
+ ret = dict_allocate_and_serialize (dict,
+ &stage_req->buf.buf_val,
+ (size_t *)&stage_req->buf.buf_len);
+ if (ret) {
+ goto out;
+ }
+ }
+ break;
+
case GD_OP_REMOVE_BRICK:
{
dict_t *dict = NULL;
@@ -533,6 +548,55 @@ out:
}
static int
+glusterd_op_stage_replace_brick (gd1_mgmt_stage_op_req *req)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+
+ GF_ASSERT (req);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict");
+ goto out;
+ }
+ /* Need to do a little more error checking and validation */
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get dest brick");
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "dest brick=%s", dst_brick);
+
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
glusterd_op_stage_remove_brick (gd1_mgmt_stage_op_req *req)
{
int ret = 0;
@@ -828,6 +892,310 @@ out:
}
static int
+replace_brick_start_dst_brick (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *dst_brickinfo)
+{ glusterd_conf_t *priv = NULL;
+ char cmd_str[8192] = {0,};
+ char filename[PATH_MAX];
+ FILE *file = NULL;
+ int ret;
+
+ priv = THIS->private;
+
+ snprintf (filename, PATH_MAX, "%s/vols/%s/replace_dst_brick.vol",
+ priv->workdir, volinfo->volname);
+
+
+ file = fopen (filename, "a+");
+ if (!file) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Open of volfile failed");
+ return -1;
+ }
+
+ truncate (filename, 0);
+
+ fprintf (file, "volume src-posix\n");
+ fprintf (file, "type storage/posix\n");
+ fprintf (file, "option directory %s\n",
+ dst_brickinfo->path);
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume %s\n",
+ dst_brickinfo->path);
+ fprintf (file, "type features/locks\n");
+ fprintf (file, "subvolumes src-posix\n");
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume src-server\n");
+ fprintf (file, "type protocol/server\n");
+ fprintf (file, "option auth.addr.%s.allow *\n",
+ dst_brickinfo->path);
+ fprintf (file, "option listen-port 34034\n");
+ fprintf (file, "subvolumes %s\n",
+ dst_brickinfo->path);
+ fprintf (file, "end-volume\n");
+
+ gf_log ("", GF_LOG_DEBUG,
+ "starting dst brick");
+
+ snprintf (cmd_str, 4096, "glusterfs -f %s/vols/%s/replace_dst_brick.vol",
+ priv->workdir, volinfo->volname);
+
+ ret = system (cmd_str);
+
+
+ fclose (file);
+
+ return 0;
+}
+
+static int
+replace_brick_spawn_brick (glusterd_volinfo_t *volinfo, dict_t *dict,
+ glusterd_brickinfo_t *dst_brickinfo)
+
+{
+
+ replace_brick_start_dst_brick (volinfo, dst_brickinfo);
+
+ return 0;
+}
+
+static int
+replace_brick_generate_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ FILE *file = NULL;
+ char filename[PATH_MAX];
+ int ret;
+
+ priv = THIS->private;
+
+ gf_log ("", GF_LOG_DEBUG,
+ "Creating volfile");
+
+ snprintf (filename, PATH_MAX, "%s/vols/%s/replace_brick.vol",
+ priv->workdir, volinfo->volname);
+
+ file = fopen (filename, "a+");
+ if (!file) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Open of volfile failed");
+ return -1;
+ }
+
+ ret = truncate (filename, 0);
+ ret = unlink ("/tmp/replace_brick.vol");
+
+ fprintf (file, "volume client/protocol\n");
+ fprintf (file, "type protocol/client\n");
+ fprintf (file, "option remote-host %s\n",
+ src_brickinfo->hostname);
+ fprintf (file, "option remote-subvolume %s\n",
+ src_brickinfo->path);
+ fprintf (file, "option remote-port 34034\n");
+ fprintf (file, "echo end-volume\n");
+
+ ret = symlink(filename, "/tmp/replace_brick.vol");
+ if (!ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "symlink call failed");
+ return -1;
+ }
+
+ fclose (file);
+
+ return 0;
+}
+
+static int
+replace_brick_start_source_brick (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src_brickinfo,
+ glusterd_brickinfo_t *dst_brickinfo)
+{ glusterd_conf_t *priv = NULL;
+ char filename[PATH_MAX];
+ FILE *file = NULL;
+ char cmd_str[8192] = {0,};
+ int ret;
+
+ priv = THIS->private;
+
+ gf_log ("", GF_LOG_DEBUG,
+ "Creating volfile");
+
+ snprintf (filename, PATH_MAX, "%s/vols/%s/replace_source_brick.vol",
+ priv->workdir, volinfo->volname);
+
+ file = fopen (filename, "a+");
+ if (!file) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Open of volfile failed");
+ return -1;
+ }
+
+ ret = truncate (filename, 0);
+
+ fprintf (file, "volume src-posix\n");
+ fprintf (file, "type storage/posix\n");
+ fprintf (file, "option directory %s\n",
+ src_brickinfo->path);
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume locks\n");
+ fprintf (file, "type features/locks\n");
+ fprintf (file, "subvolumes src-posix\n");
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume remote-client\n");
+ fprintf (file, "type protocol/client\n");
+ fprintf (file, "option remote-host %s\n",
+ dst_brickinfo->hostname);
+ fprintf (file, "option remote-port 34034\n");
+ fprintf (file, "option remote-subvolume %s\n",
+ dst_brickinfo->path);
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume %s\n",
+ src_brickinfo->path);
+ fprintf (file, "type cluster/pump\n");
+ fprintf (file, "subvolumes locks remote-client\n");
+ fprintf (file, "end-volume\n");
+ fprintf (file, "volume src-server\n");
+ fprintf (file, "type protocol/server\n");
+ fprintf (file, "option auth.addr.%s.allow *\n",
+ src_brickinfo->path);
+ fprintf (file, "option listen-port 34034\n");
+ fprintf (file, "subvolumes %s\n",
+ src_brickinfo->path);
+ fprintf (file, "end-volume\n");
+
+
+ gf_log ("", GF_LOG_DEBUG,
+ "starting source brick");
+
+ snprintf (cmd_str, 4096, "glusterfs -f %s/vols/%s/replace_source_brick.vol -l /tmp/b_log -LTRACE",
+ priv->workdir, volinfo->volname);
+
+ ret = system (cmd_str);
+
+ fclose (file);
+
+ return 0;
+}
+
+static int
+replace_brick_mount (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src_brickinfo,
+ glusterd_brickinfo_t *dst_brickinfo, gf1_cli_replace_op op)
+{
+
+ gf_log ("", GF_LOG_DEBUG,
+ "starting source brick");
+
+ replace_brick_start_source_brick (volinfo, src_brickinfo,
+ dst_brickinfo);
+
+ return 0;
+}
+
+static int
+glusterd_op_replace_brick (gd1_mgmt_stage_op_req *req)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *str = NULL;
+
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict");
+ goto out;
+ }
+
+ /* Need to do a little more error checking and validation */
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get dest brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dst brick=%s", dst_brick);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ str = strdup (src_brick);
+
+ ret = glusterd_brickinfo_get (str, volinfo,
+ &src_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo");
+ goto out;
+ }
+
+ replace_brick_generate_volfile (volinfo, src_brickinfo);
+
+ if (!glusterd_is_local_addr (src_brickinfo->hostname)) {
+ gf_log ("", GF_LOG_NORMAL,
+ "I AM THE SOURCE HOST");
+ replace_brick_mount (volinfo, src_brickinfo, dst_brickinfo,
+ req->op);
+ } else if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log ("", GF_LOG_NORMAL,
+ "I AM THE DESTINATION HOST");
+ replace_brick_spawn_brick (volinfo, dict, dst_brickinfo);
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
glusterd_op_remove_brick (gd1_mgmt_stage_op_req *req)
{
int ret = 0;
@@ -1609,6 +1977,11 @@ glusterd_op_stage_validate (gd1_mgmt_stage_op_req *req)
ret = glusterd_op_stage_add_brick (req);
break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_stage_replace_brick (req);
+ break;
+
+
case GD_OP_REMOVE_BRICK:
ret = glusterd_op_stage_remove_brick (req);
break;
@@ -1652,6 +2025,10 @@ glusterd_op_commit_perform (gd1_mgmt_stage_op_req *req)
ret = glusterd_op_add_brick (req);
break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_replace_brick (req);
+ break;
+
case GD_OP_REMOVE_BRICK:
ret = glusterd_op_remove_brick (req);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6b32bd3a0..5ef0ce1e5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -73,7 +73,7 @@ glusterd_unset_lock_owner (uuid_t owner)
return 0;
}
-static int32_t
+int32_t
glusterd_is_local_addr (char *hostname)
{
int32_t ret = -1;
@@ -598,11 +598,14 @@ glusterd_brickinfo_from_brick (char *brick,
glusterd_brickinfo_t *new_brickinfo = NULL;
char *hostname = NULL;
char *path = NULL;
+ char *tmp = NULL;
GF_ASSERT (brick);
GF_ASSERT (brickinfo);
- hostname = strtok (brick, ":");
+ tmp = strdup (brick);
+
+ hostname = strtok (tmp, ":");
path = strtok (NULL, ":");
GF_ASSERT (hostname);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index fc8b33ab1..dd97e67a6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -115,4 +115,6 @@ glusterd_is_cli_op_req (int32_t op);
int32_t
glusterd_brickinfo_get (char *brick, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t **brickinfo);
+int32_t
+glusterd_is_local_addr (char *hostname);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 9f0986bb6..368c0a684 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -249,6 +249,13 @@ glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict);
int
glusterd_handle_add_brick (rpcsvc_request_t *req);
+
+int32_t
+glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict);
+
+int
+glusterd_handle_replace_brick (rpcsvc_request_t *req);
+
int
glusterd_handle_remove_brick (rpcsvc_request_t *req);
diff --git a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c b/xlators/mgmt/glusterd/src/glusterd3_1-mops.c
index 7169121d5..86d09194e 100644
--- a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c
+++ b/xlators/mgmt/glusterd/src/glusterd3_1-mops.c
@@ -1164,6 +1164,10 @@ glusterd_handle_rpc_msg (rpcsvc_request_t *req)
ret = glusterd_handle_add_brick (req);
break;
+ case GD_MGMT_CLI_REPLACE_BRICK:
+ ret = glusterd_handle_replace_brick (req);
+ break;
+
case GD_MGMT_CLI_REMOVE_BRICK:
ret = glusterd_handle_remove_brick (req);
break;
@@ -1203,6 +1207,7 @@ rpcsvc_actor_t glusterd1_mgmt_actors[] = {
[GD_MGMT_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GD_MGMT_CLI_DELETE_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
[GD_MGMT_CLI_GET_VOLUME] = { "GET_VOLUME", GD_MGMT_CLI_GET_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
[GD_MGMT_CLI_ADD_BRICK] = { "ADD_BRICK", GD_MGMT_CLI_ADD_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
+ [GD_MGMT_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GD_MGMT_CLI_REPLACE_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
[GD_MGMT_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GD_MGMT_CLI_REMOVE_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
};