summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/Makefile.am3
-rw-r--r--xlators/bindings/Makefile.am1
-rw-r--r--xlators/bindings/python/Makefile.am1
-rw-r--r--xlators/bindings/python/src/Makefile.am19
-rw-r--r--xlators/bindings/python/src/gluster.py47
-rw-r--r--xlators/bindings/python/src/glusterstack.py55
-rw-r--r--xlators/bindings/python/src/glustertypes.py167
-rw-r--r--xlators/bindings/python/src/python.c235
-rw-r--r--xlators/bindings/python/src/testxlator.py56
-rw-r--r--xlators/cluster/Makefile.am3
-rw-r--r--xlators/cluster/afr/Makefile.am3
-rw-r--r--xlators/cluster/afr/src/Makefile.am20
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c345
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h47
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c1786
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h59
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c721
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h47
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c2024
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h63
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c1073
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h66
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c1030
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c2038
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c791
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h52
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c957
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h36
-rw-r--r--xlators/cluster/afr/src/afr.c2338
-rw-r--r--xlators/cluster/afr/src/afr.h523
-rw-r--r--xlators/cluster/dht/Makefile.am1
-rw-r--r--xlators/cluster/dht/src/Makefile.am30
-rw-r--r--xlators/cluster/dht/src/dht-common.c3470
-rw-r--r--xlators/cluster/dht/src/dht-common.h212
-rw-r--r--xlators/cluster/dht/src/dht-hashfn-tea.c146
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c88
-rw-r--r--xlators/cluster/dht/src/dht-helper.c326
-rw-r--r--xlators/cluster/dht/src/dht-layout.c543
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c224
-rw-r--r--xlators/cluster/dht/src/dht-rename.c562
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c460
-rw-r--r--xlators/cluster/dht/src/dht.c222
-rw-r--r--xlators/cluster/dht/src/nufa.c684
-rw-r--r--xlators/cluster/ha/Makefile.am3
-rw-r--r--xlators/cluster/ha/src/Makefile.am15
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c191
-rw-r--r--xlators/cluster/ha/src/ha.c3479
-rw-r--r--xlators/cluster/ha/src/ha.h59
-rw-r--r--xlators/cluster/map/Makefile.am3
-rw-r--r--xlators/cluster/map/src/Makefile.am15
-rw-r--r--xlators/cluster/map/src/map-helper.c357
-rw-r--r--xlators/cluster/map/src/map.c2193
-rw-r--r--xlators/cluster/map/src/map.h76
-rw-r--r--xlators/cluster/stripe/Makefile.am3
-rw-r--r--xlators/cluster/stripe/src/Makefile.am14
-rw-r--r--xlators/cluster/stripe/src/stripe.c3286
-rw-r--r--xlators/cluster/unify/Makefile.am3
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1225
-rw-r--r--xlators/cluster/unify/src/unify.c4451
-rw-r--r--xlators/cluster/unify/src/unify.h132
-rw-r--r--xlators/debug/Makefile.am3
-rw-r--r--xlators/debug/error-gen/Makefile.am3
-rw-r--r--xlators/debug/error-gen/src/Makefile.am14
-rw-r--r--xlators/debug/error-gen/src/error-gen.c1780
-rw-r--r--xlators/debug/trace/Makefile.am3
-rw-r--r--xlators/debug/trace/src/Makefile.am14
-rw-r--r--xlators/debug/trace/src/trace.c2321
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am14
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c200
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h33
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/filter/Makefile.am3
-rw-r--r--xlators/features/filter/src/Makefile.am13
-rw-r--r--xlators/features/filter/src/filter.c1768
-rw-r--r--xlators/features/locks/Makefile.am3
-rw-r--r--xlators/features/locks/src/Makefile.am20
-rw-r--r--xlators/features/locks/src/common.c561
-rw-r--r--xlators/features/locks/src/common.h59
-rw-r--r--xlators/features/locks/src/internal.c762
-rw-r--r--xlators/features/locks/src/locks.h111
-rw-r--r--xlators/features/locks/src/posix.c834
-rw-r--r--xlators/features/locks/tests/unit-test.c75
-rw-r--r--xlators/features/path-convertor/Makefile.am3
-rw-r--r--xlators/features/path-convertor/src/Makefile.am14
-rw-r--r--xlators/features/path-convertor/src/path.c1217
-rw-r--r--xlators/features/quota/Makefile.am3
-rw-r--r--xlators/features/quota/src/Makefile.am13
-rw-r--r--xlators/features/quota/src/quota.c1056
-rw-r--r--xlators/features/trash/Makefile.am3
-rw-r--r--xlators/features/trash/src/Makefile.am13
-rw-r--r--xlators/features/trash/src/trash.c596
-rw-r--r--xlators/meta/Makefile.am1
-rw-r--r--xlators/meta/src/Makefile.am10
-rw-r--r--xlators/meta/src/meta.c1285
-rw-r--r--xlators/meta/src/meta.h48
-rw-r--r--xlators/meta/src/misc.c67
-rw-r--r--xlators/meta/src/misc.h31
-rw-r--r--xlators/meta/src/tree.c176
-rw-r--r--xlators/meta/src/tree.h35
-rw-r--r--xlators/meta/src/view.c258
-rw-r--r--xlators/meta/src/view.h32
-rw-r--r--xlators/mount/Makefile.am3
-rw-r--r--xlators/mount/fuse/Makefile.am3
-rw-r--r--xlators/mount/fuse/src/Makefile.am14
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c2859
-rw-r--r--xlators/mount/fuse/src/fuse-extra.c137
-rw-r--r--xlators/mount/fuse/src/fuse-extra.h42
-rw-r--r--xlators/mount/fuse/utils/Makefile.am10
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in152
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in181
-rw-r--r--xlators/performance/Makefile.am3
-rw-r--r--xlators/performance/io-cache/Makefile.am3
-rw-r--r--xlators/performance/io-cache/src/Makefile.am14
-rw-r--r--xlators/performance/io-cache/src/io-cache.c1478
-rw-r--r--xlators/performance/io-cache/src/io-cache.h330
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c201
-rw-r--r--xlators/performance/io-cache/src/page.c778
-rw-r--r--xlators/performance/io-threads/Makefile.am3
-rw-r--r--xlators/performance/io-threads/src/Makefile.am14
-rw-r--r--xlators/performance/io-threads/src/io-threads.c1254
-rw-r--r--xlators/performance/io-threads/src/io-threads.h99
-rw-r--r--xlators/performance/read-ahead/Makefile.am3
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am14
-rw-r--r--xlators/performance/read-ahead/src/page.c487
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c890
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h194
-rw-r--r--xlators/performance/stat-prefetch/Makefile.am1
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am11
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c508
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h32
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am12
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c399
-rw-r--r--xlators/performance/write-behind/Makefile.am3
-rw-r--r--xlators/performance/write-behind/src/Makefile.am12
-rw-r--r--xlators/performance/write-behind/src/write-behind.c1444
-rw-r--r--xlators/protocol/Makefile.am3
-rw-r--r--xlators/protocol/client/Makefile.am3
-rw-r--r--xlators/protocol/client/src/Makefile.am16
-rw-r--r--xlators/protocol/client/src/client-protocol.c6671
-rw-r--r--xlators/protocol/client/src/client-protocol.h173
-rw-r--r--xlators/protocol/client/src/saved-frames.c178
-rw-r--r--xlators/protocol/client/src/saved-frames.h74
-rw-r--r--xlators/protocol/server/Makefile.am3
-rw-r--r--xlators/protocol/server/src/Makefile.am18
-rw-r--r--xlators/protocol/server/src/server-dentry.c413
-rw-r--r--xlators/protocol/server/src/server-helpers.c586
-rw-r--r--xlators/protocol/server/src/server-helpers.h77
-rw-r--r--xlators/protocol/server/src/server-protocol.c7984
-rw-r--r--xlators/protocol/server/src/server-protocol.h143
-rw-r--r--xlators/storage/Makefile.am3
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c394
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1455
-rw-r--r--xlators/storage/bdb/src/bdb.c3371
-rw-r--r--xlators/storage/bdb/src/bdb.h439
-rw-r--r--xlators/storage/posix/Makefile.am3
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix.c3715
-rw-r--r--xlators/storage/posix/src/posix.h110
-rw-r--r--xlators/storage/posix/src/xattr-cache.c521
-rw-r--r--xlators/storage/posix/src/xattr-cache.h65
166 files changed, 88606 insertions, 0 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
new file mode 100644
index 00000000000..2abb5219488
--- /dev/null
+++ b/xlators/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = cluster storage protocol performance debug features encryption mount
+
+CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
new file mode 100644
index 00000000000..f7766580257
--- /dev/null
+++ b/xlators/bindings/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/Makefile.am b/xlators/bindings/python/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/bindings/python/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
new file mode 100644
index 00000000000..c0b9141c667
--- /dev/null
+++ b/xlators/bindings/python/src/Makefile.am
@@ -0,0 +1,19 @@
+
+xlator_PROGRAMS = python.so
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
+
+python_PYTHON = gluster.py glustertypes.py glusterstack.py
+
+pythondir = $(xlatordir)/python
+
+python_so_SOURCES = python.c
+
+# PYTHON_CPPFLAGS (expected to be substituted by configure) carries the
+# include path python.c needs for <Python.h>.
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
+ $(PYTHON_CPPFLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
+
+AM_LDFLAGS = $(PYTHON_LDFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
new file mode 100644
index 00000000000..ee0eb131011
--- /dev/null
+++ b/xlators/bindings/python/src/gluster.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
+# This file is part of GlusterFS.
+#
+# GlusterFS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# GlusterFS is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+from ctypes import *
+from glustertypes import *
+from glusterstack import *
+import sys
+import inspect
+
+libglusterfs = CDLL("libglusterfs.so")
+_gf_log = libglusterfs._gf_log
+_gf_log.restype = c_int32
+_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
+
+gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
+
+GF_LOG_NONE = 0
+GF_LOG_CRITICAL = 1
+GF_LOG_ERROR = 2
+GF_LOG_WARNING = 3
+GF_LOG_DEBUG = 4
+
+def gf_log(module, level, fmt, *params):
+ if level <= gf_log_loglevel:
+ frame = sys._getframe(1)
+ _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
+ frame.f_lineno, level, fmt, *params)
+
+class ComplexTranslator(object):
+ def __init__(self, xlator):
+ self.xlator = xlator_t.from_address(xlator)
+
+ def __getattr__(self, item):
+ return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
new file mode 100644
index 00000000000..ba24c81652e
--- /dev/null
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
+# This file is part of GlusterFS.
+#
+# GlusterFS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# GlusterFS is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+from ctypes import *
+from glustertypes import *
+
+libc = CDLL("libc.so.6")
+calloc = libc.calloc
+calloc.argtypes = [c_int, c_int]
+calloc.restype = c_void_p
+
+# TODO: Can these be done in C somehow?
+def stack_wind(frame, rfn, obj, fn, *params):
+ """Frame is a frame object"""
+ _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
+ _new[0].root = frame.root
+ _new[0].next = frame.root[0].frames.next
+ _new[0].prev = pointer(frame.root[0].frames)
+ if frame.root[0].frames.next:
+ frame.root[0].frames.next[0].prev = _new
+ frame.root[0].frames.next = _new
+ _new[0].this = obj
+ # TODO: Type checking like tmp_cbk?
+ _new[0].ret = rfn
+ _new[0].parent = pointer(frame)
+ _new[0].cookie = cast(_new, c_void_p)
+ # TODO: Initialize lock
+ #_new.lock.init()
+ frame.ref_count += 1
+ fn(_new, obj, *params)
+
+def stack_unwind(frame, *params):
+ """Frame is a frame object"""
+ fn = frame[0].ret
+ parent = frame[0].parent[0]
+ parent.ref_count -= 1
+
+ op_ret = params[0]
+ op_err = params[1]
+ params = params[2:]
+ fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
+ op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
new file mode 100644
index 00000000000..e9069d07c72
--- /dev/null
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
+# This file is part of GlusterFS.
+#
+# GlusterFS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# GlusterFS is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+from ctypes import *
+import collections
+
+#
+# Forward declaration of some gluster types
+#
+class call_frame_t(Structure):
+ pass
+
+class call_ctx_t(Structure):
+ pass
+
+class call_pool_t(Structure):
+ pass
+
+class xlator_t(Structure):
+ def _getFirstChild(self):
+ return self.children[0].xlator
+ firstChild = property(_getFirstChild)
+
+class xlator_list_t(Structure):
+ pass
+
+class xlator_fops(Structure):
+ pass
+
+class xlator_mops(Structure):
+ pass
+
+class glusterfs_ctx_t(Structure):
+ pass
+
+class list_head(Structure):
+ pass
+
+class dict_t(Structure):
+ pass
+
+class inode_table_t(Structure):
+ pass
+
+class fd_t(Structure):
+ pass
+
+# ctypes structure with a base pointer (iov_base) and a byte length (iov_len).
+class iovec(Structure):
+ _fields_ = [
+ ("iov_base", c_void_p),
+ ("iov_len", c_size_t),
+ ]
+
+ # Build an iovec describing the string ``s``: iov_base is set to the
+ # address of its character data and iov_len to len(s).
+ def __init__(self, s):
+ self.iov_base = cast(c_char_p(s), c_void_p)
+ self.iov_len = len(s)
+
+ # Return a copy of the iov_len bytes found at iov_base.
+ def getBytes(self):
+ return string_at(self.iov_base, self.iov_len)
+
+# This is a pthread_spinlock_t
+# TODO: what happens to volatile-ness?
+gf_lock_t = c_int
+
+uid_t = c_uint32
+gid_t = c_uint32
+pid_t = c_int32
+
+off_t = c_int64
+
+#
+# Function pointer types
+#
+ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
+ POINTER(xlator_t), c_int32, c_int32)
+
+fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
+init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
+event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
+
+list_head._fields_ = [
+ ("next", POINTER(list_head)),
+ ("prev", POINTER(list_head)),
+ ]
+
+call_frame_t._fields_ = [
+ ("root", POINTER(call_ctx_t)),
+ ("parent", POINTER(call_frame_t)),
+ ("next", POINTER(call_frame_t)),
+ ("prev", POINTER(call_frame_t)),
+ ("local", c_void_p),
+ ("this", POINTER(xlator_t)),
+ ("ret", ret_fn_t),
+ ("ref_count", c_int32),
+ ("lock", gf_lock_t),
+ ("cookie", c_void_p),
+ ("op", c_int32),
+ ("type", c_int8),
+ ]
+
+call_ctx_t._fields_ = [
+ ("all_frames", list_head),
+ ("trans", c_void_p),
+ ("pool", call_pool_t),
+ ("unique", c_uint64),
+ ("state", c_void_p),
+ ("uid", uid_t),
+ ("gid", gid_t),
+ ("pid", pid_t),
+ ("frames", call_frame_t),
+ ("req_refs", POINTER(dict_t)),
+ ("rsp_refs", POINTER(dict_t)),
+ ]
+
+xlator_t._fields_ = [
+ ("name", c_char_p),
+ ("type", c_char_p),
+ ("next", POINTER(xlator_t)),
+ ("prev", POINTER(xlator_t)),
+ ("parent", POINTER(xlator_t)),
+ ("children", POINTER(xlator_list_t)),
+ ("fops", POINTER(xlator_fops)),
+ ("mops", POINTER(xlator_mops)),
+ ("fini", fini_fn_t),
+ ("init", init_fn_t),
+ ("notify", event_notify_fn_t),
+ ("options", POINTER(dict_t)),
+ ("ctx", POINTER(glusterfs_ctx_t)),
+ ("itable", POINTER(inode_table_t)),
+ ("ready", c_char),
+ ("private", c_void_p),
+ ]
+
+xlator_list_t._fields_ = [
+ ("xlator", POINTER(xlator_t)),
+ ("next", POINTER(xlator_list_t)),
+ ]
+
+fop_functions = collections.defaultdict(lambda: c_void_p)
+fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
+ 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
+ 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
+ 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
+ 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
+ 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
+ # TODO: Call backs?
+ ]
+
+fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(iovec), c_int32,
+ off_t)
+
+fop_functions['writev'] = fop_writev_t
+xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
new file mode 100644
index 00000000000..739ef732900
--- /dev/null
+++ b/xlators/bindings/python/src/python.c
@@ -0,0 +1,235 @@
+/*
+ Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <Python.h>
+#include <stdlib.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+/* Per-translator state; filled in by init() and stored in this->private. */
+typedef struct
+{
+ char *scriptname; /* value of the "scriptname" option */
+ PyObject *pXlator; /* result of calling the script's "xlator" attribute */
+ PyObject *pScriptModule; /* object returned by AnonModule_FromFile() */
+ PyObject *pGlusterModule; /* the imported "gluster" module */
+ PyThreadState *pInterp; /* returned by Py_NewInterpreter() */
+
+ /* "call_frame_t", "iovec" and "fd_t" attributes of the gluster module */
+ PyObject *pFrameType, *pVectorType, *pFdType;
+} python_private_t;
+
+/**
+ * python_writev - writev fop; hands the call to the Python translator.
+ *
+ * If the Python translator object has a "writev" attribute it is called as
+ * writev(frame, fd, vector, count, offset), where frame/fd/vector are built
+ * with <type>.from_address(<pointer value>).  Otherwise the call is passed
+ * to default_writev().
+ *
+ * Python exceptions are printed and cleared.  Returns 0 when the Python
+ * path is taken, or the return value of default_writev().
+ */
+int32_t
+python_writev (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               struct iovec *vector,
+               int32_t count,
+               off_t offset)
+{
+    python_private_t *priv = (python_private_t *)this->private;
+    PyObject *pFrame = NULL;
+    PyObject *pFd = NULL;
+    PyObject *pVector = NULL;
+    PyObject *retval = NULL;
+
+    gf_log("python", GF_LOG_DEBUG, "In writev");
+
+    if (!PyObject_HasAttrString(priv->pXlator, "writev"))
+    {
+        return default_writev(frame, this, fd, vector, count, offset);
+    }
+
+    /* Each wrapper is a new reference owned by this function.  Build them
+       one at a time so nothing runs with an exception already pending. */
+    pFrame = PyObject_CallMethod(priv->pFrameType, "from_address", "O&",
+                                 PyLong_FromVoidPtr, frame);
+    if (pFrame)
+    {
+        pFd = PyObject_CallMethod(priv->pFdType, "from_address", "O&",
+                                  PyLong_FromVoidPtr, fd);
+    }
+    if (pFd)
+    {
+        pVector = PyObject_CallMethod(priv->pVectorType, "from_address", "O&",
+                                      PyLong_FromVoidPtr, vector);
+    }
+    if (pVector)
+    {
+        /* "L" takes a long long: off_t is 64 bits wide here
+           (_FILE_OFFSET_BITS=64) even where long is only 32 bits. */
+        retval = PyObject_CallMethod(priv->pXlator, "writev", "OOOiL",
+                                     pFrame, pFd, pVector,
+                                     (int) count, (long long) offset);
+    }
+
+    if (PyErr_Occurred())
+    {
+        PyErr_Print();
+    }
+
+    /* "O" does not steal references, so release ours explicitly. */
+    Py_XDECREF(retval);
+    Py_XDECREF(pVector);
+    Py_XDECREF(pFd);
+    Py_XDECREF(pFrame);
+    return 0;
+}
+
+struct xlator_fops fops = {
+ .writev = python_writev
+};
+
+struct xlator_mops mops = {
+};
+
+/**
+ * AnonModule_FromFile - run a Python source file in a fresh namespace.
+ *
+ * @fname: path of the script to execute.
+ *
+ * Executes the file with a new dictionary (containing only __builtins__) as
+ * both globals and locals, then creates a ModuleWrapper instance whose
+ * __dict__ is that dictionary, so the script's top-level names are
+ * attributes of the returned object.
+ *
+ * Returns a new reference, or NULL on failure.  Python errors are printed
+ * and cleared before returning.
+ */
+static PyObject *
+AnonModule_FromFile (const char* fname)
+{
+    PyObject *pBuiltins = NULL;
+    PyObject *pModuleDict = NULL;
+    PyObject *pResult = NULL;
+    PyObject *pModule = NULL;
+    FILE *fp = NULL;
+
+    // Get the builtins (borrowed reference)
+    PyThreadState* pThread = PyThreadState_Get();
+    pBuiltins = pThread->interp->builtins;
+
+    if (PyErr_Occurred())
+    {
+        goto error;
+    }
+
+    // Create a new dictionary for running code in
+    pModuleDict = PyDict_New();
+    if (!pModuleDict)
+    {
+        goto error;
+    }
+    // PyDict_SetItemString takes its own reference to pBuiltins
+    if (PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins) < 0)
+    {
+        goto error;
+    }
+
+    // Run the file in the new context
+    fp = fopen(fname, "r");
+    if (!fp)
+    {
+        // Report the fopen failure instead of handing NULL to PyRun_File
+        PyErr_SetFromErrnoWithFilename(PyExc_IOError, (char *) fname);
+        goto error;
+    }
+    pResult = PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
+    fclose(fp);
+    if (!pResult)
+    {
+        goto error;
+    }
+    Py_DECREF(pResult);
+
+    // Create an object to hold the new context
+    pResult = PyRun_String("class ModuleWrapper(object):\n\tpass\n",
+                           Py_single_input, pModuleDict, pModuleDict);
+    if (!pResult)
+    {
+        goto error;
+    }
+    Py_DECREF(pResult);
+
+    pModule = PyRun_String("ModuleWrapper()", Py_eval_input,
+                           pModuleDict, pModuleDict);
+    if (!pModule)
+    {
+        goto error;
+    }
+
+    // Set the new context's dictionary to the one we used to run the code
+    // inside
+    if (PyObject_SetAttrString(pModule, "__dict__", pModuleDict) < 0)
+    {
+        goto error;
+    }
+
+    // The instance holds its own reference to the dictionary now
+    Py_DECREF(pModuleDict);
+    return pModule;
+
+error:
+    if (PyErr_Occurred())
+    {
+        PyErr_Print();
+    }
+    Py_XDECREF(pModule);
+    Py_XDECREF(pModuleDict);
+    return NULL;
+}
+
+/**
+ * init - translator entry point: start Python and load the user script.
+ *
+ * Requires at least one child and a "scriptname" option.  Creates a
+ * sub-interpreter, appends GLUSTER_PYTHON_PATH to sys.path, imports the
+ * "gluster" module, runs the script and calls its "xlator" attribute with
+ * the address of @this.  On success the state is stored in this->private.
+ *
+ * Returns 0 on success, -1 on failure.  On failure every reference taken
+ * here is released and this->private is left untouched.
+ */
+int32_t
+init (xlator_t *this)
+{
+    python_private_t *priv = NULL;
+    data_t *scriptname = NULL;
+    PyObject *syspath = NULL;
+    PyObject *path = NULL;
+
+    // This is ok to call more than once per process
+    Py_InitializeEx(0);
+
+    if (!this->children) {
+        gf_log ("python", GF_LOG_ERROR,
+                "FATAL: python should have exactly one child");
+        return -1;
+    }
+
+    // Validate the options before allocating, so this failure leaks nothing
+    scriptname = dict_get (this->options, "scriptname");
+    if (!scriptname) {
+        gf_log("python", GF_LOG_ERROR,
+               "FATAL: python requires the scriptname parameter");
+        return -1;
+    }
+
+    priv = CALLOC (1, sizeof (*priv));
+    ERR_ABORT (priv);
+    priv->scriptname = data_to_str(scriptname);
+
+    priv->pInterp = Py_NewInterpreter();
+    if (!priv->pInterp) {
+        // No Python error state can be relied on here; just log and bail out.
+        gf_log("python", GF_LOG_ERROR,
+               "Unable to create a python sub-interpreter");
+        // NOTE(review): assumes CALLOC wraps libc calloc() - confirm
+        free (priv);
+        return -1;
+    }
+
+    // Adjust python's path (PySys_GetObject returns a borrowed reference)
+    syspath = PySys_GetObject("path");
+    path = PyString_FromString(GLUSTER_PYTHON_PATH);
+    if (!syspath || !path || PyList_Append(syspath, path) < 0) {
+        gf_log("python", GF_LOG_ERROR,
+               "Unable to add %s to sys.path", GLUSTER_PYTHON_PATH);
+        Py_XDECREF(path);
+        goto err;
+    }
+    Py_DECREF(path);
+
+    gf_log("python", GF_LOG_DEBUG,
+           "Loading gluster module");
+
+    priv->pGlusterModule = PyImport_ImportModule("gluster");
+    if (!priv->pGlusterModule || PyErr_Occurred()) {
+        goto err;
+    }
+
+    // python_writev() and fini() use these three without further checks
+    priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
+    priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
+    priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
+    if (!priv->pFrameType || !priv->pFdType || !priv->pVectorType) {
+        gf_log("python", GF_LOG_ERROR,
+               "gluster module lacks call_frame_t, fd_t or iovec");
+        goto err;
+    }
+
+    gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
+
+    priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
+    if (!priv->pScriptModule || PyErr_Occurred()) {
+        gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
+        goto err;
+    }
+
+    if (!PyObject_HasAttrString(priv->pScriptModule, "xlator")) {
+        gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
+        goto err;
+    }
+
+    gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
+    priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
+                                        PyLong_FromVoidPtr, this);
+    if (PyErr_Occurred() || !priv->pXlator) {
+        goto err;
+    }
+
+    this->private = priv;
+
+    gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
+    return 0;
+
+err:
+    // Reached only after the sub-interpreter was created successfully
+    if (PyErr_Occurred()) {
+        PyErr_Print();
+    }
+    Py_XDECREF(priv->pXlator);
+    Py_XDECREF(priv->pScriptModule);
+    Py_XDECREF(priv->pFrameType);
+    Py_XDECREF(priv->pFdType);
+    Py_XDECREF(priv->pVectorType);
+    Py_XDECREF(priv->pGlusterModule);
+    Py_EndInterpreter(priv->pInterp);
+    // NOTE(review): assumes CALLOC wraps libc calloc() - confirm
+    free (priv);
+    return -1;
+}
+
+/**
+ * fini - release everything acquired by init().
+ *
+ * Drops the Python references held in the private state, ends the
+ * sub-interpreter and frees the state.  Does nothing when this->private
+ * is NULL (init() only sets it after a fully successful start-up).
+ */
+void
+fini (xlator_t *this)
+{
+    python_private_t *priv = (python_private_t*)(this->private);
+
+    if (!priv) {
+        return;
+    }
+
+    Py_XDECREF(priv->pXlator);
+    Py_XDECREF(priv->pScriptModule);
+    Py_XDECREF(priv->pGlusterModule);
+    Py_XDECREF(priv->pFrameType);
+    Py_XDECREF(priv->pFdType);
+    Py_XDECREF(priv->pVectorType);
+    if (priv->pInterp) {
+        Py_EndInterpreter(priv->pInterp);
+    }
+
+    this->private = NULL;
+    // NOTE(review): assumes CALLOC (used in init) wraps libc calloc() - confirm
+    free (priv);
+    return;
+}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
new file mode 100644
index 00000000000..507455c856a
--- /dev/null
+++ b/xlators/bindings/python/src/testxlator.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
+# This file is part of GlusterFS.
+#
+# GlusterFS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# GlusterFS is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+
+"""
+This is a test translator written in python.
+
+Important things to note:
+ This file must be import-able from glusterfsd. This probably means
+ setting PYTHONPATH to where this file is located.
+
+ This file must have a top-level xlator class object that will be
+ used to instantiate individual translators.
+"""
+from gluster import *
+
+# Example translator: zlib-compresses the data of each writev before
+# winding the call to the first child translator.
+class MyXlator(ComplexTranslator):
+ # Passed as the first argument of every gf_log call below.
+ name = "MyXlator"
+ # Unwind `frame` with the given op_ret, op_errno and buf.
+ def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
+ stack_unwind(frame, op_ret, op_errno, buf)
+ return 0
+
+ # Compress the bytes described by `vector` and wind writev to the first
+ # child with the compressed data.
+ def writev(self, frame, fd, vector, count, offset):
+ gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
+ # TODO: Use cookie to pass this to writev_cbk
+ # Length before compression; rfn below passes it on as op_ret.
+ old_count = vector.iov_len
+
+ # NOTE(review): str.encode("zlib") looks like the Python 2 codec API - confirm
+ data = vector.getBytes().encode("zlib")
+
+ vector = iovec(data)
+ gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
+
+ # Callback handed to stack_wind. It replaces op_ret with old_count and,
+ # when no extra values arrive, passes 0 as buf.
+ # NOTE(review): rfn is referenced only from this function; ctypes needs
+ # callback objects kept alive while C code may still call them - confirm
+ # the child always calls back before writev returns.
+ @ret_fn_t
+ def rfn(frame, prev, this, op_ret, op_errno, *params):
+ if len(params) == 0:
+ params = [0]
+ return self.writev_cbk(frame, prev, old_count, op_errno, *params)
+
+ stack_wind(frame, rfn, self.firstChild,
+ self.firstChild[0].fops[0].writev, fd, vector, count, offset)
+ return 0
+
+xlator = MyXlator
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
new file mode 100644
index 00000000000..a6ddb3564a9
--- /dev/null
+++ b/xlators/cluster/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = unify stripe afr dht ha map
+
+CLEANFILES =
diff --git a/xlators/cluster/afr/Makefile.am b/xlators/cluster/afr/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/cluster/afr/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
new file mode 100644
index 00000000000..1bde9e5bad7
--- /dev/null
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -0,0 +1,20 @@
+xlator_LTLIBRARIES = afr.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+# libtool spells this flag -avoid-version (build the module without a version suffix)
+afr_la_LDFLAGS = -module -avoid-version
+
+afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c
+afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/replicate.so
+
+install-data-hook:
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
\ No newline at end of file
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
new file mode 100644
index 00000000000..0c65ca8528d
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -0,0 +1,345 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+#include "afr.h"
+
+
+/**
+ * afr_opendir_cbk - handle one child's reply to opendir.
+ *
+ * Under the frame lock, stores this reply's op_errno in local->op_errno and
+ * sets local->op_ret to 0 if the reply succeeded.  When afr_frame_return()
+ * yields 0, unwinds with local->op_ret, local->op_errno and local->fd.
+ */
+int32_t
+afr_opendir_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno,
+                 fd_t *fd)
+{
+        afr_local_t *local   = NULL;
+        int          pending = -1;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                local->op_errno = op_errno;
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (!pending) {
+                AFR_STACK_UNWIND (frame, local->op_ret,
+                                  local->op_errno, local->fd);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_opendir - wind opendir to the children marked up in local->child_up.
+ *
+ * Allocates and initialises the frame's afr_local_t, takes a reference on
+ * @fd and winds opendir to each up child, stopping once local->call_count
+ * calls have been issued.  Replies are handled by afr_opendir_cbk().
+ * If set-up fails, unwinds immediately with op_ret -1 and @fd.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_opendir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, fd_t *fd)
+{
+        afr_private_t *priv      = NULL;
+        afr_local_t   *local     = NULL;
+        int32_t        op_ret    = -1;
+        int32_t        op_errno  = 0;
+        int            nchildren = 0;
+        int            remaining = -1;
+        int            idx       = 0;
+        int            ret       = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv      = this->private;
+        nchildren = priv->child_count;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        frame->local = local;
+        local->fd    = fd_ref (fd);
+
+        remaining = local->call_count;
+
+        for (idx = 0; idx < nchildren; idx++) {
+                if (!local->child_up[idx])
+                        continue;
+
+                STACK_WIND (frame, afr_opendir_cbk,
+                            priv->children[idx],
+                            priv->children[idx]->fops->opendir,
+                            loc, fd);
+
+                /* stop once every expected call has been issued */
+                remaining--;
+                if (remaining == 0)
+                        break;
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
+        }
+
+        return 0;
+}
+
+
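+/**
+ * Common algorithm for directory read calls:
+ *
+ * - Try the fop on the first child that is up
+ * - if the call fails (op_ret == -1; the callbacks below retry on any
+ *   errno, not only ENOTCONN):
+ *      try the next child, until the last child has been tried
+ *
+ * Applicable to: readdir, getdents
+ */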
+
+/**
+ * afr_readdir_cbk - handle a child's reply to readdir.
+ *
+ * On failure (op_ret == -1), and unless all_tried() says every child has
+ * been used, advances local->cont.readdir.last_tried and re-issues readdir
+ * on that child with the saved fd, size and offset.  Otherwise unwinds
+ * with the reply as received.
+ */
+int32_t
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno,
+                 gf_dirent_t *buf)
+{
+        afr_private_t  *priv     = this->private;
+        afr_local_t    *local    = frame->local;
+        xlator_t      **children = priv->children;
+        int             next     = -1;
+
+        if ((op_ret == -1) &&
+            !all_tried (local->cont.readdir.last_tried, priv->child_count)) {
+                next = ++local->cont.readdir.last_tried;
+
+                STACK_WIND (frame, afr_readdir_cbk,
+                            children[next],
+                            children[next]->fops->readdir,
+                            local->fd, local->cont.readdir.size,
+                            local->cont.readdir.offset);
+        } else {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_readdir - wind readdir to the first child that is up.
+ *
+ * Saves fd (referenced), size and offset in the frame's afr_local_t so
+ * that afr_readdir_cbk() can retry on another child.  Unwinds at once with
+ * op_ret -1 if set-up fails, using ENOTCONN when no child is up.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_readdir (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t offset)
+{
+        afr_private_t  *priv     = NULL;
+        afr_local_t    *local    = NULL;
+        xlator_t      **children = NULL;
+        int32_t         op_ret   = -1;
+        int32_t         op_errno = 0;
+        int             first_up = 0;
+        int             ret      = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv     = this->private;
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        frame->local = local;
+
+        first_up = afr_first_up_child (priv);
+        if (first_up == -1) {
+                op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no child is up :(");
+                goto out;
+        }
+
+        /* remember the request so the callback can retry it */
+        local->fd                      = fd_ref (fd);
+        local->cont.readdir.last_tried = first_up;
+        local->cont.readdir.size       = size;
+        local->cont.readdir.offset     = offset;
+
+        STACK_WIND (frame, afr_readdir_cbk,
+                    children[first_up], children[first_up]->fops->readdir,
+                    fd, size, offset);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+
+/**
+ * afr_getdents_cbk - handle a child's reply to getdents.
+ *
+ * On failure (op_ret == -1), and unless all_tried() says every child has
+ * been used, advances local->cont.getdents.last_tried and re-issues
+ * getdents on that child with the saved fd, size, offset and flag.
+ * Otherwise unwinds with the reply as received.
+ */
+int32_t
+afr_getdents_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno,
+                  dir_entry_t *entry, int32_t count)
+{
+        afr_private_t  *priv     = this->private;
+        afr_local_t    *local    = frame->local;
+        xlator_t      **children = priv->children;
+        int             next     = -1;
+
+        if ((op_ret == -1) &&
+            !all_tried (local->cont.getdents.last_tried, priv->child_count)) {
+                next = ++local->cont.getdents.last_tried;
+
+                STACK_WIND (frame, afr_getdents_cbk,
+                            children[next],
+                            children[next]->fops->getdents,
+                            local->fd, local->cont.getdents.size,
+                            local->cont.getdents.offset, local->cont.getdents.flag);
+        } else {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, entry, count);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_getdents - wind getdents to the first child that is up.
+ *
+ * Saves fd (referenced), size, offset and flag in the frame's afr_local_t
+ * so that afr_getdents_cbk() can retry on another child.  Unwinds at once
+ * with op_ret -1 if set-up fails, using ENOTCONN when no child is up.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_getdents (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, size_t size, off_t offset, int32_t flag)
+{
+        afr_private_t * priv = NULL;
+        xlator_t ** children = NULL;
+        int call_child = 0;
+        afr_local_t *local = NULL;
+
+        int32_t op_ret = -1;
+        int32_t op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local before any failure exit, as afr_readdir does, so the
+           "no child is up" unwind below sees it on the frame.
+           NOTE(review): presumably AFR_STACK_UNWIND releases frame->local -
+           confirm.  Unlike afr_readdir, AFR_LOCAL_INIT is not called here. */
+        frame->local = local;
+
+        call_child = afr_first_up_child (priv);
+        if (call_child == -1) {
+                op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no child is up :(");
+                goto out;
+        }
+
+        local->cont.getdents.last_tried = call_child;
+
+        local->fd = fd_ref (fd);
+
+        local->cont.getdents.size = size;
+        local->cont.getdents.offset = offset;
+        local->cont.getdents.flag = flag;
+
+        STACK_WIND (frame, afr_getdents_cbk,
+                    children[call_child], children[call_child]->fops->getdents,
+                    fd, size, offset, flag);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                /* getdents callbacks take (entry, count): pass both */
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0);
+        }
+
+        return 0;
+}
+
+
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
new file mode 100644
index 00000000000..172ec3c90c4
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __DIR_READ_H__
+#define __DIR_READ_H__
+
+
+/*
+ * Entry points of the AFR translator's directory-read fops.  Each one
+ * takes the caller's frame and the translator instance as its first
+ * two arguments.
+ */
+
+/* opendir fop entry point. */
+int32_t
+afr_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd);
+
+/* closedir fop entry point. */
+int32_t
+afr_closedir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd);
+
+/* readdir fop entry point. */
+int32_t
+afr_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset);
+
+
+/*
+ * getdents fop entry point: forwards the request to the first child
+ * that is up; the reply callback retries on the next child on failure.
+ */
+int32_t
+afr_getdents (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, int32_t flag);
+
+
+/* checksum fop entry point. */
+int32_t
+afr_checksum (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags);
+
+
+#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
new file mode 100644
index 00000000000..87a6e09b5be
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -0,0 +1,1786 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <errno.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+
+/*
+ * Fill in `parent` so that it describes the directory containing
+ * `child`.
+ *
+ * If child has no parent inode, parent simply becomes a copy of child
+ * (loc_copy).  Otherwise:
+ *   - parent->path   is a newly allocated copy of dirname (child->path);
+ *   - parent->name   points just past the last '/' inside parent->path,
+ *                    or is NULL if there is none;
+ *   - parent->inode  is the result of inode_ref (child->parent);
+ *   - parent->parent is whatever inode_parent () returns for it;
+ *   - parent->ino    is copied from parent->inode.
+ *
+ * If the path copy cannot be allocated, parent->path and parent->name
+ * are left NULL rather than handing a NULL pointer to strrchr ().
+ */
+void
+afr_build_parent_loc (loc_t *parent, loc_t *child)
+{
+        char *tmp = NULL;
+
+        if (!child->parent) {
+                loc_copy (parent, child);
+                return;
+        }
+
+        /* dirname () may modify its argument, so give it a scratch copy */
+        tmp = strdup (child->path);
+        parent->path = strdup (dirname (tmp));
+        FREE (tmp);
+
+        parent->name = NULL;
+        if (parent->path) {
+                parent->name = strrchr (parent->path, '/');
+                if (parent->name)
+                        parent->name++;
+        }
+
+        parent->inode  = inode_ref (child->parent);
+        parent->parent = inode_parent (parent->inode, 0, NULL);
+        parent->ino    = parent->inode->ino;
+}
+
+
+/* {{{ create */
+
+/*
+ * Deliver the result of a create to the original caller.
+ *
+ * The caller's frame lives in local->transaction.main_frame.  It is
+ * fetched and cleared while holding frame->lock, so no matter how many
+ * times this hook runs for a transaction the caller is unwound at most
+ * once.  Always returns 0.
+ */
+int
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          local->cont.create.fd,
+                          local->cont.create.inode,
+                          &local->cont.create.buf);
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for create.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, op_ret and the returned inode are recorded, and the
+ *     stat buf is kept (st_ino passed through afr_itransform ()) when
+ *     this is the first success or the reply is from priv->read_child;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        local->op_ret = op_ret;
+
+                        if ((child_index == priv->read_child)
+                            || (local->success_count == 0)) {
+                                local->cont.create.buf = *buf;
+                                local->cont.create.buf.st_ino =
+                                        afr_itransform (buf->st_ino,
+                                                        priv->child_count,
+                                                        child_index);
+                        }
+
+                        local->cont.create.inode = inode;
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for create: wind create to every child marked
+ * up in local->child_up, passing the child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int
+afr_create_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->create,
+                                   &local->loc,
+                                   local->cont.create.flags,
+                                   local->cont.create.mode,
+                                   local->cont.create.fd);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for create: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int
+afr_create_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * create fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by
+ * afr_create_unwind; afr_create_wind issues the per-child calls.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1.  The
+ * error unwind passes NULL for fd, inode and buf, i.e. the same number
+ * of arguments afr_create_unwind supplies, and a failed copy_frame ()
+ * is reported as ENOMEM instead of errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_create (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->cont.create.flags = flags;
+        local->cont.create.mode  = mode;
+        local->cont.create.fd    = fd_ref (fd);
+
+        local->transaction.fop    = afr_create_wind;
+        local->transaction.done   = afr_create_done;
+        local->transaction.unwind = afr_create_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ mknod */
+
+/*
+ * Deliver the result of a mknod to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Always returns 0.
+ */
+int
+afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          local->cont.mknod.inode,
+                          &local->cont.mknod.buf);
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for mknod.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, op_ret and the returned inode are recorded, and the
+ *     stat buf is kept (st_ino passed through afr_itransform ()) when
+ *     this is the first success or the reply is from priv->read_child;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    inode_t *inode, struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        local->op_ret = op_ret;
+
+                        if ((child_index == priv->read_child)
+                            || (local->success_count == 0)) {
+                                local->cont.mknod.buf = *buf;
+                                local->cont.mknod.buf.st_ino =
+                                        afr_itransform (buf->st_ino,
+                                                        priv->child_count,
+                                                        child_index);
+                        }
+
+                        local->cont.mknod.inode = inode;
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for mknod: wind mknod to every child marked up
+ * in local->child_up, passing the child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int32_t
+afr_mknod_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->mknod,
+                                   &local->loc, local->cont.mknod.mode,
+                                   local->cont.mknod.dev);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for mknod: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int
+afr_mknod_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * mknod fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by
+ * afr_mknod_unwind; afr_mknod_wind issues the per-child calls.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1.  The
+ * error unwind passes NULL for inode and buf, i.e. the same number of
+ * arguments afr_mknod_unwind supplies, and a failed copy_frame () is
+ * reported as ENOMEM instead of errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_mknod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode, dev_t dev)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->cont.mknod.mode = mode;
+        local->cont.mknod.dev  = dev;
+
+        local->transaction.fop    = afr_mknod_wind;
+        local->transaction.done   = afr_mknod_done;
+        local->transaction.unwind = afr_mknod_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ mkdir */
+
+
+/*
+ * Deliver the result of a mkdir to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Always returns 0.
+ */
+int
+afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          local->cont.mkdir.inode,
+                          &local->cont.mkdir.buf);
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for mkdir.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, op_ret and the returned inode are recorded, and the
+ *     stat buf is kept (st_ino passed through afr_itransform ()) when
+ *     this is the first success or the reply is from priv->read_child;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    inode_t *inode, struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        local->op_ret = op_ret;
+
+                        if ((child_index == priv->read_child)
+                            || (local->success_count == 0)) {
+                                local->cont.mkdir.buf = *buf;
+                                local->cont.mkdir.buf.st_ino =
+                                        afr_itransform (buf->st_ino, priv->child_count,
+                                                        child_index);
+                        }
+
+                        local->cont.mkdir.inode = inode;
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for mkdir: wind mkdir to every child marked up
+ * in local->child_up, passing the child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->mkdir,
+                                   &local->loc, local->cont.mkdir.mode);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for mkdir: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int
+afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * mkdir fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by
+ * afr_mkdir_unwind; afr_mkdir_wind issues the per-child calls.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1.  The
+ * error unwind passes NULL for inode and buf, i.e. the same number of
+ * arguments afr_mkdir_unwind supplies, and a failed copy_frame () is
+ * reported as ENOMEM instead of errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_mkdir (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->cont.mkdir.mode = mode;
+
+        local->transaction.fop    = afr_mkdir_wind;
+        local->transaction.done   = afr_mkdir_done;
+        local->transaction.unwind = afr_mkdir_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ link */
+
+
+/*
+ * Deliver the result of a link to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Before unwinding, the st_ino of the reply is replaced
+ * with local->cont.link.ino.  Always returns 0.
+ */
+int
+afr_link_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.link.buf.st_ino = local->cont.link.ino;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          local->cont.link.inode,
+                          &local->cont.link.buf);
+
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for link.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, op_ret and the returned inode are recorded, and the
+ *     stat buf is kept (st_ino passed through afr_itransform ()) when
+ *     this is the first success or the reply is from priv->read_child;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        local->op_ret = op_ret;
+
+                        if ((child_index == priv->read_child)
+                            || (local->success_count == 0)) {
+                                local->cont.link.buf = *buf;
+                                local->cont.link.buf.st_ino =
+                                        afr_itransform (buf->st_ino, priv->child_count,
+                                                        child_index);
+                        }
+
+                        local->cont.link.inode = inode;
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for link: wind link (local->loc to
+ * local->newloc) to every child marked up in local->child_up, passing
+ * the child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int
+afr_link_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->link,
+                                   &local->loc,
+                                   &local->newloc);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for link: run the unwind hook (which answers
+ * the caller if that has not happened yet) and then destroy the
+ * transaction frame.  Always returns 0.
+ */
+int
+afr_link_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * link fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by afr_link_unwind;
+ * afr_link_wind issues the per-child calls.  oldloc's inode number is
+ * saved in local->cont.link.ino for use in the reply.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1.  The
+ * error unwind passes NULL for inode and buf, i.e. the same number of
+ * arguments afr_link_unwind supplies, and a failed copy_frame () is
+ * reported as ENOMEM instead of errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_link (call_frame_t *frame, xlator_t *this,
+          loc_t *oldloc, loc_t *newloc)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, oldloc);
+        loc_copy (&local->newloc, newloc);
+
+        local->cont.link.ino = oldloc->inode->ino;
+
+        local->transaction.fop    = afr_link_wind;
+        local->transaction.done   = afr_link_done;
+        local->transaction.unwind = afr_link_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+
+        local->transaction.main_frame   = frame;
+        local->transaction.basename     = AFR_BASENAME (oldloc->path);
+        local->transaction.new_basename = AFR_BASENAME (newloc->path);
+        local->transaction.pending      = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ symlink */
+
+
+/*
+ * Deliver the result of a symlink to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Always returns 0.
+ */
+int
+afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          local->cont.symlink.inode,
+                          &local->cont.symlink.buf);
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for symlink.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, op_ret and the returned inode are recorded, and the
+ *     stat buf is kept (st_ino passed through afr_itransform ()) when
+ *     this is the first success or the reply is from priv->read_child;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, inode_t *inode,
+                      struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        local->op_ret = op_ret;
+
+                        if ((child_index == priv->read_child)
+                            || (local->success_count == 0)) {
+                                local->cont.symlink.buf = *buf;
+                                local->cont.symlink.buf.st_ino =
+                                        afr_itransform (buf->st_ino, priv->child_count,
+                                                        child_index);
+                        }
+
+                        local->cont.symlink.inode = inode;
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for symlink: wind symlink (saved linkpath and
+ * local->loc) to every child marked up in local->child_up, passing the
+ * child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int
+afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->symlink,
+                                   local->cont.symlink.linkpath,
+                                   &local->loc);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for symlink: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int
+afr_symlink_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * symlink fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by
+ * afr_symlink_unwind; afr_symlink_wind issues the per-child calls
+ * using a private copy of linkpath.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1 and NULL
+ * for inode and buf.  Allocation failures (copy_frame () or the copy
+ * of linkpath) are reported as ENOMEM.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_symlink (call_frame_t *frame, xlator_t *this,
+             const char *linkpath, loc_t *loc)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->cont.symlink.ino      = loc->inode->ino;
+        local->cont.symlink.linkpath = strdup (linkpath);
+        if (!local->cont.symlink.linkpath) {
+                /* do not wind a symlink with a NULL target */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->transaction.fop    = afr_symlink_wind;
+        local->transaction.done   = afr_symlink_done;
+        local->transaction.unwind = afr_symlink_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ rename */
+
+/*
+ * Deliver the result of a rename to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Before unwinding, the st_ino of the reply is replaced
+ * with local->cont.rename.ino.  Always returns 0.
+ */
+int
+afr_rename_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.rename.buf.st_ino = local->cont.rename.ino;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.rename.buf);
+
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for rename.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - only the first successful reply is recorded: op_ret is stored,
+ *     buf (when non-NULL) is copied with st_ino passed through
+ *     afr_itransform (), and success_count is incremented;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ * Always returns 0.
+ */
+int
+afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+
+                                if (buf) {
+                                        local->cont.rename.buf = *buf;
+                                        local->cont.rename.buf.st_ino =
+                                                afr_itransform (buf->st_ino,
+                                                                priv->child_count,
+                                                                child_index);
+                                }
+
+                                local->success_count++;
+                        }
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        /* last reply: answer the caller, then continue the transaction */
+        local->transaction.unwind (frame, this);
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for rename: wind rename (local->loc to
+ * local->newloc) to every child marked up in local->child_up, passing
+ * the child's index as the cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int32_t
+afr_rename_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->rename,
+                                   &local->loc,
+                                   &local->newloc);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for rename: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int
+afr_rename_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * rename fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_RENAME_TRANSACTION on a copy of
+ * the caller's frame (transaction_frame), with parent locs built for
+ * both oldloc and newloc.  The caller's own frame is kept in
+ * local->transaction.main_frame and is answered by afr_rename_unwind;
+ * afr_rename_wind issues the per-child calls.  oldloc's inode number
+ * is saved in local->cont.rename.ino for use in the reply.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1 and a
+ * NULL buf; a failed copy_frame () is reported as ENOMEM instead of
+ * errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+afr_rename (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, oldloc);
+        loc_copy (&local->newloc, newloc);
+
+        local->cont.rename.ino = oldloc->inode->ino;
+
+        local->transaction.fop    = afr_rename_wind;
+        local->transaction.done   = afr_rename_done;
+        local->transaction.unwind = afr_rename_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
+        afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
+
+        local->transaction.main_frame   = frame;
+        local->transaction.basename     = AFR_BASENAME (oldloc->path);
+        local->transaction.new_basename = AFR_BASENAME (newloc->path);
+        local->transaction.pending      = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ unlink */
+
+/*
+ * Deliver the result of an unlink to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Always returns 0.
+ */
+int
+afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for unlink.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, success_count is incremented and the first
+ *     successful op_ret is recorded;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ *
+ * The former need_unwind flag (set when success_count reached
+ * priv->wait_count) was never read, so it and the priv lookup that fed
+ * it have been dropped; behaviour is unchanged.
+ *
+ * Always returns 0.
+ */
+int
+afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t * local = NULL;
+
+        int call_count  = -1;
+        int child_index = (long) cookie;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                        }
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.unwind (frame, this);
+
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop step for unlink: wind unlink of local->loc to every
+ * child marked up in local->child_up, passing the child's index as the
+ * cookie.
+ *
+ * local->call_count is set to the number of calls that will be issued.
+ * If no child is up, nothing is wound and the transaction is resumed
+ * straight away.  Always returns 0.
+ */
+int32_t
+afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            i       = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->unlink,
+                                   &local->loc);
+
+                /*
+                 * Stop on our private counter after the last wind.
+                 * NOTE(review): presumably so that local is not read
+                 * again once the final reply may have arrived.
+                 */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" hook for unlink: run the unwind hook (which
+ * answers the caller if that has not happened yet) and then destroy
+ * the transaction frame.  Always returns 0.
+ */
+int32_t
+afr_unlink_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * unlink fop entry point.
+ *
+ * The operation runs as an AFR_ENTRY_TRANSACTION on a copy of the
+ * caller's frame (transaction_frame).  The caller's own frame is kept
+ * in local->transaction.main_frame and is answered by
+ * afr_unlink_unwind; afr_unlink_wind issues the per-child calls.
+ *
+ * If setup fails the caller is unwound here with op_ret = -1; a failed
+ * copy_frame () is reported as ENOMEM instead of errno 0.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int32_t
+afr_unlink (call_frame_t *frame, xlator_t *this,
+            loc_t *loc)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to
+                 * transaction_frame here, so it looks like it is not
+                 * released on this path -- confirm.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.fop    = afr_unlink_wind;
+        local->transaction.done   = afr_unlink_done;
+        local->transaction.unwind = afr_unlink_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ rmdir */
+
+
+
+/*
+ * Deliver the result of an rmdir to the original caller.
+ *
+ * The caller's frame (local->transaction.main_frame) is fetched and
+ * cleared under frame->lock, so the caller is unwound at most once per
+ * transaction.  Always returns 0.
+ */
+int
+afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* claim the caller's frame; later invocations find NULL */
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Per-child reply handler for rmdir.
+ *
+ * cookie is interpreted as the index of the replying child.  Under
+ * frame->lock:
+ *   - a reply that child_went_down () recognises is handed to
+ *     afr_transaction_child_died ();
+ *   - on success, success_count is incremented and the first
+ *     successful op_ret is recorded;
+ *   - op_errno is overwritten with this reply's value in every case.
+ *
+ * Once afr_frame_return () reports no replies outstanding, the
+ * transaction's unwind hook and then its resume hook are run.
+ *
+ * The former need_unwind flag (set when success_count reached
+ * priv->wait_count) was never read, so it and the priv lookup that fed
+ * it have been dropped; behaviour is unchanged.
+ *
+ * Always returns 0.
+ */
+int
+afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t * local = NULL;
+
+        int call_count  = -1;
+        int child_index = (long) cookie;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                        }
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.unwind (frame, this);
+
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_rmdir_wind - send rmdir to every child marked up in local->child_up
+ *
+ * With no child up the transaction is resumed immediately. Otherwise
+ * local->call_count is set to the number of up children and one rmdir is
+ * wound per up child, with the child index as cookie.
+ */
+int
+afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        /* leave the loop right after the last wind has been issued */
+        for (child = 0; pending != 0 && child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->rmdir,
+                                   &local->loc);
+                pending--;
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_rmdir_done - 'done' hook of the rmdir transaction
+ *
+ * Calls the transaction's unwind hook, then destroys the transaction frame.
+ */
+int
+afr_rmdir_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/**
+ * afr_rmdir - rmdir fop entry point
+ * @frame: caller's frame; it is kept as transaction.main_frame and
+ *         answered by the unwind hook
+ * @this:  this xlator
+ * @loc:   directory to remove
+ *
+ * Sets up a local on a copy of @frame, records the wind/done/unwind hooks,
+ * the parent loc and the basename, and starts an entry transaction.
+ *
+ * Always returns 0. A set-up failure is reported by destroying the
+ * transaction frame (if one was created) and unwinding @frame with
+ * op_ret -1.
+ */
+int
+afr_rmdir (call_frame_t *frame, xlator_t *this,
+           loc_t *loc)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report a real errno instead of failing with op_errno 0 */
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                /*
+                 * NOTE(review): local is not yet attached to any frame
+                 * here - confirm whether it is leaked on this path.
+                 */
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.fop    = afr_rmdir_wind;
+        local->transaction.done   = afr_rmdir_done;
+        local->transaction.unwind = afr_rmdir_unwind;
+
+        afr_build_parent_loc (&local->transaction.parent_loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.basename   = AFR_BASENAME (loc->path);
+        local->transaction.pending    = AFR_ENTRY_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ setdents */
+
+/**
+ * Per-child reply handler for setdents. The cookie carries the index of
+ * the child the call was wound to. When the last outstanding reply has
+ * been counted, the transaction is resumed.
+ */
+int32_t
+afr_setdents_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (child_went_down (op_ret, op_errno))
+ afr_transaction_child_died (frame, this, child_index);
+
+ /*
+ only the first successful reply is recorded; success_count
+ is incremented inside this test, so it never exceeds 1 here
+ */
+ if ((op_ret != -1) && (local->success_count == 0)) {
+ local->op_ret = op_ret;
+ local->success_count++;
+ }
+
+ /* overwritten by every reply, successful or not */
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_setdents_wind - send setdents to every child marked up
+ *
+ * With no child up the transaction is resumed immediately. Otherwise
+ * local->call_count is set to the number of up children and the fd, flags,
+ * entries and count saved in the local are wound to each of them.
+ */
+int32_t
+afr_setdents_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        /* leave the loop right after the last wind has been issued */
+        for (child = 0; pending != 0 && child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_setdents_wind_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->setdents,
+                                   local->fd, local->cont.setdents.flags,
+                                   local->cont.setdents.entries,
+                                   local->cont.setdents.count);
+                pending--;
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_setdents_done - 'done' hook of the setdents transaction
+ *
+ * setdents runs its transaction on the caller's own frame, so this simply
+ * unwinds that frame with the result stored in the local.
+ */
+int32_t
+afr_setdents_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_setdents - setdents fop entry point
+ * @frame:   caller's frame; the transaction runs directly on it
+ * @this:    this xlator
+ * @fd:      directory fd; a reference is taken and kept in the local
+ * @flags:   passed through to the children
+ * @entries: passed through to the children
+ * @count:   passed through to the children
+ *
+ * Always returns 0. A set-up failure is reported by unwinding @frame with
+ * op_ret -1.
+ */
+int32_t
+afr_setdents (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count)
+{
+        afr_private_t *priv     = NULL;
+        afr_local_t   *local    = NULL;
+        int            ret      = -1;
+        int            op_ret   = -1;
+        int            op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        frame->local = local;
+
+        /* arguments to replay on every child */
+        local->fd                    = fd_ref (fd);
+        local->cont.setdents.flags   = flags;
+        local->cont.setdents.entries = entries;
+        local->cont.setdents.count   = count;
+
+        /* transaction hooks; no unwind hook is installed for setdents */
+        local->transaction.fop      = afr_setdents_wind;
+        local->transaction.done     = afr_setdents_done;
+        local->transaction.basename = NULL;
+        local->transaction.pending  = AFR_ENTRY_PENDING;
+
+        afr_transaction (frame, this, AFR_ENTRY_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
new file mode 100644
index 00000000000..e6e8a5e797c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __DIR_WRITE_H__
+#define __DIR_WRITE_H__
+
+/*
+ Entry points of the AFR fops that create, remove, link or rename
+ directory entries. Each takes the caller's frame and this xlator,
+ followed by the fop's own arguments.
+
+ NOTE(review): afr_rmdir is declared int32_t here while its definition
+ uses plain int; harmless where int32_t is int, but worth aligning.
+*/
+
+int32_t
+afr_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode, fd_t *fd);
+
+int32_t
+afr_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t dev);
+
+int32_t
+afr_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode);
+
+int32_t
+afr_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc);
+
+int32_t
+afr_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc);
+
+int32_t
+afr_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc);
+
+int32_t
+afr_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc);
+
+int32_t
+afr_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *oldloc);
+
+int32_t
+afr_setdents (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+
+#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
new file mode 100644
index 00000000000..a6c99ec0576
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -0,0 +1,721 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+#include "afr.h"
+
+
+/**
+ * Common algorithm for inode read calls:
+ *
+ * - Try the fop on the first child that is up
+ * - if we have failed due to ENOTCONN:
+ * try the next child
+ *
+ * Applicable to: access, stat, fstat, readlink, getxattr
+ */
+
+/* {{{ access */
+
+/**
+ * afr_access_cbk - reply handler for access
+ *
+ * A successful reply is passed straight back to the caller. A failed one
+ * moves on to the child after cont.access.last_tried; once all_tried()
+ * says there is none left, the failure itself is returned.
+ */
+int32_t
+afr_access_cbk (call_frame_t *frame, void *cookie,
+                xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_private_t  *priv     = this->private;
+        xlator_t      **children = priv->children;
+        afr_local_t    *local    = frame->local;
+        int             next     = -1;
+
+        /* success, or failure with nobody left to ask: answer the caller */
+        if (op_ret != -1 ||
+            all_tried (local->cont.access.last_tried, priv->child_count)) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+                return 0;
+        }
+
+        next = ++local->cont.access.last_tried;
+
+        STACK_WIND_COOKIE (frame, afr_access_cbk,
+                           (void *) (long) next,
+                           children[next],
+                           children[next]->fops->access,
+                           &local->loc, local->cont.access.mask);
+
+        return 0;
+}
+
+
+/**
+ * afr_access - access fop entry point
+ * @frame: caller's frame
+ * @this:  this xlator
+ * @loc:   object to check
+ * @mask:  access mask, passed through to the child
+ *
+ * Winds access to the first child that is up and remembers loc, mask and
+ * the child index in the local so that afr_access_cbk can retry on the
+ * following children.
+ *
+ * Always returns 0. Failures are reported by unwinding @frame with
+ * op_ret -1 (ENOTCONN when no child is up).
+ */
+int32_t
+afr_access (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t mask)
+{
+        afr_private_t * priv       = NULL;
+        xlator_t **     children   = NULL;
+        int             call_child = 0;
+        afr_local_t *   local      = NULL;
+
+        int32_t op_ret   = -1;
+        int32_t op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv->children, out);
+
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /*
+         * The callback reads its retry state from frame->local. This
+         * assignment was missing, so a failed first attempt made
+         * afr_access_cbk dereference a NULL local and the allocation was
+         * never attached to the frame.
+         */
+        frame->local = local;
+
+        call_child = afr_first_up_child (priv);
+        if (call_child == -1) {
+                op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no child is up :(");
+                goto out;
+        }
+
+        local->cont.access.last_tried = call_child;
+        loc_copy (&local->loc, loc);
+        local->cont.access.mask = mask;
+
+        STACK_WIND_COOKIE (frame, afr_access_cbk,
+                           (void *) (long) call_child,
+                           children[call_child], children[call_child]->fops->access,
+                           loc, mask);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ stat */
+
+/**
+ * Reply handler for stat.
+ *
+ * The cookie is the index of the child afr_stat picked with
+ * afr_deitransform; it is handed on unchanged with every retry. On
+ * failure the children are walked in index order through
+ * cont.stat.last_tried, skipping the child named by the cookie. On
+ * success the inode number saved by afr_stat replaces the one in buf.
+ *
+ * NOTE(review): the file-level comment speaks of retrying on ENOTCONN,
+ * but the test below retries on any op_ret == -1.
+ */
+int32_t
+afr_stat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int deitransform_child = -1;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ deitransform_child = (long) cookie;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.stat.last_tried;
+
+ /* nobody left to ask: fall through and return this failure */
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.stat.last_tried;
+
+ /* the child in the cookie was the first one asked; step over it */
+ if (this_try == deitransform_child) {
+ goto retry;
+ }
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_stat_cbk,
+ (void *) (long) deitransform_child,
+ children[this_try],
+ children[this_try]->fops->stat,
+ &local->loc);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret != -1)
+ buf->st_ino = local->cont.stat.ino;
+
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_stat - stat fop entry point
+ * @frame: caller's frame
+ * @this:  this xlator
+ * @loc:   object to stat
+ *
+ * The child to ask first is derived from the inode number with
+ * afr_deitransform. loc and the inode number are saved in the local for
+ * afr_stat_cbk.
+ *
+ * Always returns 0. Set-up failures unwind @frame with op_ret -1.
+ */
+int32_t
+afr_stat (call_frame_t *frame, xlator_t *this,
+          loc_t *loc)
+{
+        afr_private_t  *priv       = NULL;
+        afr_local_t    *local      = NULL;
+        xlator_t      **children   = NULL;
+        int             call_child = 0;
+        int32_t         op_ret     = -1;
+        int32_t         op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv->children, out);
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        frame->local = local;
+
+        call_child = afr_deitransform (loc->inode->ino, priv->child_count);
+        loc_copy (&local->loc, loc);
+
+        /* inode number the callback writes into the returned stat buffer */
+        local->cont.stat.ino = loc->inode->ino;
+
+        /* -1 makes a retry in the callback begin with child 0 */
+        local->cont.stat.last_tried = -1;
+
+        STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
+                           children[call_child],
+                           children[call_child]->fops->stat,
+                           loc);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ fstat */
+
+/**
+ * Reply handler for fstat.
+ *
+ * The cookie is the index of the child afr_fstat picked with
+ * afr_deitransform; it is handed on unchanged with every retry. On
+ * failure the children are walked in index order through
+ * cont.fstat.last_tried, skipping the child named by the cookie. On
+ * success the inode number saved by afr_fstat replaces the one in buf.
+ */
+int32_t
+afr_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int deitransform_child = -1;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ priv = this->private;
+ children = priv->children;
+
+ deitransform_child = (long) cookie;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.fstat.last_tried;
+
+ /* nobody left to ask: fall through and return this failure */
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.fstat.last_tried;
+
+ if (this_try == deitransform_child) {
+ /*
+ skip the deitransform'd child since if we are here
+ we must have already tried that child
+ */
+ goto retry;
+ }
+
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk,
+ (void *) (long) deitransform_child,
+ children[this_try],
+ children[this_try]->fops->fstat,
+ local->fd);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret != -1)
+ buf->st_ino = local->cont.fstat.ino;
+
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_fstat - fstat fop entry point
+ * @frame: caller's frame
+ * @this:  this xlator
+ * @fd:    open fd to stat; a reference is kept in the local
+ *
+ * The child to ask first is derived from the inode number with
+ * afr_deitransform. The fd and the inode number are saved in the local
+ * for afr_fstat_cbk.
+ *
+ * Always returns 0. Set-up failures unwind @frame with op_ret -1.
+ */
+int32_t
+afr_fstat (call_frame_t *frame, xlator_t *this,
+           fd_t *fd)
+{
+        afr_private_t  *priv       = NULL;
+        afr_local_t    *local      = NULL;
+        xlator_t      **children   = NULL;
+        int             call_child = 0;
+        int32_t         op_ret     = -1;
+        int32_t         op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv->children, out);
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        frame->local = local;
+
+        VALIDATE_OR_GOTO (fd->inode, out);
+
+        call_child = afr_deitransform (fd->inode->ino, priv->child_count);
+
+        /* -1 makes a retry in the callback begin with child 0 */
+        local->cont.fstat.last_tried = -1;
+
+        /* inode number the callback writes into the returned stat buffer */
+        local->cont.fstat.ino = fd->inode->ino;
+
+        local->fd = fd_ref (fd);
+
+        STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
+                           children[call_child],
+                           children[call_child]->fops->fstat,
+                           fd);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ readlink */
+
+/**
+ * afr_readlink_cbk - reply handler for readlink
+ *
+ * A successful reply is passed straight back to the caller. A failed one
+ * moves on to the child after cont.readlink.last_tried; once all_tried()
+ * says there is none left, the failure itself is returned.
+ */
+int32_t
+afr_readlink_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno,
+                  const char *buf)
+{
+        afr_private_t  *priv     = this->private;
+        xlator_t      **children = priv->children;
+        afr_local_t    *local    = frame->local;
+        int             next     = -1;
+
+        /* success, or failure with nobody left to ask: answer the caller */
+        if (op_ret != -1 ||
+            all_tried (local->cont.readlink.last_tried, priv->child_count)) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
+                return 0;
+        }
+
+        next = ++local->cont.readlink.last_tried;
+
+        STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+                           (void *) (long) next,
+                           children[next],
+                           children[next]->fops->readlink,
+                           &local->loc,
+                           local->cont.readlink.size);
+
+        return 0;
+}
+
+
+/**
+ * afr_readlink - readlink fop entry point
+ * @frame: caller's frame
+ * @this:  this xlator
+ * @loc:   symlink to read
+ * @size:  passed through to the child
+ *
+ * Winds readlink to the first child that is up and saves loc, size and
+ * the child index in the local for afr_readlink_cbk.
+ *
+ * Always returns 0. Failures unwind @frame with op_ret -1 (ENOTCONN when
+ * no child is up).
+ */
+int32_t
+afr_readlink (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, size_t size)
+{
+        afr_private_t  *priv       = NULL;
+        afr_local_t    *local      = NULL;
+        xlator_t      **children   = NULL;
+        int             call_child = 0;
+        int32_t         op_ret     = -1;
+        int32_t         op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv->children, out);
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        frame->local = local;
+
+        call_child = afr_first_up_child (priv);
+        if (call_child == -1) {
+                op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no child is up :(");
+                goto out;
+        }
+
+        /* state the callback needs in order to retry on the next child */
+        local->cont.readlink.last_tried = call_child;
+        loc_copy (&local->loc, loc);
+        local->cont.readlink.size = size;
+
+        STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+                           (void *) (long) call_child,
+                           children[call_child], children[call_child]->fops->readlink,
+                           loc, size);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ getxattr */
+
+/**
+ * afr_getxattr_cbk - reply handler for getxattr
+ *
+ * A successful reply is passed straight back to the caller. A failed one
+ * moves on to the child after cont.getxattr.last_tried; once all_tried()
+ * says there is none left, the failure itself is returned.
+ */
+int32_t
+afr_getxattr_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno,
+                  dict_t *dict)
+{
+        afr_private_t  *priv     = this->private;
+        xlator_t      **children = priv->children;
+        afr_local_t    *local    = frame->local;
+        int             next     = -1;
+
+        /* success, or failure with nobody left to ask: answer the caller */
+        if (op_ret != -1 ||
+            all_tried (local->cont.getxattr.last_tried, priv->child_count)) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, dict);
+                return 0;
+        }
+
+        next = ++local->cont.getxattr.last_tried;
+
+        STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+                           (void *) (long) next,
+                           children[next],
+                           children[next]->fops->getxattr,
+                           &local->loc,
+                           local->cont.getxattr.name);
+
+        return 0;
+}
+
+
+/**
+ * afr_getxattr - getxattr fop entry point
+ * @frame: caller's frame
+ * @this:  this xlator
+ * @loc:   object whose extended attributes are read
+ * @name:  attribute name, may be NULL; a private copy is kept for retries
+ *
+ * Winds getxattr to the first child that is up and saves loc, a copy of
+ * @name and the child index in the local for afr_getxattr_cbk.
+ *
+ * Always returns 0. Failures unwind @frame with op_ret -1 (ENOTCONN when
+ * no child is up, ENOMEM when @name cannot be copied).
+ */
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, const char *name)
+{
+        afr_private_t * priv       = NULL;
+        xlator_t **     children   = NULL;
+        int             call_child = 0;
+        afr_local_t *   local      = NULL;
+
+        int32_t op_ret   = -1;
+        int32_t op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv->children, out);
+
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        frame->local = local;
+
+        call_child = afr_first_up_child (priv);
+        if (call_child == -1) {
+                op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no child is up :(");
+                goto out;
+        }
+
+        local->cont.getxattr.last_tried = call_child;
+        loc_copy (&local->loc, loc);
+        if (name) {
+                local->cont.getxattr.name = strdup (name);
+                if (!local->cont.getxattr.name) {
+                        /*
+                         * Without the copy a retry would be wound with a
+                         * NULL name instead of the one the caller asked
+                         * for, so fail the call here.
+                         */
+                        op_errno = ENOMEM;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto out;
+                }
+        }
+
+        STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+                           (void *) (long) call_child,
+                           children[call_child], children[call_child]->fops->getxattr,
+                           loc, name);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ readv */
+
+/**
+ * read algorithm:
+ *
+ * if the user has specified a read subvolume, use it
+ * otherwise -
+ * use the inode number to hash it to one of the subvolumes, and
+ * read from there (to balance read load)
+ *
+ * if any of the above reads fails, try the children in sequence,
+ * starting from the first one
+ */
+
+/**
+ * Reply handler for readv.
+ *
+ * On failure the children are walked in index order through
+ * cont.readv.last_tried, skipping priv->read_child. On success, or when
+ * all_tried() reports no child is left, the reply is unwound to the
+ * caller as received.
+ *
+ * NOTE(review): every VALIDATE_OR_GOTO below jumps to 'out', which still
+ * unwinds through 'frame' - confirm this is safe if frame itself is the
+ * argument that failed validation.
+ */
+int32_t
+afr_readv_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct stat *buf)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+
+ int unwind = 1;
+ int last_tried = -1;
+ int this_try = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ retry:
+ last_tried = local->cont.readv.last_tried;
+
+ /* nobody left to ask: fall through and return this failure */
+ if (all_tried (last_tried, priv->child_count)) {
+ goto out;
+ }
+ this_try = ++local->cont.readv.last_tried;
+
+ if (this_try == priv->read_child) {
+ /*
+ skip the read child since if we are here
+ we must have already tried that child
+ */
+ goto retry;
+ }
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) this_try,
+ children[this_try],
+ children[this_try]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset);
+ }
+
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_readv - readv fop entry point
+ * @frame:  caller's frame
+ * @this:   this xlator
+ * @fd:     open fd to read from; a reference is kept in the local
+ * @size:   passed through to the child
+ * @offset: passed through to the child
+ *
+ * Reads from priv->read_child when one is set, otherwise from the first
+ * child that is up. fd, size and offset are saved in the local so that
+ * afr_readv_cbk can retry on other children.
+ *
+ * Always returns 0. Failures unwind @frame with op_ret -1 (ENOTCONN when
+ * no child is up).
+ */
+int32_t
+afr_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t offset)
+{
+        afr_private_t  *priv       = NULL;
+        afr_local_t    *local      = NULL;
+        xlator_t      **children   = NULL;
+        int             call_child = 0;
+        int32_t         op_ret     = -1;
+        int32_t         op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv     = this->private;
+        children = priv->children;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        frame->local = local;
+
+        if (priv->read_child == -1) {
+                /* no read child set: start with the first child that is up */
+                call_child = afr_first_up_child (priv);
+                if (call_child == -1) {
+                        op_errno = ENOTCONN;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no child is up :(");
+                        goto out;
+                }
+
+                local->cont.readv.last_tried = call_child;
+        } else {
+                call_child = priv->read_child;
+
+                /* -1 makes a retry in the callback begin with child 0 */
+                local->cont.readv.last_tried = -1;
+        }
+
+        local->fd = fd_ref (fd);
+
+        local->cont.readv.size   = size;
+        local->cont.readv.offset = offset;
+
+        STACK_WIND_COOKIE (frame, afr_readv_cbk,
+                           (void *) (long) call_child,
+                           children[call_child],
+                           children[call_child]->fops->readv,
+                           fd, size, offset);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0, NULL);
+        }
+        return 0;
+}
+
+/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
new file mode 100644
index 00000000000..6b3bd2da850
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __INODE_READ_H__
+#define __INODE_READ_H__
+
+/*
+ Entry points of the AFR inode read fops. Each takes the caller's frame
+ and this xlator, followed by the fop's own arguments.
+*/
+
+int32_t
+afr_access (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t mask);
+
+int32_t
+afr_stat (call_frame_t *frame, xlator_t *this,
+ loc_t *loc);
+
+int32_t
+afr_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd);
+
+int32_t
+afr_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size);
+
+int32_t
+afr_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset);
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name);
+
+#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
new file mode 100644
index 00000000000..267350b2c4a
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -0,0 +1,2024 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+
+/* {{{ chmod */
+
+
+/**
+ * afr_chmod_unwind - answer the caller of chmod, at most once
+ *
+ * The main frame pointer is taken and cleared while holding the
+ * transaction frame's lock. Whoever finds it non-NULL puts the saved inode
+ * number into the stored stat buffer and unwinds the main frame with it.
+ */
+int
+afr_chmod_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local  = frame->local;
+        call_frame_t *caller = NULL;
+
+        LOCK (&frame->lock);
+        {
+                caller = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (caller == NULL)
+                return 0;
+
+        local->cont.chmod.buf.st_ino = local->cont.chmod.ino;
+        AFR_STACK_UNWIND (caller, local->op_ret, local->op_errno,
+                          &local->cont.chmod.buf);
+
+        return 0;
+}
+
+
+/**
+ * Per-child reply handler for chmod. The cookie carries the index of the
+ * child the call was wound to.
+ *
+ * The caller is answered as soon as priv->wait_count children have
+ * succeeded; the transaction is resumed once the last outstanding reply
+ * has been counted.
+ */
+int
+afr_chmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (child_went_down (op_ret, op_errno))
+ afr_transaction_child_died (frame, this, child_index);
+
+ if (op_ret != -1) {
+ /* keep the result and stat buffer of the first success only */
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.chmod.buf = *buf;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+
+ /* overwritten by every reply, successful or not */
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ /* reply to the caller before this reply is counted as returned */
+ if (need_unwind)
+ afr_chmod_unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_chmod_wind - send chmod to every child marked up
+ *
+ * With no child up the transaction is resumed immediately. Otherwise
+ * local->call_count is set to the number of up children and the saved loc
+ * and mode are wound to each of them, with the child index as cookie.
+ */
+int
+afr_chmod_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        /* leave the loop right after the last wind has been issued */
+        for (child = 0; pending != 0 && child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_chmod_wind_cbk, (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->chmod,
+                                   &local->loc,
+                                   local->cont.chmod.mode);
+                pending--;
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_chmod_done - 'done' hook of the chmod transaction
+ *
+ * Calls the transaction's unwind hook, then destroys the transaction frame.
+ */
+int
+afr_chmod_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/**
+ * afr_chmod - chmod fop entry point
+ * @frame: caller's frame; it is kept as transaction.main_frame and
+ *         answered by the unwind hook
+ * @this:  this xlator
+ * @loc:   object whose mode is changed
+ * @mode:  new mode, passed through to the children
+ *
+ * Sets up a local on a copy of @frame, records mode, inode number, loc and
+ * the wind/done/unwind hooks, and starts a metadata transaction.
+ *
+ * Always returns 0. A set-up failure is reported by destroying the
+ * transaction frame (if one was created) and unwinding @frame with
+ * op_ret -1.
+ */
+int32_t
+afr_chmod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report a real errno instead of failing with op_errno 0 */
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->cont.chmod.mode = mode;
+        local->cont.chmod.ino  = loc->inode->ino;
+
+        local->transaction.fop    = afr_chmod_wind;
+        local->transaction.done   = afr_chmod_done;
+        local->transaction.unwind = afr_chmod_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+
+/* {{{ fchmod */
+
+/**
+ * afr_fchmod_unwind - answer the caller of fchmod, at most once
+ *
+ * The main frame pointer is taken and cleared while holding the
+ * transaction frame's lock. Whoever finds it non-NULL puts the saved inode
+ * number into the stored stat buffer and unwinds the main frame with it.
+ */
+int
+afr_fchmod_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local  = frame->local;
+        call_frame_t *caller = NULL;
+
+        LOCK (&frame->lock);
+        {
+                caller = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (caller == NULL)
+                return 0;
+
+        local->cont.fchmod.buf.st_ino = local->cont.fchmod.ino;
+        AFR_STACK_UNWIND (caller, local->op_ret, local->op_errno,
+                          &local->cont.fchmod.buf);
+
+        return 0;
+}
+
+
+/**
+ * Per-child reply handler for fchmod. The cookie carries the index of the
+ * child the call was wound to.
+ *
+ * The caller is answered as soon as priv->wait_count children have
+ * succeeded; the transaction is resumed once the last outstanding reply
+ * has been counted.
+ */
+int
+afr_fchmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (child_went_down (op_ret, op_errno))
+ afr_transaction_child_died (frame, this, child_index);
+
+ if (op_ret != -1) {
+ /* keep the result and stat buffer of the first success only */
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.fchmod.buf = *buf;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+
+ /* overwritten by every reply, successful or not */
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ /* reply to the caller before this reply is counted as returned */
+ if (need_unwind)
+ afr_fchmod_unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_fchmod_wind - send fchmod to every child marked up
+ *
+ * With no child up the transaction is resumed immediately. Otherwise
+ * local->call_count is set to the number of up children and the saved fd
+ * and mode are wound to each of them, with the child index as cookie.
+ */
+int
+afr_fchmod_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = 0;
+        int            child   = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        /* leave the loop right after the last wind has been issued */
+        for (child = 0; pending != 0 && child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fchmod_wind_cbk, (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->fchmod,
+                                   local->fd,
+                                   local->cont.fchmod.mode);
+                pending--;
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_fchmod_done - 'done' hook of the fchmod transaction
+ *
+ * Calls the transaction's unwind hook, then destroys the transaction frame.
+ */
+int
+afr_fchmod_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/**
+ * afr_fchmod - fchmod fop entry point
+ * @frame: caller's frame; it is kept as transaction.main_frame and
+ *         answered by the unwind hook
+ * @this:  this xlator
+ * @fd:    open fd whose mode is changed; a reference is kept in the local
+ * @mode:  new mode, passed through to the children
+ *
+ * Sets up a local on a copy of @frame, records mode, inode number, fd and
+ * the wind/done/unwind hooks, and starts a metadata transaction.
+ *
+ * Always returns 0. A set-up failure is reported by destroying the
+ * transaction frame (if one was created) and unwinding @frame with
+ * op_ret -1.
+ */
+int32_t
+afr_fchmod (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, mode_t mode)
+{
+        afr_private_t * priv  = NULL;
+        afr_local_t   * local = NULL;
+        call_frame_t  * transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        /* fd->inode->ino is read below; check both like afr_fstat does */
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (fd->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                /* report a real errno instead of failing with op_errno 0 */
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->cont.fchmod.mode = mode;
+        local->cont.fchmod.ino  = fd->inode->ino;
+
+        local->transaction.fop    = afr_fchmod_wind;
+        local->transaction.done   = afr_fchmod_done;
+        local->transaction.unwind = afr_fchmod_unwind;
+
+        local->fd = fd_ref (fd);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ chown */
+
+/*
+ * Reply to the original chown caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.chown.ino.
+ */
+int
+afr_chown_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.chown.buf.st_ino = local->cont.chown.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.chown.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's chown.  `cookie` is the child index that
+ * afr_chown_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.  Once the success count
+ * equals priv->wait_count the transaction's unwind handler is called; once
+ * afr_frame_return() reports zero, the transaction is resumed.
+ */
+int
+afr_chown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int child_index = (long) cookie;
+        int need_unwind = 0;
+        int call_count  = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.chown.buf = *buf;
+                        }
+
+                        local->success_count++;
+                        need_unwind = (local->success_count == priv->wait_count);
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+        if (call_count == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for chown: wind chown (loc, uid, gid) to every
+ * child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int
+afr_chown_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_chown_wind_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->chown,
+                                   &local->loc, local->cont.chown.uid,
+                                   local->cont.chown.gid);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for chown: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_chown_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * chown fop entry point.
+ *
+ * Copies `frame` into a transaction frame, stores uid, gid and the inode
+ * number of `loc` in a newly initialised afr_local_t, copies `loc`,
+ * registers the chown wind/done/unwind handlers and starts an
+ * AFR_METADATA_TRANSACTION.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_chown (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, uid_t uid, gid_t gid)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* loc and loc->inode are dereferenced below; reject NULL up front. */
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->cont.chown.uid = uid;
+        local->cont.chown.gid = gid;
+        local->cont.chown.ino = loc->inode->ino;
+
+        local->transaction.fop    = afr_chown_wind;
+        local->transaction.done   = afr_chown_done;
+        local->transaction.unwind = afr_chown_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ fchown */
+
+/*
+ * Reply to the original fchown caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.fchown.ino.
+ */
+int
+afr_fchown_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.fchown.buf.st_ino = local->cont.fchown.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.fchown.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's fchown.  `cookie` is the child index that
+ * afr_fchown_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.  Once the success count
+ * equals priv->wait_count the transaction's unwind handler is called; once
+ * afr_frame_return() reports zero, the transaction is resumed.
+ */
+int
+afr_fchown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int child_index = (long) cookie;
+        int need_unwind = 0;
+        int call_count  = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.fchown.buf = *buf;
+                        }
+
+                        local->success_count++;
+                        need_unwind = (local->success_count == priv->wait_count);
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+        if (call_count == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for fchown: wind fchown (fd, uid, gid) to every
+ * child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int
+afr_fchown_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_fchown_wind_cbk, (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->fchown,
+                                   local->fd, local->cont.fchown.uid,
+                                   local->cont.fchown.gid);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for fchown: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_fchown_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * fchown fop entry point.
+ *
+ * Copies `frame` into a transaction frame, stores uid, gid and the fd's
+ * inode number in a newly initialised afr_local_t, takes a reference on
+ * `fd`, registers the fchown wind/done/unwind handlers and starts an
+ * AFR_METADATA_TRANSACTION.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_fchown (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, uid_t uid, gid_t gid)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* fd and fd->inode are dereferenced below; reject NULL up front. */
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (fd->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->cont.fchown.uid = uid;
+        local->cont.fchown.gid = gid;
+        local->cont.fchown.ino = fd->inode->ino;
+
+        local->transaction.fop    = afr_fchown_wind;
+        local->transaction.done   = afr_fchown_done;
+        local->transaction.unwind = afr_fchown_unwind;
+
+        local->fd = fd_ref (fd);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ writev */
+
+/*
+ * Reply to the original writev caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.writev.ino.
+ */
+int
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.writev.buf.st_ino = local->cont.writev.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.writev.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's writev.  `cookie` is the child index that
+ * afr_writev_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.
+ *
+ * Unlike the other write-fop callbacks in this file, the caller is not
+ * answered when the success count reaches priv->wait_count: the unwind
+ * handler runs only after afr_frame_return() reports zero, immediately
+ * before the transaction is resumed.  The previous code also computed a
+ * need_unwind flag that nothing read; that dead bookkeeping is removed.
+ */
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t *local = NULL;
+
+        int child_index = (long) cookie;
+        int call_count  = -1;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.writev.buf = *buf;
+                        }
+                        local->success_count++;
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.unwind (frame, this);
+
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for writev: wind writev (fd, vector, count,
+ * offset) to every child that local->child_up marks as up.  When no child
+ * is up the transaction is resumed straight away without winding anything.
+ */
+int
+afr_writev_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->writev,
+                                   local->fd,
+                                   local->cont.writev.vector,
+                                   local->cont.writev.count,
+                                   local->cont.writev.offset);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for writev: drop the reference held on the
+ * request's refs dict (if any), run the transaction's unwind handler (a
+ * no-op if the caller was already answered), then destroy the transaction
+ * frame.
+ */
+int
+afr_writev_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        if (local->cont.writev.refs) {
+                dict_unref (local->cont.writev.refs);
+                local->cont.writev.refs = NULL;
+        }
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * writev fop entry point.
+ *
+ * Copies `frame` into a transaction frame and fills a newly initialised
+ * afr_local_t with a duplicate of the iovec array (iov_dup), the count,
+ * the offset and the fd's inode number.  A reference is taken on
+ * frame->root->req_refs when it is set, and on `fd`.  The writev
+ * wind/done/unwind handlers are registered and an AFR_DATA_TRANSACTION is
+ * started.
+ *
+ * transaction.start/len are both 0 when the fd was opened with O_APPEND;
+ * otherwise they are the write offset and the total iovec length.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            struct iovec *vector, int32_t count, off_t offset)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* fd and fd->inode are dereferenced below; reject NULL up front. */
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (fd->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op = GF_FOP_WRITE;
+        /* NOTE(review): the iov_dup result is used unchecked -- confirm how
+           an allocation failure here is meant to be reported. */
+        local->cont.writev.vector = iov_dup (vector, count);
+        local->cont.writev.count  = count;
+        local->cont.writev.offset = offset;
+        local->cont.writev.ino    = fd->inode->ino;
+
+        if (frame->root->req_refs)
+                local->cont.writev.refs = dict_ref (frame->root->req_refs);
+
+        local->transaction.fop    = afr_writev_wind;
+        local->transaction.done   = afr_writev_done;
+        local->transaction.unwind = afr_writev_unwind;
+
+        local->fd = fd_ref (fd);
+
+        local->transaction.main_frame = frame;
+        if (fd->flags & O_APPEND) {
+                local->transaction.start = 0;
+                local->transaction.len   = 0;
+        } else {
+                local->transaction.start = offset;
+                local->transaction.len   = iov_length (vector, count);
+        }
+
+        local->transaction.pending = AFR_DATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ truncate */
+
+/*
+ * Reply to the original truncate caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.truncate.ino.
+ */
+int
+afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.truncate.buf.st_ino = local->cont.truncate.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.truncate.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's truncate.  `cookie` is the child index
+ * that afr_truncate_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.  Once the success count
+ * equals priv->wait_count the transaction's unwind handler is called; once
+ * afr_frame_return() reports zero, the transaction is resumed.
+ */
+int
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int child_index = (long) cookie;
+        int need_unwind = 0;
+        int call_count  = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.truncate.buf = *buf;
+                        }
+
+                        local->success_count++;
+                        need_unwind = (local->success_count == priv->wait_count);
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+        if (call_count == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for truncate: wind truncate (loc, offset) to every
+ * child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int32_t
+afr_truncate_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->truncate,
+                                   &local->loc,
+                                   local->cont.truncate.offset);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for truncate: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_truncate_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * truncate fop entry point.
+ *
+ * Copies `frame` into a transaction frame, stores the target offset and
+ * the inode number of `loc` in a newly initialised afr_local_t (op_ret
+ * preset to -1), copies `loc`, registers the truncate wind/done/unwind
+ * handlers and starts an AFR_DATA_TRANSACTION with start 0 and len equal
+ * to `offset`.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_truncate (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, off_t offset)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* loc and loc->inode are dereferenced below; reject NULL up front. */
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op_ret = -1;
+
+        local->cont.truncate.offset = offset;
+        local->cont.truncate.ino    = loc->inode->ino;
+
+        local->transaction.fop    = afr_truncate_wind;
+        local->transaction.done   = afr_truncate_done;
+        local->transaction.unwind = afr_truncate_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = offset;
+        local->transaction.pending    = AFR_DATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+
+/* }}} */
+
+/* {{{ ftruncate */
+
+
+/*
+ * Reply to the original ftruncate caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.ftruncate.ino.
+ */
+int
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.ftruncate.buf.st_ino = local->cont.ftruncate.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.ftruncate.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's ftruncate.  `cookie` is the child index
+ * that afr_ftruncate_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.  Once the success count
+ * equals priv->wait_count the transaction's unwind handler is called; once
+ * afr_frame_return() reports zero, the transaction is resumed.
+ */
+int
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int child_index = (long) cookie;
+        int need_unwind = 0;
+        int call_count  = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.ftruncate.buf = *buf;
+                        }
+
+                        local->success_count++;
+                        need_unwind = (local->success_count == priv->wait_count);
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+        if (call_count == 0)
+                local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for ftruncate: wind ftruncate (fd, offset) to
+ * every child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->ftruncate,
+                                   local->fd, local->cont.ftruncate.offset);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for ftruncate: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop entry point.
+ *
+ * Copies `frame` into a transaction frame, marks the operation as
+ * GF_FOP_FTRUNCATE, stores the target offset and the fd's inode number in
+ * a newly initialised afr_local_t (op_ret preset to -1), takes a
+ * reference on `fd`, registers the ftruncate wind/done/unwind handlers and
+ * starts an AFR_DATA_TRANSACTION with start 0 and len equal to `offset`.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_ftruncate (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, off_t offset)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* fd and fd->inode are dereferenced below; reject NULL up front. */
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (fd->inode, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op     = GF_FOP_FTRUNCATE;
+        local->op_ret = -1;
+
+        local->cont.ftruncate.offset = offset;
+        local->cont.ftruncate.ino    = fd->inode->ino;
+
+        local->transaction.fop    = afr_ftruncate_wind;
+        local->transaction.done   = afr_ftruncate_done;
+        local->transaction.unwind = afr_ftruncate_unwind;
+
+        local->fd = fd_ref (fd);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = offset;
+        local->transaction.pending    = AFR_DATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ utimens */
+
+
+/*
+ * Reply to the original utimens caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  The stat buffer handed back has its st_ino replaced
+ * by the inode number saved in local->cont.utimens.ino.
+ */
+int
+afr_utimens_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        local->cont.utimens.buf.st_ino = local->cont.utimens.ino;
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
+                          &local->cont.utimens.buf);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's utimens.  `cookie` is the child index
+ * that afr_utimens_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret and the stat buffer of the first successful reply, counts
+ * successes and records the latest op_errno.  Once the success count
+ * equals priv->wait_count the transaction's unwind handler is called; once
+ * afr_frame_return() reports zero, the transaction is resumed.
+ *
+ * need_unwind starts at 0, like in the other write-fop callbacks.  It was
+ * previously initialised to 1, which unwound the caller on the very first
+ * reply -- including a failed one -- regardless of wait_count.
+ */
+int
+afr_utimens_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        int child_index = (long) cookie;
+        int call_count  = -1;
+        int need_unwind = 0;
+
+        local = frame->local;
+        priv  = this->private;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                                local->cont.utimens.buf = *buf;
+                        }
+                        local->success_count++;
+
+                        if (local->success_count == priv->wait_count) {
+                                need_unwind = 1;
+                        }
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for utimens: wind utimens (loc, tv) to every child
+ * that local->child_up marks as up.  When no child is up the transaction
+ * is resumed straight away without winding anything.
+ */
+int
+afr_utimens_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_utimens_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->utimens,
+                                   &local->loc,
+                                   local->cont.utimens.tv);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for utimens: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_utimens_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * utimens fop entry point.
+ *
+ * Copies `frame` into a transaction frame, stores both timespec values
+ * and the inode number of `loc` in a newly initialised afr_local_t
+ * (op_ret preset to -1), copies `loc`, registers the utimens
+ * wind/done/unwind handlers and starts an AFR_METADATA_TRANSACTION.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_utimens (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, struct timespec tv[2])
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* loc, loc->inode and tv are dereferenced below; reject NULL up
+           front. */
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->inode, out);
+        VALIDATE_OR_GOTO (tv, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op_ret = -1;
+
+        local->cont.utimens.tv[0] = tv[0];
+        local->cont.utimens.tv[1] = tv[1];
+
+        local->cont.utimens.ino = loc->inode->ino;
+
+        local->transaction.fop    = afr_utimens_wind;
+        local->transaction.done   = afr_utimens_done;
+        local->transaction.unwind = afr_utimens_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ setxattr */
+
+
+/*
+ * Reply to the original setxattr caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  Only op_ret and op_errno are handed back.
+ */
+int
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's setxattr.  `cookie` is the child index
+ * that afr_setxattr_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret of the first successful reply, counts successes and records the
+ * latest op_errno.  Once the success count equals priv->wait_count the
+ * transaction's unwind handler is called; once afr_frame_return() reports
+ * zero, the transaction is resumed.
+ *
+ * The child_went_down() check was previously missing here, although the
+ * chown/fchown/writev/truncate/ftruncate/utimens callbacks all perform
+ * it; a child that dropped during setxattr was therefore never reported
+ * to the transaction.
+ */
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        int child_index = (long) cookie;
+        int call_count  = -1;
+        int need_unwind = 0;
+
+        local = frame->local;
+        priv  = this->private;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                        }
+                        local->success_count++;
+
+                        if (local->success_count == priv->wait_count) {
+                                need_unwind = 1;
+                        }
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for setxattr: wind setxattr (loc, dict, flags) to
+ * every child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->setxattr,
+                                   &local->loc,
+                                   local->cont.setxattr.dict,
+                                   local->cont.setxattr.flags);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for setxattr: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * setxattr fop entry point.
+ *
+ * Copies `frame` into a transaction frame, takes a reference on `dict`
+ * and stores it with `flags` in a newly initialised afr_local_t (op_ret
+ * preset to -1), copies `loc`, registers the setxattr wind/done/unwind
+ * handlers and starts an AFR_METADATA_TRANSACTION.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_setxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *dict, int32_t flags)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        /* loc is validated as afr_removexattr() does; dict is handed to
+           dict_ref() below, so reject NULL for it as well. */
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (dict, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op_ret = -1;
+
+        local->cont.setxattr.dict  = dict_ref (dict);
+        local->cont.setxattr.flags = flags;
+
+        local->transaction.fop    = afr_setxattr_wind;
+        local->transaction.done   = afr_setxattr_done;
+        local->transaction.unwind = afr_setxattr_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ removexattr */
+
+
+/*
+ * Reply to the original removexattr caller, at most once per transaction.
+ *
+ * The main frame pointer is taken and cleared under the frame lock, so
+ * only the first invocation finds it non-NULL; later invocations return
+ * without unwinding.  Only op_ret and op_errno are handed back.
+ */
+int
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t  *local      = frame->local;
+        call_frame_t *main_frame = NULL;
+
+        LOCK (&frame->lock);
+        {
+                main_frame = local->transaction.main_frame;
+                local->transaction.main_frame = NULL;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!main_frame)
+                return 0;
+
+        AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for one child's removexattr.  `cookie` is the child index
+ * that afr_removexattr_wind() passed when winding.
+ *
+ * Under the frame lock it reports the child to
+ * afr_transaction_child_died() when child_went_down() says so, keeps
+ * op_ret of the first successful reply, counts successes and records the
+ * latest op_errno.  Once the success count equals priv->wait_count the
+ * transaction's unwind handler is called; once afr_frame_return() reports
+ * zero, the transaction is resumed.
+ *
+ * The child_went_down() check was previously missing here, although the
+ * chown/fchown/writev/truncate/ftruncate/utimens callbacks all perform
+ * it; a child that dropped during removexattr was therefore never
+ * reported to the transaction.
+ */
+int
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        int child_index = (long) cookie;
+        int call_count  = -1;
+        int need_unwind = 0;
+
+        local = frame->local;
+        priv  = this->private;
+
+        LOCK (&frame->lock);
+        {
+                if (child_went_down (op_ret, op_errno))
+                        afr_transaction_child_died (frame, this, child_index);
+
+                if (op_ret != -1) {
+                        /* First success supplies the result for the caller. */
+                        if (local->success_count == 0) {
+                                local->op_ret = op_ret;
+                        }
+                        local->success_count++;
+
+                        if (local->success_count == priv->wait_count) {
+                                need_unwind = 1;
+                        }
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        if (need_unwind)
+                local->transaction.unwind (frame, this);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                local->transaction.resume (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction fop stage for removexattr: wind removexattr (loc, name) to
+ * every child that local->child_up marks as up.  When no child is up the
+ * transaction is resumed straight away without winding anything.
+ */
+int32_t
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            remaining = 0;
+        int            i         = 0;
+
+        remaining = afr_up_children_count (priv->child_count, local->child_up);
+        if (remaining == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        /* Must be published before the first wind: the callbacks consult
+           it through afr_frame_return(). */
+        local->call_count = remaining;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->removexattr,
+                                   &local->loc,
+                                   local->cont.removexattr.name);
+
+                /* Leave the loop as soon as every counted child is wound. */
+                if (--remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Transaction "done" stage for removexattr: run the transaction's unwind
+ * handler (a no-op if the caller was already answered), then destroy the
+ * transaction frame.
+ */
+int
+afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        local->transaction.unwind (frame, this);
+        AFR_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * removexattr fop entry point.
+ *
+ * Copies `frame` into a transaction frame, stores a strdup'ed copy of
+ * `name` in a newly initialised afr_local_t (op_ret preset to -1), copies
+ * `loc`, registers the removexattr wind/done/unwind handlers and starts an
+ * AFR_METADATA_TRANSACTION.
+ *
+ * If setup fails before the transaction is started, the transaction frame
+ * (when one was created) is destroyed and `frame` is unwound with
+ * op_ret -1.  Always returns 0; the result reaches the caller through the
+ * unwind of `frame`.
+ */
+int
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *name)
+{
+        afr_private_t *priv              = NULL;
+        afr_local_t   *local             = NULL;
+        call_frame_t  *transaction_frame = NULL;
+
+        int ret = -1;
+
+        int op_ret   = -1;
+        int op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+        /* name is passed to strdup() below, which must not receive NULL. */
+        VALIDATE_OR_GOTO (name, out);
+
+        priv = this->private;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* NOTE(review): if AFR_LOCAL_INIT fails, `local` is not yet attached
+           to transaction_frame -- confirm it is released on that path. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+
+        local->op_ret = -1;
+
+        /* NOTE(review): the strdup result is used unchecked -- confirm how
+           an allocation failure here is meant to be reported. */
+        local->cont.removexattr.name = strdup (name);
+
+        local->transaction.fop    = afr_removexattr_wind;
+        local->transaction.done   = afr_removexattr_done;
+        local->transaction.unwind = afr_removexattr_unwind;
+
+        loc_copy (&local->loc, loc);
+
+        local->transaction.main_frame = frame;
+        local->transaction.start      = 0;
+        local->transaction.len        = 0;
+        local->transaction.pending    = AFR_METADATA_PENDING;
+
+        afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
new file mode 100644
index 00000000000..9c0b5cad314
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -0,0 +1,63 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __INODE_WRITE_H__
+#define __INODE_WRITE_H__
+
+/*
+ * Prototypes of the AFR inode-write operations. Every operation takes the
+ * call frame and the translator instance first; the remaining arguments
+ * identify the target either by path (loc_t) or by open descriptor (fd_t)
+ * and carry the operation-specific values.
+ *
+ * NOTE(review): afr_fchown is declared with return type int while the
+ * other prototypes use int32_t, and afr_removexattr is defined with return
+ * type int - confirm int and int32_t are the same type on all targets.
+ */
+
+int32_t
+afr_chmod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode);
+
+int32_t
+afr_chown (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, uid_t uid, gid_t gid);
+
+int
+afr_fchown (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, uid_t uid, gid_t gid);
+
+int32_t
+afr_fchmod (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, mode_t mode);
+
+int32_t
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset);
+
+int32_t
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset);
+
+int32_t
+afr_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset);
+
+int32_t
+afr_utimens (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct timespec tv[2]);
+
+int32_t
+afr_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags);
+
+int32_t
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name);
+
+#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
new file mode 100644
index 00000000000..45d06516965
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -0,0 +1,1073 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "byte-order.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heal.h"
+
+
+/**
+ * afr_sh_select_source - choose the child to heal from
+ * @sources:     one flag per child, non-zero meaning "is a source"
+ * @child_count: number of entries in @sources
+ *
+ * Returns the lowest index whose flag is set, or -1 when no flag is set.
+ * TODO: take into account option 'favorite-child'
+ */
+
+int
+afr_sh_select_source (int sources[], int child_count)
+{
+        int chosen = -1;
+        int idx    = 0;
+
+        while (idx < child_count) {
+                if (sources[idx] != 0) {
+                        chosen = idx;
+                        break;
+                }
+                idx++;
+        }
+
+        return chosen;
+}
+
+
+/**
+ * afr_sh_sink_count - count the sinks in a sources array
+ * @sources:     one flag per child; a zero flag marks a sink
+ * @child_count: number of entries in @sources
+ *
+ * Returns how many of the first @child_count flags are zero.
+ */
+
+int
+afr_sh_sink_count (int sources[], int child_count)
+{
+        int idx   = 0;
+        int total = 0;
+
+        for (idx = 0; idx < child_count; idx++) {
+                if (sources[idx] == 0)
+                        total += 1;
+        }
+
+        return total;
+}
+
+/*
+ * Count the sources in a sources array: returns how many of the first
+ * child_count flags are non-zero.
+ */
+int
+afr_sh_source_count (int sources[], int child_count)
+{
+        int idx   = 0;
+        int total = 0;
+
+        while (idx < child_count) {
+                total += (sources[idx] != 0);
+                idx++;
+        }
+
+        return total;
+}
+
+
+/*
+ * Clear the source flag of every child that has a non-zero entry in
+ * child_errno. Flags of children with a zero errno are left alone.
+ * Always returns 0.
+ */
+int
+afr_sh_supress_errenous_children (int sources[], int child_errno[],
+                                  int child_count)
+{
+        int idx;
+
+        for (idx = 0; idx < child_count; idx++) {
+                if (child_errno[idx] == 0 || sources[idx] == 0)
+                        continue;
+
+                sources[idx] = 0;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Drop source candidates that carry no usable information.
+ *
+ * If 'key' cannot be fetched from any child's xattr dict, every child
+ * whose stat buffer reports size 0 loses its source flag. Otherwise every
+ * child whose dict is missing, lacks 'key', or yields a NULL value loses
+ * its source flag. Always returns 0.
+ */
+int
+afr_sh_supress_empty_children (int sources[], dict_t *xattr[],
+                               struct stat *buf,
+                               int child_count, const char *key)
+{
+        int32_t *pending    = NULL;
+        int      have_xattr = 0;
+        int      ret        = 0;
+        int      i          = 0;
+
+        /* if the file was created by afr with xattrs, at least one
+           child will have the key */
+        for (i = 0; i < child_count && !have_xattr; i++) {
+                if (xattr[i] == NULL)
+                        continue;
+
+                ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending));
+                if (ret == 0)
+                        have_xattr = 1;
+        }
+
+        if (!have_xattr) {
+                /* suppress 0-byte files: this keeps an empty file
+                   created by directory self-heal from overwriting the
+                   'good' file */
+                for (i = 0; i < child_count; i++) {
+                        if (buf[i].st_size == 0)
+                                sources[i] = 0;
+                }
+
+                return 0;
+        }
+
+        for (i = 0; i < child_count; i++) {
+                if (xattr[i] != NULL) {
+                        ret = dict_get_ptr (xattr[i], (char *)key,
+                                            VOID(&pending));
+                        if (ret == 0 && pending != NULL)
+                                continue;
+                }
+
+                sources[i] = 0;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Log the pending matrix at debug level, one "[ a b c ]" line per row.
+ * The matrix is read as child_count x child_count, with child_count taken
+ * from the translator's private data. Nothing is printed if the line
+ * buffer cannot be allocated.
+ */
+void
+afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+{
+        afr_private_t *priv = this->private;
+
+        char   *buf  = NULL;
+        char   *ptr  = NULL;
+        size_t  size = 0;
+
+        int i, j;
+
+        /* per entry: optional sign + up to 10 digits + 1 space;
+           plus "[ ", "]" and the terminating NUL */
+        size = (size_t) priv->child_count * 12 + 4;
+
+        buf = MALLOC (size);
+        if (!buf) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                return;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                ptr = buf;
+                ptr += snprintf (ptr, size - (ptr - buf), "[ ");
+                for (j = 0; j < priv->child_count; j++) {
+                        ptr += snprintf (ptr, size - (ptr - buf),
+                                         "%d ", pending_matrix[i][j]);
+                }
+                ptr += snprintf (ptr, size - (ptr - buf), "]");
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "pending_matrix: %s", buf);
+        }
+
+        FREE (buf);
+}
+
+
+/*
+ * Fill the child_count x child_count pending matrix from the per-child
+ * xattr dicts.
+ *
+ * Row i is taken from the value stored under 'key' in xattr[i], each
+ * element converted with ntoh32. Rows whose dict is missing, lacks
+ * 'key', or yields a NULL value stay all-zero.
+ */
+void
+afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[],
+                             int child_count, const char *key)
+{
+        int      i       = 0;
+        int      j       = 0;
+        int32_t *pending = NULL;
+        int      ret     = -1;
+
+        /* start clean */
+        for (i = 0; i < child_count; i++) {
+                for (j = 0; j < child_count; j++) {
+                        pending_matrix[i][j] = 0;
+                }
+        }
+
+        for (i = 0; i < child_count; i++) {
+                if (!xattr[i])
+                        continue;
+
+                pending = NULL;
+
+                ret = dict_get_ptr (xattr[i], (char *) key,
+                                    VOID(&pending));
+                /* a successful get may still hand back no data; the
+                   row is then left zeroed instead of reading NULL */
+                if (ret != 0 || !pending)
+                        continue;
+
+                for (j = 0; j < child_count; j++) {
+                        pending_matrix[i][j] = ntoh32 (pending[j]);
+                }
+        }
+}
+
+
+/**
+ * afr_sh_mark_sources - flag every child that no other child accuses
+ * @pending_matrix: child_count x child_count matrix; its diagonal is
+ *                  reset to zero by this call
+ * @sources:        output, set to 1 for each source and 0 otherwise
+ * @child_count:    number of children
+ *
+ * A child is a source when its column in the matrix holds only zeroes
+ * once self-references have been discarded.
+ *
+ * Returns the number of sources found.
+ */
+
+int
+afr_sh_mark_sources (int32_t *pending_matrix[], int sources[], int child_count)
+{
+        int row     = 0;
+        int col     = 0;
+        int accused = 0;
+        int found   = 0;
+
+        /* start clean, and 'normalize' the matrix by disregarding all
+           pending entries that refer to themselves */
+        for (col = 0; col < child_count; col++) {
+                sources[col] = 0;
+                pending_matrix[col][col] = 0;
+        }
+
+        for (col = 0; col < child_count; col++) {
+                accused = 0;
+
+                for (row = 0; row < child_count && !accused; row++)
+                        accused = (pending_matrix[row][col] != 0);
+
+                if (accused)
+                        continue;
+
+                sources[col] = 1;
+                found++;
+        }
+
+        return found;
+}
+
+
+/*
+ * Derive the delta matrix from the pending matrix.
+ *
+ * The delta matrix is zeroed first. Then, for every column j whose
+ * success[j] is non-zero, delta[i][j] becomes the negation of
+ * pending[i][j]; the other columns stay zero.
+ */
+void
+afr_sh_pending_to_delta (int32_t *pending_matrix[], int32_t *delta_matrix[],
+                         int success[], int child_count)
+{
+        int row;
+        int col;
+
+        /* start clean */
+        for (row = 0; row < child_count; row++)
+                for (col = 0; col < child_count; col++)
+                        delta_matrix[row][col] = 0;
+
+        for (row = 0; row < child_count; row++) {
+                for (col = 0; col < child_count; col++) {
+                        if (success[col])
+                                delta_matrix[row][col] =
+                                        -pending_matrix[row][col];
+                }
+        }
+}
+
+
+/*
+ * Store each row of the delta matrix in the matching child's xattr dict.
+ *
+ * For every child i that has a dict, row i is converted element-wise
+ * with hton32 into a freshly allocated array of child_count int32_t and
+ * set under 'key' with dict_set_bin. Children without a dict are skipped.
+ *
+ * Returns 0 when every row was stored, -1 if an allocation or a
+ * dict_set_bin call failed for at least one child (the remaining
+ * children are still processed).
+ */
+int
+afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[],
+                       int child_count, const char *key)
+{
+        int i = 0;
+        int j = 0;
+
+        int ret    = 0;
+        int op_ret = 0;
+
+        int32_t *pending = NULL;
+
+        for (i = 0; i < child_count; i++) {
+                if (!xattr[i])
+                        continue;
+
+                pending = CALLOC (sizeof (int32_t), child_count);
+                if (!pending) {
+                        /* nothing can be stored for this child */
+                        op_ret = -1;
+                        continue;
+                }
+
+                for (j = 0; j < child_count; j++) {
+                        pending[j] = hton32 (delta_matrix[i][j]);
+                }
+
+                /* presumably the dict owns 'pending' once this succeeds;
+                   it is not freed here on failure because ownership on
+                   that path is not visible from this file - TODO confirm */
+                ret = dict_set_bin (xattr[i], (char *) key, pending,
+                                    child_count * sizeof (int32_t));
+                if (ret != 0)
+                        op_ret = -1;
+        }
+
+        return op_ret;
+}
+
+
+/*
+ * Report whether the AFR_METADATA_PENDING value in 'xattr' has a non-zero
+ * counter for any child other than the one at index 'child_count'.
+ *
+ * Despite its name, 'child_count' is used as the index to skip; the
+ * number of counters examined comes from the translator's private data.
+ *
+ * Returns 1 if such a counter exists, 0 otherwise (including when the
+ * key cannot be fetched).
+ */
+int
+afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)
+{
+        afr_private_t *priv   = this->private;
+        int32_t       *counts = NULL;
+        /* fetched through a void * to avoid gcc 'type-punned' warnings */
+        void          *raw    = NULL;
+        int            idx    = 0;
+
+        if (dict_get_ptr (xattr, AFR_METADATA_PENDING, &raw) != 0)
+                return 0;
+
+        counts = raw;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (idx != child_count && counts[idx])
+                        return 1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Report whether the AFR_DATA_PENDING value in 'xattr' has a non-zero
+ * counter for any child other than the one at index 'child_count'.
+ *
+ * Despite its name, 'child_count' is used as the index to skip; the
+ * number of counters examined comes from the translator's private data.
+ *
+ * Returns 1 if such a counter exists, 0 otherwise (including when the
+ * key cannot be fetched).
+ */
+int
+afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)
+{
+        afr_private_t *priv   = this->private;
+        int32_t       *counts = NULL;
+        /* fetched through a void * to avoid gcc 'type-punned' warnings */
+        void          *raw    = NULL;
+        int            idx    = 0;
+
+        if (dict_get_ptr (xattr, AFR_DATA_PENDING, &raw) != 0)
+                return 0;
+
+        counts = raw;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (idx != child_count && counts[idx])
+                        return 1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Report whether the AFR_ENTRY_PENDING value in 'xattr' has a non-zero
+ * counter for any child other than the one at index 'child_count'.
+ *
+ * Despite its name, 'child_count' is used as the index to skip; the
+ * number of counters examined comes from the translator's private data.
+ *
+ * Returns 1 if such a counter exists, 0 otherwise (including when the
+ * key cannot be fetched).
+ */
+int
+afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)
+{
+        afr_private_t *priv   = this->private;
+        int32_t       *counts = NULL;
+        /* fetched through a void * to avoid gcc 'type-punned' warnings */
+        void          *raw    = NULL;
+        int            idx    = 0;
+
+        if (dict_get_ptr (xattr, AFR_ENTRY_PENDING, &raw) != 0)
+                return 0;
+
+        counts = raw;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (idx != child_count && counts[idx])
+                        return 1;
+        }
+
+        return 0;
+}
+
+
+
+/**
+ * afr_sh_is_matrix_zero - test whether the pending matrix is all zeroes
+ * @pending_matrix: child_count x child_count matrix
+ * @child_count:    number of children
+ *
+ * Returns 1 if every element is zero, 0 as soon as a non-zero element
+ * is found.
+ */
+
+int
+afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
+{
+        int row   = 0;
+        int col   = 0;
+        int dirty = 0;
+
+        for (row = 0; row < child_count && !dirty; row++) {
+                for (col = 0; col < child_count; col++) {
+                        if (pending_matrix[row][col] != 0) {
+                                dirty = 1;
+                                break;
+                        }
+                }
+        }
+
+        return !dirty;
+}
+
+
+/*
+ * Final step of the missing-entries phase.
+ *
+ * Clears the per-child stat buffers and drops the per-child xattr dicts
+ * collected so far (child_errno is left as it is). If the abort flag
+ * local->govinda_gOvinda is set, the self-heal completion callback is
+ * invoked; otherwise metadata self-heal is started. Always returns 0.
+ */
+int
+afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_self_heal_t *sh    = &local->self_heal;
+        afr_private_t   *priv  = this->private;
+        int              i     = 0;
+
+        memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->xattr[i] == NULL)
+                        continue;
+
+                dict_unref (sh->xattr[i]);
+                sh->xattr[i] = NULL;
+        }
+
+        if (!local->govinda_gOvinda) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "proceeding to metadata check on %s",
+                        local->loc.path);
+                afr_self_heal_metadata (frame, this);
+
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "aborting selfheal of %s",
+                local->loc.path);
+        sh->completion_cbk (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the entry unlock sent by sh_missing_entries_finish.
+ * The result of the unlock is not inspected; once the last outstanding
+ * reply has arrived, the missing-entries phase is finished off.
+ * Always returns 0.
+ */
+int
+sh_missing_entries_unlck_cbk (call_frame_t *frame, void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret, int32_t op_errno)
+{
+        /* the critical section is empty; it is kept as it was */
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        if (afr_frame_return (frame) == 0)
+                afr_sh_missing_entries_done (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Send an entry unlock (ENTRYLK_UNLOCK, ENTRYLK_WRLCK) for
+ * local->loc.name under sh->parent_loc to every child marked up.
+ * The replies are collected by sh_missing_entries_unlck_cbk.
+ * Always returns 0.
+ */
+static int
+sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_self_heal_t *sh = NULL;
+
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ /* number of replies to wait for; published in local->call_count
+ before the first wind */
+ call_count = local->child_count;
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* the unlock goes to every up child, whether or not the
+ earlier lock request succeeded there */
+ if (local->child_up[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlocking %"PRId64"/%s on subvolume %s",
+ sh->parent_loc.inode->ino, local->loc.name,
+ priv->children[i]->name);
+
+ STACK_WIND (frame, sh_missing_entries_unlck_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ &sh->parent_loc, local->loc.name,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+
+ /* stop right after the expected number of winds;
+ presumably so that frame state is not read once the
+ last reply may have been processed - TODO confirm */
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Reply handler that ignores the result and destroys the call stack the
+ * frame belongs to. Used for the chown wound from
+ * sh_missing_entries_newentry_cbk on a copied frame. Always returns 0.
+ */
+static int
+sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int op_errno, struct stat *stbuf)
+{
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+
+/*
+ * Reply handler shared by the mknod, mkdir and symlink calls that
+ * recreate a missing entry.
+ *
+ * cookie carries the index of the child that replied. When the entry was
+ * created (op_ret == 0), a chown to the uid/gid recorded for the source
+ * child is wound on a copy of the frame; its reply is consumed by
+ * sh_destroy_cbk. If the frame cannot be copied, the chown is skipped
+ * and an error is logged.
+ *
+ * Once the last outstanding reply has arrived, the entry lock is
+ * released through sh_missing_entries_finish. Always returns 0.
+ */
+static int
+sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret, int32_t op_errno,
+                                 inode_t *inode, struct stat *stbuf)
+{
+        afr_local_t     *local       = NULL;
+        afr_self_heal_t *sh          = NULL;
+        afr_private_t   *priv        = NULL;
+        call_frame_t    *chown_frame = NULL;
+        int              call_count  = 0;
+        int              child_index = 0;
+        struct stat     *buf         = NULL;
+
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        buf         = &sh->buf[sh->source];
+        child_index = (long) cookie;
+
+        if (op_ret == 0) {
+                chown_frame = copy_frame (frame);
+
+                if (!chown_frame) {
+                        /* winding on a NULL frame would crash */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory: not doing chown of %s "
+                                "on subvolume %s",
+                                local->loc.path,
+                                priv->children[child_index]->name);
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "chown %s to %d %d on subvolume %s",
+                                local->loc.path, buf->st_uid, buf->st_gid,
+                                priv->children[child_index]->name);
+
+                        STACK_WIND (chown_frame, sh_destroy_cbk,
+                                    priv->children[child_index],
+                                    priv->children[child_index]->fops->chown,
+                                    &local->loc,
+                                    buf->st_uid, buf->st_gid);
+                }
+        }
+
+        /* the critical section is empty; it is kept as it was */
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                sh_missing_entries_finish (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Recreate a missing non-directory, non-symlink entry: wind mknod, with
+ * the mode and device number recorded for the source child, to every
+ * child whose lookup failed with ENOENT. Replies are handled by
+ * sh_missing_entries_newentry_cbk, which receives the child index as
+ * cookie. Always returns 0.
+ */
+static int
+sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local        = frame->local;
+        afr_self_heal_t *sh           = &local->self_heal;
+        afr_private_t   *priv         = this->private;
+        int              i            = 0;
+        int              enoent_count = 0;
+        int              call_count   = 0;
+        mode_t           mode         = 0;
+        dev_t            dev          = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] == ENOENT)
+                        enoent_count += 1;
+        }
+
+        /* one reply is expected per child that lacks the entry */
+        call_count        = enoent_count;
+        local->call_count = enoent_count;
+
+        mode = sh->buf[sh->source].st_mode;
+        dev  = sh->buf[sh->source].st_dev;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "mknod %s mode 0%o on %d subvolumes",
+                local->loc.path, mode, enoent_count);
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] != ENOENT)
+                        continue;
+
+                STACK_WIND_COOKIE (frame,
+                                   sh_missing_entries_newentry_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->mknod,
+                                   &local->loc, mode, dev);
+
+                /* stop right after the last expected wind */
+                if (--call_count == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Recreate a missing directory: wind mkdir, with the mode recorded for
+ * the source child, to every child whose lookup failed with ENOENT.
+ * Replies are handled by sh_missing_entries_newentry_cbk, which receives
+ * the child index as cookie. Always returns 0.
+ */
+static int
+sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local        = frame->local;
+        afr_self_heal_t *sh           = &local->self_heal;
+        afr_private_t   *priv         = this->private;
+        int              i            = 0;
+        int              enoent_count = 0;
+        int              call_count   = 0;
+        mode_t           mode         = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] == ENOENT)
+                        enoent_count += 1;
+        }
+
+        /* one reply is expected per child that lacks the entry */
+        call_count        = enoent_count;
+        local->call_count = enoent_count;
+
+        mode = sh->buf[sh->source].st_mode;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "mkdir %s mode 0%o on %d subvolumes",
+                local->loc.path, mode, enoent_count);
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] != ENOENT)
+                        continue;
+
+                STACK_WIND_COOKIE (frame,
+                                   sh_missing_entries_newentry_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->mkdir,
+                                   &local->loc, mode);
+
+                /* stop right after the last expected wind */
+                if (--call_count == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Recreate a missing symbolic link: wind symlink with target 'link' to
+ * every child whose lookup failed with ENOENT. Replies are handled by
+ * sh_missing_entries_newentry_cbk, which receives the child index as
+ * cookie. Always returns 0.
+ */
+static int
+sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
+                            const char *link)
+{
+        afr_local_t     *local        = frame->local;
+        afr_self_heal_t *sh           = &local->self_heal;
+        afr_private_t   *priv         = this->private;
+        int              i            = 0;
+        int              enoent_count = 0;
+        int              call_count   = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] == ENOENT)
+                        enoent_count += 1;
+        }
+
+        /* one reply is expected per child that lacks the entry */
+        call_count        = enoent_count;
+        local->call_count = enoent_count;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "symlink %s -> %s on %d subvolumes",
+                local->loc.path, link, enoent_count);
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i] != ENOENT)
+                        continue;
+
+                STACK_WIND_COOKIE (frame,
+                                   sh_missing_entries_newentry_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->symlink,
+                                   link, &local->loc);
+
+                /* stop right after the last expected wind */
+                if (--call_count == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the readlink issued on the source child. A positive
+ * op_ret means a link target was read and the symlink is recreated on the
+ * children missing it; any other result goes straight to releasing the
+ * entry lock. Always returns 0.
+ */
+static int
+sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret, int32_t op_errno,
+                                 const char *link)
+{
+        if (op_ret <= 0) {
+                sh_missing_entries_finish (frame, this);
+                return 0;
+        }
+
+        sh_missing_entries_symlink (frame, this, link);
+
+        return 0;
+}
+
+
+/*
+ * Read the target of the symlink at local->loc from the source child,
+ * asking for at most 4096 bytes. The reply is handled by
+ * sh_missing_entries_readlink_cbk. Always returns 0.
+ */
+static int
+sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local  = frame->local;
+        afr_private_t *priv   = this->private;
+        int            source = 0;
+
+        source = local->self_heal.source;
+
+        STACK_WIND (frame, sh_missing_entries_readlink_cbk,
+                    priv->children[source],
+                    priv->children[source]->fops->readlink,
+                    &local->loc, 4096);
+
+        return 0;
+}
+
+
+/*
+ * Decide how to recreate the entry on the children that lack it.
+ *
+ * The lookup results are scanned: children that failed with ENOENT are
+ * counted, and the first child that answered successfully becomes
+ * sh->source and fixes the expected file type. A successful child with a
+ * different file type is a conflict.
+ *
+ * Outcomes:
+ *  - type conflict or unknown file type: local->govinda_gOvinda is set
+ *    (self-heal is aborted by afr_sh_missing_entries_done) and the entry
+ *    lock is released;
+ *  - no successful child, or nothing missing: the entry lock is released;
+ *  - otherwise mknod, readlink (followed by symlink) or mkdir is started,
+ *    depending on the file type.
+ *
+ * Always returns 0.
+ */
+static int
+sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local         = NULL;
+        afr_self_heal_t *sh            = NULL;
+        int              type          = 0;
+        int              i             = 0;
+        afr_private_t   *priv          = NULL;
+        int              enoent_count  = 0;
+        int              type_conflict = 0;
+
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->child_errno[i]) {
+                        if (sh->child_errno[i] == ENOENT)
+                                enoent_count++;
+                } else {
+                        if (type) {
+                                if (type != (sh->buf[i].st_mode & S_IFMT))
+                                        type_conflict = 1;
+                        } else {
+                                /* first good child: it is the source */
+                                sh->source = i;
+                                type = sh->buf[i].st_mode & S_IFMT;
+                        }
+                }
+        }
+
+        if (type_conflict) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "conflicting filetypes exist for path %s. returning.",
+                        local->loc.path);
+
+                local->govinda_gOvinda = 1;
+                sh_missing_entries_finish (frame, this);
+                return 0;
+        }
+
+        if (!type) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no source found for %s. all nodes down?. returning.",
+                        local->loc.path);
+                /* subvolumes down and/or file does not exist */
+                sh_missing_entries_finish (frame, this);
+                return 0;
+        }
+
+        if (enoent_count == 0) {
+                /* not a failure: there is simply nothing to recreate */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no missing files - %s. proceeding to metadata check",
+                        local->loc.path);
+                /* proceed to next step - metadata self-heal */
+                sh_missing_entries_finish (frame, this);
+                return 0;
+        }
+
+        switch (type) {
+        case S_IFSOCK:
+        case S_IFREG:
+        case S_IFBLK:
+        case S_IFCHR:
+        case S_IFIFO:
+                sh_missing_entries_mknod (frame, this);
+                break;
+        case S_IFLNK:
+                sh_missing_entries_readlink (frame, this);
+                break;
+        case S_IFDIR:
+                sh_missing_entries_mkdir (frame, this);
+                break;
+        default:
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unknown file type: 0%o", type);
+                local->govinda_gOvinda = 1;
+                sh_missing_entries_finish (frame, this);
+                break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the per-child lookup of the missing-entries phase.
+ *
+ * cookie carries the index of the child that replied. Under the frame
+ * lock, a successful reply stores the stat buffer for that child and a
+ * failed one stores op_errno for it. Once the last outstanding reply has
+ * arrived, sh_missing_entries_create decides what to recreate.
+ * Always returns 0.
+ */
+static int
+sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this,
+                               int32_t op_ret, int32_t op_errno,
+                               inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        afr_local_t   *local       = frame->local;
+        afr_private_t *priv        = this->private;
+        int            child_index = (long) cookie;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "path %s on subvolume %s => -1 (%s)",
+                                local->loc.path,
+                                priv->children[child_index]->name,
+                                strerror (op_errno));
+
+                        local->self_heal.child_errno[child_index] = op_errno;
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "path %s on subvolume %s is of mode 0%o",
+                                local->loc.path,
+                                priv->children[child_index]->name,
+                                buf->st_mode);
+
+                        local->self_heal.buf[child_index] = *buf;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (afr_frame_return (frame) == 0)
+                sh_missing_entries_create (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Look up local->loc on every child marked up. Replies are collected by
+ * sh_missing_entries_lookup_cbk, which receives the child index as
+ * cookie. Always returns 0.
+ */
+static int
+sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr_req = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ call_count = local->child_count;
+ priv = this->private;
+
+ /* number of replies to wait for, published before the first wind */
+ local->call_count = call_count;
+
+ xattr_req = dict_new();
+
+ /* the request dict carries AFR_ENTRY_PENDING with the byte size of one
+ int32_t per child; the result of the set is not checked, and a NULL
+ xattr_req is passed on to the lookups as is */
+ if (xattr_req)
+ ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING,
+ priv->child_count * sizeof(int32_t));
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame,
+ sh_missing_entries_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, xattr_req);
+
+ /* stop right after the expected number of winds */
+ if (!--call_count)
+ break;
+ }
+ }
+
+ /* drop the reference taken by dict_new() */
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return 0;
+}
+
+
+/*
+ * Reply handler for the entry lock requested by
+ * afr_self_heal_missing_entries.
+ *
+ * cookie is read as the child index for logging. A failed lock sets
+ * sh->op_failed. Once the last outstanding reply has arrived, the phase
+ * either releases the locks (if any lock failed) or goes on to look the
+ * entry up on the children. Always returns 0.
+ */
+static int
+sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *local       = frame->local;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child_index = (long) cookie;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "inode of %s on child %d locked",
+                                local->loc.path, child_index);
+                } else {
+                        sh->op_failed = 1;
+
+                        /* EAGAIN is only logged at debug level */
+                        gf_log (this->name,
+                                (op_errno == EAGAIN ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                                "locking inode of %s on child %d failed: %s",
+                                local->loc.path, child_index,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (afr_frame_return (frame) != 0)
+                return 0;
+
+        if (sh->op_failed == 1)
+                sh_missing_entries_finish (frame, this);
+        else
+                sh_missing_entries_lookup (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Start the missing-entries phase: build the parent location of
+ * local->loc and request a non-blocking write entry lock
+ * (ENTRYLK_LOCK_NB, ENTRYLK_WRLCK) on local->loc.name from every child
+ * marked up. Replies are collected by sh_missing_entries_lk_cbk.
+ * Always returns 0.
+ */
+static int
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
+
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "attempting to recreate missing entries for path=%s",
+ local->loc.path);
+
+ afr_build_parent_loc (&sh->parent_loc, &local->loc);
+
+ /* number of replies to wait for, published before the first wind */
+ call_count = local->child_count;
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ /* NOTE(review): no cookie is passed here, yet
+ sh_missing_entries_lk_cbk reads its cookie as the child
+ index - confirm what STACK_WIND supplies as cookie */
+ STACK_WIND (frame, sh_missing_entries_lk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ &sh->parent_loc, local->loc.name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+ /* stop right after the expected number of winds */
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Entry point of self-heal for the file or directory at local->loc.
+ *
+ * Stores completion_cbk, which is invoked with (frame, this) when
+ * self-heal ends, and allocates the per-child bookkeeping of the
+ * self-heal state: stat buffers, errno values, success flags, xattr
+ * dicts, source flags, and the child_count x child_count pending and
+ * delta matrices.
+ *
+ * If the preceding lookup saw both successes and ENOENT replies, the
+ * missing-entries phase is started; otherwise that phase is skipped via
+ * afr_sh_missing_entries_done. Always returns 0.
+ *
+ * NOTE(review): none of the CALLOC results are checked; a failed
+ * allocation is dereferenced right below or in the next phase. A fix
+ * needs the cleanup path for the self-heal state, which is not visible
+ * from here.
+ */
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this,
+ int (*completion_cbk) (call_frame_t *, xlator_t *))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "performing self heal on %s (metadata=%d data=%d entry=%d)",
+ local->loc.path,
+ local->need_metadata_self_heal,
+ local->need_data_self_heal,
+ local->need_entry_self_heal);
+
+ sh->completion_cbk = completion_cbk;
+
+ /* one element per child in each of these arrays; the two CALLOC
+ arguments appear in both orders below, the product is the same */
+ sh->buf = CALLOC (priv->child_count, sizeof (struct stat));
+ sh->child_errno = CALLOC (priv->child_count, sizeof (int));
+ sh->success = CALLOC (priv->child_count, sizeof (int));
+ sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *));
+ sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count);
+
+ /* matrices are arrays of row pointers, one zeroed row per child */
+ sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sh->pending_matrix[i] = CALLOC (sizeof (int32_t),
+ priv->child_count);
+ }
+
+ sh->delta_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sh->delta_matrix[i] = CALLOC (sizeof (int32_t),
+ priv->child_count);
+ }
+
+ if (local->success_count && local->enoent_count) {
+ afr_self_heal_missing_entries (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to metadata check on %s",
+ local->loc.path);
+ afr_sh_missing_entries_done (frame, this);
+ }
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
new file mode 100644
index 00000000000..9dd597f0787
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -0,0 +1,66 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __AFR_SELF_HEAL_COMMON_H__
+#define __AFR_SELF_HEAL_COMMON_H__
+
+/* true when st_size is larger than st_blocks * 512 */
+#define FILE_HAS_HOLES(buf) (((buf)->st_size) > ((buf)->st_blocks * 512))
+
+/* index of the first non-zero entry of sources[], or -1 if there is none */
+int
+afr_sh_select_source (int sources[], int child_count);
+
+/* number of zero entries in sources[] */
+int
+afr_sh_sink_count (int sources[], int child_count);
+
+/* number of non-zero entries in sources[] */
+int
+afr_sh_source_count (int sources[], int child_count);
+
+/* clear sources[i] for every child with a non-zero child_errno[i] */
+int
+afr_sh_supress_errenous_children (int sources[], int child_errno[],
+                                  int child_count);
+
+/* clear sources[i] for children without usable data under 'key', or for
+   zero-sized files when no child has 'key' at all */
+int
+afr_sh_supress_empty_children (int sources[], dict_t *xattr[],
+                               struct stat *buf,
+                               int child_count, const char *key);
+
+/* log the pending matrix at debug level, one line per row */
+void
+afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
+
+/* fill the pending matrix from the value stored under 'key' in each
+   child's xattr dict */
+void
+afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[],
+                             int child_count, const char *key);
+
+/* delta = -pending for the columns whose success[] flag is set, 0 for the
+   rest; success[] is an int array, matching the definition */
+void
+afr_sh_pending_to_delta (int32_t *pending_matrix[], int32_t *delta_matrix[],
+                         int success[], int child_count);
+
+/* set sources[i] for every child whose matrix column is all zero and
+   return the number of such children */
+int
+afr_sh_mark_sources (int32_t *pending_matrix[], int sources[],
+                     int child_count);
+
+/* store each row of the delta matrix under 'key' in the matching dict */
+int
+afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[],
+                       int child_count, const char *key);
+
+/* 1 if every element of the pending matrix is zero, 0 otherwise */
+int
+afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
+
+
+#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
new file mode 100644
index 00000000000..3a48da48587
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -0,0 +1,1030 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+
+
+/*
+ * Last step of the data self-heal sequence: log that the heal of
+ * local->loc.path is complete and invoke the completion callback stored
+ * in the self-heal state.  Always returns 0.
+ */
+int
+afr_sh_data_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+
+        /* TODO: cleanup sh->* */
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "self heal of %s completed",
+                heal_local->loc.path);
+
+        heal->completion_cbk (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every flush wound by afr_sh_data_close().  Once the last
+ * outstanding flush has returned, the healing fd is unreferenced and
+ * cleared and afr_sh_data_done() is invoked.  Always returns 0.
+ */
+int
+afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              pending    = 0;
+
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending != 0) {
+                /* other flushes are still in flight */
+                return 0;
+        }
+
+        fd_unref (heal->healing_fd);
+        heal->healing_fd = NULL;
+        afr_sh_data_done (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Flush the healing fd on the source and on every sink that is up.
+ * When no healing fd exists, go straight to afr_sh_data_done().
+ * The number of expected callbacks is active_sinks + 1 (the source).
+ * Always returns 0.
+ */
+int
+afr_sh_data_close (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_private_t   *conf       = this->private;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              remaining  = 0;
+        int              child      = 0;
+
+        if (heal->healing_fd == NULL) {
+                afr_sh_data_done (frame, this);
+                return 0;
+        }
+
+        remaining = heal->active_sinks + 1;
+        heal_local->call_count = remaining;
+
+        /* the source first */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "closing fd of %s on %s",
+                heal_local->loc.path, conf->children[heal->source]->name);
+
+        STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
+                           (void *) (long) heal->source,
+                           conf->children[heal->source],
+                           conf->children[heal->source]->fops->flush,
+                           heal->healing_fd);
+        remaining--;
+
+        /* then every sink that is up */
+        for (child = 0; child < conf->child_count; child++) {
+                if (!heal->sources[child] && heal_local->child_up[child]) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "closing fd of %s on %s",
+                                heal_local->loc.path,
+                                conf->children[child]->name);
+
+                        STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
+                                           (void *) (long) child,
+                                           conf->children[child],
+                                           conf->children[child]->fops->flush,
+                                           heal->healing_fd);
+
+                        remaining--;
+                        if (remaining == 0) {
+                                /* all expected calls have been wound */
+                                break;
+                        }
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for every inodelk(F_UNLCK) wound by afr_sh_data_unlock().
+ * Logs the outcome for the child identified by `cookie` and, once the
+ * last outstanding unlock has returned, continues with
+ * afr_sh_data_close().  Always returns 0.
+ *
+ * The log messages describe an *unlock*; they previously said
+ * "locking"/"locked", which made unlock failures look like lock
+ * failures in the logs.  Wording now matches afr_sh_entry_unlck_cbk().
+ */
+int
+afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local       = NULL;
+        int          call_count  = 0;
+        int          child_index = (long) cookie;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "unlocking inode of %s on child %d failed: %s",
+                                local->loc.path, child_index,
+                                strerror (op_errno));
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "unlocked inode of %s on child %d",
+                                local->loc.path, child_index);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                afr_sh_data_close (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Release the whole-file inode lock on every child that is up by
+ * winding inodelk(F_SETLK, F_UNLCK).  The number of expected callbacks
+ * is taken from local->child_count.  Always returns 0.
+ *
+ * The flock structure is zero-initialised and l_whence is set
+ * explicitly: previously only l_start, l_len and l_type were assigned,
+ * so l_whence and l_pid were passed down with indeterminate values.
+ */
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+{
+        struct flock flock = {0, };
+        int          i = 0;
+        int          call_count = 0;
+
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        local = frame->local;
+        priv  = this->private;
+
+        call_count = local->child_count;
+
+        local->call_count = call_count;
+
+        /* start 0 + len 0 relative to the file start == the whole file */
+        flock.l_whence = SEEK_SET;
+        flock.l_start  = 0;
+        flock.l_len    = 0;
+        flock.l_type   = F_UNLCK;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "unlocking %s on subvolume %s",
+                                local->loc.path, priv->children[i]->name);
+
+                        STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->inodelk,
+                                           &local->loc, F_SETLK, &flock);
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Common exit path of data self-heal: log and start releasing the
+ * inode locks via afr_sh_data_unlock().  Always returns 0.
+ */
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *heal_local = frame->local;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "finishing data selfheal of %s", heal_local->loc.path);
+
+        afr_sh_data_unlock (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every xattrop wound by afr_sh_data_erase_pending().
+ * The result of the individual call is not inspected; when the last
+ * outstanding call has returned, afr_sh_data_finish() is invoked.
+ * Always returns 0.
+ */
+int
+afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret,
+                               int32_t op_errno, dict_t *xattr)
+{
+        int pending = 0;
+
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_data_finish (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Convert the pending matrix into a delta matrix and wind an
+ * xattrop(GF_XATTROP_ADD_ARRAY) carrying the AFR_DATA_PENDING delta to
+ * every child for which an xattr dictionary was recorded in sh->xattr.
+ * afr_sh_data_erase_pending_cbk() continues with afr_sh_data_finish()
+ * once all calls have returned.  Always returns 0.
+ *
+ * Robustness fixes over the previous version:
+ *  - the result of CALLOC is checked before it is dereferenced;
+ *  - a failed get_new_dict() is skipped instead of being dict_ref'ed;
+ *  - when no call at all would be wound, afr_sh_data_finish() is
+ *    invoked directly, because no callback would ever fire and the
+ *    self-heal would stall.
+ */
+int
+afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local = NULL;
+        afr_self_heal_t  *sh = NULL;
+        afr_private_t    *priv = NULL;
+        int               call_count = 0;
+        int               i = 0;
+        dict_t          **erase_xattr = NULL;
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        afr_sh_pending_to_delta (sh->pending_matrix, sh->delta_matrix,
+                                 sh->success, priv->child_count);
+
+        erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
+        if (!erase_xattr) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sh->xattr[i])
+                        continue;
+
+                erase_xattr[i] = get_new_dict();
+                if (!erase_xattr[i]) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        continue;
+                }
+
+                dict_ref (erase_xattr[i]);
+                call_count++;
+        }
+
+        afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr,
+                               priv->child_count, AFR_DATA_PENDING);
+
+        if (call_count == 0) {
+                /* nothing to wind, so no callback would ever run */
+                FREE (erase_xattr);
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        local->call_count = call_count;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!erase_xattr[i])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "erasing pending flags from %s on %s",
+                        local->loc.path, priv->children[i]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->xattrop,
+                                   &local->loc,
+                                   GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
+                if (!--call_count)
+                        break;
+        }
+
+        /* drop the references taken above */
+        for (i = 0; i < priv->child_count; i++) {
+                if (erase_xattr[i]) {
+                        dict_unref (erase_xattr[i]);
+                }
+        }
+        FREE (erase_xattr);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every ftruncate wound by afr_sh_data_trim_sinks().
+ * Logs success or failure for the child identified by `cookie`; after
+ * the last outstanding call returns, continues with
+ * afr_sh_data_erase_pending().  Always returns 0.
+ */
+int
+afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_private_t *conf       = this->private;
+        afr_local_t   *heal_local = frame->local;
+        int            child      = (long) cookie;
+        int            pending    = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "ftruncate of %s on subvolume %s completed",
+                                heal_local->loc.path,
+                                conf->children[child]->name);
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "ftruncate of %s on subvolume %s failed (%s)",
+                                heal_local->loc.path,
+                                conf->children[child]->name,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_data_erase_pending (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Wind an ftruncate to sh->file_size on the healing fd of every sink
+ * that is up.  The number of expected callbacks is sh->active_sinks.
+ * Always returns 0.
+ */
+int
+afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t   *conf       = this->private;
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int             *is_source  = heal->sources;
+        int              remaining  = heal->active_sinks;
+        int              child      = 0;
+
+        heal_local->call_count = remaining;
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (!is_source[child] && heal_local->child_up[child]) {
+                        STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
+                                           (void *) (long) child,
+                                           conf->children[child],
+                                           conf->children[child]->fops->ftruncate,
+                                           heal->healing_fd, heal->file_size);
+
+                        remaining--;
+                        if (remaining == 0) {
+                                break;
+                        }
+                }
+        }
+
+        return 0;
+}
+
+
+/* defined further below; needed by the write callback */
+int
+afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this);
+
+/*
+ * Callback for every writev wound to a sink by afr_sh_data_read_cbk().
+ * A failed write marks the self-heal as failed (sh->op_failed).  After
+ * the last outstanding write returns, the copy loop is resumed through
+ * afr_sh_data_read_write_iter().  Always returns 0.
+ */
+int
+afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_private_t   *conf       = this->private;
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              child      = (long) cookie;
+        int              pending    = 0;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
+                op_ret, heal_local->loc.path, child, heal->offset - op_ret);
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "write to %s failed on subvolume %s (%s)",
+                                heal_local->loc.path,
+                                conf->children[child]->name,
+                                strerror (op_errno));
+                        heal->op_failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_data_read_write_iter (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for the readv wound on the source by afr_sh_data_read_write().
+ *
+ *  - op_ret == -1: the read failed.  The self-heal is marked as failed
+ *    and aborted through afr_sh_data_finish().  Previously a read error
+ *    was handled like end-of-file, so the sinks were truncated and the
+ *    pending flags erased although the data had not been copied.
+ *  - op_ret == 0: nothing more to read; continue with
+ *    afr_sh_data_trim_sinks().
+ *  - op_ret > 0: advance sh->offset and write the block to every sink
+ *    that is up.  If the source file was flagged as having holes and
+ *    iov_0filled() returns 0 for the block, the write is skipped and
+ *    the copy loop moves on.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_data_read_cbk (call_frame_t *frame, void *cookie,
+                      xlator_t *this, int32_t op_ret, int32_t op_errno,
+                      struct iovec *vector, int32_t count, struct stat *buf)
+{
+        afr_private_t   *priv = NULL;
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh = NULL;
+
+        int child_index = (long) cookie;
+        int i = 0;
+        int call_count = 0;
+
+        off_t offset = 0;
+
+        priv  = this->private;
+        local = frame->local;
+        sh    = &local->self_heal;
+
+        call_count = sh->active_sinks;
+
+        local->call_count = call_count;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "read from %s failed on subvolume %s (%s)",
+                        local->loc.path,
+                        priv->children[child_index]->name,
+                        strerror (op_errno));
+                sh->op_failed = 1;
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "read %d bytes of data from %s on child %d, offset %"PRId64"",
+                op_ret, local->loc.path, child_index, sh->offset);
+
+        if (op_ret == 0) {
+                afr_sh_data_trim_sinks (frame, this);
+                return 0;
+        }
+
+        /* what if we read less than block size? */
+        offset = sh->offset;
+        sh->offset += op_ret;
+
+        frame->root->req_refs = frame->root->rsp_refs;
+
+        if (sh->file_has_holes) {
+                if (iov_0filled (vector, count) == 0) {
+                        /* the iter function depends on the
+                           sh->offset already being updated
+                           above
+                        */
+                        afr_sh_data_read_write_iter (frame, this);
+                        return 0;
+                }
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (sh->sources[i] || !local->child_up[i])
+                        continue;
+
+                /* this is a sink, so write to it */
+                STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->writev,
+                                   sh->healing_fd, vector, count, offset);
+
+                if (!--call_count)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Wind a readv of sh->block_size bytes at sh->offset on the source
+ * child, using the healing fd.  The result is delivered to
+ * afr_sh_data_read_cbk().  Always returns 0.
+ */
+int
+afr_sh_data_read_write (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t   *conf       = this->private;
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+
+        STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk,
+                           (void *) (long) heal->source,
+                           conf->children[heal->source],
+                           conf->children[heal->source]->fops->readv,
+                           heal->healing_fd, heal->block_size,
+                           heal->offset);
+
+        return 0;
+}
+
+
+/*
+ * One iteration of the copy loop:
+ *  - if a previous step failed (sh->op_failed), abort through
+ *    afr_sh_data_finish();
+ *  - if the offset has reached the recorded file size, continue with
+ *    afr_sh_data_trim_sinks();
+ *  - otherwise read the next block via afr_sh_data_read_write().
+ * Always returns 0.
+ */
+int
+afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+
+        if (heal->op_failed) {
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        if (heal->offset < heal->file_size) {
+                afr_sh_data_read_write (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "closing fd's of %s",
+                heal_local->loc.path);
+        afr_sh_data_trim_sinks (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every open wound by afr_sh_data_open().  A failed open
+ * marks the self-heal as failed.  After the last outstanding open
+ * returns, either abort through afr_sh_data_finish() (on failure) or
+ * start copying with afr_sh_data_read_write().  Always returns 0.
+ */
+int
+afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        afr_private_t   *conf       = this->private;
+        int              child      = (long) cookie;
+        int              pending    = 0;
+
+        /* TODO: some of the open's might fail.
+           In that case, modify cleanup fn to send flush on those
+           fd's which are already open */
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "open of %s failed on child %s (%s)",
+                                heal_local->loc.path,
+                                conf->children[child]->name,
+                                strerror (op_errno));
+                        heal->op_failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending != 0) {
+                return 0;
+        }
+
+        if (heal->op_failed) {
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "fd for %s opened, commencing sync",
+                heal_local->loc.path);
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "sourcing file %s from %s to other sinks",
+                heal_local->loc.path, conf->children[heal->source]->name);
+
+        afr_sh_data_read_write (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Create the healing fd, record block size, source file size and
+ * whether the source file has holes, then wind open() on the source
+ * (O_RDONLY) and on every sink that is up (O_WRONLY).  The number of
+ * expected callbacks is active_sinks + 1.  Always returns 0.
+ *
+ * If fd_create() fails, the self-heal is marked as failed and aborted
+ * through afr_sh_data_finish() instead of winding open() with a NULL
+ * fd; afr_sh_data_close() already copes with a missing healing fd.
+ */
+int
+afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+{
+        int i = 0;
+        int call_count = 0;
+
+        int  source = -1;
+        int *sources = NULL;
+
+        fd_t *fd = NULL;
+
+        afr_local_t     *local = NULL;
+        afr_private_t   *priv = NULL;
+        afr_self_heal_t *sh = NULL;
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        fd = fd_create (local->loc.inode, frame->root->pid);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not create fd for self-heal of %s",
+                        local->loc.path);
+                sh->healing_fd = NULL;
+                sh->op_failed  = 1;
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+        sh->healing_fd = fd;
+
+        call_count = sh->active_sinks + 1;
+        local->call_count = call_count;
+
+        source  = local->self_heal.source;
+        sources = local->self_heal.sources;
+
+        sh->block_size = 65536;
+        sh->file_size  = sh->buf[source].st_size;
+
+        if (FILE_HAS_HOLES (&sh->buf[source]))
+                sh->file_has_holes = 1;
+
+        /* open source */
+        STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
+                           (void *) (long) source,
+                           priv->children[source],
+                           priv->children[source]->fops->open,
+                           &local->loc, O_RDONLY|O_LARGEFILE, fd);
+        call_count--;
+
+        /* open sinks */
+        for (i = 0; i < priv->child_count; i++) {
+                if (sources[i] || !local->child_up[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->open,
+                                   &local->loc,
+                                   O_WRONLY|O_LARGEFILE, fd);
+
+                if (!--call_count)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Count the sinks that are up (children that are not sources), flag
+ * them and the source in sh->success, and either finish right away
+ * when there is no active sink or proceed to afr_sh_data_open().
+ * Always returns 0.
+ */
+int
+afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        afr_private_t   *conf       = this->private;
+        int              chosen     = heal->source;
+        int              sink_count = 0;
+        int              child      = 0;
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (heal->sources[child] != 0) {
+                        continue;
+                }
+                if (heal_local->child_up[child] != 1) {
+                        continue;
+                }
+
+                sink_count++;
+                heal->success[child] = 1;
+        }
+        heal->success[chosen] = 1;
+
+        if (sink_count == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no active sinks for performing self-heal on file %s",
+                        heal_local->loc.path);
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+        heal->active_sinks = sink_count;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "syncing data of %s from subvolume %s to %d active sinks",
+                heal_local->loc.path, conf->children[chosen]->name,
+                sink_count);
+
+        afr_sh_data_open (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Decide which child is the source for data self-heal.
+ *
+ * Builds and prints the pending matrix from the AFR_DATA_PENDING
+ * xattrs, marks sources, then suppresses empty and erroneous children.
+ * If no source remains and a favorite child is configured and had no
+ * error, that child is used as the source.  If there is still no
+ * source, an error is logged and the heal finishes without syncing.
+ * Otherwise a source is selected, any other error-free child whose
+ * size differs from the source's is demoted to a sink, and
+ * afr_sh_data_sync_prepare() is called.  Always returns 0.
+ */
+int
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local   = frame->local;
+        afr_self_heal_t *heal         = &heal_local->self_heal;
+        afr_private_t   *conf         = this->private;
+        int              source_count = 0;
+        int              chosen       = 0;
+        int              child        = 0;
+
+        afr_sh_build_pending_matrix (heal->pending_matrix, heal->xattr,
+                                     conf->child_count, AFR_DATA_PENDING);
+
+        afr_sh_print_pending_matrix (heal->pending_matrix, this);
+
+        afr_sh_mark_sources (heal->pending_matrix, heal->sources,
+                             conf->child_count);
+
+        afr_sh_supress_empty_children (heal->sources, heal->xattr, heal->buf,
+                                       conf->child_count, AFR_DATA_PENDING);
+
+        afr_sh_supress_errenous_children (heal->sources, heal->child_errno,
+                                          conf->child_count);
+
+        source_count = afr_sh_source_count (heal->sources, conf->child_count);
+
+        /* the favorite_child test must come before it is used as index */
+        if ((source_count == 0)
+            && (conf->favorite_child != -1)
+            && (heal->child_errno[conf->favorite_child] == 0)) {
+
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Picking favorite child %s as authentic source to resolve conflicting data of %s",
+                        conf->children[conf->favorite_child]->name,
+                        heal_local->loc.path);
+
+                heal->sources[conf->favorite_child] = 1;
+
+                source_count = afr_sh_source_count (heal->sources,
+                                                    conf->child_count);
+        }
+
+        if (source_count == 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unable to resolve conflicting data of %s. "
+                        "Please resolve manually by deleting the file %s "
+                        "from all but the preferred subvolume. "
+                        "Please consider 'option favorite-child <>'",
+                        heal_local->loc.path, heal_local->loc.path);
+
+                heal_local->govinda_gOvinda = 1;
+
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        chosen = afr_sh_select_source (heal->sources, conf->child_count);
+        heal->source = chosen;
+
+        /* detect changes not visible through pending flags -- JIC */
+        for (child = 0; child < conf->child_count; child++) {
+                if ((child != chosen)
+                    && !heal->child_errno[child]
+                    && SIZE_DIFFERS (&heal->buf[child], &heal->buf[chosen])) {
+                        heal->sources[child] = 0;
+                }
+        }
+
+        afr_sh_data_sync_prepare (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every lookup wound by afr_sh_data_lookup().  On success
+ * the returned xattr dictionary (with a new reference) and stat buffer
+ * are stored for the child identified by `cookie`.  After the last
+ * outstanding lookup returns, continues with afr_sh_data_fix().
+ * Always returns 0.
+ */
+int
+afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie,
+                        xlator_t *this, int32_t op_ret, int32_t op_errno,
+                        inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              child      = (long) cookie;
+        int              pending    = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        heal->xattr[child] = dict_ref (xattr);
+                        heal->buf[child]   = *buf;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_data_fix (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Wind a lookup on every child that is up, asking for the
+ * AFR_DATA_PENDING xattr (child_count * sizeof(int32_t) bytes) through
+ * a request dictionary.  The number of expected callbacks is taken
+ * from local->child_count.  Always returns 0.
+ *
+ * NOTE(review): the result of dict_set_uint64() is stored but never
+ * examined, and a NULL request dictionary is passed on as-is.
+ */
+int
+afr_sh_data_lookup (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *conf       = this->private;
+        afr_local_t   *heal_local = frame->local;
+        dict_t        *request    = NULL;
+        int            remaining  = 0;
+        int            child      = 0;
+        int            ret        = 0;
+
+        remaining = heal_local->child_count;
+        heal_local->call_count = remaining;
+
+        request = dict_new();
+        if (request != NULL) {
+                ret = dict_set_uint64 (request, AFR_DATA_PENDING,
+                                       conf->child_count * sizeof(int32_t));
+        }
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (!heal_local->child_up[child]) {
+                        continue;
+                }
+
+                STACK_WIND_COOKIE (frame, afr_sh_data_lookup_cbk,
+                                   (void *) (long) child,
+                                   conf->children[child],
+                                   conf->children[child]->fops->lookup,
+                                   &heal_local->loc, request);
+
+                remaining--;
+                if (remaining == 0) {
+                        break;
+                }
+        }
+
+        if (request != NULL) {
+                dict_unref (request);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for every inodelk(F_WRLCK) wound by afr_sh_data_lock().
+ * A failed lock marks the self-heal as failed; EAGAIN is logged at
+ * debug level, any other error at error level.  After the last
+ * outstanding call returns, either abort through afr_sh_data_finish()
+ * or continue with afr_sh_data_lookup().  Always returns 0.
+ */
+int
+afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              child      = (long) cookie;
+        int              pending    = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "inode of %s on child %d locked",
+                                heal_local->loc.path, child);
+                } else {
+                        heal->op_failed = 1;
+
+                        gf_log (this->name,
+                                (op_errno == EAGAIN ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                                "locking of %s on child %d failed: %s",
+                                heal_local->loc.path, child,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending != 0) {
+                return 0;
+        }
+
+        if (heal->op_failed) {
+                afr_sh_data_finish (frame, this);
+                return 0;
+        }
+
+        afr_sh_data_lookup (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Take a whole-file write lock on the inode on every child that is up
+ * by winding inodelk(F_SETLK, F_WRLCK).  The number of expected
+ * callbacks is taken from local->child_count.  Always returns 0.
+ *
+ * The flock structure is zero-initialised and l_whence is set
+ * explicitly: previously only l_start, l_len and l_type were assigned,
+ * so l_whence and l_pid were passed down with indeterminate values.
+ */
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
+{
+        struct flock flock = {0, };
+        int          i = 0;
+        int          call_count = 0;
+
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+
+        local = frame->local;
+        priv  = this->private;
+
+        call_count = local->child_count;
+
+        local->call_count = call_count;
+
+        /* start 0 + len 0 relative to the file start == the whole file */
+        flock.l_whence = SEEK_SET;
+        flock.l_start  = 0;
+        flock.l_len    = 0;
+        flock.l_type   = F_WRLCK;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "locking %s on subvolume %s",
+                                local->loc.path, priv->children[i]->name);
+
+                        STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->inodelk,
+                                           &local->loc, F_SETLK, &flock);
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Entry point of data self-heal.  Starts with afr_sh_data_lock() when
+ * the file needs data self-heal and data self-heal is enabled in the
+ * translator's private configuration; otherwise logs and completes
+ * immediately through afr_sh_data_done().  Always returns 0.
+ */
+int
+afr_self_heal_data (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *conf       = this->private;
+        afr_local_t   *heal_local = frame->local;
+
+        if (!(heal_local->need_data_self_heal && conf->data_self_heal)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "not doing data self heal on %s",
+                        heal_local->loc.path);
+                afr_sh_data_done (frame, this);
+                return 0;
+        }
+
+        afr_sh_data_lock (frame, this);
+
+        return 0;
+}
+
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
new file mode 100644
index 00000000000..ec341922ee7
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -0,0 +1,2038 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+
+
+/*
+ * Last step of the entry self-heal sequence: log that the heal of
+ * local->loc.path is complete and invoke the completion callback stored
+ * in the self-heal state.  Always returns 0.
+ */
+int
+afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+
+        /* TODO: cleanup sh->* */
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "self heal of %s completed",
+                heal_local->loc.path);
+
+        heal->completion_cbk (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every entrylk unlock wound by afr_sh_entry_unlock().
+ * Logs the outcome for the child identified by `cookie`.  After the
+ * last outstanding call returns, the healing fd (if any) is
+ * unreferenced and cleared and afr_sh_entry_done() is invoked.
+ * Always returns 0.
+ */
+int
+afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              child      = (long) cookie;
+        int              pending    = 0;
+
+        /* TODO: what if unlock fails? */
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "unlocked inode of %s on child %d",
+                                heal_local->loc.path, child);
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "unlocking inode of %s on child %d failed: %s",
+                                heal_local->loc.path, child,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending != 0) {
+                return 0;
+        }
+
+        if (heal->healing_fd) {
+                fd_unref (heal->healing_fd);
+        }
+        heal->healing_fd = NULL;
+        afr_sh_entry_done (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Wind entrylk(ENTRYLK_UNLOCK, ENTRYLK_WRLCK) with a NULL basename on
+ * every child that is up.  The number of expected callbacks is taken
+ * from local->child_count.  Always returns 0.
+ */
+int
+afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *heal_local = frame->local;
+        afr_private_t *conf       = this->private;
+        int            remaining  = 0;
+        int            child      = 0;
+
+        remaining = heal_local->child_count;
+        heal_local->call_count = remaining;
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (!heal_local->child_up[child]) {
+                        continue;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "unlocking %s on subvolume %s",
+                        heal_local->loc.path, conf->children[child]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk,
+                                   (void *) (long) child,
+                                   conf->children[child],
+                                   conf->children[child]->fops->entrylk,
+                                   &heal_local->loc, NULL,
+                                   ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+
+                remaining--;
+                if (remaining == 0) {
+                        break;
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Common exit path of entry self-heal: log and start releasing the
+ * entry locks via afr_sh_entry_unlock().  Always returns 0.
+ */
+int
+afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *heal_local = frame->local;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "finishing entry selfheal of %s", heal_local->loc.path);
+
+        afr_sh_entry_unlock (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Callback for every xattrop wound by afr_sh_entry_erase_pending().
+ * The result of the individual call is not inspected; when the last
+ * outstanding call has returned, afr_sh_entry_finish() is invoked.
+ * Always returns 0.
+ */
+int
+afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
+                                xlator_t *this, int32_t op_ret,
+                                int32_t op_errno, dict_t *xattr)
+{
+        int pending = 0;
+
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_entry_finish (frame, this);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Convert the pending matrix into a delta matrix and wind an
+ * xattrop(GF_XATTROP_ADD_ARRAY) carrying the AFR_ENTRY_PENDING delta to
+ * every child for which an xattr dictionary was recorded in sh->xattr.
+ * afr_sh_entry_erase_pending_cbk() continues with afr_sh_entry_finish()
+ * once all calls have returned.  Always returns 0.
+ *
+ * Robustness fixes over the previous version:
+ *  - the result of CALLOC is checked before it is dereferenced;
+ *  - a failed get_new_dict() is skipped instead of being dict_ref'ed;
+ *  - when no call at all would be wound, afr_sh_entry_finish() is
+ *    invoked directly, because no callback would ever fire and the
+ *    self-heal would stall.
+ */
+int
+afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local = NULL;
+        afr_self_heal_t  *sh = NULL;
+        afr_private_t    *priv = NULL;
+        int               call_count = 0;
+        int               i = 0;
+        dict_t          **erase_xattr = NULL;
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        afr_sh_pending_to_delta (sh->pending_matrix, sh->delta_matrix,
+                                 sh->success, priv->child_count);
+
+        erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
+        if (!erase_xattr) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sh->xattr[i])
+                        continue;
+
+                erase_xattr[i] = get_new_dict();
+                if (!erase_xattr[i]) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        continue;
+                }
+
+                dict_ref (erase_xattr[i]);
+                call_count++;
+        }
+
+        afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr,
+                               priv->child_count, AFR_ENTRY_PENDING);
+
+        if (call_count == 0) {
+                /* nothing to wind, so no callback would ever run */
+                FREE (erase_xattr);
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        local->call_count = call_count;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!erase_xattr[i])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "erasing pending flags from %s on %s",
+                        local->loc.path, priv->children[i]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->xattrop,
+                                   &local->loc,
+                                   GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
+                if (!--call_count)
+                        break;
+        }
+
+        /* drop the references taken above */
+        for (i = 0; i < priv->child_count; i++) {
+                if (erase_xattr[i]) {
+                        dict_unref (erase_xattr[i]);
+                }
+        }
+        FREE (erase_xattr);
+
+        return 0;
+}
+
+
+
+/*
+ * Return the index of the child to use as the next source after
+ * `current_active_source`, or -1 when there is none.
+ *
+ * When a definite source exists (sh->source != -1) it is returned
+ * unless it is already the current one.  Otherwise the first child
+ * with an index greater than the current one that is up and not marked
+ * in sh->sources is returned (each such child becomes the source in
+ * turn for the 'conservative decision' of merging all entries).
+ */
+static int
+next_active_source (call_frame_t *frame, xlator_t *this,
+                    int current_active_source)
+{
+        afr_private_t   *conf         = this->private;
+        afr_local_t     *heal_local   = frame->local;
+        afr_self_heal_t *heal         = &heal_local->self_heal;
+        int              fixed_source = heal->source;
+        int              idx          = 0;
+
+        if (fixed_source != -1) {
+                if (current_active_source != fixed_source) {
+                        return fixed_source;
+                }
+                return -1;
+        }
+
+        for (idx = 0; idx < conf->child_count; idx++) {
+                if (idx <= current_active_source) {
+                        continue;
+                }
+                if (heal->sources[idx] != 0) {
+                        continue;
+                }
+                if (heal_local->child_up[idx] != 1) {
+                        continue;
+                }
+
+                return idx;
+        }
+
+        return -1;
+}
+
+
+
+/*
+ * Return the index of the first child after `current_active_sink`
+ * that is up and not marked in sh->sources, or -1 when there is none.
+ */
+static int
+next_active_sink (call_frame_t *frame, xlator_t *this,
+                  int current_active_sink)
+{
+        afr_private_t   *conf       = this->private;
+        afr_local_t     *heal_local = frame->local;
+        afr_self_heal_t *heal       = &heal_local->self_heal;
+        int              idx        = 0;
+
+        for (idx = 0; idx < conf->child_count; idx++) {
+                if (idx <= current_active_sink) {
+                        continue;
+                }
+                if (heal->sources[idx] != 0) {
+                        continue;
+                }
+                if (heal_local->child_up[idx] != 1) {
+                        continue;
+                }
+
+                return idx;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Fill `child` with a location for the entry `name` inside the
+ * directory described by `parent`:
+ *   - child->path   is a newly allocated "<parent path>/<name>"
+ *                   (or "/<name>" when the parent is the root "/");
+ *   - child->name   points at the last component inside child->path;
+ *   - child->parent is a new reference on parent->inode;
+ *   - child->inode  is a new inode from the parent's inode table.
+ *
+ * Returns 0 on success and -1 on failure.  On failure after `child`
+ * has been touched it is cleaned up with loc_wipe().
+ *
+ * Fixes over the previous version:
+ *   - a NULL `child` returns -1 immediately instead of jumping to the
+ *     cleanup path, which handed the NULL pointer to loc_wipe();
+ *   - the return value of asprintf() is checked; on failure the
+ *     contents of the output pointer are undefined, so testing
+ *     child->path alone was not reliable.
+ */
+int
+build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+        int ret = -1;
+        int len = -1;
+
+        if (!child) {
+                /* nothing was allocated, so there is nothing to wipe */
+                return -1;
+        }
+
+        if (strcmp (parent->path, "/") == 0)
+                len = asprintf ((char **)&child->path, "/%s", name);
+        else
+                len = asprintf ((char **)&child->path, "%s/%s",
+                                parent->path, name);
+
+        if (len == -1) {
+                /* do not let loc_wipe() see an indeterminate pointer */
+                child->path = NULL;
+        }
+
+        if (!child->path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        child->name = strrchr (child->path, '/');
+        if (child->name)
+                child->name++;
+
+        child->parent = inode_ref (parent->inode);
+        child->inode  = inode_new (parent->inode->table);
+
+        if (!child->inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret == -1)
+                loc_wipe (child);
+
+        return ret;
+}
+
+
+/* forward declarations for the expunge state machine */
+int
+afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
+                             int active_src);
+
+/*
+ * Account for one finished expunge of an entry on `active_src`.  When
+ * the last outstanding one has finished, continue with
+ * afr_sh_entry_expunge_subvol() on the same subvolume.
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
+                                 int active_src)
+{
+        int pending = 0;
+
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending == 0) {
+                afr_sh_entry_expunge_subvol (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for the unlink/rmdir wound while expunging an entry on the
+ * child identified by `cookie`.  Logs the outcome, destroys the
+ * per-entry expunge frame and reports completion to the self-heal
+ * frame stored in the expunge frame's self-heal state.
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret, int32_t op_errno)
+{
+        afr_private_t   *conf          = this->private;
+        afr_local_t     *expunge_local = expunge_frame->local;
+        afr_self_heal_t *expunge_heal  = &expunge_local->self_heal;
+        /* fetched before the expunge frame is destroyed below */
+        call_frame_t    *heal_frame    = expunge_heal->sh_frame;
+        int              subvol        = (long) cookie;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "removing %s on %s failed (%s)",
+                        expunge_local->loc.path,
+                        conf->children[subvol]->name,
+                        strerror (op_errno));
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "removed %s on %s",
+                        expunge_local->loc.path,
+                        conf->children[subvol]->name);
+        }
+
+        AFR_STACK_DESTROY (expunge_frame);
+        afr_sh_entry_expunge_entry_done (heal_frame, this, subvol);
+
+        return 0;
+}
+
+
+/*
+ * Wind an rmdir for the expunge frame's location on the child
+ * `active_src`; the result goes to afr_sh_entry_expunge_remove_cbk().
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
+                            int active_src)
+{
+        afr_private_t *conf          = this->private;
+        afr_local_t   *expunge_local = expunge_frame->local;
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "removing directory %s on %s",
+                expunge_local->loc.path, conf->children[active_src]->name);
+
+        STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+                           (void *) (long) active_src,
+                           conf->children[active_src],
+                           conf->children[active_src]->fops->rmdir,
+                           &expunge_local->loc);
+
+        return 0;
+}
+
+
+/*
+ * Wind an unlink for the expunge frame's location on the child
+ * `active_src`; the result goes to afr_sh_entry_expunge_remove_cbk().
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
+                             int active_src)
+{
+        afr_private_t *conf          = this->private;
+        afr_local_t   *expunge_local = expunge_frame->local;
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "unlinking file %s on %s",
+                expunge_local->loc.path, conf->children[active_src]->name);
+
+        STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+                           (void *) (long) active_src,
+                           conf->children[active_src],
+                           conf->children[active_src]->fops->unlink,
+                           &expunge_local->loc);
+
+        return 0;
+}
+
+
+/*
+ * Pick the removal operation matching the file type found on active_src.
+ *
+ * buf is the stat returned by the lookup on active_src.  Directories
+ * are removed with rmdir, every other known type with unlink.  For an
+ * unrecognised type nothing is wound: the per-entry frame is destroyed
+ * and the entry is reported as done on the self-heal frame.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
+                             int active_src, struct stat *buf)
+{
+        afr_private_t *priv          = this->private;
+        afr_local_t   *expunge_local = expunge_frame->local;
+        call_frame_t  *frame         = expunge_local->self_heal.sh_frame;
+        int            type          = (buf->st_mode & S_IFMT);
+
+        switch (type) {
+        case S_IFSOCK:
+        case S_IFREG:
+        case S_IFBLK:
+        case S_IFCHR:
+        case S_IFIFO:
+        case S_IFLNK:
+                afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
+                return 0;
+        case S_IFDIR:
+                afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
+                return 0;
+        default:
+                break;
+        }
+
+        /* The stat came from active_src, so that is the subvolume to
+           name.  The per-entry local's self_heal.source is never set by
+           afr_sh_entry_expunge_entry and must not be used as an index. */
+        gf_log (this->name, GF_LOG_ERROR,
+                "%s has unknown file type on %s: 0%o",
+                expunge_local->loc.path,
+                priv->children[active_src]->name, type);
+
+        AFR_STACK_DESTROY (expunge_frame);
+        afr_sh_entry_expunge_entry_done (frame, this, active_src);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the lookup wound by afr_sh_entry_expunge_purge.
+ *
+ * cookie is the index of the subvolume that was looked up.  On success
+ * the stat is handed to afr_sh_entry_expunge_remove; on failure the
+ * error is logged, the per-entry frame is destroyed and the entry is
+ * reported as done.
+ */
+int
+afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
+                                 xlator_t *this,
+                                 int32_t op_ret, int32_t op_errno,
+                                 inode_t *inode, struct stat *buf, dict_t *x)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *entry_ctx  = expunge_frame->local;
+        call_frame_t  *heal_frame = entry_ctx->self_heal.sh_frame;
+        int            child      = (long) cookie;
+
+        if (op_ret != -1) {
+                afr_sh_entry_expunge_remove (expunge_frame, this, child, buf);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "lookup of %s on %s failed (%s)",
+                entry_ctx->loc.path,
+                priv->children[child]->name,
+                strerror (op_errno));
+
+        AFR_STACK_DESTROY (expunge_frame);
+        afr_sh_entry_expunge_entry_done (heal_frame, this, child);
+
+        return 0;
+}
+
+
+/*
+ * Start removing an entry from subvolume active_src by first looking it
+ * up there; afr_sh_entry_expunge_lookup_cbk receives the reply with
+ * active_src as cookie.
+ */
+int
+afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
+                            int active_src)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = expunge_frame->local;
+        xlator_t      *subvol    = priv->children[active_src];
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "looking up %s on %s",
+                entry_ctx->loc.path, subvol->name);
+
+        STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
+                           (void *) (long) active_src,
+                           subvol,
+                           subvol->fops->lookup,
+                           &entry_ctx->loc, 0);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the lookup of an entry on the source subvolume.
+ *
+ * cookie is the index of the subvolume that was looked up (the source).
+ * ENOENT means the entry is missing there, so it is purged from the
+ * subvolume recorded as active_source in the per-entry local.  Any other
+ * outcome is logged and the entry is simply finished.
+ */
+int
+afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
+                                xlator_t *this,
+                                int32_t op_ret, int32_t op_errno,
+                                inode_t *inode, struct stat *buf, dict_t *x)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *entry_ctx  = expunge_frame->local;
+        call_frame_t  *heal_frame = entry_ctx->self_heal.sh_frame;
+        int            sink       = entry_ctx->self_heal.active_source;
+        int            src        = (long) cookie;
+        const char    *src_name   = priv->children[src]->name;
+
+        if (op_ret == -1 && op_errno == ENOENT) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "missing entry %s on %s",
+                        entry_ctx->loc.path, src_name);
+
+                afr_sh_entry_expunge_purge (expunge_frame, this, sink);
+                return 0;
+        }
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "looking up %s under %s failed (%s)",
+                        entry_ctx->loc.path, src_name,
+                        strerror (op_errno));
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s exists under %s",
+                        entry_ctx->loc.path, src_name);
+        }
+
+        AFR_STACK_DESTROY (expunge_frame);
+        afr_sh_entry_expunge_entry_done (heal_frame, this, sink);
+
+        return 0;
+}
+
+
+/*
+ * Examine one directory entry read from the active sink.
+ *
+ * "." and ".." are skipped.  For any other name a copy of the self-heal
+ * frame is created with its own afr_local_t, the child loc is built
+ * under local->loc, and a lookup is wound to the source subvolume;
+ * afr_sh_entry_expunge_entry_cbk continues from there.
+ *
+ * If nothing could be wound (skipped name, allocation failure, loc
+ * construction failure) the entry is reported as done immediately and
+ * any frame copied so far is released.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
+                            char *name)
+{
+        afr_private_t   *priv          = NULL;
+        afr_local_t     *local         = NULL;
+        afr_self_heal_t *sh            = NULL;
+        int              ret           = -1;
+        call_frame_t    *expunge_frame = NULL;
+        afr_local_t     *expunge_local = NULL;
+        afr_self_heal_t *expunge_sh    = NULL;
+        int              active_src    = 0;
+        int              source        = 0;
+        /* NOTE(review): never read in this function; presumably assigned
+           inside ALLOC_OR_GOTO on failure -- confirm in afr.h before
+           removing it */
+        int              op_errno      = 0;
+
+        priv  = this->private;
+        local = frame->local;
+        sh    = &local->self_heal;
+
+        active_src = sh->active_source;
+        source     = sh->source;
+
+        if ((strcmp (name, ".") == 0)
+            || (strcmp (name, "..") == 0)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "skipping inspection of %s under %s",
+                        name, local->loc.path);
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "inspecting existance of %s under %s",
+                name, local->loc.path);
+
+        expunge_frame = copy_frame (frame);
+        if (!expunge_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
+
+        expunge_frame->local      = expunge_local;
+        expunge_sh                = &expunge_local->self_heal;
+        expunge_sh->sh_frame      = frame;
+        expunge_sh->active_source = active_src;
+
+        ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
+        if (ret != 0) {
+                /* normalise so the cleanup below always runs, whatever
+                   non-zero value the helper uses to signal failure */
+                ret = -1;
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "looking up %s on %s", expunge_local->loc.path,
+                priv->children[source]->name);
+
+        STACK_WIND_COOKIE (expunge_frame,
+                           afr_sh_entry_expunge_entry_cbk,
+                           (void *) (long) source,
+                           priv->children[source],
+                           priv->children[source]->fops->lookup,
+                           &expunge_local->loc, 0);
+
+        ret = 0;
+out:
+        if (ret == -1) {
+                /* Nothing was wound, so no callback will ever release the
+                   copied frame: drop it here before reporting the entry. */
+                if (expunge_frame) {
+                        if (expunge_frame->local) {
+                                AFR_STACK_DESTROY (expunge_frame);
+                        } else {
+                                STACK_DESTROY (expunge_frame->root);
+                        }
+                }
+
+                afr_sh_entry_expunge_entry_done (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the readdir wound by afr_sh_entry_expunge_subvol.
+ *
+ * op_ret < 0 (error) and op_ret == 0 (no more entries) both move on via
+ * afr_sh_entry_expunge_all.  Otherwise the entries are counted, the
+ * d_off of the last one is stored in sh->offset, local->call_count is
+ * set to the number of entries, and each entry is passed to
+ * afr_sh_entry_expunge_entry.
+ */
+int
+afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
+                                  xlator_t *this,
+                                  int32_t op_ret, int32_t op_errno,
+                                  gf_dirent_t *entries)
+{
+        afr_private_t   *priv   = this->private;
+        afr_local_t     *local  = frame->local;
+        afr_self_heal_t *sh     = &local->self_heal;
+        gf_dirent_t     *dirent = NULL;
+        off_t            resume = 0;
+        int              total  = 0;
+        int              sink   = sh->active_source;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "readdir of %s on subvolume %s failed (%s)",
+                        local->loc.path,
+                        priv->children[sink]->name,
+                        strerror (op_errno));
+
+                afr_sh_entry_expunge_all (frame, this);
+                return 0;
+        }
+
+        if (op_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir of %s on subvolume %s complete",
+                        local->loc.path,
+                        priv->children[sink]->name);
+
+                afr_sh_entry_expunge_all (frame, this);
+                return 0;
+        }
+
+        /* first pass: count the batch and find where to resume from */
+        list_for_each_entry (dirent, &entries->list, list) {
+                total++;
+                resume = dirent->d_off;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "readdir'ed %d entries from %s",
+                total, priv->children[sink]->name);
+
+        sh->offset        = resume;
+        local->call_count = total;
+
+        /* second pass: one completion is expected per entry */
+        list_for_each_entry (dirent, &entries->list, list) {
+                afr_sh_entry_expunge_entry (frame, this, dirent->d_name);
+        }
+
+        return 0;
+}
+
+/*
+ * Read the next batch of directory entries from subvolume active_src,
+ * starting at sh->offset and using the self-heal fd and block size.
+ * The reply goes to afr_sh_entry_expunge_readdir_cbk.
+ */
+int
+afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
+                             int active_src)
+{
+        afr_private_t   *priv   = this->private;
+        afr_local_t     *local  = frame->local;
+        afr_self_heal_t *sh     = &local->self_heal;
+        xlator_t        *subvol = priv->children[active_src];
+
+        STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
+                    subvol,
+                    subvol->fops->readdir,
+                    sh->healing_fd, sh->block_size, sh->offset);
+
+        return 0;
+}
+
+
+/*
+ * Drive the expunge phase across subvolumes.
+ *
+ * Resets the readdir offset, then asks next_active_sink for the
+ * subvolume following sh->active_source and records it.  Expunging
+ * starts on that subvolume unless there is no source, the self-heal has
+ * been marked failed, or no further subvolume is returned; in those
+ * cases control passes to afr_sh_entry_erase_pending.
+ */
+int
+afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t   *priv  = this->private;
+        afr_local_t     *local = frame->local;
+        afr_self_heal_t *sh    = &local->self_heal;
+        int              next  = -1;
+
+        sh->offset = 0;
+
+        if (sh->source == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no active sources for %s to expunge entries",
+                        local->loc.path);
+                afr_sh_entry_erase_pending (frame, this);
+                return 0;
+        }
+
+        next = next_active_sink (frame, this, sh->active_source);
+        sh->active_source = next;
+
+        /* stop on failure, or once every sink has been walked */
+        if (sh->op_failed || next == -1) {
+                afr_sh_entry_erase_pending (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "expunging entries of %s on %s to other sinks",
+                local->loc.path, priv->children[next]->name);
+
+        afr_sh_entry_expunge_subvol (frame, this, next);
+
+        return 0;
+}
+
+
+int
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
+ int active_src);
+
+/*
+ * Record that one directory entry of the current impunge batch has been
+ * dealt with.  When afr_frame_return reports 0 for the self-heal frame,
+ * the next readdir batch is requested from active_src.
+ */
+int
+afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
+                                 int active_src)
+{
+        int remaining = 0;
+
+        /* empty critical section kept exactly as in the original */
+        LOCK (&frame->lock);
+        {
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        afr_sh_entry_impunge_subvol (frame, this, active_src);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the utimens wound after chown on a recreated entry.
+ *
+ * cookie is the index of the sink the utimens was sent to.  The outcome
+ * is logged, the per-entry call count is decremented under the frame
+ * lock, and when it reaches zero the per-entry frame is destroyed and
+ * the entry is reported as done for the active source.
+ */
+int
+afr_sh_entry_impunge_utimens_cbk (call_frame_t *impunge_frame, void *cookie,
+                                  xlator_t *this, int32_t op_ret,
+                                  int32_t op_errno, struct stat *stbuf)
+{
+        int              call_count    = 0;
+        afr_private_t   *priv          = NULL;
+        afr_local_t     *impunge_local = NULL;
+        afr_self_heal_t *impunge_sh    = NULL;
+        call_frame_t    *frame         = NULL;
+        int              active_src    = 0;
+        int              child_index   = 0;
+
+        priv          = this->private;
+        impunge_local = impunge_frame->local;
+        impunge_sh    = &impunge_local->self_heal;
+        frame         = impunge_sh->sh_frame;
+        child_index   = (long) cookie;
+        /* Completion must name the subvolume being read, as the sibling
+           callbacks do; leaving this at 0 restarted readdir on child 0. */
+        active_src    = impunge_sh->active_source;
+
+        if (op_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "utimes set for %s on %s",
+                        impunge_local->loc.path,
+                        priv->children[child_index]->name);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "setting utimes of %s on %s failed (%s)",
+                        impunge_local->loc.path,
+                        priv->children[child_index]->name,
+                        strerror (op_errno));
+        }
+
+        LOCK (&impunge_frame->lock);
+        {
+                call_count = --impunge_local->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (call_count == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the chown wound after an entry was recreated.
+ *
+ * cookie is the index of the sink the chown was sent to.  On success
+ * the access/modification times saved in cont.lookup.buf are applied to
+ * the same sink with utimens.  On failure the per-entry call count is
+ * decremented and, when it reaches zero, the per-entry frame is
+ * destroyed and the entry is reported as done for the active source.
+ */
+int
+afr_sh_entry_impunge_chown_cbk (call_frame_t *impunge_frame, void *cookie,
+                                xlator_t *this, int32_t op_ret,
+                                int32_t op_errno, struct stat *stbuf)
+{
+        int              call_count    = 0;
+        afr_private_t   *priv          = NULL;
+        afr_local_t     *impunge_local = NULL;
+        afr_self_heal_t *impunge_sh    = NULL;
+        call_frame_t    *frame         = NULL;
+        int              active_src    = 0;
+        int              child_index   = 0;
+        /* zeroed so tv_nsec is defined when only whole seconds are
+           available (the #else branch below) */
+        struct timespec  ts[2]         = { {0}, {0} };
+
+        priv          = this->private;
+        impunge_local = impunge_frame->local;
+        impunge_sh    = &impunge_local->self_heal;
+        frame         = impunge_sh->sh_frame;
+        child_index   = (long) cookie;
+        /* Completion must name the subvolume being read, as the sibling
+           callbacks do; leaving this at 0 restarted readdir on child 0. */
+        active_src    = impunge_sh->active_source;
+
+        if (op_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "ownership of %s on %s changed",
+                        impunge_local->loc.path,
+                        priv->children[child_index]->name);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "setting ownership of %s on %s failed (%s)",
+                        impunge_local->loc.path,
+                        priv->children[child_index]->name,
+                        strerror (op_errno));
+                goto out;
+        }
+
+#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
+        ts[0] = impunge_local->cont.lookup.buf.st_atim;
+        ts[1] = impunge_local->cont.lookup.buf.st_mtim;
+#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
+        ts[0] = impunge_local->cont.lookup.buf.st_atimespec;
+        ts[1] = impunge_local->cont.lookup.buf.st_mtimespec;
+#else
+        ts[0].tv_sec = impunge_local->cont.lookup.buf.st_atime;
+        ts[1].tv_sec = impunge_local->cont.lookup.buf.st_mtime;
+#endif
+        STACK_WIND_COOKIE (impunge_frame,
+                           afr_sh_entry_impunge_utimens_cbk,
+                           (void *) (long) child_index,
+                           priv->children[child_index],
+                           priv->children[child_index]->fops->utimens,
+                           &impunge_local->loc, ts);
+
+        return 0;
+
+out:
+        LOCK (&impunge_frame->lock);
+        {
+                call_count = --impunge_local->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (call_count == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler shared by the mknod, mkdir and symlink recreations.
+ *
+ * cookie is the index of the sink the entry was created on.  On success
+ * the new inode's st_mode is recorded and a chown to the uid/gid saved
+ * in cont.lookup.buf is wound to the same sink.  On failure the
+ * per-entry call count is decremented and, when it reaches zero, the
+ * per-entry frame is destroyed and the entry is reported as done for
+ * the active source.
+ */
+int
+afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
+                                  xlator_t *this,
+                                  int32_t op_ret, int32_t op_errno,
+                                  inode_t *inode, struct stat *stbuf)
+{
+        int              call_count    = 0;
+        afr_private_t   *priv          = NULL;
+        afr_local_t     *impunge_local = NULL;
+        afr_self_heal_t *impunge_sh    = NULL;
+        call_frame_t    *frame         = NULL;
+        int              active_src    = 0;
+        int              child_index   = 0;
+
+        priv          = this->private;
+        impunge_local = impunge_frame->local;
+        impunge_sh    = &impunge_local->self_heal;
+        frame         = impunge_sh->sh_frame;
+
+        child_index   = (long) cookie;
+        /* Completion must name the subvolume being read, as the sibling
+           callbacks do; leaving this at 0 restarted readdir on child 0. */
+        active_src    = impunge_sh->active_source;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "creation of %s on %s failed (%s)",
+                        impunge_local->loc.path,
+                        priv->children[child_index]->name,
+                        strerror (op_errno));
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "setting ownership of %s on %s to %d/%d",
+                impunge_local->loc.path,
+                priv->children[child_index]->name,
+                impunge_local->cont.lookup.buf.st_uid,
+                impunge_local->cont.lookup.buf.st_gid);
+
+        inode->st_mode = stbuf->st_mode;
+
+        STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_chown_cbk,
+                           (void *) (long) child_index,
+                           priv->children[child_index],
+                           priv->children[child_index]->fops->chown,
+                           &impunge_local->loc,
+                           impunge_local->cont.lookup.buf.st_uid,
+                           impunge_local->cont.lookup.buf.st_gid);
+        return 0;
+
+out:
+        LOCK (&impunge_frame->lock);
+        {
+                call_count = --impunge_local->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (call_count == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Recreate a non-directory, non-symlink entry on sink child_index with
+ * the mode and device number taken from stbuf.  The reply goes to
+ * afr_sh_entry_impunge_newfile_cbk with child_index as cookie.
+ */
+int
+afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
+                            int child_index, struct stat *stbuf)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = impunge_frame->local;
+        xlator_t      *sink      = priv->children[child_index];
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "creating file %s mode=0%o dev=0x%"GF_PRI_DEV" on %s",
+                entry_ctx->loc.path,
+                stbuf->st_mode, stbuf->st_rdev,
+                sink->name);
+
+        STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+                           (void *) (long) child_index,
+                           sink,
+                           sink->fops->mknod,
+                           &entry_ctx->loc,
+                           stbuf->st_mode, stbuf->st_rdev);
+
+        return 0;
+}
+
+
+
+/*
+ * Recreate a directory entry on sink child_index with the mode taken
+ * from stbuf.  The reply goes to afr_sh_entry_impunge_newfile_cbk with
+ * child_index as cookie.
+ */
+int
+afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
+                            int child_index, struct stat *stbuf)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = impunge_frame->local;
+        xlator_t      *sink      = priv->children[child_index];
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "creating directory %s mode=0%o on %s",
+                entry_ctx->loc.path,
+                stbuf->st_mode,
+                sink->name);
+
+        STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+                           (void *) (long) child_index,
+                           sink,
+                           sink->fops->mkdir,
+                           &entry_ctx->loc, stbuf->st_mode);
+
+        return 0;
+}
+
+
+/*
+ * Recreate a symbolic link on sink child_index pointing at linkname.
+ * The reply goes to afr_sh_entry_impunge_newfile_cbk with child_index
+ * as cookie.
+ */
+int
+afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
+                              int child_index, const char *linkname)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = impunge_frame->local;
+        xlator_t      *sink      = priv->children[child_index];
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "creating symlink %s -> %s on %s",
+                entry_ctx->loc.path, linkname,
+                sink->name);
+
+        STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+                           (void *) (long) child_index,
+                           sink,
+                           sink->fops->symlink,
+                           linkname, &entry_ctx->loc);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the readlink sent to the active source.
+ *
+ * cookie is the index of the sink the link is to be recreated on.  On
+ * success the symlink is created there with the returned target.  On
+ * failure the per-entry call count is decremented and, when it reaches
+ * zero, the per-entry frame is destroyed and the entry reported done.
+ */
+int
+afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
+                                   xlator_t *this,
+                                   int32_t op_ret, int32_t op_errno,
+                                   const char *linkname)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *entry_ctx  = impunge_frame->local;
+        call_frame_t  *heal_frame = entry_ctx->self_heal.sh_frame;
+        int            src        = entry_ctx->self_heal.active_source;
+        int            sink       = (long) cookie;
+        int            pending    = -1;
+
+        if (op_ret != -1) {
+                afr_sh_entry_impunge_symlink (impunge_frame, this, sink,
+                                              linkname);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "readlink of %s on %s failed (%s)",
+                entry_ctx->loc.path,
+                priv->children[src]->name,
+                strerror (op_errno));
+
+        LOCK (&impunge_frame->lock);
+        {
+                pending = --entry_ctx->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (pending == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (heal_frame, this, src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Read the link target of the entry from the active source (buffer size
+ * 4096) so it can be recreated on sink child_index.  child_index travels
+ * as cookie to afr_sh_entry_impunge_readlink_cbk.  stbuf is not used.
+ */
+int
+afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
+                               int child_index, struct stat *stbuf)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = impunge_frame->local;
+        int            src       = entry_ctx->self_heal.active_source;
+        xlator_t      *subvol    = priv->children[src];
+
+        STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
+                           (void *) (long) child_index,
+                           subvol,
+                           subvol->fops->readlink,
+                           &entry_ctx->loc, 4096);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the lookup of an entry on the active source, done
+ * on behalf of sink `cookie` where the entry is missing.
+ *
+ * On success the stat is saved in cont.lookup.buf and the entry is
+ * recreated on the sink according to its file type (symlink via
+ * readlink, directory via mkdir, other known types via mknod).  A
+ * failed lookup or an unknown type decrements the per-entry call count;
+ * when it reaches zero the per-entry frame is destroyed and the entry
+ * is reported as done.
+ */
+int
+afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
+                                          void *cookie, xlator_t *this,
+                                          int32_t op_ret, int32_t op_errno,
+                                          inode_t *inode, struct stat *buf,
+                                          dict_t *xattr)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *entry_ctx  = impunge_frame->local;
+        call_frame_t  *heal_frame = entry_ctx->self_heal.sh_frame;
+        int            sink       = (long) cookie;
+        int            src        = entry_ctx->self_heal.active_source;
+        int            ftype      = 0;
+        int            pending    = 0;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "looking up %s on %s (for %s) failed (%s)",
+                        entry_ctx->loc.path,
+                        priv->children[src]->name,
+                        priv->children[sink]->name,
+                        strerror (op_errno));
+                goto release;
+        }
+
+        entry_ctx->cont.lookup.buf = *buf;
+        ftype = (buf->st_mode & S_IFMT);
+
+        if (ftype == S_IFLNK) {
+                afr_sh_entry_impunge_readlink (impunge_frame, this,
+                                               sink, buf);
+                return 0;
+        }
+
+        if (ftype == S_IFDIR) {
+                afr_sh_entry_impunge_mkdir (impunge_frame, this,
+                                            sink, buf);
+                return 0;
+        }
+
+        if (ftype == S_IFSOCK || ftype == S_IFREG || ftype == S_IFBLK
+            || ftype == S_IFCHR || ftype == S_IFIFO) {
+                afr_sh_entry_impunge_mknod (impunge_frame, this,
+                                            sink, buf);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "%s has unknown file type on %s: 0%o",
+                entry_ctx->loc.path,
+                priv->children[src]->name, ftype);
+
+release:
+        LOCK (&impunge_frame->lock);
+        {
+                pending = --entry_ctx->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (pending == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (heal_frame, this, src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Begin recreating an entry on sink child_index: look the entry up on
+ * the active source first.  child_index travels as cookie to
+ * afr_sh_entry_impunge_recreate_lookup_cbk.
+ */
+int
+afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
+                               int child_index)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *entry_ctx = impunge_frame->local;
+        int            src       = entry_ctx->self_heal.active_source;
+        xlator_t      *subvol    = priv->children[src];
+
+        STACK_WIND_COOKIE (impunge_frame,
+                           afr_sh_entry_impunge_recreate_lookup_cbk,
+                           (void *) (long) child_index,
+                           subvol,
+                           subvol->fops->lookup,
+                           &entry_ctx->loc, 0);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the lookup of an entry on one sink.
+ *
+ * cookie is the sink's index.  ENOENT starts recreation of the entry on
+ * that sink; the call count is then decremented later in the recreate
+ * chain.  Any other outcome is logged and the per-entry call count is
+ * decremented here; when it reaches zero the per-entry frame is
+ * destroyed and the entry is reported as done.
+ */
+int
+afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
+                                xlator_t *this,
+                                int32_t op_ret, int32_t op_errno,
+                                inode_t *inode, struct stat *buf, dict_t *x)
+{
+        afr_private_t *priv       = this->private;
+        afr_local_t   *entry_ctx  = impunge_frame->local;
+        call_frame_t  *heal_frame = entry_ctx->self_heal.sh_frame;
+        int            sink       = (long) cookie;
+        int            src        = entry_ctx->self_heal.active_source;
+        const char    *sink_name  = priv->children[sink]->name;
+        int            pending    = 0;
+
+        if (op_ret == -1 && op_errno == ENOENT) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "missing entry %s on %s",
+                        entry_ctx->loc.path, sink_name);
+
+                afr_sh_entry_impunge_recreate (impunge_frame, this, sink);
+                return 0;
+        }
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "looking up %s under %s failed (%s)",
+                        entry_ctx->loc.path, sink_name,
+                        strerror (op_errno));
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s exists under %s",
+                        entry_ctx->loc.path, sink_name);
+        }
+
+        LOCK (&impunge_frame->lock);
+        {
+                pending = --entry_ctx->call_count;
+        }
+        UNLOCK (&impunge_frame->lock);
+
+        if (pending == 0) {
+                AFR_STACK_DESTROY (impunge_frame);
+                afr_sh_entry_impunge_entry_done (heal_frame, this, src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Examine one directory entry read from the active source.
+ *
+ * "." and ".." are skipped.  For any other name a copy of the self-heal
+ * frame is created with its own afr_local_t, the child loc is built
+ * under local->loc, and a lookup is wound to every subvolume that is
+ * up, is not the active source and is not marked as a source;
+ * afr_sh_entry_impunge_entry_cbk handles each reply.
+ *
+ * If nothing could be wound (skipped name, allocation failure, loc
+ * construction failure, no eligible subvolume) the entry is reported as
+ * done immediately and any frame copied so far is released.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
+                            char *name)
+{
+        afr_private_t   *priv          = NULL;
+        afr_local_t     *local         = NULL;
+        afr_self_heal_t *sh            = NULL;
+        int              ret           = -1;
+        call_frame_t    *impunge_frame = NULL;
+        afr_local_t     *impunge_local = NULL;
+        afr_self_heal_t *impunge_sh    = NULL;
+        int              active_src    = 0;
+        int              i             = 0;
+        int              call_count    = 0;
+        /* NOTE(review): never read in this function; presumably assigned
+           inside ALLOC_OR_GOTO on failure -- confirm in afr.h before
+           removing it */
+        int              op_errno      = 0;
+
+        priv  = this->private;
+        local = frame->local;
+        sh    = &local->self_heal;
+
+        active_src = sh->active_source;
+
+        if ((strcmp (name, ".") == 0)
+            || (strcmp (name, "..") == 0)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "skipping inspection of %s under %s",
+                        name, local->loc.path);
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "inspecting existance of %s under %s",
+                name, local->loc.path);
+
+        impunge_frame = copy_frame (frame);
+        if (!impunge_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
+
+        impunge_frame->local      = impunge_local;
+        impunge_sh                = &impunge_local->self_heal;
+        impunge_sh->sh_frame      = frame;
+        impunge_sh->active_source = active_src;
+
+        ret = build_child_loc (this, &impunge_local->loc, &local->loc, name);
+        if (ret != 0) {
+                /* normalise so the cleanup below always runs, whatever
+                   non-zero value the helper uses to signal failure */
+                ret = -1;
+                goto out;
+        }
+
+        /* count the lookups first: replies may start arriving before
+           the winding loop below has finished */
+        for (i = 0; i < priv->child_count; i++) {
+                if (i == active_src)
+                        continue;
+                if (local->child_up[i] == 0)
+                        continue;
+                if (sh->sources[i] == 1)
+                        continue;
+                call_count++;
+        }
+
+        if (call_count == 0) {
+                /* no lookup will be wound, so no callback would ever
+                   complete this entry or free its frame */
+                ret = -1;
+                goto out;
+        }
+
+        impunge_local->call_count = call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (i == active_src)
+                        continue;
+                if (local->child_up[i] == 0)
+                        continue;
+                if (sh->sources[i] == 1)
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "looking up %s on %s", impunge_local->loc.path,
+                        priv->children[i]->name);
+
+                STACK_WIND_COOKIE (impunge_frame,
+                                   afr_sh_entry_impunge_entry_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->lookup,
+                                   &impunge_local->loc, 0);
+
+                /* stop right after the last wind: the per-entry frame
+                   may already be gone once every reply has come back */
+                if (!--call_count)
+                        break;
+        }
+
+        ret = 0;
+out:
+        if (ret == -1) {
+                /* Nothing was wound, so no callback will ever release the
+                   copied frame: drop it here before reporting the entry. */
+                if (impunge_frame) {
+                        if (impunge_frame->local) {
+                                AFR_STACK_DESTROY (impunge_frame);
+                        } else {
+                                STACK_DESTROY (impunge_frame->root);
+                        }
+                }
+
+                afr_sh_entry_impunge_entry_done (frame, this, active_src);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the readdir wound by afr_sh_entry_impunge_subvol.
+ *
+ * op_ret < 0 (error) and op_ret == 0 (no more entries) both move on via
+ * afr_sh_entry_impunge_all.  Otherwise the entries are counted, the
+ * d_off of the last one is stored in sh->offset, local->call_count is
+ * set to the number of entries, and each entry is passed to
+ * afr_sh_entry_impunge_entry.
+ */
+int
+afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
+                                  xlator_t *this,
+                                  int32_t op_ret, int32_t op_errno,
+                                  gf_dirent_t *entries)
+{
+        afr_private_t   *priv   = this->private;
+        afr_local_t     *local  = frame->local;
+        afr_self_heal_t *sh     = &local->self_heal;
+        gf_dirent_t     *dirent = NULL;
+        off_t            resume = 0;
+        int              total  = 0;
+        int              src    = sh->active_source;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "readdir of %s on subvolume %s failed (%s)",
+                        local->loc.path,
+                        priv->children[src]->name,
+                        strerror (op_errno));
+
+                afr_sh_entry_impunge_all (frame, this);
+                return 0;
+        }
+
+        if (op_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir of %s on subvolume %s complete",
+                        local->loc.path,
+                        priv->children[src]->name);
+
+                afr_sh_entry_impunge_all (frame, this);
+                return 0;
+        }
+
+        /* first pass: count the batch and find where to resume from */
+        list_for_each_entry (dirent, &entries->list, list) {
+                total++;
+                resume = dirent->d_off;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "readdir'ed %d entries from %s",
+                total, priv->children[src]->name);
+
+        sh->offset        = resume;
+        local->call_count = total;
+
+        /* second pass: one completion is expected per entry */
+        list_for_each_entry (dirent, &entries->list, list) {
+                afr_sh_entry_impunge_entry (frame, this, dirent->d_name);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Read the next batch of directory entries from subvolume active_src,
+ * starting at sh->offset and using the self-heal fd and block size.
+ * The reply goes to afr_sh_entry_impunge_readdir_cbk.
+ */
+int
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
+                             int active_src)
+{
+        afr_private_t   *priv   = this->private;
+        afr_local_t     *local  = frame->local;
+        afr_self_heal_t *sh     = &local->self_heal;
+        xlator_t        *subvol = priv->children[active_src];
+
+        STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
+                    subvol,
+                    subvol->fops->readdir,
+                    sh->healing_fd, sh->block_size, sh->offset);
+
+        return 0;
+}
+
+
+/*
+ * Drive the impunge phase across subvolumes.
+ *
+ * Resets the readdir offset, then asks next_active_source for the
+ * subvolume following sh->active_source and records it.  If the
+ * self-heal has been marked failed it is finished; if no further
+ * subvolume is returned the expunge phase begins; otherwise impunging
+ * starts on the returned subvolume.
+ */
+int
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t   *priv  = this->private;
+        afr_local_t     *local = frame->local;
+        afr_self_heal_t *sh    = &local->self_heal;
+        int              next  = -1;
+
+        sh->offset = 0;
+
+        next = next_active_source (frame, this, sh->active_source);
+        sh->active_source = next;
+
+        if (sh->op_failed) {
+                afr_sh_entry_finish (frame, this);
+        } else if (next == -1) {
+                /* every subvolume has been walked: move on to expunging */
+                afr_sh_entry_expunge_all (frame, this);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "impunging entries of %s on %s to other sinks",
+                        local->loc.path, priv->children[next]->name);
+
+                afr_sh_entry_impunge_subvol (frame, this, next);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for each opendir wound by afr_sh_entry_open.
+ *
+ * cookie is the index of the subvolume that replied.  A failure is
+ * logged and marks the self-heal as failed.  When afr_frame_return
+ * reports 0, the self-heal is either finished (if any opendir failed)
+ * or the impunge phase is started with active_source reset to -1.
+ */
+int
+afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        afr_local_t     *local     = frame->local;
+        afr_self_heal_t *sh        = &local->self_heal;
+        afr_private_t   *priv      = this->private;
+        int              child     = (long) cookie;
+        int              remaining = 0;
+
+        /* TODO: some of the opens might fail.
+           In that case, modify the cleanup function to send flush on
+           those fds which are already open */
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "opendir of %s failed on child %s (%s)",
+                                local->loc.path,
+                                priv->children[child]->name,
+                                strerror (op_errno));
+                        sh->op_failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        if (sh->op_failed) {
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "fd for %s opened, commencing sync",
+                local->loc.path);
+
+        sh->active_source = -1;
+        afr_sh_entry_impunge_all (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Open the directory being healed on the source (if any) and on every
+ * sink that is up, all sharing one fd stored in sh->healing_fd.
+ *
+ * Sets the readdir block size (131072) and resets the offset.  Replies
+ * are collected by afr_sh_entry_opendir_cbk.  If the fd cannot be
+ * created, the self-heal is marked failed and finished without winding
+ * anything.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+{
+        int i = 0;
+        int call_count = 0;
+
+        int source = -1;
+        int *sources = NULL;
+
+        fd_t *fd = NULL;
+
+        afr_local_t *   local = NULL;
+        afr_private_t * priv  = NULL;
+        afr_self_heal_t *sh = NULL;
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+        source  = local->self_heal.source;
+        sources = local->self_heal.sources;
+
+        sh->block_size = 131072;
+        sh->offset     = 0;
+
+        fd = fd_create (local->loc.inode, frame->root->pid);
+        if (!fd) {
+                /* winding opendir with a NULL fd would hand every
+                   subvolume an invalid argument; bail out instead */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not create fd for %s",
+                        local->loc.path);
+                sh->op_failed = 1;
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+        sh->healing_fd = fd;
+
+        call_count = sh->active_sinks;
+        if (source != -1)
+                call_count++;
+
+        local->call_count = call_count;
+
+        if (source != -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "opening directory %s on subvolume %s (source)",
+                        local->loc.path, priv->children[source]->name);
+
+                /* open source */
+                STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+                                   (void *) (long) source,
+                                   priv->children[source],
+                                   priv->children[source]->fops->opendir,
+                                   &local->loc, fd);
+                call_count--;
+        }
+
+        /* open sinks */
+        for (i = 0; i < priv->child_count; i++) {
+                if (sources[i] || !local->child_up[i])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "opening directory %s on subvolume %s (sink)",
+                        local->loc.path, priv->children[i]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->opendir,
+                                   &local->loc, fd);
+
+                /* stop right after the last wind; the frame's state must
+                   not be touched once all replies may have arrived */
+                if (!--call_count)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Decide whether there is anything to synchronise and, if so, start it.
+ *
+ * Every subvolume that is up and not marked as a source counts as an
+ * active sink and gets its success flag set; the source, if any, gets
+ * its success flag set too.  The self-heal is finished right away when
+ * there are no active sinks, or when there is no source and fewer than
+ * two sinks.  Otherwise the sink count is stored and the directories
+ * are opened via afr_sh_entry_open.
+ */
+int
+afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local  = frame->local;
+        afr_self_heal_t *sh     = &local->self_heal;
+        afr_private_t   *priv   = this->private;
+        int              src    = sh->source;
+        int              nsinks = 0;
+        int              idx    = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (sh->sources[idx] != 0 || local->child_up[idx] != 1)
+                        continue;
+
+                sh->success[idx] = 1;
+                nsinks++;
+        }
+
+        if (src != -1)
+                sh->success[src] = 1;
+
+        if (nsinks == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no active sinks for self-heal on dir %s",
+                        local->loc.path);
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        if (src == -1 && nsinks < 2) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "cannot sync with 0 sources and 1 sink on dir %s",
+                        local->loc.path);
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        sh->active_sinks = nsinks;
+
+        if (src == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no active sources for %s found. "
+                        "merging all entries as a conservative decision",
+                        local->loc.path);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "syncing %s from subvolume %s to %d active sinks",
+                        local->loc.path, priv->children[src]->name,
+                        nsinks);
+        }
+
+        afr_sh_entry_open (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Work out which subvolume is the source for entry self-heal.
+ *
+ * Builds and prints the pending matrix from the collected xattrs
+ * (AFR_ENTRY_PENDING), marks the sources, passes the source list and
+ * the per-child errno array through afr_sh_supress_errenous_children,
+ * selects one source, stores it in sh->source and continues with
+ * afr_sh_entry_sync_prepare.
+ */
+int
+afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local    = frame->local;
+        afr_self_heal_t *sh       = &local->self_heal;
+        afr_private_t   *priv     = this->private;
+        int              nchild   = priv->child_count;
+
+        afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr,
+                                     nchild, AFR_ENTRY_PENDING);
+        afr_sh_print_pending_matrix (sh->pending_matrix, this);
+
+        afr_sh_mark_sources (sh->pending_matrix, sh->sources, nchild);
+        afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
+                                          nchild);
+
+        sh->source = afr_sh_select_source (sh->sources, nchild);
+
+        afr_sh_entry_sync_prepare (frame, this);
+
+        return 0;
+}
+
+
+
+/*
+ * Reply handler for each lookup wound by afr_sh_entry_lookup.
+ *
+ * cookie is the index of the subvolume that replied.  Unless the lookup
+ * failed (op_ret == -1), a reference to the returned xattr dict and a
+ * copy of the stat are stored in that subvolume's slot.  When
+ * afr_frame_return reports 0, afr_sh_entry_fix is called.
+ */
+int
+afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
+                         xlator_t *this, int32_t op_ret, int32_t op_errno,
+                         inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        afr_local_t     *local     = frame->local;
+        afr_self_heal_t *sh        = &local->self_heal;
+        int              child     = (long) cookie;
+        int              remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        sh->xattr[child] = dict_ref (xattr);
+                        sh->buf[child]   = *buf;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        afr_sh_entry_fix (frame, this);
+
+        return 0;
+}
+
+
+
+/*
+ * Look up the directory on every subvolume that is up, asking for the
+ * AFR_ENTRY_PENDING xattr (child_count * sizeof (int32_t) bytes).
+ *
+ * Replies are collected by afr_sh_entry_lookup_cbk.  Failing to build
+ * the xattr request is logged but does not stop the lookups: they are
+ * still wound with whatever request dict is available (possibly NULL),
+ * as before.
+ *
+ * Always returns 0.
+ */
+int
+afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *   local = NULL;
+        afr_private_t * priv  = NULL;
+        dict_t *xattr_req = NULL;
+        int ret = 0;
+        int call_count = 0;
+        int i = 0;
+
+        priv  = this->private;
+        local = frame->local;
+
+        call_count = local->child_count;
+
+        local->call_count = call_count;
+
+        xattr_req = dict_new();
+        if (!xattr_req) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not allocate xattr request for lookup of %s",
+                        local->loc.path);
+        } else {
+                ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING,
+                                       priv->child_count * sizeof(int32_t));
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "could not request %s for lookup of %s",
+                                AFR_ENTRY_PENDING, local->loc.path);
+                }
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND_COOKIE (frame,
+                                           afr_sh_entry_lookup_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->lookup,
+                                           &local->loc, xattr_req);
+                        /* stop right after the last wind; the frame's
+                           state must not be touched once all replies
+                           may have arrived */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        return 0;
+}
+
+
+
+/*
+ * Reply handler for each entrylk wound by afr_sh_entry_lock.
+ *
+ * cookie is the index of the subvolume that replied.  A failure marks
+ * the self-heal as failed and is logged (at debug level for EAGAIN, at
+ * error level otherwise).  When afr_frame_return reports 0, the
+ * self-heal is finished if any lock failed, otherwise the lookup step
+ * begins.
+ */
+int
+afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *local     = frame->local;
+        afr_self_heal_t *sh        = &local->self_heal;
+        int              child     = (long) cookie;
+        int              remaining = 0;
+
+        /* TODO: what if lock fails? */
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "inode of %s on child %d locked",
+                                local->loc.path, child);
+                } else {
+                        sh->op_failed = 1;
+
+                        gf_log (this->name,
+                                (op_errno == EAGAIN ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                                "locking inode of %s on child %d failed: %s",
+                                local->loc.path, child,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        if (sh->op_failed == 1) {
+                afr_sh_entry_finish (frame, this);
+                return 0;
+        }
+
+        afr_sh_entry_lookup (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Request a non-blocking write entrylk (ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ * NULL basename) on the directory from every subvolume that is up.
+ * Replies are collected by afr_sh_entry_lock_cbk.
+ */
+int
+afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        xlator_t      *subvol  = NULL;
+        int            to_wind = local->child_count;
+        int            idx     = 0;
+
+        local->call_count = to_wind;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (!local->child_up[idx])
+                        continue;
+
+                subvol = priv->children[idx];
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "locking %s on subvolume %s",
+                        local->loc.path, subvol->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk,
+                                   (void *) (long) idx,
+                                   subvol,
+                                   subvol->fops->entrylk,
+                                   &local->loc, NULL,
+                                   ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
+
+                /* stop right after the last wind */
+                if (!--to_wind)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Entry point of entry self-heal.
+ *
+ * Starts by locking the directory when the local is flagged as needing
+ * entry self-heal and the translator has entry self-heal enabled;
+ * otherwise goes straight to afr_sh_entry_done.
+ */
+int
+afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+
+        if (!local->need_entry_self_heal || !priv->entry_self_heal) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "proceeding to completion on %s",
+                        local->loc.path);
+                afr_sh_entry_done (frame, this);
+                return 0;
+        }
+
+        afr_sh_entry_lock (frame, this);
+
+        return 0;
+}
+
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
new file mode 100644
index 00000000000..e65a426db6c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -0,0 +1,791 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+
+/*
+ * Last step of the metadata self-heal stage.
+ *
+ * Wipes the per-child stat buffers and success flags and drops the xattr
+ * dict references kept in the self-heal state.  Then:
+ *   - if local->govinda_gOvinda is set, the heal is aborted through
+ *     sh->completion_cbk;
+ *   - regular files continue with afr_self_heal_data;
+ *   - directories continue with afr_self_heal_entry;
+ *   - anything else completes through sh->completion_cbk.
+ * Note that sh->child_errno is not reset here.  Always returns 0.
+ */
+int
+afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        afr_private_t   *priv  = this->private;
+        afr_self_heal_t *sh    = &local->self_heal;
+        int              child = 0;
+
+        memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
+        memset (sh->success, 0, sizeof (int) * priv->child_count);
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (sh->xattr[child])
+                        dict_unref (sh->xattr[child]);
+                sh->xattr[child] = NULL;
+        }
+
+        if (local->govinda_gOvinda) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "aborting selfheal of %s",
+                        local->loc.path);
+                sh->completion_cbk (frame, this);
+                return 0;
+        }
+
+        if (S_ISREG (local->cont.lookup.buf.st_mode)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "proceeding to data check on %s",
+                        local->loc.path);
+                afr_self_heal_data (frame, this);
+                return 0;
+        }
+
+        if (S_ISDIR (local->cont.lookup.buf.st_mode)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "proceeding to entry check on %s",
+                        local->loc.path);
+                afr_self_heal_entry (frame, this);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "completed self heal of %s",
+                local->loc.path);
+
+        sh->completion_cbk (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the unlock requests sent by afr_sh_metadata_finish.
+ * The result of the unlock is not inspected; once the last reply has
+ * arrived (afr_frame_return reaches 0) the stage is closed with
+ * afr_sh_metadata_done.  Always returns 0.
+ */
+int
+afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno)
+{
+        int outstanding = 0;
+
+        /* nothing is recorded per reply; the lock/unlock pair is kept as is */
+        LOCK (&frame->lock);
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        afr_sh_metadata_done (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Release the inode locks of the metadata self-heal: an F_UNLCK inodelk
+ * with l_start = l_len = 0 is sent to every subvolume flagged in
+ * local->child_up.  local->call_count is primed with local->child_count and
+ * the replies are collected by afr_sh_metadata_unlck_cbk.  Always returns 0.
+ */
+int
+afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local     = frame->local;
+        afr_private_t *priv      = this->private;
+        struct flock   flock     = {0, };
+        int            remaining = local->child_count;
+        int            child     = 0;
+
+        local->call_count = remaining;
+
+        for (child = 0; child < priv->child_count; child++) {
+                /* the request is rebuilt on every pass, as in the original */
+                flock.l_type  = F_UNLCK;
+                flock.l_start = 0;
+                flock.l_len   = 0;
+
+                if (!local->child_up[child])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "unlocking %s on subvolume %s",
+                        local->loc.path, priv->children[child]->name);
+
+                STACK_WIND (frame, afr_sh_metadata_unlck_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->inodelk,
+                            &local->loc, F_SETLK, &flock);
+
+                remaining--;
+                if (remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the xattrop requests sent by
+ * afr_sh_metadata_erase_pending.  The individual results are not examined;
+ * when the last reply is in (afr_frame_return reaches 0) the locks are
+ * released through afr_sh_metadata_finish.  Always returns 0.
+ */
+int
+afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
+                                   xlator_t *this, int32_t op_ret,
+                                   int32_t op_errno, dict_t *xattr)
+{
+        int outstanding = 0;
+
+        /* nothing is recorded per reply; the lock/unlock pair is kept as is */
+        LOCK (&frame->lock);
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        afr_sh_metadata_finish (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Clear the metadata pending flags after a sync.
+ *
+ * The pending matrix is converted into a delta matrix (taking sh->success
+ * into account), one dict per child that returned xattrs during lookup is
+ * filled from that delta, and each dict is applied with a
+ * GF_XATTROP_ADD_ARRAY xattrop.  Replies are collected by
+ * afr_sh_metadata_erase_pending_cbk, which continues with
+ * afr_sh_metadata_finish.
+ *
+ * When no xattrop can be issued (no child had xattrs, or memory could not
+ * be allocated) afr_sh_metadata_finish is called directly.
+ * Always returns 0.
+ */
+int
+afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t      *local       = NULL;
+        afr_self_heal_t  *sh          = NULL;
+        afr_private_t    *priv        = NULL;
+        int               call_count  = 0;
+        int               i           = 0;
+        dict_t          **erase_xattr = NULL;
+
+
+        local = frame->local;
+        sh    = &local->self_heal;
+        priv  = this->private;
+
+
+        afr_sh_pending_to_delta (sh->pending_matrix, sh->delta_matrix,
+                                 sh->success, priv->child_count);
+
+        erase_xattr = CALLOC (priv->child_count, sizeof (*erase_xattr));
+        if (!erase_xattr) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory, pending flags of %s not erased",
+                        local->loc.path);
+
+                afr_sh_metadata_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sh->xattr[i])
+                        continue;
+
+                erase_xattr[i] = get_new_dict ();
+                if (!erase_xattr[i]) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory, pending flags of %s not "
+                                "erased on %s",
+                                local->loc.path, priv->children[i]->name);
+                        continue;
+                }
+
+                dict_ref (erase_xattr[i]);
+
+                /* only count children for which a wind will really happen */
+                call_count++;
+        }
+
+        afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr,
+                               priv->child_count, AFR_METADATA_PENDING);
+
+        local->call_count = call_count;
+
+        if (call_count == 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "metadata of %s not healed on any subvolume",
+                        local->loc.path);
+
+                /* no dict was created, so only the array itself is freed */
+                FREE (erase_xattr);
+
+                afr_sh_metadata_finish (frame, this);
+                return 0;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!erase_xattr[i])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "erasing pending flags from %s on %s",
+                        local->loc.path, priv->children[i]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->xattrop,
+                                   &local->loc,
+                                   GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
+                if (!--call_count)
+                        break;
+        }
+
+        /* drop our references; frame and local are not touched past here */
+        for (i = 0; i < priv->child_count; i++) {
+                if (erase_xattr[i]) {
+                        dict_unref (erase_xattr[i]);
+                }
+        }
+        FREE (erase_xattr);
+
+        return 0;
+}
+
+
+/*
+ * Common reply handler for every attribute/xattr update sent by
+ * afr_sh_metadata_sync.  cookie carries the child index.  A failed update
+ * (op_ret == -1) is logged and clears sh->success for that child.  After
+ * the last reply the pending flags are erased through
+ * afr_sh_metadata_erase_pending.  Always returns 0.
+ */
+int
+afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *local       = frame->local;
+        afr_private_t   *priv        = this->private;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child       = (long) cookie;
+        int              outstanding = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "setting attributes failed for %s on %s (%s)",
+                                local->loc.path,
+                                priv->children[child]->name,
+                                strerror (op_errno));
+
+                        sh->success[child] = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding == 0)
+                afr_sh_metadata_erase_pending (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler used for the chown, chmod and utimens winds issued by
+ * afr_sh_metadata_sync.  The returned stat buffer is ignored; the reply is
+ * forwarded to afr_sh_metadata_sync_cbk.  Always returns 0.
+ */
+int
+afr_sh_metadata_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+
+ return 0;
+}
+
+
+/*
+ * Reply handler used for the setxattr wind issued by afr_sh_metadata_sync.
+ * The reply is forwarded unchanged to afr_sh_metadata_sync_cbk.
+ * Always returns 0.
+ */
+int
+afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+
+ return 0;
+}
+
+
+/*
+ * Copy the metadata of the chosen source (sh->source) to every active sink.
+ *
+ * For each child that is up and not marked as a source, chown, chmod and
+ * utimens are wound with the values found in sh->buf[source]; when xattr is
+ * non-NULL a setxattr with that dict is wound as well.  local->call_count
+ * is therefore sh->active_sinks * 3 (or * 4 with xattr).  All replies end
+ * up in afr_sh_metadata_sync_cbk.
+ *
+ * xattr may be NULL, in which case extended attributes are not synced.
+ * Always returns 0.
+ */
+int
+afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+{
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh = NULL;
+        afr_private_t   *priv = NULL;
+        int              source = 0;
+        int              active_sinks = 0;
+        int              call_count = 0;
+        int              i = 0;
+        /*
+         * Zero-filled so that tv_nsec is well defined even when only
+         * whole-second timestamps are available (the #else branch below).
+         */
+        struct timespec  ts[2] = {{0, 0}, {0, 0}};
+
+
+        local = frame->local;
+        sh = &local->self_heal;
+        priv = this->private;
+
+        source = sh->source;
+        active_sinks = sh->active_sinks;
+
+        /*
+         * 4 calls per sink - chown, chmod, utimes, setxattr
+         */
+        if (xattr)
+                call_count = active_sinks * 4;
+        else
+                call_count = active_sinks * 3;
+
+        local->call_count = call_count;
+
+#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
+        ts[0] = sh->buf[source].st_atim;
+        ts[1] = sh->buf[source].st_mtim;
+#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
+        ts[0] = sh->buf[source].st_atimespec;
+        ts[1] = sh->buf[source].st_mtimespec;
+#else
+        ts[0].tv_sec = sh->buf[source].st_atime;
+        ts[1].tv_sec = sh->buf[source].st_mtime;
+#endif
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (call_count == 0) {
+                        break;
+                }
+                if (sh->sources[i] || !local->child_up[i])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "syncing metadata of %s from %s to %s",
+                        local->loc.path, priv->children[source]->name,
+                        priv->children[i]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->chown,
+                                   &local->loc,
+                                   sh->buf[source].st_uid,
+                                   sh->buf[source].st_gid);
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->chmod,
+                                   &local->loc, sh->buf[source].st_mode);
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->utimens,
+                                   &local->loc, ts);
+
+                call_count = call_count - 3;
+
+                if (!xattr)
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->setxattr,
+                                   &local->loc, xattr, 0);
+                call_count--;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the getxattr sent to the source by
+ * afr_sh_metadata_sync_prepare.
+ *
+ * On success the three AFR pending keys are removed from the returned dict
+ * before it is handed to afr_sh_metadata_sync, so they are not part of
+ * what gets set on the sinks.  On failure the sync continues without
+ * extended attributes.  Always returns 0.
+ */
+int
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
+                              xlator_t *this,
+                              int32_t op_ret, int32_t op_errno, dict_t *xattr)
+{
+        afr_local_t     *local  = frame->local;
+        afr_private_t   *priv   = this->private;
+        afr_self_heal_t *sh     = &local->self_heal;
+        int              source = sh->source;
+
+        if (op_ret != -1) {
+                dict_del (xattr, AFR_DATA_PENDING);
+                dict_del (xattr, AFR_METADATA_PENDING);
+                dict_del (xattr, AFR_ENTRY_PENDING);
+
+                afr_sh_metadata_sync (frame, this, xattr);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
+                local->loc.path, priv->children[source]->name,
+                strerror (op_errno));
+
+        afr_sh_metadata_sync (frame, this, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Work out which children are active sinks (up and not a source), mark
+ * them and the source as successful in sh->success, and fetch the
+ * extended attributes of the source; the sync itself is started from
+ * afr_sh_metadata_getxattr_cbk.
+ *
+ * With no active sink the stage is wound down through
+ * afr_sh_metadata_finish.  Always returns 0.
+ */
+int
+afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local  = frame->local;
+        afr_private_t   *priv   = this->private;
+        afr_self_heal_t *sh     = &local->self_heal;
+        int              source = sh->source;
+        int              sinks  = 0;
+        int              child  = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if ((sh->sources[child] != 0) || (local->child_up[child] != 1))
+                        continue;
+
+                sh->success[child] = 1;
+                sinks++;
+        }
+        sh->success[source] = 1;
+
+        if (sinks == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no active sinks for performing self-heal on file %s",
+                        local->loc.path);
+                afr_sh_metadata_finish (frame, this);
+                return 0;
+        }
+        sh->active_sinks = sinks;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "syncing metadata of %s from subvolume %s to %d active sinks",
+                local->loc.path, priv->children[source]->name, sinks);
+
+        STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
+                    priv->children[source],
+                    priv->children[source]->fops->getxattr,
+                    &local->loc, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Decide which child is the metadata source and which children are sinks.
+ *
+ * Runs once all lookups of afr_sh_metadata_lookup have returned.  The
+ * pending matrix is built from the xattrs gathered per child, sources are
+ * marked from it and children that reported an errno are suppressed.  If
+ * that leaves no source, the configured favorite child (when set and
+ * error free) is used; if there is still none, the heal is flagged as
+ * aborted (local->govinda_gOvinda) and the locks are released through
+ * afr_sh_metadata_finish.  Otherwise the sync is started with
+ * afr_sh_metadata_sync_prepare.  Always returns 0.
+ */
+int
+afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int source = 0;
+ int i = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr,
+ priv->child_count, AFR_METADATA_PENDING);
+
+ afr_sh_print_pending_matrix (sh->pending_matrix, this);
+
+ afr_sh_mark_sources (sh->pending_matrix, sh->sources,
+ priv->child_count);
+
+ afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
+ priv->child_count);
+
+ nsources = afr_sh_source_count (sh->sources, priv->child_count);
+
+ /* no source from the pending flags: fall back to the favorite child */
+ if ((nsources == 0)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
+
+ sh->sources[priv->favorite_child] = 1;
+
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
+ }
+
+ /* still nothing to heal from: give up and only release the locks */
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to resolve conflicting metadata of %s. "
+ "Please resolve manually by fixing the "
+ "permissions/ownership of %s on your subvolumes. "
+ "You can also consider 'option favorite-child <>'",
+ local->loc.path, local->loc.path);
+
+ /* makes afr_sh_metadata_done abort instead of moving to the next stage */
+ local->govinda_gOvinda = 1;
+
+ afr_sh_metadata_finish (frame, this);
+ return 0;
+ }
+
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ sh->source = source;
+
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || sh->child_errno[i])
+ continue;
+
+ /* a child whose mode or owner differs from the source becomes a sink */
+ if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
+
+ if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
+ }
+
+ afr_sh_metadata_sync_prepare (frame, this);
+
+ return 0;
+}
+
+
+/*
+ * Reply handler for the lookups sent by afr_sh_metadata_lookup; cookie is
+ * the child index.
+ *
+ * A successful reply stores the stat buffer in sh->buf and, when present,
+ * a reference to the xattr dict in sh->xattr.  A failed reply stores
+ * op_errno in sh->child_errno.  After the last reply the source is chosen
+ * in afr_sh_metadata_fix.  Always returns 0.
+ */
+int
+afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno,
+                            inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        afr_local_t     *local       = frame->local;
+        afr_private_t   *priv        = this->private;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child       = (long) cookie;
+        int              outstanding = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != 0) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "path %s on subvolume %s => -1 (%s)",
+                                local->loc.path,
+                                priv->children[child]->name,
+                                strerror (op_errno));
+
+                        sh->child_errno[child] = op_errno;
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "path %s on subvolume %s is of mode 0%o",
+                                local->loc.path,
+                                priv->children[child]->name,
+                                buf->st_mode);
+
+                        sh->buf[child] = *buf;
+                        if (xattr)
+                                sh->xattr[child] = dict_ref (xattr);
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding == 0)
+                afr_sh_metadata_fix (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Look up local->loc on every subvolume flagged in local->child_up, asking
+ * for the AFR_METADATA_PENDING xattr (child_count * sizeof (int32_t)
+ * bytes) along with the stat data.  Replies are collected by
+ * afr_sh_metadata_lookup_cbk.
+ *
+ * If the request dict cannot be created or filled the lookups are still
+ * sent, but the problem is logged since the replies may then lack the
+ * pending xattr.  Always returns 0.
+ */
+int
+afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        afr_private_t   *priv = NULL;
+        int              i = 0;
+        int              call_count = 0;
+        dict_t          *xattr_req = NULL;
+        int              ret = 0;
+
+        local = frame->local;
+        priv = this->private;
+
+        call_count = local->child_count;
+        local->call_count = call_count;
+
+        xattr_req = dict_new();
+
+        if (!xattr_req) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "out of memory, looking up %s without requesting "
+                        "pending xattr",
+                        local->loc.path);
+        } else {
+                ret = dict_set_uint64 (xattr_req, AFR_METADATA_PENDING,
+                                       priv->child_count * sizeof(int32_t));
+                if (ret < 0)
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "failed to request pending xattr in lookup "
+                                "of %s",
+                                local->loc.path);
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "looking up %s on %s",
+                                local->loc.path, priv->children[i]->name);
+
+                        STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
+                                           (void *) (long) i,
+                                           priv->children[i],
+                                           priv->children[i]->fops->lookup,
+                                           &local->loc, xattr_req);
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        /* the winds above are done with the request; drop our reference */
+        if (xattr_req)
+                dict_unref (xattr_req);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the inode locks requested by afr_sh_metadata_lock;
+ * cookie is the child index.
+ *
+ * Any failed lock sets sh->op_failed (logged at DEBUG for EAGAIN, ERROR
+ * otherwise).  After the last reply the stage either backs out through
+ * afr_sh_metadata_finish (some lock failed) or continues with
+ * afr_sh_metadata_lookup.  Always returns 0.
+ */
+int
+afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t     *local       = frame->local;
+        afr_self_heal_t *sh          = &local->self_heal;
+        int              child       = (long) cookie;
+        int              outstanding = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "inode of %s on child %d locked",
+                                local->loc.path, child);
+                } else {
+                        sh->op_failed = 1;
+
+                        gf_log (this->name,
+                                (op_errno == EAGAIN ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                                "locking of %s on child %d failed: %s",
+                                local->loc.path, child,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+        if (outstanding != 0)
+                return 0;
+
+        if (sh->op_failed) {
+                afr_sh_metadata_finish (frame, this);
+                return 0;
+        }
+
+        afr_sh_metadata_lookup (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Request an F_WRLCK inode lock (F_SETLK, l_start = l_len = 0) on
+ * local->loc from every subvolume flagged in local->child_up.
+ * local->call_count is primed with local->child_count and the replies are
+ * collected by afr_sh_metadata_lk_cbk.  Always returns 0.
+ */
+int
+afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local     = frame->local;
+        afr_private_t *priv      = this->private;
+        struct flock   flock     = {0, };
+        int            remaining = local->child_count;
+        int            child     = 0;
+
+        local->call_count = remaining;
+
+        for (child = 0; child < priv->child_count; child++) {
+                /* the request is rebuilt on every pass, as in the original */
+                flock.l_type  = F_WRLCK;
+                flock.l_start = 0;
+                flock.l_len   = 0;
+
+                if (!local->child_up[child])
+                        continue;
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "locking %s on subvolume %s",
+                        local->loc.path, priv->children[child]->name);
+
+                STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->inodelk,
+                                   &local->loc, F_SETLK, &flock);
+
+                remaining--;
+                if (remaining == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Metadata self-heal stage.  When the lookup flagged the metadata as
+ * needing heal (local->need_metadata_self_heal) and metadata self-heal is
+ * enabled (priv->metadata_self_heal), start by locking; otherwise skip
+ * straight to afr_sh_metadata_done.  Always returns 0.
+ */
+int
+afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+
+        if (!local->need_metadata_self_heal || !priv->metadata_self_heal) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "proceeding to data check on %s",
+                        local->loc.path);
+                afr_sh_metadata_done (frame, this);
+                return 0;
+        }
+
+        afr_sh_metadata_lock (frame, this);
+
+        return 0;
+}
+
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
new file mode 100644
index 00000000000..1c97a9bc11b
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __AFR_SELF_HEAL_H__
+#define __AFR_SELF_HEAL_H__
+
+#include <sys/stat.h>
+
+/*
+ * Comparison helpers for two stat buffers.  Each argument is a pointer to
+ * (something convertible to) struct stat and is evaluated exactly once.
+ * The arguments are parenthesised before the cast so that expressions such
+ * as `cond ? a : b` or `base + i` can be passed safely.
+ */
+#define FILETYPE_DIFFERS(buf1,buf2) ((S_IFMT & ((struct stat *)(buf1))->st_mode) != (S_IFMT & ((struct stat *)(buf2))->st_mode))
+#define PERMISSION_DIFFERS(buf1,buf2) ((((struct stat *)(buf1))->st_mode) != (((struct stat *)(buf2))->st_mode))
+#define OWNERSHIP_DIFFERS(buf1,buf2) ((((struct stat *)(buf1))->st_uid) != (((struct stat *)(buf2))->st_uid) || (((struct stat *)(buf1))->st_gid != (((struct stat *)(buf2))->st_gid)))
+#define SIZE_DIFFERS(buf1,buf2) ((((struct stat *)(buf1))->st_size) != (((struct stat *)(buf2))->st_size))
+
+
+
+int
+afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this);
+int
+afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this);
+int
+afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this);
+
+int
+afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+
+int
+afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+
+int
+afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this,
+ int (*completion_cbk) (call_frame_t *, xlator_t *));
+
+#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
new file mode 100644
index 00000000000..3df9f07e5a3
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -0,0 +1,957 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "dict.h"
+#include "byte-order.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+#include <signal.h>
+
+
+/*
+ * Set every one of the child_count slots of pending to hton32 (1).
+ */
+static void
+__mark_all_pending (int32_t *pending, int child_count)
+{
+        int slot = 0;
+
+        while (slot < child_count) {
+                pending[slot] = hton32 (1);
+                slot++;
+        }
+}
+
+
+/*
+ * Zero the slot of a single child in pending.  child_count is accepted for
+ * symmetry with the other helpers but is not used.
+ */
+static void
+__mark_child_dead (int32_t *pending, int child_count, int child)
+{
+        int32_t *slot = &pending[child];
+
+        *slot = 0;
+}
+
+
+/*
+ * Zero the pending slot of every child whose child_up flag is 0; slots of
+ * children that are up are left untouched.
+ */
+static void
+__mark_down_children (int32_t *pending, int child_count, unsigned char *child_up)
+{
+        int slot = 0;
+
+        for (slot = 0; slot < child_count; slot++) {
+                if (child_up[slot])
+                        continue;
+
+                pending[slot] = 0;
+        }
+}
+
+
+/*
+ * Set every one of the child_count slots of pending to hton32 (-1).
+ */
+static void
+__mark_all_success (int32_t *pending, int child_count)
+{
+        int slot = 0;
+
+        while (slot < child_count) {
+                pending[slot] = hton32 (-1);
+                slot++;
+        }
+}
+
+
+/*
+ * Tell whether this translator has seen a write on fd before.
+ *
+ * The fd context is used as the marker: if fd_ctx_get fails, no context
+ * exists yet, so one is stored (value 0xaf1) and 1 is returned.  If a
+ * context is already present, 0 is returned.
+ */
+static int
+__is_first_write_on_fd (xlator_t *this, fd_t *fd)
+{
+        if (fd_ctx_get (fd, this, NULL) >= 0)
+                return 0;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "first writev() on fd=%p, writing changelog",
+                fd);
+
+        /* as in the original, the result of storing the marker is ignored */
+        fd_ctx_set (fd, this, 0xaf1);
+
+        return 1;
+}
+
+
+/*
+ * Return 1 when the changelog is enabled for the given transaction type,
+ * 0 otherwise.  Data, metadata and entry (including rename) transactions
+ * follow the matching *_change_log option in priv; flush transactions are
+ * always enabled.  Unknown types yield 0.
+ */
+static int
+__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
+{
+        switch (type) {
+        case AFR_DATA_TRANSACTION:
+                return priv->data_change_log ? 1 : 0;
+
+        case AFR_METADATA_TRANSACTION:
+                return priv->metadata_change_log ? 1 : 0;
+
+        case AFR_ENTRY_TRANSACTION:
+        case AFR_ENTRY_RENAME_TRANSACTION:
+                return priv->entry_change_log ? 1 : 0;
+
+        case AFR_FLUSH_TRANSACTION:
+                return 1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Decide whether the pre-op changelog has to be written for the operation
+ * described by frame->local.  Returns 1 when it does, 0 when it does not.
+ *
+ *  - changelog disabled for the transaction type: never;
+ *  - flush: never (only the post-op is done for flush);
+ *  - write / ftruncate: only when there is no fd or this is the first
+ *    write seen on the fd;
+ *  - everything else: always.
+ */
+static int
+__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+
+        if (!__changelog_enabled (priv, local->transaction.type))
+                return 0;
+
+        if (local->op == GF_FOP_FLUSH)
+                return 0;
+
+        if ((local->op == GF_FOP_WRITE) || (local->op == GF_FOP_FTRUNCATE)) {
+                if (!local->fd)
+                        return 1;
+
+                return __is_first_write_on_fd (this, local->fd) ? 1 : 0;
+        }
+
+        return 1;
+}
+
+
+/*
+ * Decide whether the post-op changelog has to be written: returns 1 when
+ * the changelog is enabled for the transaction type and the operation is
+ * neither a write nor an ftruncate, 0 otherwise.
+ */
+static int
+__changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = this->private;
+        afr_local_t   *local = frame->local;
+
+        if (!__changelog_enabled (priv, local->transaction.type))
+                return 0;
+
+        if ((local->op == GF_FOP_WRITE) || (local->op == GF_FOP_FTRUNCATE))
+                return 0;
+
+        return 1;
+}
+
+
+/*
+ * Return the configured lock server count for a transaction type: the
+ * data count for data and flush transactions, the metadata count for
+ * metadata transactions and the entry count for entry and rename
+ * transactions.  Unknown types yield 0.
+ */
+static int
+afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+{
+        switch (type) {
+        case AFR_FLUSH_TRANSACTION:
+        case AFR_DATA_TRANSACTION:
+                return priv->data_lock_server_count;
+
+        case AFR_METADATA_TRANSACTION:
+                return priv->metadata_lock_server_count;
+
+        case AFR_ENTRY_TRANSACTION:
+        case AFR_ENTRY_RENAME_TRANSACTION:
+                return priv->entry_lock_server_count;
+        }
+
+        return 0;
+}
+
+
+/* {{{ unlock */
+
+/*
+ * Reply handler shared by all unlock winds of afr_unlock.  The result of
+ * the unlock is not inspected; when the last reply arrives the transaction
+ * is completed through local->transaction.done.  Always returns 0.
+ */
+int32_t
+afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local       = frame->local;
+        int          outstanding = 0;
+
+        LOCK (&frame->lock);
+        {
+                outstanding = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (outstanding != 0)
+                return 0;
+
+        local->transaction.done (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Release the locks held by the current transaction.
+ *
+ * One unlock is wound to every child flagged in
+ * local->transaction.locked_nodes; rename transactions wind two (one on
+ * the new parent, one on the old parent), which is why the expected reply
+ * count is doubled for them.  Data, metadata and flush transactions
+ * release an inode lock over [transaction.start, transaction.len], entry
+ * transactions release an entry lock on transaction.basename.  The fd
+ * based variant of the fop is used when local->fd is set.
+ *
+ * Replies are collected by afr_unlock_common_cbk.  When nothing is locked
+ * local->transaction.done is called directly.  Always returns 0.
+ */
+int
+afr_unlock (call_frame_t *frame, xlator_t *this)
+{
+        /*
+         * zero-filled so that the fields not set below (l_whence, l_pid)
+         * have a defined value when the structure is passed down
+         */
+        struct flock flock = {0, };
+
+        int i = 0;
+        int call_count = 0;
+
+        afr_local_t *local = NULL;
+        afr_private_t * priv = this->private;
+
+        local = frame->local;
+
+        call_count = afr_locked_nodes_count (local->transaction.locked_nodes,
+                                             priv->child_count);
+
+        if (call_count == 0) {
+                local->transaction.done (frame, this);
+                return 0;
+        }
+
+        if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
+                call_count *= 2;
+
+        local->call_count = call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                flock.l_start = local->transaction.start;
+                flock.l_len   = local->transaction.len;
+                flock.l_type  = F_UNLCK;
+
+                if (local->transaction.locked_nodes[i]) {
+                        switch (local->transaction.type) {
+                        case AFR_DATA_TRANSACTION:
+                        case AFR_METADATA_TRANSACTION:
+                        case AFR_FLUSH_TRANSACTION:
+
+                                if (local->fd) {
+                                        STACK_WIND (frame, afr_unlock_common_cbk,
+                                                    priv->children[i],
+                                                    priv->children[i]->fops->finodelk,
+                                                    local->fd, F_SETLK, &flock);
+                                } else {
+                                        STACK_WIND (frame, afr_unlock_common_cbk,
+                                                    priv->children[i],
+                                                    priv->children[i]->fops->inodelk,
+                                                    &local->loc, F_SETLK, &flock);
+                                }
+
+                                break;
+
+                        case AFR_ENTRY_RENAME_TRANSACTION:
+
+                                STACK_WIND (frame, afr_unlock_common_cbk,
+                                            priv->children[i],
+                                            priv->children[i]->fops->entrylk,
+                                            &local->transaction.new_parent_loc,
+                                            local->transaction.new_basename,
+                                            ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+
+                                /* first of the two unlocks of a rename */
+                                call_count--;
+
+                                /* fall through */
+
+                        case AFR_ENTRY_TRANSACTION:
+                                if (local->fd) {
+                                        STACK_WIND (frame, afr_unlock_common_cbk,
+                                                    priv->children[i],
+                                                    priv->children[i]->fops->fentrylk,
+                                                    local->fd,
+                                                    local->transaction.basename,
+                                                    ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+                                } else {
+                                        STACK_WIND (frame, afr_unlock_common_cbk,
+                                                    priv->children[i],
+                                                    priv->children[i]->fops->entrylk,
+                                                    &local->transaction.parent_loc,
+                                                    local->transaction.basename,
+                                                    ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+
+                                }
+                                break;
+                        }
+
+                        /* all expected winds are out; do not touch local again */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/* }}} */
+
+
+/* {{{ pending */
+
+/*
+ * Reply handler for the xattrop winds of afr_changelog_post_op.  The
+ * individual results are not inspected.  After the last reply the
+ * transaction is either completed right away (when the lock server count
+ * for its type is 0) or continues with afr_unlock.  Always returns 0.
+ */
+int32_t
+afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, dict_t *xattr)
+{
+        afr_private_t *priv        = this->private;
+        afr_local_t   *local       = frame->local;
+        int            outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                outstanding = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (outstanding != 0)
+                return 0;
+
+        if (afr_lock_server_count (priv, local->transaction.type) == 0)
+                local->transaction.done (frame, this);
+        else
+                afr_unlock (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Post-operation changelog update.
+ *
+ * local->pending_array is filled with hton32 (-1) for every child and then
+ * zeroed for the children that are down; the array is sent under the key
+ * local->transaction.pending with a GF_XATTROP_ADD_ARRAY xattrop to every
+ * child that is up.  Rename transactions send two xattrops per child (new
+ * parent and old parent), so the expected reply count is doubled.
+ *
+ * Replies are collected by afr_changelog_post_op_cbk.  When no child is up
+ * the function goes straight to afr_unlock.  Always returns 0.
+ */
+int
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+
+ int ret = 0;
+ int i = 0;
+ int call_count = 0;
+
+ afr_local_t * local = NULL;
+ dict_t * xattr = dict_ref (get_new_dict ());
+
+ local = frame->local;
+
+ __mark_all_success (local->pending_array, priv->child_count);
+ __mark_down_children (local->pending_array, priv->child_count, local->child_up);
+
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
+
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ call_count *= 2;
+ }
+
+ local->call_count = call_count;
+
+ if (call_count == 0) {
+ /* no child is up */
+ dict_unref (xattr);
+ afr_unlock (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ /* (re)attach the pending array before every child is wound */
+ ret = dict_set_static_bin (xattr, local->transaction.pending,
+ local->pending_array,
+ priv->child_count * sizeof (int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set pending entry");
+
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ case AFR_FLUSH_TRANSACTION:
+ {
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ }
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr);
+
+ /* first of the two xattrops a rename sends to this child */
+ call_count--;
+ }
+
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+ ret = dict_set_static_bin (xattr, local->transaction.pending,
+ local->pending_array,
+ priv->child_count * sizeof (int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set pending entry");
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ }
+ break;
+ }
+
+ /* stop once the expected number of winds has been issued */
+ if (!--call_count)
+ break;
+ }
+ }
+
+ dict_unref (xattr);
+ return 0;
+}
+
+
+/*
+ * Reply handler for the xattrop winds of afr_changelog_pre_op; cookie is
+ * the child index.
+ *
+ * A failed xattrop clears local->child_up for that child and records
+ * op_errno in local->op_errno; ENOTSUP additionally sets local->op_ret to
+ * -1.  After the last reply the transaction is resumed through
+ * local->transaction.resume when ENOTSUP was recorded, otherwise the
+ * operation itself is started through local->transaction.fop.
+ * Always returns 0.
+ */
+int32_t
+afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = this->private;
+ loc_t * loc = NULL;
+
+ int call_count = -1;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ /* this child takes no further part in the transaction */
+ local->child_up[child_index] = 0;
+
+ if (op_errno == ENOTSUP) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop not supported by %s",
+ priv->children[child_index]->name);
+ local->op_ret = -1;
+ } else if (!child_went_down (op_ret, op_errno)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop failed on child %s: %s",
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ local->op_errno = op_errno;
+ }
+
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ if ((local->op_ret == -1) &&
+ (local->op_errno == ENOTSUP)) {
+ local->transaction.resume (frame, this);
+ } else {
+ local->transaction.fop (frame, this);
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Pre-operation changelog update.
+ *
+ * local->pending_array is filled with hton32 (1) for every child and sent
+ * under the key local->transaction.pending with a GF_XATTROP_ADD_ARRAY
+ * xattrop to every child that is up.  Rename transactions send two
+ * xattrops per child (new parent and old parent), so the expected reply
+ * count is doubled.
+ *
+ * Replies are collected by afr_changelog_pre_op_cbk, with the child index
+ * as cookie.  When no child is up the function goes straight to
+ * afr_unlock.  Always returns 0.
+ */
+int
+afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ dict_t *xattr = NULL;
+
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ xattr = get_new_dict ();
+ dict_ref (xattr);
+
+ call_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ call_count *= 2;
+ }
+
+ if (call_count == 0) {
+ /* no child is up */
+ dict_unref (xattr);
+ afr_unlock (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ __mark_all_pending (local->pending_array, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ /* (re)attach the pending array before every child is wound */
+ ret = dict_set_static_bin (xattr,
+ local->transaction.pending,
+ local->pending_array,
+ (priv->child_count *
+ sizeof (int32_t)));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set pending entry");
+
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ case AFR_FLUSH_TRANSACTION:
+ {
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &(local->loc),
+ GF_XATTROP_ADD_ARRAY, xattr);
+ }
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr);
+
+ /* first of the two xattrops a rename sends to this child */
+ call_count--;
+ }
+
+
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+
+ ret = dict_set_static_bin (xattr, local->transaction.pending,
+ local->pending_array,
+ priv->child_count * sizeof (int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set pending entry");
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr);
+ }
+
+ break;
+ }
+
+ /* stop once the expected number of winds has been issued */
+ if (!--call_count)
+ break;
+ }
+ }
+
+ dict_unref (xattr);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ lock */
+
+/* forward declaration: afr_lock_cbk and afr_lock_rec call each other */
+static
+int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index);
+
+/**
+ * afr_lock_cbk - handle one lock reply from a child
+ *
+ * cookie carries the index of the child that replied. On failure the
+ * child is marked down in local->child_up and op_errno is recorded.
+ * ENOSYS additionally records op_ret and leads to afr_unlock; in every
+ * other case locking continues with the next child through
+ * afr_lock_rec. For rename transactions the decision is taken only once
+ * both lock replies have arrived.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ /* NOTE(review): 'done' and 'priv' are assigned but never read in this function */
+ int done = 0;
+ int child_index = (long) cookie;
+
+ /* stays 0 for non-rename transactions, so the block after UNLOCK always runs for them */
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ /* wait for the other lock to return */
+ call_count = --local->call_count;
+ }
+
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ /* NOTE(review): the comment above says ENOTSUP but the code tests and stores ENOSYS - confirm which is intended */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/posix-locks xlator on server");
+ local->op_ret = op_ret;
+ done = 1;
+ }
+
+ local->child_up[child_index] = 0;
+ local->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ if ((local->op_ret == -1) &&
+ (local->op_errno == ENOSYS)) {
+ afr_unlock (frame, this);
+ } else {
+ /* NOTE(review): this branch is also taken when this reply failed with an errno other than ENOSYS; confirm that counting such a child as locked is intended */
+ local->transaction.locked_nodes[child_index] = 1;
+ local->transaction.lock_count++;
+ afr_lock_rec (frame, this, child_index + 1);
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * lower_path - choose which of two (loc, basename) pairs sorts first
+ *
+ * The pairs are ordered by strcmp on the loc paths, with strcmp on the
+ * basenames as tie-breaker. l1 is returned when the pairs compare equal.
+ */
+static loc_t *
+lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+{
+        int order = strcmp (l1->path, l2->path);
+
+        /* same parent path: the basenames decide */
+        if (order == 0)
+                order = strcmp (b1, b2);
+
+        return (order > 0) ? l2 : l1;
+}
+
+
+/**
+ * afr_lock_rec - request the transaction lock on the next usable child
+ *
+ * Starting at child_index, skips children that are marked down in
+ * local->child_up and winds one lock request (two for a rename) to the
+ * first child that is up. afr_lock_cbk receives the reply and calls back
+ * into this function with child_index + 1.
+ *
+ * When the children are exhausted without a single lock, the
+ * transaction is finished through local->transaction.done with
+ * op_ret = -1 / op_errno = EAGAIN. When the children are exhausted or
+ * the required number of locks is held, the transaction moves on to the
+ * changelog pre-op or directly to the fop.
+ *
+ * Always returns 0.
+ */
+static
+int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)
+{
+        afr_local_t *   local = NULL;
+        afr_private_t * priv  = NULL;
+
+        /* zero-fill so that the members not assigned below (l_whence,
+           l_pid) reach the subvolume with a defined value instead of
+           stack garbage */
+        struct flock flock = {0, };
+
+        loc_t * lower  = NULL;
+        loc_t * higher = NULL;
+
+        const char *lower_name  = NULL;
+        const char *higher_name = NULL;
+
+        local = frame->local;
+        priv  = this->private;
+
+        flock.l_start = local->transaction.start;
+        flock.l_len   = local->transaction.len;
+        flock.l_type  = F_WRLCK;
+
+        /* skip over children that are down */
+        while ((child_index < priv->child_count)
+               && !local->child_up[child_index])
+                child_index++;
+
+        if ((child_index == priv->child_count) &&
+            local->transaction.lock_count == 0) {
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "unable to lock on even one child");
+
+                local->op_ret   = -1;
+                local->op_errno = EAGAIN;
+
+                local->transaction.done (frame, this);
+
+                return 0;
+
+        }
+
+        if ((child_index == priv->child_count)
+            || (local->transaction.lock_count ==
+                afr_lock_server_count (priv, local->transaction.type))) {
+
+                /* we're done locking */
+
+                if (__changelog_needed_pre_op (frame, this)) {
+                        afr_changelog_pre_op (frame, this);
+                } else {
+                        local->transaction.fop (frame, this);
+                }
+
+                return 0;
+        }
+
+        switch (local->transaction.type) {
+        case AFR_DATA_TRANSACTION:
+        case AFR_METADATA_TRANSACTION:
+        case AFR_FLUSH_TRANSACTION:
+
+                /* fd-based lock when the transaction has an fd,
+                   path-based otherwise */
+                if (local->fd) {
+                        STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->finodelk,
+                                           local->fd, F_SETLKW, &flock);
+
+                } else {
+                        STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->inodelk,
+                                           &local->loc, F_SETLKW, &flock);
+                }
+
+                break;
+
+        case AFR_ENTRY_RENAME_TRANSACTION:
+        {
+                /* two entry locks per child; afr_lock_cbk waits for both */
+                local->call_count = 2;
+
+                /* the two locks are always requested in the order given
+                   by lower_path: lower pair first, then the other */
+                lower = lower_path (&local->transaction.parent_loc,
+                                    local->transaction.basename,
+                                    &local->transaction.new_parent_loc,
+                                    local->transaction.new_basename);
+
+                lower_name = (lower == &local->transaction.parent_loc ?
+                              local->transaction.basename :
+                              local->transaction.new_basename);
+
+                higher = (lower == &local->transaction.parent_loc ?
+                          &local->transaction.new_parent_loc :
+                          &local->transaction.parent_loc);
+
+                higher_name = (higher == &local->transaction.parent_loc ?
+                               local->transaction.basename :
+                               local->transaction.new_basename);
+
+
+                /* TODO: these locks should be blocking */
+
+                STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                   (void *) (long) child_index,
+                                   priv->children[child_index],
+                                   priv->children[child_index]->fops->entrylk,
+                                   lower, lower_name,
+                                   ENTRYLK_LOCK, ENTRYLK_WRLCK);
+
+                STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                   (void *) (long) child_index,
+                                   priv->children[child_index],
+                                   priv->children[child_index]->fops->entrylk,
+                                   higher, higher_name,
+                                   ENTRYLK_LOCK, ENTRYLK_WRLCK);
+
+                break;
+        }
+
+        case AFR_ENTRY_TRANSACTION:
+                if (local->fd) {
+                        STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->fentrylk,
+                                           local->fd,
+                                           local->transaction.basename,
+                                           ENTRYLK_LOCK, ENTRYLK_WRLCK);
+                } else {
+                        STACK_WIND_COOKIE (frame, afr_lock_cbk,
+                                           (void *) (long) child_index,
+                                           priv->children[child_index],
+                                           priv->children[child_index]->fops->entrylk,
+                                           &local->transaction.parent_loc,
+                                           local->transaction.basename,
+                                           ENTRYLK_LOCK, ENTRYLK_WRLCK);
+                }
+
+                break;
+        }
+
+        return 0;
+}
+
+
+/* afr_lock - start acquiring the transaction lock, beginning at child 0 */
+int32_t afr_lock (call_frame_t *frame, xlator_t *this)
+{
+        int32_t ret = afr_lock_rec (frame, this, 0);
+
+        return ret;
+}
+
+
+/* }}} */
+
+/**
+ * afr_transaction_resume - continue a transaction after its fop phase
+ *
+ * Runs the changelog post-op when one is needed. Otherwise the
+ * transaction is finished directly if its type uses no lock servers, or
+ * unlocked through afr_unlock if it does.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_transaction_resume (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        if (__changelog_needed_post_op (frame, this)) {
+                afr_changelog_post_op (frame, this);
+                return 0;
+        }
+
+        if (afr_lock_server_count (priv, local->transaction.type) != 0)
+                afr_unlock (frame, this);
+        else
+                local->transaction.done (frame, this);
+
+        return 0;
+}
+
+
+/**
+ * afr_transaction_child_died - record that a child died during an fop
+ *
+ * Hands the transaction's pending array, the child count and the index
+ * of the dead child to __mark_child_dead.
+ */
+
+void
+afr_transaction_child_died (call_frame_t *frame, xlator_t *this, int child_index)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        __mark_child_dead (local->pending_array, priv->child_count,
+                           child_index);
+}
+
+
+/**
+ * afr_transaction - entry point for running an fop as a transaction
+ *
+ * Initialises the transaction part of frame->local, stores the resume
+ * handler and the transaction type, then starts the first phase:
+ * locking when the type uses lock servers, otherwise the changelog
+ * pre-op if one is needed, otherwise the fop itself.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        afr_transaction_local_init (local, priv);
+
+        local->transaction.resume = afr_transaction_resume;
+        local->transaction.type   = type;
+
+        if (afr_lock_server_count (priv, local->transaction.type) != 0) {
+                afr_lock (frame, this);
+                return 0;
+        }
+
+        /* no lock servers for this transaction type: skip locking */
+        if (__changelog_needed_pre_op (frame, this))
+                afr_changelog_pre_op (frame, this);
+        else
+                local->transaction.fop (frame, this);
+
+        return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
new file mode 100644
index 00000000000..49cdd219f25
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TRANSACTION_H__
+#define __TRANSACTION_H__
+
+/* extended-attribute key for the metadata pending changelog */
+#define AFR_METADATA_PENDING "trusted.glusterfs.afr.metadata-pending"
+
+/* extended-attribute key for the data pending changelog */
+#define AFR_DATA_PENDING "trusted.glusterfs.afr.data-pending"
+
+/* extended-attribute key for the entry pending changelog */
+#define AFR_ENTRY_PENDING "trusted.glusterfs.afr.entry-pending"
+
+/* inform the transaction that the child at child_index died during an fop */
+void
+afr_transaction_child_died (call_frame_t *frame, xlator_t *this,
+ int child_index);
+
+/* run the fop set up in frame->local as a transaction of the given type */
+int32_t
+afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+
+#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
new file mode 100644
index 00000000000..e4c1a847985
--- /dev/null
+++ b/xlators/cluster/afr/src/afr.c
@@ -0,0 +1,2338 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+
+#include "afr-self-heal.h"
+
+
+/**
+ * afr_local_sh_cleanup - release the self-heal state in local->self_heal
+ *
+ * Frees the stat buffer, the per-child xattr dicts, the child_errno
+ * array, both matrices, the sources/success arrays, drops the reference
+ * on the healing fd and wipes parent_loc. The per-child arrays are
+ * walked using the child count from this->private.
+ */
+
+void
+afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+{
+        afr_self_heal_t *sh    = &local->self_heal;
+        afr_private_t   *priv  = this->private;
+        int              child = 0;
+
+        if (sh->buf)
+                FREE (sh->buf);
+
+        if (sh->xattr) {
+                for (child = 0; child < priv->child_count; child++) {
+                        if (!sh->xattr[child])
+                                continue;
+
+                        dict_unref (sh->xattr[child]);
+                        sh->xattr[child] = NULL;
+                }
+                FREE (sh->xattr);
+        }
+
+        if (sh->child_errno)
+                FREE (sh->child_errno);
+
+        /* each matrix is an array of rows: free the rows, then the array */
+        if (sh->pending_matrix) {
+                for (child = 0; child < priv->child_count; child++) {
+                        FREE (sh->pending_matrix[child]);
+                }
+                FREE (sh->pending_matrix);
+        }
+
+        if (sh->delta_matrix) {
+                for (child = 0; child < priv->child_count; child++) {
+                        FREE (sh->delta_matrix[child]);
+                }
+                FREE (sh->delta_matrix);
+        }
+
+        if (sh->sources)
+                FREE (sh->sources);
+
+        if (sh->success)
+                FREE (sh->success);
+
+        if (sh->healing_fd) {
+                fd_unref (sh->healing_fd);
+                sh->healing_fd = NULL;
+        }
+
+        loc_wipe (&sh->parent_loc);
+}
+
+
+/**
+ * afr_local_cleanup - release everything owned by an afr_local_t
+ *
+ * Does nothing when local is NULL. The afr_local_t structure itself is
+ * not freed here.
+ */
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+        if (local == NULL)
+                return;
+
+        /* self-heal state first */
+        afr_local_sh_cleanup (local, this);
+
+        FREE (local->child_errno);
+        FREE (local->pending_array);
+
+        loc_wipe (&local->loc);
+        loc_wipe (&local->newloc);
+
+        /* transaction state */
+        FREE (local->transaction.locked_nodes);
+        FREE (local->transaction.child_errno);
+
+        FREE (local->transaction.basename);
+        FREE (local->transaction.new_basename);
+
+        loc_wipe (&local->transaction.parent_loc);
+        loc_wipe (&local->transaction.new_parent_loc);
+
+        if (local->fd)
+                fd_unref (local->fd);
+
+        if (local->xattr_req)
+                dict_unref (local->xattr_req);
+
+        FREE (local->child_up);
+
+        /* per-fop state kept in local->cont, one fop at a time */
+
+        /* lookup */
+        if (local->cont.lookup.xattr)
+                dict_unref (local->cont.lookup.xattr);
+
+        /* getxattr */
+        if (local->cont.getxattr.name)
+                FREE (local->cont.getxattr.name);
+
+        /* lk */
+        if (local->cont.lk.locked_nodes)
+                FREE (local->cont.lk.locked_nodes);
+
+        /* checksum */
+        if (local->cont.checksum.file_checksum)
+                FREE (local->cont.checksum.file_checksum);
+        if (local->cont.checksum.dir_checksum)
+                FREE (local->cont.checksum.dir_checksum);
+
+        /* create */
+        if (local->cont.create.fd)
+                fd_unref (local->cont.create.fd);
+
+        /* writev */
+        FREE (local->cont.writev.vector);
+
+        /* setxattr */
+        if (local->cont.setxattr.dict)
+                dict_unref (local->cont.setxattr.dict);
+
+        /* removexattr */
+        FREE (local->cont.removexattr.name);
+
+        /* symlink */
+        FREE (local->cont.symlink.linkpath);
+}
+
+
+/**
+ * afr_frame_return - account for one reply on this frame
+ *
+ * Decrements local->call_count under frame->lock and returns the new
+ * value; callers treat 0 as "this was the last outstanding reply".
+ */
+int
+afr_frame_return (call_frame_t *frame)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = 0;
+
+        LOCK (&frame->lock);
+        remaining = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        return remaining;
+}
+
+/**
+ * afr_first_up_child - return the index of the first child that is up
+ *
+ * Scans priv->child_up under priv->lock.
+ *
+ * Returns the lowest index i with priv->child_up[i] set, or -1 when no
+ * child is up.
+ */
+
+int
+afr_first_up_child (afr_private_t *priv)
+{
+        int ret = -1;
+        int i   = 0;
+
+        LOCK (&priv->lock);
+        {
+                for (i = 0; i < priv->child_count; i++) {
+                        if (priv->child_up[i]) {
+                                ret = i;
+                                break;
+                        }
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        return ret;
+}
+
+
+/**
+ * afr_up_children_count - return the number of children that are up
+ *
+ * Counts the non-zero entries among the first child_count bytes of
+ * child_up.
+ */
+
+int
+afr_up_children_count (int child_count, unsigned char *child_up)
+{
+        int up  = 0;
+        int idx = 0;
+
+        while (idx < child_count) {
+                if (child_up[idx] != 0)
+                        up += 1;
+                idx++;
+        }
+
+        return up;
+}
+
+
+/**
+ * afr_locked_nodes_count - return the number of children holding a lock
+ *
+ * Counts the non-zero entries among the first child_count bytes of
+ * locked_nodes.
+ */
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+{
+        int locked = 0;
+        int idx    = 0;
+
+        while (idx < child_count) {
+                if (locked_nodes[idx] != 0)
+                        locked += 1;
+                idx++;
+        }
+
+        return locked;
+}
+
+
+/**
+ * afr_itransform - fold a child index into an inode number
+ *
+ * Returns (ino * child_count) + child_index, so that the child index
+ * can be recovered as the result modulo child_count. The all-ones value
+ * ((uint64_t) -1) is passed through unchanged.
+ */
+ino64_t
+afr_itransform (ino64_t ino, int child_count, int child_index)
+{
+        const ino64_t invalid = ((uint64_t) -1);
+
+        if (ino == invalid)
+                return invalid;
+
+        return (ino * child_count) + child_index;
+}
+
+
+/**
+ * afr_deitransform_orig - recover the child index from an inode number
+ *
+ * Returns ino modulo child_count. child_count must be non-zero.
+ */
+int
+afr_deitransform_orig (ino64_t ino, int child_count)
+{
+        return (int) (ino % child_count);
+}
+
+
+/**
+ * afr_deitransform - always returns 0; both arguments are ignored
+ */
+int
+afr_deitransform (ino64_t ino, int child_count)
+{
+        (void) ino;
+        (void) child_count;
+
+        return 0;
+}
+
+
+/**
+ * afr_self_heal_cbk - finish a lookup once self-heal has completed
+ *
+ * When the lookup saw mismatching file types (local->govinda_gOvinda),
+ * a context value is stored on the inode; a failure to store it turns
+ * the lookup into an error. Otherwise any existing context is removed.
+ * The lookup result is then unwound to the caller.
+ *
+ * Always returns 0.
+ */
+int
+afr_self_heal_cbk (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+        int          ret   = -1;
+
+        if (!local->govinda_gOvinda) {
+                inode_ctx_del (local->cont.lookup.inode, this, NULL);
+        } else {
+                ret = inode_ctx_put (local->cont.lookup.inode, this, 1);
+
+                if (ret < 0) {
+                        local->op_ret   = -1;
+                        local->op_errno = -ret;
+                }
+        }
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          local->cont.lookup.inode,
+                          &local->cont.lookup.buf,
+                          local->cont.lookup.xattr);
+
+        return 0;
+}
+
+
+/**
+ * afr_lookup_cbk - collect one child's reply to lookup
+ *
+ * cookie carries the index of the replying child. Failed replies only
+ * update the ENOENT counter and op_errno. Successful replies are checked
+ * for pending changelog xattrs and compared with the stat kept from
+ * earlier replies to decide which kinds of self-heal are needed.
+ *
+ * After the last reply either self-heal is started (only when no open
+ * fds were reported) or the result is unwound to the caller.
+ *
+ * Always returns 0.
+ */
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ struct stat * lookup_buf = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ int prev_child_index = -1;
+ uint32_t open_fd_count = 0;
+ int ret = 0;
+
+ child_index = (long) cookie;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ lookup_buf = &local->cont.lookup.buf;
+
+ if (op_ret == -1) {
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ /* ENOTCONN never overwrites an errno already recorded */
+ if (op_errno != ENOTCONN)
+ local->op_errno = op_errno;
+
+ goto unlock;
+ }
+
+ if (afr_sh_has_metadata_pending (xattr, child_index, this))
+ local->need_metadata_self_heal = 1;
+
+ if (afr_sh_has_entry_pending (xattr, child_index, this))
+ local->need_entry_self_heal = 1;
+
+ if (afr_sh_has_data_pending (xattr, child_index, this))
+ local->need_data_self_heal = 1;
+
+ /* the return value is not checked; open_fd_count starts at 0 */
+ ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ local->open_fd_count += open_fd_count;
+
+ /* in case of revalidate, we need to send stat of the
+ * child whose stat was sent during the first lookup.
+ * (so that time stamp does not vary with revalidate.
+ * in case it is down, stat of the first success will
+ * be replied */
+
+ /* inode number should be preserved across revalidates */
+
+ if (local->success_count == 0) {
+ /* first successful reply: keep its inode, xattr and stat */
+ local->op_ret = op_ret;
+
+ local->cont.lookup.inode = inode;
+ local->cont.lookup.xattr = dict_ref (xattr);
+
+ *lookup_buf = *buf;
+ lookup_buf->st_ino = afr_itransform (buf->st_ino,
+ priv->child_count,
+ child_index);
+ } else {
+ if (FILETYPE_DIFFERS (buf, lookup_buf)) {
+ /* mismatching filetypes with same name
+ -- Govinda !! GOvinda !!!
+ */
+ local->govinda_gOvinda = 1;
+ }
+
+ if (PERMISSION_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ local->need_metadata_self_heal = 1;
+ }
+
+ if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
+ /* mismatching ownership */
+ local->need_metadata_self_heal = 1;
+ }
+
+ if (SIZE_DIFFERS (buf, lookup_buf)
+ && S_ISREG (buf->st_mode)) {
+ local->need_data_self_heal = 1;
+ }
+
+ /* prefer the stat of the lowest-indexed child that replied */
+ prev_child_index = afr_deitransform_orig (lookup_buf->st_ino,
+ priv->child_count);
+ if (child_index < prev_child_index) {
+ *lookup_buf = *buf;
+ lookup_buf->st_ino = afr_itransform (buf->st_ino,
+ priv->child_count,
+ child_index);
+ }
+ }
+
+ local->success_count++;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (local->op_ret == 0) {
+ /* KLUDGE: assuming DHT will not itransform in
+ revalidate */
+ if (local->cont.lookup.inode->ino)
+ lookup_buf->st_ino =
+ local->cont.lookup.inode->ino;
+ }
+
+ /* present on some children and missing on others: heal everything */
+ if (local->success_count && local->enoent_count) {
+ local->need_metadata_self_heal = 1;
+ local->need_data_self_heal = 1;
+ local->need_entry_self_heal = 1;
+ }
+
+ if (local->success_count) {
+ /* check for govinda_gOvinda case in previous lookup */
+ if (!inode_ctx_get (local->cont.lookup.inode,
+ this, NULL))
+ local->need_data_self_heal = 1;
+ }
+
+ /* self-heal is attempted only when no open fds were reported */
+ if ((local->need_metadata_self_heal
+ || local->need_data_self_heal
+ || local->need_entry_self_heal)
+ && (!local->open_fd_count)) {
+
+ if (!local->cont.lookup.inode->st_mode) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->st_mode =
+ lookup_buf->st_mode;
+ }
+
+ afr_self_heal (frame, this, afr_self_heal_cbk);
+ } else {
+ AFR_STACK_UNWIND (frame, local->op_ret,
+ local->op_errno,
+ local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr);
+ }
+ }
+
+ return 0;
+}
+
+
+/**
+ * afr_lookup - send lookup to every child
+ *
+ * Allocates the frame-local state, asks each child for the pending
+ * changelog xattrs of the enabled self-heal kinds plus the open-fd
+ * count, and winds lookup to all priv->child_count children. Replies
+ * are merged in afr_lookup_cbk.
+ *
+ * Always returns 0; if the local allocation fails the frame is unwound
+ * with ENOMEM.
+ */
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ /* NOTE(review): op_errno is not referenced directly in this function; presumably ALLOC_OR_GOTO uses it - confirm before removing */
+ int32_t op_errno = 0;
+
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ local->op_ret = -1;
+
+ frame->local = local;
+
+ loc_copy (&local->loc, loc);
+
+ local->reval_child_index = 0;
+
+ /* lookup is wound to every child, not only to those marked up */
+ local->call_count = priv->child_count;
+
+ /* NOTE(review): the result of memdup is not checked */
+ local->child_up = memdup (priv->child_up, priv->child_count);
+ local->child_count = afr_up_children_count (priv->child_count,
+ local->child_up);
+
+ /* By default assume ENOTCONN. On success it will be set to 0. */
+ local->op_errno = ENOTCONN;
+
+ /* NOTE(review): when xattr_req is NULL and no self-heal option is enabled, the else branch calls dict_ref (NULL) - confirm dict_ref tolerates that */
+ if ((xattr_req == NULL)
+ && (priv->metadata_self_heal
+ || priv->data_self_heal
+ || priv->entry_self_heal))
+ local->xattr_req = dict_new ();
+ else
+ local->xattr_req = dict_ref (xattr_req);
+
+ /* the value stored for each pending key is the byte size of one int32_t per child */
+ if (priv->metadata_self_heal) {
+ ret = dict_set_uint64 (local->xattr_req, AFR_METADATA_PENDING,
+ priv->child_count * sizeof(int32_t));
+ }
+
+ if (priv->data_self_heal) {
+ ret = dict_set_uint64 (local->xattr_req, AFR_DATA_PENDING,
+ priv->child_count * sizeof(int32_t));
+ }
+
+ if (priv->entry_self_heal) {
+ ret = dict_set_uint64 (local->xattr_req, AFR_ENTRY_PENDING,
+ priv->child_count * sizeof(int32_t));
+ }
+
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ /* the dict_set results above are overwritten here; only the allocation failure reaches 'out' with ret == -1 */
+ ret = 0;
+out:
+ if (ret == -1)
+ AFR_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* {{{ open */
+
+/**
+ * afr_open_ftruncate_cbk - finish an O_TRUNC open after the ftruncate
+ *
+ * The ftruncate result itself is ignored; the open result recorded in
+ * frame->local is unwound together with the saved fd.
+ */
+int
+afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        afr_local_t *local = NULL;
+
+        local = frame->local;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          local->fd);
+
+        return 0;
+}
+
+
+/**
+ * afr_open_cbk - collect one child's reply to open
+ *
+ * Records the errno of failed replies and the op_ret of successful
+ * ones. After the last reply, an open that asked for O_TRUNC and
+ * succeeded on at least one child is followed by an ftruncate to 0
+ * through this translator; otherwise the result is unwound directly.
+ *
+ * Always returns 0.
+ */
+int
+afr_open_cbk (call_frame_t *frame, void *cookie,
+              xlator_t *this, int32_t op_ret, int32_t op_errno,
+              fd_t *fd)
+{
+        afr_local_t *local = NULL;
+
+        int call_count = -1;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                }
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                if ((local->cont.open.flags & O_TRUNC)
+                    && (local->op_ret >= 0)) {
+                        /* use the fd saved in frame->local: the fd
+                           argument belongs to whichever reply arrived
+                           last, and that reply may be a failed one */
+                        STACK_WIND (frame, afr_open_ftruncate_cbk,
+                                    this, this->fops->ftruncate,
+                                    local->fd, 0);
+                } else {
+                        AFR_STACK_UNWIND (frame, local->op_ret,
+                                          local->op_errno, local->fd);
+                }
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_open - open the file on every child that is up
+ *
+ * Refuses with EIO when a context is set on the inode for this
+ * translator. Otherwise winds open to each up child with O_TRUNC
+ * stripped from the flags; afr_open_cbk performs the truncate once the
+ * opens have completed.
+ *
+ * Always returns 0; failures detected here are unwound with
+ * op_ret = -1.
+ */
+int
+afr_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+
+ int i = 0;
+ int ret = -1;
+
+ int32_t call_count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ /* O_TRUNC is withheld from the children; the original flags are kept in local */
+ int32_t wind_flags = flags & (~O_TRUNC);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+
+ ret = inode_ctx_get (loc->inode, this, NULL);
+ if (ret == 0) {
+ /* if ctx is set it means self-heal failed */
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "returning EIO, file has to be manually corrected "
+ "in backend");
+ op_errno = EIO;
+ goto out;
+ }
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ /* NOTE(review): local is not yet attached to the frame on this path - confirm it is released somewhere */
+ op_errno = -ret;
+ goto out;
+ }
+
+ frame->local = local;
+ call_count = local->call_count;
+
+ local->cont.open.flags = flags;
+ local->fd = fd_ref (fd);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ loc, wind_flags, fd);
+
+ /* stop once every expected wind has been issued */
+ if (!--call_count)
+ break;
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ flush */
+
+/**
+ * afr_flush_wind_cbk - collect one child's reply to a transactional flush
+ *
+ * Marks the flush successful if any child succeeded, keeps the most
+ * recent op_errno, and resumes the transaction after the last reply.
+ *
+ * Always returns 0.
+ */
+int
+afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        local->transaction.resume (frame, this);
+
+        return 0;
+}
+
+
+/**
+ * afr_flush_wind - fop phase of the flush transaction
+ *
+ * Winds flush on local->fd to every child marked up in local->child_up.
+ * If no child is up the transaction is resumed immediately.
+ *
+ * Always returns 0.
+ */
+int
+afr_flush_wind (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local   = frame->local;
+        afr_private_t *priv    = this->private;
+        int            pending = -1;
+        int            child   = 0;
+
+        pending = afr_up_children_count (priv->child_count, local->child_up);
+        if (pending == 0) {
+                local->transaction.resume (frame, this);
+                return 0;
+        }
+
+        local->call_count = pending;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->flush,
+                                   local->fd);
+
+                /* stop once every expected wind has been issued */
+                if (--pending == 0)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_flush_done - final phase of the flush transaction
+ *
+ * Unwinds the flush result recorded in frame->local. Always returns 0.
+ */
+int
+afr_flush_done (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t *local = frame->local;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_simple_flush_cbk - collect one child's reply to a plain flush
+ *
+ * Marks the flush successful if any child succeeded, keeps the most
+ * recent op_errno, and unwinds to the caller after the last reply.
+ *
+ * Always returns 0.
+ */
+int
+afr_simple_flush_cbk (call_frame_t *frame, void *cookie,
+                      xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * __is_fd_ctx_set - report whether fd_ctx_get succeeds for this
+ * translator on fd
+ *
+ * Returns 1 when fd_ctx_get returns 0, and 0 otherwise.
+ */
+static int
+__is_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+        return (fd_ctx_get (fd, this, NULL) == 0) ? 1 : 0;
+}
+
+
+/**
+ * afr_flush - flush an fd on the children
+ *
+ * When this translator has a context set on the fd, the flush is run as
+ * an AFR_FLUSH_TRANSACTION using the data pending changelog key.
+ * Otherwise flush is wound directly to every child that is up and the
+ * replies are merged in afr_simple_flush_cbk.
+ *
+ * Always returns 0; failures detected here are unwound with
+ * op_ret = -1.
+ */
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+
+ int op_ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ ALLOC_OR_GOTO (local, afr_local_t, out);
+
+ ret = AFR_LOCAL_INIT (local, priv);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ frame->local = local;
+
+ if (__is_fd_ctx_set (this, fd)) {
+ local->op = GF_FOP_FLUSH;
+ local->transaction.fop = afr_flush_wind;
+ local->transaction.done = afr_flush_done;
+
+ local->fd = fd_ref (fd);
+
+ /* start and len are both 0 for the lock taken by the transaction */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+
+ local->transaction.pending = AFR_DATA_PENDING;
+
+ afr_transaction (frame, this, AFR_FLUSH_TRANSACTION);
+ } else {
+ /*
+ * if fd's ctx is not set, then there is no need
+ * to erase changelog. So just send the flush
+ */
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_simple_flush_cbk,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ fd);
+
+ /* stop once every expected wind has been issued */
+ if (!--call_count)
+ break;
+ }
+ }
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ /* NOTE(review): the success unwinds for flush in this file pass only op_ret and op_errno; the extra NULL here looks unnecessary - confirm */
+ AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fsync */
+
+/**
+ * afr_fsync_cbk - collect one child's reply to fsync
+ *
+ * Marks the fsync successful if any child succeeded, keeps the most
+ * recent op_errno, and unwinds to the caller after the last reply.
+ *
+ * Always returns 0.
+ */
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie,
+               xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_fsync - fsync an fd on every child that is up
+ *
+ * Replies are merged in afr_fsync_cbk. Always returns 0; failures
+ * detected here are unwound with op_ret = -1.
+ */
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           int32_t datasync)
+{
+        afr_private_t *priv     = NULL;
+        afr_local_t   *local    = NULL;
+        int32_t        pending  = 0;
+        int32_t        op_ret   = -1;
+        int32_t        op_errno = 0;
+        int            ret      = -1;
+        int            child    = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        pending      = local->call_count;
+        frame->local = local;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (!local->child_up[child])
+                        continue;
+
+                STACK_WIND (frame, afr_fsync_cbk,
+                            priv->children[child],
+                            priv->children[child]->fops->fsync,
+                            fd, datasync);
+
+                /* stop once every expected wind has been issued */
+                if (--pending == 0)
+                        break;
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ fsyncdir */
+
+/**
+ * afr_fsyncdir_cbk - collect one child's reply to fsyncdir
+ *
+ * Marks the fsyncdir successful if any child succeeded, keeps the most
+ * recent op_errno, and unwinds to the caller after the last reply.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_fsyncdir - fsyncdir an fd on every child that is up
+ *
+ * Replies are merged in afr_fsyncdir_cbk. Always returns 0; failures
+ * detected here are unwound with op_ret = -1.
+ */
+int32_t
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              int32_t datasync)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int ret = -1;
+
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        call_count = local->call_count;
+        frame->local = local;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        /* use the fsyncdir callback defined for this
+                           fop rather than the fsync one */
+                        STACK_WIND (frame, afr_fsyncdir_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->fsyncdir,
+                                    fd, datasync);
+
+                        /* stop once every expected wind has been issued */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ xattrop */
+
+/**
+ * afr_xattrop_cbk - collect one child's reply to xattrop
+ *
+ * Marks the xattrop successful if any child succeeded and keeps the
+ * most recent op_errno. After the last reply the result is unwound
+ * together with the xattr dict of that last reply.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_xattrop_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno,
+                 dict_t *xattr)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
+
+        return 0;
+}
+
+
+/**
+ * afr_xattrop - run xattrop on a path on every child that is up
+ *
+ * Replies are merged in afr_xattrop_cbk. Always returns 0; failures
+ * detected here are unwound with op_ret = -1 and a NULL xattr.
+ */
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int ret = -1;
+
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        call_count = local->call_count;
+        frame->local = local;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_xattrop_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->xattrop,
+                                    loc, optype, xattr);
+
+                        /* stop once every expected wind has been issued */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                /* pass an explicit NULL dict so the error unwind has
+                   the same argument list as the success unwind in
+                   afr_xattrop_cbk */
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+/* }}} */
+
+/* {{{ fxattrop */
+
+/**
+ * afr_fxattrop_cbk - collect one child's reply to fxattrop
+ *
+ * Marks the fxattrop successful if any child succeeded and keeps the
+ * most recent op_errno. After the last reply the result is unwound
+ * together with the xattr dict of that last reply.
+ *
+ * Always returns 0.
+ */
+int32_t
+afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno,
+                  dict_t *xattr)
+{
+        afr_local_t *local     = frame->local;
+        int          remaining = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0)
+                        local->op_ret = 0;
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = afr_frame_return (frame);
+        if (remaining != 0)
+                return 0;
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
+
+        return 0;
+}
+
+
+/**
+ * afr_fxattrop - run fxattrop on an fd on every child that is up
+ *
+ * Replies are merged in afr_fxattrop_cbk. Always returns 0; failures
+ * detected here are unwound with op_ret = -1 and a NULL xattr.
+ */
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int ret = -1;
+
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        call_count = local->call_count;
+        frame->local = local;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_fxattrop_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->fxattrop,
+                                    fd, optype, xattr);
+
+                        /* stop once every expected wind has been issued */
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                /* pass an explicit NULL dict so the error unwind has
+                   the same argument list as the success unwind in
+                   afr_fxattrop_cbk */
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+/* }}} */
+
+
+/*
+ * Reply handler for afr_inodelk: remembers the latest errno, flags the
+ * fop as successful if any child succeeded, and unwinds when
+ * afr_frame_return () reports 0.
+ */
+int32_t
+afr_inodelk_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local       = frame->local;
+        int          outstanding = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0) {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        outstanding = afr_frame_return (frame);
+
+        if (!outstanding) {
+                AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_inodelk - send an inodelk to every child that is marked up
+ * @frame: call frame of the fop
+ * @this:  AFR xlator
+ * @loc:   location, passed through to the children
+ * @cmd:   lock command, passed through unchanged
+ * @flock: lock description, passed through unchanged
+ *
+ * Replies are gathered by afr_inodelk_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1.  Always returns 0.
+ */
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             int32_t cmd, struct flock *flock)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int     ret        = -1;
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_inodelk_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->inodelk,
+                                    loc, cmd, flock);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+
+/*
+ * Reply handler for afr_finodelk.  Any successful child makes the fop
+ * successful; op_errno always reflects the latest reply.  Unwinds once
+ * afr_frame_return () reports 0.
+ */
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *ctx  = NULL;
+        int          left = -1;
+
+        ctx = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (!op_ret) {
+                        ctx->op_ret = 0;
+                }
+
+                ctx->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        left = afr_frame_return (frame);
+        if (left != 0) {
+                return 0;
+        }
+
+        AFR_STACK_UNWIND (frame, ctx->op_ret, ctx->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_finodelk - send a finodelk to every child that is marked up
+ * @frame: call frame of the fop
+ * @this:  AFR xlator
+ * @fd:    file descriptor, passed through to the children
+ * @cmd:   lock command, passed through unchanged
+ * @flock: lock description, passed through unchanged
+ *
+ * Replies are gathered by afr_finodelk_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1.  Always returns 0.
+ */
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              int32_t cmd, struct flock *flock)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int     ret        = -1;
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_finodelk_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->finodelk,
+                                    fd, cmd, flock);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+
+/*
+ * Reply handler for afr_entrylk: a single success marks the fop as
+ * successful, the errno of the latest reply is kept, and the frame is
+ * unwound when afr_frame_return () reports 0.
+ */
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *local   = frame->local;
+        int          pending = -1;
+
+        LOCK (&frame->lock);
+        {
+                local->op_errno = op_errno;
+
+                if (op_ret == 0) {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+
+        if (pending == 0) {
+                AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_entrylk - send an entrylk to every child that is marked up
+ * @frame:    call frame of the fop
+ * @this:     AFR xlator
+ * @loc:      location, passed through to the children
+ * @basename: entry name, passed through unchanged
+ * @cmd:      entrylk command, passed through unchanged
+ * @type:     entrylk type, passed through unchanged
+ *
+ * Replies are gathered by afr_entrylk_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1.  Always returns 0.
+ */
+int32_t
+afr_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             const char *basename, entrylk_cmd cmd, entrylk_type type)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int     ret        = -1;
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_entrylk_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->entrylk,
+                                    loc, basename, cmd, type);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+
+
+/*
+ * Reply handler for afr_fentrylk.  Success on any child makes the fop
+ * successful; the errno stored is the one from the latest reply.  The
+ * frame is unwound when afr_frame_return () reports 0.
+ */
+int32_t
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        afr_local_t *state = NULL;
+        int          count = -1;
+
+        state = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (!op_ret) {
+                        state->op_ret = 0;
+                }
+
+                state->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        count = afr_frame_return (frame);
+        if (count) {
+                return 0;
+        }
+
+        AFR_STACK_UNWIND (frame, state->op_ret, state->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * afr_fentrylk - send a fentrylk to every child that is marked up
+ * @frame:    call frame of the fop
+ * @this:     AFR xlator
+ * @fd:       file descriptor, passed through to the children
+ * @basename: entry name, passed through unchanged
+ * @cmd:      entrylk command, passed through unchanged
+ * @type:     entrylk type, passed through unchanged
+ *
+ * Replies are gathered by afr_fentrylk_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1.  Always returns 0.
+ */
+int32_t
+afr_fentrylk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              const char *basename, entrylk_cmd cmd, entrylk_type type)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int     ret        = -1;
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_fentrylk_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->fentrylk,
+                                    fd, basename, cmd, type);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno);
+        }
+        return 0;
+}
+
+
+/**
+ * afr_checksum_cbk - collect a child's reply to a checksum request
+ *
+ * The checksums of the first successful reply are copied
+ * (ZR_FILENAME_MAX bytes each) into local->cont.checksum and handed to
+ * the caller once afr_frame_return () reports 0.  If the copies cannot
+ * be allocated the reply is treated as a failure with ENOMEM instead of
+ * copying into a NULL buffer.
+ */
+int32_t
+afr_checksum_cbk (call_frame_t *frame, void *cookie,
+                  xlator_t *this, int32_t op_ret, int32_t op_errno,
+                  uint8_t *file_checksum, uint8_t *dir_checksum)
+
+{
+        afr_local_t *local = NULL;
+
+        uint8_t *file_copy = NULL;
+        uint8_t *dir_copy  = NULL;
+
+        int call_count = -1;
+
+        local = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == 0 && (local->op_ret != 0)) {
+                        file_copy = MALLOC (ZR_FILENAME_MAX);
+                        dir_copy  = MALLOC (ZR_FILENAME_MAX);
+
+                        if (file_copy && dir_copy) {
+                                memcpy (file_copy, file_checksum,
+                                        ZR_FILENAME_MAX);
+                                memcpy (dir_copy, dir_checksum,
+                                        ZR_FILENAME_MAX);
+
+                                local->cont.checksum.file_checksum = file_copy;
+                                local->cont.checksum.dir_checksum  = dir_copy;
+
+                                local->op_ret = 0;
+                        } else {
+                                /* free (NULL) is a no-op, so whichever
+                                   allocation did succeed is released */
+                                free (file_copy);
+                                free (dir_copy);
+
+                                op_errno = ENOMEM;
+                        }
+                }
+
+                local->op_errno = op_errno;
+        }
+        UNLOCK (&frame->lock);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0)
+                AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  local->cont.checksum.file_checksum,
+                                  local->cont.checksum.dir_checksum);
+
+        return 0;
+}
+
+
+/**
+ * afr_checksum - send a checksum request to every child that is marked up
+ * @frame: call frame of the fop
+ * @this:  AFR xlator
+ * @loc:   location, passed through to the children
+ * @flag:  passed through unchanged
+ *
+ * Replies are gathered by afr_checksum_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1 and NULL checksums.
+ * Always returns 0.
+ */
+int32_t
+afr_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              int32_t flag)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int     ret        = -1;
+        int     i          = 0;
+        int32_t call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_checksum_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->checksum,
+                                    loc, flag);
+
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then.  The callback signature carries two checksum
+           pointers, so pass explicit NULLs for them. */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
+        }
+        return 0;
+}
+
+
+/*
+ * Reply handler for afr_statfs.  Among the successful replies, the
+ * statvfs with the smallest f_bavail is kept; failures only record their
+ * errno.  The kept buffer is returned to the caller once
+ * afr_frame_return () reports 0.
+ */
+int32_t
+afr_statfs_cbk (call_frame_t *frame, void *cookie,
+                xlator_t *this, int32_t op_ret, int32_t op_errno,
+                struct statvfs *statvfs)
+{
+        afr_local_t *local   = NULL;
+        int          pending = 0;
+
+        LOCK (&frame->lock);
+        {
+                local = frame->local;
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                } else if (op_ret == 0) {
+                        local->op_ret = op_ret;
+
+                        if (!local->cont.statfs.buf_set) {
+                                /* first successful reply */
+                                local->cont.statfs.buf     = *statvfs;
+                                local->cont.statfs.buf_set = 1;
+                        } else if (statvfs->f_bavail <
+                                   local->cont.statfs.buf.f_bavail) {
+                                local->cont.statfs.buf = *statvfs;
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        pending = afr_frame_return (frame);
+        if (pending != 0) {
+                return 0;
+        }
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          &local->cont.statfs.buf);
+
+        return 0;
+}
+
+
+/**
+ * afr_statfs - send a statfs to every child that is marked up
+ * @frame: call frame of the fop
+ * @this:  AFR xlator
+ * @loc:   location, passed through to the children
+ *
+ * Replies are gathered by afr_statfs_cbk.  If the request cannot be
+ * started the frame is unwound here with op_ret -1 and a NULL buffer.
+ * Always returns 0.
+ */
+int32_t
+afr_statfs (call_frame_t *frame, xlator_t *this,
+            loc_t *loc)
+{
+        afr_private_t *priv        = NULL;
+        int            child_count = 0;
+        afr_local_t   *local       = NULL;
+        int            i           = 0;
+
+        int     ret        = -1;
+        int     call_count = 0;
+        int32_t op_ret     = -1;
+        int32_t op_errno   = 0;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        priv = this->private;
+        child_count = priv->child_count;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* Attach local to the frame before anything else can fail:
+           AFR_STACK_UNWIND releases whatever frame->local points to,
+           so the error path below no longer leaks it. */
+        frame->local = local;
+
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        /* private copy: local may be gone once the last wind returns */
+        call_count = local->call_count;
+
+        for (i = 0; i < child_count; i++) {
+                if (local->child_up[i]) {
+                        STACK_WIND (frame, afr_statfs_cbk,
+                                    priv->children[i],
+                                    priv->children[i]->fops->statfs,
+                                    loc);
+                        if (!--call_count)
+                                break;
+                }
+        }
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+
+/*
+ * Reply handler for the unlock requests issued by afr_lk_unlock.  The
+ * result already stored in local is returned to the caller, together
+ * with the lock from the reply being processed, once
+ * afr_frame_return () reports 0.
+ */
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        afr_local_t *local   = frame->local;
+        int          pending = afr_frame_return (frame);
+
+        if (pending != 0) {
+                return 0;
+        }
+
+        AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          lock);
+
+        return 0;
+}
+
+
+/*
+ * Undo a partially acquired lk: send F_SETLK with l_type F_UNLCK to every
+ * child recorded in cont.lk.locked_nodes.  When no child holds the lock
+ * the frame is unwound immediately with the result stored in local.
+ */
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int idx       = 0;
+        int to_unlock = 0;
+
+        to_unlock = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+                                            priv->child_count);
+
+        if (!to_unlock) {
+                AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->cont.lk.flock);
+                return 0;
+        }
+
+        local->call_count = to_unlock;
+
+        local->cont.lk.flock.l_type = F_UNLCK;
+
+        while (idx < priv->child_count) {
+                if (local->cont.lk.locked_nodes[idx]) {
+                        STACK_WIND (frame, afr_lk_unlock_cbk,
+                                    priv->children[idx],
+                                    priv->children[idx]->fops->lk,
+                                    local->fd, F_SETLK,
+                                    &local->cont.lk.flock);
+
+                        /* stop before touching local again after the
+                           last request has been sent */
+                        if (--to_unlock == 0) {
+                                break;
+                        }
+                }
+
+                idx++;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for afr_lk.  Children are locked one after another; the
+ * cookie carries the index of the child that just replied.
+ *
+ *  - a failure that is not "child went down" aborts the operation and
+ *    releases the locks taken so far via afr_lk_unlock
+ *  - a success is recorded in cont.lk.locked_nodes
+ *  - then the next child is tried, or, after the last child, the frame
+ *    is unwound
+ */
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+
+        int replied_child = (long) cookie;
+        int next_child    = -1;
+
+        --local->call_count;
+
+        if ((op_ret == -1) && !child_went_down (op_ret, op_errno)) {
+                local->op_ret   = -1;
+                local->op_errno = op_errno;
+
+                afr_lk_unlock (frame, this);
+                return 0;
+        }
+
+        if (op_ret == 0) {
+                local->op_ret        = 0;
+                local->op_errno      = 0;
+                local->cont.lk.flock = *lock;
+                local->cont.lk.locked_nodes[replied_child] = 1;
+        }
+
+        next_child = replied_child + 1;
+
+        if (next_child < priv->child_count) {
+                STACK_WIND_COOKIE (frame, afr_lk_cbk,
+                                   (void *) (long) next_child,
+                                   priv->children[next_child],
+                                   priv->children[next_child]->fops->lk,
+                                   local->fd, local->cont.lk.cmd,
+                                   &local->cont.lk.flock);
+                return 0;
+        }
+
+        if (local->op_ret == -1) {
+                /* all nodes have gone down */
+
+                AFR_STACK_UNWIND (frame, -1, ENOTCONN, &local->cont.lk.flock);
+        } else {
+                /* locking has succeeded on all nodes that are up */
+
+                AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->cont.lk.flock);
+        }
+
+        return 0;
+}
+
+
+/**
+ * afr_lk - take a posix lock on the children, one child at a time
+ * @frame: call frame of the fop
+ * @this:  AFR xlator
+ * @fd:    file descriptor to lock; a reference is kept in local->fd
+ * @cmd:   lock command, saved for the follow-up requests
+ * @flock: lock description, copied into local
+ *
+ * The request is sent to child 0 here; afr_lk_cbk walks the remaining
+ * children.  If the request cannot be started the frame is unwound with
+ * op_ret -1 and a NULL lock.  Always returns 0.
+ */
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int32_t cmd,
+        struct flock *flock)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+
+        int ret = -1;
+
+        int32_t op_ret   = -1;
+        int32_t op_errno = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ALLOC_OR_GOTO (local, afr_local_t, out);
+
+        /* attached first so that every error path below releases it
+           through AFR_STACK_UNWIND */
+        frame->local = local;
+
+        /* The result used to be ignored.  When AFR_LOCAL_INIT bails out
+           early, local->op_ret is still the zero left by the allocator,
+           and afr_lk_cbk would then report "all children down" as a
+           success.  Fail the fop here like the other fops do. */
+        ret = AFR_LOCAL_INIT (local, priv);
+        if (ret < 0) {
+                op_errno = -ret;
+                goto out;
+        }
+
+        local->cont.lk.locked_nodes = CALLOC (priv->child_count,
+                                              sizeof (*local->cont.lk.locked_nodes));
+
+        if (!local->cont.lk.locked_nodes) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        local->fd            = fd_ref (fd);
+        local->cont.lk.cmd   = cmd;
+        local->cont.lk.flock = *flock;
+
+        /* start with the first child; the cookie is its index */
+        STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+                           priv->children[0],
+                           priv->children[0]->fops->lk,
+                           fd, cmd, flock);
+
+        op_ret = 0;
+out:
+        /* frame is NULL when its own validation failed; nothing can be
+           unwound then */
+        if ((op_ret == -1) && frame) {
+                AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        }
+        return 0;
+}
+
+
+/**
+ * find_child_index - look up a subvolume in AFR's array of children
+ * @this: AFR
+ * @child: subvolume to look for
+ *
+ * Returns the position of @child in priv->children.  When @child is not
+ * one of the children the value returned equals priv->child_count, which
+ * is NOT a valid index.
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+        afr_private_t *priv = this->private;
+        int            idx  = 0;
+
+        while (idx < priv->child_count) {
+                if (priv->children[idx] == (xlator_t *) child) {
+                        return idx;
+                }
+
+                idx++;
+        }
+
+        return idx;
+}
+
+
+/**
+ * notify - track child up/down events and forward them selectively
+ * @this:  AFR xlator
+ * @event: event being delivered
+ * @data:  for child up/down events, the child xlator concerned
+ *
+ * GF_EVENT_CHILD_UP is passed on (via default_notify) only when the
+ * child is the sole child that is up afterwards; GF_EVENT_CHILD_DOWN
+ * only when no child is up afterwards.  Every other event is passed on
+ * unchanged.  Always returns 0.
+ */
+int32_t
+notify (xlator_t *this, int32_t event,
+        void *data, ...)
+{
+        afr_private_t *priv     = NULL;
+        unsigned char *child_up = NULL;
+
+        int i           = -1;
+        int up_children = 0;
+
+        priv = this->private;
+
+        if (!priv)
+                return 0;
+
+        child_up = priv->child_up;
+
+        switch (event) {
+        case GF_EVENT_CHILD_UP:
+        case GF_EVENT_CHILD_DOWN:
+                i = find_child_index (this, data);
+
+                /* find_child_index hands back child_count for an unknown
+                   child; using that as an index would write past the
+                   end of child_up[] */
+                if ((i < 0) || ((unsigned int) i >= priv->child_count)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "ignoring event %d from a subvolume that "
+                                "is not a child of this volume", event);
+                        break;
+                }
+
+                child_up[i] = (event == GF_EVENT_CHILD_UP) ? 1 : 0;
+
+                for (i = 0; i < priv->child_count; i++)
+                        if (child_up[i])
+                                up_children++;
+
+                /*
+                  CHILD_UP:   if all the children were down, and one child
+                              came up, send notify to parent
+                  CHILD_DOWN: if all children are down, and this was the
+                              last to go down, send notify to parent
+                */
+                if (((event == GF_EVENT_CHILD_UP) && (up_children == 1)) ||
+                    ((event == GF_EVENT_CHILD_DOWN) && (up_children == 0)))
+                        default_notify (this, event, data);
+
+                break;
+
+        default:
+                default_notify (this, event, data);
+        }
+
+        return 0;
+}
+
+
+/* Logged by init () when a "favorite-child" subvolume is configured.
+   The three %s all receive the name of that subvolume. */
+static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
+ "as the 'favorite child'. This means that if a discrepancy in the content "
+ "or attributes (ownership, permission, etc.) of a file is detected among "
+ "the subvolumes, the file on '%s' will be considered the definitive "
+ "version and its contents will OVERWRITE the contents of the file on other "
+ "subvolumes. All versions of the file except that on '%s' "
+ "WILL BE LOST.";
+
+/* Logged by init () when data-lock-server-count is set to 0.  Contains
+   no conversion specifications.  (Spelling of "RESPONSIBLE" corrected.) */
+static const char *no_lock_servers_warning_str = "You have set lock-server-count = 0. "
+ "This means correctness is NO LONGER GUARANTEED in all cases. If two or more "
+ "applications write to the same region of a file, there is a possibility that "
+ "its copies will be INCONSISTENT. Set it to a value greater than 0 unless you "
+ "are ABSOLUTELY SURE of what you are doing and WILL NOT HOLD GlusterFS "
+ "RESPONSIBLE for inconsistent data. If you are in doubt, set it to a value "
+ "greater than 0.";
+
+/**
+ * init - build AFR's private state from the volfile options
+ * @this: AFR xlator being initialised
+ *
+ * Allocates this->private, reads the self-heal and change-log switches
+ * (an unparsable value falls back to the default and is logged), reads
+ * the lock-server counts, notes the read and favorite children, and
+ * fills the children[] and child_up[] arrays.
+ *
+ * Returns 0 on success and -1 when no child is configured or memory
+ * runs out.
+ */
+int32_t
+init (xlator_t *this)
+{
+        afr_private_t *priv        = NULL;
+        int            child_count = 0;
+        xlator_list_t *trav        = NULL;
+        int            i           = 0;
+        int            ret         = -1;
+        int            op_errno    = 0; /* written by ALLOC_OR_GOTO */
+
+        char *read_subvol = NULL;
+        char *fav_child   = NULL;
+        char *self_heal   = NULL;
+        char *change_log  = NULL;
+
+        int32_t lock_server_count = 1;
+
+        int fav_ret  = -1;
+        int read_ret = -1;
+        int dict_ret = -1;
+
+        /* Result of gf_string2boolean ().  Kept apart from ret: ret is
+           what the function returns, and reusing it for option parsing
+           let the out-of-memory paths below return 0. */
+        int bool_ret = -1;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "AFR needs more than one child defined");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        ALLOC_OR_GOTO (this->private, afr_private_t, out);
+
+        priv = this->private;
+
+        read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
+        priv->read_child = -1;
+
+        fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
+        priv->favorite_child = -1;
+
+        /* Default values */
+
+        priv->data_self_heal     = 1;
+        priv->metadata_self_heal = 1;
+        priv->entry_self_heal    = 1;
+
+        dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (self_heal, &priv->data_self_heal);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option data-self-heal %s' "
+                                "defaulting to data-self-heal as 'on'",
+                                self_heal);
+                        priv->data_self_heal = 1;
+                }
+        }
+
+        dict_ret = dict_get_str (this->options, "metadata-self-heal",
+                                 &self_heal);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (self_heal,
+                                              &priv->metadata_self_heal);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option metadata-self-heal %s' "
+                                "defaulting to metadata-self-heal as 'on'",
+                                self_heal);
+                        priv->metadata_self_heal = 1;
+                }
+        }
+
+        dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option entry-self-heal %s' "
+                                "defaulting to entry-self-heal as 'on'",
+                                self_heal);
+                        priv->entry_self_heal = 1;
+                }
+        }
+
+        /* Change log options */
+
+        priv->data_change_log     = 1;
+        priv->metadata_change_log = 0;
+        priv->entry_change_log    = 1;
+
+        dict_ret = dict_get_str (this->options, "data-change-log",
+                                 &change_log);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (change_log,
+                                              &priv->data_change_log);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option data-change-log %s'. "
+                                "defaulting to data-change-log as 'on'",
+                                change_log);
+                        priv->data_change_log = 1;
+                }
+        }
+
+        dict_ret = dict_get_str (this->options, "metadata-change-log",
+                                 &change_log);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (change_log,
+                                              &priv->metadata_change_log);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option metadata-change-log %s'. "
+                                "defaulting to metadata-change-log as 'off'",
+                                change_log);
+                        priv->metadata_change_log = 0;
+                }
+        }
+
+        dict_ret = dict_get_str (this->options, "entry-change-log",
+                                 &change_log);
+        if (dict_ret == 0) {
+                bool_ret = gf_string2boolean (change_log,
+                                              &priv->entry_change_log);
+                if (bool_ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "invalid 'option entry-change-log %s'. "
+                                "defaulting to entry-change-log as 'on'",
+                                change_log);
+                        priv->entry_change_log = 1;
+                }
+        }
+
+        /* Locking options */
+
+        priv->data_lock_server_count     = 1;
+        priv->metadata_lock_server_count = 0;
+        priv->entry_lock_server_count    = 1;
+
+        dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
+                                   &lock_server_count);
+        if (dict_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting data lock server count to %d",
+                        lock_server_count);
+
+                /* the warning text is data, not a format: log it
+                   through an explicit "%s" */
+                if (lock_server_count == 0)
+                        gf_log (this->name, GF_LOG_WARNING, "%s",
+                                no_lock_servers_warning_str);
+
+                priv->data_lock_server_count = lock_server_count;
+        }
+
+
+        dict_ret = dict_get_int32 (this->options,
+                                   "metadata-lock-server-count",
+                                   &lock_server_count);
+        if (dict_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting metadata lock server count to %d",
+                        lock_server_count);
+                priv->metadata_lock_server_count = lock_server_count;
+        }
+
+
+        dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
+                                   &lock_server_count);
+        if (dict_ret == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting entry lock server count to %d",
+                        lock_server_count);
+
+                priv->entry_lock_server_count = lock_server_count;
+        }
+
+
+        /* count the children and match the configured subvolume names */
+        trav = this->children;
+        while (trav) {
+                if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "subvolume '%s' specified as read child",
+                                trav->xlator->name);
+
+                        priv->read_child = child_count;
+                }
+
+                if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                favorite_child_warning_str, trav->xlator->name,
+                                trav->xlator->name, trav->xlator->name);
+                        priv->favorite_child = child_count;
+                }
+
+                child_count++;
+                trav = trav->next;
+        }
+
+        /* XXX: return inode numbers from 1st subvolume till
+           afr supports read-subvolume based on inode's ctx
+           (and not itransform) for this reason afr_deitransform()
+           returns 0 always
+        */
+        priv->read_child = 0;
+
+        priv->wait_count = 1;
+
+        priv->child_count = child_count;
+        LOCK_INIT (&priv->lock);
+
+        priv->child_up = CALLOC (sizeof (unsigned char), child_count);
+        if (!priv->child_up) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        priv->children = CALLOC (sizeof (xlator_t *), child_count);
+        if (!priv->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        trav = this->children;
+        i = 0;
+        while (i < child_count) {
+                priv->children[i] = trav->xlator;
+
+                trav = trav->next;
+                i++;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/* Translator teardown hook: does nothing and returns 0.
+ NOTE(review): the memory init () allocates (this->private, child_up,
+ children) is not released here -- confirm that this is intended. */
+int
+fini (xlator_t *this)
+{
+ return 0;
+}
+
+
+/* File-operation table of this translator: each fop member is bound to
+ the AFR function that handles it.  The entries after the first group
+ follow the categories named in the comments below; only some of the
+ handlers are defined in this file. */
+struct xlator_fops fops = {
+ .lookup = afr_lookup,
+ .open = afr_open,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsync = afr_fsync,
+ .fsyncdir = afr_fsyncdir,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .checksum = afr_checksum,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .readv = afr_readv,
+
+ /* inode write */
+ .chmod = afr_chmod,
+ .chown = afr_chown,
+ .fchmod = afr_fchmod,
+ .fchown = afr_fchown,
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .utimens = afr_utimens,
+ .setxattr = afr_setxattr,
+ .removexattr = afr_removexattr,
+
+ /* dir read */
+ .opendir = afr_opendir,
+ .readdir = afr_readdir,
+ .getdents = afr_getdents,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
+ .setdents = afr_setdents,
+};
+
+
+/* Management-operation table: AFR sets none of its members. */
+struct xlator_mops mops = {
+};
+
+
+/* Callback table: AFR sets none of its members. */
+struct xlator_cbks cbks = {
+};
+
+/* Options this translator declares, with the value type of each.  The
+ keys are the ones init () reads from this->options.  The three
+ lock-server counts declare a minimum of 0.  The list ends with an entry
+ whose key is NULL. */
+struct volume_options options[] = {
+ { .key = {"read-subvolume" },
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+ { .key = {"favorite-child"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+ { .key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"data-lock-server-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0
+ },
+ { .key = {"metadata-lock-server-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0
+ },
+ { .key = {"entry-lock-server-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
new file mode 100644
index 00000000000..4cf6cdf9dfe
--- /dev/null
+++ b/xlators/cluster/afr/src/afr.h
@@ -0,0 +1,523 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef __AFR_H__
+#define __AFR_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "scheduler.h"
+#include "call-stub.h"
+#include "compat-errno.h"
+
+
+/* Per-translator state of AFR, stored in xlator->private. */
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+
+ xlator_t **children; /* child_count entries */
+
+ unsigned char *child_up; /* child_count flags; non-zero = child is up */
+
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+
+ gf_boolean_t data_change_log; /* on/off */
+ gf_boolean_t metadata_change_log; /* on/off */
+ gf_boolean_t entry_change_log; /* on/off */
+
+ /* NOTE(review): both indices below are unsigned, yet init () in afr.c
+ assigns -1 to them to mean "not set"; comparisons against -1 rely on
+ the implicit conversion. */
+ unsigned int read_child; /* read-subvolume */
+ unsigned int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+
+ unsigned int data_lock_server_count;
+ unsigned int metadata_lock_server_count;
+ unsigned int entry_lock_server_count;
+
+ unsigned int wait_count; /* # of servers to wait for success */
+} afr_private_t;
+
+/* Working state of a self-heal operation; embedded in afr_local_t as
+ the member self_heal. */
+typedef struct {
+ /* array of stat's, one for each child */
+ struct stat *buf;
+
+ /* array of xattr's, one for each child */
+ dict_t **xattr;
+
+ /* array of errno's, one for each child */
+ int *child_errno;
+
+ int32_t **pending_matrix;
+ int32_t **delta_matrix;
+
+ int *sources;
+ int source;
+ int active_source;
+ int active_sinks;
+ int *success;
+
+ fd_t *healing_fd;
+ int op_failed;
+
+ int file_has_holes;
+ blksize_t block_size;
+ off_t file_size;
+ off_t offset;
+
+ loc_t parent_loc;
+ /* function pointer taking the frame and the xlator; presumably invoked
+ when the self-heal finishes -- confirm against the self-heal sources */
+ int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ call_frame_t *sh_frame;
+} afr_self_heal_t;
+
+
+/* Kind of transaction; stored in afr_local_t.transaction.type. */
+typedef enum {
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
+ AFR_FLUSH_TRANSACTION, /* flush */
+} afr_transaction_type;
+
+/* Per-call state of an AFR fop, hung off frame->local.  It is allocated
+ zero-filled with ALLOC_OR_GOTO, initialised by AFR_LOCAL_INIT, and
+ released by the AFR_STACK_UNWIND / AFR_STACK_DESTROY macros. */
+typedef struct _afr_local {
+ /* set by AFR_LOCAL_INIT to the number of children that are up */
+ unsigned int call_count;
+ unsigned int success_count;
+ unsigned int enoent_count;
+
+ unsigned int need_metadata_self_heal;
+ unsigned int need_entry_self_heal;
+ unsigned int need_data_self_heal;
+ unsigned int govinda_gOvinda;
+
+ unsigned int reval_child_index;
+ /* result handed to the caller; AFR_LOCAL_INIT starts them at
+ -1 / EUCLEAN */
+ int32_t op_ret;
+ int32_t op_errno;
+
+ int32_t *pending_array;
+
+ loc_t loc;
+ loc_t newloc;
+
+ fd_t *fd;
+
+ glusterfs_fop_t fop;
+
+ /* copy of priv->child_up taken by AFR_LOCAL_INIT */
+ unsigned char *child_up;
+ int child_count;
+
+ int32_t *child_errno;
+
+ dict_t *xattr_req;
+ int open_fd_count;
+ /*
+ This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
+
+ int op;
+ struct {
+ struct {
+ unsigned char buf_set;
+ struct statvfs buf;
+ } statfs;
+
+ struct {
+ inode_t *inode;
+ struct stat buf;
+ dict_t *xattr;
+ } lookup;
+
+ struct {
+ int32_t flags;
+ } open;
+
+ struct {
+ int32_t cmd;
+ struct flock flock;
+ unsigned char *locked_nodes;
+ } lk;
+
+ struct {
+ uint8_t *file_checksum;
+ uint8_t *dir_checksum;
+ } checksum;
+
+ /* inode read */
+
+ struct {
+ int32_t mask;
+ int last_tried; /* index of the child we tried previously */
+ } access;
+
+ struct {
+ int last_tried;
+ ino_t ino;
+ } stat;
+
+ struct {
+ int last_tried;
+ ino_t ino;
+ } fstat;
+
+ struct {
+ size_t size;
+ int last_tried;
+ } readlink;
+
+ struct {
+ const char *name;
+ int last_tried;
+ } getxattr;
+
+ struct {
+ size_t size;
+ off_t offset;
+ int last_tried;
+ } readv;
+
+ /* dir read */
+
+ struct {
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+ } opendir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+
+ int last_tried;
+ } readdir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+
+ size_t size;
+ off_t offset;
+ int32_t flag;
+
+ int last_tried;
+ } getdents;
+
+ /* inode write */
+
+ struct {
+ ino_t ino;
+ mode_t mode;
+ struct stat buf;
+ } chmod;
+
+ struct {
+ ino_t ino;
+ mode_t mode;
+ struct stat buf;
+ } fchmod;
+
+ struct {
+ ino_t ino;
+ uid_t uid;
+ gid_t gid;
+ struct stat buf;
+ } chown;
+
+ struct {
+ ino_t ino;
+ uid_t uid;
+ gid_t gid;
+ struct stat buf;
+ } fchown;
+
+ struct {
+ ino_t ino;
+ struct stat buf;
+
+ int32_t op_ret;
+
+ struct iovec *vector;
+ dict_t *refs;
+ int32_t count;
+ off_t offset;
+ } writev;
+
+ struct {
+ ino_t ino;
+ off_t offset;
+ struct stat buf;
+ } truncate;
+
+ struct {
+ ino_t ino;
+ off_t offset;
+ struct stat buf;
+ } ftruncate;
+
+ struct {
+ ino_t ino;
+ struct timespec tv[2];
+ struct stat buf;
+ } utimens;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
+
+ struct {
+ const char *name;
+ } removexattr;
+
+ /* dir write */
+
+ struct {
+ ino_t ino;
+ fd_t *fd;
+ int32_t flags;
+ mode_t mode;
+ inode_t *inode;
+ struct stat buf;
+ } create;
+
+ struct {
+ ino_t ino;
+ dev_t dev;
+ mode_t mode;
+ inode_t *inode;
+ struct stat buf;
+ } mknod;
+
+ struct {
+ ino_t ino;
+ int32_t mode;
+ inode_t *inode;
+ struct stat buf;
+ } mkdir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ } unlink;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ } rmdir;
+
+ struct {
+ ino_t ino;
+ struct stat buf;
+ } rename;
+
+ struct {
+ ino_t ino;
+ inode_t *inode;
+ struct stat buf;
+ } link;
+
+ struct {
+ ino_t ino;
+ inode_t *inode;
+ struct stat buf;
+ char *linkpath;
+ } symlink;
+
+ struct {
+ int32_t flags;
+ dir_entry_t *entries;
+ int32_t count;
+ } setdents;
+ } cont;
+
+ /* state of the transaction this call belongs to; the arrays in here
+ are allocated by afr_transaction_local_init */
+ struct {
+ off_t start, len;
+
+ unsigned char *locked_nodes;
+ int lock_count;
+
+ const char *basename;
+ const char *new_basename;
+
+ char *pending;
+
+ loc_t parent_loc;
+ loc_t new_parent_loc;
+
+ afr_transaction_type type;
+
+ int success_count;
+ int erase_pending; /* AFR_LOCAL_INIT sets this to 1 */
+ int failure_count;
+
+ int last_tried;
+ int32_t *child_errno;
+
+ call_frame_t *main_frame;
+
+ int (*fop) (call_frame_t *frame, xlator_t *this);
+
+ int (*done) (call_frame_t *frame, xlator_t *this);
+
+ int (*resume) (call_frame_t *frame, xlator_t *this);
+
+ int (*unwind) (call_frame_t *frame, xlator_t *this);
+ } transaction;
+
+ afr_self_heal_t self_heal;
+} afr_local_t;
+
+/* try alloc and if it fails, goto label
+
+ Allocates one zero-filled `type` into `var`.  On failure it logs,
+ sets op_errno to ENOMEM and jumps to `label`.
+
+ The macro is not self-contained: it uses the identifiers `this` and
+ `op_errno` from the scope it is expanded in, so both must exist there.
+
+ NOTE(review): the expansion already ends in ';' (after `while (0)`),
+ so `ALLOC_OR_GOTO (...);` yields an extra empty statement and the
+ macro cannot be the sole body of an `if` that has an `else`. */
+#define ALLOC_OR_GOTO(var, type, label) do { \
+ var = CALLOC (sizeof (type), 1); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
+ } while (0);
+
+
+/* did a call fail due to a child failing?
+   True when op_ret is negative and op_errno is ENOTCONN or EBADFD.
+   Every use of a parameter is parenthesized so that an expression
+   argument cannot re-associate with the comparison operators. */
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) &&          \
+                                           (((op_errno) == ENOTCONN) || \
+                                            ((op_errno) == EBADFD)))
+
+/* have we tried all children?
+ True when index `i` is the last valid index for `count` children,
+ i.e. i == count - 1. */
+#define all_tried(i, count) ((i) == (count) - 1)
+
+/* Helpers shared by the AFR sources.  Only the prototypes are given
+ here; the definitions are not part of this header. */
+void
+afr_build_parent_loc (loc_t *parent, loc_t *child);
+
+/* used by AFR_LOCAL_INIT to set local->call_count */
+int
+afr_up_children_count (int child_count, unsigned char *child_up);
+
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+
+int
+afr_first_up_child (afr_private_t *priv);
+
+ino64_t
+afr_itransform (ino64_t ino, int child_count, int child_index);
+
+int
+afr_deitransform (ino64_t ino, int child_count);
+
+/* called by AFR_STACK_UNWIND / AFR_STACK_DESTROY right before the local
+ itself is freed */
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this);
+
+/* callers in afr.c unwind the frame when this returns 0 */
+int
+afr_frame_return (call_frame_t *frame);
+
+/* Unwind `frame` with `params` and release its afr_local_t.
+
+ Order matters: the local is detached from the frame first, then
+ STACK_UNWIND runs, and only afterwards is the local cleaned up and
+ freed -- so `params` may still point into the local while the unwind
+ is in progress.  frame->this is read before the unwind as well.
+
+ NOTE(review): `frame` is expanded several times and is not
+ parenthesized; pass a plain variable.  The expansion already ends in
+ ';', so the macro cannot be the sole body of an `if` that has an
+ `else`. */
+#define AFR_STACK_UNWIND(frame, params ...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_UNWIND (frame, params); \
+ afr_local_cleanup (__local, __this); \
+ free (__local); \
+} while (0);
+
+/* Destroy the call stack `frame` belongs to (frame->root) and release
+ the frame's afr_local_t.
+
+ The local and frame->this are read, and the local detached, before
+ STACK_DESTROY runs; cleanup and free of the local happen afterwards.
+
+ NOTE(review): `frame` is expanded several times and is not
+ parenthesized; pass a plain variable.  The expansion already ends in
+ ';', so the macro cannot be the sole body of an `if` that has an
+ `else`. */
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ afr_local_cleanup (__local, __this); \
+ free (__local); \
+} while (0);
+
+/* allocate and return a string that is the basename of argument
+
+   basename () is applied to a private copy of `str`, so the argument
+   itself is never modified.  The caller owns the returned string.
+
+   Returns NULL when `str` is NULL or when memory cannot be allocated. */
+static inline char *
+AFR_BASENAME (const char *str)
+{
+        char *__tmp_str = NULL;
+        char *__basename_str = NULL;
+
+        if (!str)
+                return NULL;
+
+        __tmp_str = strdup (str);
+        if (!__tmp_str)
+                return NULL;
+
+        __basename_str = strdup (basename (__tmp_str));
+        FREE (__tmp_str);
+        return __basename_str;
+}
+
+/* initialize local_t
+
+   Expects a zero-filled `local`.  Sets the default result
+   (op_ret -1, op_errno EUCLEAN) and transaction.erase_pending, copies
+   priv->child_up into local->child_up and sets local->call_count to the
+   number of children that are up.
+
+   Returns 0 on success, -ENOMEM if the child_up copy cannot be
+   allocated, -ENOTCONN if no child is up.  In the -ENOTCONN case
+   local->child_up stays allocated and is left to the caller's cleanup.
+
+   The defaults are assigned before anything can fail, so a local whose
+   initialisation failed never carries op_ret == 0. */
+static inline int
+AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
+{
+        local->op_ret   = -1;
+        local->op_errno = EUCLEAN;
+
+        local->transaction.erase_pending = 1;
+
+        local->child_up = CALLOC (sizeof (*local->child_up),
+                                  priv->child_count);
+        if (!local->child_up) {
+                return -ENOMEM;
+        }
+
+        memcpy (local->child_up, priv->child_up,
+                sizeof (*local->child_up) * priv->child_count);
+
+
+        local->call_count = afr_up_children_count (priv->child_count, local->child_up);
+        if (local->call_count == 0)
+                return -ENOTCONN;
+
+        return 0;
+}
+
+
+/* Allocate the per-child arrays a transaction needs: child_errno,
+   pending_array, transaction.locked_nodes and transaction.child_errno,
+   each with priv->child_count zeroed elements.
+
+   Returns 0 on success and -ENOMEM as soon as one allocation fails.
+   Arrays allocated before the failure stay attached to `local`; they
+   are left for the caller's cleanup of `local` (presumably
+   afr_local_cleanup -- confirm). */
+static inline int
+afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
+{
+        local->child_errno = CALLOC (sizeof (*local->child_errno),
+                                     priv->child_count);
+        if (!local->child_errno) {
+                return -ENOMEM;
+        }
+
+        local->pending_array = CALLOC (sizeof (*local->pending_array),
+                                       priv->child_count);
+        if (!local->pending_array) {
+                return -ENOMEM;
+        }
+
+        local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes),
+                                                  priv->child_count);
+        if (!local->transaction.locked_nodes) {
+                return -ENOMEM;
+        }
+
+        local->transaction.child_errno = CALLOC (sizeof (*local->transaction.child_errno),
+                                                 priv->child_count);
+        if (!local->transaction.child_errno) {
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
+#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/Makefile.am b/xlators/cluster/dht/Makefile.am
new file mode 100644
index 00000000000..f963effea22
--- /dev/null
+++ b/xlators/cluster/dht/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
\ No newline at end of file
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
new file mode 100644
index 00000000000..b7d07d137a6
--- /dev/null
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -0,0 +1,30 @@
+
+# Build the dht translator and its nufa variant as loadable modules.
+xlator_LTLIBRARIES = dht.la nufa.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+
+# Sources compiled into both modules.
+dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
+	dht-selfheal.c dht-rename.c dht-hashfn.c dht-hashfn-tea.c
+
+dht_la_SOURCES = $(dht_common_source) dht.c
+
+nufa_la_SOURCES = $(dht_common_source) nufa.c
+
+# The libtool option is spelled "-avoid-version".
+dht_la_LDFLAGS = -module -avoid-version
+dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+nufa_la_LDFLAGS = -module -avoid-version
+nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+# dht-common.c is distributed as a header rather than compiled on its own;
+# presumably it is #included by dht.c and nufa.c (TODO confirm).
+noinst_HEADERS = dht-common.h dht-common.c
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
+# distribute.so is an alias for dht.so, created at install time and removed
+# again at uninstall time.
+uninstall-local:
+	rm -f $(DESTDIR)$(xlatordir)/distribute.so
+
+install-data-hook:
+	ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
\ No newline at end of file
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
new file mode 100644
index 00000000000..5e4979e31b0
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -0,0 +1,3470 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+/*
+ * Completion callback for the directory self-heal started from
+ * dht_lookup_dir_cbk.
+ *
+ * When self-heal succeeded (op_ret == 0) the healed layout is stored in the
+ * inode context, and local->selfheal.layout is cleared if that store
+ * succeeded. The lookup is then unwound with the result of that store (or
+ * the failing op_ret) and local->op_errno.
+ */
+int
+dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
+                         xlator_t *this,
+                         int op_ret, int op_errno)
+{
+        dht_local_t *local = frame->local;
+        int          ret   = op_ret;
+
+        if (ret != 0)
+                goto unwind;
+
+        ret = inode_ctx_put (local->inode, this,
+                             (uint64_t)(long)local->selfheal.layout);
+        if (ret == 0)
+                local->selfheal.layout = NULL;
+
+        if (!local->st_ino) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not find hashed subvolume for %s",
+                        local->loc.path);
+        } else {
+                /* report the inode number recorded during lookup */
+                local->stbuf.st_ino = local->st_ino;
+        }
+
+unwind:
+        DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode,
+                          &local->stbuf, local->xattr);
+
+        return 0;
+}
+
+
+/*
+ * Callback for the lookups that are wound to every subvolume for a
+ * directory (or when the hashed subvolume is unknown/unreachable).
+ *
+ * Under frame->lock each reply is merged into local->layout and, for
+ * successful directory replies, into local->stbuf. On the last reply the
+ * layout is normalized: if that succeeds the layout is stored in the inode
+ * context and the lookup is unwound; otherwise directory self-heal is
+ * started with dht_lookup_selfheal_cbk as its completion callback.
+ */
+int
+dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+ int is_dir = 0;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->layout;
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ /* TODO: assert equal hash type in xattr, local->xattr */
+
+ /* TODO: always ensure same subvolume is in layout->list[0] */
+
+ /* the reply is merged into the layout even when it is an error;
+ NOTE(review): the return value is overwritten later and never
+ checked */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+
+ if (op_ret == -1) {
+ /* every failure is recorded as ENOENT, whatever op_errno was */
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ /* non-directory replies do not contribute to the result */
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (!is_dir)
+ goto unlock;
+
+ local->op_ret = 0;
+ /* keep the xattr and inode of the first successful reply */
+ if (local->xattr == NULL)
+ local->xattr = dict_ref (xattr);
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ /* remember the inode number as seen on the hashed subvolume */
+ if (prev->this == local->hashed_subvol)
+ local->st_ino = local->stbuf.st_ino;
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == 0) {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+
+ /* detach the layout from local; from here on it is referenced
+ only through the `layout` variable */
+ local->layout = NULL;
+
+ if (ret != 0) {
+ layout->gen = conf->gen;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "fixing assignment on %s",
+ local->loc.path);
+ goto selfheal;
+ }
+
+ inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+
+ if (local->st_ino) {
+ local->stbuf.st_ino = local->st_ino;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hashed subvolume for %s",
+ local->loc.path);
+ }
+ }
+
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr);
+ }
+
+ return 0;
+
+selfheal:
+ /* reached only via the goto above; the unwind then happens from
+ dht_lookup_selfheal_cbk */
+ ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+
+ return 0;
+}
+
+/*
+ * Callback for the lookups wound by dht_lookup when revalidating an inode
+ * that already has a layout.
+ *
+ * Under frame->lock each successful reply is checked against the cached
+ * inode (file type, linkfile, directory layout) and merged into
+ * local->stbuf. On the last reply the result is unwound; any detected
+ * layout mismatch turns the result into -1/ESTALE.
+ */
+int
+dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ /* ENOTCONN and ENOENT are not logged */
+ if (op_errno != ENOTCONN && op_errno != ENOENT) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ }
+
+ goto unlock;
+ }
+
+ /* the file type bits must match those of the cached inode */
+ if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching filetypes 0%o v/s 0%o for %s",
+ (stbuf->st_mode & S_IFMT),
+ (local->inode->st_mode & S_IFMT),
+ local->loc.path);
+
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+
+ goto unlock;
+ }
+
+ layout = dht_layout_get (this, inode);
+
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+
+ /* a linkfile reply during revalidate is treated as a layout
+ mismatch */
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "linkfile found in revalidate for %s",
+ local->loc.path);
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+
+ if (is_dir) {
+ ret = dht_layout_dir_mismatch (this, layout,
+ prev->this, &local->loc,
+ xattr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching layouts for %s",
+ local->loc.path);
+
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+ }
+
+ dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->op_ret = 0;
+ /* always report the inode number saved in local->st_ino */
+ local->stbuf.st_ino = local->st_ino;
+
+ if (!local->xattr)
+ local->xattr = dict_ref (xattr);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ /* set the sticky bit on a single-link non-directory whose hashed
+ and cached subvolumes differ */
+ if (!S_ISDIR (local->stbuf.st_mode)
+ && (local->hashed_subvol != local->cached_subvol)
+ && (local->stbuf.st_nlink == 1))
+ local->stbuf.st_mode |= S_ISVTX;
+
+ /* a mismatch overrides whatever result was accumulated */
+ if (local->layout_mismatch) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+
+ DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Completion callback for the linkfile creation started from
+ * dht_lookup_everywhere_cbk.
+ *
+ * Looks up the pre-set layout of the cached subvolume. If there is one it
+ * is stored in the inode context and the lookup succeeds (with the sticky
+ * bit set on single-link files); otherwise the lookup fails with EINVAL.
+ * The op_ret/op_errno/inode/stbuf arguments of the create are not
+ * consulted.
+ */
+int
+dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+                                xlator_t *this,
+                                int32_t op_ret, int32_t op_errno,
+                                inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local         = frame->local;
+        xlator_t     *cached_subvol = local->cached_subvol;
+        dht_layout_t *layout        = NULL;
+
+        layout = dht_layout_for_subvol (this, local->cached_subvol);
+
+        if (layout) {
+                inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+                local->op_ret = 0;
+                if (local->stbuf.st_nlink == 1)
+                        local->stbuf.st_mode |= S_ISVTX;
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no pre-set layout for subvolume %s",
+                        cached_subvol ? cached_subvol->name : "<nil>");
+                local->op_ret   = -1;
+                local->op_errno = EINVAL;
+        }
+
+        DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          local->inode, &local->stbuf, local->xattr);
+        return 0;
+}
+
+
+/*
+ * Callback for the lookups wound to every subvolume by
+ * dht_lookup_everywhere.
+ *
+ * The first subvolume that reports a non-linkfile entry becomes
+ * local->cached_subvol. Linkfiles found along the way are treated as stale
+ * and unlinked. On the last reply the lookup fails with ENOENT if nothing
+ * was found; otherwise a linkfile pointing at the cached subvolume is
+ * created on the hashed subvolume and the unwind is left to
+ * dht_lookup_linkfile_create_cbk.
+ *
+ * NOTE(review): `conf` and `is_dir` are assigned but never read here.
+ */
+int
+dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+
+ conf = this->private;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ prev = cookie;
+ subvol = prev->this;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ /* NOTE(review): the error is recorded here, but the failure
+ unwind below always reports ENOENT */
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, buf, xattr);
+ is_dir = check_is_dir (inode, buf, xattr);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol (this, inode, buf,
+ xattr);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s linkfile %s (-> %s)",
+ subvol->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s file %s",
+ subvol->name, loc->path);
+ }
+
+ if (!local->cached_subvol) {
+ /* found one file */
+ dht_stat_merge (this, &local->stbuf, buf, subvol);
+ local->xattr = dict_ref (xattr);
+ local->cached_subvol = subvol;
+ } else {
+ /* later hits are only logged; the first one stays cached */
+ gf_log (this->name, GF_LOG_WARNING,
+ "multiple subvolumes (%s and %s atleast) have "
+ "file %s", local->cached_subvol->name,
+ subvol->name, local->loc.path);
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ /* the unlink is issued outside the frame lock */
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ dht_linkfile_unlink (frame, this, subvol, loc);
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ if (!cached_subvol) {
+ DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
+ return 0;
+ }
+
+ /* NOTE(review): hashed_subvol is dereferenced without a NULL
+ check; assumes callers only get here with a hashed subvolume
+ set (TODO confirm) */
+ gf_log (this->name, GF_LOG_WARNING,
+ "linking file %s existing on %s to %s (hash)",
+ loc->path, cached_subvol->name, hashed_subvol->name);
+
+ dht_linkfile_create (frame, dht_lookup_linkfile_create_cbk,
+ cached_subvol, hashed_subvol, loc);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Wind a lookup for `loc` to every configured subvolume.
+ *
+ * local->call_cnt is set to the subvolume count and local->inode is
+ * referenced from loc->inode if it is not set yet. Replies are handled by
+ * dht_lookup_everywhere_cbk. Always returns 0.
+ */
+int
+dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        dht_conf_t  *conf  = this->private;
+        dht_local_t *local = frame->local;
+        int          total = conf->subvolume_cnt;
+        int          idx   = 0;
+
+        local->call_cnt = total;
+
+        if (local->inode == NULL)
+                local->inode = inode_ref (loc->inode);
+
+        while (idx < total) {
+                STACK_WIND (frame, dht_lookup_everywhere_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->lookup,
+                            loc, local->xattr_req);
+                idx++;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for the lookup wound to the subvolume a linkfile points at.
+ *
+ * If that lookup failed, fall back to dht_lookup_everywhere. Otherwise
+ * mark single-link files with the sticky bit, transform the inode number,
+ * store the subvolume's pre-set layout in the inode context and unwind.
+ * A missing pre-set layout fails the lookup with EINVAL.
+ */
+int
+dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+                         xlator_t *this, int op_ret, int op_errno,
+                         inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+        call_frame_t *prev   = cookie;
+        xlator_t     *subvol = prev->this;
+        dht_local_t  *local  = frame->local;
+        loc_t        *loc    = &local->loc;
+        dht_layout_t *layout = NULL;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lookup of %s on %s (following linkfile) failed (%s)",
+                        local->loc.path, subvol->name, strerror (op_errno));
+
+                dht_lookup_everywhere (frame, this, loc);
+                return 0;
+        }
+
+        /* TODO: assert type is non-dir and non-linkfile */
+
+        if (stbuf->st_nlink == 1)
+                stbuf->st_mode |= S_ISVTX;
+        dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+
+        layout = dht_layout_for_subvol (this, prev->this);
+        if (layout) {
+                inode_ctx_put (inode, this, (uint64_t)(long)layout);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no pre-set layout for subvolume %s",
+                        prev->this->name);
+                op_ret   = -1;
+                op_errno = EINVAL;
+        }
+
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+
+        return 0;
+}
+
+
+/*
+ * Callback for the fresh (non-revalidate) lookup wound to the hashed
+ * subvolume by dht_lookup.
+ *
+ * Depending on the reply it either
+ *  - searches all subvolumes (entry missing and conf->search_unhashed set),
+ *  - fans the lookup out to every subvolume with dht_lookup_dir_cbk
+ *    (directory, or ENOTCONN from the hashed subvolume),
+ *  - unwinds directly (error, or a regular non-linkfile entry), or
+ *  - follows a linkfile to the subvolume it names.
+ */
+int
+dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+ dht_layout_t *layout = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ call_frame_t *prev = NULL;
+ int call_cnt = 0;
+
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == 0) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+ }
+ }
+
+ /* directories, and lookups whose hashed subvolume is disconnected, are
+ looked up on every subvolume */
+ if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto out;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ dht_itransform (this, prev->this, stbuf->st_ino,
+ &stbuf->st_ino);
+
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ goto out;
+ }
+
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+ /* a linkfile that names no subvolume: search everywhere instead */
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ return 0;
+}
+
+
+/*
+ * lookup fop entry point.
+ *
+ * Revalidate: the inode must already have a layout; the lookup is wound to
+ * the subvolumes in that layout and handled by dht_revalidate_cbk.
+ * Fresh lookup: wound to the hashed subvolume (dht_lookup_cbk), or to every
+ * subvolume (dht_lookup_dir_cbk) when no hashed subvolume is found.
+ *
+ * On any setup failure the fop is unwound with -1 and an errno; the
+ * function itself always returns 0.
+ */
+int
+dht_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+
+ /* NOTE(review): the result of dict_new () is not checked for NULL */
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+
+ local->cached_subvol = cached_subvol;
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate (loc)) {
+ layout = dht_layout_get (this, loc->inode);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "revalidate without cache. path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* a layout with a non-zero generation older than the current
+ configuration generation is rejected with EAGAIN */
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ op_errno = EAGAIN;
+ goto err;
+ }
+
+ local->inode = inode_ref (loc->inode);
+ local->st_ino = loc->inode->ino;
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ /* NOTE(review): the return value of dict_set_uint32 is ignored */
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ /* stop once call_cnt winds have been issued */
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ /* TODO: remove the hard-coding */
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht", 4 * 4);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ "trusted.glusterfs.dht.linkto", 256);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ hashed_subvol, hashed_subvol->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ /* op_errno is still -1 when a VALIDATE_OR_GOTO check failed; fall back
+ to errno in that case */
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+/*
+ * Shared callback for fops that return a struct stat (stat, fstat, chmod,
+ * chown, fchmod, fchown, utimens, truncate, ftruncate).
+ *
+ * Under frame->lock, a failing reply records op_errno; a successful reply
+ * is merged into local->stbuf, has its st_ino replaced by local->inode->ino
+ * when local->inode is set, and marks the fop as successful. The fop is
+ * unwound when the last reply arrives.
+ */
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno, struct stat *stbuf)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "subvolume %s returned -1 (%s)",
+                                prev->this->name, strerror (op_errno));
+                } else {
+                        dht_stat_merge (this, &local->stbuf, stbuf,
+                                        prev->this);
+
+                        if (local->inode)
+                                local->stbuf.st_ino = local->inode->ino;
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->stbuf);
+
+        return 0;
+}
+
+
+/*
+ * stat fop: wind a stat to every subvolume in the inode's layout; replies
+ * are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout and ENOMEM when the
+ * local cannot be allocated. Always returns 0.
+ */
+int
+dht_stat (call_frame_t *frame, xlator_t *this,
+          loc_t *loc)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           idx      = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        layout = dht_layout_get (this, loc->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (loc->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                STACK_WIND (frame, dht_attr_cbk,
+                            layout->list[idx].xlator,
+                            layout->list[idx].xlator->fops->stat,
+                            loc);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fstat fop: wind an fstat to every subvolume in the layout of fd->inode;
+ * replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout and ENOMEM when the
+ * local cannot be allocated. Always returns 0.
+ */
+int
+dht_fstat (call_frame_t *frame, xlator_t *this,
+           fd_t *fd)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        int           op_errno = -1;
+        int           idx      = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        layout = dht_layout_get (this, fd->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "local allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (fd->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                STACK_WIND (frame, dht_attr_cbk,
+                            layout->list[idx].xlator,
+                            layout->list[idx].xlator->fops->fstat,
+                            fd);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * chmod fop: wind a chmod to every subvolume in the inode's layout;
+ * replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout or the layout fails
+ * layout_is_sane(), and ENOMEM when the local cannot be allocated.
+ * Always returns 0.
+ */
+int
+dht_chmod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        xlator_t     *subvol   = NULL;
+        int           op_errno = -1;
+        int           idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        layout = dht_layout_get (this, loc->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout is not sane for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (loc->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                subvol = layout->list[idx].xlator;
+
+                STACK_WIND (frame, dht_attr_cbk,
+                            subvol, subvol->fops->chmod,
+                            loc, mode);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * chown fop: wind a chown to every subvolume in the inode's layout;
+ * replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout or the layout fails
+ * layout_is_sane(), and ENOMEM when the local cannot be allocated.
+ * Always returns 0.
+ */
+int
+dht_chown (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, uid_t uid, gid_t gid)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        xlator_t     *subvol   = NULL;
+        int           op_errno = -1;
+        int           idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        layout = dht_layout_get (this, loc->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout is not sane for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (loc->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                subvol = layout->list[idx].xlator;
+
+                STACK_WIND (frame, dht_attr_cbk,
+                            subvol, subvol->fops->chown,
+                            loc, uid, gid);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fchmod fop: wind an fchmod to every subvolume in the layout of
+ * fd->inode; replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout or the layout fails
+ * layout_is_sane(), and ENOMEM when the local cannot be allocated.
+ * Always returns 0.
+ */
+int
+dht_fchmod (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, mode_t mode)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        xlator_t     *subvol   = NULL;
+        int           op_errno = -1;
+        int           idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        layout = dht_layout_get (this, fd->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout is not sane for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (fd->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                subvol = layout->list[idx].xlator;
+
+                STACK_WIND (frame, dht_attr_cbk,
+                            subvol, subvol->fops->fchmod,
+                            fd, mode);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fchown fop: wind an fchown to every subvolume in the layout of
+ * fd->inode; replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout or the layout fails
+ * layout_is_sane(), and ENOMEM when the local cannot be allocated.
+ * Always returns 0.
+ */
+int
+dht_fchown (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, uid_t uid, gid_t gid)
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        xlator_t     *subvol   = NULL;
+        int           op_errno = -1;
+        int           idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        layout = dht_layout_get (this, fd->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout is not sane for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (fd->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                subvol = layout->list[idx].xlator;
+
+                STACK_WIND (frame, dht_attr_cbk,
+                            subvol, subvol->fops->fchown,
+                            fd, uid, gid);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * utimens fop: wind a utimens to every subvolume in the inode's layout;
+ * replies are collected by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when the inode has no layout or the layout fails
+ * layout_is_sane(), and ENOMEM when the local cannot be allocated.
+ * Always returns 0.
+ */
+int
+dht_utimens (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, struct timespec tv[2])
+{
+        dht_layout_t *layout   = NULL;
+        dht_local_t  *local    = NULL;
+        xlator_t     *subvol   = NULL;
+        int           op_errno = -1;
+        int           idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        layout = dht_layout_get (this, loc->inode);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no layout for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        if (!layout_is_sane (layout)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout is not sane for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (loc->inode);
+        local->call_cnt = layout->cnt;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                subvol = layout->list[idx].xlator;
+
+                STACK_WIND (frame, dht_attr_cbk,
+                            subvol, subvol->fops->utimens,
+                            loc, tv);
+        }
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * truncate fop: wind a truncate to the inode's cached subvolume only; the
+ * single reply is handled by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when there is no cached subvolume and ENOMEM when
+ * the local cannot be allocated. Always returns 0.
+ */
+int
+dht_truncate (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, off_t offset)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (subvol == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (loc->inode);
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_attr_cbk,
+                    subvol, subvol->fops->truncate,
+                    loc, offset);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop: wind an ftruncate to the cached subvolume of fd->inode
+ * only; the single reply is handled by dht_attr_cbk.
+ *
+ * Unwinds with EINVAL when there is no cached subvolume and ENOMEM when
+ * the local cannot be allocated. Always returns 0.
+ */
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, off_t offset)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (subvol == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->inode    = inode_ref (fd->inode);
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_attr_cbk,
+                    subvol, subvol->fops->ftruncate,
+                    fd, offset);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Shared callback for fops whose reply carries only op_ret/op_errno
+ * (used here by access and setxattr).
+ *
+ * Under frame->lock, a failing reply records and logs op_errno and a
+ * successful reply marks the fop as successful. The fop is unwound when
+ * the last reply arrives.
+ */
+int
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int op_ret, int op_errno)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "subvolume %s returned -1 (%s)",
+                                prev->this->name, strerror (op_errno));
+                } else {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/*
+ * access fop: wind an access check to the inode's cached subvolume; the
+ * single reply is handled by dht_err_cbk.
+ *
+ * Unwinds with EINVAL when there is no cached subvolume and ENOMEM when
+ * the local cannot be allocated. Always returns 0.
+ */
+int
+dht_access (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t mask)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (subvol == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_err_cbk,
+                    subvol, subvol->fops->access,
+                    loc, mask);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * readlink callback: pass the subvolume's reply (op_ret, op_errno, path)
+ * straight up the stack. Always returns 0.
+ */
+int
+dht_readlink_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int op_ret,
+                  int op_errno,
+                  const char *path)
+{
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, path);
+
+        return 0;
+}
+
+
+/*
+ * readlink fop: wind a readlink to the inode's cached subvolume; the reply
+ * is forwarded by dht_readlink_cbk.
+ *
+ * Unwinds with EINVAL when there is no cached subvolume. Always returns 0.
+ */
+int
+dht_readlink (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, size_t size)
+{
+        xlator_t *subvol   = NULL;
+        int       op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (subvol == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_readlink_cbk,
+                    subvol, subvol->fops->readlink,
+                    loc, size);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * getxattr callback: pass the subvolume's reply (op_ret, op_errno, xattr)
+ * straight up the stack. Always returns 0.
+ */
+int
+dht_getxattr_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int op_ret,
+                  int op_errno,
+                  dict_t *xattr)
+{
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr);
+
+        return 0;
+}
+
+
+/*
+ * getxattr fop: wind a getxattr to the inode's cached subvolume; the reply
+ * is forwarded by dht_getxattr_cbk.
+ *
+ * Unwinds with EINVAL when there is no cached subvolume. Always returns 0.
+ */
+int
+dht_getxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, const char *key)
+{
+        xlator_t *subvol   = NULL;
+        int       op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (subvol == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_getxattr_cbk,
+                    subvol, subvol->fops->getxattr,
+                    loc, key);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1; use errno then */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * setxattr fop: winds the request to the subvolume returned by
+ * dht_subvol_get_cached() for loc->inode, with dht_err_cbk as the
+ * callback.
+ *
+ * On failure (invalid argument, no cached subvolume, or local
+ * allocation failure) the frame is unwound with -1 and an errno.  The
+ * error unwind carries only op_ret/op_errno, matching the other fops
+ * in this file that complete through dht_err_cbk.  Always returns 0.
+ */
+int
+dht_setxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr, int flags)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Exactly one subvolume is contacted. */
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_err_cbk,
+                    subvol, subvol->fops->setxattr,
+                    loc, xattr, flags);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * removexattr fop: winds the request for `key` to the subvolume
+ * returned by dht_subvol_get_cached() for loc->inode, with dht_err_cbk
+ * as the callback.
+ *
+ * On failure (invalid argument, no cached subvolume, or local
+ * allocation failure) the frame is unwound with -1 and an errno.  The
+ * error unwind carries only op_ret/op_errno, matching the other fops
+ * in this file that complete through dht_err_cbk.  Always returns 0.
+ */
+int
+dht_removexattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *key)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = dht_subvol_get_cached (this, loc->inode);
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Exactly one subvolume is contacted. */
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_err_cbk,
+                    subvol, subvol->fops->removexattr,
+                    loc, key);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Shared callback for fd-returning fops (wound from dht_open and
+ * dht_opendir in this file).  Under the frame lock it records the
+ * errno of a failed reply, or marks local->op_ret as 0 for a
+ * successful one.  When is_last_call() accepts the value returned by
+ * dht_frame_return(), the frame is unwound with the stored result and
+ * local->fd.
+ */
+int
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int op_ret, int op_errno, fd_t *fd)
+{
+        call_frame_t *prev      = cookie;
+        dht_local_t  *local     = frame->local;
+        int           remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        local->op_ret = 0;
+                } else {
+                        local->op_errno = op_errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "subvolume %s returned -1 (%s)",
+                                prev->this->name, strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  local->fd);
+        }
+
+        return 0;
+}
+
+
+/*
+ * open fop: winds the open to the subvolume returned by
+ * dht_subvol_get_cached() for fd->inode.  A reference to `fd` and a
+ * duplicate of `loc` are stored in the frame-local state before the
+ * wind; the reply is handled by dht_fd_cbk.  Failures unwind with -1,
+ * an errno and a NULL fd.  Always returns 0.
+ */
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+          loc_t *loc, int flags, fd_t *fd)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *target   = NULL;
+        int          op_errno = -1;
+        int          dup_ret  = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->fd = fd_ref (fd);
+
+        dup_ret = loc_dup (loc, &local->loc);
+        if (dup_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Only one subvolume is asked, so one reply completes the fop. */
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_fd_cbk,
+                    target, target->fops->open,
+                    loc, flags, fd);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+
+/*
+ * readv callback: relays status, errno, the iovec array with its
+ * count, and the stat buffer to the caller unchanged.
+ */
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno,
+               struct iovec *vector, int count, struct stat *stbuf)
+{
+        /* Single-subvolume reply; forward it as-is. */
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+
+/*
+ * readv fop: winds the read of `size` bytes at offset `off` to the
+ * subvolume returned by dht_subvol_get_cached() for fd->inode.
+ * Failures unwind with -1, an errno, and NULL/0/NULL for the
+ * vector/count/stat arguments.  Always returns 0.
+ */
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, size_t size, off_t off)
+{
+        int       op_errno = -1;
+        xlator_t *target   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_readv_cbk,
+                    target, target->fops->readv,
+                    fd, size, off);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+        return 0;
+}
+
+
+/*
+ * writev callback: relays status, errno and the stat buffer to the
+ * caller unchanged.
+ */
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct stat *stbuf)
+{
+        /* Single-subvolume reply; forward it as-is. */
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+
+/*
+ * writev fop: winds the write of `count` iovecs at offset `off` to
+ * the subvolume returned by dht_subvol_get_cached() for fd->inode; the
+ * reply is relayed by dht_writev_cbk.
+ *
+ * On failure (invalid argument or no cached subvolume) the frame is
+ * unwound with -1, an errno and a NULL stat buffer -- the same
+ * argument list dht_writev_cbk uses on the reply path.  Always
+ * returns 0.
+ */
+int
+dht_writev (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, struct iovec *vector, int count, off_t off)
+{
+        xlator_t *subvol   = NULL;
+        int       op_errno = -1;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    fd, vector, count, off);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * flush fop: winds the flush to the subvolume returned by
+ * dht_subvol_get_cached() for fd->inode, holding a reference to `fd`
+ * in the frame-local state.  The reply is handled by dht_err_cbk.
+ * Failures unwind with -1 and an errno.  Always returns 0.
+ */
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *target   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->fd       = fd_ref (fd);
+
+        STACK_WIND (frame, dht_err_cbk,
+                    target, target->fops->flush, fd);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+
+/*
+ * fsync fop: winds the fsync (with the caller's `datasync` flag) to
+ * the subvolume returned by dht_subvol_get_cached() for fd->inode; the
+ * reply is handled by dht_err_cbk.
+ *
+ * On failure (invalid argument, no cached subvolume, or local
+ * allocation failure) the frame is unwound with -1 and an errno.
+ * Always returns 0.
+ */
+int
+dht_fsync (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, int datasync)
+{
+        xlator_t    *subvol   = NULL;
+        int          op_errno = -1;
+        dht_local_t *local    = NULL;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        subvol = dht_subvol_get_cached (this, fd->inode);
+        if (!subvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                /* Same wording as every other allocation-failure log here. */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Exactly one subvolume is contacted. */
+        local->call_cnt = 1;
+
+        STACK_WIND (frame, dht_err_cbk,
+                    subvol, subvol->fops->fsync,
+                    fd, datasync);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * lk callback: relays status, errno and the flock structure to the
+ * caller unchanged.
+ */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int op_ret, int op_errno, struct flock *flock)
+{
+        /* Single-subvolume reply; forward it as-is. */
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+        return 0;
+}
+
+
+/*
+ * lk fop: winds the lock command `cmd` with `flock` to the subvolume
+ * returned by dht_subvol_get_cached() for fd->inode.  Failures unwind
+ * with -1, an errno and a NULL flock.  Always returns 0.
+ */
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int cmd, struct flock *flock)
+{
+        int       op_errno = -1;
+        xlator_t *target   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_lk_cbk,
+                    target, target->fops->lk,
+                    fd, cmd, flock);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/* gf_lk no longer exists
+int
+dht_gf_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct flock *flock)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+
+ return 0;
+}
+
+
+int
+dht_gf_lk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int cmd, struct flock *flock)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_gf_lk_cbk,
+ subvol, subvol->fops->gf_lk,
+ fd, cmd, flock);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ return 0;
+}
+*/
+
+/*
+ * statfs callback, invoked once per subvolume wound by dht_statfs.
+ *
+ * Under the frame lock, a failed reply only records its errno.  A
+ * successful reply sets local->op_ret to 0, adds the block and file
+ * counters into local->statvfs, and overwrites the remaining fields
+ * (block sizes, fsid, flags, name length) with this reply's values.
+ * When is_last_call() accepts the value from dht_frame_return(), the
+ * accumulated structure is unwound to the caller.
+ */
+int
+dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno, struct statvfs *statvfs)
+{
+        dht_local_t    *local     = frame->local;
+        struct statvfs *total     = NULL;
+        int             remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret != -1) {
+                        local->op_ret = 0;
+                        total = &local->statvfs;
+
+                        /* TODO: normalize sizes */
+                        total->f_bsize  = statvfs->f_bsize;
+                        total->f_frsize = statvfs->f_frsize;
+
+                        /* Capacity counters are summed over subvolumes. */
+                        total->f_blocks += statvfs->f_blocks;
+                        total->f_bfree  += statvfs->f_bfree;
+                        total->f_bavail += statvfs->f_bavail;
+                        total->f_files  += statvfs->f_files;
+                        total->f_ffree  += statvfs->f_ffree;
+                        total->f_favail += statvfs->f_favail;
+
+                        /* These simply take the latest reply's value. */
+                        total->f_fsid    = statvfs->f_fsid;
+                        total->f_flag    = statvfs->f_flag;
+                        total->f_namemax = statvfs->f_namemax;
+                } else {
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->statvfs);
+        }
+
+        return 0;
+}
+
+
+/*
+ * statfs fop: winds a statfs for `loc` to every subvolume listed in
+ * the translator's private configuration; replies are accumulated by
+ * dht_statfs_cbk.
+ *
+ * On failure (invalid argument or local allocation failure) the frame
+ * is unwound with -1, an errno and a NULL statvfs.  Always returns 0.
+ */
+int
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        dht_local_t *local    = NULL;
+        dht_conf_t  *conf     = NULL;
+        int          op_errno = -1;
+        int          i        = -1;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (!local) {
+                /* Do not dereference a failed allocation below. */
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* One reply is expected from each subvolume. */
+        local->call_cnt = conf->subvolume_cnt;
+
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                STACK_WIND (frame, dht_statfs_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->statfs, loc);
+        }
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * opendir fop: winds an opendir for `loc`/`fd` to every subvolume in
+ * the translator's private configuration.  A reference to `fd` and a
+ * duplicate of `loc` are kept in the frame-local state; replies are
+ * collected by dht_fd_cbk.  Failures unwind with -1, an errno and a
+ * NULL fd.  Always returns 0.
+ */
+int
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+        dht_conf_t  *conf     = NULL;
+        dht_local_t *local    = NULL;
+        int          op_errno = -1;
+        int          dup_ret  = -1;
+        int          idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->fd = fd_ref (fd);
+
+        dup_ret = loc_dup (loc, &local->loc);
+        if (dup_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* One reply is expected from each subvolume. */
+        local->call_cnt = conf->subvolume_cnt;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                STACK_WIND (frame, dht_fd_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->opendir,
+                            loc, fd);
+        }
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+
+/*
+ * readdir callback.
+ *
+ * Walks the entries returned by the subvolume that answered
+ * (prev->this) and keeps those for which dht_layout_search() returns
+ * either no subvolume or the answering subvolume itself.  Each kept
+ * entry is copied into a fresh list with its inode number and offset
+ * run through dht_itransform().
+ *
+ * If nothing was kept (including when the reply was an error), the
+ * readdir is re-wound at offset 0 to the subvolume returned by
+ * dht_subvol_next(); when there is none, or when at least one entry
+ * was kept, the frame is unwound with the collected list.  A negative
+ * op_ret is reported to the caller as 0.
+ *
+ * The transform is always given prev->this: `subvol` is NULL in the
+ * "no subvolume found" case, and the entry was produced by prev->this
+ * in both cases.
+ *
+ * NOTE(review): on gf_dirent_for_name() failure the unwind reports the
+ * subvolume's original op_ret rather than the number of entries
+ * actually collected -- confirm whether callers rely on op_ret.
+ */
+int
+dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+        dht_local_t  *local      = NULL;
+        gf_dirent_t   entries;
+        gf_dirent_t  *orig_entry = NULL;
+        gf_dirent_t  *entry      = NULL;
+        call_frame_t *prev       = NULL;
+        xlator_t     *subvol     = NULL;
+        xlator_t     *next       = NULL;
+        dht_layout_t *layout     = NULL;
+        int           count      = 0;
+
+
+        INIT_LIST_HEAD (&entries.list);
+        prev = cookie;
+        local = frame->local;
+
+        if (op_ret < 0)
+                goto done;
+
+        layout = dht_layout_get (this, local->fd->inode);
+
+        list_for_each_entry (orig_entry, &orig_entries->list, list) {
+                subvol = dht_layout_search (this, layout, orig_entry->d_name);
+
+                if (!subvol || subvol == prev->this) {
+                        entry = gf_dirent_for_name (orig_entry->d_name);
+                        if (!entry) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "memory allocation failed :(");
+                                goto unwind;
+                        }
+
+                        /* Encode with the subvolume that produced the entry. */
+                        dht_itransform (this, prev->this, orig_entry->d_ino,
+                                        &entry->d_ino);
+                        dht_itransform (this, prev->this, orig_entry->d_off,
+                                        &entry->d_off);
+
+                        entry->d_type = orig_entry->d_type;
+                        entry->d_len  = orig_entry->d_len;
+
+                        list_add_tail (&entry->list, &entries.list);
+                        count++;
+                }
+        }
+        op_ret = count;
+
+done:
+        if (count == 0) {
+                /* Nothing usable from this subvolume; try the next one. */
+                next = dht_subvol_next (this, prev->this);
+                if (!next) {
+                        goto unwind;
+                }
+
+                STACK_WIND (frame, dht_readdir_cbk,
+                            next, next->fops->readdir,
+                            local->fd, local->size, 0);
+                return 0;
+        }
+
+unwind:
+        if (op_ret < 0)
+                op_ret = 0;
+
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+
+/*
+ * readdir fop: decodes the caller's offset `yoff` with
+ * dht_deitransform() into a subvolume and a subvolume-side offset,
+ * then winds the readdir there.  A reference to `fd` and the requested
+ * `size` are stored in the frame-local state for dht_readdir_cbk.
+ *
+ * The decoded offset is received in a genuine uint64_t and converted
+ * to off_t when wound, rather than letting the helper write through a
+ * cast off_t pointer.
+ *
+ * On failure (invalid argument or local allocation failure) the frame
+ * is unwound with -1, an errno and a NULL entry list.  Always
+ * returns 0.
+ */
+int
+dht_readdir (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t yoff)
+{
+        dht_local_t *local    = NULL;
+        int          op_errno = -1;
+        xlator_t    *xvol     = NULL;
+        uint64_t     xoff     = 0;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        local = dht_local_init (frame);
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->fd = fd_ref (fd);
+        local->size = size;
+
+        dht_deitransform (this, yoff, &xvol, &xoff);
+
+        /* TODO: do proper readdir */
+        STACK_WIND (frame, dht_readdir_cbk,
+                    xvol, xvol->fops->readdir,
+                    fd, size, (off_t) xoff);
+
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fsyncdir callback, invoked once per subvolume wound by
+ * dht_fsyncdir.  Under the frame lock it stores the errno of a failed
+ * reply, or sets local->op_ret to 0 for a successful one.  When
+ * is_last_call() accepts the value from dht_frame_return(), the stored
+ * result is unwound.
+ */
+int
+dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno)
+{
+        dht_local_t *local     = frame->local;
+        int          remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                /* -1 and 0 are mutually exclusive; others change nothing. */
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                } else if (op_ret == 0) {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+        }
+
+        return 0;
+}
+
+
+/*
+ * fsyncdir fop: winds an fsyncdir for `fd` (with the caller's
+ * `datasync` flag) to every subvolume in the translator's private
+ * configuration, keeping a reference to `fd` in the frame-local
+ * state.  Replies are collected by dht_fsyncdir_cbk.  Failures unwind
+ * with -1 and an errno.  Always returns 0.
+ */
+int
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+{
+        dht_conf_t  *conf     = NULL;
+        dht_local_t *local    = NULL;
+        int          op_errno = -1;
+        int          idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* One reply is expected from each subvolume. */
+        local->call_cnt = conf->subvolume_cnt;
+        local->fd       = fd_ref (fd);
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                STACK_WIND (frame, dht_fsyncdir_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->fsyncdir,
+                            fd, datasync);
+        }
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+
+/*
+ * Callback shared by the entry-creating fops wound from dht_mknod and
+ * dht_symlink.
+ *
+ * On success it passes stbuf->st_ino through dht_itransform() for the
+ * answering subvolume, looks up that subvolume's layout with
+ * dht_layout_for_subvol(), and stores the layout in the new inode's
+ * context.  A missing layout or a failed context store turns the
+ * reply into -1/EINVAL.  The (possibly adjusted) result is then
+ * unwound.
+ */
+int
+dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno,
+                 inode_t *inode, struct stat *stbuf)
+{
+        call_frame_t *prev    = NULL;
+        dht_layout_t *layout  = NULL;
+        int           ctx_ret = -1;
+
+        if (op_ret == -1)
+                goto unwind;
+
+        prev = cookie;
+
+        dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+
+        layout = dht_layout_for_subvol (this, prev->this);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no pre-set layout for subvolume %s",
+                        prev->this->name);
+                op_errno = EINVAL;
+                op_ret   = -1;
+                goto unwind;
+        }
+
+        ctx_ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
+        if (ctx_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set inode context");
+                op_errno = EINVAL;
+                op_ret   = -1;
+        }
+
+unwind:
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+        return 0;
+}
+
+
+/*
+ * mknod fop: winds the mknod to the subvolume returned by
+ * dht_subvol_get_hashed() for `loc`; the reply is handled by
+ * dht_newfile_cbk.  Failures unwind with -1, an errno and NULL
+ * inode/stat.  Always returns 0.
+ */
+int
+dht_mknod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode, dev_t rdev)
+{
+        int       op_errno = -1;
+        xlator_t *target   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        target = dht_subvol_get_hashed (this, loc);
+        if (target == NULL) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                goto err;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "creating %s on %s", loc->path, target->name);
+
+        STACK_WIND (frame, dht_newfile_cbk,
+                    target, target->fops->mknod,
+                    loc, mode, rdev);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * symlink fop: winds the symlink creation to the subvolume returned
+ * by dht_subvol_get_hashed() for `loc`; the reply is handled by
+ * dht_newfile_cbk.  Failures unwind with -1, an errno and NULL
+ * inode/stat.  Always returns 0.
+ */
+int
+dht_symlink (call_frame_t *frame, xlator_t *this,
+             const char *linkname, loc_t *loc)
+{
+        int       op_errno = -1;
+        xlator_t *target   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        target = dht_subvol_get_hashed (this, loc);
+        if (target == NULL) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                goto err;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "creating %s on %s", loc->path, target->name);
+
+        STACK_WIND (frame, dht_newfile_cbk,
+                    target, target->fops->symlink,
+                    linkname, loc);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * unlink fop: winds the unlink to the subvolume returned by
+ * dht_subvol_get_cached() for loc->inode and, when it is a different
+ * subvolume, also to the one returned by dht_subvol_get_hashed() for
+ * `loc`.  Replies are handled by dht_err_cbk, with call_cnt set to the
+ * number of winds.  Failures unwind with -1 and an errno.  Always
+ * returns 0.
+ */
+int
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *cached   = NULL;
+        xlator_t    *hashed   = NULL;
+        int          op_errno = -1;
+        int          two_vols = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        cached = dht_subvol_get_cached (this, loc->inode);
+        if (cached == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                goto err;
+        }
+
+        hashed = dht_subvol_get_hashed (this, loc);
+        if (hashed == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Decide the fan-out before winding anything. */
+        two_vols = (hashed != cached);
+        local->call_cnt = two_vols ? 2 : 1;
+
+        STACK_WIND (frame, dht_err_cbk,
+                    cached, cached->fops->unlink, loc);
+
+        if (two_vols) {
+                STACK_WIND (frame, dht_err_cbk,
+                            hashed, hashed->fops->unlink, loc);
+        }
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+
+/*
+ * link callback.  On success it checks that dht_layout_for_subvol()
+ * knows a layout for the answering subvolume (turning the reply into
+ * -1/EINVAL otherwise) and overwrites stbuf->st_ino with the inode
+ * number recorded on local->loc.inode.  The result is then unwound.
+ */
+int
+dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int op_ret, int op_errno,
+              inode_t *inode, struct stat *stbuf)
+{
+        call_frame_t *prev   = cookie;
+        dht_local_t  *local  = frame->local;
+        dht_layout_t *layout = NULL;
+
+        if (op_ret == -1)
+                goto unwind;
+
+        layout = dht_layout_for_subvol (this, prev->this);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no pre-set layout for subvolume %s",
+                        prev->this->name);
+                op_errno = EINVAL;
+                op_ret   = -1;
+                goto unwind;
+        }
+
+        /* Report the inode number already held for the source loc. */
+        stbuf->st_ino = local->loc.inode->ino;
+
+unwind:
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+        return 0;
+}
+
+
+/*
+ * Callback for the dht_linkfile_create() step of dht_link.  If that
+ * step failed, its result is unwound to the caller.  Otherwise the
+ * actual link is wound to local->linkfile.srcvol using the two locs
+ * saved in the frame-local state.
+ */
+int
+dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t *local  = NULL;
+        xlator_t    *srcvol = NULL;
+
+        if (op_ret != -1) {
+                local  = frame->local;
+                srcvol = local->linkfile.srcvol;
+
+                STACK_WIND (frame, dht_link_cbk,
+                            srcvol, srcvol->fops->link,
+                            &local->loc, &local->loc2);
+                return 0;
+        }
+
+        /* Linkfile creation failed; report that failure upward. */
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+        return 0;
+}
+
+
+/*
+ * link fop.  Looks up the subvolume cached for oldloc->inode and the
+ * one hashed for `newloc`, and copies both locs into the frame-local
+ * state.  If the two subvolumes differ, dht_linkfile_create() is
+ * called first (continuing in dht_link_linkfile_cbk); otherwise the
+ * link is wound directly to the cached subvolume.  Failures unwind
+ * with -1, an errno and NULL inode/stat.  Always returns 0.
+ */
+int
+dht_link (call_frame_t *frame, xlator_t *this,
+          loc_t *oldloc, loc_t *newloc)
+{
+        dht_local_t *local    = NULL;
+        xlator_t    *cached   = NULL;
+        xlator_t    *hashed   = NULL;
+        int          op_errno = -1;
+        int          copy_ret = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (oldloc, err);
+        VALIDATE_OR_GOTO (newloc, err);
+
+        cached = dht_subvol_get_cached (this, oldloc->inode);
+        if (cached == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", oldloc->path);
+                goto err;
+        }
+
+        hashed = dht_subvol_get_hashed (this, newloc);
+        if (hashed == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        newloc->path);
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        copy_ret = loc_copy (&local->loc, oldloc);
+        if (copy_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        copy_ret = loc_copy (&local->loc2, newloc);
+        if (copy_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        if (hashed == cached) {
+                STACK_WIND (frame, dht_link_cbk,
+                            cached, cached->fops->link,
+                            oldloc, newloc);
+        } else {
+                dht_linkfile_create (frame, dht_link_linkfile_cbk,
+                                     cached, hashed, newloc);
+        }
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * create callback.  On success it passes stbuf->st_ino through
+ * dht_itransform() for the answering subvolume, looks up that
+ * subvolume's layout with dht_layout_for_subvol(), and stores the
+ * layout in the new inode's context.  A missing layout or a failed
+ * context store turns the reply into -1/EINVAL.  The result is then
+ * unwound together with `fd`.
+ */
+int
+dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int op_ret, int op_errno,
+                fd_t *fd, inode_t *inode, struct stat *stbuf)
+{
+        call_frame_t *prev    = NULL;
+        dht_layout_t *layout  = NULL;
+        int           ctx_ret = -1;
+
+        if (op_ret == -1)
+                goto unwind;
+
+        prev = cookie;
+
+        dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+
+        layout = dht_layout_for_subvol (this, prev->this);
+        if (layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no pre-set layout for subvolume %s",
+                        prev->this->name);
+                op_errno = EINVAL;
+                op_ret   = -1;
+                goto unwind;
+        }
+
+        ctx_ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
+        if (ctx_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set inode context");
+                op_errno = EINVAL;
+                op_ret   = -1;
+        }
+
+unwind:
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
+        return 0;
+}
+
+
+/*
+ * create fop: winds the create to the subvolume returned by
+ * dht_subvol_get_hashed() for `loc`; the reply is handled by
+ * dht_create_cbk.  Failures unwind with -1, an errno and NULL
+ * fd/inode/stat.  Always returns 0.
+ */
+int
+dht_create (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        int       op_errno = -1;
+        xlator_t *target   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        target = dht_subvol_get_hashed (this, loc);
+        if (target == NULL) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                goto err;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "creating %s on %s", loc->path, target->name);
+
+        STACK_WIND (frame, dht_create_cbk,
+                    target, target->fops->create,
+                    loc, flags, mode, fd);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Completion callback for the dht_selfheal_directory() step of mkdir.
+ * When the step succeeded, the layout held in local->selfheal.layout
+ * is stored in the new inode's context and detached from the local
+ * state, and the saved inode number is written into local->stbuf.
+ * The result is unwound with local->inode and local->stbuf in either
+ * case.
+ */
+int
+dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        dht_local_t  *local  = frame->local;
+        dht_layout_t *layout = local->selfheal.layout;
+
+        if (op_ret == 0) {
+                inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+
+                /* The layout is now referenced from the inode context. */
+                local->selfheal.layout = NULL;
+                local->stbuf.st_ino    = local->st_ino;
+        }
+
+        DHT_STACK_UNWIND (frame, op_ret, op_errno,
+                          local->inode, &local->stbuf);
+        return 0;
+}
+
+
+/*
+ * mkdir callback for the subvolumes wound from dht_mkdir_hashed_cbk.
+ *
+ * Under the frame lock each reply is folded into local->layout with
+ * dht_layout_merge(); a failed reply also records its errno, while a
+ * successful one merges its stat into local->stbuf.  When
+ * is_last_call() accepts the value from dht_frame_return(), the layout
+ * is detached from the local state and passed to
+ * dht_selfheal_directory(), which completes in
+ * dht_mkdir_selfheal_cbk.
+ */
+int
+dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        dht_layout_t *layout    = local->layout;
+        int           remaining = 0;
+        int           ret       = -1;
+
+        LOCK (&frame->lock);
+        {
+                ret = dht_layout_merge (this, layout, prev->this,
+                                        op_ret, op_errno, NULL);
+
+                if (op_ret != -1) {
+                        dht_stat_merge (this, &local->stbuf, stbuf,
+                                        prev->this);
+                } else {
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                local->layout = NULL;
+                dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+                                        &local->loc, layout);
+        }
+
+        return 0;
+}
+
+/*
+ * mkdir callback for the hashed subvolume, which dht_mkdir contacts
+ * first.
+ *
+ * The reply is folded into local->layout with dht_layout_merge().  If
+ * the hashed subvolume failed, the frame is unwound with -1 and its
+ * errno.  Otherwise the stat is merged into local->stbuf, the
+ * resulting inode number is saved in local->st_ino, and mkdir is wound
+ * to every other subvolume (replies go to dht_mkdir_cbk).
+ *
+ * When no other subvolume remains (call_cnt == 0) the layout is passed
+ * straight to dht_selfheal_directory() and the function returns at
+ * once, so nothing below runs after the frame has been handed over.
+ *
+ * The return value of dht_layout_merge() was never consulted and is
+ * not stored.
+ */
+int
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+                      xlator_t *this, int op_ret, int op_errno,
+                      inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local         = NULL;
+        call_frame_t *prev          = NULL;
+        dht_layout_t *layout        = NULL;
+        dht_conf_t   *conf          = NULL;
+        int           i             = 0;
+        xlator_t     *hashed_subvol = NULL;
+
+        local = frame->local;
+        prev = cookie;
+        layout = local->layout;
+        conf = this->private;
+        hashed_subvol = local->hashed_subvol;
+
+        dht_layout_merge (this, layout, prev->this,
+                          op_ret, op_errno, NULL);
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                goto err;
+        }
+        local->op_ret = 0;
+
+        dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+
+        local->st_ino = local->stbuf.st_ino;
+
+        /* Every subvolume except the hashed one still has to be asked. */
+        local->call_cnt = conf->subvolume_cnt - 1;
+
+        if (local->call_cnt == 0) {
+                local->layout = NULL;
+                dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+                                        &local->loc, layout);
+                return 0;
+        }
+
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                if (conf->subvolumes[i] == hashed_subvol)
+                        continue;
+                STACK_WIND (frame, dht_mkdir_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->mkdir,
+                            &local->loc, local->mode);
+        }
+        return 0;
+err:
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * mkdir fop.  Saves the hashed subvolume, a reference to loc->inode,
+ * a copy of `loc`, the requested mode and a freshly allocated layout
+ * in the frame-local state, then winds mkdir to the hashed subvolume
+ * only; dht_mkdir_hashed_cbk continues from there.  Failures unwind
+ * with -1, an errno and NULL inode/stat.  Always returns 0.
+ */
+int
+dht_mkdir (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode)
+{
+        dht_conf_t  *conf     = NULL;
+        dht_local_t *local    = NULL;
+        xlator_t    *hashed   = NULL;
+        int          op_errno = -1;
+        int          copy_ret = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        hashed = dht_subvol_get_hashed (this, loc);
+        if (hashed == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "hashed subvol not found");
+                goto err;
+        }
+
+        /* Stash everything the callbacks will need later. */
+        local->hashed_subvol = hashed;
+        local->inode         = inode_ref (loc->inode);
+        copy_ret             = loc_copy (&local->loc, loc);
+        local->mode          = mode;
+
+        if (copy_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->layout = dht_layout_new (this, conf->subvolume_cnt);
+        if (local->layout == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_mkdir_hashed_cbk,
+                    hashed,
+                    hashed->fops->mkdir,
+                    loc, mode);
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * Completion callback for the dht_selfheal_restore() step of rmdir.
+ * It clears local->layout and unwinds with the op_ret/op_errno that
+ * the rmdir replies stored in the local state; the status passed to
+ * this callback is not used.
+ */
+int
+dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno)
+{
+        dht_local_t *local = frame->local;
+
+        local->layout = NULL;
+
+        DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+        return 0;
+}
+
+
+/*
+ * rmdir callback, invoked once per subvolume wound by dht_rmdir_do.
+ *
+ * Under the frame lock a failed reply stores -1 and its errno in the
+ * local state, logs the failure, and -- unless the errno is ENOENT --
+ * sets local->need_selfheal.  When is_last_call() accepts the value
+ * from dht_frame_return(): if a self-heal was requested, the layout is
+ * fetched from the inode context and passed to dht_selfheal_restore()
+ * (completing in dht_rmdir_selfheal_cbk); otherwise the stored result
+ * is unwound.
+ */
+int
+dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        dht_layout_t *layout    = NULL;
+        uint64_t      ctx_value = 0;
+        int           remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        local->op_ret   = -1;
+
+                        if (op_errno != ENOENT)
+                                local->need_selfheal = 1;
+
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "rmdir on %s for %s failed (%s)",
+                                prev->this->name, local->loc.path,
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        remaining = dht_frame_return (frame);
+        if (!is_last_call (remaining))
+                return 0;
+
+        if (!local->need_selfheal) {
+                DHT_STACK_UNWIND (frame, local->op_ret,
+                                  local->op_errno);
+                return 0;
+        }
+
+        inode_ctx_get (local->loc.inode, this, &ctx_value);
+        layout = (dht_layout_t *)(long)ctx_value;
+
+        /* TODO: neater interface needed below */
+        local->stbuf.st_mode = local->loc.inode->st_mode;
+
+        dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+                              &local->loc, layout);
+        return 0;
+}
+
+
+/*
+ * Second phase of rmdir, run after the opendir/readdir probes have
+ * all reported.  If the local state already holds a failure
+ * (op_ret == -1) it is unwound as-is; otherwise rmdir for local->loc
+ * is wound to every subvolume, with replies going to dht_rmdir_cbk.
+ */
+int
+dht_rmdir_do (call_frame_t *frame, xlator_t *this)
+{
+        dht_conf_t  *conf  = this->private;
+        dht_local_t *local = frame->local;
+        int          idx   = 0;
+
+        if (local->op_ret == -1) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+                return 0;
+        }
+
+        /* One reply is expected from each subvolume. */
+        local->call_cnt = conf->subvolume_cnt;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                STACK_WIND (frame, dht_rmdir_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->rmdir,
+                            &local->loc);
+        }
+
+        return 0;
+}
+
+
+/*
+ * readdir callback of the rmdir emptiness probe.  A reply reporting
+ * more than two entries marks the operation as failed with ENOTEMPTY
+ * in the local state.  When is_last_call() accepts the value from
+ * dht_frame_return(), dht_rmdir_do() runs the second phase.
+ */
+int
+dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, gf_dirent_t *entries)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = -1;
+
+        if (op_ret > 2) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on %s for %s returned %d entries",
+                        prev->this->name, local->loc.path, op_ret);
+                local->op_errno = ENOTEMPTY;
+                local->op_ret   = -1;
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                dht_rmdir_do (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * opendir callback of the rmdir emptiness probe.  On success a
+ * readdir (size 4096, offset 0) is wound to the same subvolume using
+ * local->fd.  On failure the error is logged and this subvolume's
+ * reply is accounted for with dht_frame_return(); if is_last_call()
+ * accepts that value, dht_rmdir_do() runs the second phase.
+ */
+int
+dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, fd_t *fd)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = -1;
+
+        if (op_ret != -1) {
+                STACK_WIND (frame, dht_rmdir_readdir_cbk,
+                            prev->this, prev->this->fops->readdir,
+                            local->fd, 4096, 0);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "opendir on %s for %s failed (%s)",
+                prev->this->name, local->loc.path,
+                strerror (op_errno));
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                dht_rmdir_do (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * rmdir fop, first phase.  Copies `loc` into the frame-local state,
+ * creates an fd on the copied inode, and winds an opendir to every
+ * subvolume; the chain continues through dht_rmdir_opendir_cbk,
+ * dht_rmdir_readdir_cbk and dht_rmdir_do.  Failures unwind with -1 and
+ * an errno.  Always returns 0.
+ */
+int
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        dht_conf_t  *conf     = NULL;
+        dht_local_t *local    = NULL;
+        int          op_errno = -1;
+        int          copy_ret = -1;
+        int          idx      = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        /* Start optimistic; the probe callbacks flip this on failure. */
+        local->op_ret   = 0;
+        local->call_cnt = conf->subvolume_cnt;
+
+        copy_ret = loc_copy (&local->loc, loc);
+        if (copy_ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->fd = fd_create (local->loc.inode, frame->root->pid);
+        if (local->fd == NULL) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                STACK_WIND (frame, dht_rmdir_opendir_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->opendir,
+                            loc, local->fd);
+        }
+        return 0;
+
+err:
+        /* op_errno is still -1 only when a VALIDATE_OR_GOTO jumped here. */
+        if (op_errno == -1)
+                op_errno = errno;
+
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+
+/*
+ * Reply handler for dht_xattrop(): passes op_ret, op_errno and dict from
+ * the subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_xattrop_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+/*
+ * xattrop on a path: forwarded to the subvolume cached for loc->inode.
+ *
+ * Unwinds with EINVAL when no cached subvolume is known and with ENOMEM
+ * when the per-call state cannot be allocated.
+ */
+int32_t
+dht_xattrop (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             gf_xattrop_flags_t flags,
+             dict_t *dict)
+{
+        dht_local_t *local = NULL;
+        xlator_t    *target = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        target = dht_subvol_get_cached (this, loc->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->inode = inode_ref (loc->inode);
+
+        STACK_WIND (frame, dht_xattrop_cbk,
+                    target, target->fops->xattrop,
+                    loc, flags, dict);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * Reply handler for dht_fxattrop(): passes op_ret, op_errno and dict from
+ * the subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_fxattrop_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict)
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
+ return 0;
+}
+
+/*
+ * xattrop on an open fd: forwarded to the subvolume cached for fd->inode.
+ * Unwinds with EINVAL when no cached subvolume is known.
+ */
+int32_t
+dht_fxattrop (call_frame_t *frame,
+              xlator_t *this,
+              fd_t *fd,
+              gf_xattrop_flags_t flags,
+              dict_t *dict)
+{
+        xlator_t *target = NULL;
+        int       op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_fxattrop_cbk,
+                    target, target->fops->fxattrop,
+                    fd, flags, dict);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for dht_inodelk(): passes op_ret and op_errno from the
+ * subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+
+/*
+ * inodelk on a path: forwarded to the subvolume cached for loc->inode.
+ *
+ * Unwinds with EINVAL when no cached subvolume is known and with ENOMEM
+ * when the per-call state cannot be allocated.
+ */
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t cmd, struct flock *lock)
+{
+        dht_local_t *local = NULL;
+        xlator_t    *target = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        target = dht_subvol_get_cached (this, loc->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->inode = inode_ref (loc->inode);
+
+        STACK_WIND (frame, dht_inodelk_cbk,
+                    target, target->fops->inodelk,
+                    loc, cmd, lock);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for dht_finodelk(): passes op_ret and op_errno from the
+ * subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+
+/*
+ * inodelk on an open fd: forwarded to the subvolume cached for fd->inode.
+ * Unwinds with EINVAL when no cached subvolume is known.
+ */
+int32_t
+dht_finodelk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        xlator_t *target = NULL;
+        int       op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_finodelk_cbk,
+                    target, target->fops->finodelk,
+                    fd, cmd, lock);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for dht_entrylk(): passes op_ret and op_errno from the
+ * subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_entrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+/*
+ * entrylk on a path: forwarded to the subvolume cached for loc->inode.
+ *
+ * Unwinds with EINVAL when no cached subvolume is known and with ENOMEM
+ * when the per-call state cannot be allocated.
+ */
+int32_t
+dht_entrylk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *basename,
+             entrylk_cmd cmd, entrylk_type type)
+{
+        dht_local_t *local = NULL;
+        xlator_t    *target = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        target = dht_subvol_get_cached (this, loc->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", loc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        local = dht_local_init (frame);
+        if (local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->inode = inode_ref (loc->inode);
+
+        STACK_WIND (frame, dht_entrylk_cbk,
+                    target, target->fops->entrylk,
+                    loc, basename, cmd, type);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+/*
+ * Reply handler for dht_fentrylk(): passes op_ret and op_errno from the
+ * subvolume straight back to the caller via DHT_STACK_UNWIND.
+ */
+static int32_t
+dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno)
+
+{
+ DHT_STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+/*
+ * entrylk on an open fd: forwarded to the subvolume cached for fd->inode.
+ * Unwinds with EINVAL when no cached subvolume is known.
+ */
+int32_t
+dht_fentrylk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *basename,
+              entrylk_cmd cmd, entrylk_type type)
+{
+        xlator_t *target = NULL;
+        int       op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+
+        target = dht_subvol_get_cached (this, fd->inode);
+        if (target == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for fd=%p", fd);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_fentrylk_cbk,
+                    target, target->fops->fentrylk,
+                    fd, basename, cmd, type);
+
+        return 0;
+
+err:
+        /* validation failures leave op_errno at -1: fall back to errno */
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Forget handler: releases the layout stored in the inode context.
+ *
+ * The context value is fetched into tmp_layout and converted to a layout
+ * pointer *before* it is tested; testing the pointer first (while it is
+ * still NULL) would make the function return early every time and the
+ * layout would never be freed.  Layouts flagged as preset are left alone.
+ *
+ * Always returns 0.
+ */
+int
+dht_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t      tmp_layout = 0;
+        dht_layout_t *layout = NULL;
+
+        inode_ctx_get (inode, this, &tmp_layout);
+
+        layout = (dht_layout_t *)(long)tmp_layout;
+        if (!layout)
+                return 0;
+
+        if (!layout->preset)
+                FREE (layout);
+
+        return 0;
+}
+
+
+
+/*
+ * Fills conf->subvolumes, conf->subvolume_cnt and conf->subvolume_status
+ * from the list of children of this translator.
+ *
+ * Returns 0 on success, -1 when either array cannot be allocated.
+ */
+static int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+        xlator_list_t *child = NULL;
+        int            n = 0;
+
+        /* first pass: count the children */
+        child = this->children;
+        while (child) {
+                n++;
+                child = child->next;
+        }
+
+        conf->subvolumes = CALLOC (n, sizeof (xlator_t *));
+        if (conf->subvolumes == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+        conf->subvolume_cnt = n;
+
+        /* second pass: record each child in list order */
+        for (n = 0, child = this->children; child; child = child->next, n++)
+                conf->subvolumes[n] = child->xlator;
+
+        conf->subvolume_status = CALLOC (n, sizeof (char));
+        if (conf->subvolume_status == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Event handler.
+ *
+ * For GF_EVENT_CHILD_UP / GF_EVENT_CHILD_DOWN the subvolume passed in
+ * data is looked up in conf->subvolumes and its entry in
+ * conf->subvolume_status is set to 1 / 0 under conf->subvolume_lock.
+ * CHILD_UP additionally increments conf->gen.  An unknown subvolume is
+ * only logged.  Every event is then passed on to default_notify(), whose
+ * result is returned.
+ */
+int
+dht_notify (xlator_t *this, int event, void *data, ...)
+{
+        dht_conf_t *conf = this->private;
+        xlator_t   *child = NULL;
+        int         slot = -1;
+        int         i = 0;
+
+        if (event == GF_EVENT_CHILD_UP || event == GF_EVENT_CHILD_DOWN) {
+                child = data;
+
+                if (event == GF_EVENT_CHILD_UP)
+                        conf->gen++;
+
+                for (i = 0; i < conf->subvolume_cnt; i++) {
+                        if (child == conf->subvolumes[i]) {
+                                slot = i;
+                                break;
+                        }
+                }
+
+                if (slot != -1) {
+                        LOCK (&conf->subvolume_lock);
+                        {
+                                conf->subvolume_status[slot] =
+                                        (event == GF_EVENT_CHILD_UP) ? 1 : 0;
+                        }
+                        UNLOCK (&conf->subvolume_lock);
+                } else if (event == GF_EVENT_CHILD_UP) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "got GF_EVENT_CHILD_UP bad subvolume %s",
+                                child->name);
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+                                child->name);
+                }
+        }
+
+        return default_notify (this, event, data);
+}
+
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
new file mode 100644
index 00000000000..17017381b08
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -0,0 +1,212 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _DHT_H
+#define _DHT_H
+
+
+/*
+ * Completion callback type taken by dht_selfheal_directory() and
+ * dht_selfheal_restore() (declared further down in this header).
+ */
+typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno);
+
+
+/*
+ * In-memory layout: a header followed by cnt entries in list[].
+ * The whole object is allocated in one piece by dht_layout_new().
+ */
+struct dht_layout {
+ int cnt; /* number of entries in list[] (set by dht_layout_new) */
+ int preset; /* when set, dht_forget() does not FREE() the layout */
+ int gen;
+ int type; /* hash type handed to dht_hash_compute() */
+ struct {
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ /* presumably the hash range [start, stop] owned by xlator --
+ TODO confirm against dht_layout_search() */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
+ } list[0];
+};
+typedef struct dht_layout dht_layout_t;
+
+
+/*
+ * Per-call state hung off frame->local.  Created by dht_local_init() and
+ * released by dht_local_wipe().
+ */
+struct dht_local {
+ int call_cnt; /* outstanding replies; decremented by dht_frame_return() */
+ loc_t loc; /* wiped by dht_local_wipe() */
+ loc_t loc2; /* wiped by dht_local_wipe() */
+ int op_ret; /* dht_local_init() starts this at -1 */
+ int op_errno; /* dht_local_init() starts this at EUCLEAN */
+ int layout_mismatch;
+ struct stat stbuf;
+ struct statvfs statvfs;
+ fd_t *fd; /* unref'd by dht_local_wipe() */
+ inode_t *inode; /* unref'd by dht_local_wipe() */
+ dict_t *xattr; /* unref'd by dht_local_wipe() */
+ dict_t *xattr_req; /* unref'd by dht_local_wipe() */
+ dht_layout_t *layout; /* FREE()d by dht_local_wipe() */
+ size_t size;
+ ino_t st_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ char need_selfheal;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct stat stbuf;
+ loc_t loc; /* wiped by dht_local_wipe() */
+ inode_t *inode; /* unref'd by dht_local_wipe() */
+ dict_t *xattr; /* unref'd by dht_local_wipe() */
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t missing;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_layout_t *layout; /* NOTE(review): not released by dht_local_wipe() */
+ } selfheal;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+};
+typedef struct dht_local dht_local_t;
+
+
+/*
+ * Translator-wide configuration, reached through this->private.
+ */
+struct dht_conf {
+ gf_lock_t subvolume_lock; /* taken around accesses to subvolume_status */
+ int subvolume_cnt; /* number of entries in subvolumes[] */
+ xlator_t **subvolumes; /* children, in the order of this->children */
+ xlator_t *local_volume; /* Needed by NUFA */
+ char *subvolume_status; /* per subvolume: 1 after CHILD_UP, 0 after CHILD_DOWN */
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ dht_layout_t *default_dir_layout;
+ gf_boolean_t search_unhashed;
+ int gen; /* incremented by dht_notify() on every CHILD_UP */
+};
+typedef struct dht_conf dht_conf_t;
+
+
+/*
+ * Layout record built from fixed-width fields only.  Presumably the form
+ * stored in the directory xattr (see dht_disk_layout_extract / _merge) --
+ * TODO confirm.
+ */
+struct dht_disk_layout {
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
+};
+typedef struct dht_disk_layout dht_disk_layout_t;
+
+/*
+ * Small predicates.  Every macro argument is parenthesised so that
+ * arbitrary expressions (e.g. "a & b", "&loc") can be passed safely.
+ */
+
+/* true when a reply reports "no such entry" */
+#define ENTRY_MISSING(op_ret, op_errno) \
+        ((op_ret) == -1 && (op_errno) == ENOENT)
+
+/* true when loc refers to the path "/" */
+#define is_fs_root(loc) (strcmp ((loc)->path, "/") == 0)
+
+/* true when the inode already has a context for this translator;
+   NOTE: expects a variable named 'this' in the caller's scope */
+#define is_revalidate(loc) \
+        (inode_ctx_get ((loc)->inode, this, NULL) == 0)
+
+/* true when dht_frame_return() reported no outstanding replies */
+#define is_last_call(cnt) ((cnt) == 0)
+
+/* a link file has no permission bits other than the sticky bit */
+#define DHT_LINKFILE_MODE (S_ISVTX)
+#define check_is_linkfile(i,s,x) \
+        (((s)->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE)
+
+#define check_is_dir(i,s,x) (S_ISDIR((s)->st_mode))
+
+/* non-NULL layout with at least one entry */
+#define layout_is_sane(layout) ((layout) && ((layout)->cnt > 0))
+
+/*
+ * Unwind the frame and release its dht_local_t.  frame->local is detached
+ * before STACK_UNWIND runs and wiped only afterwards (presumably because
+ * the frame must not be touched once it has been unwound -- TODO confirm).
+ */
+#define DHT_STACK_UNWIND(frame, params ...) do { \
+ dht_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND (frame, params); \
+ dht_local_wipe (__local); \
+ } while (0)
+
+/*
+ * Same detach-then-wipe sequence as DHT_STACK_UNWIND, but the whole call
+ * stack (frame->root) is destroyed instead of unwinding a reply.
+ */
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__local); \
+ } while (0)
+
+/* ---- layout handling ---- */
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+ const char *name);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
+
+/* ---- link files ---- */
+xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
+ struct stat *buf, dict_t *xattr);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
+
+int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
+int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr);
+
+int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t *disk_layout);
+
+
+/* ---- helpers implemented in dht-helper.c ---- */
+/* decrements local->call_cnt under the frame lock, returns the new value */
+int dht_frame_return (call_frame_t *frame);
+
+/* y = x * subvolume_cnt + index of subvol; dht_deitransform reverses it */
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
+ uint64_t *x);
+
+void dht_local_wipe (dht_local_t *local);
+dht_local_t *dht_local_init (call_frame_t *frame);
+int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from,
+ xlator_t *subvol);
+
+xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
+xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
+xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
+/* index of subvol in conf->subvolumes, or -1 when it is not a child */
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+
+/* ---- hashing (dht-hashfn.c) ---- */
+int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
+int
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+int
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+
+int dht_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc);
+#endif /* _DHT_H */
+#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-hashfn-tea.c b/xlators/cluster/dht/src/dht-hashfn-tea.c
new file mode 100644
index 00000000000..8437b495541
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-hashfn-tea.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+
+/* DELTA is added to the running sum once per round in tearound().
+   PARTROUNDS is used for every complete 16-byte group of the message,
+   FULLROUNDS for the final, padded group. */
+#define DELTA 0x9E3779B9
+#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
+#define PARTROUNDS 6 /* 6 gets complete mixing */
+
+
+/*
+ * Runs `rounds` mixing rounds over the pair (*h0, *h1), keyed by the four
+ * words in array, and adds the mixed values back into *h0 and *h1.
+ *
+ * The loop body executes before the counter is tested, so rounds must be
+ * at least 1.  Always returns 0.
+ */
+static int
+tearound (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
+{
+        uint32_t x = *h0;
+        uint32_t y = *h1;
+        uint32_t acc = 0;
+        int      left = rounds;
+
+        do {
+                acc += DELTA;
+                x += ((y << 4) + array[0]) ^ (y + acc) ^ ((y >> 5) + array[1]);
+                y += ((x << 4) + array[2]) ^ (x + acc) ^ ((x >> 5) + array[3]);
+                left--;
+        } while (left != 0);
+
+        *h0 += x;
+        *h1 += y;
+
+        return 0;
+}
+
+
+/*
+ * Builds the 32-bit padding word for a message of length len:
+ * len OR-ed with itself shifted left by 8, and that result OR-ed with
+ * itself shifted left by 16.  For len < 256 this is the low byte of len
+ * repeated in all four bytes.
+ */
+uint32_t
+__pad (int len)
+{
+        uint32_t value = (uint32_t) len;
+        uint32_t half = value | (value << 8);
+
+        return half | (half << 16);
+}
+
+
+/*
+ * TEA-based hash of the len bytes at msg.
+ *
+ * The message is consumed in groups of four 32-bit words.  Every complete
+ * 16-byte group is mixed with PARTROUNDS rounds; a trailing partial group
+ * is filled up with the padding word from __pad() and mixed with
+ * FULLROUNDS rounds.  Returns h0 ^ h1.
+ *
+ * Words are loaded with memcpy() instead of dereferencing a uint32_t
+ * pointer cast from msg: msg has no alignment guarantee, so the cast is
+ * undefined behaviour and can fault on strict-alignment machines.  The
+ * bytes read, and therefore the hash values, are unchanged.
+ */
+uint32_t
+dht_hashfn_tea (const char *msg, int len)
+{
+        uint32_t    h0 = 0x9464a485;
+        uint32_t    h1 = 0x542e1a94;
+        uint32_t    array[4];
+        uint32_t    pad = 0;
+        const char *cursor = msg;
+        int         i = 0;
+        int         j = 0;
+        int         full_quads = 0;
+        int         full_words = 0;
+        int         full_bytes = 0;
+
+        pad = __pad (len);
+
+        full_bytes = len;
+        full_words = len / 4;
+        full_quads = len / 16;
+
+        for (i = 0; i < full_quads; i++) {
+                memcpy (array, cursor, sizeof (array));
+                cursor += sizeof (array);
+                full_words -= 4;
+                full_bytes -= 16;
+
+                tearound (PARTROUNDS, &array[0], &h0, &h1);
+        }
+
+        if ((len % 16) == 0) {
+                goto done;
+        }
+
+        for (j = 0; j < 4; j++) {
+                if (full_words) {
+                        memcpy (&array[j], cursor, sizeof (array[j]));
+                        cursor += sizeof (array[j]);
+                        full_words--;
+                        full_bytes -= 4;
+                } else {
+                        array[j] = pad;
+                        /* The remaining (< 4) bytes are shifted into the
+                           first padding word.  msg[] is plain char, so a
+                           byte >= 0x80 may sign-extend here; that is kept
+                           as-is because changing it would change the hash
+                           of existing names. */
+                        while (full_bytes) {
+                                array[j] <<= 8;
+                                array[j] |= msg[len - full_bytes];
+                                full_bytes--;
+                        }
+                }
+        }
+        tearound (FULLROUNDS, &array[0], &h0, &h1);
+
+done:
+        return h0 ^ h1;
+}
+
+
+#if 0
+/*
+ * Disabled stand-alone driver: prints the hash of every command line
+ * argument.  Calls dht_hashfn_tea() (there is no function named tea in
+ * this file) so that it builds when enabled.
+ */
+int
+main (int argc, char *argv[])
+{
+        int      i = 0;
+        uint32_t hashval = 0;
+
+        for (i = 1; i < argc; i++) {
+                hashval = dht_hashfn_tea (argv[i], strlen (argv[i]));
+                printf ("%s: %x\n", argv[i], hashval);
+        }
+
+        return 0;
+}
+#endif
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
new file mode 100644
index 00000000000..9e321a43cec
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -0,0 +1,88 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+/* implemented in dht-hashfn-tea.c */
+uint32_t dht_hashfn_tea (const char *name, int len);
+
+
+/* hash algorithms understood by dht_hash_compute_internal() */
+typedef enum {
+ DHT_HASH_TYPE_TEA,
+} dht_hashfn_type_t;
+
+
+/*
+ * Hashes name with the algorithm selected by type and stores the result
+ * in *hash_p.
+ *
+ * Returns 0 on success.  Returns -1, leaving *hash_p untouched, when type
+ * is not a known hash type.
+ */
+int
+dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
+{
+        if (type != DHT_HASH_TYPE_TEA)
+                return -1;
+
+        *hash_p = dht_hashfn_tea (name, strlen (name));
+
+        return 0;
+}
+
+
+/*
+ * Sets rsync_frndly_name to the name that should be hashed for `name`.
+ *
+ * For a name of the form ".<base>.<suffix>" (leading dot, a later dot,
+ * non-empty suffix) the result is a stack copy of "<base>"; for every
+ * other name it is `name` itself.  The copy comes from alloca(), so it is
+ * valid only until the function that expands this macro returns.
+ *
+ * The macro ends in "while (0)" without a semicolon so that the usual
+ * "MACRO (...);" form is exactly one statement (safe in if/else), and all
+ * arguments are parenthesised.
+ */
+#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do {          \
+                (rsync_frndly_name) = (char *) (name);                  \
+                if ((name)[0] == '.') {                                 \
+                        char *dot = 0;                                  \
+                        int namelen = 0;                                \
+                                                                        \
+                        dot = strrchr ((name), '.');                    \
+                        if (dot && dot > ((name) + 1) && *(dot + 1)) {  \
+                                /* bytes between the leading dot and   \
+                                   the last dot, plus the terminator */ \
+                                namelen = (dot - (name));               \
+                                (rsync_frndly_name) = alloca (namelen); \
+                                memcpy ((rsync_frndly_name),            \
+                                        (name) + 1, namelen - 1);       \
+                                (rsync_frndly_name)[namelen - 1] = 0;   \
+                        }                                               \
+                }                                                       \
+        } while (0)
+
+
+/*
+ * Public hashing entry point: hashes the rsync-friendly form of name (see
+ * MAKE_RSYNC_FRIENDLY_NAME) with the algorithm selected by type.
+ * Returns whatever dht_hash_compute_internal() returns.
+ */
+int
+dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+{
+        char *hash_name = NULL;
+
+        /* may point at a stack copy that lives until this function returns */
+        MAKE_RSYNC_FRIENDLY_NAME (hash_name, name);
+
+        return dht_hash_compute_internal (type, hash_name, hash_p);
+}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
new file mode 100644
index 00000000000..52d0720025f
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -0,0 +1,326 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+/*
+ * Accounts for one reply on frame: decrements local->call_cnt while
+ * holding frame->lock and returns the new count.
+ * Returns -1 when frame is NULL.
+ */
+int
+dht_frame_return (call_frame_t *frame)
+{
+        dht_local_t *state = NULL;
+        int          remaining = -1;
+
+        if (frame) {
+                state = frame->local;
+
+                LOCK (&frame->lock);
+                {
+                        remaining = --state->call_cnt;
+                }
+                UNLOCK (&frame->lock);
+        }
+
+        return remaining;
+}
+
+
+/*
+ * Maps a value x coming from subvol to y = x * subvolume_cnt + index of
+ * subvol, so that the subvolume can be recovered by dht_deitransform().
+ * The value (uint64_t) -1 is passed through unchanged.
+ *
+ * The result is stored in *y_p when y_p is not NULL.  Always returns 0.
+ */
+int
+dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+{
+        dht_conf_t *conf = NULL;
+        int         width = 0;
+        int         index = 0;
+        uint64_t    result = (uint64_t) -1;
+
+        if (x != ((uint64_t) -1)) {
+                conf = this->private;
+
+                width = conf->subvolume_cnt;
+                index = dht_subvol_cnt (this, subvol);
+
+                result = ((x * width) + index);
+        }
+
+        if (y_p)
+                *y_p = result;
+
+        return 0;
+}
+
+
+/*
+ * Reverse of dht_itransform(): splits y into the subvolume index
+ * (y % subvolume_cnt) and the original value (y / subvolume_cnt).
+ *
+ * Each result is stored only when its output pointer is not NULL.
+ * Always returns 0.
+ */
+int
+dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
+                  uint64_t *x_p)
+{
+        dht_conf_t *conf = this->private;
+        int         width = conf->subvolume_cnt;
+        int         index = y % width;
+        uint64_t    value = y / width;
+        xlator_t   *owner = conf->subvolumes[index];
+
+        if (subvol_p)
+                *subvol_p = owner;
+
+        if (x_p)
+                *x_p = value;
+
+        return 0;
+}
+
+
+/*
+ * Releases everything owned by a dht_local_t and then the structure
+ * itself.  A NULL argument is accepted and ignored.
+ */
+void
+dht_local_wipe (dht_local_t *local)
+{
+        if (local == NULL)
+                return;
+
+        /* top-level members */
+        loc_wipe (&local->loc);
+        loc_wipe (&local->loc2);
+
+        if (local->xattr != NULL)
+                dict_unref (local->xattr);
+
+        if (local->inode != NULL)
+                inode_unref (local->inode);
+
+        if (local->layout != NULL)
+                FREE (local->layout);
+
+        /* members of the linkfile sub-structure */
+        loc_wipe (&local->linkfile.loc);
+
+        if (local->linkfile.xattr != NULL)
+                dict_unref (local->linkfile.xattr);
+
+        if (local->linkfile.inode != NULL)
+                inode_unref (local->linkfile.inode);
+
+        if (local->fd != NULL) {
+                fd_unref (local->fd);
+                local->fd = NULL;
+        }
+
+        if (local->xattr_req != NULL)
+                dict_unref (local->xattr_req);
+
+        FREE (local);
+}
+
+
+/*
+ * Allocates a zeroed dht_local_t, presets op_ret = -1 and
+ * op_errno = EUCLEAN, and attaches it to frame->local.
+ *
+ * Returns the new structure, or NULL (frame untouched) when the
+ * allocation fails.
+ */
+dht_local_t *
+dht_local_init (call_frame_t *frame)
+{
+        /* TODO: use mem-pool */
+        dht_local_t *fresh = CALLOC (1, sizeof (*fresh));
+
+        if (fresh) {
+                fresh->op_ret = -1;
+                fresh->op_errno = EUCLEAN;
+
+                frame->local = fresh;
+        }
+
+        return fresh;
+}
+
+
+/*
+ * Returns a pointer to the part of str that follows its last '/',
+ * or NULL when str contains no '/'.  The result points into str.
+ */
+char *
+basestr (const char *str)
+{
+        char *slash = strrchr (str, '/');
+
+        return slash ? slash + 1 : NULL;
+}
+
+/*
+ * Returns the first subvolume whose subvolume_status entry is non-zero,
+ * or NULL when there is none.  The scan runs under conf->subvolume_lock.
+ */
+xlator_t *
+dht_first_up_child (xlator_t *this)
+{
+        dht_conf_t *conf = this->private;
+        xlator_t   *found = NULL;
+        int         idx = 0;
+
+        LOCK (&conf->subvolume_lock);
+        {
+                while (idx < conf->subvolume_cnt
+                       && !conf->subvolume_status[idx])
+                        idx++;
+
+                if (idx < conf->subvolume_cnt)
+                        found = conf->subvolumes[idx];
+        }
+        UNLOCK (&conf->subvolume_lock);
+
+        return found;
+}
+
+/*
+ * Returns the subvolume that loc->name hashes to according to the layout
+ * of loc->parent.  For the filesystem root the first subvolume that is up
+ * is returned instead.
+ *
+ * Returns NULL (after logging) when the parent has no layout or the
+ * layout search finds no subvolume.
+ */
+xlator_t *
+dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
+{
+        dht_layout_t *parent_layout = NULL;
+        xlator_t     *hashed = NULL;
+
+        if (is_fs_root (loc))
+                return dht_first_up_child (this);
+
+        parent_layout = dht_layout_get (this, loc->parent);
+        if (parent_layout == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout missing path=%s parent=%"PRId64,
+                        loc->path, loc->parent->ino);
+                return NULL;
+        }
+
+        hashed = dht_layout_search (this, parent_layout, loc->name);
+        if (hashed == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not find subvolume for path=%s",
+                        loc->path);
+        }
+
+        return hashed;
+}
+
+
+/*
+ * Returns the subvolume recorded in the first entry of the inode's
+ * layout, or NULL when the inode has no layout.
+ */
+xlator_t *
+dht_subvol_get_cached (xlator_t *this, inode_t *inode)
+{
+        dht_layout_t *layout = dht_layout_get (this, inode);
+
+        return layout ? layout->list[0].xlator : NULL;
+}
+
+
+/*
+ * Returns the subvolume that follows prev in conf->subvolumes.
+ * Returns NULL when prev is the last subvolume or is not found.
+ */
+xlator_t *
+dht_subvol_next (xlator_t *this, xlator_t *prev)
+{
+        dht_conf_t *conf = this->private;
+        xlator_t   *successor = NULL;
+        int         idx = 0;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (conf->subvolumes[idx] != prev)
+                        continue;
+
+                if ((idx + 1) < conf->subvolume_cnt)
+                        successor = conf->subvolumes[idx + 1];
+                break;
+        }
+
+        return successor;
+}
+
+
+/*
+ * Returns the index of subvol in conf->subvolumes, or -1 when subvol is
+ * not one of the configured subvolumes.
+ */
+int
+dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
+{
+        dht_conf_t *conf = this->private;
+        int         idx = 0;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (subvol == conf->subvolumes[idx])
+                        return idx;
+        }
+
+        return -1;
+}
+
+
+/* a = max (a, b); note that both arguments are evaluated more than once */
+#define set_if_greater(a, b) do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
+
+/*
+ * Folds the stat reported by subvol (from) into the aggregate (to):
+ *   - identity and mode fields are overwritten,
+ *   - st_ino is replaced by the dht_itransform()ed inode number,
+ *   - st_size and st_blocks are accumulated,
+ *   - each timestamp keeps the later of the two values.
+ * Always returns 0.
+ */
+int
+dht_stat_merge (xlator_t *this, struct stat *to,
+                struct stat *from, xlator_t *subvol)
+{
+        /* overwritten fields */
+        to->st_dev     = from->st_dev;
+        to->st_mode    = from->st_mode;
+        to->st_nlink   = from->st_nlink;
+        to->st_uid     = from->st_uid;
+        to->st_gid     = from->st_gid;
+        to->st_rdev    = from->st_rdev;
+        to->st_blksize = from->st_blksize;
+
+        /* inode number made unique across subvolumes */
+        dht_itransform (this, subvol, from->st_ino, &to->st_ino);
+
+        /* accumulated fields */
+        to->st_size   += from->st_size;
+        to->st_blocks += from->st_blocks;
+
+        /* latest timestamp wins */
+        set_if_greater (to->st_atime, from->st_atime);
+        set_if_greater (to->st_mtime, from->st_mtime);
+        set_if_greater (to->st_ctime, from->st_ctime);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
new file mode 100644
index 00000000000..08b4a2746f8
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "byte-order.h"
+
+/* sizeof the dht_layout_t structure itself */
+#define layout_base_size        (sizeof (dht_layout_t))
+
+/* sizeof one element of the layout's list[] array */
+#define layout_entry_size       (sizeof (((dht_layout_t *) NULL)->list[0]))
+
+/*
+ * Bytes requested for a layout with cnt list entries.  cnt is
+ * parenthesized so that an argument such as (n + 1) multiplies as a
+ * whole instead of only its last term.
+ */
+#define layout_size(cnt)        (layout_base_size + ((cnt) * layout_entry_size))
+
+
+/*
+ * Allocate a zero-filled layout sized by layout_size (cnt) and record
+ * cnt in it.  Returns NULL (after logging) when the allocation fails.
+ */
+dht_layout_t *
+dht_layout_new (xlator_t *this, int cnt)
+{
+        dht_layout_t *fresh = CALLOC (1, layout_size (cnt));
+
+        if (fresh == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return NULL;
+        }
+
+        fresh->cnt = cnt;
+
+        return fresh;
+}
+
+
+/*
+ * Fetch the value stored for this xlator in the inode's context and
+ * return it as a layout pointer.  The status of inode_ctx_get() is not
+ * inspected; ctx_value starts at 0, so presumably a failed lookup yields
+ * NULL -- verify against inode_ctx_get().
+ */
+dht_layout_t *
+dht_layout_get (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_value = 0;
+
+        (void) inode_ctx_get (inode, this, &ctx_value);
+
+        return (dht_layout_t *)(long) ctx_value;
+}
+
+
+/*
+ * Hash @name with the layout's hash type and return the xlator of the
+ * first entry whose [start, stop] range contains the hash.  Returns
+ * NULL when hashing fails or when no entry yields a subvolume.
+ */
+xlator_t *
+dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+{
+        uint32_t  name_hash = 0;
+        xlator_t *match     = NULL;
+        int       idx       = 0;
+
+        if (dht_hash_compute (layout->type, name, &name_hash) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "hash computation failed for type=%d name=%s",
+                        layout->type, name);
+                return NULL;
+        }
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                /* skip ranges that do not contain the hash */
+                if (!(layout->list[idx].start <= name_hash))
+                        continue;
+                if (!(layout->list[idx].stop >= name_hash))
+                        continue;
+
+                match = layout->list[idx].xlator;
+                break;
+        }
+
+        if (match == NULL) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "no subvolume for hash (value) = %u", name_hash);
+        }
+
+        return match;
+}
+
+
+/*
+ * Return the entry of conf->file_layouts that sits at the same index
+ * as @subvol in conf->subvolumes, or NULL when @subvol is not found.
+ */
+dht_layout_t *
+dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+{
+        dht_conf_t *conf = this->private;
+        int         idx  = 0;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (conf->subvolumes[idx] == subvol)
+                        return conf->file_layouts[idx];
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Build conf->file_layouts: one single-entry layout per subvolume, each
+ * flagged as preset and pointing at its subvolume.  Returns 0 on
+ * success and -1 when an allocation fails (layouts created before the
+ * failure are left in conf->file_layouts).
+ */
+int
+dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+{
+        dht_layout_t *single = NULL;
+        int           idx    = 0;
+
+        conf->file_layouts = CALLOC (conf->subvolume_cnt,
+                                     sizeof (dht_layout_t *));
+        if (conf->file_layouts == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                single = dht_layout_new (this, 1);
+                if (single == NULL)
+                        return -1;
+
+                single->preset         = 1;
+                single->list[0].xlator = conf->subvolumes[idx];
+
+                conf->file_layouts[idx] = single;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Serialize entry @pos of @layout into a freshly allocated array of
+ * network-order 32-bit words: [0] = 1 (entry count), [1] = layout type,
+ * [2] = start, [3] = stop.  Five words are allocated; the fifth is left
+ * zero.
+ *
+ * On success the array is handed to the caller through *disk_layout_p
+ * and 0 is returned; -1 is returned when the allocation fails.  When
+ * disk_layout_p is NULL there is nobody to take ownership of the
+ * buffer, so nothing is allocated (previously the buffer was built and
+ * then leaked) and 0 is returned.
+ */
+int
+dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+                         int pos, int32_t **disk_layout_p)
+{
+        int      ret         = -1;
+        int32_t *disk_layout = NULL;
+
+        if (!disk_layout_p) {
+                ret = 0;
+                goto out;
+        }
+
+        /* size by the element type rather than by plain int */
+        disk_layout = CALLOC (5, sizeof (*disk_layout));
+        if (!disk_layout) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto out;
+        }
+
+        disk_layout[0] = hton32 (1);
+        disk_layout[1] = hton32 (layout->type);
+        disk_layout[2] = hton32 (layout->list[pos].start);
+        disk_layout[3] = hton32 (layout->list[pos].stop);
+
+        *disk_layout_p = disk_layout;
+        ret = 0;
+
+out:
+        return ret;
+}
+
+
+/*
+ * Decode an on-disk layout (network-order 32-bit words: count, type,
+ * start, stop) and store its start/stop range in entry @pos of
+ * @layout.  Returns -1 when the recorded count is not 1, 0 otherwise.
+ */
+int
+dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+                       int pos, int32_t *disk_layout)
+{
+        int entries     = 0;
+        int hash_type   = 0;
+        int range_start = 0;
+        int range_stop  = 0;
+
+        /* TODO: assert disk_layout_ptr is of required length */
+
+        entries = ntoh32 (disk_layout[0]);
+        if (entries != 1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "disk layout has invalid count %d", entries);
+                return -1;
+        }
+
+        /* TODO: assert type is compatible */
+        hash_type   = ntoh32 (disk_layout[1]);
+        range_start = ntoh32 (disk_layout[2]);
+        range_stop  = ntoh32 (disk_layout[3]);
+
+        layout->list[pos].start = range_start;
+        layout->list[pos].stop  = range_stop;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "merged to layout: %u - %u (type %d) from %s",
+                range_start, range_stop, hash_type,
+                layout->list[pos].xlator->name);
+
+        return 0;
+}
+
+
+/*
+ * Record the reply of @subvol in the first unused slot of @layout.
+ *
+ * The slot gets @subvol and an error value: op_errno when the call
+ * failed (op_ret != 0), otherwise -1 until a disk layout has been
+ * merged successfully, at which point it becomes 0.  A missing
+ * "trusted.glusterfs.dht" xattr (or a NULL @xattr) leaves the slot
+ * marked -1 and is not treated as a failure.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * dht_disk_layout_merge().  Returns -1 when the reply was successful
+ * but the layout has no unused slot left; previously that case indexed
+ * list[layout->cnt], one past the end of the array.
+ */
+int
+dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+                  int op_ret, int op_errno, dict_t *xattr)
+{
+        int      i           = 0;
+        int      ret         = -1;
+        int      err         = -1;
+        int32_t *disk_layout = NULL;
+
+
+        if (op_ret != 0) {
+                err = op_errno;
+        }
+
+        /* claim the first slot that has no subvolume yet */
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].xlator == NULL) {
+                        layout->list[i].err    = err;
+                        layout->list[i].xlator = subvol;
+                        break;
+                }
+        }
+
+        if (op_ret != 0) {
+                ret = 0;
+                goto out;
+        }
+
+        /* everything below touches list[i]; bail out if no slot was found */
+        if (i >= layout->cnt) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no free layout slot for subvolume %s",
+                        subvol->name);
+                ret = -1;
+                goto out;
+        }
+
+        if (xattr) {
+                /* during lookup and not mkdir */
+                ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+                                    VOID(&disk_layout));
+        }
+
+        if (ret != 0) {
+                layout->list[i].err = -1;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "missing disk layout on %s. err = %d",
+                        subvol->name, err);
+                ret = 0;
+                goto out;
+        }
+
+        ret = dht_disk_layout_merge (this, layout, i, disk_layout);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "layout merge from subvolume %s failed",
+                        subvol->name);
+                goto out;
+        }
+        layout->list[i].err = 0;
+
+out:
+        return ret;
+}
+
+
+/*
+ * Exchange the start, stop, xlator and err fields of entries @i and @j
+ * of @layout.
+ */
+void
+dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
+{
+        /* remember entry i ... */
+        uint32_t  saved_start  = layout->list[i].start;
+        uint32_t  saved_stop   = layout->list[i].stop;
+        xlator_t *saved_xlator = layout->list[i].xlator;
+        int       saved_err    = layout->list[i].err;
+
+        /* ... overwrite it with entry j ... */
+        layout->list[i].start  = layout->list[j].start;
+        layout->list[i].stop   = layout->list[j].stop;
+        layout->list[i].xlator = layout->list[j].xlator;
+        layout->list[i].err    = layout->list[j].err;
+
+        /* ... and put the remembered values into entry j */
+        layout->list[j].start  = saved_start;
+        layout->list[j].stop   = saved_stop;
+        layout->list[j].xlator = saved_xlator;
+        layout->list[j].err    = saved_err;
+}
+
+
+/*
+ * Ordering function used by dht_layout_sort().  When either entry has
+ * a non-zero err the result is the difference of the err values;
+ * otherwise it is the difference of the start offsets, computed in
+ * 64 bits.
+ */
+int64_t
+dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
+{
+        if (layout->list[i].err || layout->list[j].err)
+                return layout->list[i].err - layout->list[j].err;
+
+        return (int64_t) layout->list[i].start
+                - (int64_t) layout->list[j].start;
+}
+
+
+/*
+ * Sort the entries of @layout in place into the order defined by
+ * dht_layout_entry_cmp(), swapping entry lo with any later entry that
+ * compares smaller.  Always returns 0.
+ */
+int
+dht_layout_sort (dht_layout_t *layout)
+{
+        int lo = 0;
+        int hi = 0;
+
+        /* TODO: O(n^2) -- bad bad */
+
+        for (lo = 0; lo < layout->cnt - 1; lo++) {
+                for (hi = lo + 1; hi < layout->cnt; hi++) {
+                        if (dht_layout_entry_cmp (layout, lo, hi) > 0)
+                                dht_layout_entry_swap (layout, lo, hi);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Walk the entries of @layout in array order and count irregularities.
+ * dht_layout_normalize() calls this right after dht_layout_sort().
+ *
+ * Entries with a non-zero err are skipped and tallied by error code:
+ * -1 and ENOENT as "missing", ENOTCONN as "down", anything else as
+ * "misc".  For every other entry, a start above prev_stop + 1 counts as
+ * a hole and a start below prev_stop + 1 counts as an overlap.
+ *
+ * Each non-NULL out-pointer receives its counter.  Note that *holes_p
+ * and *overlaps_p receive the NUMBER of holes/overlaps (hole_cnt and
+ * overlap_cnt); the widths accumulated in holes/overlaps are never
+ * reported.  The function always returns 0.
+ *
+ * NOTE(review): conf is assigned but never read, and loc is unused.
+ */
+int
+dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+{
+ dht_conf_t *conf = NULL;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ int ret = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1; /* cleared once an entry without error is seen */
+
+
+ conf = this->private;
+
+ /* The walk starts from the value just below the first entry's start.
+ * last_stop and prev_stop are uint32_t, so the subtraction wraps
+ * around when that start is 0.  After the loop prev_stop is compared
+ * with last_stop again; any difference is counted as one more hole. */
+
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ break;
+ case ENOTCONN:
+ down++;
+ break;
+ default:
+ misc++;
+ }
+ continue; /* errored entries do not take part in the range walk */
+ }
+
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ holes += (layout->list[i].start - (prev_stop + 1));
+ }
+
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
+
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
+ holes += (last_stop - prev_stop); /* not guarded by the if above */
+
+ if (holes_p)
+ *holes_p = hole_cnt;
+
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
+
+ if (missing_p)
+ *missing_p = missing;
+
+ if (down_p)
+ *down_p = down;
+
+ if (misc_p)
+ *misc_p = misc;
+
+ return ret;
+}
+
+
+/*
+ * Sort @layout and check it for anomalies.
+ *
+ * Returns -1 when sorting or the anomaly scan reports failure, 1 when
+ * holes or overlaps were found (logged as a warning when every entry is
+ * counted as missing, as an error otherwise), and the scan's result
+ * when the layout is clean.
+ */
+int
+dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
+{
+        int      ret      = 0;
+        uint32_t holes    = 0;
+        uint32_t overlaps = 0;
+        uint32_t missing  = 0;
+        uint32_t down     = 0;
+        uint32_t misc     = 0;
+
+        ret = dht_layout_sort (layout);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "sort failed?! how the ....");
+                return ret;
+        }
+
+        ret = dht_layout_anomalies (this, loc, layout,
+                                    &holes, &overlaps,
+                                    &missing, &down, &misc);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error while finding anomalies in %s -- not good news",
+                        loc->path);
+                return ret;
+        }
+
+        /* nothing wrong with the ranges */
+        if (!holes && !overlaps)
+                return ret;
+
+        if (missing == layout->cnt) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "directory %s looked up first time",
+                        loc->path);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "found anomalies in %s. holes=%d overlaps=%d",
+                        loc->path, holes, overlaps);
+        }
+
+        return 1;
+}
+
+
+/*
+ * Compare the range held in @layout for @subvol with the on-disk
+ * layout carried in @xattr under "trusted.glusterfs.dht".
+ *
+ * Returns 0 when start and stop agree, 1 when they differ or when
+ * @layout has no entry for @subvol, and -1 when @xattr is NULL, the
+ * key is missing, or the on-disk count is not 1.
+ */
+int
+dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+                         loc_t *loc, dict_t *xattr)
+{
+        int       pos         = 0;
+        int       ret         = -1;
+        int32_t  *disk_layout = NULL;
+        int32_t   count       = -1;
+        uint32_t  start_off   = -1;
+        uint32_t  stop_off    = -1;
+
+        /* locate the entry that belongs to subvol */
+        for (pos = 0; pos < layout->cnt; pos++) {
+                if (layout->list[pos].xlator == subvol)
+                        break;
+        }
+
+        if (pos >= layout->cnt) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s - no layout info for subvolume %s",
+                        loc->path, subvol->name);
+                return 1;
+        }
+
+        if (xattr == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s - xattr dictionary is NULL",
+                        loc->path);
+                return -1;
+        }
+
+        ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
+                            VOID(&disk_layout));
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s - disk layout missing", loc->path);
+                return -1;
+        }
+
+        count = ntoh32 (disk_layout[0]);
+        if (count != 1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s - disk layout has invalid count %d",
+                        loc->path, count);
+                return -1;
+        }
+
+        start_off = ntoh32 (disk_layout[2]);
+        stop_off  = ntoh32 (disk_layout[3]);
+
+        if ((layout->list[pos].start == start_off)
+            && (layout->list[pos].stop == stop_off))
+                return 0;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "subvol: %s; inode layout - %"PRId32" - %"PRId32"; "
+                "disk layout - %"PRId32" - %"PRId32,
+                layout->list[pos].xlator->name,
+                layout->list[pos].start, layout->list[pos].stop,
+                start_off, stop_off);
+
+        return 1;
+}
+
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
new file mode 100644
index 00000000000..9cc24ccf6b3
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "compat.h"
+#include "dht-common.h"
+
+
+
+/*
+ * setxattr callback of the linkfile creation sequence: forwards the
+ * result to the callback saved in local->linkfile, together with the
+ * inode and stat buffer saved there.  Always returns 0.
+ */
+int
+dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno)
+{
+        dht_local_t *ctx = frame->local;
+
+        ctx->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+                                    ctx->linkfile.inode,
+                                    &ctx->linkfile.stbuf);
+
+        return 0;
+}
+
+
+/*
+ * mknod callback of dht_linkfile_create().
+ *
+ * On success it builds a dictionary holding the name of
+ * local->linkfile.srcvol under "trusted.glusterfs.dht.linkto", saves the
+ * inode and stat buffer in local->linkfile and winds setxattr to the
+ * subvolume that answered (prev->this); dht_linkfile_xattr_cbk then
+ * completes the sequence.
+ *
+ * On any failure the saved linkfile callback is invoked directly.
+ * Failures detected here (allocation, dict_set) now set op_ret to -1 as
+ * well as op_errno, so the callback no longer sees the successful
+ * op_ret of the mknod, and a dict_set failure aborts instead of winding
+ * setxattr with a dictionary that lacks the key.
+ *
+ * Always returns 0.
+ */
+int
+dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno,
+                         inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local    = NULL;
+        call_frame_t *prev     = NULL;
+        dict_t       *xattr    = NULL;
+        data_t       *str_data = NULL;
+        int           ret      = -1;
+
+        local = frame->local;
+        prev  = cookie;
+
+        if (op_ret == -1)
+                goto err;
+
+        xattr = get_new_dict ();
+        if (!xattr) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->linkfile.xattr = dict_ref (xattr);
+        local->linkfile.inode = inode_ref (inode);
+
+        str_data = str_to_data (local->linkfile.srcvol->name);
+        if (!str_data) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to initialize linkfile data");
+                op_ret   = -1;
+                op_errno = EINVAL;
+                /* str_data is released at err; this assumes dict_set()
+                   keeps no reference when it fails -- TODO confirm */
+                goto err;
+        }
+        /* the dictionary holds the value now */
+        str_data = NULL;
+
+        local->linkfile.stbuf = *stbuf;
+
+        STACK_WIND (frame, dht_linkfile_xattr_cbk,
+                    prev->this, prev->this->fops->setxattr,
+                    &local->linkfile.loc, local->linkfile.xattr, 0);
+
+        return 0;
+
+err:
+        if (str_data) {
+                data_destroy (str_data);
+                str_data = NULL;
+        }
+
+        local->linkfile.linkfile_cbk (frame, cookie, this,
+                                      op_ret, op_errno, inode, stbuf);
+        return 0;
+}
+
+
+/*
+ * Start creating a linkfile for @loc on @fromvol.
+ *
+ * Saves @linkfile_cbk, @tovol (as linkfile.srcvol) and a copy of @loc
+ * in the frame's local, then winds mknod with mode
+ * S_IFREG | DHT_LINKFILE_MODE to @fromvol; dht_linkfile_create_cbk
+ * continues from there.  The result of loc_copy() is not checked.
+ * Always returns 0.
+ */
+int
+dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+                     xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+{
+        dht_local_t *ctx = frame->local;
+
+        ctx->linkfile.linkfile_cbk = linkfile_cbk;
+        ctx->linkfile.srcvol       = tovol;
+        loc_copy (&ctx->linkfile.loc, loc);
+
+        STACK_WIND (frame, dht_linkfile_create_cbk,
+                    fromvol, fromvol->fops->mknod, loc,
+                    S_IFREG | DHT_LINKFILE_MODE, 0);
+
+        return 0;
+}
+
+
+/*
+ * unlink callback of dht_linkfile_unlink(): logs a warning when the
+ * unlink failed and destroys the frame in either case.
+ * Always returns 0.
+ */
+int
+dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno)
+{
+        dht_local_t  *ctx     = frame->local;
+        call_frame_t *replied = cookie;
+        xlator_t     *target  = replied->this;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "unlinking linkfile %s on %s failed (%s)",
+                        ctx->loc.path, target->name, strerror (op_errno));
+        }
+
+        DHT_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+
+/*
+ * Unlink @loc on @subvol using a copy of @frame, so the caller's frame
+ * is not involved in the reply (dht_linkfile_unlink_cbk destroys the
+ * copy).  Returns 0 once the unlink is wound and -1 when the frame copy
+ * or its local cannot be set up.
+ */
+int
+dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+                     xlator_t *subvol, loc_t *loc)
+{
+        call_frame_t *bg_frame = NULL;
+        dht_local_t  *bg_local = NULL;
+
+        bg_frame = copy_frame (frame);
+        if (bg_frame == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+
+        bg_local = dht_local_init (bg_frame);
+        if (bg_local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                DHT_STACK_DESTROY (bg_frame);
+                return -1;
+        }
+
+        loc_copy (&bg_local->loc, loc);
+
+        STACK_WIND (bg_frame, dht_linkfile_unlink_cbk,
+                    subvol, subvol->fops->unlink,
+                    &bg_local->loc);
+
+        return 0;
+}
+
+
+/*
+ * Look up "trusted.glusterfs.dht.linkto" in @xattr and return the
+ * configured subvolume whose name equals that value.  Returns NULL when
+ * @xattr is NULL, the key is absent or empty, or no subvolume has that
+ * name.  @inode and @stbuf are not used.
+ */
+xlator_t *
+dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf,
+                     dict_t *xattr)
+{
+        dht_conf_t *conf    = this->private;
+        void       *volname = NULL;
+        int         idx     = 0;
+        int         ret     = 0;
+
+        if (xattr == NULL)
+                return NULL;
+
+        ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+        if ((ret == -1) || (volname == NULL))
+                return NULL;
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (strcmp (conf->subvolumes[idx]->name,
+                            (char *)volname) == 0)
+                        return conf->subvolumes[idx];
+        }
+
+        return NULL;
+}
+
+
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
new file mode 100644
index 00000000000..e5532f1bc87
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -0,0 +1,562 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
+ * delete the newpath if it gets EEXIST from the link() call.
+ */
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+
+
+/*
+ * Per-subvolume rename callback for directories.  A failure is logged
+ * and recorded in local->op_ret / local->op_errno; a success stores the
+ * returned stat buffer in local->stbuf.  When the last reply has
+ * arrived the recorded result is unwound.  Always returns 0.
+ */
+int
+dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = 0;
+
+        if (op_ret != -1) {
+                /* TODO: construct proper stbuf for dir */
+                local->stbuf = *stbuf;
+        } else {
+                /* TODO: undo the damage */
+
+                gf_log (this->name, GF_LOG_ERROR,
+                        "rename %s -> %s on %s failed (%s)",
+                        local->loc.path, local->loc2.path,
+                        prev->this->name, strerror (op_errno));
+
+                local->op_ret   = op_ret;
+                local->op_errno = op_errno;
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->stbuf);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * Wind the directory rename (local->loc -> local->loc2) to every
+ * subvolume; dht_rename_dir_cbk collects the replies.
+ *
+ * If an earlier stage left local->op_ret at -1, nothing is wound and
+ * the recorded error is unwound instead.  That unwind now passes an
+ * explicit NULL for the stat buffer, matching the rename callback
+ * signature (see dht_rename_dir_cbk) and the error unwind in
+ * dht_rename_dir; previously the argument was missing altogether.
+ *
+ * Always returns 0.
+ */
+int
+dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
+{
+        dht_local_t *local = NULL;
+        dht_conf_t  *conf  = NULL;
+        int          i     = 0;
+
+        conf  = this->private;
+        local = frame->local;
+
+        if (local->op_ret == -1)
+                goto err;
+
+        /* one reply is expected from each subvolume */
+        local->call_cnt = conf->subvolume_cnt;
+        local->op_ret   = 0;
+
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                STACK_WIND (frame, dht_rename_dir_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->rename,
+                            &local->loc, &local->loc2);
+        }
+
+        return 0;
+
+err:
+        DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, NULL);
+        return 0;
+}
+
+
+/*
+ * readdir callback used while renaming onto an existing directory.
+ * A reply with op_ret above 2 marks the operation as failed with
+ * ENOTEMPTY.  After the last reply dht_rename_dir_do() takes over.
+ * Always returns 0.
+ */
+int
+dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, gf_dirent_t *entries)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = -1;
+
+        if (op_ret > 2) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on %s for %s returned %d entries",
+                        prev->this->name, local->loc.path, op_ret);
+                local->op_ret   = -1;
+                local->op_errno = ENOTEMPTY;
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                dht_rename_dir_do (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * opendir callback used while renaming onto an existing directory.
+ * On success a readdir is wound to the subvolume that answered.  On
+ * failure the error is logged and this reply is counted as complete;
+ * if it was the last one, dht_rename_dir_do() is called.
+ * Always returns 0.
+ */
+int
+dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno, fd_t *fd)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = -1;
+
+        if (op_ret != -1) {
+                STACK_WIND (frame, dht_rename_readdir_cbk,
+                            prev->this, prev->this->fops->readdir,
+                            local->fd, 4096, 0);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "opendir on %s for %s failed (%s)",
+                prev->this->name, local->loc.path,
+                strerror (op_errno));
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining))
+                dht_rename_dir_do (frame, this);
+
+        return 0;
+}
+
+
+/*
+ * Entry point for renaming a directory.
+ *
+ * Creates an fd for the source inode; if that fails the call is
+ * unwound with ENOMEM.  When the destination has no cached subvolume
+ * the rename is wound right away through dht_rename_dir_do().
+ * Otherwise opendir on the destination (local->loc2) is wound to every
+ * subvolume first, and dht_rename_opendir_cbk continues from there.
+ * Always returns 0.
+ */
+int
+dht_rename_dir (call_frame_t *frame, xlator_t *this)
+{
+        dht_conf_t  *conf  = frame->this->private;
+        dht_local_t *local = frame->local;
+        int          idx   = 0;
+
+        local->call_cnt = conf->subvolume_cnt;
+
+        local->fd = fd_create (local->loc.inode, frame->root->pid);
+        if (local->fd == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                DHT_STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        local->op_ret = 0;
+
+        if (local->dst_cached == NULL) {
+                dht_rename_dir_do (frame, this);
+                return 0;
+        }
+
+        for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                STACK_WIND (frame, dht_rename_opendir_cbk,
+                            conf->subvolumes[idx],
+                            conf->subvolumes[idx]->fops->opendir,
+                            &local->loc2, local->fd);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback for the clean-up unlinks wound by dht_rename_cbk().  A
+ * failed unlink is only logged; once the last reply is in, the result
+ * recorded in local is unwound.  Always returns 0.
+ */
+int
+dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = 0;
+
+        remaining = dht_frame_return (frame);
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "unlink on %s failed (%s)",
+                        prev->this->name, strerror (op_errno));
+        }
+
+        if (is_last_call (remaining)) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->stbuf);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Callback of the rename wound by dht_do_rename().
+ *
+ * On failure the error is recorded in local and unwound.  On success
+ * the returned stat buffer is saved in local->stbuf (it is what every
+ * unwind of this operation hands back, and it was previously never
+ * filled in on this path), and the leftovers of the old names are
+ * unlinked:
+ *   - the old source data file on src_cached,
+ *   - the old source linkfile on src_hashed,
+ *   - the old destination data file on dst_cached,
+ * each only when the conditions below say that subvolume was not
+ * already taken care of.  dht_rename_unlink_cbk unwinds after the last
+ * unlink reply; when nothing needs unlinking the unwind happens here.
+ *
+ * Always returns 0.
+ */
+int
+dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        dht_local_t  *local           = NULL;
+        call_frame_t *prev            = NULL;
+        xlator_t     *src_hashed      = NULL;
+        xlator_t     *src_cached      = NULL;
+        xlator_t     *dst_hashed      = NULL;
+        xlator_t     *dst_cached      = NULL;
+        xlator_t     *rename_subvol   = NULL;
+        int           unlink_src_data = 0;
+        int           unlink_src_link = 0;
+        int           unlink_dst_data = 0;
+
+        local = frame->local;
+        prev  = cookie;
+
+        src_hashed = local->src_hashed;
+        src_cached = local->src_cached;
+        dst_hashed = local->dst_hashed;
+        dst_cached = local->dst_cached;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "rename on %s failed (%s)", prev->this->name,
+                        strerror (op_errno));
+                local->op_ret   = op_ret;
+                local->op_errno = op_errno;
+                goto unwind;
+        }
+
+        /* keep the attributes of the renamed file for the unwind,
+           as dht_rename_dir_cbk does for directories */
+        if (stbuf)
+                local->stbuf = *stbuf;
+
+        /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+         *       is called. since rename has already happened on rename_subvol,
+         *       unlink should not be sent for oldpath (either linkfile or
+         *       cached-file) on rename_subvol. */
+        if (src_cached == dst_cached)
+                rename_subvol = src_cached;
+        else
+                rename_subvol = dst_hashed;
+
+        /* TODO: delete files in background */
+
+        /* decide once which unlinks are needed, count them, then wind them */
+        unlink_src_data = (src_cached != dst_hashed
+                           && src_cached != dst_cached);
+        unlink_src_link = (src_hashed != rename_subvol
+                           && src_hashed != src_cached);
+        unlink_dst_data = (dst_cached
+                           && (dst_cached != dst_hashed)
+                           && (dst_cached != src_cached));
+
+        if (unlink_src_data)
+                local->call_cnt++;
+
+        if (unlink_src_link)
+                local->call_cnt++;
+
+        if (unlink_dst_data)
+                local->call_cnt++;
+
+        if (local->call_cnt == 0)
+                goto unwind;
+
+        if (unlink_src_data) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "deleting old src datafile %s @ %s",
+                        local->loc.path, src_cached->name);
+
+                STACK_WIND (frame, dht_rename_unlink_cbk,
+                            src_cached, src_cached->fops->unlink,
+                            &local->loc);
+        }
+
+        if (unlink_src_link) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "deleting old src linkfile %s @ %s",
+                        local->loc.path, src_hashed->name);
+
+                STACK_WIND (frame, dht_rename_unlink_cbk,
+                            src_hashed, src_hashed->fops->unlink,
+                            &local->loc);
+        }
+
+        if (unlink_dst_data) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "deleting old dst datafile %s @ %s",
+                        local->loc2.path, dst_cached->name);
+
+                STACK_WIND (frame, dht_rename_unlink_cbk,
+                            dst_cached, dst_cached->fops->unlink,
+                            &local->loc2);
+        }
+        return 0;
+
+unwind:
+        DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                          &local->stbuf);
+
+        return 0;
+}
+
+
+/*
+ * Wind the actual rename (local->loc -> local->loc2) to a single
+ * subvolume: src_cached when source and destination are cached on the
+ * same subvolume, dst_hashed otherwise.  dht_rename_cbk handles the
+ * reply.  Always returns 0.
+ */
+int
+dht_do_rename (call_frame_t *frame)
+{
+        dht_local_t *local  = frame->local;
+        xlator_t    *this   = frame->this;
+        xlator_t    *target = NULL;
+
+        target = (local->src_cached == local->dst_cached)
+                ? local->src_cached
+                : local->dst_hashed;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "renaming %s => %s (%s)",
+                local->loc.path, local->loc2.path, target->name);
+
+        STACK_WIND (frame, dht_rename_cbk,
+                    target, target->fops->rename,
+                    &local->loc, &local->loc2);
+
+        return 0;
+}
+
+
+/*
+ * Callback shared by the linkfile creation and the link() call issued
+ * from dht_rename_create_links().  A failure is logged and recorded in
+ * local.  After the last reply the operation is either unwound with the
+ * recorded failure or continued with dht_do_rename().
+ * Always returns 0.
+ */
+int
+dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno,
+                      inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local     = frame->local;
+        call_frame_t *prev      = cookie;
+        int           remaining = 0;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "link/file on %s failed (%s)",
+                        prev->this->name, strerror (op_errno));
+                local->op_ret   = -1;
+                local->op_errno = op_errno;
+        }
+
+        remaining = dht_frame_return (frame);
+        if (!is_last_call (remaining))
+                return 0;
+
+        if (local->op_ret == -1) {
+                DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                  &local->stbuf);
+                return 0;
+        }
+
+        dht_do_rename (frame);
+
+        return 0;
+}
+
+/*
+ * First stage of renaming a non-directory: create whatever links are
+ * needed before dht_do_rename() winds the rename itself.
+ *
+ *  - When source and destination are cached on the same subvolume no
+ *    link is created and dht_do_rename() is called directly.
+ *  - When dst_hashed differs from both src_hashed and src_cached, a
+ *    linkfile for local->loc is created on dst_hashed through
+ *    dht_linkfile_create(), with src_cached as the volume it names.
+ *  - When src_cached differs from dst_hashed, link (loc -> loc2) is
+ *    wound to src_cached.
+ *
+ * Both calls report to dht_rename_links_cbk, which proceeds to
+ * dht_do_rename() after the last reply.  Always returns 0.
+ */
+int
+dht_rename_create_links (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (src_cached == dst_cached)
+ goto nolinks; /* call_cnt is still 0 here */
+
+ /* count the calls first: local->call_cnt is set before anything is wound */
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ /* same two conditions as above, now issuing the calls */
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ dht_linkfile_create (frame, dht_rename_links_cbk,
+ src_cached, dst_hashed, &local->loc);
+ }
+
+ if (src_cached != dst_hashed) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ STACK_WIND (frame, dht_rename_links_cbk,
+ src_cached, src_cached->fops->link,
+ &local->loc, &local->loc2);
+ }
+
+nolinks:
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename (frame);
+ }
+
+ return 0;
+}
+
+
+/*
+ * rename() entry point of the translator.
+ *
+ * Resolves the hashed and cached subvolumes of the source and the
+ * hashed (and, if the destination inode exists, cached) subvolume of
+ * the destination, stores them with copies of both locations in a
+ * fresh local, and dispatches to dht_rename_dir() for directories or
+ * dht_rename_create_links() for everything else.
+ *
+ * Any failure before dispatch unwinds with op_ret -1.  The unwind
+ * passes a single NULL stat buffer, which is what the rename callback
+ * signature takes (see dht_rename_cbk / dht_rename_dir); it previously
+ * passed two.
+ *
+ * Always returns 0.
+ */
+int
+dht_rename (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc)
+{
+        xlator_t    *src_cached = NULL;
+        xlator_t    *src_hashed = NULL;
+        xlator_t    *dst_cached = NULL;
+        xlator_t    *dst_hashed = NULL;
+        int          op_errno   = -1;
+        int          ret        = -1;
+        dht_local_t *local      = NULL;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (oldloc, err);
+        VALIDATE_OR_GOTO (newloc, err);
+
+        src_hashed = dht_subvol_get_hashed (this, oldloc);
+        if (!src_hashed) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        oldloc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        src_cached = dht_subvol_get_cached (this, oldloc->inode);
+        if (!src_cached) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no cached subvolume for path=%s", oldloc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        dst_hashed = dht_subvol_get_hashed (this, newloc);
+        if (!dst_hashed) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        newloc->path);
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* the destination has a cached subvolume only if it exists */
+        if (newloc->inode)
+                dst_cached = dht_subvol_get_cached (this, newloc->inode);
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        ret = loc_copy (&local->loc, oldloc);
+        if (ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        ret = loc_copy (&local->loc2, newloc);
+        if (ret == -1) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        local->src_hashed = src_hashed;
+        local->src_cached = src_cached;
+        local->dst_hashed = dst_hashed;
+        local->dst_cached = dst_cached;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
+                oldloc->path, src_hashed->name, src_cached->name,
+                newloc->path, dst_hashed->name,
+                dst_cached ? dst_cached->name : "<nul>");
+
+        if (S_ISDIR (oldloc->inode->st_mode)) {
+                dht_rename_dir (frame, this);
+        } else {
+                local->op_ret = 0;
+                dht_rename_create_links (frame);
+        }
+
+        return 0;
+
+err:
+        /* fall back to errno when no specific error was recorded */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
new file mode 100644
index 00000000000..ee32b2253ed
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -0,0 +1,460 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+
+
+/*
+ * Report the outcome of a directory self-heal to the callback stored in
+ * local->selfheal.dir_cbk.
+ *
+ * @frame: frame whose local carries the self-heal state
+ * @this:  not used here; the callback is handed frame->this instead
+ * @ret:   result code forwarded to the callback together with
+ *         local->op_errno
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+{
+        dht_local_t *heal_state = frame->local;
+
+        heal_state->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+                                      heal_state->op_errno);
+        return 0;
+}
+
+
+/*
+ * Reply handler for the per-subvolume layout setxattr.
+ *
+ * Records the outcome (0 on success, op_errno otherwise) in the layout
+ * entry belonging to the replying subvolume, which is taken from
+ * ((call_frame_t *) cookie)->this.  When dht_frame_return() reports the
+ * last outstanding call, the self-heal is finished with result 0.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int op_ret, int op_errno)
+{
+        dht_local_t  *local     = frame->local;
+        dht_layout_t *layout    = local->selfheal.layout;
+        call_frame_t *prev      = cookie;
+        xlator_t     *replier   = prev->this;
+        int           status    = (op_ret == 0) ? 0 : op_errno;
+        int           remaining = 0;
+        int           idx       = 0;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                if (layout->list[idx].xlator != replier)
+                        continue;
+
+                layout->list[idx].err = status;
+                break;
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                dht_selfheal_dir_finish (frame, this, 0);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Send the on-disk layout of entry @i of @layout to that entry's subvolume
+ * as the "trusted.glusterfs.dht" extended attribute of @loc.
+ *
+ * The reply is handled by dht_selfheal_dir_xattr_cbk().  A failure detected
+ * locally (dictionary allocation, layout extraction, dictionary insert) is
+ * reported through the same callback with op_ret -1 / ENOMEM, so the
+ * caller's call count is consumed on every path.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
+                                  dht_layout_t *layout, int i)
+{
+        xlator_t     *subvol      = NULL;
+        dict_t       *xattr       = NULL;
+        int           ret         = 0;
+        xlator_t     *this        = NULL;
+        int32_t      *disk_layout = NULL;
+        call_frame_t  failed_prev = {0, };
+
+
+        subvol = layout->list[i].xlator;
+        this   = frame->this;
+
+        xattr = get_new_dict ();
+        if (!xattr) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to extract disk layout");
+                goto err;
+        }
+
+        ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
+                            disk_layout, 4 * 4);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set xattr dictionary");
+                goto err;
+        }
+        /* the buffer is no longer released by this function's error path */
+        disk_layout = NULL;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "setting hash range %u - %u (type %d) on subvolume %s for %s",
+                layout->list[i].start, layout->list[i].stop,
+                layout->type, subvol->name, loc->path);
+
+        dict_ref (xattr);
+
+        STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
+                    subvol, subvol->fops->setxattr,
+                    loc, xattr, 0);
+
+        dict_unref (xattr);
+
+        return 0;
+
+err:
+        if (xattr) {
+                dict_destroy (xattr);
+        }
+
+        if (disk_layout) {
+                FREE (disk_layout);
+        }
+
+        /*
+         * dht_selfheal_dir_xattr_cbk() reads the replying subvolume from
+         * ((call_frame_t *) cookie)->this.  Passing the bare xlator pointer
+         * as cookie would make it read an unrelated xlator_t field, so hand
+         * it a frame-shaped cookie that names the failing subvolume.
+         */
+        failed_prev.this = subvol;
+
+        dht_selfheal_dir_xattr_cbk (frame, &failed_prev, frame->this,
+                                    -1, ENOMEM);
+        return 0;
+}
+
+
+/*
+ * Push the layout xattr to every subvolume whose layout entry has
+ * err == -1 and a non-zero 'stop'.  If no entry qualifies, the self-heal
+ * is finished immediately with result 0; otherwise local->call_cnt is set
+ * to the number of qualifying entries and one request is issued per entry.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+        dht_local_t *local   = frame->local;
+        xlator_t    *this    = frame->this;
+        int          pending = 0;
+        int          idx     = 0;
+        int          ret     = 0;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                /* attr missing and layout present */
+                if (layout->list[idx].err == -1 && layout->list[idx].stop)
+                        pending++;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "%d subvolumes missing xattr for %s",
+                pending, loc->path);
+
+        if (pending == 0) {
+                dht_selfheal_dir_finish (frame, this, 0);
+                return 0;
+        }
+
+        local->call_cnt = pending;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                if (layout->list[idx].err == -1 && layout->list[idx].stop) {
+                        ret = dht_selfheal_dir_xattr_persubvol (frame, loc,
+                                                                layout, idx);
+
+                        /* no layout access after the last request is out */
+                        pending--;
+                        if (pending == 0)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+
+/*
+ * Reply handler for the self-heal mkdir.
+ *
+ * When the mkdir succeeded, or failed with EEXIST, the layout entry of the
+ * replying subvolume (taken from ((call_frame_t *) cookie)->this) is marked
+ * with err = -1.  Once dht_frame_return() reports the last outstanding
+ * call, the xattr phase is started on local->loc.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int op_ret, int op_errno,
+                            inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t  *local       = frame->local;
+        dht_layout_t *layout      = local->selfheal.layout;
+        call_frame_t *prev        = cookie;
+        xlator_t     *replier     = prev->this;
+        int           dir_present = (op_ret == 0) || (op_errno == EEXIST);
+        int           remaining   = 0;
+        int           idx         = 0;
+
+        for (idx = 0; dir_present && idx < layout->cnt; idx++) {
+                if (layout->list[idx].xlator == replier) {
+                        layout->list[idx].err = -1;
+                        break;
+                }
+        }
+
+        remaining = dht_frame_return (frame);
+        if (is_last_call (remaining)) {
+                dht_selfheal_dir_xattr (frame, &local->loc, layout);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Create the directory @loc on subvolumes of @layout.
+ *
+ * With @force == 0 only entries whose err is ENOENT are targeted; with a
+ * non-zero @force every entry is.  If nothing needs creating, the xattr
+ * phase is started directly.  Otherwise local->call_cnt is set to the
+ * number of requests and dht_selfheal_dir_mkdir_cbk() collects the replies.
+ * The mode used is local->stbuf.st_mode.
+ *
+ * Always returns 0.
+ */
+int
+dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
+                        dht_layout_t *layout, int force)
+{
+        int           missing_dirs = 0;
+        int           i            = 0;
+        dht_local_t  *local        = NULL;
+        xlator_t     *this         = NULL;
+
+
+        local = frame->local;
+        this  = frame->this;
+
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].err == ENOENT || force)
+                        missing_dirs++;
+        }
+
+        if (missing_dirs == 0) {
+                dht_selfheal_dir_xattr (frame, loc, layout);
+                return 0;
+        }
+
+        local->call_cnt = missing_dirs;
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].err == ENOENT || force) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "creating directory %s on subvol %s",
+                                loc->path, layout->list[i].xlator->name);
+
+                        STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
+                                    layout->list[i].xlator,
+                                    layout->list[i].xlator->fops->mkdir,
+                                    loc, local->stbuf.st_mode);
+
+                        /*
+                         * Same guard as dht_selfheal_dir_xattr(): once the
+                         * last request has been issued its reply may already
+                         * have completed the operation, so do not read the
+                         * layout again.
+                         */
+                        if (--missing_dirs == 0)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Assign a fresh layout to a directory that has none yet.
+ *
+ * The 32-bit hash space is split into equal chunks, one per layout entry
+ * whose err is -1, in list order.  The last such entry is extended to
+ * 0xffffffff so the division remainder is covered.  Entries with any other
+ * err value are left untouched.
+ *
+ * If no entry has err == -1 there is nothing to assign to; the function
+ * logs a warning and returns without modifying the layout.
+ */
+void
+dht_selfheal_fix_this_virgin (call_frame_t *frame, loc_t *loc,
+                              dht_layout_t *layout)
+{
+        xlator_t *this  = NULL;
+        uint32_t  chunk = 0;
+        uint32_t  start = 0;
+        int       i     = 0;
+        int       cnt   = 0;
+
+        this = frame->this;
+
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].err == -1) {
+                        cnt++;
+                }
+        }
+
+        if (cnt == 0) {
+                /* avoid dividing the hash space by zero */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "no subvolume available to hold a layout for %s",
+                        loc->path);
+                return;
+        }
+
+        chunk = ((unsigned long) 0xffffffff) / cnt;
+
+        for (i = 0; i < layout->cnt; i++) {
+                if (layout->list[i].err != -1)
+                        continue;
+
+                cnt--;
+
+                layout->list[i].start = start;
+                if (cnt == 0) {
+                        /* last range absorbs the division remainder */
+                        layout->list[i].stop = 0xffffffff;
+                } else {
+                        layout->list[i].stop = start + chunk - 1;
+                }
+
+                start = start + chunk;
+
+                /* logged after the final 'stop' is known */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "gave fix: %u - %u on %s for %s",
+                        layout->list[i].start, layout->list[i].stop,
+                        layout->list[i].xlator->name, loc->path);
+
+                if (cnt == 0)
+                        break;
+        }
+}
+
+
+/*
+ * Decide whether the directory layout can be healed, using the anomaly
+ * counters stored in local->selfheal.
+ *
+ * Returns 0 when any of the following holds, -1 otherwise:
+ *   - missing + down equals conf->subvolume_cnt (a fresh layout is then
+ *     assigned via dht_selfheal_fix_this_virgin());
+ *   - the number of holes does not exceed the number of down subvolumes;
+ *   - at least one layout entry has err == ENOENT.
+ */
+int
+dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
+                          dht_layout_t *layout)
+{
+        xlator_t    *this    = frame->this;
+        dht_conf_t  *conf    = this->private;
+        dht_local_t *local   = frame->local;
+        int          missing = local->selfheal.missing;
+        int          down    = local->selfheal.down;
+        int          holes   = local->selfheal.hole_cnt;
+        int          fixable = -1;
+        int          idx     = 0;
+
+        if ((missing + down) == conf->subvolume_cnt) {
+                dht_selfheal_fix_this_virgin (frame, loc, layout);
+                fixable = 0;
+        }
+
+        /* the down subvol might fill up the holes */
+        if (holes <= down)
+                fixable = 0;
+
+        idx = 0;
+        while (idx < layout->cnt) {
+                /* directory not present */
+                if (layout->list[idx].err == ENOENT) {
+                        fixable = 0;
+                        break;
+                }
+                idx++;
+        }
+
+        /* TODO: give a fix to these non-virgins */
+
+        return fixable;
+}
+
+
+/*
+ * Entry point for healing the layout of directory @loc.
+ *
+ * Collects layout anomalies into local->selfheal, remembers @dir_cbk and
+ * @layout there, and asks dht_selfheal_dir_getafix() whether a fix is
+ * possible.  If so, the mkdir phase is started (non-forced); otherwise
+ * @dir_cbk is invoked through dht_selfheal_dir_finish() with the -1 result.
+ *
+ * Always returns 0; the outcome is delivered through @dir_cbk.
+ */
+int
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+
+ local = frame->local;
+ this = frame->this;
+
+ /* NOTE(review): this return value is overwritten below without being
+    checked */
+ ret = dht_layout_anomalies (this, loc, layout,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing,
+ &local->selfheal.down,
+ &local->selfheal.misc);
+
+ /* local copies are only referenced by the disabled policy block below */
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+ missing = local->selfheal.missing;
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = layout;
+
+/*
+ if (down) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d subvolumes down -- not fixing", down);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (overlaps) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not fixing overlaps in %s", loc->path);
+ local->op_errno = EINVAL;
+ ret = -1;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d subvolumes have unrecoverable errors", misc);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (holes > missing) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%d holes and %d pigeons -- not fixing",
+ holes, missing);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+*/
+ /* 0: a fix is possible, -1: it is not */
+ ret = dht_selfheal_dir_getafix (frame, loc, layout);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "the directory is not a virgin");
+ goto sorry_no_fix;
+ }
+
+ /* force == 0: only subvolumes reporting ENOENT get the directory */
+ dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+
+ return 0;
+
+sorry_no_fix:
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish (frame, this, ret);
+
+ return 0;
+}
+
+
+/*
+ * Re-create directory @loc on every subvolume of @layout (forced mkdir)
+ * and then continue with the xattr phase.  @dir_cbk and @layout are stored
+ * in local->selfheal so the later phases can find them.
+ *
+ * Always returns 0; the outcome is delivered through @dir_cbk.
+ */
+int
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+                      loc_t *loc, dht_layout_t *layout)
+{
+        dht_local_t *heal_state = frame->local;
+
+        heal_state->selfheal.dir_cbk = dir_cbk;
+        heal_state->selfheal.layout  = layout;
+
+        /* force == 1: target every layout entry, not only ENOENT ones */
+        (void) dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+
+        return 0;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
new file mode 100644
index 00000000000..836e7a4e81f
--- /dev/null
+++ b/xlators/cluster/dht/src/dht.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "dht-common.c"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+
+
+/*
+ * Translator notification entry point: forwards @event and @data to
+ * dht_notify() and returns its result.  Extra variadic arguments are not
+ * examined.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        return dht_notify (this, event, data);
+}
+
+/*
+ * Translator teardown: releases the configuration stored in this->private
+ * (per-subvolume file layouts, default directory layout, subvolume array
+ * and status array, then the configuration itself).  Does nothing when no
+ * configuration is attached.
+ */
+void
+fini (xlator_t *this)
+{
+        dht_conf_t *conf = this->private;
+        int         idx  = 0;
+
+        if (!conf)
+                return;
+
+        if (conf->file_layouts) {
+                for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                        FREE (conf->file_layouts[idx]);
+                }
+                FREE (conf->file_layouts);
+        }
+
+        if (conf->default_dir_layout) {
+                FREE (conf->default_dir_layout);
+        }
+
+        if (conf->subvolumes) {
+                FREE (conf->subvolumes);
+        }
+
+        if (conf->subvolume_status) {
+                FREE (conf->subvolume_status);
+        }
+
+        FREE (conf);
+}
+
+/*
+ * Translator setup for dht.
+ *
+ * Requires at least one child.  Allocates the configuration, reads the
+ * boolean "lookup-unhashed" option into conf->search_unhashed, initialises
+ * the subvolume tables and layouts, and publishes the configuration in
+ * this->private.
+ *
+ * Returns 0 on success, -1 on failure (everything allocated so far is
+ * released).
+ */
+int
+init (xlator_t *this)
+{
+        dht_conf_t *conf         = NULL;
+        char       *unhashed_str = NULL;
+        int         idx          = 0;
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "DHT needs more than one child defined");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = CALLOC (1, sizeof (*conf));
+        if (!conf) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto cleanup;
+        }
+
+        conf->search_unhashed = 0;
+
+        if (dict_get_str (this->options, "lookup-unhashed",
+                          &unhashed_str) == 0) {
+                gf_string2boolean (unhashed_str, &conf->search_unhashed);
+        }
+
+        if (dht_init_subvolumes (this, conf) == -1)
+                goto cleanup;
+
+        if (dht_layouts_init (this, conf) == -1)
+                goto cleanup;
+
+        LOCK_INIT (&conf->subvolume_lock);
+
+        conf->gen = 1;
+
+        this->private = conf;
+
+        return 0;
+
+cleanup:
+        if (!conf)
+                return -1;
+
+        if (conf->file_layouts) {
+                for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                        FREE (conf->file_layouts[idx]);
+                }
+                FREE (conf->file_layouts);
+        }
+
+        if (conf->default_dir_layout) {
+                FREE (conf->default_dir_layout);
+        }
+
+        if (conf->subvolumes) {
+                FREE (conf->subvolumes);
+        }
+
+        if (conf->subvolume_status) {
+                FREE (conf->subvolume_status);
+        }
+
+        FREE (conf);
+
+        return -1;
+}
+
+
+/*
+ * File-operation table of the dht translator: each entry maps a fop to its
+ * dht_* implementation.  The entries inside the #if 0 section are compiled
+ * out.
+ */
+struct xlator_fops fops = {
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .stat = dht_stat,
+ .chmod = dht_chmod,
+ .chown = dht_chown,
+ .fchown = dht_fchown,
+ .fchmod = dht_fchmod,
+ .fstat = dht_fstat,
+ .utimens = dht_utimens,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+#if 0
+ .setdents = dht_setdents,
+ .getdents = dht_getdents,
+ .checksum = dht_checksum,
+#endif
+};
+
+
+/* Management-operation table: this translator sets no entries. */
+struct xlator_mops mops = {
+};
+
+
+/*
+ * Callback table: only 'forget' is wired (to dht_forget); the release and
+ * releasedir entries are commented out.
+ */
+struct xlator_cbks cbks = {
+// .release = dht_release,
+// .releasedir = dht_releasedir,
+ .forget = dht_forget
+};
+
+
+/*
+ * Volume options declared by this translator.  "lookup-unhashed" is a
+ * boolean; init() reads it into conf->search_unhashed.  The entry with a
+ * NULL key ends the list.
+ */
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
new file mode 100644
index 00000000000..6333e002fbc
--- /dev/null
+++ b/xlators/cluster/dht/src/nufa.c
@@ -0,0 +1,684 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.c"
+
+/* TODO: all the TODOs listed in dht.c apply here as well */
+
+/*
+ * Reply handler for the lookup that nufa_lookup() sends to the local
+ * volume first.
+ *
+ *  - Entry missing and "lookup-unhashed" enabled: search every subvolume.
+ *  - Any other failure: retry the lookup on the hashed subvolume.
+ *  - Regular file (neither directory nor linkfile): transform the inode
+ *    number, attach the pre-set layout of the replying subvolume to the
+ *    inode, then look up on the hashed subvolume.
+ *  - Directory: look it up on all subvolumes (dht_lookup_dir_cbk).
+ *  - Linkfile: follow the link to the subvolume it names, or search
+ *    everywhere if it names none.
+ *
+ * On an unrecoverable error the frame is unwound with op_ret -1.
+ * Always returns 0.
+ */
+int
+nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       inode_t *inode, struct stat *stbuf, dict_t *xattr)
+{
+        dht_layout_t *layout      = NULL;
+        xlator_t     *subvol      = NULL;
+        char          is_linkfile = 0;
+        char          is_dir      = 0;
+        dht_conf_t   *conf        = NULL;
+        dht_local_t  *local       = NULL;
+        loc_t        *loc         = NULL;
+        int           i           = 0;
+        call_frame_t *prev        = NULL;
+        int           call_cnt    = 0;
+
+
+        conf  = this->private;
+
+        prev  = cookie;
+        local = frame->local;
+        loc   = &local->loc;
+
+        if (ENTRY_MISSING (op_ret, op_errno)) {
+                if (conf->search_unhashed) {
+                        local->op_errno = ENOENT;
+                        dht_lookup_everywhere (frame, this, loc);
+                        return 0;
+                }
+        }
+
+        if (op_ret == -1)
+                goto out;
+
+        is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+        is_dir      = check_is_dir (inode, stbuf, xattr);
+
+        if (!is_dir && !is_linkfile) {
+                /* non-directory and not a linkfile */
+
+                dht_itransform (this, prev->this, stbuf->st_ino,
+                                &stbuf->st_ino);
+
+                layout = dht_layout_for_subvol (this, prev->this);
+                if (!layout) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no pre-set layout for subvolume %s",
+                                prev->this->name);
+                        op_ret   = -1;
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                inode_ctx_put (inode, this, (uint64_t)(long)layout);
+                goto out;
+        }
+
+        if (is_dir) {
+                call_cnt        = conf->subvolume_cnt;
+                local->call_cnt = call_cnt;
+
+                local->inode = inode_ref (inode);
+                local->xattr = dict_ref (xattr);
+
+                local->op_ret   = 0;
+                local->op_errno = 0;
+
+                local->layout = dht_layout_new (this, conf->subvolume_cnt);
+                if (!local->layout) {
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "memory allocation failed :(");
+                        goto err;
+                }
+
+                for (i = 0; i < call_cnt; i++) {
+                        STACK_WIND (frame, dht_lookup_dir_cbk,
+                                    conf->subvolumes[i],
+                                    conf->subvolumes[i]->fops->lookup,
+                                    &local->loc, local->xattr_req);
+                }
+
+                /*
+                 * The directory lookups now own the frame; never fall
+                 * through into the linkfile branch and wind a second,
+                 * unaccounted request on it.
+                 */
+                return 0;
+        }
+
+        if (is_linkfile) {
+                subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+                if (!subvol) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "linkfile not having link subvolume. path=%s",
+                                loc->path);
+                        dht_lookup_everywhere (frame, this, loc);
+                        return 0;
+                }
+
+                STACK_WIND (frame, dht_lookup_linkfile_cbk,
+                            subvol, subvol->fops->lookup,
+                            &local->loc, local->xattr_req);
+        }
+
+        return 0;
+
+out:
+        if (!local->hashed_subvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        local->loc.path);
+                /*
+                 * This label is also reached after a successful local
+                 * lookup (op_ret == 0); force a failure result so the
+                 * caller does not see success paired with EINVAL.
+                 */
+                op_ret   = -1;
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        STACK_WIND (frame, dht_lookup_cbk,
+                    local->hashed_subvol, local->hashed_subvol->fops->lookup,
+                    &local->loc, local->xattr_req);
+
+        return 0;
+
+ err:
+        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+        return 0;
+}
+
+/*
+ * lookup fop of the nufa translator.
+ *
+ * Revalidation of a known inode is sent to the subvolumes of its cached
+ * layout (dht_revalidate_cbk).  A fresh lookup is sent to the local volume
+ * only; nufa_local_lookup_cbk() decides where to go from there.
+ *
+ * The request dictionary (the caller's, or a new one when none is given)
+ * is extended with the keys "trusted.glusterfs.dht" and, for fresh
+ * lookups, "trusted.glusterfs.dht.linkto".
+ *
+ * Failures are reported by unwinding with op_ret -1: ENOMEM for allocation
+ * failures, EINVAL for a revalidate without a cached layout, EAGAIN when
+ * the cached layout's generation is older than conf->gen.
+ * Always returns 0.
+ */
+int
+nufa_lookup (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, dict_t *xattr_req)
+{
+        xlator_t     *hashed_subvol = NULL;
+        xlator_t     *cached_subvol = NULL;
+        xlator_t     *subvol        = NULL;
+        dht_local_t  *local         = NULL;
+        dht_conf_t   *conf          = NULL;
+        int           ret           = -1;
+        int           op_errno      = -1;
+        dht_layout_t *layout        = NULL;
+        int           i             = 0;
+        int           call_cnt      = 0;
+
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        ret = loc_dup (loc, &local->loc);
+        if (ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "copying location failed for path=%s",
+                        loc->path);
+                goto err;
+        }
+
+        if (xattr_req) {
+                local->xattr_req = dict_ref (xattr_req);
+        } else {
+                local->xattr_req = dict_new ();
+        }
+
+        /* every path below stores keys in, and winds with, this dictionary */
+        if (!local->xattr_req) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+        cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+
+        local->cached_subvol = cached_subvol;
+        local->hashed_subvol = hashed_subvol;
+
+        if (is_revalidate (loc)) {
+                layout = dht_layout_get (this, loc->inode);
+
+                if (!layout) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "revalidate without cache. path=%s",
+                                loc->path);
+                        op_errno = EINVAL;
+                        goto err;
+                }
+
+                if (layout->gen && (layout->gen < conf->gen)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "incomplete layout failure for path=%s",
+                                loc->path);
+                        op_errno = EAGAIN;
+                        goto err;
+                }
+
+                local->inode  = inode_ref (loc->inode);
+                local->st_ino = loc->inode->ino;
+
+                local->call_cnt = layout->cnt;
+                call_cnt        = local->call_cnt;
+
+                /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+                 *       revalidates directly go to the cached-subvolume.
+                 */
+                ret = dict_set_uint32 (local->xattr_req,
+                                       "trusted.glusterfs.dht", 4 * 4);
+
+                for (i = 0; i < layout->cnt; i++) {
+                        subvol = layout->list[i].xlator;
+
+                        STACK_WIND (frame, dht_revalidate_cbk,
+                                    subvol, subvol->fops->lookup,
+                                    loc, local->xattr_req);
+
+                        /* no layout access after the last request is out */
+                        if (!--call_cnt)
+                                break;
+                }
+        } else {
+                ret = dict_set_uint32 (local->xattr_req,
+                                       "trusted.glusterfs.dht", 4 * 4);
+
+                ret = dict_set_uint32 (local->xattr_req,
+                                       "trusted.glusterfs.dht.linkto", 256);
+
+                /* Send it to only local volume */
+                STACK_WIND (frame, nufa_local_lookup_cbk,
+                            conf->local_volume,
+                            conf->local_volume->fops->lookup,
+                            loc, local->xattr_req);
+        }
+
+        return 0;
+
+err:
+        /* -1 means no explicit code was chosen: fall back to errno */
+        op_errno = (op_errno == -1) ? errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Reply handler for the linkfile created ahead of a nufa create.
+ *
+ * If the linkfile creation failed (op_ret == -1) the create is unwound
+ * with that errno.  Otherwise the real file is created on the local volume
+ * using the loc, flags, mode and fd saved in the frame's local.
+ *
+ * Always returns 0.
+ */
+int
+nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int op_ret, int op_errno,
+                                 inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t *local = frame->local;
+        dht_conf_t  *conf  = this->private;
+
+        if (op_ret == -1) {
+                DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, dht_create_cbk,
+                    conf->local_volume, conf->local_volume->fops->create,
+                    &local->loc, local->flags, local->mode, local->fd);
+
+        return 0;
+}
+
+/*
+ * create fop of the nufa translator.
+ *
+ * When the hashed subvolume for @loc is the local volume, the file is
+ * created there directly.  Otherwise a linkfile is requested first via
+ * dht_linkfile_create(), and nufa_create_linkfile_create_cbk() then
+ * creates the file on the local volume.
+ *
+ * Failures unwind with op_ret -1 (ENOMEM on allocation failure, ENOENT
+ * when no hashed subvolume is found).  Always returns 0.
+ */
+int
+nufa_create (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        dht_local_t *local    = NULL;
+        dht_conf_t  *conf     = NULL;
+        xlator_t    *hashed   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        hashed = dht_subvol_get_hashed (this, loc);
+        if (!hashed) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                op_errno = ENOENT;
+                goto err;
+        }
+
+        if (hashed == conf->local_volume) {
+                /* hashed location is already local: plain create */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "creating %s on %s", loc->path, hashed->name);
+
+                STACK_WIND (frame, dht_create_cbk,
+                            hashed, hashed->fops->create,
+                            loc, flags, mode, fd);
+
+                return 0;
+        }
+
+        /* create a link file instead of actual file */
+        if (loc_copy (&local->loc, loc) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->fd    = fd_ref (fd);
+        local->mode  = mode;
+        local->flags = flags;
+
+        dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+                             conf->local_volume, hashed, loc);
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * Reply handler for the linkfile created ahead of a nufa mknod.
+ *
+ * A negative op_ret is passed straight back to the caller.  Otherwise the
+ * node itself is created on the local volume using the loc, mode and rdev
+ * saved in the frame's local.
+ *
+ * Always returns 0.
+ */
+int
+nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int op_ret, int op_errno,
+                         inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t *local = frame->local;
+        dht_conf_t  *conf  = this->private;
+
+        if (op_ret < 0) {
+                DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+                return 0;
+        }
+
+        STACK_WIND (frame, dht_newfile_cbk,
+                    conf->local_volume,
+                    conf->local_volume->fops->mknod,
+                    &local->loc, local->mode, local->rdev);
+
+        return 0;
+}
+
+
+/*
+ * mknod fop of the nufa translator.
+ *
+ * When the hashed subvolume for @loc is the local volume, the node is
+ * created there directly.  Otherwise a linkfile is requested first via
+ * dht_linkfile_create(), and nufa_mknod_linkfile_cbk() then creates the
+ * node on the local volume.
+ *
+ * Failures unwind with op_ret -1 (ENOMEM on allocation failure, ENOENT
+ * when no hashed subvolume is found).  Always returns 0.
+ */
+int
+nufa_mknod (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, mode_t mode, dev_t rdev)
+{
+        dht_local_t *local    = NULL;
+        dht_conf_t  *conf     = NULL;
+        xlator_t    *hashed   = NULL;
+        int          op_errno = -1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+
+        conf = this->private;
+
+        local = dht_local_init (frame);
+        if (!local) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        hashed = dht_subvol_get_hashed (this, loc);
+        if (!hashed) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume in layout for path=%s",
+                        loc->path);
+                op_errno = ENOENT;
+                goto err;
+        }
+
+        if (conf->local_volume == hashed) {
+                /* hashed location is already local: plain mknod */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "creating %s on %s", loc->path, hashed->name);
+
+                STACK_WIND (frame, dht_newfile_cbk,
+                            hashed, hashed->fops->mknod,
+                            loc, mode, rdev);
+
+                return 0;
+        }
+
+        /* Create linkfile first */
+        if (loc_copy (&local->loc, loc) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->mode = mode;
+        local->rdev = rdev;
+
+        dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+                             conf->local_volume, hashed, loc);
+        return 0;
+
+err:
+        if (op_errno == -1)
+                op_errno = errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * Translator notification entry point: forwards @event and @data to
+ * dht_notify() and returns its result.  Extra variadic arguments are not
+ * examined.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+        return dht_notify (this, event, data);
+}
+
+/*
+ * Translator teardown: releases the configuration stored in this->private
+ * (per-subvolume file layouts, default directory layout, subvolume array
+ * and status array, then the configuration itself).  Does nothing when no
+ * configuration is attached.
+ */
+void
+fini (xlator_t *this)
+{
+        dht_conf_t *conf = this->private;
+        int         idx  = 0;
+
+        if (!conf)
+                return;
+
+        if (conf->file_layouts) {
+                for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                        FREE (conf->file_layouts[idx]);
+                }
+                FREE (conf->file_layouts);
+        }
+
+        if (conf->default_dir_layout) {
+                FREE (conf->default_dir_layout);
+        }
+
+        if (conf->subvolumes) {
+                FREE (conf->subvolumes);
+        }
+
+        if (conf->subvolume_status) {
+                FREE (conf->subvolume_status);
+        }
+
+        FREE (conf);
+}
+
+/*
+ * Translator setup for nufa.
+ *
+ * Performs the same configuration as the dht translator (subvolume tables,
+ * layouts, "lookup-unhashed") and additionally selects the local volume:
+ * the child whose name equals, in order of precedence, the
+ * "local-volume-name" option, this machine's hostname, or "localhost" when
+ * the hostname cannot be obtained.
+ *
+ * Returns 0 on success, -1 on failure (including when no child carries the
+ * selected name); everything allocated so far is released on failure.
+ */
+int
+init (xlator_t *this)
+{
+        dht_conf_t    *conf                = NULL;
+        xlator_list_t *trav                = NULL;
+        data_t        *data                = NULL;
+        char          *local_volname       = NULL;
+        char          *lookup_unhashed_str = NULL;
+        int            ret                 = -1;
+        int            i                   = 0;
+        char           my_hostname[256];
+
+        if (!this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "DHT needs more than one child defined");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = CALLOC (1, sizeof (*conf));
+        if (!conf) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        conf->search_unhashed = 0;
+
+        if (dict_get_str (this->options, "lookup-unhashed",
+                          &lookup_unhashed_str) == 0) {
+                gf_string2boolean (lookup_unhashed_str,
+                                   &conf->search_unhashed);
+        }
+
+        ret = dht_init_subvolumes (this, conf);
+        if (ret == -1) {
+                goto err;
+        }
+
+        ret = dht_layouts_init (this, conf);
+        if (ret == -1) {
+                goto err;
+        }
+
+        LOCK_INIT (&conf->subvolume_lock);
+
+        conf->gen = 1;
+
+        local_volname = "localhost";
+        ret = gethostname (my_hostname, sizeof (my_hostname));
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not find hostname (%s)",
+                        strerror (errno));
+        }
+
+        if (ret == 0) {
+                /*
+                 * gethostname() does not promise a terminating NUL when
+                 * the name is truncated; the buffer is used with strcmp()
+                 * and "%s" below, so terminate it explicitly.
+                 */
+                my_hostname[sizeof (my_hostname) - 1] = '\0';
+                local_volname = my_hostname;
+        }
+
+        /* an explicit option overrides the hostname-derived default */
+        data = dict_get (this->options, "local-volume-name");
+        if (data) {
+                local_volname = data->data;
+        }
+
+        trav = this->children;
+        while (trav) {
+                if (strcmp (trav->xlator->name, local_volname) == 0)
+                        break;
+                trav = trav->next;
+        }
+
+        if (!trav) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not find subvolume named '%s'. "
+                        "Please define volume with the name as the hostname "
+                        "or override it with 'option local-volume-name'",
+                        local_volname);
+                goto err;
+        }
+        /* The volume specified exists */
+        conf->local_volume = trav->xlator;
+
+        this->private = conf;
+
+        return 0;
+
+err:
+        if (conf) {
+                if (conf->file_layouts) {
+                        for (i = 0; i < conf->subvolume_cnt; i++) {
+                                FREE (conf->file_layouts[i]);
+                        }
+                        FREE (conf->file_layouts);
+                }
+
+                if (conf->default_dir_layout) {
+                        FREE (conf->default_dir_layout);
+                }
+
+                if (conf->subvolumes) {
+                        FREE (conf->subvolumes);
+                }
+
+                if (conf->subvolume_status) {
+                        FREE (conf->subvolume_status);
+                }
+
+                FREE (conf);
+        }
+
+        return -1;
+}
+
+
+/*
+ * File-operation table of the nufa translator.  lookup, create and mknod
+ * use the nufa_* implementations from this file; every other entry points
+ * at the shared dht_* implementation.  The entries inside the #if 0
+ * section are compiled out.
+ */
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .chmod = dht_chmod,
+ .chown = dht_chown,
+ .fchown = dht_fchown,
+ .fchmod = dht_fchmod,
+ .fstat = dht_fstat,
+ .utimens = dht_utimens,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+#if 0
+ .setdents = dht_setdents,
+ .getdents = dht_getdents,
+ .checksum = dht_checksum,
+#endif
+};
+
+
+/* Management-operation table: this translator sets no entries. */
+struct xlator_mops mops = {
+};
+
+
+/*
+ * Callback table: only 'forget' is wired (to dht_forget); the release and
+ * releasedir entries are commented out.
+ */
+struct xlator_cbks cbks = {
+// .release = dht_release,
+// .releasedir = dht_releasedir,
+ .forget = dht_forget
+};
+
+
+/*
+ * Volume options declared by this translator.  "local-volume-name" is
+ * typed as a translator name; init() uses it to pick conf->local_volume.
+ * "lookup-unhashed" is a boolean; init() reads it into
+ * conf->search_unhashed.  The entry with a NULL key ends the list.
+ */
+struct volume_options options[] = {
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+ { .key = {"lookup-unhashed"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/ha/Makefile.am b/xlators/cluster/ha/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/cluster/ha/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
new file mode 100644
index 00000000000..069a0dcded2
--- /dev/null
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = ha.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+ha_la_LDFLAGS = -module -avoidversion
+
+ha_la_SOURCES = ha-helpers.c ha.c
+ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = ha.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
new file mode 100644
index 00000000000..8193caf27f2
--- /dev/null
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -0,0 +1,191 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "dict.h"
+#include "compat-errno.h"
+#include "ha.h"
+
+/*
+ * Prepare frame->local for an fd based fop.
+ *
+ * When frame->local is already set nothing is done and 0 is returned.
+ * Otherwise a new ha_local_t is allocated, the per-child state kept in the
+ * fd context is copied into local->state, the child to try first is chosen
+ * (pvt->pref_subvol unless it is -1, else the fd's own 'active' child, else
+ * the first child marked up) and local->tries is set to the number of
+ * children marked up. On success a reference on fd is stored in local->fd.
+ *
+ * Returns 0 on success and a negative value on failure: the fd_ctx_get()
+ * result when the fd has no context, -ENOMEM when an allocation fails and
+ * -ENOTCONN when no child is marked up. Callers negate the value to get
+ * op_errno.
+ */
+int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd)
+{
+	ha_local_t   *local       = NULL;
+	int           i           = -1;
+	ha_private_t *pvt         = NULL;
+	int           child_count = 0;
+	int           ret         = -1;
+	int           fd_active   = -1;
+	hafd_t       *hafdp       = NULL;
+	xlator_t     *this        = NULL;
+	uint64_t      tmp_hafdp   = 0;
+
+	this = frame->this;
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+
+	if (local == NULL) {
+		ret = fd_ctx_get (fd, this, &tmp_hafdp);
+		if (ret < 0) {
+			goto out;
+		}
+		hafdp = (hafd_t *)(long)tmp_hafdp;
+		local = frame->local = CALLOC (1, sizeof (*local));
+		if (local == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		local->state = CALLOC (1, child_count);
+		if (local->state == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* read 'active' together with 'fdstate' under the fd lock so
+		 * both values come from the same snapshot */
+		LOCK (&hafdp->lock);
+		fd_active = hafdp->active;
+		memcpy (local->state, hafdp->fdstate, child_count);
+		UNLOCK (&hafdp->lock);
+
+		/* take care of the preferred subvolume */
+		if (pvt->pref_subvol == -1)
+			local->active = fd_active;
+		else
+			local->active = pvt->pref_subvol;
+
+		/* in case the preferred subvolume is down */
+		if ((local->active != -1) && (local->state[local->active] == 0))
+			local->active = -1;
+
+		for (i = 0; i < child_count; i++) {
+			if (local->state[i]) {
+				if (local->active == -1)
+					local->active = i;
+				local->tries++;
+			}
+		}
+		if (local->active == -1) {
+			/* local->fd is not set on this path, so
+			 * ha_handle_cbk() would never release the state
+			 * copy; drop it here */
+			FREE (local->state);
+			local->state = NULL;
+			ret = -ENOTCONN;
+			goto out;
+		}
+		local->fd = fd_ref (fd);
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Common reply handling for fops that are sent to one child at a time.
+ *
+ * cookie carries the index of the child that replied. A failed reply is
+ * logged. When the failure is ENOTCONN the child is marked down in the fd
+ * context (fd based calls only), local->tries is decremented and, if other
+ * children are left, local->active is advanced to the next child marked up
+ * in local->state and the saved stub is resumed.
+ *
+ * Returns -1 when the call was resumed on another child (the caller must
+ * not unwind), 0 when the reply is final. In the final case the stub is
+ * destroyed and, for fd based calls, local->state is freed and the fd
+ * reference dropped.
+ */
+int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno)
+{
+	xlator_t      *xl         = NULL;
+	ha_private_t  *pvt        = NULL;
+	xlator_t     **children   = NULL;
+	int            prev_child = -1;
+	hafd_t        *hafdp      = NULL;
+	int            ret        = -1;
+	call_stub_t   *stub       = NULL;
+	ha_local_t    *local      = NULL;
+	uint64_t       tmp_hafdp  = 0;
+
+	xl = frame->this;
+	pvt = xl->private;
+	children = pvt->children;
+	prev_child = (long) cookie;
+	local = frame->local;
+
+	if (op_ret == -1) {
+		gf_log (xl->name, GF_LOG_ERROR ,"(child=%s) (op_ret=%d op_errno=%s)",
+			children[prev_child]->name, op_ret, strerror (op_errno));
+	}
+	if (op_ret == -1 && (op_errno == ENOTCONN)) {
+		ret = 0;
+		if (local->fd) {
+			ret = fd_ctx_get (local->fd, xl, &tmp_hafdp);
+		}
+		hafdp = (hafd_t *)(long)tmp_hafdp;
+		if (ret == 0) {
+			if (local->fd) {
+				LOCK(&hafdp->lock);
+				hafdp->fdstate[prev_child] = 0;
+				UNLOCK(&hafdp->lock);
+			}
+			local->tries--;
+			/* a retry needs the saved stub; if the caller failed
+			 * to create one, report the error instead of
+			 * resuming a NULL stub */
+			if (local->tries != 0 && local->stub != NULL) {
+				do {
+					local->active = (local->active + 1) % pvt->child_count;
+				} while (!local->state[local->active]);
+				stub = local->stub;
+				local->stub = NULL;
+				call_resume (stub);
+				return -1;
+			}
+		}
+	}
+	if (local->stub) {
+		call_stub_destroy (local->stub);
+		local->stub = NULL;
+	}
+	if (local->fd) {
+		/* only fd based calls own a private copy of the state */
+		FREE (local->state);
+		local->state = NULL;
+		fd_unref (local->fd);
+	}
+	return 0;
+}
+
+/*
+ * Prepare frame->local for an inode based fop.
+ *
+ * Does nothing (returns 0) when frame->local already exists. Otherwise a
+ * new ha_local_t is allocated, local->state is pointed at the per-child
+ * state stored in the inode context, the first child to try is selected
+ * (pvt->pref_subvol if that child is marked up, else the first child marked
+ * up) and local->tries counts the children marked up.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, the negative
+ * inode_ctx_get() result if the inode has no context, and -ENOTCONN if no
+ * child is marked up.
+ */
+int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode)
+{
+	xlator_t     *xl      = frame->this;
+	ha_private_t *priv    = xl->private;
+	ha_local_t   *lcl     = frame->local;
+	uint64_t      ctx_val = 0;
+	int           idx     = 0;
+	int           rc      = -1;
+
+	if (lcl != NULL)
+		return 0;
+
+	lcl = frame->local = CALLOC (1, sizeof (*lcl));
+	if (lcl == NULL)
+		return -ENOMEM;
+
+	lcl->active = priv->pref_subvol;
+
+	rc = inode_ctx_get (inode, xl, &ctx_val);
+	if (rc < 0)
+		return rc;
+	lcl->state = (char *)(long)ctx_val;
+
+	/* the preferred child is only usable when it is marked up */
+	if (lcl->active != -1 && lcl->state[lcl->active] == 0)
+		lcl->active = -1;
+
+	for (idx = 0; idx < priv->child_count; idx++) {
+		if (!lcl->state[idx])
+			continue;
+		if (lcl->active == -1)
+			lcl->active = idx;
+		lcl->tries++;
+	}
+
+	if (lcl->active == -1)
+		return -ENOTCONN;
+
+	return 0;
+}
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
new file mode 100644
index 00000000000..4542bdc7e86
--- /dev/null
+++ b/xlators/cluster/ha/src/ha.c
@@ -0,0 +1,3479 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
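+/* generate errors randomly, code is simple now, a better algorithm
+ * can be written to decide what error is to be returned and when
+ *
+ * NOTE(review): this comment does not appear to describe the code in this
+ * file - confirm and remove if it is left over from another translator.
+ */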
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "dict.h"
+#include "compat-errno.h"
+#include "ha.h"
+
+/*
+ * TODO:
+ * - dbench fails if ha over server side afr
+ * - lock calls - lock on all subvols.
+ * - support preferred-subvolume option. code already there.
+ * - do not alloc the call-stub in case only one subvol is up.
+ */
+
+/*
+ * forget callback: remove this translator's context from the inode and
+ * free the per-child state array that was stored in it. Always returns 0.
+ */
+int
+ha_forget (xlator_t *this, inode_t *inode)
+{
+	uint64_t  ctx_value   = 0;
+	char     *child_state = NULL;
+
+	if (inode_ctx_del (inode, this, &ctx_value) != 0)
+		return 0;
+
+	child_state = (char *)(long)ctx_value;
+	FREE (child_state);
+
+	return 0;
+}
+
+/*
+ * Callback for the lookups wound to every child by ha_lookup().
+ *
+ * For a fresh lookup a successful reply marks the replying child as up in
+ * the inode's state array. For a revalidate the reply is compared with the
+ * recorded state and a mismatch flags a revalidate error. The first
+ * successful reply supplies the stat buffer and dict that are returned.
+ * When the last reply arrives the frame is unwound; a revalidate error is
+ * turned into -1/EIO.
+ */
+int32_t
+ha_lookup_cbk (call_frame_t *frame,
+	       void *cookie,
+	       xlator_t *this,
+	       int32_t op_ret,
+	       int32_t op_errno,
+	       inode_t *inode,
+	       struct stat *buf,
+	       dict_t *dict)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	int            child_count  = 0, i = 0, callcnt = 0;
+	char          *state        = NULL;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_state    = 0;
+	dict_t        *unwind_dict  = NULL;
+	inode_t       *unwind_inode = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++) {
+		if (pvt->children[i] == prev_frame->this)
+			break;
+	}
+	if ((op_ret == -1) && (op_errno != ENOENT)) {
+		gf_log (this->name, GF_LOG_ERROR, "(child=%s) (op_ret=%d op_errno=%s)",
+			children[i]->name, op_ret, strerror (op_errno));
+	}
+	inode_ctx_get (local->inode, this, &tmp_state);
+	state = (char *)(long)tmp_state;
+
+	LOCK (&frame->lock);
+	if (local->revalidate == 1) {
+		if ((!op_ret) != state[i]) {
+			local->revalidate_error = 1;
+			gf_log (this->name, GF_LOG_DEBUG, "revalidate error on %s",
+				pvt->children[i]->name);
+		}
+	} else {
+		if (op_ret == 0) {
+			state[i] = 1;
+		}
+	}
+	if (local->op_ret == -1 && op_ret == 0) {
+		local->op_ret = 0;
+		local->buf = *buf;
+		if (dict)
+			local->dict = dict_ref (dict);
+	}
+	/* ENOTCONN from a disconnected child must not replace an error
+	 * reported by a connected one; compare the errno, not op_ret */
+	if (op_ret == -1 && op_errno != ENOTCONN)
+		local->op_errno = op_errno;
+	callcnt = --local->call_count;
+	UNLOCK (&frame->lock);
+
+	if (callcnt == 0) {
+		unwind_dict = local->dict;
+		unwind_inode = local->inode;
+		if (local->revalidate_error == 1) {
+			local->op_ret = -1;
+			local->op_errno = EIO;
+			gf_log (this->name, GF_LOG_DEBUG, "revalidate error, returning EIO");
+		}
+		STACK_UNWIND (frame,
+			      local->op_ret,
+			      local->op_errno,
+			      unwind_inode,
+			      &local->buf,
+			      unwind_dict);
+		if (unwind_inode)
+			inode_unref (unwind_inode);
+		if (unwind_dict)
+			dict_unref (unwind_dict);
+	}
+	return 0;
+}
+
+/*
+ * lookup fop: wind the lookup to every child.
+ *
+ * If the inode has no context from this translator yet, a zeroed per-child
+ * state array is allocated and stored in the inode context; otherwise the
+ * call is treated as a revalidate. Replies are collected by
+ * ha_lookup_cbk(). If an allocation fails the frame is unwound with
+ * -1/ENOMEM.
+ */
+int32_t
+ha_lookup (call_frame_t *frame,
+	   xlator_t *this,
+	   loc_t *loc,
+	   dict_t *xattr_req)
+{
+	ha_local_t    *local       = NULL;
+	ha_private_t  *pvt         = NULL;
+	int            child_count = 0, i = 0;
+	char          *state       = NULL;
+	xlator_t     **children    = NULL;
+	int            ret         = -1;
+
+	pvt = this->private;
+	child_count = pvt->child_count;
+	children = pvt->children;
+
+	local = CALLOC (1, sizeof (*local));
+	if (local == NULL)
+		goto err;
+
+	ret = inode_ctx_get (loc->inode, this, NULL);
+	if (ret) {
+		state = CALLOC (1, child_count);
+		if (state == NULL)
+			goto err;
+		inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
+	} else
+		local->revalidate = 1;
+
+	/* publish local only once nothing can fail any more */
+	frame->local = local;
+	local->inode = inode_ref (loc->inode);
+	local->op_ret = -1;
+	local->op_errno = ENOTCONN;
+	local->call_count = child_count;
+
+	for (i = 0; i < child_count; i++) {
+		STACK_WIND (frame,
+			    ha_lookup_cbk,
+			    children[i],
+			    children[i]->fops->lookup,
+			    loc,
+			    xattr_req);
+	}
+	return 0;
+err:
+	if (local != NULL) {
+		FREE (local);
+	}
+	STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
+	return 0;
+}
+
+/*
+ * Callback for ha_stat(). The reply is handed to ha_handle_cbk(); when that
+ * returns 0 the result is unwound unchanged, otherwise nothing is unwound
+ * here.
+ */
+int32_t
+ha_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * stat fop. Sets up frame->local with ha_alloc_init_inode(), saves a stub
+ * of this call in local->stub and winds stat to the child selected by
+ * local->active, passing that child index as the cookie. If the setup
+ * fails the frame is unwound with -1 and the negated setup result as
+ * op_errno.
+ */
+int32_t
+ha_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_stat_stub (frame, ha_stat, loc);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_stat_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->stat,
+			   loc);
+	return 0;
+}
+
+/*
+ * Callback for ha_chmod(). Unwinds the reply as-is unless ha_handle_cbk()
+ * returns non-zero, in which case nothing is unwound here.
+ */
+int32_t
+ha_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * chmod fop. After ha_alloc_init_inode() has set up frame->local, a stub of
+ * the call is stored in local->stub and chmod is wound to the child at
+ * index local->active (also passed as the cookie). A failed setup unwinds
+ * -1 with the negated setup result as op_errno.
+ */
+int32_t
+ha_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_chmod_stub (frame, ha_chmod, loc, mode);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_chmod_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->chmod,
+			   loc,
+			   mode);
+	return 0;
+}
+
+/*
+ * Callback for ha_fchmod(). ha_handle_cbk() processes the reply first; the
+ * result is unwound unchanged only when it returns 0.
+ */
+int32_t
+ha_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * fchmod fop. frame->local is set up from the fd by ha_alloc_init_fd(); a
+ * stub of the call goes into local->stub and fchmod is wound to the child
+ * at index local->active, which is also the cookie. When the setup fails
+ * the frame is unwound with -1 and the negated setup result as op_errno.
+ */
+int32_t
+ha_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_fd (frame, fd);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_fchmod_stub (frame, ha_fchmod, fd, mode);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_fchmod_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->fchmod,
+			   fd,
+			   mode);
+	return 0;
+}
+
+/*
+ * Callback for ha_chown(). The reply is unwound unchanged when
+ * ha_handle_cbk() returns 0; on any other return nothing is unwound here.
+ */
+int32_t
+ha_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * chown fop. Sets up frame->local with ha_alloc_init_inode(), saves a stub
+ * of the call in local->stub and winds chown to the child at index
+ * local->active (passed as the cookie). If the setup fails the frame is
+ * unwound with -1 and the errno derived from the setup result.
+ */
+int32_t
+ha_chown (call_frame_t *frame,
+	  xlator_t *this,
+	  loc_t *loc,
+	  uid_t uid,
+	  gid_t gid)
+{
+	ha_local_t *local    = NULL;
+	int         op_errno = 0;
+
+	op_errno = ha_alloc_init_inode (frame, loc->inode);
+	if (op_errno < 0) {
+		op_errno = -op_errno;
+		goto err;
+	}
+	local = frame->local;
+	local->stub = fop_chown_stub (frame, ha_chown, loc, uid, gid);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_chown_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->chown,
+			   loc,
+			   uid,
+			   gid);
+	return 0;
+err:
+	/* report the real failure (e.g. ENOMEM), not a fixed ENOTCONN */
+	STACK_UNWIND (frame, -1, op_errno, NULL);
+	return 0;
+}
+
+/*
+ * Callback for ha_fchown(). Passes the reply through ha_handle_cbk() and
+ * unwinds it unchanged when that returns 0.
+ */
+int32_t
+ha_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * fchown fop. ha_alloc_init_fd() prepares frame->local from the fd; the
+ * call is saved as a stub in local->stub and fchown is wound to the child
+ * at index local->active, with that index as the cookie. A failed setup
+ * unwinds -1 with the negated setup result as op_errno.
+ */
+int32_t
+ha_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd,
+	   uid_t uid, gid_t gid)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_fd (frame, fd);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_fchown_stub (frame, ha_fchown, fd, uid, gid);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_fchown_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->fchown,
+			   fd,
+			   uid,
+			   gid);
+	return 0;
+}
+
+/*
+ * Callback for ha_truncate(). The reply is unwound unchanged unless
+ * ha_handle_cbk() returns non-zero.
+ */
+int32_t
+ha_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+		 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * truncate fop. Uses ha_alloc_init_inode() to set up frame->local, keeps a
+ * stub of the call in local->stub and winds truncate to the child at index
+ * local->active (the cookie carries the same index). On setup failure the
+ * frame is unwound with -1 and the negated setup result as op_errno.
+ */
+int32_t
+ha_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_truncate_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->truncate,
+			   loc,
+			   offset);
+	return 0;
+}
+
+/*
+ * Callback for ha_ftruncate(). Unwinds the reply unchanged once
+ * ha_handle_cbk() has returned 0 for it.
+ */
+int32_t
+ha_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+		  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * ftruncate fop. frame->local comes from ha_alloc_init_fd(); a stub of the
+ * call is kept in local->stub and ftruncate is wound to the child at index
+ * local->active, which is also passed as the cookie. A failed setup
+ * unwinds -1 with the negated setup result as op_errno.
+ */
+int32_t
+ha_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_fd (frame, fd);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_ftruncate_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->ftruncate,
+			   fd,
+			   offset);
+	return 0;
+}
+
+/*
+ * Callback for ha_utimens(). When ha_handle_cbk() returns 0 the reply is
+ * unwound unchanged; otherwise nothing is unwound here.
+ */
+int32_t
+ha_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+		int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, buf);
+	return 0;
+}
+
+/*
+ * utimens fop. Prepares frame->local through ha_alloc_init_inode(), stores
+ * a stub of the call in local->stub and winds utimens to the child at index
+ * local->active (same value as the cookie). If the setup fails the frame is
+ * unwound with -1 and the negated setup result as op_errno.
+ */
+int32_t
+ha_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+	    struct timespec tv[2])
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_utimens_stub (frame, ha_utimens, loc, tv);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_utimens_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->utimens,
+			   loc,
+			   tv);
+	return 0;
+}
+
+/*
+ * Callback for ha_access(). Unwinds op_ret/op_errno unchanged when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	       int32_t op_ret, int32_t op_errno)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno);
+	return 0;
+}
+
+/*
+ * access fop. After ha_alloc_init_inode() has prepared frame->local, the
+ * call is saved as a stub in local->stub and access is wound to the child
+ * at index local->active (also the cookie). A failed setup unwinds -1 with
+ * the negated setup result as op_errno.
+ */
+int32_t
+ha_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_access_stub (frame, ha_access, loc, mask);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_access_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->access,
+			   loc,
+			   mask);
+	return 0;
+}
+
+
+/*
+ * Callback for ha_readlink(). The reply, including the link path, is
+ * unwound unchanged when ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+		 int32_t op_ret, int32_t op_errno, const char *path)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno, path);
+	return 0;
+}
+
+/*
+ * readlink fop. frame->local is prepared by ha_alloc_init_inode(), a stub
+ * of the call is stored in local->stub and readlink is wound to the child
+ * at index local->active, with that index as the cookie. If the setup
+ * fails the frame is unwound with -1 and the negated setup result as
+ * op_errno.
+ */
+int32_t
+ha_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno, NULL);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_readlink_stub (frame, ha_readlink, loc, size);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_readlink_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->readlink,
+			   loc,
+			   size);
+	return 0;
+}
+
+/*
+ * Callback for the lookups that ha_mknod_cbk() winds to the remaining
+ * children after the first successful mknod. A successful lookup marks the
+ * replying child as up in the new inode's state array. When the last
+ * outstanding reply arrives the frame is unwound with the result recorded
+ * in local and the saved stub is destroyed.
+ */
+int
+ha_mknod_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+		     int32_t op_ret, int32_t op_errno, inode_t *inode,
+		     struct stat *buf, dict_t *dict)
+{
+	ha_local_t    *local       = frame->local;
+	ha_private_t  *pvt         = this->private;
+	xlator_t     **children    = pvt->children;
+	call_frame_t  *prev_frame  = cookie;
+	call_stub_t   *stub        = NULL;
+	char          *inode_state = NULL;
+	uint64_t       ctx_val     = 0;
+	int            nchildren   = pvt->child_count;
+	int            idx         = 0;
+	int            pending     = 0;
+	int            ret         = 0;
+
+	/* index of the child that sent this reply */
+	while (idx < nchildren && prev_frame->this != children[idx])
+		idx++;
+
+	if (op_ret == -1) {
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.mknod.loc.path, op_ret, op_errno);
+	}
+
+	ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
+			     this, &ctx_val);
+	inode_state = (char *)(long)ctx_val;
+	if (ret != 0) {
+		/* more replies are still expected, so this error cannot be
+		 * returned from here; it is only logged */
+		gf_log (this->name, GF_LOG_ERROR,
+			"unwind(-1), inode_ctx_get() error");
+	} else if (op_ret == 0) {
+		inode_state[idx] = 1;
+	}
+
+	LOCK (&frame->lock);
+	pending = --local->call_count;
+	UNLOCK (&frame->lock);
+
+	if (pending != 0)
+		return 0;
+
+	/* keep the stub pointer: it is destroyed after the unwind */
+	stub = local->stub;
+	FREE (local->state);
+	STACK_UNWIND (frame,
+		      local->op_ret,
+		      local->op_errno,
+		      local->stub->args.mknod.loc.inode,
+		      &local->buf);
+	call_stub_destroy (stub);
+	return 0;
+}
+
+/*
+ * Callback for the mknod wound by ha_mknod() or re-wound from here.
+ *
+ * Children marked up in local->state are tried one at a time, in index
+ * order, until one mknod succeeds. After the first success every remaining
+ * up child is sent a lookup (answered in ha_mknod_lookup_cbk) instead of
+ * another mknod. The frame is unwound here when no calls are outstanding or
+ * no further up child exists.
+ */
+int32_t
+ha_mknod_cbk (call_frame_t *frame,
+	      void *cookie,
+	      xlator_t *this,
+	      int32_t op_ret,
+	      int32_t op_errno,
+	      inode_t *inode,
+	      struct stat *buf)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	char          *stateino     = NULL;
+	int            child_count  = 0, i = 0, cnt = 0, ret = 0;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_stateino = 0;
+	call_stub_t   *stub         = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++)
+		if (prev_frame->this == children[i])
+			break;
+
+	if (op_ret == -1) {
+		local->op_errno = op_errno;
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.mknod.loc.path, op_ret, op_errno);
+	}
+
+	ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
+			     this, &tmp_stateino);
+	stateino = (char *)(long)tmp_stateino;
+
+	if (ret != 0) {
+		gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
+		stateino = NULL;
+	}
+	if (op_ret == 0) {
+		/* without a state array there is nowhere to record that
+		 * this child has the inode; do not write through NULL */
+		if (stateino != NULL)
+			stateino[i] = 1;
+		local->op_ret = 0;
+		local->first_success = 1;
+		local->buf = *buf;
+	}
+	cnt = --local->call_count;
+	/* next child after the current one that is marked up */
+	for (i = local->active + 1; i < child_count; i++) {
+		if (local->state[i])
+			break;
+	}
+
+	if (cnt == 0 || i == child_count) {
+		stub = local->stub;
+		FREE (local->state);
+		STACK_UNWIND (frame, local->op_ret, local->op_errno,
+			      local->stub->args.mknod.loc.inode, &local->buf);
+		call_stub_destroy (stub);
+		return 0;
+	}
+
+	local->active = i;
+
+	if (local->first_success == 0) {
+		/* nothing created yet: try mknod on the next child */
+		STACK_WIND (frame,
+			    ha_mknod_cbk,
+			    children[i],
+			    children[i]->fops->mknod,
+			    &local->stub->args.mknod.loc,
+			    local->stub->args.mknod.mode,
+			    local->stub->args.mknod.rdev);
+		return 0;
+	}
+	cnt = local->call_count;
+
+	for (; i < child_count; i++) {
+		if (local->state[i]) {
+			STACK_WIND (frame,
+				    ha_mknod_lookup_cbk,
+				    children[i],
+				    children[i]->fops->lookup,
+				    &local->stub->args.mknod.loc,
+				    0);
+			/* stop after the last expected wind; local must
+			 * not be touched once its reply may have unwound */
+			if (--cnt == 0)
+				break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * mknod fop.
+ *
+ * Allocates frame->local, saves a stub of the call, copies the translator's
+ * per-child state into local->state and stores a zeroed per-child state
+ * array in the new inode's context. mknod is then wound to the first child
+ * marked up; ha_mknod_cbk() continues from there. local->call_count is the
+ * number of children marked up.
+ *
+ * If an allocation fails the frame is unwound with -1/ENOMEM, and with
+ * -1/ENOTCONN if no child is marked up.
+ */
+int32_t
+ha_mknod (call_frame_t *frame,
+	  xlator_t *this,
+	  loc_t *loc,
+	  mode_t mode,
+	  dev_t rdev)
+{
+	ha_local_t   *local       = NULL;
+	ha_private_t *pvt         = NULL;
+	int           child_count = 0, i = 0;
+	char         *stateino    = NULL;
+	int           op_errno    = ENOMEM;
+
+	pvt = this->private;
+	child_count = pvt->child_count;
+
+	frame->local = local = CALLOC (1, sizeof (*local));
+	if (local == NULL)
+		goto err;
+	local->op_ret = -1;
+	local->op_errno = ENOTCONN;
+	local->active = -1;
+
+	local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev);
+	local->state = CALLOC (1, child_count);
+	stateino = CALLOC (1, child_count);
+	if (local->stub == NULL || local->state == NULL || stateino == NULL)
+		goto err;
+	memcpy (local->state, pvt->state, child_count);
+
+	for (i = 0; i < child_count; i++) {
+		if (local->state[i]) {
+			local->call_count++;
+			if (local->active == -1)
+				local->active = i;
+		}
+	}
+	if (local->active == -1) {
+		/* no child is up: there is nothing to wind to */
+		op_errno = ENOTCONN;
+		goto err;
+	}
+
+	inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
+
+	STACK_WIND (frame,
+		    ha_mknod_cbk,
+		    HA_ACTIVE_CHILD(this, local),
+		    HA_ACTIVE_CHILD(this, local)->fops->mknod,
+		    loc, mode, rdev);
+	return 0;
+err:
+	if (stateino != NULL) {
+		FREE (stateino);
+	}
+	if (local != NULL) {
+		if (local->stub != NULL) {
+			call_stub_destroy (local->stub);
+			local->stub = NULL;
+		}
+		if (local->state != NULL) {
+			FREE (local->state);
+			local->state = NULL;
+		}
+	}
+	STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+	return 0;
+}
+
+
+/*
+ * Callback for the lookups that ha_mkdir_cbk() winds to the remaining
+ * children after the first successful mkdir. A successful lookup marks the
+ * replying child as up in the new inode's state array. The frame is
+ * unwound, and the saved stub destroyed, when the last outstanding reply
+ * arrives.
+ */
+int
+ha_mkdir_lookup_cbk (call_frame_t *frame,
+		     void *cookie,
+		     xlator_t *this,
+		     int32_t op_ret,
+		     int32_t op_errno,
+		     inode_t *inode,
+		     struct stat *buf,
+		     dict_t *dict)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	char          *stateino     = NULL;
+	int            child_count  = 0, i = 0, cnt = 0, ret = 0;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_stateino = 0;
+	call_stub_t   *stub         = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++)
+		if (prev_frame->this == children[i])
+			break;
+
+	if (op_ret == -1) {
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.mkdir.loc.path, op_ret, op_errno);
+	}
+	ret = inode_ctx_get (local->stub->args.mkdir.loc.inode,
+			     this, &tmp_stateino);
+	stateino = (char *)(long)tmp_stateino;
+
+	/* only record the child when the inode really has a state array;
+	 * more replies are expected, so a missing ctx cannot be reported
+	 * from here */
+	if (op_ret == 0 && ret == 0 && stateino != NULL)
+		stateino[i] = 1;
+
+	LOCK (&frame->lock);
+	cnt = --local->call_count;
+	UNLOCK (&frame->lock);
+
+	if (cnt == 0) {
+		stub = local->stub;
+		FREE (local->state);
+		STACK_UNWIND (frame,
+			      local->op_ret,
+			      local->op_errno,
+			      local->stub->args.mkdir.loc.inode,
+			      &local->buf);
+		call_stub_destroy (stub);
+	}
+	return 0;
+}
+
+/*
+ * Callback for the mkdir wound by ha_mkdir() or re-wound from here.
+ *
+ * Children marked up in local->state are tried one at a time, in index
+ * order, until one mkdir succeeds. After the first success every remaining
+ * up child is sent a lookup (answered in ha_mkdir_lookup_cbk) instead of
+ * another mkdir. The frame is unwound here when no calls are outstanding or
+ * no further up child exists.
+ */
+int32_t
+ha_mkdir_cbk (call_frame_t *frame,
+	      void *cookie,
+	      xlator_t *this,
+	      int32_t op_ret,
+	      int32_t op_errno,
+	      inode_t *inode,
+	      struct stat *buf)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	char          *stateino     = NULL;
+	int            child_count  = 0, i = 0, cnt = 0, ret = 0;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_stateino = 0;
+	call_stub_t   *stub         = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++)
+		if (prev_frame->this == children[i])
+			break;
+
+	if (op_ret == -1) {
+		local->op_errno = op_errno;
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.mkdir.loc.path, op_ret, op_errno);
+	}
+
+	ret = inode_ctx_get (local->stub->args.mkdir.loc.inode,
+			     this, &tmp_stateino);
+	stateino = (char *)(long)tmp_stateino;
+
+	if (op_ret == 0) {
+		/* do not write through a missing state array */
+		if (ret == 0 && stateino != NULL)
+			stateino[i] = 1;
+		local->op_ret = 0;
+		local->first_success = 1;
+		local->buf = *buf;
+	}
+	cnt = --local->call_count;
+	/* next child after the current one that is marked up */
+	for (i = local->active + 1; i < child_count; i++) {
+		if (local->state[i])
+			break;
+	}
+
+	if (cnt == 0 || i == child_count) {
+		stub = local->stub;
+		FREE (local->state);
+		STACK_UNWIND (frame, local->op_ret, local->op_errno,
+			      local->stub->args.mkdir.loc.inode, &local->buf);
+		call_stub_destroy (stub);
+		return 0;
+	}
+
+	local->active = i;
+
+	if (local->first_success == 0) {
+		/* nothing created yet: try mkdir on the next child */
+		STACK_WIND (frame,
+			    ha_mkdir_cbk,
+			    children[i],
+			    children[i]->fops->mkdir,
+			    &local->stub->args.mkdir.loc,
+			    local->stub->args.mkdir.mode);
+		return 0;
+	}
+	cnt = local->call_count;
+
+	for (; i < child_count; i++) {
+		if (local->state[i]) {
+			STACK_WIND (frame,
+				    ha_mkdir_lookup_cbk,
+				    children[i],
+				    children[i]->fops->lookup,
+				    &local->stub->args.mkdir.loc,
+				    0);
+			/* stop after the last expected wind; local must
+			 * not be touched once its reply may have unwound */
+			if (--cnt == 0)
+				break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * mkdir fop.
+ *
+ * Allocates frame->local, saves a stub of the call, copies the translator's
+ * per-child state into local->state and stores a zeroed per-child state
+ * array in the new inode's context. mkdir is then wound to the first child
+ * marked up; ha_mkdir_cbk() continues from there. local->call_count is the
+ * number of children marked up.
+ *
+ * If an allocation fails the frame is unwound with -1/ENOMEM, and with
+ * -1/ENOTCONN if no child is marked up.
+ */
+int32_t
+ha_mkdir (call_frame_t *frame,
+	  xlator_t *this,
+	  loc_t *loc,
+	  mode_t mode)
+{
+	ha_local_t   *local       = NULL;
+	ha_private_t *pvt         = NULL;
+	int           child_count = 0, i = 0;
+	char         *stateino    = NULL;
+	int           op_errno    = ENOMEM;
+
+	pvt = this->private;
+	child_count = pvt->child_count;
+
+	frame->local = local = CALLOC (1, sizeof (*local));
+	if (local == NULL)
+		goto err;
+	local->op_ret = -1;
+	local->op_errno = ENOTCONN;
+	local->active = -1;
+
+	local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode);
+	local->state = CALLOC (1, child_count);
+	stateino = CALLOC (1, child_count);
+	if (local->stub == NULL || local->state == NULL || stateino == NULL)
+		goto err;
+	memcpy (local->state, pvt->state, child_count);
+
+	for (i = 0; i < child_count; i++) {
+		if (local->state[i]) {
+			local->call_count++;
+			if (local->active == -1)
+				local->active = i;
+		}
+	}
+	if (local->active == -1) {
+		/* no child is up: there is nothing to wind to */
+		op_errno = ENOTCONN;
+		goto err;
+	}
+
+	inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
+
+	STACK_WIND (frame,
+		    ha_mkdir_cbk,
+		    HA_ACTIVE_CHILD(this, local),
+		    HA_ACTIVE_CHILD(this, local)->fops->mkdir,
+		    loc, mode);
+	return 0;
+err:
+	if (stateino != NULL) {
+		FREE (stateino);
+	}
+	if (local != NULL) {
+		if (local->stub != NULL) {
+			call_stub_destroy (local->stub);
+			local->stub = NULL;
+		}
+		if (local->state != NULL) {
+			FREE (local->state);
+			local->state = NULL;
+		}
+	}
+	STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+	return 0;
+}
+
+/*
+ * Callback for ha_unlink(). Unwinds op_ret/op_errno unchanged unless
+ * ha_handle_cbk() returns non-zero.
+ */
+int32_t
+ha_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	       int32_t op_ret, int32_t op_errno)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno);
+	return 0;
+}
+
+/*
+ * unlink fop. Sets up frame->local via ha_alloc_init_inode(), keeps a stub
+ * of the call in local->stub and winds unlink to the child at index
+ * local->active, which is also the cookie. A failed setup unwinds -1 with
+ * the negated setup result as op_errno.
+ */
+int32_t
+ha_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_unlink_stub (frame, ha_unlink, loc);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_unlink_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->unlink,
+			   loc);
+	return 0;
+}
+
+/*
+ * Callback for ha_rmdir(). When ha_handle_cbk() returns 0 the reply is
+ * unwound unchanged; otherwise nothing is unwound here.
+ */
+int32_t
+ha_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	      int32_t op_ret, int32_t op_errno)
+{
+	if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+		return 0;
+
+	STACK_UNWIND (frame, op_ret, op_errno);
+	return 0;
+}
+
+/*
+ * rmdir fop. ha_alloc_init_inode() prepares frame->local; the call is saved
+ * as a stub in local->stub and rmdir is wound to the child at index
+ * local->active (passed as the cookie too). If the setup fails the frame is
+ * unwound with -1 and the negated setup result as op_errno.
+ */
+int32_t
+ha_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+	ha_local_t *local    = NULL;
+	int         init_ret = 0;
+	int         op_errno = 0;
+
+	init_ret = ha_alloc_init_inode (frame, loc->inode);
+	if (init_ret < 0) {
+		op_errno = -init_ret;
+		STACK_UNWIND (frame, -1, op_errno);
+		return 0;
+	}
+
+	local = frame->local;
+	local->stub = fop_rmdir_stub (frame, ha_rmdir, loc);
+
+	STACK_WIND_COOKIE (frame,
+			   ha_rmdir_cbk,
+			   (void *)(long)local->active,
+			   HA_ACTIVE_CHILD(this, local),
+			   HA_ACTIVE_CHILD(this, local)->fops->rmdir,
+			   loc);
+	return 0;
+}
+
+
+/*
+ * Callback for the lookups that ha_symlink_cbk() winds to the remaining
+ * children after the first successful symlink. A successful lookup marks
+ * the replying child as up in the new inode's state array. The frame is
+ * unwound, and the saved stub destroyed, when the last outstanding reply
+ * arrives.
+ */
+int
+ha_symlink_lookup_cbk (call_frame_t *frame,
+		       void *cookie,
+		       xlator_t *this,
+		       int32_t op_ret,
+		       int32_t op_errno,
+		       inode_t *inode,
+		       struct stat *buf,
+		       dict_t *dict)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	char          *stateino     = NULL;
+	int            child_count  = 0, i = 0, cnt = 0, ret = 0;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_stateino = 0;
+	call_stub_t   *stub         = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++)
+		if (prev_frame->this == children[i])
+			break;
+
+	if (op_ret == -1) {
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.symlink.loc.path, op_ret, op_errno);
+	}
+	ret = inode_ctx_get (local->stub->args.symlink.loc.inode,
+			     this, &tmp_stateino);
+	stateino = (char *)(long)tmp_stateino;
+
+	/* only record the child when the inode really has a state array;
+	 * more replies are expected, so a missing ctx cannot be reported
+	 * from here */
+	if (op_ret == 0 && ret == 0 && stateino != NULL)
+		stateino[i] = 1;
+
+	LOCK (&frame->lock);
+	cnt = --local->call_count;
+	UNLOCK (&frame->lock);
+
+	if (cnt == 0) {
+		stub = local->stub;
+		FREE (local->state);
+		STACK_UNWIND (frame,
+			      local->op_ret,
+			      local->op_errno,
+			      local->stub->args.symlink.loc.inode,
+			      &local->buf);
+		call_stub_destroy (stub);
+	}
+	return 0;
+}
+
+/*
+ * Callback for the symlink wound by ha_symlink() or re-wound from here.
+ *
+ * Children marked up in local->state are tried one at a time, in index
+ * order, until one symlink succeeds. After the first success every
+ * remaining up child is sent a lookup (answered in ha_symlink_lookup_cbk)
+ * instead of another symlink. The frame is unwound here when no calls are
+ * outstanding or no further up child exists.
+ */
+int32_t
+ha_symlink_cbk (call_frame_t *frame,
+		void *cookie,
+		xlator_t *this,
+		int32_t op_ret,
+		int32_t op_errno,
+		inode_t *inode,
+		struct stat *buf)
+{
+	ha_local_t    *local        = NULL;
+	ha_private_t  *pvt          = NULL;
+	char          *stateino     = NULL;
+	int            child_count  = 0, i = 0, cnt = 0, ret = 0;
+	call_frame_t  *prev_frame   = NULL;
+	xlator_t     **children     = NULL;
+	uint64_t       tmp_stateino = 0;
+	call_stub_t   *stub         = NULL;
+
+	local = frame->local;
+	pvt = this->private;
+	child_count = pvt->child_count;
+	prev_frame = cookie;
+	children = pvt->children;
+
+	/* index of the child that sent this reply */
+	for (i = 0; i < child_count; i++)
+		if (prev_frame->this == children[i])
+			break;
+
+	if (op_ret == -1) {
+		local->op_errno = op_errno;
+		gf_log (this->name, GF_LOG_ERROR,
+			"(path=%s) (op_ret=%d op_errno=%d)",
+			local->stub->args.symlink.loc.path, op_ret, op_errno);
+	}
+	ret = inode_ctx_get (local->stub->args.symlink.loc.inode,
+			     this, &tmp_stateino);
+	stateino = (char *)(long)tmp_stateino;
+
+	if (op_ret == 0) {
+		/* do not write through a missing state array */
+		if (ret == 0 && stateino != NULL)
+			stateino[i] = 1;
+		local->op_ret = 0;
+		local->first_success = 1;
+		local->buf = *buf;
+	}
+	cnt = --local->call_count;
+	/* next child after the current one that is marked up */
+	for (i = local->active + 1; i < child_count; i++) {
+		if (local->state[i])
+			break;
+	}
+
+	if (cnt == 0 || i == child_count) {
+		stub = local->stub;
+		FREE (local->state);
+		STACK_UNWIND (frame, local->op_ret, local->op_errno,
+			      local->stub->args.symlink.loc.inode, &local->buf);
+		call_stub_destroy (stub);
+		return 0;
+	}
+
+	local->active = i;
+
+	if (local->first_success == 0) {
+		/* nothing created yet: try symlink on the next child */
+		STACK_WIND (frame,
+			    ha_symlink_cbk,
+			    children[i],
+			    children[i]->fops->symlink,
+			    local->stub->args.symlink.linkname,
+			    &local->stub->args.symlink.loc);
+		return 0;
+	}
+	cnt = local->call_count;
+
+	for (; i < child_count; i++) {
+		if (local->state[i]) {
+			STACK_WIND (frame,
+				    ha_symlink_lookup_cbk,
+				    children[i],
+				    children[i]->fops->lookup,
+				    &local->stub->args.symlink.loc,
+				    0);
+			/* stop after the last expected wind; local must
+			 * not be touched once its reply may have unwound */
+			if (--cnt == 0)
+				break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * symlink fop.
+ *
+ * Allocates frame->local, saves a stub of the call, copies the translator's
+ * per-child state into local->state and stores a zeroed per-child state
+ * array in the new inode's context. symlink is then wound to the first
+ * child marked up; ha_symlink_cbk() continues from there.
+ * local->call_count is the number of children marked up.
+ *
+ * If an allocation fails the frame is unwound with -1/ENOMEM, and with
+ * -1/ENOTCONN if no child is marked up.
+ */
+int32_t
+ha_symlink (call_frame_t *frame,
+	    xlator_t *this,
+	    const char *linkname,
+	    loc_t *loc)
+{
+	ha_local_t   *local       = NULL;
+	ha_private_t *pvt         = NULL;
+	int           child_count = 0, i = 0;
+	char         *stateino    = NULL;
+	int           op_errno    = ENOMEM;
+
+	pvt = this->private;
+	child_count = pvt->child_count;
+
+	frame->local = local = CALLOC (1, sizeof (*local));
+	if (local == NULL)
+		goto err;
+	local->op_ret = -1;
+	local->op_errno = ENOTCONN;
+	local->active = -1;
+
+	local->stub = fop_symlink_stub (frame, ha_symlink, linkname, loc);
+	local->state = CALLOC (1, child_count);
+	stateino = CALLOC (1, child_count);
+	if (local->stub == NULL || local->state == NULL || stateino == NULL)
+		goto err;
+	memcpy (local->state, pvt->state, child_count);
+
+	for (i = 0; i < child_count; i++) {
+		if (local->state[i]) {
+			local->call_count++;
+			if (local->active == -1) {
+				local->active = i;
+			}
+		}
+	}
+	if (local->active == -1) {
+		/* no child is up: there is nothing to wind to */
+		op_errno = ENOTCONN;
+		goto err;
+	}
+
+	inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
+
+	STACK_WIND (frame,
+		    ha_symlink_cbk,
+		    HA_ACTIVE_CHILD(this, local),
+		    HA_ACTIVE_CHILD(this, local)->fops->symlink,
+		    linkname, loc);
+	return 0;
+err:
+	if (stateino != NULL) {
+		FREE (stateino);
+	}
+	if (local != NULL) {
+		if (local->stub != NULL) {
+			call_stub_destroy (local->stub);
+			local->stub = NULL;
+		}
+		if (local->state != NULL) {
+			FREE (local->state);
+			local->state = NULL;
+		}
+	}
+	STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+	return 0;
+}
+
+/*
+ * ha_rename_cbk - reply handler for ha_rename().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_rename_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct stat *buf)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * ha_rename - wind a rename to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for oldloc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_rename (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *oldloc,
+           loc_t *newloc)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, oldloc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_rename_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->rename,
+                           oldloc, newloc);
+        return 0;
+}
+
+/*
+ * ha_link_lookup_cbk - reply handler for the lookups fanned out by
+ * ha_link_cbk() once the link has succeeded on one child.
+ *
+ * A successful lookup marks the replying child in the per-inode state array
+ * stored in the inode context of the new location.  When the last
+ * outstanding reply arrives the result accumulated in frame->local is
+ * unwound and the saved call stub and the state snapshot are released.
+ *
+ * The inode context may be missing and the replying child may not be found
+ * in pvt->children; in both cases the state array is left untouched instead
+ * of being dereferenced / indexed out of bounds.
+ */
+int
+ha_link_lookup_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    inode_t *inode,
+                    struct stat *buf,
+                    dict_t *dict)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        char         *stateino     = NULL;
+        int           child_count  = 0;
+        int           i            = 0;
+        int           cnt          = 0;
+        int           ret          = 0;
+        call_frame_t *prev_frame   = NULL;
+        xlator_t    **children     = NULL;
+        uint64_t      tmp_stateino = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+        children = pvt->children;
+
+        /* index of the child this reply came from */
+        for (i = 0; i < child_count; i++)
+                if (prev_frame->this == children[i])
+                        break;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
+        }
+
+        ret = inode_ctx_get (local->stub->args.link.newloc.inode,
+                             this, &tmp_stateino);
+        stateino = (char *)(long)tmp_stateino;
+
+        if (ret != 0 || stateino == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
+        } else if (op_ret == 0 && i < child_count) {
+                stateino[i] = 1;
+        }
+
+        LOCK (&frame->lock);
+        cnt = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        if (cnt == 0) {
+                /* keep the stub pointer: it is destroyed after the unwind */
+                call_stub_t *stub = local->stub;
+                FREE (local->state);
+                STACK_UNWIND (frame,
+                              local->op_ret,
+                              local->op_errno,
+                              local->stub->args.link.oldloc.inode,
+                              &local->buf);
+                call_stub_destroy (stub);
+        }
+        return 0;
+}
+
+/*
+ * ha_link_cbk - reply handler for the link request wound by ha_link().
+ *
+ * Until one child has succeeded, the link is retried on the next child that
+ * is set in local->state, one child at a time.  After the first success the
+ * remaining such children are sent a lookup on the new location instead
+ * (replies go to ha_link_lookup_cbk).  When no further child is left the
+ * accumulated result is unwound and the stub and state snapshot are freed.
+ *
+ * A missing inode context or an unknown replying child no longer leads to a
+ * NULL dereference / out-of-bounds write on the per-inode state array.
+ */
+int32_t
+ha_link_cbk (call_frame_t *frame,
+             void *cookie,
+             xlator_t *this,
+             int32_t op_ret,
+             int32_t op_errno,
+             inode_t *inode,
+             struct stat *buf)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        char         *stateino     = NULL;
+        int           child_count  = 0;
+        int           i            = 0;
+        int           idx          = 0;
+        int           cnt          = 0;
+        int           ret          = 0;
+        call_frame_t *prev_frame   = NULL;
+        xlator_t    **children     = NULL;
+        uint64_t      tmp_stateino = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+        children = pvt->children;
+
+        /* index of the child this reply came from */
+        for (idx = 0; idx < child_count; idx++)
+                if (prev_frame->this == children[idx])
+                        break;
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
+        }
+
+        ret = inode_ctx_get (local->stub->args.link.newloc.inode,
+                             this, &tmp_stateino);
+        stateino = (char *)(long)tmp_stateino;
+        if (ret != 0 || stateino == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
+        }
+
+        if (op_ret == 0) {
+                if (stateino != NULL && idx < child_count)
+                        stateino[idx] = 1;
+                local->op_ret = 0;
+                local->first_success = 1;
+                local->buf = *buf;
+        }
+
+        cnt = --local->call_count;
+
+        /* next candidate child after the one tried last */
+        for (i = local->active + 1; i < child_count; i++) {
+                if (local->state[i])
+                        break;
+        }
+
+        if (cnt == 0 || i == child_count) {
+                /* keep the stub pointer: it is destroyed after the unwind */
+                call_stub_t *stub = local->stub;
+                FREE (local->state);
+                STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.link.oldloc.inode, &local->buf);
+                call_stub_destroy (stub);
+                return 0;
+        }
+
+        local->active = i;
+
+        if (local->first_success == 0) {
+                /* nothing has succeeded yet: retry the link itself */
+                STACK_WIND (frame,
+                            ha_link_cbk,
+                            children[i],
+                            children[i]->fops->link,
+                            &local->stub->args.link.oldloc,
+                            &local->stub->args.link.newloc);
+                return 0;
+        }
+
+        cnt = local->call_count;
+
+        /*
+         * Stop right after the last wind: once every reply is in,
+         * ha_link_lookup_cbk releases local->state and the stub.
+         */
+        for (; i < child_count; i++) {
+                if (local->state[i]) {
+                        STACK_WIND (frame,
+                                    ha_link_lookup_cbk,
+                                    children[i],
+                                    children[i]->fops->lookup,
+                                    &local->stub->args.link.newloc,
+                                    0);
+                        if (--cnt == 0)
+                                break;
+                }
+        }
+        return 0;
+}
+
+/*
+ * ha_link - create a hard link, starting with the first child that is up.
+ *
+ * The inode context of newloc->inode must already hold a per-child state
+ * array; otherwise the call is failed with EINVAL.  A fresh ha_local_t is
+ * installed in frame->local, the request is saved as a call stub (the
+ * callbacks read its arguments back from local->stub) and a snapshot of
+ * pvt->state is taken.
+ *
+ * Unwinds with -1/ENOMEM when an allocation fails and with -1/ENOTCONN when
+ * no child is marked up in pvt->state; nothing is wound in either case.
+ */
+int32_t
+ha_link (call_frame_t *frame,
+         xlator_t *this,
+         loc_t *oldloc,
+         loc_t *newloc)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        int           child_count  = 0;
+        int           i            = 0;
+        char         *stateino     = NULL;
+        int32_t       ret          = 0;
+        int           op_errno     = ENOMEM;
+        uint64_t      tmp_stateino = 0;
+
+        ret = inode_ctx_get (newloc->inode, this, &tmp_stateino);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
+        }
+        stateino = (char *)(long)tmp_stateino;
+
+        if (stateino == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "newloc->inode's ctx is NULL, returning EINVAL");
+                STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL);
+                return 0;
+        }
+
+        pvt = this->private;
+        child_count = pvt->child_count;
+
+        frame->local = local = CALLOC (1, sizeof (*local));
+        if (local == NULL)
+                goto err;
+
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        local->active = -1;
+        local->stub = fop_link_stub (frame, ha_link, oldloc, newloc);
+        local->state = CALLOC (1, child_count);
+        if (local->stub == NULL || local->state == NULL)
+                goto err;
+
+        memcpy (local->state, pvt->state, child_count);
+
+        /* count the children that are up and remember the first of them */
+        for (i = 0; i < child_count; i++) {
+                if (local->state[i]) {
+                        local->call_count++;
+                        if (local->active == -1)
+                                local->active = i;
+                }
+        }
+
+        /* no child is up: HA_ACTIVE_CHILD would index with -1 */
+        if (local->active == -1) {
+                op_errno = ENOTCONN;
+                goto err;
+        }
+
+        STACK_WIND (frame,
+                    ha_link_cbk,
+                    HA_ACTIVE_CHILD(this, local),
+                    HA_ACTIVE_CHILD(this, local)->fops->link,
+                    oldloc,
+                    newloc);
+        return 0;
+
+err:
+        if (local != NULL) {
+                if (local->state != NULL) {
+                        FREE (local->state);
+                        local->state = NULL;
+                }
+                if (local->stub != NULL) {
+                        call_stub_destroy (local->stub);
+                        local->stub = NULL;
+                }
+        }
+        STACK_UNWIND (frame, -1, op_errno, oldloc->inode, NULL);
+        return 0;
+}
+
+/*
+ * ha_create_cbk - reply handler for the create requests wound by
+ * ha_create() and by this function itself.
+ *
+ * The create runs in two phases:
+ *   - serial:  children set in local->state are tried one at a time until
+ *              one of them succeeds;
+ *   - fan-out: after the first success, O_EXCL is cleared from the saved
+ *              flags and the create is sent to all remaining such children
+ *              at once.
+ * Every success marks the replying child in the per-inode and per-fd state
+ * arrays.  The result is unwound when the last outstanding reply arrives
+ * (or, in the serial phase, when there is no child left to try).
+ *
+ * Fixes relative to the previous version:
+ *   - a reply that belongs to the fan-out phase only accounts for itself;
+ *     it no longer winds the create again to later children;
+ *   - the fan-out loop counts the winds it issues and stops after the last
+ *     one, so local->state is not read after the final reply may already
+ *     have released it;
+ *   - missing inode/fd contexts and an unknown replying child are tolerated
+ *     instead of being dereferenced / indexed out of bounds.
+ */
+int32_t
+ha_create_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               fd_t *fd,
+               inode_t *inode,
+               struct stat *buf)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        int           i            = 0;
+        int           idx          = 0;
+        int           child_count  = 0;
+        int           cnt          = 0;
+        int           ret          = 0;
+        int           fanned_out   = 0;
+        char         *stateino     = NULL;
+        char         *state        = NULL;
+        call_stub_t  *stub         = NULL;
+        hafd_t       *hafdp        = NULL;
+        call_frame_t *prev_frame   = NULL;
+        xlator_t    **children     = NULL;
+        uint64_t      tmp_stateino = 0;
+        uint64_t      tmp_hafdp    = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+        children = pvt->children;
+
+        /* set before this reply => this reply is part of the fan-out */
+        fanned_out = local->first_success;
+
+        ret = inode_ctx_get (local->stub->args.create.loc.inode,
+                             this, &tmp_stateino);
+        stateino = (char *)(long)tmp_stateino;
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
+        }
+
+        ret = fd_ctx_get (local->stub->args.create.fd, this, &tmp_hafdp);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
+        }
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+
+        /* index of the child this reply came from */
+        for (idx = 0; idx < child_count; idx++) {
+                if (prev_frame->this == children[idx])
+                        break;
+        }
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.create.loc.path, op_ret, op_errno);
+        }
+        if (op_ret != -1) {
+                if (idx < child_count) {
+                        if (stateino != NULL)
+                                stateino[idx] = 1;
+                        if (hafdp != NULL)
+                                hafdp->fdstate[idx] = 1;
+                }
+                if (local->op_ret == -1) {
+                        local->op_ret = 0;
+                        local->buf = *buf;
+                        local->first_success = 1;
+                }
+                /* the file exists now; later creates must not fail on it */
+                local->stub->args.create.flags &= (~O_EXCL);
+        }
+
+        LOCK (&frame->lock);
+        cnt = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        if (fanned_out) {
+                /* sibling replies are still pending: leave local alone */
+                if (cnt != 0)
+                        return 0;
+                goto unwind;
+        }
+
+        /* serial phase: next candidate child after the one tried last */
+        for (i = local->active + 1; i < child_count; i++) {
+                if (local->state[i])
+                        break;
+        }
+
+        if (cnt == 0 || i == child_count)
+                goto unwind;
+
+        local->active = i;
+        cnt = local->call_count;
+        for (; i < child_count; i++) {
+                if (local->state[i]) {
+                        STACK_WIND (frame,
+                                    ha_create_cbk,
+                                    children[i],
+                                    children[i]->fops->create,
+                                    &local->stub->args.create.loc,
+                                    local->stub->args.create.flags,
+                                    local->stub->args.create.mode,
+                                    local->stub->args.create.fd);
+                        /* one child only until something has succeeded */
+                        if ((local->first_success == 0) || (--cnt == 0))
+                                break;
+                }
+        }
+        return 0;
+
+unwind:
+        /* keep both pointers: they are released after the unwind */
+        state = local->state;
+        stub = local->stub;
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      stub->args.create.fd,
+                      stub->args.create.loc.inode, &local->buf);
+        FREE (state);
+        call_stub_destroy (stub);
+        return 0;
+}
+
+/*
+ * ha_create - create and open a file, starting with the first child that
+ * is up.
+ *
+ * When frame->local is not set yet, a fresh ha_local_t is installed, the
+ * request is saved as a call stub, a snapshot of pvt->state is taken, and
+ * new per-fd (hafd_t) and per-inode state is stored in the fd and inode
+ * contexts.  The create is then wound to children[local->active].
+ *
+ * Unwinds with -1/ENOMEM when an allocation fails and with -1/ENOTCONN when
+ * no child is marked up (previously children[-1] was used in that case).
+ */
+int32_t
+ha_create (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           int32_t flags,
+           mode_t mode, fd_t *fd)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        int           i           = 0;
+        int           child_count = 0;
+        int           created     = 0;
+        int           op_errno    = ENOMEM;
+        char         *stateino    = NULL;
+        xlator_t    **children    = NULL;
+        hafd_t       *hafdp       = NULL;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        children = pvt->children;
+
+        if (local == NULL) {
+                local = frame->local = CALLOC (1, sizeof (*local));
+                if (local == NULL)
+                        goto err;
+                created = 1;
+
+                local->active = -1;
+                local->op_ret = -1;
+                local->op_errno = ENOTCONN;
+                local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd);
+                local->state = CALLOC (1, child_count);
+                if (local->stub == NULL || local->state == NULL)
+                        goto err;
+                memcpy (local->state, pvt->state, child_count);
+
+                /* count the children that are up, remember the first */
+                for (i = 0; i < pvt->child_count; i++) {
+                        if (local->state[i]) {
+                                local->call_count++;
+                                if (local->active == -1)
+                                        local->active = i;
+                        }
+                }
+                if (local->active == -1) {
+                        op_errno = ENOTCONN;
+                        goto err;
+                }
+
+                stateino = CALLOC (1, child_count);
+                hafdp = CALLOC (1, sizeof (*hafdp));
+                if (hafdp != NULL) {
+                        hafdp->fdstate = CALLOC (1, child_count);
+                        hafdp->path = strdup(loc->path);
+                }
+                if (stateino == NULL || hafdp == NULL ||
+                    hafdp->fdstate == NULL || hafdp->path == NULL)
+                        goto err;
+
+                LOCK_INIT (&hafdp->lock);
+                fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
+                inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
+                /* both are owned by the contexts from here on */
+                hafdp = NULL;
+                stateino = NULL;
+        }
+
+        if (local->active == -1) {
+                op_errno = ENOTCONN;
+                goto err;
+        }
+
+        STACK_WIND (frame,
+                    ha_create_cbk,
+                    children[local->active],
+                    children[local->active]->fops->create,
+                    loc, flags, mode, fd);
+        return 0;
+
+err:
+        if (stateino != NULL) {
+                FREE (stateino);
+        }
+        if (hafdp != NULL) {
+                if (hafdp->fdstate != NULL) {
+                        FREE (hafdp->fdstate);
+                }
+                if (hafdp->path != NULL) {
+                        FREE (hafdp->path);
+                }
+                FREE (hafdp);
+        }
+        /* only release what this invocation set up */
+        if (created && local != NULL) {
+                if (local->state != NULL) {
+                        FREE (local->state);
+                        local->state = NULL;
+                }
+                if (local->stub != NULL) {
+                        call_stub_destroy (local->stub);
+                        local->stub = NULL;
+                }
+        }
+        STACK_UNWIND (frame, -1, op_errno, fd, loc->inode, NULL);
+        return 0;
+}
+
+/*
+ * ha_open_cbk - reply handler for the opens fanned out by ha_open().
+ *
+ * A successful reply marks the replying child in the fd's hafd_t state
+ * array and makes the overall result a success.  A failure other than
+ * ENOTCONN is remembered in local->op_errno.  The accumulated result is
+ * unwound when the last outstanding reply arrives.
+ *
+ * A missing fd context or an unknown replying child is tolerated instead of
+ * being dereferenced / indexed out of bounds.
+ */
+int32_t
+ha_open_cbk (call_frame_t *frame,
+             void *cookie,
+             xlator_t *this,
+             int32_t op_ret,
+             int32_t op_errno,
+             fd_t *fd)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        xlator_t    **children    = NULL;
+        int           i           = 0;
+        int           child_count = 0;
+        int           callcnt     = 0;
+        int           ret         = 0;
+        call_frame_t *prev_frame  = NULL;
+        hafd_t       *hafdp       = NULL;
+        uint64_t      tmp_hafdp   = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        children = pvt->children;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+
+        ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
+        }
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+
+        /* index of the child this reply came from */
+        for (i = 0; i < child_count; i++)
+                if (children[i] == prev_frame->this)
+                        break;
+
+        LOCK (&frame->lock);
+        if (op_ret != -1) {
+                if (hafdp != NULL && i < child_count)
+                        hafdp->fdstate[i] = 1;
+                local->op_ret = 0;
+        }
+        if (op_ret == -1 && op_errno != ENOTCONN)
+                local->op_errno = op_errno;
+        callcnt = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 0) {
+                STACK_UNWIND (frame,
+                              local->op_ret,
+                              local->op_errno,
+                              local->fd);
+        }
+        return 0;
+}
+
+/*
+ * ha_open - open a file on every child marked in the inode's state array.
+ *
+ * The per-child array is read from the inode context of loc->inode.  A new
+ * hafd_t (per-child fd state, a copy of the path, and an "active" index
+ * that is derived from the inode number when pvt->pref_subvol is -1) is
+ * stored in the fd context, and the open is wound to each marked child;
+ * ha_open_cbk collects the replies.
+ *
+ * Unwinds without winding in these cases:
+ *   -1/EINVAL   the inode context is missing;
+ *   -1/ENOTCONN no child is marked for this inode (previously nothing was
+ *               wound and the frame was never unwound);
+ *   -1/ENOMEM   an allocation failed.
+ */
+int32_t
+ha_open (call_frame_t *frame,
+         xlator_t *this,
+         loc_t *loc,
+         int32_t flags, fd_t *fd)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        char         *stateino     = NULL;
+        xlator_t    **children     = NULL;
+        int           cnt          = 0;
+        int           i            = 0;
+        int           child_count  = 0;
+        int           ret          = 0;
+        int           op_errno     = ENOMEM;
+        hafd_t       *hafdp        = NULL;
+        uint64_t      tmp_stateino = 0;
+
+        pvt = this->private;
+        children = pvt->children;
+        child_count = pvt->child_count;
+
+        ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
+        stateino = (char *)(long)tmp_stateino;
+        if (ret != 0 || stateino == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        for (i = 0; i < child_count; i++)
+                if (stateino[i])
+                        cnt++;
+        if (cnt == 0) {
+                op_errno = ENOTCONN;
+                goto err;
+        }
+
+        local = frame->local = CALLOC (1, sizeof (*local));
+        if (local == NULL)
+                goto err;
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        local->fd = fd;
+        local->call_count = cnt;
+
+        hafdp = CALLOC (1, sizeof (*hafdp));
+        if (hafdp == NULL)
+                goto err;
+        hafdp->fdstate = CALLOC (1, child_count);
+        hafdp->path = strdup (loc->path);
+        if (hafdp->fdstate == NULL || hafdp->path == NULL)
+                goto err;
+
+        hafdp->active = -1;
+        if (pvt->pref_subvol == -1) {
+                hafdp->active = fd->inode->ino % child_count;
+        }
+
+        LOCK_INIT (&hafdp->lock);
+        fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
+
+        /* stop right after the last wind: the final reply unwinds the frame */
+        for (i = 0; i < child_count; i++) {
+                if (stateino[i]) {
+                        STACK_WIND (frame,
+                                    ha_open_cbk,
+                                    children[i],
+                                    children[i]->fops->open,
+                                    loc, flags, fd);
+                        if (--cnt == 0)
+                                break;
+                }
+        }
+        return 0;
+
+err:
+        /* only reached before hafdp is handed to the fd context */
+        if (hafdp != NULL) {
+                if (hafdp->fdstate != NULL) {
+                        FREE (hafdp->fdstate);
+                }
+                if (hafdp->path != NULL) {
+                        FREE (hafdp->path);
+                }
+                FREE (hafdp);
+        }
+        STACK_UNWIND (frame, -1, op_errno, fd);
+        return 0;
+}
+
+/*
+ * ha_readv_cbk - reply handler for ha_readv().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_readv_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct iovec *vector,
+              int32_t count,
+              struct stat *stbuf)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+/*
+ * ha_readv - wind a read to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ *
+ * The error unwind now supplies all three trailing readv reply arguments
+ * (vector, count, stbuf), matching ha_readv_cbk; previously only one was
+ * passed and the receiver would read the other two from unset arguments.
+ */
+int32_t
+ha_readv (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd,
+          size_t size,
+          off_t offset)
+{
+        ha_local_t *local    = NULL;
+        int         op_errno = 0;
+
+        op_errno = ha_alloc_init_fd (frame, fd);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_readv_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->readv,
+                           fd,
+                           size,
+                           offset);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+        return 0;
+}
+
+/*
+ * ha_writev_cbk - reply handler for ha_writev().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_writev_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct stat *stbuf)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+/*
+ * ha_writev - wind a write to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_writev (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           struct iovec *vector,
+           int32_t count,
+           off_t off)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_writev_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->writev,
+                           fd, vector, count, off);
+        return 0;
+}
+
+/*
+ * ha_flush_cbk - reply handler for ha_flush().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_flush_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_flush - wind a flush to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_flush (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_flush_stub (frame, ha_flush, fd);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_flush_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->flush,
+                           fd);
+        return 0;
+}
+
+
+/*
+ * ha_fsync_cbk - reply handler for ha_fsync().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_fsync_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_fsync - wind an fsync to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_fsync (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd,
+          int32_t flags)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fsync_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fsync,
+                           fd, flags);
+        return 0;
+}
+
+/*
+ * ha_fstat_cbk - reply handler for ha_fstat().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_fstat_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct stat *buf)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * ha_fstat - wind an fstat to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_fstat (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_fstat_stub (frame, ha_fstat, fd);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fstat_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fstat,
+                           fd);
+        return 0;
+}
+
+/*
+ * ha_opendir_cbk - reply handler for the opendirs fanned out by
+ * ha_opendir().
+ *
+ * A successful reply marks the replying child in the fd's hafd_t state
+ * array and makes the overall result a success.  A failure other than
+ * ENOTCONN is remembered in local->op_errno.  The accumulated result is
+ * unwound when the last outstanding reply arrives.
+ *
+ * A missing fd context or an unknown replying child is tolerated instead of
+ * being dereferenced / indexed out of bounds.
+ */
+int32_t
+ha_opendir_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                fd_t *fd)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        xlator_t    **children    = NULL;
+        int           i           = 0;
+        int           child_count = 0;
+        int           callcnt     = 0;
+        int           ret         = 0;
+        call_frame_t *prev_frame  = NULL;
+        hafd_t       *hafdp       = NULL;
+        uint64_t      tmp_hafdp   = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        children = pvt->children;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+
+        ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
+        }
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+
+        /* index of the child this reply came from */
+        for (i = 0; i < child_count; i++)
+                if (children[i] == prev_frame->this)
+                        break;
+
+        LOCK (&frame->lock);
+        if (op_ret != -1) {
+                if (hafdp != NULL && i < child_count)
+                        hafdp->fdstate[i] = 1;
+                local->op_ret = 0;
+        }
+        if (op_ret == -1 && op_errno != ENOTCONN)
+                local->op_errno = op_errno;
+        callcnt = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 0) {
+                STACK_UNWIND (frame,
+                              local->op_ret,
+                              local->op_errno,
+                              local->fd);
+        }
+        return 0;
+}
+
+/*
+ * ha_opendir - open a directory on every child marked in the inode's state
+ * array.
+ *
+ * The per-child array is read from the inode context of loc->inode.  A new
+ * hafd_t (per-child fd state and a copy of the path) is stored in the fd
+ * context, and the opendir is wound to each marked child; ha_opendir_cbk
+ * collects the replies.
+ *
+ * Unwinds without winding in these cases:
+ *   -1/EINVAL   the inode context is missing;
+ *   -1/ENOTCONN no child is marked for this inode (previously nothing was
+ *               wound and the frame was never unwound);
+ *   -1/ENOMEM   an allocation failed.
+ */
+int32_t
+ha_opendir (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc, fd_t *fd)
+{
+        ha_local_t   *local        = NULL;
+        ha_private_t *pvt          = NULL;
+        char         *stateino     = NULL;
+        xlator_t    **children     = NULL;
+        int           cnt          = 0;
+        int           i            = 0;
+        int           child_count  = 0;
+        int           ret          = 0;
+        int           op_errno     = ENOMEM;
+        hafd_t       *hafdp        = NULL;
+        uint64_t      tmp_stateino = 0;
+
+        pvt = this->private;
+        children = pvt->children;
+        child_count = pvt->child_count;
+
+        ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
+        stateino = (char *)(long)tmp_stateino;
+        if (ret != 0 || stateino == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        for (i = 0; i < child_count; i++)
+                if (stateino[i])
+                        cnt++;
+        if (cnt == 0) {
+                op_errno = ENOTCONN;
+                goto err;
+        }
+
+        local = frame->local = CALLOC (1, sizeof (*local));
+        if (local == NULL)
+                goto err;
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        local->fd = fd;
+        local->call_count = cnt;
+
+        hafdp = CALLOC (1, sizeof (*hafdp));
+        if (hafdp == NULL)
+                goto err;
+        hafdp->fdstate = CALLOC (1, child_count);
+        hafdp->path = strdup (loc->path);
+        if (hafdp->fdstate == NULL || hafdp->path == NULL)
+                goto err;
+
+        LOCK_INIT (&hafdp->lock);
+        fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
+
+        /* stop right after the last wind: the final reply unwinds the frame */
+        for (i = 0; i < child_count; i++) {
+                if (stateino[i]) {
+                        STACK_WIND (frame,
+                                    ha_opendir_cbk,
+                                    children[i],
+                                    children[i]->fops->opendir,
+                                    loc, fd);
+                        if (--cnt == 0)
+                                break;
+                }
+        }
+        return 0;
+
+err:
+        /* only reached before hafdp is handed to the fd context */
+        if (hafdp != NULL) {
+                if (hafdp->fdstate != NULL) {
+                        FREE (hafdp->fdstate);
+                }
+                if (hafdp->path != NULL) {
+                        FREE (hafdp->path);
+                }
+                FREE (hafdp);
+        }
+        STACK_UNWIND (frame, -1, op_errno, fd);
+        return 0;
+}
+
+/*
+ * ha_getdents_cbk - reply handler for ha_getdents().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_getdents_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 dir_entry_t *entries,
+                 int32_t count)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+        return 0;
+}
+
+/*
+ * ha_getdents - wind a getdents to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_getdents (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             size_t size,
+             off_t offset,
+             int32_t flag)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL, 0);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset, flag);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_getdents_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->getdents,
+                           fd, size, offset, flag);
+        return 0;
+}
+
+/*
+ * ha_setdents_cbk - reply handler for ha_setdents().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_setdents_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_setdents - wind a setdents to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_setdents (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             int32_t flags,
+             dir_entry_t *entries,
+             int32_t count)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries, count);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_setdents_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->setdents,
+                           fd, flags, entries, count);
+        return 0;
+}
+
+/*
+ * ha_fsyncdir_cbk - reply handler for ha_fsyncdir().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_fsyncdir_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_fsyncdir - wind an fsyncdir to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_fsyncdir (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             int32_t flags)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fsyncdir_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fsyncdir,
+                           fd, flags);
+        return 0;
+}
+
+
+/*
+ * ha_statfs_cbk - reply handler for ha_statfs().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_statfs_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct statvfs *buf)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * ha_statfs - wind a statfs to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for loc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_statfs (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, loc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_statfs_stub (frame, ha_statfs, loc);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_statfs_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->statfs,
+                           loc);
+        return 0;
+}
+
+/*
+ * ha_setxattr_cbk - reply handler for ha_setxattr().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_setxattr_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_setxattr - wind a setxattr to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for loc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_setxattr (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             dict_t *dict,
+             int32_t flags)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, loc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_setxattr_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->setxattr,
+                           loc, dict, flags);
+        return 0;
+}
+
+/*
+ * ha_getxattr_cbk - reply handler for ha_getxattr().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_getxattr_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 dict_t *dict)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/*
+ * ha_getxattr - wind a getxattr to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for loc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_getxattr (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             const char *name)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, loc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, NULL);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_getxattr_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->getxattr,
+                           loc, name);
+        return 0;
+}
+
+/*
+ * ha_xattrop_cbk - reply handler for ha_xattrop().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_xattrop_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                dict_t *dict)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+
+/*
+ * ha_xattrop - wind an xattrop to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for loc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller (together with the caller's own @dict) without
+ * winding.  The request is saved as a call stub in local->stub and
+ * local->active is passed as the wind cookie.
+ */
+int32_t
+ha_xattrop (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            gf_xattrop_flags_t flags,
+            dict_t *dict)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, loc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, dict);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_xattrop_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->xattrop,
+                           loc, flags, dict);
+        return 0;
+}
+
+/*
+ * ha_fxattrop_cbk - reply handler for ha_fxattrop().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_fxattrop_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 dict_t *dict)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0) {
+                return 0;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/*
+ * ha_fxattrop - wind an fxattrop to the child picked by HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_fd() is called for @fd and frame->local is read back
+ * afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller (together with the caller's own @dict) without
+ * winding.  The request is saved as a call stub in local->stub and
+ * local->active is passed as the wind cookie.
+ */
+int32_t
+ha_fxattrop (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             gf_xattrop_flags_t flags,
+             dict_t *dict)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_fd (frame, fd);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc, dict);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fxattrop_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fxattrop,
+                           fd, flags, dict);
+        return 0;
+}
+
+/*
+ * ha_removexattr_cbk - reply handler for ha_removexattr().
+ *
+ * The reply is first handed to ha_handle_cbk(); it is unwound to the parent
+ * only when that helper returns 0.
+ * NOTE(review): a non-zero return presumably means the helper has taken over
+ * the frame (e.g. to retry elsewhere) -- confirm in ha_handle_cbk().
+ */
+int32_t
+ha_removexattr_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_removexattr - wind a removexattr to the child picked by
+ * HA_ACTIVE_CHILD().
+ *
+ * ha_alloc_init_inode() is called for loc->inode and frame->local is read
+ * back afterwards; a negative return is treated as a negated errno and is
+ * reported to the caller without winding.  The request is saved as a call
+ * stub in local->stub and local->active is passed as the wind cookie.
+ */
+int32_t
+ha_removexattr (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                const char *name)
+{
+        ha_local_t *local = NULL;
+        int         rc    = 0;
+
+        rc = ha_alloc_init_inode (frame, loc->inode);
+        if (rc < 0) {
+                STACK_UNWIND (frame, -1, -rc);
+                return 0;
+        }
+
+        local = frame->local;
+        local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_removexattr_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->removexattr,
+                           loc, name);
+        return 0;
+}
+
+/*
+ * ha_lk_setlk_unlck_cbk - reply handler for lk requests that were sent to
+ * several children at once (wound from ha_lk() and ha_lk_setlk_cbk()).
+ *
+ * Each reply decrements local->call_count; any success makes local->op_ret
+ * 0.  On the last reply the state snapshot and the stub are released and
+ * the frame is unwound:
+ *   - if the saved request was an F_UNLCK, with the accumulated result and
+ *     the saved lock;
+ *   - otherwise with -1/EIO and no lock.
+ */
+int32_t
+ha_lk_setlk_unlck_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno,
+                       struct flock *lock)
+{
+        ha_local_t  *local   = frame->local;
+        call_stub_t *stub    = NULL;
+        int          pending = 0;
+
+        LOCK (&frame->lock);
+        pending = --local->call_count;
+        if (op_ret == 0)
+                local->op_ret = 0;
+        UNLOCK (&frame->lock);
+
+        if (pending != 0)
+                return 0;
+
+        stub = local->stub;
+        FREE (local->state);
+        if (stub->args.lk.lock.l_type != F_UNLCK) {
+                STACK_UNWIND (frame, -1, EIO, NULL);
+        } else {
+                STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock);
+        }
+        call_stub_destroy (stub);
+        return 0;
+}
+
+/*
+ * ha_lk_setlk_cbk - reply handler for a lock request that ha_lk() applies
+ * to the children one after another.
+ *
+ * On success, or when the child is disconnected (ENOTCONN), the same
+ * request is wound to the next child set in local->state; when there is no
+ * such child the frame is unwound with the accumulated result.
+ *
+ * On any other failure the lock is rolled back: an F_UNLCK copy of the
+ * saved request is sent to every earlier child set in local->state, and
+ * ha_lk_setlk_unlck_cbk unwinds once those replies are in.  If there is no
+ * such child the failure is unwound directly.
+ *
+ * Fixes relative to the previous version:
+ *   - the replying child is looked up in pvt->children (it was compared
+ *     with the cookie, never matched, and the index then ran past the
+ *     array);
+ *   - the rollback counts and addresses only the children before the
+ *     failing one (the count tested state[i] instead of state[j]);
+ *   - the rollback request really is an unlock (l_type = F_UNLCK), and
+ *     local->call_count is set for ha_lk_setlk_unlck_cbk;
+ *   - the final unwind reports local->op_ret instead of a hard-coded 0, so
+ *     "every child was disconnected" is not reported as success.
+ */
+int32_t
+ha_lk_setlk_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct flock *lock)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        xlator_t    **children    = NULL;
+        int           i           = 0;
+        int           idx         = 0;
+        int           cnt         = 0;
+        int           child_count = 0;
+        call_frame_t *prev_frame  = NULL;
+        call_stub_t  *stub        = NULL;
+        char         *state       = NULL;
+        struct flock  unlock;
+
+        local = frame->local;
+        pvt = this->private;
+        children = pvt->children;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+        state = local->state;
+        stub = local->stub;
+
+        /* index of the child this reply came from */
+        for (idx = 0; idx < child_count; idx++) {
+                if (prev_frame->this == children[idx])
+                        break;
+        }
+
+        if (op_ret == 0)
+                local->op_ret = 0;
+
+        if ((op_ret == 0) || (op_ret == -1 && op_errno == ENOTCONN)) {
+                /* move on to the next child that has the fd open */
+                for (i = idx + 1; i < child_count; i++) {
+                        if (state[i])
+                                break;
+                }
+                if (i >= child_count) {
+                        FREE (local->state);
+                        STACK_UNWIND (frame, local->op_ret, op_errno,
+                                      &stub->args.lk.lock);
+                        call_stub_destroy (stub);
+                        return 0;
+                }
+                STACK_WIND (frame,
+                            ha_lk_setlk_cbk,
+                            children[i],
+                            children[i]->fops->lk,
+                            stub->args.lk.fd,
+                            stub->args.lk.cmd,
+                            &stub->args.lk.lock);
+                return 0;
+        }
+
+        /* real failure: children before idx may already hold the lock */
+        cnt = 0;
+        for (i = 0; i < idx; i++) {
+                if (state[i])
+                        cnt++;
+        }
+
+        if (cnt == 0) {
+                FREE (local->state);
+                call_stub_destroy (stub);
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              lock);
+                return 0;
+        }
+
+        /*
+         * The saved request keeps its original l_type: that is how
+         * ha_lk_setlk_unlck_cbk tells a rollback from a user unlock.
+         */
+        unlock = stub->args.lk.lock;
+        unlock.l_type = F_UNLCK;
+        local->call_count = cnt;
+
+        /* stop right after the last wind: the final reply frees the state */
+        for (i = 0; i < idx; i++) {
+                if (state[i]) {
+                        STACK_WIND (frame,
+                                    ha_lk_setlk_unlck_cbk,
+                                    children[i],
+                                    children[i]->fops->lk,
+                                    stub->args.lk.fd,
+                                    stub->args.lk.cmd,
+                                    &unlock);
+                        if (--cnt == 0)
+                                break;
+                }
+        }
+        return 0;
+}
+
+/*
+ * ha_lk_getlk_cbk - reply handler for an F_GETLK request wound by ha_lk().
+ *
+ * A successful reply is unwound immediately.  On failure the request is
+ * retried on the next child set in local->state; when there is none the
+ * failure is unwound.  The state snapshot and the stub are released before
+ * unwinding.
+ *
+ * The search for the next child now starts after the child that has just
+ * replied; previously it started at that same child, so a failing child
+ * was asked again and again.
+ */
+int32_t
+ha_lk_getlk_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct flock *lock)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        fd_t         *fd          = NULL;
+        int           child_count = 0;
+        int           i           = 0;
+        xlator_t    **children    = NULL;
+        call_frame_t *prev_frame  = NULL;
+
+        local = frame->local;
+        pvt = this->private;
+        fd = local->stub->args.lk.fd;
+        child_count = pvt->child_count;
+        children = pvt->children;
+        prev_frame = cookie;
+
+        if (op_ret == 0) {
+                FREE (local->state);
+                call_stub_destroy (local->stub);
+                STACK_UNWIND (frame, 0, 0, lock);
+                return 0;
+        }
+
+        /* index of the child this reply came from */
+        for (i = 0; i < child_count; i++) {
+                if (prev_frame->this == children[i])
+                        break;
+        }
+
+        /* next child after it that has the fd open */
+        for (i = i + 1; i < child_count; i++) {
+                if (local->state[i])
+                        break;
+        }
+
+        if (i >= child_count) {
+                FREE (local->state);
+                call_stub_destroy (local->stub);
+                STACK_UNWIND (frame, op_ret, op_errno, lock);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    ha_lk_getlk_cbk,
+                    children[i],
+                    children[i]->fops->lk,
+                    fd,
+                    local->stub->args.lk.cmd,
+                    &local->stub->args.lk.lock);
+        return 0;
+}
+
+/*
+ * ha_lk - POSIX record lock on an open fd.
+ *
+ * A snapshot of the fd's per-child state (hafd_t.fdstate) is taken under
+ * the hafd lock and the request is saved as a call stub.  Then:
+ *   - F_GETLK is wound to the first child set in the snapshot
+ *     (ha_lk_getlk_cbk walks on to later children on failure);
+ *   - F_SETLK with l_type F_UNLCK is wound to every such child at once
+ *     (ha_lk_setlk_unlck_cbk collects the replies);
+ *   - any other request is wound to the first such child
+ *     (ha_lk_setlk_cbk applies it to the others one by one).
+ *
+ * Unwinds without winding in these cases:
+ *   -1/EBADF    the fd has no context for this translator;
+ *   -1/ENOMEM   an allocation failed;
+ *   -1/ENOTCONN no child is set in the fd state.
+ *
+ * Previously a freshly allocated local had active = -1 and was then
+ * rejected by an "active == -1" test, so every first call failed with
+ * ENOTCONN; availability is now decided from the fd state itself, which
+ * also keeps children[] from being indexed with child_count.
+ */
+int32_t
+ha_lk (call_frame_t *frame,
+       xlator_t *this,
+       fd_t *fd,
+       int32_t cmd,
+       struct flock *lock)
+{
+        ha_local_t   *local       = NULL;
+        ha_private_t *pvt         = NULL;
+        hafd_t       *hafdp       = NULL;
+        char         *state       = NULL;
+        int           child_count = 0;
+        int           i           = 0;
+        int           first       = 0;
+        int           cnt         = 0;
+        int           ret         = 0;
+        xlator_t    **children    = NULL;
+        uint64_t      tmp_hafdp   = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        children = pvt->children;
+
+        ret = fd_ctx_get (fd, this, &tmp_hafdp);
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+        if (ret < 0 || hafdp == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed");
+                STACK_UNWIND (frame, -1, EBADF, NULL);
+                return 0;
+        }
+
+        if (local == NULL) {
+                local = frame->local = CALLOC (1, sizeof (*local));
+                if (local == NULL) {
+                        STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                        return 0;
+                }
+                local->active = -1;
+                local->op_ret = -1;
+                local->op_errno = ENOTCONN;
+        }
+
+        local->state = CALLOC (1, child_count);
+        if (local->state == NULL) {
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        state = hafdp->fdstate;
+        LOCK (&hafdp->lock);
+        memcpy (local->state, state, child_count);
+        UNLOCK (&hafdp->lock);
+
+        /* first child that has the fd open, and how many do in total */
+        first = child_count;
+        for (i = 0; i < child_count; i++) {
+                if (local->state[i]) {
+                        cnt++;
+                        if (first == child_count)
+                                first = i;
+                }
+        }
+        if (cnt == 0) {
+                FREE (local->state);
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock);
+        if (local->stub == NULL) {
+                FREE (local->state);
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        if (cmd == F_GETLK) {
+                STACK_WIND (frame,
+                            ha_lk_getlk_cbk,
+                            children[first],
+                            children[first]->fops->lk,
+                            fd,
+                            cmd,
+                            lock);
+        } else if (cmd == F_SETLK && lock->l_type == F_UNLCK) {
+                local->call_count = cnt;
+                /* stop right after the last wind: the final reply unwinds */
+                for (i = first; i < child_count; i++) {
+                        if (local->state[i]) {
+                                STACK_WIND (frame,
+                                            ha_lk_setlk_unlck_cbk,
+                                            children[i],
+                                            children[i]->fops->lk,
+                                            fd, cmd, lock);
+                                if (--cnt == 0)
+                                        break;
+                        }
+                }
+        } else {
+                STACK_WIND (frame,
+                            ha_lk_setlk_cbk,
+                            children[first],
+                            children[first]->fops->lk,
+                            fd,
+                            cmd,
+                            lock);
+        }
+        return 0;
+}
+
+/*
+ * Shared callback for ha_inodelk and ha_entrylk: the reply is passed to the
+ * parent only when ha_handle_cbk() returns 0.  Always returns 0.
+ */
+int32_t
+ha_inode_entry_lk_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * ha_inodelk - send an inodelk request to the currently active child.
+ *
+ * ha_alloc_init_inode() prepares frame->local; a negative return from it is
+ * negated and reported as op_errno.  A resume stub is stored in local->stub
+ * before winding.  Always returns 0.
+ */
+int32_t
+ha_inodelk (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            int32_t cmd,
+            struct flock *lock)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno >= 0) {
+                local = frame->local;
+                local->stub = fop_inodelk_stub (frame, ha_inodelk, loc, cmd, lock);
+                STACK_WIND_COOKIE (frame,
+                                   ha_inode_entry_lk_cbk,
+                                   (void *)(long)local->active,
+                                   HA_ACTIVE_CHILD(this, local),
+                                   HA_ACTIVE_CHILD(this, local)->fops->inodelk,
+                                   loc,
+                                   cmd,
+                                   lock);
+                return 0;
+        }
+
+        op_errno = -op_errno;
+        STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+/*
+ * ha_entrylk - send an entrylk request to the currently active child.
+ *
+ * ha_alloc_init_inode() prepares frame->local; a negative return from it is
+ * negated and reported as op_errno.  A resume stub is stored in local->stub
+ * before winding.  Always returns 0.
+ */
+int32_t
+ha_entrylk (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            const char *basename,
+            entrylk_cmd cmd,
+            entrylk_type type)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno >= 0) {
+                local = frame->local;
+                local->stub = fop_entrylk_stub (frame, ha_entrylk, loc, basename, cmd, type);
+                STACK_WIND_COOKIE (frame,
+                                   ha_inode_entry_lk_cbk,
+                                   (void *)(long)local->active,
+                                   HA_ACTIVE_CHILD(this, local),
+                                   HA_ACTIVE_CHILD(this, local)->fops->entrylk,
+                                   loc, basename, cmd, type);
+                return 0;
+        }
+
+        op_errno = -op_errno;
+        STACK_UNWIND (frame, -1, op_errno);
+        return 0;
+}
+
+/*
+ * Callback for ha_checksum: both checksums are passed to the parent only
+ * when ha_handle_cbk() returns 0.  Always returns 0.
+ */
+int32_t
+ha_checksum_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 uint8_t *file_checksum,
+                 uint8_t *dir_checksum)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
+        return 0;
+}
+
+/*
+ * ha_checksum - send a checksum request to the currently active child.
+ *
+ * ha_alloc_init_inode() prepares frame->local; a negative return from it is
+ * negated and reported as op_errno with NULL checksums.  A resume stub is
+ * stored in local->stub before winding.  Always returns 0.
+ */
+int32_t
+ha_checksum (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             int32_t flag)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno >= 0) {
+                local = frame->local;
+                local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag);
+
+                STACK_WIND_COOKIE (frame,
+                                   ha_checksum_cbk,
+                                   (void *)(long)local->active,
+                                   HA_ACTIVE_CHILD(this, local),
+                                   HA_ACTIVE_CHILD(this, local)->fops->checksum,
+                                   loc,
+                                   flag);
+                return 0;
+        }
+
+        op_errno = -op_errno;
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Callback for ha_readdir: the entries are passed to the parent only when
+ * ha_handle_cbk() returns 0.  Always returns 0.
+ */
+int32_t
+ha_readdir_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                gf_dirent_t *entries)
+{
+        if (ha_handle_cbk (frame, cookie, op_ret, op_errno) != 0)
+                return 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, entries);
+        return 0;
+}
+
+/*
+ * ha_readdir - send a readdir request to the currently active child.
+ *
+ * ha_alloc_init_fd() prepares frame->local; a negative return from it is
+ * negated and reported as op_errno (previously the error path always
+ * reported ENOTCONN and dropped the computed value).  A resume stub is
+ * stored in local->stub before winding.  Always returns 0.
+ */
+int32_t
+ha_readdir (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            size_t size,
+            off_t off)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_fd (frame, fd);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_readdir_stub (frame, ha_readdir, fd, size, off);
+        STACK_WIND_COOKIE (frame,
+                           ha_readdir_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->readdir,
+                           fd, size, off);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/* Management operations */
+
+/*
+ * Callback for the stats management op.
+ *
+ * Any reply other than (-1, ENOTCONN) is passed to the parent unchanged.
+ * On ENOTCONN the request is re-sent to the next child after the replying
+ * one whose pvt->state entry is set; when none is left the parent gets
+ * (-1, ENOTCONN, NULL).  Always returns 0.
+ */
+int32_t
+ha_stats_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct xlator_stats *stats)
+{
+        ha_local_t    *local      = frame->local;
+        ha_private_t  *pvt        = this->private;
+        call_frame_t  *prev_frame = cookie;
+        xlator_t     **children   = pvt->children;
+        int            i          = 0;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              stats);
+                return 0;
+        }
+
+        /* locate the child that just answered ... */
+        while ((i < pvt->child_count) && (prev_frame->this != children[i]))
+                i++;
+
+        /* ... and pick the next one that is marked up */
+        for (i++; i < pvt->child_count; i++) {
+                if (pvt->state[i])
+                        break;
+        }
+
+        if (i == pvt->child_count) {
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    ha_stats_cbk,
+                    children[i],
+                    children[i]->mops->stats,
+                    local->flags);
+        return 0;
+}
+
+/*
+ * ha_stats - stats management op: wound to the first child whose pvt->state
+ * entry is set; `flags` is kept in local->flags so the callback can retry
+ * on another child.
+ *
+ * Unwinds with (-1, ENOMEM, NULL) when the local cannot be allocated and
+ * with (-1, ENOTCONN, NULL) when no child is up.  Always returns 0.
+ */
+int32_t
+ha_stats (call_frame_t *frame,
+          xlator_t *this,
+          int32_t flags)
+{
+        ha_local_t *local = NULL;
+        ha_private_t *pvt = NULL;
+        xlator_t **children = NULL;
+        int i = 0;
+
+        local = frame->local = CALLOC (1, sizeof (*local));
+        if (local == NULL) {
+                /* local->flags is written below; fail instead of crashing */
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        pvt = this->private;
+        children = pvt->children;
+        for (i = 0; i < pvt->child_count; i++) {
+                if (pvt->state[i])
+                        break;
+        }
+
+        if (i == pvt->child_count) {
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+        local->flags = flags;
+
+        STACK_WIND (frame,
+                    ha_stats_cbk,
+                    children[i],
+                    children[i]->mops->stats,
+                    flags);
+        return 0;
+}
+
+
+/*
+ * Callback for the getspec management op.
+ *
+ * Any reply other than (-1, ENOTCONN) is passed to the parent unchanged.
+ * On ENOTCONN the request (local->pattern, local->flags) is re-sent to the
+ * next child after the replying one whose pvt->state entry is set; when
+ * none is left the parent gets (-1, ENOTCONN, NULL).  Always returns 0.
+ */
+int32_t
+ha_getspec_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                char *spec_data)
+{
+        ha_local_t    *local      = frame->local;
+        ha_private_t  *pvt        = this->private;
+        call_frame_t  *prev_frame = cookie;
+        xlator_t     **children   = pvt->children;
+        int            i          = 0;
+
+        if ((op_ret != -1) || (op_errno != ENOTCONN)) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              spec_data);
+                return 0;
+        }
+
+        /* locate the child that just answered ... */
+        while ((i < pvt->child_count) && (prev_frame->this != children[i]))
+                i++;
+
+        /* ... and pick the next one that is marked up */
+        for (i++; i < pvt->child_count; i++) {
+                if (pvt->state[i])
+                        break;
+        }
+
+        if (i == pvt->child_count) {
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    ha_getspec_cbk,
+                    children[i],
+                    children[i]->mops->getspec,
+                    local->pattern,
+                    local->flags);
+        return 0;
+}
+
+/*
+ * ha_getspec - getspec management op: wound to the first child whose
+ * pvt->state entry is set; `key` and `flags` are kept in the local so the
+ * callback can retry on another child.
+ *
+ * The local is allocated exactly once (the original allocated it twice and
+ * lost the first block).  Unwinds with (-1, ENOMEM, NULL) when that
+ * allocation fails and with (-1, ENOTCONN, NULL) when no child is up.
+ * Always returns 0.
+ */
+int32_t
+ha_getspec (call_frame_t *frame,
+            xlator_t *this,
+            const char *key,
+            int32_t flags)
+{
+        ha_local_t *local = NULL;
+        ha_private_t *pvt = NULL;
+        xlator_t **children = NULL;
+        int i = 0;
+
+        local = frame->local = CALLOC (1, sizeof (*local));
+        if (local == NULL) {
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        pvt = this->private;
+        children = pvt->children;
+
+        for (i = 0; i < pvt->child_count; i++) {
+                if (pvt->state[i])
+                        break;
+        }
+
+        if (i == pvt->child_count) {
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+        local->flags = flags;
+        /* only the pointer is stored; the key string is not copied */
+        local->pattern = (char *)key;
+
+        STACK_WIND (frame,
+                    ha_getspec_cbk,
+                    children[i],
+                    children[i]->mops->getspec,
+                    key, flags);
+        return 0;
+}
+
+/*
+ * ha_closedir - releasedir callback: removes this translator's context from
+ * the fd and releases it.
+ *
+ * Besides fdstate, path and the lock, the hafd_t itself is released too;
+ * once fd_ctx_del() has removed it nothing else refers to it.
+ * NOTE(review): assumes the hafd_t was obtained from the same allocator
+ * FREE() pairs with - confirm against the opendir path.
+ * Always returns 0.
+ */
+int32_t
+ha_closedir (xlator_t *this,
+             fd_t *fd)
+{
+        hafd_t *hafdp = NULL;
+        int op_errno = 0;
+        uint64_t tmp_hafdp = 0;
+
+        op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
+        if (op_errno != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
+                return 0;
+        }
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+        if (hafdp == NULL)
+                return 0;       /* nothing was stored for this fd */
+
+        FREE (hafdp->fdstate);
+        FREE (hafdp->path);
+        LOCK_DESTROY (&hafdp->lock);
+        FREE (hafdp);
+        return 0;
+}
+
+/*
+ * ha_close - release callback: removes this translator's context from the
+ * fd and releases it.
+ *
+ * Besides fdstate, path and the lock, the hafd_t itself is released too;
+ * once fd_ctx_del() has removed it nothing else refers to it.
+ * NOTE(review): assumes the hafd_t was obtained from the same allocator
+ * FREE() pairs with - confirm against the open/create path.
+ * Always returns 0.
+ */
+int32_t
+ha_close (xlator_t *this,
+          fd_t *fd)
+{
+        hafd_t *hafdp = NULL;
+        int op_errno = 0;
+        uint64_t tmp_hafdp = 0;
+
+        op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
+        if (op_errno != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
+                return 0;
+        }
+        hafdp = (hafd_t *)(long)tmp_hafdp;
+        if (hafdp == NULL)
+                return 0;       /* nothing was stored for this fd */
+
+        FREE (hafdp->fdstate);
+        FREE (hafdp->path);
+        LOCK_DESTROY (&hafdp->lock);
+        FREE (hafdp);
+        return 0;
+}
+
+/* notify */
+/*
+ * notify - track child up/down events in pvt->state and decide which of
+ * them are passed on through default_notify().
+ *
+ *   CHILD_DOWN: the child is marked down; the event is passed on only once
+ *               every child is down.
+ *   CHILD_UP:   the child is marked up; the event is passed on only when it
+ *               is the sole child that is up.
+ *   other:      always passed on.
+ *
+ * An up/down event whose `data` is not one of this translator's children is
+ * logged and ignored (the original indexed one past the end of the arrays
+ * in that case).  Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        ha_private_t *pvt = NULL;
+        int32_t i = 0, upcnt = 0;
+
+        pvt = this->private;
+        if (pvt == NULL) {
+                gf_log (this->name, GF_LOG_DEBUG, "got notify before init()");
+                return 0;
+        }
+
+        switch (event)
+        {
+        case GF_EVENT_CHILD_DOWN:
+        {
+                for (i = 0; i < pvt->child_count; i++) {
+                        if (data == pvt->children[i])
+                                break;
+                }
+                if (i == pvt->child_count) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "GF_EVENT_CHILD_DOWN from unknown subvolume, ignoring");
+                        break;
+                }
+                gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_DOWN from %s", pvt->children[i]->name);
+                pvt->state[i] = 0;
+                for (i = 0; i < pvt->child_count; i++) {
+                        if (pvt->state[i])
+                                break;
+                }
+                if (i == pvt->child_count) {
+                        default_notify (this, event, data);
+                }
+        }
+        break;
+        case GF_EVENT_CHILD_UP:
+        {
+                for (i = 0; i < pvt->child_count; i++) {
+                        if (data == pvt->children[i])
+                                break;
+                }
+                if (i == pvt->child_count) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "GF_EVENT_CHILD_UP from unknown subvolume, ignoring");
+                        break;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_UP from %s", pvt->children[i]->name);
+
+                pvt->state[i] = 1;
+
+                for (i = 0; i < pvt->child_count; i++) {
+                        if (pvt->state[i])
+                                upcnt++;
+                }
+
+                if (upcnt == 1) {
+                        default_notify (this, event, data);
+                }
+        }
+        break;
+
+        default:
+        {
+                default_notify (this, event, data);
+        }
+        }
+
+        return 0;
+}
+
+/*
+ * init - build the translator's private state: the array of child xlators,
+ * a zeroed per-child state array and the optional "preferred-subvolume"
+ * option (-1 when the option is not set).
+ *
+ * Returns 0 on success; -1 when no child is configured or an allocation
+ * fails (nothing stays allocated in that case).
+ */
+int
+init (xlator_t *this)
+{
+        ha_private_t *pvt = NULL;
+        xlator_list_t *trav = NULL;
+        int count = 0, ret = 0;
+
+        if (!this->children) {
+                gf_log (this->name,GF_LOG_ERROR,
+                        "FATAL: ha should have one or more child defined");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        pvt = CALLOC (1, sizeof (ha_private_t));
+        if (pvt == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                return -1;
+        }
+
+        ret = dict_get_int32 (this->options, "preferred-subvolume",
+                              &pvt->pref_subvol);
+        if (ret < 0) {
+                pvt->pref_subvol = -1;
+        }
+
+        for (trav = this->children; trav; trav = trav->next)
+                count++;
+
+        pvt->child_count = count;
+        pvt->children = CALLOC (count, sizeof (xlator_t*));
+        pvt->state = CALLOC (1, count);
+        if ((pvt->children == NULL) || (pvt->state == NULL)) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                FREE (pvt->children);
+                FREE (pvt->state);
+                FREE (pvt);
+                return -1;
+        }
+
+        count = 0;
+        for (trav = this->children; trav; trav = trav->next) {
+                pvt->children[count] = trav->xlator;
+                count++;
+        }
+
+        this->private = pvt;
+        return 0;
+}
+
+/*
+ * fini - release everything init() allocated: the children array, the
+ * per-child state array and the private structure itself.  Safe to call
+ * when init() never ran or failed.
+ */
+void
+fini (xlator_t *this)
+{
+        ha_private_t *priv = NULL;
+
+        priv = this->private;
+        if (priv == NULL)
+                return;
+
+        FREE (priv->children);
+        FREE (priv->state);
+        FREE (priv);
+        this->private = NULL;
+        return;
+}
+
+
+struct xlator_fops fops = { /* file operations implemented by ha; NOTE(review): ha_inodelk and ha_entrylk are defined in this file but not registered here - confirm intent */
+ .lookup = ha_lookup,
+ .stat = ha_stat,
+ .readlink = ha_readlink,
+ .mknod = ha_mknod,
+ .mkdir = ha_mkdir,
+ .unlink = ha_unlink,
+ .rmdir = ha_rmdir,
+ .symlink = ha_symlink,
+ .rename = ha_rename,
+ .link = ha_link,
+ .chmod = ha_chmod,
+ .chown = ha_chown,
+ .truncate = ha_truncate,
+ .utimens = ha_utimens,
+ .create = ha_create,
+ .open = ha_open,
+ .readv = ha_readv,
+ .writev = ha_writev,
+ .statfs = ha_statfs,
+ .flush = ha_flush,
+ .fsync = ha_fsync,
+ .setxattr = ha_setxattr,
+ .getxattr = ha_getxattr,
+ .removexattr = ha_removexattr,
+ .opendir = ha_opendir,
+ .readdir = ha_readdir,
+ .getdents = ha_getdents,
+ .fsyncdir = ha_fsyncdir,
+ .access = ha_access,
+ .ftruncate = ha_ftruncate,
+ .fstat = ha_fstat,
+ .lk = ha_lk,
+ .fchmod = ha_fchmod,
+ .fchown = ha_fchown,
+ .setdents = ha_setdents,
+ .lookup_cbk = ha_lookup_cbk, /* a callback, not a fop entry point, is registered in this slot */
+ .checksum = ha_checksum,
+ .xattrop = ha_xattrop,
+ .fxattrop = ha_fxattrop
+};
+
+struct xlator_mops mops = { /* management operations implemented by ha */
+ .stats = ha_stats,
+ .getspec = ha_getspec,
+};
+
+struct xlator_cbks cbks = { /* resource-release callbacks implemented by ha */
+ .release = ha_close, /* deletes and frees the per-fd context */
+ .releasedir = ha_closedir, /* same, for directory fds */
+ .forget = ha_forget,
+};
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
new file mode 100644
index 00000000000..77a04f16549
--- /dev/null
+++ b/xlators/cluster/ha/src/ha.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __HA_H_
+#define __HA_H_
+
+typedef struct { /* per-call state kept in frame->local by the ha fops */
+ call_stub_t *stub; /* stub created with fop_*_stub() for the fop being executed */
+ int32_t op_ret, op_errno; /* ha_lk initialises these to -1 / ENOTCONN */
+ int32_t active, tries, revalidate, revalidate_error; /* active: index used by HA_ACTIVE_CHILD; ha_lk treats -1 as "none" */
+ int32_t call_count; /* ha_lk: number of children an unlock is wound to */
+ char *state, *pattern; /* state: private copy of hafd_t.fdstate (ha_lk); pattern: getspec key */
+ dict_t *dict;
+ loc_t *loc;
+ struct stat buf;
+ fd_t *fd;
+ inode_t *inode;
+ int32_t flags; /* flags saved by ha_stats / ha_getspec for a retry */
+ int32_t first_success;
+} ha_local_t;
+
+typedef struct { /* translator-wide state stored in this->private by init() */
+ char *state; /* one byte per child: set to 1 on CHILD_UP, 0 on CHILD_DOWN in notify() */
+ xlator_t **children; /* child xlators, in volfile order as copied by init() */
+ int child_count, pref_subvol; /* pref_subvol: "preferred-subvolume" option, -1 when unset */
+} ha_private_t;
+
+typedef struct { /* per-fd state stored in the fd context of this translator */
+ char *fdstate; /* one byte per child; ha_lk copies child_count bytes of it under `lock` */
+ char *path; /* freed together with fdstate in ha_close / ha_closedir */
+ gf_lock_t lock; /* taken by ha_lk while copying fdstate */
+ int active;
+} hafd_t;
+
+/* Child xlator selected by local->active; arguments are parenthesized so any pointer expression can be passed. */
+#define HA_ACTIVE_CHILD(this, local) (((ha_private_t *)(this)->private)->children[(local)->active])
+
+extern int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd); /* callers in ha.c treat a negative return as a negated errno and then use frame->local */
+
+extern int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno) ; /* callers in ha.c unwind the fop only when this returns 0 */
+
+extern int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode); /* callers in ha.c treat a negative return as a negated errno and then use frame->local */
+
+#endif
diff --git a/xlators/cluster/map/Makefile.am b/xlators/cluster/map/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/cluster/map/Makefile.am
@@ -0,0 +1,3 @@
+# Recurse into src/, which holds this directory's only Makefile.am listed in SUBDIRS.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
new file mode 100644
index 00000000000..44ee4d9eed3
--- /dev/null
+++ b/xlators/cluster/map/src/Makefile.am
@@ -0,0 +1,15 @@
+# Builds the "map" cluster translator as a loadable libtool module.
+xlator_LTLIBRARIES = map.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+# libtool spells this link-mode flag "-avoid-version"; "-avoidversion" is not
+# a libtool option.
+map_la_LDFLAGS = -module -avoid-version
+
+map_la_SOURCES = map.c map-helper.c
+map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = map.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
new file mode 100644
index 00000000000..4e51219d491
--- /dev/null
+++ b/xlators/cluster/map/src/map-helper.c
@@ -0,0 +1,357 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "map.h"
+
+
+/*
+ * Return the subvolume stored right after `prev` in priv->xlarray, or NULL
+ * when `prev` is the last entry or is not present at all.
+ */
+xlator_t *
+map_subvol_next (xlator_t *this, xlator_t *prev)
+{
+        map_private_t *priv = this->private;
+        int            i    = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (priv->xlarray[i].xl != prev)
+                        continue;
+
+                if ((i + 1) < priv->child_count)
+                        return priv->xlarray[i + 1].xl;
+
+                return NULL;
+        }
+
+        return NULL;
+}
+
+/*
+ * Return the index of `subvol` within priv->xlarray, or -1 when it is not
+ * one of the entries.
+ */
+int
+map_subvol_cnt (xlator_t *this, xlator_t *subvol)
+{
+        map_private_t *priv = this->private;
+        int            i    = 0;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (subvol == priv->xlarray[i].xl)
+                        return i;
+        }
+
+        return -1;
+}
+
+/*
+ * Encode a subvolume-local value `x` together with the index of `subvol`:
+ * y = x * child_count + index.  The all-ones value is passed through
+ * untouched.  The result is stored through y_p when y_p is non-NULL.
+ * Always returns 0.
+ */
+int
+map_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+{
+        map_private_t *priv = NULL;
+        int            cnt  = 0;
+        int            max  = 0;
+        uint64_t       y    = (uint64_t) -1;
+
+        if (x != ((uint64_t) -1)) {
+                priv = this->private;
+
+                max = priv->child_count;
+                cnt = map_subvol_cnt (this, subvol);
+
+                y = ((x * max) + cnt);
+        }
+
+        if (y_p)
+                *y_p = y;
+
+        return 0;
+}
+
+
+/*
+ * Inverse of map_itransform(): split `y` into the subvolume index
+ * (y % child_count) and the subvolume-local value (y / child_count).
+ * Each result is stored only through a non-NULL output pointer.
+ * Always returns 0.
+ */
+int
+map_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
+                  uint64_t *x_p)
+{
+        map_private_t *priv = this->private;
+        int            max  = priv->child_count;
+        int            cnt  = y % max;
+        uint64_t       x    = y / max;
+
+        if (subvol_p)
+                *subvol_p = priv->xlarray[cnt].xl;
+
+        if (x_p)
+                *x_p = x;
+
+        return 0;
+}
+
+
+/*
+ * Pick the subvolume for `path` from the configured directory patterns.
+ *
+ * Returns NULL for "/" itself; otherwise the subvolume of the first pattern
+ * whose directory is a prefix of `path` ending at a '/' or at the end of
+ * the string, and priv->default_xl when no pattern matches.
+ */
+xlator_t *
+get_mapping_subvol_from_path (xlator_t *this, const char *path)
+{
+        map_private_t      *priv = NULL;
+        struct map_pattern *map  = NULL;
+
+        /* To make sure we handle '/' properly */
+        if (strcmp (path, "/") == 0)
+                return NULL;
+
+        priv = this->private;
+
+        for (map = priv->map; map; map = map->next) {
+                if (strncmp (map->directory, path, map->dir_len) != 0)
+                        continue;
+
+                if ((path[map->dir_len] == '/') ||
+                    (path[map->dir_len] == '\0'))
+                        return map->xl;
+        }
+
+        return priv->default_xl;
+}
+
+/*
+ * Return the subvolume pointer stored for this translator in the context of
+ * `inode`, or NULL when inode_ctx_get() reports failure.
+ */
+xlator_t *
+get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode)
+{
+        uint64_t subvol = 0;
+
+        if (inode_ctx_get (inode, this, &subvol) != 0)
+                return NULL;
+
+        return (xlator_t *)(long)subvol;
+}
+
+/*
+ * Mark `subvol` as mapped in priv->xlarray.
+ *
+ * Returns 0 when the subvolume was found and not yet mapped; -1 (after
+ * logging) when it is already mapped or is not in the array.
+ */
+int
+check_multiple_volume_entry (xlator_t *this,
+                             xlator_t *subvol)
+{
+        map_private_t *priv = this->private;
+        int            idx  = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (priv->xlarray[idx].xl == subvol)
+                        break;
+        }
+
+        if (idx == priv->child_count) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "subvolume '%s' is not found",
+                        subvol->name);
+                return -1;
+        }
+
+        if (priv->xlarray[idx].mapped) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "subvolume '%s' is already mapped",
+                        subvol->name);
+                return -1;
+        }
+
+        priv->xlarray[idx].mapped = 1;
+        return 0;
+}
+
+/*
+ * verify_dir_and_assign_subvol - register one "directory -> subvolume"
+ * mapping.
+ *
+ * `directory` is either "*" (the default subvolume) or an absolute
+ * top-level directory; a single trailing '/' is tolerated and excluded from
+ * the stored length.  `subvol` must name one of this translator's children
+ * that has not been mapped before.
+ *
+ * Returns 0 on success and -1 (after logging) on an invalid directory, an
+ * unknown or already mapped subvolume, a repeated "*" or an allocation
+ * failure.
+ */
+int
+verify_dir_and_assign_subvol (xlator_t *this,
+                              const char *directory,
+                              const char *subvol)
+{
+        int default_flag = 0;
+        int ret = -1;
+        size_t idx = 0;
+        size_t dir_len = 0;
+        map_private_t *priv = NULL;
+        xlator_list_t *trav = NULL;
+        struct map_pattern *tmp_map = NULL;
+
+        priv = this->private;
+        dir_len = strlen (directory);
+
+        /* check if directory is valid, ie, its a top level dir, and
+         * not includes a '*' in it.
+         */
+        if (!strcmp ("*", directory)) {
+                default_flag = 1;
+        } else {
+                if (directory[0] != '/') {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "map takes absolute path, starting with '/'. "
+                                "not '%s'", directory);
+                        goto out;
+                }
+                /* a '/' anywhere but first or last means a nested path */
+                for (idx = 1; (idx + 1) < dir_len; idx++) {
+                        if (directory[idx] == '/') {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "map takes only top level directory, "
+                                        "not '%s'", directory);
+                                goto out;
+                        }
+                }
+        }
+
+        /* Assign proper subvolume */
+        trav = this->children;
+        while (trav) {
+                if (!strcmp (trav->xlator->name, subvol)) {
+
+                        /* Check if there is another directory for
+                         * same volume, if yes, return error.
+                         */
+                        ret = check_multiple_volume_entry (this,
+                                                           trav->xlator);
+                        if (ret != 0) {
+                                goto out;
+                        }
+
+                        ret = 0;
+                        if (default_flag) {
+                                if (priv->default_xl) {
+                                        ret = -1;
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "'*' specified more than "
+                                                "once. don't confuse me!!!");
+                                }
+
+                                priv->default_xl = trav->xlator;
+                                goto out;
+                        }
+
+                        tmp_map = CALLOC (1, sizeof (struct map_pattern));
+                        if (tmp_map == NULL) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory");
+                                ret = -1;
+                                goto out;
+                        }
+                        tmp_map->xl = trav->xlator;
+                        tmp_map->dir_len = dir_len;
+
+                        /* make sure that the top level directory starts
+                         * with '/' and ends without '/'
+                         */
+                        tmp_map->directory = strdup (directory);
+                        if (tmp_map->directory == NULL) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory");
+                                FREE (tmp_map);
+                                ret = -1;
+                                goto out;
+                        }
+                        if (directory[tmp_map->dir_len - 1] == '/') {
+                                tmp_map->dir_len--;
+                        }
+
+                        /* append, so earlier options keep precedence */
+                        if (!priv->map)
+                                priv->map = tmp_map;
+                        else {
+                                struct map_pattern *trav_map = NULL;
+                                trav_map = priv->map;
+                                while (trav_map->next)
+                                        trav_map = trav_map->next;
+                                trav_map->next = tmp_map;
+                        }
+
+                        goto out;
+                }
+
+                trav = trav->next;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "map volume '%s' is not proper subvolume", subvol);
+
+ out:
+        return ret;
+}
+
+/*
+ * Make the child named `default_xl` the default subvolume.
+ *
+ * Returns 0 on success; -1 when no child has that name or when
+ * check_multiple_volume_entry() rejects it.  A previously set default is
+ * overridden with a warning.
+ */
+int
+assign_default_subvol (xlator_t *this, const char *default_xl)
+{
+        map_private_t *priv = this->private;
+        xlator_list_t *trav = NULL;
+
+        for (trav = this->children; trav; trav = trav->next) {
+                if (strcmp (trav->xlator->name, default_xl) == 0)
+                        break;
+        }
+
+        if (trav == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "default-volume value is not an valid subvolume. check again");
+                return -1;
+        }
+
+        if (check_multiple_volume_entry (this, trav->xlator) != 0)
+                return -1;
+
+        if (priv->default_xl)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "default-volume option provided, "
+                        "overriding earlier '*' option");
+
+        priv->default_xl = trav->xlator;
+        return 0;
+}
+
+/*
+ * Walk priv->xlarray looking for unmapped subvolumes: the first one found
+ * while no default is set becomes the default; every other unmapped one is
+ * reported with a warning.  A final warning is logged when there is still
+ * no default subvolume.
+ */
+void
+verify_if_all_subvolumes_got_used (xlator_t *this)
+{
+        map_private_t *priv = this->private;
+        int            idx  = 0;
+
+        for (idx = 0; idx < priv->child_count; idx++) {
+                if (priv->xlarray[idx].mapped)
+                        continue;
+
+                if (priv->default_xl) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "subvolume '%s' is not mapped to "
+                                "any directory",
+                                priv->xlarray[idx].xl->name);
+                        continue;
+                }
+
+                priv->default_xl = priv->xlarray[idx].xl;
+                priv->xlarray[idx].mapped = 1;
+        }
+
+        if (!priv->default_xl) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "default subvolume not specified, filesystem "
+                        "may not work properly. Check 'map' translator "
+                        "documentation for more info");
+        }
+
+        return ;
+}
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
new file mode 100644
index 00000000000..8c4b7c83c50
--- /dev/null
+++ b/xlators/cluster/map/src/map.c
@@ -0,0 +1,2193 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "map.h"
+
+/* For <op>_cbk functions */
+#include "defaults.c"
+
+
+/*
+ * map_stat - forward stat to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_stat (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_stat_cbk,
+                            subvol,
+                            subvol->fops->stat,
+                            loc);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * map_chmod - forward chmod to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_chmod (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           mode_t mode)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_chmod_cbk,
+                            subvol,
+                            subvol->fops->chmod,
+                            loc,
+                            mode);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_fchmod - forward fchmod to the subvolume recorded in the context of
+ * fd->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_fchmod (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            mode_t mode)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_fchmod_cbk,
+                            subvol,
+                            subvol->fops->fchmod,
+                            fd,
+                            mode);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_chown - forward chown to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_chown (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           uid_t uid,
+           gid_t gid)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_chown_cbk,
+                            subvol,
+                            subvol->fops->chown,
+                            loc,
+                            uid,
+                            gid);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_fchown - forward fchown to the subvolume recorded in the context of
+ * fd->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_fchown (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            uid_t uid,
+            gid_t gid)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_fchown_cbk,
+                            subvol,
+                            subvol->fops->fchown,
+                            fd,
+                            uid,
+                            gid);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_truncate - forward truncate to the subvolume recorded in the context
+ * of loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_truncate (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              off_t offset)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_truncate_cbk,
+                            subvol,
+                            subvol->fops->truncate,
+                            loc,
+                            offset);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_ftruncate - forward ftruncate to the subvolume recorded in the
+ * context of fd->inode.  Unwinds with -1/EINVAL when none is recorded; a
+ * failed argument check reports the initial op_errno value (1).
+ * Always returns 0.
+ */
+int32_t
+map_ftruncate (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               off_t offset)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_ftruncate_cbk,
+                            subvol,
+                            subvol->fops->ftruncate,
+                            fd,
+                            offset);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_utimens - forward utimens to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_utimens (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             struct timespec tv[2])
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_utimens_cbk,
+                            subvol,
+                            subvol->fops->utimens,
+                            loc,
+                            tv);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_access - forward access to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_access (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            int32_t mask)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_access_cbk,
+                            subvol,
+                            subvol->fops->access,
+                            loc,
+                            mask);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * map_readlink - forward readlink to the subvolume recorded in the context
+ * of loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_readlink (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              size_t size)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_readlink_cbk,
+                            subvol,
+                            subvol->fops->readlink,
+                            loc,
+                            size);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_unlink - forward unlink to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_unlink (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_unlink_cbk,
+                            subvol,
+                            subvol->fops->unlink,
+                            loc);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_rmdir - forward rmdir to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_rmdir (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_rmdir_cbk,
+                            subvol,
+                            subvol->fops->rmdir,
+                            loc);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * map_rename - forward rename to the subvolume recorded in the context of
+ * oldloc->inode.
+ *
+ * Unwinds with -1/EINVAL when no subvolume is recorded and with -1/EXDEV
+ * when newloc->path maps to a different subvolume; a failed argument check
+ * reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_rename (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *oldloc,
+            loc_t *newloc)
+{
+        xlator_t *old_subvol = NULL;
+        xlator_t *new_subvol = NULL;
+        int32_t   op_errno   = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (oldloc, err);
+        VALIDATE_OR_GOTO (oldloc->inode, err);
+        VALIDATE_OR_GOTO (oldloc->path, err);
+        VALIDATE_OR_GOTO (newloc, err);
+
+        old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode);
+        if (old_subvol == NULL) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* the destination is only checked when its path is known */
+        if (newloc->path)
+                new_subvol = get_mapping_subvol_from_path (this,
+                                                           newloc->path);
+
+        if ((new_subvol != NULL) && (new_subvol != old_subvol)) {
+                op_errno = EXDEV;
+                goto err;
+        }
+
+        STACK_WIND (frame,
+                    default_rename_cbk,
+                    old_subvol,
+                    old_subvol->fops->rename,
+                    oldloc, newloc);
+        return 0;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * map_link - forward link to the subvolume recorded in the context of
+ * oldloc->inode.
+ *
+ * Unwinds with -1/EINVAL when no subvolume is recorded and with -1/EXDEV
+ * when newloc->path maps to a different subvolume; a failed argument check
+ * reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_link (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *oldloc,
+          loc_t *newloc)
+{
+        xlator_t *old_subvol = NULL;
+        xlator_t *new_subvol = NULL;
+        int32_t   op_errno   = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (oldloc, err);
+        VALIDATE_OR_GOTO (oldloc->inode, err);
+        VALIDATE_OR_GOTO (oldloc->path, err);
+        VALIDATE_OR_GOTO (newloc, err);
+
+        old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode);
+        if (old_subvol == NULL) {
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        /* the destination is only checked when its path is known */
+        if (newloc->path)
+                new_subvol = get_mapping_subvol_from_path (this,
+                                                           newloc->path);
+
+        if ((new_subvol != NULL) && (new_subvol != old_subvol)) {
+                op_errno = EXDEV;
+                goto err;
+        }
+
+        STACK_WIND (frame,
+                    default_link_cbk,
+                    old_subvol,
+                    old_subvol->fops->link,
+                    oldloc, newloc);
+        return 0;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+
+/*
+ * map_open - forward open to the subvolume recorded in the context of
+ * loc->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_open (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc,
+          int32_t flags, fd_t *fd)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_open_cbk,
+                            subvol,
+                            subvol->fops->open,
+                            loc, flags, fd);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_readv - forward readv to the subvolume recorded in the context of
+ * fd->inode.  Unwinds with -1/EINVAL when none is recorded; a failed
+ * argument check reports the initial op_errno value (1).  Always returns 0.
+ */
+int32_t
+map_readv (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           size_t size,
+           off_t offset)
+{
+        xlator_t *subvol   = NULL;
+        int32_t   op_errno = 1;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (subvol) {
+                STACK_WIND (frame,
+                            default_readv_cbk,
+                            subvol,
+                            subvol->fops->readv,
+                            fd,
+                            size,
+                            offset);
+                return 0;
+        }
+        op_errno = EINVAL;
+
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+        return 0;
+}
+
+/*
+ * map_writev - wind writev() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_writev (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, struct iovec *vector, int32_t count, off_t off)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    fd, vector, count, off);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_flush - wind flush() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_flush_cbk,
+                    subvol, subvol->fops->flush,
+                    fd);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_fsync - wind fsync() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_fsync (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, int32_t flags)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_fsync_cbk,
+                    subvol, subvol->fops->fsync,
+                    fd, flags);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_fstat - wind fstat() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_fstat_cbk,
+                    subvol, subvol->fops->fstat,
+                    fd);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_getdents - wind getdents() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_getdents (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, size_t size, off_t offset, int32_t flag)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_getdents_cbk,
+                    subvol, subvol->fops->getdents,
+                    fd, size, offset, flag);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_setdents - wind setdents() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_setdents (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_setdents_cbk,
+                    subvol, subvol->fops->setdents,
+                    fd, flags, entries, count);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_fsyncdir - wind fsyncdir() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_fsyncdir (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t flags)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_fsyncdir_cbk,
+                    subvol, subvol->fops->fsyncdir,
+                    fd, flags);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+
+
+/*
+ * map_setxattr - wind setxattr() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_setxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *dict, int32_t flags)
+{
+        /* TODO: support for 'get' 'put' API */
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_setxattr_cbk,
+                    subvol, subvol->fops->setxattr,
+                    loc, dict, flags);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_getxattr - wind getxattr() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_getxattr (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, const char *name)
+{
+        /* TODO: support for 'get' 'put' API */
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_getxattr_cbk,
+                    subvol, subvol->fops->getxattr,
+                    loc, name);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_xattrop - wind xattrop() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_xattrop (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_xattrop_cbk,
+                    subvol, subvol->fops->xattrop,
+                    loc, flags, dict);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_fxattrop - wind fxattrop() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_fxattrop (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_fxattrop_cbk,
+                    subvol, subvol->fops->fxattrop,
+                    fd, flags, dict);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_removexattr - wind removexattr() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_removexattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *name)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_removexattr_cbk,
+                    subvol, subvol->fops->removexattr,
+                    loc, name);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_lk - wind lk() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_lk (call_frame_t *frame, xlator_t *this,
+        fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_lk_cbk,
+                    subvol, subvol->fops->lk,
+                    fd, cmd, lock);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_inodelk - wind inodelk() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_inodelk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t cmd, struct flock *lock)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_inodelk_cbk,
+                    subvol, subvol->fops->inodelk,
+                    loc, cmd, lock);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_finodelk - wind finodelk() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_finodelk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_finodelk_cbk,
+                    subvol, subvol->fops->finodelk,
+                    fd, cmd, lock);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_entrylk - wind entrylk() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_entrylk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *basename,
+             entrylk_cmd cmd, entrylk_type type)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_entrylk_cbk,
+                    subvol, subvol->fops->entrylk,
+                    loc, basename, cmd, type);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_fentrylk - wind fentrylk() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for fd->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_fentrylk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *basename,
+              entrylk_cmd cmd, entrylk_type type)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_fentrylk_cbk,
+                    subvol, subvol->fops->fentrylk,
+                    fd, basename, cmd, type);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_checksum - wind checksum() to the subvolume that
+ * get_mapping_subvol_from_ctx() returns for loc->inode.  Invalid
+ * arguments or a missing mapping are unwound with op_ret -1 / EINVAL.
+ */
+int32_t
+map_checksum (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t flag)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        STACK_WIND (frame, default_checksum_cbk,
+                    subvol, subvol->fops->checksum,
+                    loc, flag);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_newentry_cbk - callback shared by map_mknod, map_mkdir and
+ * map_symlink.  Runs the returned inode number through map_itransform()
+ * for the answering subvolume (prev->this) and unwinds to the parent.
+ */
+static int32_t
+map_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *buf)
+{
+        call_frame_t *prev = cookie;
+
+        /* A failed call may carry a NULL stat (this translator's own
+           error paths unwind with NULL), so only touch buf on success. */
+        if ((op_ret >= 0) && buf) {
+                map_itransform (this, prev->this, buf->st_ino,
+                                &buf->st_ino);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/*
+ * map_mknod - create a node on the subvolume that
+ * get_mapping_subvol_from_path() selects for loc->path.
+ *
+ * The chosen subvolume pointer is stored in the inode ctx before the
+ * call is wound; a failure to store it is logged and the call still
+ * proceeds.  Invalid arguments or an unmapped path are unwound with
+ * op_ret -1 / EINVAL.
+ */
+int32_t
+map_mknod (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode, dev_t rdev)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        int       ret      = 0;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        subvol = get_mapping_subvol_from_path (this, loc->path);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* keep the status out of op_errno: it is not an errno value */
+        ret = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set subvolume ptr in inode ctx",
+                        loc->path);
+        }
+
+        STACK_WIND (frame, map_newentry_cbk,
+                    subvol, subvol->fops->mknod,
+                    loc, mode, rdev);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_mkdir - create a directory on the subvolume that
+ * get_mapping_subvol_from_path() selects for loc->path.
+ *
+ * The chosen subvolume pointer is stored in the inode ctx before the
+ * call is wound; a failure to store it is logged and the call still
+ * proceeds.  Invalid arguments or an unmapped path are unwound with
+ * op_ret -1 / EINVAL.
+ */
+int32_t
+map_mkdir (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, mode_t mode)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        int       ret      = 0;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        subvol = get_mapping_subvol_from_path (this, loc->path);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* keep the status out of op_errno: it is not an errno value */
+        ret = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set subvolume ptr in inode ctx",
+                        loc->path);
+        }
+
+        STACK_WIND (frame, map_newentry_cbk,
+                    subvol, subvol->fops->mkdir,
+                    loc, mode);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_symlink - create a symlink on the subvolume that
+ * get_mapping_subvol_from_path() selects for loc->path.
+ *
+ * The chosen subvolume pointer is stored in the inode ctx before the
+ * call is wound; a failure to store it is logged and the call still
+ * proceeds.  Invalid arguments or an unmapped path are unwound with
+ * op_ret -1 / EINVAL.
+ */
+int32_t
+map_symlink (call_frame_t *frame, xlator_t *this,
+             const char *linkpath, loc_t *loc)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        int       ret      = 0;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        subvol = get_mapping_subvol_from_path (this, loc->path);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* keep the status out of op_errno: it is not an errno value */
+        ret = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set subvolume ptr in inode ctx",
+                        loc->path);
+        }
+
+        STACK_WIND (frame, map_newentry_cbk,
+                    subvol, subvol->fops->symlink,
+                    linkpath, loc);
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_create_cbk - callback for map_create.  Runs the returned inode
+ * number through map_itransform() for the answering subvolume
+ * (prev->this) and unwinds fd, inode and stat to the parent.
+ */
+static int32_t
+map_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        call_frame_t *prev = cookie;
+
+        /* A failed call may carry a NULL stat (this translator's own
+           error paths unwind with NULL), so only touch buf on success. */
+        if ((op_ret >= 0) && buf) {
+                map_itransform (this, prev->this, buf->st_ino,
+                                &buf->st_ino);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+/*
+ * map_create - create and open a file on the subvolume that
+ * get_mapping_subvol_from_path() selects for loc->path.
+ *
+ * The chosen subvolume pointer is stored in the inode ctx before the
+ * call is wound; a failure to store it is logged and the call still
+ * proceeds.  Invalid arguments or an unmapped path are unwound with
+ * op_ret -1 / EINVAL.
+ */
+int32_t
+map_create (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        /* EINVAL, not 1 (EPERM): same errno map_lookup uses when an
+           argument check fails */
+        int32_t   op_errno = EINVAL;
+        int       ret      = 0;
+        xlator_t *subvol   = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        subvol = get_mapping_subvol_from_path (this, loc->path);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* keep the status out of op_errno: it is not an errno value */
+        ret = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: failed to set subvolume ptr in inode ctx",
+                        loc->path);
+        }
+
+        STACK_WIND (frame, map_create_cbk,
+                    subvol, subvol->fops->create,
+                    loc, flags, mode, fd);
+        return 0;
+ err:
+        /* create callbacks take fd, inode and stat (see map_create_cbk),
+           so the failure unwind must supply all three */
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+
+/*
+ * map_single_lookup_cbk - callback for a lookup that was wound to
+ * exactly one subvolume.  Runs the returned inode number through
+ * map_itransform() for that subvolume (prev->this) and unwinds.
+ */
+int32_t
+map_single_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno,
+                       inode_t *inode, struct stat *buf, dict_t *dict)
+{
+        call_frame_t *prev = cookie;
+
+        /* A failed lookup may carry a NULL stat (this translator's own
+           error paths unwind with NULL), so only touch buf on success. */
+        if ((op_ret >= 0) && buf) {
+                map_itransform (this, prev->this, buf->st_ino,
+                                &buf->st_ino);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
+
+        return 0;
+}
+
+/*
+ * map_lookup_cbk - callback for the root lookup that map_lookup winds
+ * to every child.
+ *
+ * The first successful reply supplies the stat, inode and dict that are
+ * returned; any failure records its op_errno.  When the last reply has
+ * arrived (call_count reaches zero) the aggregated result is unwound
+ * and the references taken here are dropped.
+ */
+int32_t
+map_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                inode_t *inode, struct stat *buf, dict_t *dict)
+{
+        int          callcnt   = 0;
+        map_local_t *local     = NULL;
+        inode_t     *tmp_inode = NULL;
+        dict_t      *tmp_dict  = NULL;
+
+        local = frame->local;
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                if ((op_ret == 0) && (local->op_ret == -1)) {
+                        local->op_ret = 0;
+                        local->stbuf = *buf;
+                        if (dict)
+                                local->dict = dict_ref (dict);
+                        local->inode = inode_ref (inode);
+                }
+                if (op_ret == -1)
+                        local->op_errno = op_errno;
+
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                /* Copy what is needed out of local before unwinding and
+                   use only the copies afterwards, the way
+                   map_opendir_cbk handles local->fd. */
+                tmp_dict = local->dict;
+                tmp_inode = local->inode;
+
+                STACK_UNWIND (frame, local->op_ret,
+                              local->op_errno, tmp_inode,
+                              &local->stbuf, tmp_dict);
+
+                /* no reference was taken if every child failed */
+                if (tmp_inode)
+                        inode_unref (tmp_inode);
+                if (tmp_dict)
+                        dict_unref (tmp_dict);
+        }
+
+        return 0;
+}
+
+/*
+ * map_lookup - look up loc.
+ *
+ * For the root inode (ino == 1) the lookup is wound to every child and
+ * the replies are merged in map_lookup_cbk.  For any other inode the
+ * subvolume is taken from get_mapping_subvol_from_ctx(), falling back
+ * to get_mapping_subvol_from_path(); in the fallback case the result is
+ * stored in the inode ctx.  The lookup is then wound to that single
+ * subvolume.
+ *
+ * Failures are unwound with op_ret -1 and EINVAL (bad argument or no
+ * mapping) or ENOMEM.
+ */
+int32_t
+map_lookup (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, dict_t *xattr_req)
+{
+        int32_t        op_errno = EINVAL;
+        int            ret      = 0;
+        xlator_t      *subvol   = NULL;
+        map_local_t   *local    = NULL;
+        map_private_t *priv     = NULL;
+        xlator_list_t *trav     = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        priv = this->private;
+
+        if (loc->inode->ino == 1)
+                goto root_inode;
+
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                subvol = get_mapping_subvol_from_path (this, loc->path);
+                if (!subvol) {
+                        goto err;
+                }
+
+                /* keep the status out of op_errno: it is not an errno */
+                ret = inode_ctx_put (loc->inode, this,
+                                     (uint64_t)(long)subvol);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s: failed to set subvolume in inode ctx",
+                                loc->path);
+                }
+        }
+
+        /* Just one callback */
+        STACK_WIND (frame, map_single_lookup_cbk,
+                    subvol, subvol->fops->lookup,
+                    loc, xattr_req);
+
+        return 0;
+
+ root_inode:
+        local = CALLOC (1, sizeof (map_local_t));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        frame->local = local;
+        local->call_count = priv->child_count;
+        local->op_ret = -1;
+
+        trav = this->children;
+        while (trav) {
+                STACK_WIND (frame, map_lookup_cbk,
+                            trav->xlator, trav->xlator->fops->lookup,
+                            loc, xattr_req);
+                trav = trav->next;
+        }
+
+        return 0;
+
+ err:
+        /* lookup callbacks take inode, stat and dict (see
+           map_single_lookup_cbk), so supply all three */
+        STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+
+        return 0;
+}
+/*
+ * map_normalize_stats - re-express the block counts in buf in terms of
+ * the given block size and fragment size.
+ *
+ * f_bfree and f_bavail are scaled by old f_bsize / bsize, f_blocks by
+ * old f_frsize / frsize; f_bsize and f_frsize are then set to the new
+ * sizes.  A size that already matches is left untouched.
+ */
+void
+map_normalize_stats (struct statvfs *buf,
+                     unsigned long bsize,
+                     unsigned long frsize)
+{
+        double scale;
+
+        if (buf->f_bsize != bsize) {
+                scale = ((double) buf->f_bsize) / bsize;
+                buf->f_bsize  = bsize;
+                buf->f_bfree  = (fsblkcnt_t) (scale * buf->f_bfree);
+                buf->f_bavail = (fsblkcnt_t) (scale * buf->f_bavail);
+        }
+
+        if (buf->f_frsize != frsize) {
+                scale = ((double) buf->f_frsize) / frsize;
+                buf->f_frsize = frsize;
+                buf->f_blocks = (fsblkcnt_t) (scale * buf->f_blocks);
+        }
+}
+
+
+/*
+ * map_statfs_cbk - callback for the root statfs that map_statfs winds
+ * to every child.
+ *
+ * Each successful reply is added to the running total kept in
+ * local->statvfs, after both sides have been brought to a common block
+ * and fragment size.  A failed reply only records its op_errno.  The
+ * total is unwound once the last reply has arrived.
+ */
+int32_t
+map_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+{
+        map_local_t    *local   = frame->local;
+        struct statvfs *total   = NULL;
+        int             pending = 0;
+        unsigned long   bsize;
+        unsigned long   frsize;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = 0;
+
+                        /* when a call is successful, fold it into the
+                           total in local->statvfs */
+                        total = &local->statvfs;
+
+                        if (total->f_bsize == 0) {
+                                /* first successful reply sets the sizes */
+                                total->f_bsize  = stbuf->f_bsize;
+                                total->f_frsize = stbuf->f_frsize;
+                        } else {
+                                bsize = max (total->f_bsize,
+                                             stbuf->f_bsize);
+
+                                frsize = max (total->f_frsize,
+                                              stbuf->f_frsize);
+                                map_normalize_stats(total, bsize, frsize);
+                                map_normalize_stats(stbuf, bsize, frsize);
+                        }
+
+                        total->f_blocks  += stbuf->f_blocks;
+                        total->f_bfree   += stbuf->f_bfree;
+                        total->f_bavail  += stbuf->f_bavail;
+                        total->f_files   += stbuf->f_files;
+                        total->f_ffree   += stbuf->f_ffree;
+                        total->f_favail  += stbuf->f_favail;
+                        total->f_fsid     = stbuf->f_fsid;
+                        total->f_flag     = stbuf->f_flag;
+                        total->f_namemax  = stbuf->f_namemax;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending == 0) {
+                STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                              &local->statvfs);
+        }
+
+        return 0;
+}
+
+/*
+ * map_statfs - report filesystem statistics for loc.
+ *
+ * For the root inode (ino == 1) the call is wound to every child and
+ * the replies are summed in map_statfs_cbk.  For any other inode it is
+ * wound to the subvolume get_mapping_subvol_from_ctx() returns.
+ *
+ * Failures are unwound with op_ret -1 and EINVAL (bad argument or no
+ * mapping) or ENOMEM.
+ */
+int32_t
+map_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int32_t        op_errno = EINVAL;
+        xlator_t      *subvol   = NULL;
+        map_local_t   *local    = NULL;
+        map_private_t *priv     = NULL;
+        xlator_list_t *trav     = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->path, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+
+        if (loc->inode->ino == 1)
+                goto root_inode;
+        subvol = get_mapping_subvol_from_ctx (this, loc->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* Just one callback */
+        STACK_WIND (frame, default_statfs_cbk,
+                    subvol, subvol->fops->statfs,
+                    loc);
+
+        return 0;
+
+ root_inode:
+        local = CALLOC (1, sizeof (map_local_t));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        priv = this->private;
+        frame->local = local;
+        local->call_count = priv->child_count;
+        local->op_ret = -1;
+
+        trav = this->children;
+        while (trav) {
+                STACK_WIND (frame, map_statfs_cbk,
+                            trav->xlator, trav->xlator->fops->statfs,
+                            loc);
+                trav = trav->next;
+        }
+
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+/*
+ * map_opendir_cbk - callback for the root opendir that map_opendir
+ * winds to every child.
+ *
+ * A failed reply records its op_errno; a successful one sets the
+ * aggregated op_ret to 0.  After the last reply the fd kept in
+ * local->fd is unwound and the reference on it is dropped.
+ */
+int32_t
+map_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        map_local_t *local   = frame->local;
+        fd_t        *held_fd = NULL;
+        int          pending = 0;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                } else {
+                        local->op_ret = 0;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending == 0) {
+                /* detach the fd from local before unwinding; only the
+                   detached copy is used afterwards */
+                held_fd = local->fd;
+                local->fd = NULL;
+
+                STACK_UNWIND (frame, local->op_ret,
+                              local->op_errno, held_fd);
+
+                fd_unref (held_fd);
+        }
+        return 0;
+}
+
+
+/*
+ * map_opendir - open the directory at loc on fd.
+ *
+ * For the root inode (loc->inode->ino == 1) the call is wound to every
+ * child, with a reference on fd held in local->fd until
+ * map_opendir_cbk unwinds.  For any other inode it is wound to the
+ * subvolume get_mapping_subvol_from_ctx() returns for fd->inode.
+ *
+ * Failures are unwound with op_ret -1 and EINVAL (bad argument or no
+ * mapping) or ENOMEM.
+ */
+int32_t
+map_opendir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, fd_t *fd)
+{
+        int32_t        op_errno = EINVAL;
+        xlator_t      *subvol   = NULL;
+        map_local_t   *local    = NULL;
+        map_private_t *priv     = NULL;
+        xlator_list_t *trav     = NULL;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        /* loc->inode is dereferenced below, so it must be checked too */
+        VALIDATE_OR_GOTO (loc, err);
+        VALIDATE_OR_GOTO (loc->inode, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        if (loc->inode->ino == 1)
+                goto root_inode;
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* Just one callback */
+        STACK_WIND (frame, default_opendir_cbk,
+                    subvol, subvol->fops->opendir,
+                    loc, fd);
+        return 0;
+
+ root_inode:
+        local = CALLOC (1, sizeof (map_local_t));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        priv = this->private;
+        frame->local = local;
+        local->call_count = priv->child_count;
+        local->op_ret = -1;
+        local->fd = fd_ref (fd);
+
+        trav = this->children;
+        while (trav) {
+                STACK_WIND (frame, map_opendir_cbk,
+                            trav->xlator, trav->xlator->fops->opendir,
+                            loc, fd);
+                trav = trav->next;
+        }
+
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * map_single_readdir_cbk - callback for a readdir that was wound to
+ * exactly one subvolume.  Runs every entry's d_ino through
+ * map_itransform() for that subvolume (prev->this) and unwinds.
+ */
+int32_t
+map_single_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        gf_dirent_t *entries)
+{
+        call_frame_t *prev       = cookie;
+        gf_dirent_t  *orig_entry = NULL;
+
+        /* A failed readdir may carry a NULL list (map_readdir's own
+           error path unwinds with NULL), so only walk it on success. */
+        if ((op_ret >= 0) && entries) {
+                list_for_each_entry (orig_entry, &entries->list, list) {
+                        map_itransform (this, prev->this,
+                                        orig_entry->d_ino,
+                                        &orig_entry->d_ino);
+                }
+        }
+        STACK_UNWIND (frame, op_ret, op_errno, entries);
+
+        return 0;
+}
+
+
+/*
+ * map_readdir_cbk - callback for readdir on '/' (wound from map_readdir).
+ *
+ * Copies the entries returned by one subvolume into a fresh list, with
+ * d_ino and d_off passed through map_itransform() for that subvolume.
+ * If the subvolume produced no entries (or failed), the readdir is
+ * re-issued at offset 0 on the subvolume map_subvol_next() returns;
+ * when there is no next subvolume the (possibly empty) list is unwound.
+ * The fd reference held in local->fd is dropped on unwind.
+ */
+int
+map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ map_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *next = NULL;
+ int count = 0;
+ fd_t *local_fd = NULL;
+
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+
+ /* a failed child is treated like an empty one: count stays 0 */
+ if (op_ret < 0)
+ goto done;
+
+ list_for_each_entry (orig_entry, &orig_entries->list, list) {
+ subvol = prev->this;
+
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ /* NOTE(review): op_ret still holds the child's value here,
+ not the number of entries copied so far - confirm callers
+ only rely on the list */
+ goto unwind;
+ }
+
+ map_itransform (this, subvol, orig_entry->d_ino,
+ &entry->d_ino);
+ map_itransform (this, subvol, orig_entry->d_off,
+ &entry->d_off);
+
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+
+ op_ret = count;
+
+done:
+ if (count == 0) {
+ /* nothing from this subvolume: try the one after it */
+ next = map_subvol_next (this, prev->this);
+ if (!next) {
+ goto unwind;
+ }
+
+ STACK_WIND (frame, map_readdir_cbk,
+ next, next->fops->readdir,
+ local->fd, local->size, 0);
+ return 0;
+ }
+
+unwind:
+ /* an error from the last subvolume is reported as 0 entries */
+ if (op_ret < 0)
+ op_ret = 0;
+
+ /* detach the fd from local before unwinding; only the copy is used
+ afterwards */
+ local_fd = local->fd;
+ local->fd = NULL;
+
+ STACK_UNWIND (frame, op_ret, op_errno, &entries);
+
+ fd_unref (local_fd);
+
+ gf_dirent_free (&entries);
+
+ return 0;
+}
+
+
+/*
+ * map_readdir - read directory entries from fd.
+ *
+ * For the root inode (ino == 1) the incoming offset is split by
+ * map_deitransform() into a subvolume and an offset on it, and the
+ * readdir is wound there with map_readdir_cbk; a reference on fd and
+ * the requested size are kept in frame->local.  For any other inode the
+ * call is wound to the subvolume get_mapping_subvol_from_ctx() returns.
+ *
+ * Failures are unwound with op_ret -1 and EINVAL (bad argument, no
+ * mapping, or an offset that names no subvolume) or ENOMEM.
+ */
+int32_t
+map_readdir (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t yoff)
+{
+        int32_t      op_errno = EINVAL;
+        xlator_t    *subvol   = NULL;
+        map_local_t *local    = NULL;
+        xlator_t    *xvol     = NULL;
+        /* map_deitransform() writes a uint64_t, so give it a real one
+           instead of casting the address of an off_t */
+        uint64_t     xoff     = 0;
+
+        VALIDATE_OR_GOTO (frame, err);
+        VALIDATE_OR_GOTO (this, err);
+        VALIDATE_OR_GOTO (fd, err);
+        VALIDATE_OR_GOTO (fd->inode, err);
+
+        if (fd->inode->ino == 1)
+                goto root_inode;
+
+        subvol = get_mapping_subvol_from_ctx (this, fd->inode);
+        if (!subvol) {
+                goto err;
+        }
+
+        /* Just one callback */
+
+        STACK_WIND (frame, map_single_readdir_cbk,
+                    subvol, subvol->fops->readdir,
+                    fd, size, yoff);
+        return 0;
+
+ root_inode:
+        /* readdir on '/' */
+
+        /* resolve the target first so nothing has to be undone when
+           the offset does not name a subvolume */
+        map_deitransform (this, yoff, &xvol, &xoff);
+        if (!xvol) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no subvolume found for readdir offset");
+                goto err;
+        }
+
+        local = CALLOC (1, sizeof (map_local_t));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        frame->local = local;
+        local->op_errno = ENOENT;
+        local->op_ret = -1;
+
+        local->fd = fd_ref (fd);
+        local->size = size;
+
+        STACK_WIND (frame, map_readdir_cbk,
+                    xvol, xvol->fops->readdir,
+                    fd, size, (off_t) xoff);
+
+        return 0;
+ err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+#if 0
+/* TODO : do it later as currently only unify uses this mop and mostly
+ unify will be used below map */
+/* Disabled draft of the 'stats' management operation.  map_stats_cbk
+ only relays the reply to the parent. */
+int32_t
+map_stats_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct xlator_stats *stats)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, stats);
+ return 0;
+}
+
+
+/* NOTE(review): map_stats uses 'subvol' and 'op_errno' without
+ declaring them, so it will not compile if this block is enabled as
+ it stands. */
+int32_t
+map_stats (call_frame_t *frame,
+ xlator_t *this,
+ int32_t flags)
+{
+ STACK_WIND (frame,
+ map_stats_cbk,
+ subvol,
+ subvol->mops->stats,
+ flags);
+ return 0;
+ err:
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+#endif /* if 0 */
+
+
+/* TODO: define the behavior of notify */
+
+
+/*
+ * fini - release the translator's private state: the xlator array,
+ * every node of the map_pattern list, and the private structure.
+ *
+ * this->private is cleared afterwards, so calling fini() again (init()
+ * calls it on its error path) is harmless.
+ *
+ * NOTE(review): map_pattern->directory is not released here - confirm
+ * whether verify_dir_and_assign_subvol() allocates it.
+ */
+void
+fini (xlator_t *this)
+{
+        map_private_t      *priv     = NULL;
+        struct map_pattern *trav_map = NULL;
+        struct map_pattern *tmp_map  = NULL;
+
+        priv = this->private;
+        if (!priv) {
+                return;
+        }
+
+        if (priv->xlarray) {
+                FREE (priv->xlarray);
+        }
+
+        trav_map = priv->map;
+        while (trav_map) {
+                /* step forward before freeing the current node */
+                tmp_map = trav_map;
+                trav_map = trav_map->next;
+                FREE (tmp_map);
+        }
+
+        FREE (priv);
+
+        /* do not leave a pointer to freed memory behind */
+        this->private = NULL;
+
+        return;
+}
+
+/*
+ * init - set up the map translator.
+ *
+ * Builds priv->xlarray from this->children, then parses the
+ * "map-directory" option ("dir1:brick1;dir2:brick2;...") and hands each
+ * directory/subvolume pair to verify_dir_and_assign_subvol().  An
+ * optional "default-volume" option is passed to
+ * assign_default_subvol(), and verify_if_all_subvolumes_got_used() is
+ * called last.
+ *
+ * Note that the option string is tokenised in place with strtok_r().
+ *
+ * Returns 0 on success.  On failure returns -1 after releasing the
+ * private state and clearing this->private.
+ */
+int
+init (xlator_t *this)
+{
+        map_private_t *priv           = NULL;
+        xlator_list_t *trav           = NULL;
+        int            count          = 0;
+        int            ret            = -1;
+        char          *pattern_string = NULL;
+        char          *map_pair_str   = NULL;
+        char          *tmp_str        = NULL;
+        char          *tmp_str1       = NULL;
+        char          *dup_map_pair   = NULL;
+        char          *dir_str        = NULL;
+        char          *subvol_str     = NULL;
+        char          *default_xl     = NULL;
+
+        if (!this->children) {
+                gf_log (this->name,GF_LOG_ERROR,
+                        "FATAL: map should have one or more child defined");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = CALLOC (1, sizeof (map_private_t));
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+        this->private = priv;
+
+        /* allocate xlator array */
+        trav = this->children;
+        while (trav) {
+                count++;
+                trav = trav->next;
+        }
+        priv->xlarray = CALLOC (1, sizeof (struct map_xlator_array) * count);
+        if (!priv->xlarray) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+        priv->child_count = count;
+
+        /* build xlator array */
+        count = 0;
+        trav = this->children;
+        while (trav) {
+                priv->xlarray[count++].xl = trav->xlator;
+                trav = trav->next;
+        }
+
+        /* map dir1:brick1;dir2:brick2;dir3:brick3;*:brick4 */
+        ret = dict_get_str (this->options, "map-directory", &pattern_string);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "map-directory not given, can't continue");
+                goto err;
+        }
+        map_pair_str = strtok_r (pattern_string, ";", &tmp_str);
+        while (map_pair_str) {
+                /* work on a copy: the inner strtok_r() splits it at ':' */
+                dup_map_pair = strdup (map_pair_str);
+                if (!dup_map_pair) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "memory allocation failed :(");
+                        goto err;
+                }
+                dir_str = strtok_r (dup_map_pair, ":", &tmp_str1);
+                if (!dir_str) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "directory string invalid");
+                        goto err;
+                }
+                subvol_str = strtok_r (NULL, ":", &tmp_str1);
+                if (!subvol_str) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "mapping subvolume string invalid");
+                        goto err;
+                }
+                ret = verify_dir_and_assign_subvol (this,
+                                                    dir_str,
+                                                    subvol_str);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "verification failed");
+                        goto err;
+                }
+
+                FREE (dup_map_pair);
+                /* mark as released so the err path does not free it again */
+                dup_map_pair = NULL;
+
+                map_pair_str = strtok_r (NULL, ";", &tmp_str);
+        }
+
+        /* default-volume brick4 */
+        ret = dict_get_str (this->options, "default-volume", &default_xl);
+        if (ret == 0) {
+                ret = assign_default_subvol (this, default_xl);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "assigning default failed");
+                        goto err;
+                }
+        }
+
+        verify_if_all_subvolumes_got_used (this);
+
+        return 0;
+ err:
+        /* a pair copy is still held when the loop bailed out early */
+        if (dup_map_pair) {
+                FREE (dup_map_pair);
+        }
+        fini (this);
+        /* fini() freed priv; do not leave a dangling pointer behind */
+        this->private = NULL;
+        return -1;
+}
+
+
+/* File operations implemented by the map translator. */
+struct xlator_fops fops = {
+        /* namespace */
+        .lookup      = map_lookup,
+        .mknod       = map_mknod,
+        .mkdir       = map_mkdir,
+        .symlink     = map_symlink,
+        .create      = map_create,
+        .link        = map_link,
+        .unlink      = map_unlink,
+        .rmdir       = map_rmdir,
+        .rename      = map_rename,
+
+        /* attributes */
+        .stat        = map_stat,
+        .fstat       = map_fstat,
+        .chmod       = map_chmod,
+        .fchmod      = map_fchmod,
+        .chown       = map_chown,
+        .fchown      = map_fchown,
+        .utimens     = map_utimens,
+        .truncate    = map_truncate,
+        .ftruncate   = map_ftruncate,
+        .access      = map_access,
+        .readlink    = map_readlink,
+        .statfs      = map_statfs,
+        .checksum    = map_checksum,
+
+        /* extended attributes */
+        .setxattr    = map_setxattr,
+        .getxattr    = map_getxattr,
+        .removexattr = map_removexattr,
+        .xattrop     = map_xattrop,
+        .fxattrop    = map_fxattrop,
+
+        /* file I/O */
+        .open        = map_open,
+        .readv       = map_readv,
+        .writev      = map_writev,
+        .flush       = map_flush,
+        .fsync       = map_fsync,
+
+        /* directories */
+        .opendir     = map_opendir,
+        .readdir     = map_readdir,
+        .getdents    = map_getdents,
+        .setdents    = map_setdents,
+        .fsyncdir    = map_fsyncdir,
+
+        /* locking */
+        .lk          = map_lk,
+        .inodelk     = map_inodelk,
+        .finodelk    = map_finodelk,
+        .entrylk     = map_entrylk,
+        .fentrylk    = map_fentrylk,
+};
+
+/* Management operations: none are set by this translator. */
+struct xlator_mops mops = {
+};
+
+/* Translator callbacks: none are set by this translator. */
+struct xlator_cbks cbks = {
+};
+
+/* Volume-file options read in init(). */
+struct volume_options options[] = {
+ /* directory-to-subvolume pairs, e.g.
+ "dir1:brick1;dir2:brick2;dir3:brick3" */
+ { .key = {"map-directory"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ /* optional; its value is handed to assign_default_subvol() */
+ { .key = {"default-volume"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* terminator */
+ { .key = {NULL} }
+};
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
new file mode 100644
index 00000000000..0f1aabfd6e3
--- /dev/null
+++ b/xlators/cluster/map/src/map.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __MAP_H__
+#define __MAP_H__
+
+#include "xlator.h"
+
+/*
+ * One node of a singly linked list (chained through 'next') that pairs a
+ * directory string and its length with an xlator.
+ * NOTE(review): presumably one node per "map-directory" entry -- confirm
+ * against map.c.
+ */
+struct map_pattern {
+ struct map_pattern *next;
+ xlator_t *xl;
+ char *directory;
+ int dir_len;
+};
+
+/* An xlator together with a flag recording whether it has been mapped. */
+struct map_xlator_array {
+ xlator_t *xl;
+ int mapped; /* yes/no */
+};
+
+/*
+ * Per-translator state of map: the head of the map_pattern list, the
+ * default xlator, the array of map_xlator_array entries and a child count.
+ * NOTE(review): child_count presumably is the length of xlarray -- confirm.
+ */
+typedef struct {
+ struct map_pattern *map;
+ xlator_t *default_xl;
+ struct map_xlator_array *xlarray;
+ int child_count;
+} map_private_t;
+
+/*
+ * Per-call scratch state of map: the result pair (op_ret/op_errno), a call
+ * counter, statvfs/stat buffers and the inode, dict, fd and size belonging
+ * to the call.
+ * NOTE(review): how each field is used is defined in map.c, not visible
+ * in this header.
+ */
+typedef struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ int call_count;
+ struct statvfs statvfs;
+ struct stat stbuf;
+ inode_t *inode;
+ dict_t *dict;
+ fd_t *fd;
+
+ size_t size;
+} map_local_t;
+
+/* Subvolume iteration / counting helpers. */
+xlator_t *map_subvol_next (xlator_t *this, xlator_t *prev);
+int map_subvol_cnt (xlator_t *this, xlator_t *subvol);
+
+/*
+ * Transform pair: itransform takes (subvol, x) and writes *y_p;
+ * deitransform takes y and writes back *subvol_p and *x_p.
+ * NOTE(review): presumably inverse operations of each other -- confirm
+ * against the implementation.
+ */
+int map_itransform (xlator_t *this, xlator_t *subvol,
+ uint64_t x, uint64_t *y_p);
+int map_deitransform (xlator_t *this, uint64_t y,
+ xlator_t **subvol_p, uint64_t *x_p);
+
+
+/* Pick the subvolume for a path, or for an inode. */
+xlator_t *get_mapping_subvol_from_path (xlator_t *this, const char *path);
+xlator_t *get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode);
+
+/* Configuration helpers called while the translator is initialised. */
+int check_multiple_volume_entry (xlator_t *this, xlator_t *subvol);
+int verify_dir_and_assign_subvol (xlator_t *this,
+ const char *directory, const char *subvol);
+int assign_default_subvol (xlator_t *this, const char *default_xl);
+void verify_if_all_subvolumes_got_used (xlator_t *this);
+
+
+#endif /* __MAP_H__ */
diff --git a/xlators/cluster/stripe/Makefile.am b/xlators/cluster/stripe/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/cluster/stripe/Makefile.am
@@ -0,0 +1,3 @@
+# The build descends into src/, which holds the translator sources.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
new file mode 100644
index 00000000000..60e0a156876
--- /dev/null
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -0,0 +1,14 @@
+
+# Builds the stripe translator as a libtool module and installs it under the
+# per-version cluster xlator directory.
+xlator_LTLIBRARIES = stripe.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+# libtool's flag is spelled "-avoid-version"; "-avoidversion" is not a
+# libtool option.
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c
+stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
new file mode 100644
index 00000000000..83787ca2a96
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -0,0 +1,3286 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * xlators/cluster/stripe:
+ * Stripe translator, stripes the data across its child nodes,
+ * as per the options given in the volfile. The striping works
+ * fairly simply: it writes files at different offsets as per
+ * calculation. So, 'ls -l' output at the real posix level will
+ * show file size bigger than the actual size. But when one does
+ * 'df' or 'du <file>', real size of the file on the server is shown.
+ *
+ * WARNING:
+ * Stripe translator can't regenerate data if a child node gets disconnected.
+ * So, no 'self-heal' for stripe. Hence the advice, use stripe only when it's
+ * very much necessary, or else, use it in combination with AFR, to have a
+ * backup copy.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include <fnmatch.h>
+#include <signal.h>
+
+/**
+ * STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR - guard used at the top of fops.
+ *
+ * If @_loc is NULL or carries no inode, the enclosing function unwinds
+ * with -1/EINVAL and returns 0. The macro expects a variable named
+ * 'frame' in the calling scope. The argument is parenthesized so that any
+ * pointer expression can be passed safely.
+ */
+#define STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do {             \
+        if (!((_loc) && (_loc)->inode)) {                               \
+                STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);     \
+                return 0;                                               \
+        }                                                               \
+} while(0)
+
+/**
+ * struct stripe_options : This keeps the pattern and the block-size
+ * information, which is used for striping on a file.
+ *
+ * Entries are chained through 'next'; 'path_pattern' is matched against a
+ * path with fnmatch() and 'block_size' is the size selected on a match
+ * (see stripe_get_matching_bs()).
+ */
+struct stripe_options {
+ struct stripe_options *next;
+ char path_pattern[256];
+ uint64_t block_size;
+};
+
+/**
+ * Private structure for stripe translator
+ *
+ * 'pattern' heads the stripe_options list and 'block_size' is the default
+ * used when no pattern matches. 'first_child_down' and 'nodes_down' are
+ * consulted by the fops before they wind; 'child_count' is the number of
+ * replies the fan-out fops wait for.
+ */
+struct stripe_private {
+ struct stripe_options *pattern;
+ xlator_t **xl_array;
+ uint64_t block_size;
+ gf_lock_t lock;
+ uint8_t nodes_down;
+ int8_t first_child_down;
+ int8_t child_count;
+ int8_t state[256]; /* Current state of the child node,
+ 0 for down, 1 for up */
+ gf_boolean_t xattr_supported; /* 0 for no, 1 for yes, default yes */
+};
+
+/**
+ * Used to keep info about the replies received from fops->readv calls
+ *
+ * One record holds the iovec array and its element count, the result
+ * pair of the readv call, and the stat buffer of the reply.
+ */
+struct readv_replies {
+ struct iovec *vector;
+ int32_t count; //count of vector
+ int32_t op_ret; //op_ret of readv
+ int32_t op_errno;
+ struct stat stbuf; /* 'stbuf' is also a part of reply */
+};
+
+/**
+ * Local structure to be passed with all the frames in case of STACK_WIND
+ *
+ * The fops allocate one of these per call and hang it on frame->local;
+ * the callbacks use it to count outstanding replies (call_count) and to
+ * accumulate the merged result (op_ret, op_errno, stbuf, ...).
+ */
+struct stripe_local; /* this itself is used inside the structure; */
+
+struct stripe_local {
+ struct stripe_local *next;
+ call_frame_t *orig_frame;
+
+ /* Used by _cbk functions */
+ struct stat stbuf;
+ struct readv_replies *replies;
+ struct statvfs statvfs_buf;
+ dir_entry_t *entry;
+ struct xlator_stats stats;
+
+ int8_t revalidate;
+ int8_t failed;
+ int8_t unwind;
+
+ int32_t node_index;
+ int32_t call_count;
+ int32_t wind_count; /* used instead of child_count
+ in case of read and write */
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t count;
+ int32_t flags;
+ char *name;
+ inode_t *inode;
+
+ loc_t loc;
+ loc_t loc2;
+
+ /* For File I/O fops */
+ dict_t *dict;
+
+ /* General usage */
+ off_t offset;
+ off_t stripe_size;
+
+ int8_t *list;
+ struct flock lock;
+ fd_t *fd;
+ void *value;
+};
+
+/* Short names used throughout the translator. */
+typedef struct stripe_local stripe_local_t;
+typedef struct stripe_private stripe_private_t;
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ *
+ * @path:       path to match; when NULL, @default_bs is returned
+ * @opts:       head of the pattern list (may be NULL)
+ * @default_bs: block size used when no pattern matches
+ *
+ * Walks @opts in list order and returns the block size of the first entry
+ * whose path_pattern matches @path under fnmatch() with FNM_NOESCAPE.
+ * fnmatch() does not modify its arguments, so @path is matched directly
+ * instead of through a heap copy that could fail to allocate.
+ *
+ * NOTE(review): the return type is int32_t while block sizes are stored as
+ * uint64_t, so values that do not fit in 32 bits are truncated; the type
+ * is kept as-is for interface compatibility.
+ */
+int32_t
+stripe_get_matching_bs (const char *path,
+                        struct stripe_options *opts,
+                        uint64_t default_bs)
+{
+        struct stripe_options *trav = NULL;
+
+        if (!path)
+                return default_bs;
+
+        for (trav = opts; trav; trav = trav->next) {
+                if (fnmatch (trav->path_pattern, path, FNM_NOESCAPE) == 0)
+                        return trav->block_size;
+        }
+
+        return default_bs;
+}
+
+
+/*
+ * stripe_common_cbk - pass-through callback: the child's op_ret and
+ * op_errno are handed back to the caller unchanged.
+ */
+int32_t
+stripe_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * stripe_stack_unwind_cbk - reply collector for fops whose callback has
+ *   no arguments beyond op_ret/op_errno (fsync, unlink, rmdir, ...).
+ *
+ * Every reply decrements local->call_count under the frame lock. A reply
+ * with op_ret == -1 is logged and its errno remembered; ENOTCONN also
+ * marks the whole call as failed. When the last reply is in, the stored
+ * locs are wiped and the merged result is unwound.
+ *
+ * @cookie is read as the child's call_frame_t (used for the log message).
+ */
+int32_t
+stripe_stack_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno)
+{
+        stripe_local_t *local     = frame->local;
+        call_frame_t   *prev      = cookie;
+        int32_t         remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                } else if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned %s",
+                                prev->this->name,
+                                strerror (op_errno));
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        /* Last reply: finalise the result and release the loc copies. */
+        if (local->failed)
+                local->op_ret = -1;
+
+        if (local->loc.path)
+                loc_wipe (&local->loc);
+        if (local->loc2.path)
+                loc_wipe (&local->loc2);
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+/*
+ * stripe_common_buf_cbk - pass-through callback for fops that return a
+ * stat buffer: result and @buf go back to the caller unchanged.
+ */
+int32_t
+stripe_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/**
+ * stripe_stack_unwind_buf_cbk - reply collector for fops whose callback
+ *   carries a 'struct stat *buf' (chmod, fchmod, chown, fchown, truncate,
+ *   ftruncate, utimens, ...).
+ *
+ * Successful replies are merged into local->stbuf: the first one seeds
+ * the buffer, st_blocks is summed over all children, st_size is the
+ * largest size seen, and st_ino/st_mtime come from the first child.
+ * When the last reply is in, the locs are wiped and the merged stat is
+ * unwound.
+ *
+ * @cookie - read as the call_frame_t of the child that replied
+ */
+int32_t
+stripe_stack_unwind_buf_cbk (call_frame_t *frame, void *cookie,
+                             xlator_t *this, int32_t op_ret,
+                             int32_t op_errno, struct stat *buf)
+{
+        stripe_local_t *local     = frame->local;
+        call_frame_t   *prev      = cookie;
+        int32_t         remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret == 0) {
+                        local->op_ret = 0;
+
+                        if (local->stbuf.st_blksize == 0) {
+                                /* First good reply seeds the buffer;
+                                   blocks restart at 0 because they are
+                                   added again just below */
+                                local->stbuf = *buf;
+                                local->stbuf.st_blocks = 0;
+                        }
+
+                        if (prev->this == FIRST_CHILD(this)) {
+                                /* Inode number and mtime of the first
+                                   child are what the upper layer sees */
+                                local->stbuf.st_ino = buf->st_ino;
+                                local->stbuf.st_mtime = buf->st_mtime;
+                        }
+
+                        local->stbuf.st_blocks += buf->st_blocks;
+                        if (buf->st_size > local->stbuf.st_size)
+                                local->stbuf.st_size = buf->st_size;
+
+                        /* TODO: when block sizes differ, add to blocks
+                           in terms of original block size */
+                } else if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                prev->this->name,
+                                strerror (op_errno));
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        if (local->failed)
+                local->op_ret = -1;
+
+        if (local->loc.path)
+                loc_wipe (&local->loc);
+        if (local->loc2.path)
+                loc_wipe (&local->loc2);
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      &local->stbuf);
+
+        return 0;
+}
+
+/*
+ * stripe_common_inode_cbk - pass-through callback for fops that return
+ * an inode and a stat buffer. Used for symlink and mknod, where the file
+ * is created on just the first node.
+ */
+int32_t
+stripe_common_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno,
+                         inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+
+        return 0;
+}
+
+/**
+ * stripe_stack_unwind_inode_cbk - reply collector for fops that return
+ *   an inode plus a stat buffer (link, symlink, mkdir, mknod).
+ *
+ * The first successful reply supplies local->inode and seeds
+ * local->stbuf; later replies add their st_blocks and may raise st_size.
+ * st_ino and st_mtime are taken from the first child. After the last
+ * reply the merged result is unwound.
+ *
+ * @cookie is read as the call_frame_t of the child that replied.
+ */
+int32_t
+stripe_stack_unwind_inode_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret,
+                               int32_t op_errno, inode_t *inode,
+                               struct stat *buf)
+{
+        stripe_local_t *local     = frame->local;
+        call_frame_t   *prev      = cookie;
+        int32_t         remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret >= 0) {
+                        local->op_ret = 0;
+
+                        if (local->stbuf.st_blksize == 0) {
+                                local->inode = inode;
+                                local->stbuf = *buf;
+                                /* blocks are added again just below */
+                                local->stbuf.st_blocks = 0;
+                        }
+
+                        if (prev->this == FIRST_CHILD(this)) {
+                                local->stbuf.st_ino = buf->st_ino;
+                                local->stbuf.st_mtime = buf->st_mtime;
+                        }
+
+                        local->stbuf.st_blocks += buf->st_blocks;
+                        if (buf->st_size > local->stbuf.st_size)
+                                local->stbuf.st_size = buf->st_size;
+
+                        /* TODO: when block sizes differ, add to blocks
+                           in terms of original block size */
+                } else if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                prev->this->name,
+                                strerror (op_errno));
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        if (local->failed)
+                local->op_ret = -1;
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      local->inode, &local->stbuf);
+
+        return 0;
+}
+
+/**
+ * stripe_stack_unwind_inode_lookup_cbk - reply collector for lookup.
+ *
+ * Merges the stat buffers like the other collectors and additionally
+ * keeps one reference to a reply dict: the first child's dict replaces
+ * any dict stored earlier, another child's dict is kept only while none
+ * is stored yet. Failures other than ENOENT are logged; ENOTCONN marks
+ * the whole lookup as failed.
+ *
+ * @cookie is read as the call_frame_t of the child that replied.
+ */
+int32_t
+stripe_stack_unwind_inode_lookup_cbk (call_frame_t *frame, void *cookie,
+                                      xlator_t *this, int32_t op_ret,
+                                      int32_t op_errno, inode_t *inode,
+                                      struct stat *buf, dict_t *dict)
+{
+        stripe_local_t *local      = frame->local;
+        call_frame_t   *prev       = cookie;
+        dict_t         *held_dict  = NULL;
+        int32_t         remaining  = 0;
+        int             from_first = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret >= 0) {
+                        local->op_ret = 0;
+
+                        if (local->stbuf.st_blksize == 0) {
+                                local->inode = inode;
+                                local->stbuf = *buf;
+                                /* blocks are added again just below */
+                                local->stbuf.st_blocks = 0;
+                        }
+
+                        from_first = (prev->this == FIRST_CHILD(this));
+                        if (from_first) {
+                                local->stbuf.st_ino = buf->st_ino;
+                                local->stbuf.st_mtime = buf->st_mtime;
+                                /* first child's dict wins: drop any
+                                   dict stored by another child */
+                                if (local->dict)
+                                        dict_unref (local->dict);
+                        }
+                        if (from_first || !local->dict)
+                                local->dict = dict_ref (dict);
+
+                        local->stbuf.st_blocks += buf->st_blocks;
+                        if (buf->st_size > local->stbuf.st_size)
+                                local->stbuf.st_size = buf->st_size;
+
+                        /* TODO: when block sizes differ, add to blocks
+                           in terms of original block size */
+                } else if (op_ret == -1) {
+                        if (op_errno != ENOENT)
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "%s returned error %s",
+                                        prev->this->name,
+                                        strerror (op_errno));
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        if (local->failed)
+                local->op_ret = -1;
+
+        /* Remember the dict before unwinding so that 'local' is not
+           touched afterwards when the reference is dropped */
+        held_dict = local->dict;
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      local->inode, &local->stbuf, local->dict);
+        if (held_dict)
+                dict_unref (held_dict);
+
+        return 0;
+}
+
+
+/**
+ * stripe_lookup - look up @loc on the children.
+ *
+ * When the inode's type is not known yet (st_mode == 0) or it is a
+ * directory or a regular file, the lookup is wound to every child;
+ * any other type is looked up on the first child only. Replies are
+ * merged by stripe_stack_unwind_inode_lookup_cbk().
+ *
+ * A missing @loc or loc->inode is answered with EINVAL.
+ */
+int32_t
+stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               dict_t *xattr_req)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        if (!loc || !loc->inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "wrong argument, returning EINVAL");
+                STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        frame->local = local;
+
+        if (loc->inode->st_mode &&
+            !S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                /* Known type that lives on the first child only */
+                local->call_count = 1;
+
+                STACK_WIND (frame,
+                            stripe_stack_unwind_inode_lookup_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lookup,
+                            loc, xattr_req);
+                return 0;
+        }
+
+        /* Everytime in stripe lookup, all child nodes
+           should be looked up */
+        local->call_count = priv->child_count;
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_inode_lookup_cbk,
+                            child->xlator,
+                            child->xlator->fops->lookup,
+                            loc, xattr_req);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_stat - stat @loc.
+ *
+ * Directories and regular files are stat'ed on every child and the
+ * replies merged by stripe_stack_unwind_buf_cbk(); any other inode type
+ * is stat'ed on the first child only and the reply passed straight
+ * through.
+ */
+int32_t
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_buf_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->stat,
+                            loc);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->stat,
+                            loc);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_chmod - change the mode of @loc.
+ *
+ * Refused with ENOTCONN while the first child is down. Directories and
+ * regular files are changed on every child (replies merged by
+ * stripe_stack_unwind_buf_cbk()); other inode types only on the first
+ * child.
+ */
+int32_t
+stripe_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_buf_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->chmod,
+                            loc, mode);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->chmod,
+                            loc, mode);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_chown - change owner and group of @loc.
+ *
+ * Refused with ENOTCONN while the first child is down. Directories and
+ * regular files are changed on every child (replies merged by
+ * stripe_stack_unwind_buf_cbk()); other inode types only on the first
+ * entry of the children list.
+ */
+int32_t
+stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              uid_t uid, gid_t gid)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        child = this->children;
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->chown,
+                            loc, uid, gid);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->chown,
+                            loc, uid, gid);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_statfs_cbk - merge the statfs replies of the children.
+ *
+ * Block and file counters are summed over all successful replies; the
+ * remaining fields (block sizes, fsid, flags, name length) simply take
+ * the value of the most recent successful reply. An error other than
+ * ENOTCONN replaces the stored errno. The merged buffer is unwound once
+ * the last reply has arrived.
+ */
+int32_t
+stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+{
+        stripe_local_t *local     = (stripe_local_t *)frame->local;
+        struct statvfs *total     = NULL;
+        int32_t         remaining = 0;
+
+        LOCK(&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret == 0) {
+                        total = &local->statvfs_buf;
+
+                        /* copied from the latest reply */
+                        total->f_bsize   = stbuf->f_bsize;
+                        total->f_frsize  = stbuf->f_frsize;
+                        total->f_fsid    = stbuf->f_fsid;
+                        total->f_flag    = stbuf->f_flag;
+                        total->f_namemax = stbuf->f_namemax;
+
+                        /* accumulated over all children */
+                        total->f_blocks += stbuf->f_blocks;
+                        total->f_bfree  += stbuf->f_bfree;
+                        total->f_bavail += stbuf->f_bavail;
+                        total->f_files  += stbuf->f_files;
+                        total->f_ffree  += stbuf->f_ffree;
+                        total->f_favail += stbuf->f_favail;
+
+                        local->op_ret = 0;
+                } else if (op_errno != ENOTCONN) {
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        STACK_UNWIND (frame, local->op_ret,
+                      local->op_errno, &local->statvfs_buf);
+
+        return 0;
+}
+
+
+/**
+ * stripe_statfs - query file-system statistics from every child.
+ *
+ * The result starts out as -1/ENOTCONN and is overwritten by
+ * stripe_statfs_cbk() as replies come in.
+ */
+int32_t
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        frame->local = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_statfs_cbk,
+                            child->xlator,
+                            child->xlator->fops->statfs,
+                            loc);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_truncate - truncate @loc to @offset.
+ *
+ * Refused with ENOTCONN while the first child is down. Directories and
+ * regular files are truncated on every child (replies merged by
+ * stripe_stack_unwind_buf_cbk()); other inode types only on the first
+ * entry of the children list.
+ */
+int32_t
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 off_t offset)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->truncate,
+                            loc, offset);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->truncate,
+                            loc, offset);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_utimens - set the timestamps of @loc from @tv.
+ *
+ * Refused with ENOTCONN while the first child is down. Directories and
+ * regular files are updated on every child (replies merged by
+ * stripe_stack_unwind_buf_cbk()); other inode types only on the first
+ * entry of the children list.
+ */
+int32_t
+stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                struct timespec tv[2])
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->utimens,
+                            loc, tv);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->utimens,
+                            loc, tv);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_first_rename_cbk - reply of the rename sent to the first child.
+ *
+ * On failure the rename is abandoned: the loc copies held in 'local'
+ * (taken by stripe_rename()) are released and the error is unwound.
+ * On success the reply seeds local->stbuf and the rename is wound to
+ * every remaining child; those replies are merged by
+ * stripe_stack_unwind_buf_cbk(), which also releases the locs.
+ *
+ * If there is no child besides the first one, nobody else would ever
+ * unwind the frame, so the result is unwound right here.
+ */
+int32_t
+stripe_first_rename_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         struct stat *buf)
+{
+        stripe_local_t *local = frame->local;
+        xlator_list_t  *trav  = this->children;
+
+        if (op_ret == -1) {
+                /* Nothing else will run for this frame: free the locs */
+                if (local->loc.path)
+                        loc_wipe (&local->loc);
+                if (local->loc2.path)
+                        loc_wipe (&local->loc2);
+
+                STACK_UNWIND (frame, op_ret, op_errno, buf);
+                return 0;
+        }
+
+        local->op_ret = 0;
+        local->stbuf = *buf;
+        local->call_count--;
+        trav = trav->next; /* Skip first child */
+
+        if (!trav) {
+                /* Single child: the rename is already complete */
+                if (local->loc.path)
+                        loc_wipe (&local->loc);
+                if (local->loc2.path)
+                        loc_wipe (&local->loc2);
+
+                STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                              &local->stbuf);
+                return 0;
+        }
+
+        while (trav) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            trav->xlator,
+                            trav->xlator->fops->rename,
+                            &local->loc, &local->loc2);
+                trav = trav->next;
+        }
+
+        return 0;
+}
+/**
+ * stripe_rename - rename @oldloc to @newloc.
+ *
+ * Refused with EIO while the first child is down (the log message now
+ * names the errno that is really returned). Otherwise both locs are
+ * copied into 'local' and the rename is sent to the first child only;
+ * stripe_first_rename_cbk() continues with the remaining children once
+ * that one has succeeded.
+ */
+int32_t
+stripe_rename (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *oldloc,
+               loc_t *newloc)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *trav  = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, NULL);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = oldloc->inode;
+        /* Private copies: the callbacks wind with them later on */
+        loc_copy (&local->loc, oldloc);
+        loc_copy (&local->loc2, newloc);
+
+        local->call_count = priv->child_count;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    stripe_first_rename_cbk,
+                    trav->xlator,
+                    trav->xlator->fops->rename,
+                    oldloc, newloc);
+
+        return 0;
+}
+
+
+/**
+ * stripe_access - check access permissions of @loc.
+ *
+ * Only the first child is asked; its answer is passed through by
+ * stripe_common_cbk().
+ */
+int32_t
+stripe_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               int32_t mask)
+{
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        STACK_WIND (frame, stripe_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+                    loc, mask);
+
+        return 0;
+}
+
+
+/**
+ * stripe_readlink_cbk - pass-through callback for readlink: result and
+ * link target go back to the caller unchanged.
+ */
+int32_t
+stripe_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, const char *path)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, path);
+
+        return 0;
+}
+
+
+/**
+ * stripe_readlink - read the target of the symlink @loc.
+ *
+ * Served by the first child only; refused with ENOTCONN while that
+ * child is down.
+ */
+int32_t
+stripe_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 size_t size)
+{
+        stripe_private_t *priv = this->private;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, stripe_readlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+                    loc, size);
+
+        return 0;
+}
+
+
+/**
+ * stripe_unlink - remove @loc.
+ *
+ * Refused with EIO while the first child is down. Directories and
+ * regular files are unlinked on every child (replies merged by
+ * stripe_stack_unwind_cbk()); other inode types only on the first entry
+ * of the children list.
+ */
+int32_t
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO);
+                return 0;
+        }
+
+        if (!S_ISDIR (loc->inode->st_mode) &&
+            !S_ISREG (loc->inode->st_mode)) {
+                STACK_WIND (frame,
+                            stripe_common_cbk,
+                            child->xlator,
+                            child->xlator->fops->unlink,
+                            loc);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->call_count = priv->child_count;
+        frame->local = local;
+
+        for (; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            child->xlator,
+                            child->xlator->fops->unlink,
+                            loc);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_first_rmdir_cbk - reply of the rmdir sent to the first child.
+ *
+ * On failure the operation is abandoned: the loc copy held in 'local'
+ * (taken by stripe_rmdir()) is released and the error is unwound.
+ * On success the rmdir is wound to every remaining child; those replies
+ * are merged by stripe_stack_unwind_cbk(), which also releases the loc.
+ *
+ * If there is no child besides the first one, nobody else would ever
+ * unwind the frame, so the first child's result is unwound right here.
+ */
+int32_t
+stripe_first_rmdir_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno)
+{
+        xlator_list_t  *trav  = this->children;
+        stripe_local_t *local = frame->local;
+
+        if (op_ret == -1) {
+                /* Nothing else will run for this frame: free the loc */
+                if (local->loc.path)
+                        loc_wipe (&local->loc);
+
+                STACK_UNWIND (frame, op_ret, op_errno);
+                return 0;
+        }
+
+        local->call_count--; /* First child successful */
+        trav = trav->next; /* Skip first child */
+
+        if (!trav) {
+                /* Single child: the rmdir is already complete */
+                if (local->loc.path)
+                        loc_wipe (&local->loc);
+
+                STACK_UNWIND (frame, op_ret, op_errno);
+                return 0;
+        }
+
+        while (trav) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            trav->xlator,
+                            trav->xlator->fops->rmdir,
+                            &local->loc);
+                trav = trav->next;
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_rmdir - remove the directory @loc.
+ *
+ * Refused with EIO while the first child is down. The rmdir goes to the
+ * first entry of the children list only; stripe_first_rmdir_cbk()
+ * carries on with the remaining children, using the loc copy stored in
+ * 'local'.
+ */
+int32_t
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *first = this->children;
+        stripe_local_t   *local = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->inode = loc->inode;
+        local->call_count = priv->child_count;
+        loc_copy (&local->loc, loc);
+        frame->local = local;
+
+        STACK_WIND (frame, stripe_first_rmdir_cbk,
+                    first->xlator, first->xlator->fops->rmdir,
+                    loc);
+
+        return 0;
+}
+
+
+/**
+ * stripe_setxattr - set extended attributes on @loc.
+ *
+ * Handled by the first child only; refused with ENOTCONN while that
+ * child is down.
+ */
+int32_t
+stripe_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 dict_t *dict, int32_t flags)
+{
+        stripe_private_t *priv = this->private;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN);
+                return 0;
+        }
+
+        STACK_WIND (frame, stripe_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                    loc, dict, flags);
+
+        return 0;
+}
+
+
+/**
+ * stripe_mknod_ifreg_fail_unlink_cbk - reply of the clean-up unlinks
+ * sent after a failed setxattr on a freshly created file.
+ *
+ * The outcome of the individual unlinks is ignored; once all of them
+ * have answered, the loc copy is released and the result already stored
+ * in 'local' is unwound.
+ */
+int32_t
+stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
+                                    xlator_t *this, int32_t op_ret,
+                                    int32_t op_errno)
+{
+        stripe_local_t *local     = frame->local;
+        int32_t         remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        loc_wipe (&local->loc);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      local->inode, &local->stbuf);
+
+        return 0;
+}
+
+
+/**
+ * stripe_mknod_ifreg_setxattr_cbk - reply of the setxattr calls that
+ * tag a freshly created regular file with its stripe attributes.
+ *
+ * Any failing child turns the stored result into -1 with that child's
+ * errno. After the last reply: on success the loc copy is released and
+ * the result unwound; on failure the file is unlinked again on every
+ * child and stripe_mknod_ifreg_fail_unlink_cbk() finishes the call.
+ *
+ * @cookie is read as the call_frame_t of the child that replied.
+ */
+int32_t
+stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno)
+{
+        stripe_private_t *priv      = this->private;
+        stripe_local_t   *local     = frame->local;
+        call_frame_t     *prev      = cookie;
+        xlator_list_t    *child     = NULL;
+        int32_t           remaining = 0;
+
+        LOCK (&frame->lock);
+        {
+                remaining = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                prev->this->name,
+                                strerror (op_errno));
+                        local->op_ret = -1;
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (remaining)
+                return 0;
+
+        if (local->op_ret != -1) {
+                loc_wipe (&local->loc);
+                STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                              local->inode, &local->stbuf);
+                return 0;
+        }
+
+        /* setxattr failed somewhere: take the file away again */
+        local->call_count = priv->child_count;
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_mknod_ifreg_fail_unlink_cbk,
+                            child->xlator,
+                            child->xlator->fops->unlink,
+                            &local->loc);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_mknod_ifreg_cbk - collects the mknod replies for a regular file.
+ *
+ * Replies are merged into local->stbuf like in the other collectors
+ * (st_blocks summed, st_size the maximum, st_ino from the first child);
+ * any failing child marks the whole call as failed.
+ *
+ * After the last reply:
+ *  - if every child succeeded and xattrs are supported, each child gets
+ *    a setxattr carrying the stripe size, the stripe count and its own
+ *    index; stripe_mknod_ifreg_setxattr_cbk() finishes the call;
+ *  - otherwise the loc copy is released and the result unwound as is.
+ *
+ * The stripe count written into the xattr is taken from
+ * priv->child_count, not from local->call_count: the setxattr callbacks
+ * start decrementing call_count as soon as the first request is wound,
+ * so reading it inside the loop could hand later children a too-small
+ * count.
+ *
+ * @cookie is read as the call_frame_t of the child that replied.
+ */
+int32_t
+stripe_mknod_ifreg_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno,
+                        inode_t *inode,
+                        struct stat *buf)
+{
+        int               ret     = 0;
+        int32_t           callcnt = 0;
+        stripe_local_t   *local   = frame->local;
+        stripe_private_t *priv    = this->private;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->failed = 1;
+                        local->op_errno = op_errno;
+                }
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                        /* Get the mapping in inode private */
+                        /* Get the stat buf right */
+                        if (local->stbuf.st_blksize == 0) {
+                                local->stbuf = *buf;
+                                /* Because st_blocks gets added again */
+                                local->stbuf.st_blocks = 0;
+                        }
+
+                        /* Always, pass the inode number of first child
+                           to the above layer */
+                        if (FIRST_CHILD(this) ==
+                            ((call_frame_t *)cookie)->this)
+                                local->stbuf.st_ino = buf->st_ino;
+
+                        local->stbuf.st_blocks += buf->st_blocks;
+                        if (local->stbuf.st_size < buf->st_size)
+                                local->stbuf.st_size = buf->st_size;
+                        if (local->stbuf.st_blksize != buf->st_blksize) {
+                                /* TODO: add to blocks in terms of
+                                   original block size */
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                if (local->failed)
+                        local->op_ret = -1;
+
+                if ((local->op_ret != -1) && priv->xattr_supported) {
+                        /* Send a setxattr request to nodes where the
+                           files are created */
+                        int32_t        index = 0;
+                        char           size_key[256] = {0,};
+                        char           index_key[256] = {0,};
+                        char           count_key[256] = {0,};
+                        xlator_list_t *trav = this->children;
+                        dict_t        *dict = NULL;
+
+                        /* Bounded formatting: a long translator name
+                           must not overrun the key buffers */
+                        snprintf (size_key, sizeof (size_key),
+                                  "trusted.%s.stripe-size", this->name);
+                        snprintf (count_key, sizeof (count_key),
+                                  "trusted.%s.stripe-count", this->name);
+                        snprintf (index_key, sizeof (index_key),
+                                  "trusted.%s.stripe-index", this->name);
+
+                        local->call_count = priv->child_count;
+
+                        while (trav) {
+                                dict = get_new_dict ();
+                                dict_ref (dict);
+                                /* TODO: check return value */
+                                ret = dict_set_int64 (dict, size_key,
+                                                      local->stripe_size);
+                                /* stable count, see function comment */
+                                ret = dict_set_int32 (dict, count_key,
+                                                      priv->child_count);
+                                ret = dict_set_int32 (dict, index_key, index);
+
+                                STACK_WIND (frame,
+                                            stripe_mknod_ifreg_setxattr_cbk,
+                                            trav->xlator,
+                                            trav->xlator->fops->setxattr,
+                                            &local->loc, dict, 0);
+
+                                dict_unref (dict);
+                                index++;
+                                trav = trav->next;
+                        }
+                } else {
+                        /* Create itself has failed.. so return
+                           without setxattring */
+                        loc_wipe (&local->loc);
+                        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                      local->inode, &local->stbuf);
+                }
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_mknod - create a filesystem node.
+ *
+ * Regular files are created on every child, with the replies delivered to
+ * stripe_mknod_ifreg_cbk; any other node type is created on the first child
+ * only. Unwinds with EIO when the first child is down or, for a regular
+ * file, when priv->nodes_down is set.
+ */
+int32_t
+stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              mode_t mode, dev_t rdev)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, NULL, NULL);
+                return 0;
+        }
+
+        if (!S_ISREG(mode)) {
+                /* Anything but a regular file goes to the first child */
+                STACK_WIND (frame,
+                            stripe_common_inode_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mknod,
+                            loc, mode, rdev);
+                return 0;
+        }
+
+        /* NOTE: kernels older than 2.6.9 send creat() as mknod() + open(),
+           which is why S_IFREG has to be handled here */
+        if (priv->nodes_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Some node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, loc->inode, NULL);
+                return 0;
+        }
+
+        /* Per-call state shared by the replies of all children */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret      = -1;
+        local->op_errno    = ENOTCONN;
+        local->stripe_size = stripe_get_matching_bs (loc->path,
+                                                     priv->pattern,
+                                                     priv->block_size);
+        frame->local       = local;
+        local->inode       = loc->inode;
+        loc_copy (&local->loc, loc);
+        local->call_count  = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_mknod_ifreg_cbk,
+                            child->xlator,
+                            child->xlator->fops->mknod,
+                            loc, mode, rdev);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_mkdir - create the directory on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_inode_cbk. Unwinds with EIO,
+ * without contacting any child, when the first child is down.
+ */
+int32_t
+stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = NULL;
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, NULL, NULL);
+                return 0;
+        }
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        local->call_count = priv->child_count;
+        frame->local      = local;
+
+        /* Fan the request out to all the children */
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_inode_cbk,
+                            child->xlator,
+                            child->xlator->fops->mkdir,
+                            loc, mode);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_symlink - create a symbolic link on the first child only.
+ *
+ * Unwinds with EIO when the first child is down; otherwise the reply is
+ * delivered to stripe_common_inode_cbk.
+ */
+int32_t
+stripe_symlink (call_frame_t *frame, xlator_t *this,
+                const char *linkpath, loc_t *loc)
+{
+        stripe_private_t *conf = this->private;
+
+        if (!conf->first_child_down) {
+                /* Symlinks are never striped: the first node holds them */
+                STACK_WIND (frame,
+                            stripe_common_inode_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->symlink,
+                            linkpath, loc);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "First node down, returning EIO");
+        STACK_UNWIND (frame, -1, EIO, NULL, NULL);
+
+        return 0;
+}
+
+/**
+ * stripe_link - create a hard link.
+ *
+ * When the existing inode is a regular file the link is made on every
+ * child (replies go to stripe_stack_unwind_inode_cbk); otherwise only the
+ * first entry of this->children is asked (reply goes to
+ * stripe_common_inode_cbk). Unwinds with EIO when the first child is down.
+ */
+int32_t
+stripe_link (call_frame_t *frame, xlator_t *this,
+             loc_t *oldloc, loc_t *newloc)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *child = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, NULL, NULL);
+                return 0;
+        }
+
+        if (!S_ISREG (oldloc->inode->st_mode)) {
+                /* Not a regular file: a single link on the first child */
+                STACK_WIND (frame,
+                            stripe_common_inode_cbk,
+                            child->xlator,
+                            child->xlator->fops->link,
+                            oldloc, newloc);
+                return 0;
+        }
+
+        /* Regular file: every child has to get the new link */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_inode_cbk,
+                            child->xlator,
+                            child->xlator->fops->link,
+                            oldloc, newloc);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_create_fail_unlink_cbk - unlink reply handler wound from
+ * stripe_create_setxattr_cbk after a failed setxattr.
+ *
+ * The unlink result itself is ignored. When the last child has replied the
+ * frame is unwound with the op_ret/op_errno already stored in the local
+ * and the fd reference held in the local is dropped.
+ */
+int32_t
+stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret,
+                               int32_t op_errno)
+{
+        int32_t         pending  = 0;
+        fd_t           *saved_fd = NULL;
+        stripe_local_t *local    = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        /* Remember the fd before unwinding so it can be unref'ed after */
+        saved_fd = local->fd;
+        loc_wipe (&local->loc);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      local->fd, local->inode, &local->stbuf);
+        fd_unref (saved_fd);
+
+        return 0;
+}
+
+
+/**
+ * stripe_create_setxattr_cbk - setxattr reply handler for stripe_create_cbk.
+ *
+ * Any failing reply marks the whole create as failed. After the last reply:
+ * on failure an unlink of local->loc is wound to every child (replies go to
+ * stripe_create_fail_unlink_cbk); on success the create is unwound and the
+ * fd reference held in the local is dropped.
+ */
+int32_t
+stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie,
+                            xlator_t *this, int32_t op_ret,
+                            int32_t op_errno)
+{
+        int32_t           pending  = 0;
+        fd_t             *saved_fd = NULL;
+        stripe_local_t   *local    = frame->local;
+        stripe_private_t *priv     = this->private;
+        xlator_list_t    *child    = NULL;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_ret   = -1;
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        if (local->op_ret != -1) {
+                /* Every child accepted the attributes: report the create */
+                saved_fd = local->fd;
+                loc_wipe (&local->loc);
+                STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                              local->fd, local->inode, &local->stbuf);
+                fd_unref (saved_fd);
+                return 0;
+        }
+
+        /* setxattr failed somewhere: remove the file from all the children */
+        local->call_count = priv->child_count;
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_create_fail_unlink_cbk,
+                            child->xlator,
+                            child->xlator->fops->unlink,
+                            &local->loc);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_create_cbk - create reply handler for stripe_create.
+ *
+ * Merges the stat of every successful reply into local->stbuf (st_blocks
+ * summed, largest st_size kept, st_ino taken from the first child). After
+ * the last reply:
+ *  - any failed reply turns the whole create into a failure;
+ *  - on success the stripe size is stored in the fd context;
+ *  - on success with a non-zero stripe size and xattr support, the
+ *    stripe-size / stripe-count / stripe-index attributes are set on every
+ *    child (replies go to stripe_create_setxattr_cbk);
+ *  - otherwise the frame is unwound and the fd reference is dropped.
+ */
+int32_t
+stripe_create_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   fd_t *fd,
+                   inode_t *inode,
+                   struct stat *buf)
+{
+        int32_t           callcnt = 0;
+        stripe_local_t   *local   = frame->local;
+        stripe_private_t *priv    = this->private;
+        fd_t             *lfd     = NULL;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->failed = 1;
+                        local->op_errno = op_errno;
+                }
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                        /* First successful reply seeds the stat buf */
+                        if (local->stbuf.st_blksize == 0) {
+                                local->stbuf = *buf;
+                                /* Because st_blocks gets added again */
+                                local->stbuf.st_blocks = 0;
+                        }
+
+                        /* Always, pass the inode number of first
+                           child to the above layer */
+                        if (FIRST_CHILD(this) ==
+                            ((call_frame_t *)cookie)->this)
+                                local->stbuf.st_ino = buf->st_ino;
+
+                        local->stbuf.st_blocks += buf->st_blocks;
+                        if (local->stbuf.st_size < buf->st_size)
+                                local->stbuf.st_size = buf->st_size;
+                        if (local->stbuf.st_blksize != buf->st_blksize) {
+                                /* TODO: add to blocks in terms of
+                                   original block size */
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                if (local->failed)
+                        local->op_ret = -1;
+
+                if (local->op_ret >= 0) {
+                        fd_ctx_set (local->fd, this, local->stripe_size);
+                }
+
+                if ((local->op_ret != -1) &&
+                    local->stripe_size && priv->xattr_supported) {
+                        /* Send a setxattr request to nodes where
+                           the files are created */
+                        int            ret = 0;
+                        int32_t        index = 0;
+                        /* Snapshot the child count: local->call_count is
+                           decremented by the setxattr replies while this
+                           loop is still winding, so it must not be used as
+                           the stripe-count value. */
+                        int32_t        child_count = priv->child_count;
+                        char           size_key[256] = {0,};
+                        char           index_key[256] = {0,};
+                        char           count_key[256] = {0,};
+                        xlator_list_t *trav = this->children;
+                        dict_t        *dict = NULL;
+
+                        /* Bounded formatting: this->name is not length
+                           checked here */
+                        snprintf (size_key, sizeof (size_key),
+                                  "trusted.%s.stripe-size", this->name);
+                        snprintf (count_key, sizeof (count_key),
+                                  "trusted.%s.stripe-count", this->name);
+                        snprintf (index_key, sizeof (index_key),
+                                  "trusted.%s.stripe-index", this->name);
+
+                        local->call_count = child_count;
+
+                        while (trav) {
+                                dict = get_new_dict ();
+                                dict_ref (dict);
+
+                                /* NOTE(review): assumes dict_set_* return 0
+                                   on success - confirm against dict.h */
+                                ret  = dict_set_int64 (dict, size_key,
+                                                       local->stripe_size);
+                                ret |= dict_set_int32 (dict, count_key,
+                                                       child_count);
+                                ret |= dict_set_int32 (dict, index_key,
+                                                       index);
+                                if (ret) {
+                                        gf_log (this->name, GF_LOG_WARNING,
+                                                "failed to fill stripe "
+                                                "attributes for %s "
+                                                "(index %d)",
+                                                local->loc.path, index);
+                                }
+
+                                /* Wind even after a dict failure so that
+                                   the reply count still adds up */
+                                STACK_WIND (frame,
+                                            stripe_create_setxattr_cbk,
+                                            trav->xlator,
+                                            trav->xlator->fops->setxattr,
+                                            &local->loc,
+                                            dict,
+                                            0);
+
+                                dict_unref (dict);
+                                index++;
+                                trav = trav->next;
+                        }
+                } else {
+                        /* Create itself has failed.. so return
+                           without setxattring */
+                        lfd = local->fd;
+                        loc_wipe (&local->loc);
+                        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                      local->fd, local->inode, &local->stbuf);
+
+                        fd_unref (lfd);
+                }
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_create - create the file on every child.
+ *
+ * The stripe size for the path is looked up with stripe_get_matching_bs
+ * and kept in the local; replies are delivered to stripe_create_cbk.
+ * O_APPEND is stripped from the flags before they are passed down.
+ * Unwinds with EIO, without contacting any child, when the first child or
+ * any other child is down.
+ *
+ * @loc   - location of the file to be created.
+ * @flags - open flags.
+ * @mode  - permission bits for the new file.
+ * @fd    - fd to be associated with the new file; a reference is taken and
+ *          held in the local.
+ */
+int32_t
+stripe_create (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               int32_t flags,
+               mode_t mode,
+               fd_t *fd)
+{
+        stripe_private_t *priv  = this->private;
+        stripe_local_t   *local = NULL;
+        xlator_list_t    *trav  = NULL;
+
+        /* files created in O_APPEND mode does not allow lseek() on fd */
+        flags &= ~O_APPEND;
+
+        if (priv->first_child_down || priv->nodes_down) {
+                /* Name the condition that actually triggered the failure
+                   instead of always blaming the first node */
+                gf_log (this->name, GF_LOG_WARNING,
+                        "%s node down, returning EIO",
+                        priv->first_child_down ? "First" : "Some");
+                STACK_UNWIND (frame, -1, EIO, fd, loc->inode, NULL);
+                return 0;
+        }
+
+        /* Initialization */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        local->stripe_size = stripe_get_matching_bs (loc->path,
+                                                     priv->pattern,
+                                                     priv->block_size);
+        frame->local = local;
+        local->inode = loc->inode;
+        loc_copy (&local->loc, loc);
+        local->fd = fd_ref (fd);
+
+        local->call_count = priv->child_count;
+
+        trav = this->children;
+        while (trav) {
+                STACK_WIND (frame,
+                            stripe_create_cbk,
+                            trav->xlator,
+                            trav->xlator->fops->create,
+                            loc, flags, mode, fd);
+                trav = trav->next;
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_open_cbk - open reply handler.
+ *
+ * A single failing child fails the whole open. After the last reply, a
+ * successful open stores local->stripe_size in the fd context; the frame
+ * is then unwound with the fd passed to this final reply.
+ */
+int32_t
+stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int32_t         pending = 0;
+        stripe_local_t *local   = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        local->failed = 1;
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_ret   = -1;
+                        local->op_errno = op_errno;
+                } else if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        /* A later success must not mask an earlier failure */
+        if (local->failed)
+                local->op_ret = -1;
+
+        if (local->op_ret >= 0)
+                fd_ctx_set (local->fd, this, local->stripe_size);
+
+        loc_wipe (&local->loc);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno, fd);
+
+        return 0;
+}
+
+
+/**
+ * stripe_open_getxattr_cbk - getxattr reply handler wound from stripe_open.
+ *
+ * After the last reply, and only if no reply was an error, the
+ * "trusted.<xlator name>.stripe-size" value found in the dict of this
+ * final reply replaces local->stripe_size. The open is then wound to every
+ * child whatever the getxattr outcome was (replies go to stripe_open_cbk).
+ */
+int32_t
+stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie,
+                          xlator_t *this, int32_t op_ret,
+                          int32_t op_errno, dict_t *dict)
+{
+        int32_t           pending = 0;
+        stripe_local_t   *local   = frame->local;
+        stripe_private_t *priv    = this->private;
+        xlator_list_t    *child   = NULL;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_ret   = -1;
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        if (!local->failed && (local->op_ret != -1)) {
+                /* getxattr went fine everywhere: pick up the stored size */
+                char    size_key[256] = {0,};
+                data_t *size_data     = NULL;
+
+                sprintf (size_key,
+                         "trusted.%s.stripe-size", this->name);
+                size_data = dict_get (dict, size_key);
+
+                if (!size_data) {
+                        /* if the file was created using earlier
+                           versions of stripe */
+                        gf_log (this->name, GF_LOG_CRITICAL,
+                                "[CRITICAL] Seems like file(%s) "
+                                "created using earlier version",
+                                local->loc.path);
+                } else {
+                        local->stripe_size = data_to_int64 (size_data);
+                }
+        }
+
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_open_cbk,
+                            child->xlator,
+                            child->xlator->fops->open,
+                            &local->loc, local->flags, local->fd);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_open - open the file on every child.
+ *
+ * O_APPEND is stripped from the flags. With xattr support a getxattr is
+ * first wound to every child (stripe_open_getxattr_cbk then issues the
+ * open); without it the open is wound directly (replies go to
+ * stripe_open_cbk). Unwinds with ENOTCONN when the first child is down.
+ */
+int32_t
+stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             int32_t flags, fd_t *fd)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = this->children;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* files opened in O_APPEND mode does not allow lseek() on fd */
+        flags &= ~O_APPEND;
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning ENOTCONN");
+                STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+                return 0;
+        }
+
+        /* State carried across the getxattr and open replies */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->fd    = fd;
+        frame->local = local;
+        local->inode = loc->inode;
+        loc_copy (&local->loc, loc);
+
+        local->flags       = flags;
+        local->call_count  = priv->child_count;
+        local->stripe_size = stripe_get_matching_bs (loc->path,
+                                                     priv->pattern,
+                                                     priv->block_size);
+
+        if (!priv->xattr_supported) {
+                for (; child != NULL; child = child->next) {
+                        STACK_WIND (frame,
+                                    stripe_open_cbk,
+                                    child->xlator,
+                                    child->xlator->fops->open,
+                                    &local->loc, local->flags, local->fd);
+                }
+                return 0;
+        }
+
+        for (; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_open_getxattr_cbk,
+                            child->xlator,
+                            child->xlator->fops->getxattr,
+                            loc, NULL);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_opendir_cbk - opendir reply handler.
+ *
+ * Records each child's result in the local and unwinds with local->fd once
+ * the last child has replied. local->op_ret is overwritten by every reply,
+ * so the value unwound is the one written by the reply processed last.
+ */
+int32_t
+stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int32_t         pending = 0;
+        stripe_local_t *local   = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_ret   = -1;
+                        local->failed   = 1;
+                        local->op_errno = op_errno;
+                } else if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending == 0) {
+                STACK_UNWIND (frame, local->op_ret,
+                              local->op_errno, local->fd);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_opendir - open the directory on every child.
+ *
+ * Replies are delivered to stripe_opendir_cbk. Unwinds with EIO when the
+ * first child is down.
+ */
+int32_t
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (priv->first_child_down) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "First node down, returning EIO");
+                STACK_UNWIND (frame, -1, EIO, NULL);
+                return 0;
+        }
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        frame->local      = local;
+        local->inode      = loc->inode;
+        local->fd         = fd;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_opendir_cbk,
+                            child->xlator,
+                            child->xlator->fops->opendir,
+                            loc, fd);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_getxattr_cbk - hand the child's getxattr reply to the caller
+ * unchanged.
+ */
+int32_t
+stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *value)
+{
+        /* Nothing to merge: only one child was asked */
+        STACK_UNWIND (frame, op_ret, op_errno, value);
+
+        return 0;
+}
+
+
+/**
+ * stripe_getxattr - fetch extended attributes from the first child only.
+ *
+ * The reply is delivered to stripe_getxattr_cbk.
+ */
+int32_t
+stripe_getxattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *name)
+{
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        STACK_WIND (frame, stripe_getxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+                    loc, name);
+
+        return 0;
+}
+
+/**
+ * stripe_removexattr - remove an extended attribute on the first child only.
+ *
+ * Unwinds with ENOTCONN when the first child is down; otherwise the reply
+ * is delivered to stripe_common_cbk.
+ */
+int32_t
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+                    loc_t *loc, const char *name)
+{
+        stripe_private_t *conf = this->private;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        if (!conf->first_child_down) {
+                STACK_WIND (frame,
+                            stripe_common_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->removexattr,
+                            loc, name);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "First node down, returning ENOTCONN");
+        STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+        return 0;
+}
+
+
+/**
+ * stripe_lk_cbk - lk reply handler.
+ *
+ * The flock of the first successful reply is kept in the local. Only an
+ * ENOTCONN error marks the call as failed; other errors just record their
+ * errno. Unwinds once the last child has replied.
+ */
+int32_t
+stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        int32_t         pending = 0;
+        stripe_local_t *local   = frame->local;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_errno = op_errno;
+                        if (op_errno == ENOTCONN)
+                                local->failed = 1;
+                } else if ((op_ret == 0) && (local->op_ret == -1)) {
+                        /* First successful call, copy the *lock */
+                        local->op_ret = 0;
+                        local->lock   = *lock;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        if (local->failed)
+                local->op_ret = -1;
+
+        STACK_UNWIND (frame, local->op_ret,
+                      local->op_errno, &local->lock);
+
+        return 0;
+}
+
+
+/**
+ * stripe_lk - forward the lock request to every child.
+ *
+ * Replies are delivered to stripe_lk_cbk.
+ */
+int32_t
+stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           int32_t cmd, struct flock *lock)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_lk_cbk,
+                            child->xlator,
+                            child->xlator->fops->lk,
+                            fd, cmd, lock);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_setdents - forward the directory entries to every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_cbk.
+ */
+int32_t
+stripe_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 int32_t flags, dir_entry_t *entries, int32_t count)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            child->xlator,
+                            child->xlator->fops->setdents,
+                            fd, flags, entries, count);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_flush - flush the fd on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_cbk.
+ */
+int32_t
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            child->xlator,
+                            child->xlator->fops->flush,
+                            fd);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_release - release hook for a file fd; does nothing and
+ * returns 0.
+ */
+int32_t
+stripe_release (xlator_t *this, fd_t *fd)
+{
+        return 0;
+}
+
+
+/**
+ * stripe_fsync - fsync the fd on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_cbk.
+ */
+int32_t
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            child->xlator,
+                            child->xlator->fops->fsync,
+                            fd, flags);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_fstat - stat the fd on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_buf_cbk.
+ */
+int32_t
+stripe_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->inode      = fd->inode;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->fstat,
+                            fd);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_fchmod - change the mode of the fd's file on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_buf_cbk.
+ */
+int32_t
+stripe_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->inode      = fd->inode;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->fchmod,
+                            fd, mode);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_fchown - change the owner of the fd's file on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_buf_cbk.
+ */
+int32_t
+stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               uid_t uid, gid_t gid)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->inode      = fd->inode;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->fchown,
+                            fd, uid, gid);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_ftruncate - forward the truncate to every child.
+ *
+ * The same offset is passed to each child. Replies are delivered to
+ * stripe_stack_unwind_buf_cbk.
+ */
+int32_t
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->inode      = fd->inode;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_buf_cbk,
+                            child->xlator,
+                            child->xlator->fops->ftruncate,
+                            fd, offset);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_releasedir - release hook for a directory fd; does nothing and
+ * returns 0.
+ */
+int32_t
+stripe_releasedir (xlator_t *this, fd_t *fd)
+{
+        return 0;
+}
+
+
+/**
+ * stripe_fsyncdir - fsync the directory fd on every child.
+ *
+ * Replies are delivered to stripe_stack_unwind_cbk.
+ */
+int32_t
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 int32_t flags)
+{
+        stripe_local_t   *local = NULL;
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = NULL;
+
+        STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd);
+
+        /* Reply-tracking state for this call */
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        local->op_ret     = -1;
+        frame->local      = local;
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stack_unwind_cbk,
+                            child->xlator,
+                            child->xlator->fops->fsyncdir,
+                            fd, flags);
+        }
+
+        return 0;
+}
+
+
+/**
+ * stripe_single_readv_cbk - pass-through readv reply handler.
+ *
+ * Hands the child's reply to the caller unchanged; meant for reads on a
+ * file whose name doesn't match the pattern specified for striping.
+ */
+int32_t
+stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno,
+                         struct iovec *vector, int32_t count,
+                         struct stat *stbuf)
+{
+        /* Single child involved, so nothing has to be merged */
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+        return 0;
+}
+
+/**
+ * stripe_readv_cbk - collect the per-stripe read replies, put them in
+ * request order and send them to the above layer as a single vector.
+ *
+ * Runs on a copied frame: the reply is stored in slot local->node_index of
+ * the original frame's replies array and the copied frame is destroyed on
+ * return. The reply that completes the set (call_count reaching
+ * wind_count) builds the final answer:
+ *  - if any stripe failed, the original frame is unwound with -1 and that
+ *    stripe's errno;
+ *  - otherwise op_ret is the sum of the per-stripe op_ret values, the
+ *    iovecs are concatenated in stripe order, and the stat is that of the
+ *    first stripe with st_size raised to the largest size seen.
+ * Every vector duplicated with iov_dup is released in both cases.
+ */
+int32_t
+stripe_readv_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct iovec *vector,
+                  int32_t count,
+                  struct stat *stbuf)
+{
+        int32_t         index = 0;
+        int32_t         callcnt = 0;
+        call_frame_t   *main_frame = NULL;
+        stripe_local_t *main_local = NULL;
+        stripe_local_t *local = frame->local;
+
+        index = local->node_index;
+        main_frame = local->orig_frame;
+        main_local = main_frame->local;
+
+        LOCK (&main_frame->lock);
+        {
+                main_local->replies[index].op_ret = op_ret;
+                main_local->replies[index].op_errno = op_errno;
+                if (op_ret >= 0) {
+                        main_local->replies[index].stbuf = *stbuf;
+                        main_local->replies[index].count = count;
+                        main_local->replies[index].vector =
+                                iov_dup (vector, count);
+
+                        if (frame->root->rsp_refs)
+                                dict_copy (frame->root->rsp_refs,
+                                           main_frame->root->rsp_refs);
+                }
+                callcnt = ++main_local->call_count;
+        }
+        UNLOCK(&main_frame->lock);
+
+        if (callcnt == main_local->wind_count) {
+                int32_t       final_count = 0;
+                struct iovec *final_vec = NULL;
+                struct stat   tmp_stbuf = {0,};
+                /* Captured before the unwind so it can be unref'ed after */
+                dict_t       *refs = main_frame->root->rsp_refs;
+
+                op_ret = 0;
+                memcpy (&tmp_stbuf, &main_local->replies[0].stbuf,
+                        sizeof (struct stat));
+                for (index=0; index < main_local->wind_count; index++) {
+                        /* TODO: check whether each stripe returned 'expected'
+                         * number of bytes
+                         */
+                        if (main_local->replies[index].op_ret == -1) {
+                                op_ret = -1;
+                                op_errno = main_local->replies[index].op_errno;
+                                break;
+                        }
+                        op_ret += main_local->replies[index].op_ret;
+                        final_count += main_local->replies[index].count;
+                        /* TODO: Do I need to send anything more in stbuf? */
+                        if (tmp_stbuf.st_size <
+                            main_local->replies[index].stbuf.st_size) {
+                                tmp_stbuf.st_size =
+                                        main_local->replies[index].stbuf.st_size;
+                        }
+                }
+                if (op_ret != -1) {
+                        final_vec = CALLOC (final_count,
+                                            sizeof (struct iovec));
+                        ERR_ABORT (final_vec);
+                        final_count = 0;
+
+                        for (index=0;
+                             index < main_local->wind_count; index++) {
+                                memcpy (final_vec + final_count,
+                                        main_local->replies[index].vector,
+                                        (main_local->replies[index].count *
+                                         sizeof (struct iovec)));
+                                final_count +=
+                                        main_local->replies[index].count;
+                        }
+                } else {
+                        final_vec = NULL;
+                        final_count = 0;
+                }
+
+                /* Release the duplicated vectors on the failure path too:
+                   stripes that succeeded still own an iov_dup'ed array.
+                   Slots of failed stripes were never assigned; they are
+                   expected to be NULL (replies is CALLOC'ed by
+                   stripe_readv) and free (NULL) is a no-op. */
+                for (index=0; index < main_local->wind_count; index++) {
+                        free (main_local->replies[index].vector);
+                        main_local->replies[index].vector = NULL;
+                }
+
+                FREE (main_local->replies);
+                STACK_UNWIND (main_frame, op_ret, op_errno,
+                              final_vec, final_count, &tmp_stbuf);
+
+                dict_unref (refs);
+                if (final_vec)
+                        free (final_vec);
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+/**
+ * stripe_readv - split a read into one readv per stripe-sized piece.
+ *
+ * The stripe size is taken from the fd context; when it is zero the call
+ * is unwound with EINVAL. Each piece is wound on its own copy of the frame
+ * to the child that holds it, starting at child
+ * (offset / stripe_size) % child_count and moving round-robin over the
+ * children. Replies are delivered to stripe_readv_cbk, with
+ * local->replies providing one slot per piece.
+ *
+ * NOTE(review): if num_stripe works out to 0 no read is wound and this
+ * function returns without unwinding the frame - confirm how floor()/roof()
+ * behave for size == 0.
+ */
+int32_t
+stripe_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset)
+{
+ int32_t index = 0;
+ int32_t num_stripe = 0;
+ size_t frame_size = 0;
+ off_t rounded_end = 0;
+ uint64_t stripe_size = 0;
+ off_t rounded_start = 0;
+ off_t frame_offset = offset;
+ stripe_local_t *local = NULL;
+ call_frame_t *rframe = NULL;
+ stripe_local_t *rlocal = NULL;
+ xlator_list_t *trav = this->children;
+ stripe_private_t *priv = this->private;
+
+ fd_ctx_get (fd, this, &stripe_size);
+ if (!stripe_size) {
+ STACK_UNWIND (frame, -1, EINVAL, NULL, 0, NULL);
+ return 0;
+ }
+
+ /* The file is striped across the child nodes. Send the read request
+ * to the child nodes appropriately after checking which region of
+ * the file is in which child node. Always '0-<stripe_size>' part of
+ * the file resides in the first child.
+ */
+ rounded_start = floor (offset, stripe_size);
+ rounded_end = roof (offset+size, stripe_size);
+ num_stripe = (rounded_end - rounded_start) / stripe_size;
+
+ local = CALLOC (1, sizeof (stripe_local_t));
+ ERR_ABORT (local);
+ local->wind_count = num_stripe; /* number of replies stripe_readv_cbk waits for */
+ frame->local = local;
+ frame->root->rsp_refs = dict_ref (get_new_dict ());
+
+ /* This is where all the vectors should be copied: one zero-filled
+ slot per piece, indexed by the piece's position in the request. */
+ local->replies = CALLOC (1, num_stripe *
+ sizeof (struct readv_replies));
+ ERR_ABORT (local->replies);
+
+ for (index = 0;
+ index < ((offset / stripe_size) % priv->child_count);
+ index++) {
+ trav = trav->next; /* advance to the child holding the first piece */
+ }
+
+ for (index = 0; index < num_stripe; index++) {
+ rframe = copy_frame (frame);
+ rlocal = CALLOC (1, sizeof (stripe_local_t));
+ ERR_ABORT (rlocal);
+
+ frame_size = min (roof (frame_offset+1, stripe_size),
+ (offset + size)) - frame_offset; /* bytes requested for this piece */
+
+ rlocal->node_index = index; /* reply slot used by stripe_readv_cbk */
+ rlocal->orig_frame = frame;
+ rframe->local = rlocal;
+ STACK_WIND (rframe,
+ stripe_readv_cbk,
+ trav->xlator,
+ trav->xlator->fops->readv,
+ fd, frame_size, frame_offset);
+
+ frame_offset += frame_size;
+
+ trav = trav->next ? trav->next : this->children; /* wrap around to the first child */
+ }
+
+ return 0;
+}
+
+
+/**
+ * stripe_writev_cbk - writev reply handler for the chunks wound by
+ * stripe_writev.
+ *
+ * Adds up the bytes written by the successful chunks. As soon as one chunk
+ * fails the call is marked failed and op_ret stays -1: later successful
+ * replies are no longer added, so the error cannot be turned back into a
+ * byte count. Unwinds when every wound chunk has replied and the sender
+ * has set local->unwind.
+ */
+int32_t
+stripe_writev_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   struct stat *stbuf)
+{
+        int32_t         callcnt = 0;
+        stripe_local_t *local = frame->local;
+
+        LOCK(&frame->lock);
+        {
+                callcnt = ++local->call_count;
+
+                if (op_ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->failed = 1;
+                        local->op_errno = op_errno;
+                        local->op_ret = -1;
+                }
+                if ((op_ret >= 0) && !local->failed) {
+                        /* Only accumulate while no chunk has failed */
+                        local->op_ret += op_ret;
+                        local->stbuf = *stbuf;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if ((callcnt == local->wind_count) && local->unwind) {
+                STACK_UNWIND (frame, local->op_ret,
+                              local->op_errno, &local->stbuf);
+        }
+        return 0;
+}
+
+
+/**
+ * stripe_single_writev_cbk - hand the child's writev reply to the caller
+ * unchanged.
+ */
+int32_t
+stripe_single_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno,
+                          struct stat *stbuf)
+{
+        /* Nothing to add up: pass the reply straight through */
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+        return 0;
+}
+/**
+ * stripe_writev - split a write into chunks that each stay inside one
+ * stripe and wind every chunk to the child holding that stripe.
+ *
+ * The stripe size is taken from the fd context; when it is zero the call
+ * is unwound with EINVAL. For each chunk the child is picked as
+ * ((offset + bytes already sent) / stripe_size) % child_count. Replies are
+ * delivered to stripe_writev_cbk, which unwinds once local->unwind is set
+ * and all wound chunks have replied.
+ */
+int32_t
+stripe_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset)
+{
+ int32_t idx = 0;
+ int32_t total_size = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ int32_t tmp_count = count;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ struct iovec *tmp_vec = vector;
+ stripe_private_t *priv = this->private;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+
+ fd_ctx_get (fd, this, &stripe_size);
+ if (!stripe_size) {
+ STACK_UNWIND (frame, -1, EINVAL, NULL);
+ return 0;
+ }
+
+ /* File has to be striped across the child nodes; first add up the
+ total number of bytes in the caller's vector. */
+ for (idx = 0; idx< count; idx ++) {
+ total_size += tmp_vec[idx].iov_len;
+ }
+ remaining_size = total_size;
+
+ local = CALLOC (1, sizeof (stripe_local_t));
+ ERR_ABORT (local);
+ frame->local = local;
+ local->stripe_size = stripe_size;
+
+ while (1) {
+ /* Send striped chunk of the vector to child
+ nodes appropriately. */
+ trav = this->children;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % priv->child_count);
+ while (idx) {
+ trav = trav->next;
+ idx--;
+ }
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size)); /* bytes left in the current stripe */
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ tmp_count = iov_subset (vector, count, offset_offset,
+ offset_offset + fill_size, NULL); /* this result only sizes tmp_vec */
+ tmp_vec = CALLOC (tmp_count, sizeof (struct iovec));
+ ERR_ABORT (tmp_vec);
+ tmp_count = iov_subset (vector, count, offset_offset,
+ offset_offset + fill_size, tmp_vec);
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1; /* last chunk: set before winding so its reply may unwind */
+
+ STACK_WIND(frame,
+ stripe_writev_cbk,
+ trav->xlator,
+ trav->xlator->fops->writev,
+ fd, tmp_vec, tmp_count, offset + offset_offset);
+ FREE (tmp_vec);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break; /* 'local' is not touched again after the final wind */
+ }
+
+ return 0;
+}
+
+
+
+/* Management operations */
+
+/**
+ * stripe_stats_cbk - fold one child's stats reply into the frame-local total.
+ *
+ * A failed reply (op_ret == -1) is logged and recorded in local->op_ret and
+ * local->op_errno. The first successful reply, recognised by local->op_ret
+ * still holding the -2 marker, is copied whole; later successful replies add
+ * their nr_files, free_disk, disk_usage and nr_clients to the stored copy.
+ * When the outstanding-call counter reaches zero the accumulated result is
+ * unwound to the caller.
+ *
+ * @cookie: call frame of the replying child; only used for the log message.
+ */
+int32_t
+stripe_stats_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct xlator_stats *stats)
+{
+        stripe_local_t *local   = frame->local;
+        int32_t         pending = 0;
+
+        LOCK(&frame->lock);
+        {
+                pending = --local->call_count;
+
+                switch (op_ret) {
+                case -1:
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "%s returned error %s",
+                                ((call_frame_t *)cookie)->this->name,
+                                strerror (op_errno));
+                        local->op_ret = -1;
+                        local->op_errno = op_errno;
+                        break;
+
+                case 0:
+                        if (local->op_ret != -2) {
+                                /* not the first success: accumulate */
+                                local->stats.nr_files += stats->nr_files;
+                                local->stats.free_disk += stats->free_disk;
+                                local->stats.disk_usage += stats->disk_usage;
+                                local->stats.nr_clients += stats->nr_clients;
+                        } else {
+                                /* first success: take the reply as the
+                                   starting value */
+                                local->stats = *stats;
+                                local->op_ret = 0;
+                        }
+                        break;
+
+                default:
+                        /* any other op_ret only counts as a reply */
+                        break;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending == 0) {
+                STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                              &local->stats);
+        }
+
+        return 0;
+}
+
+/**
+ * stripe_stats - ask every child for its statistics.
+ *
+ * Allocates the frame-local state, marks local->op_ret with -2 so that
+ * stripe_stats_cbk can recognise the first successful reply, sets the
+ * outstanding-call counter to the number of children and winds the stats
+ * management operation to each child in list order.
+ */
+int32_t
+stripe_stats (call_frame_t *frame,
+              xlator_t *this,
+              int32_t flags)
+{
+        stripe_private_t *priv  = this->private;
+        xlator_list_t    *child = this->children;
+        stripe_local_t   *local = NULL;
+
+        local = CALLOC (1, sizeof (stripe_local_t));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        /* -2 is the "no reply folded in yet" marker read by the callback */
+        local->op_ret = -2;
+        local->call_count = priv->child_count;
+
+        for (; child != NULL; child = child->next) {
+                STACK_WIND (frame,
+                            stripe_stats_cbk,
+                            child->xlator,
+                            child->xlator->mops->stats,
+                            flags);
+        }
+
+        return 0;
+}
+
+/**
+ * notify - track child up/down events and pass events on to the parent.
+ *
+ * For GF_EVENT_CHILD_UP and GF_EVENT_CHILD_DOWN the reporting child (@data)
+ * is looked up in priv->xl_array, its slot in priv->state is set to 1 (up)
+ * or 0 (down) and priv->nodes_down is recomputed from the state table. The
+ * event is passed on through default_notify() only when it comes from
+ * FIRST_CHILD (this), in which case priv->first_child_down is updated too.
+ * Every other event is passed on unchanged.
+ *
+ * An up/down event whose @data matches no known child leaves the state
+ * table untouched (it used to write the slot at index child_count).
+ *
+ * Returns 0 always; nothing is done when the translator has no private data.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        stripe_private_t *priv = this->private;
+        int               down_client = 0;
+        int               child_up = 0;
+        int               i = 0;
+
+        if (!priv)
+                return 0;
+
+        switch (event)
+        {
+        case GF_EVENT_CHILD_UP:
+        case GF_EVENT_CHILD_DOWN:
+        {
+                child_up = (event == GF_EVENT_CHILD_UP) ? 1 : 0;
+
+                /* get an index number to set */
+                for (i = 0; i < priv->child_count; i++) {
+                        if (data == priv->xl_array[i])
+                                break;
+                }
+                if (i < priv->child_count) {
+                        priv->state[i] = child_up;
+                } else {
+                        /* not one of our subvolumes: do not touch the
+                           state table out of range */
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "event %d received from an unknown "
+                                "subvolume, child state left unchanged",
+                                event);
+                }
+
+                for (i = 0; i < priv->child_count; i++) {
+                        if (!priv->state[i])
+                                down_client++;
+                }
+
+                LOCK (&priv->lock);
+                {
+                        priv->nodes_down = down_client;
+
+                        if (data == FIRST_CHILD (this)) {
+                                priv->first_child_down = child_up ? 0 : 1;
+                                /* forwarded while priv->lock is held, as
+                                   before */
+                                default_notify (this, event, data);
+                        }
+                }
+                UNLOCK (&priv->lock);
+        }
+        break;
+
+        default:
+        {
+                /* events this translator does not track go straight up */
+                default_notify (this, event, data);
+        }
+        break;
+        }
+
+        return 0;
+}
+/**
+ * init - This function is called when xlator-graph gets initialized.
+ *        The options given in volfiles are parsed here.
+ * @this - the stripe translator instance being initialised.
+ *
+ * Counts the subvolumes (at least one, at most 256), allocates the private
+ * structure and the child array, then parses:
+ *
+ *   option block-size <size>                        e.g. 1MB
+ *   option block-size <pattern>:<size>[,<pattern>:<size>...]
+ *                                                   e.g. *avi:1GB,*pdf:4096
+ *   option use-xattr <boolean>
+ *
+ * An entry without ':' is a bare size and is stored with the pattern "*";
+ * if it does not parse as a size, 128KB is used for it. An entry with ':'
+ * whose size does not parse is an error.
+ *
+ * Returns 0 on success (this->private is set) and -1 on a configuration
+ * error, in which case everything allocated here is released again.
+ */
+int32_t
+init (xlator_t *this)
+{
+        stripe_private_t      *priv = NULL;
+        xlator_list_t         *trav = NULL;
+        data_t                *data = NULL;
+        int32_t                count = 0;
+        char                  *tmp_str = NULL;
+        char                  *tmp_str1 = NULL;
+        char                  *dup_str = NULL;
+        char                  *stripe_str = NULL;
+        char                  *pattern = NULL;
+        char                  *num = NULL;
+        size_t                 pattern_len = 0;
+        struct stripe_options *temp_stripeopt = NULL;
+        struct stripe_options *stripe_opt = NULL;
+
+        trav = this->children;
+        while (trav) {
+                count++;
+                trav = trav->next;
+        }
+
+        if (!count) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "stripe configured without \"subvolumes\" option. "
+                        "exiting");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        /* Reject an oversized configuration before anything is allocated,
+           so this failure path has nothing to release. */
+        if (count > 256) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "maximum number of stripe subvolumes supported "
+                        "is 256");
+                return -1;
+        }
+
+        priv = CALLOC (1, sizeof (stripe_private_t));
+        ERR_ABORT (priv);
+        priv->xl_array = CALLOC (count, sizeof (xlator_t *));
+        ERR_ABORT (priv->xl_array);
+        priv->child_count = count;
+        LOCK_INIT (&priv->lock);
+
+        trav = this->children;
+        count = 0;
+        while (trav) {
+                priv->xl_array[count++] = trav->xlator;
+                trav = trav->next;
+        }
+
+        priv->block_size = (128 * GF_UNIT_KB);
+        /* option block-size *avi:1GB,*pdf:4096 */
+        data = dict_get (this->options, "block-size");
+        if (!data) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "No \"option block-size <x>\" given, defaulting "
+                        "to 128KB");
+        } else {
+                /* Get the pattern for striping.
+                   "option block-size *avi:10MB" etc.
+                   Note that strtok_r() cuts the option string in place. */
+                stripe_str = strtok_r (data->data, ",", &tmp_str);
+                while (stripe_str) {
+                        /* work on a copy so the ':' split does not disturb
+                           the outer ',' tokenisation */
+                        dup_str = strdup (stripe_str);
+                        ERR_ABORT (dup_str);
+                        stripe_opt = CALLOC (1,
+                                             sizeof (struct stripe_options));
+                        ERR_ABORT (stripe_opt);
+
+                        pattern = strtok_r (dup_str, ":", &tmp_str1);
+                        num = strtok_r (NULL, ":", &tmp_str1);
+                        if (num) {
+                                if (gf_string2bytesize (num,
+                                                &stripe_opt->block_size)
+                                    != 0) {
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "invalid number format \"%s\"",
+                                                num);
+                                        goto err;
+                                }
+                        } else {
+                                /* No pattern given: the token is a bare
+                                   size that applies to every file. Fall
+                                   back to 128KB when it is not a size. */
+                                if (!pattern ||
+                                    (gf_string2bytesize (pattern,
+                                                &stripe_opt->block_size)
+                                     != 0)) {
+                                        stripe_opt->block_size =
+                                                (128 * GF_UNIT_KB);
+                                }
+                                pattern = "*";
+                        }
+
+                        /* path_pattern is a fixed-size buffer zeroed by
+                           CALLOC; keep at least one byte for the NUL. */
+                        pattern_len = strlen (pattern);
+                        if (pattern_len >=
+                            sizeof (stripe_opt->path_pattern)) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "block-size pattern \"%s\" is too "
+                                        "long", pattern);
+                                goto err;
+                        }
+                        memcpy (stripe_opt->path_pattern,
+                                pattern, pattern_len);
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "block-size : pattern %s : size %"PRId64,
+                                stripe_opt->path_pattern,
+                                stripe_opt->block_size);
+
+                        /* append, so entries keep their volfile order */
+                        if (!priv->pattern) {
+                                priv->pattern = stripe_opt;
+                        } else {
+                                temp_stripeopt = priv->pattern;
+                                while (temp_stripeopt->next)
+                                        temp_stripeopt = temp_stripeopt->next;
+                                temp_stripeopt->next = stripe_opt;
+                        }
+                        /* now owned by the list */
+                        stripe_opt = NULL;
+
+                        /* pattern and num pointed into dup_str; both are
+                           done with */
+                        FREE (dup_str);
+                        dup_str = NULL;
+
+                        stripe_str = strtok_r (NULL, ",", &tmp_str);
+                }
+        }
+
+        priv->xattr_supported = 1;
+        data = dict_get (this->options, "use-xattr");
+        if (data) {
+                if (gf_string2boolean (data->data,
+                                       &priv->xattr_supported) == -1) {
+                        /* logged only; an unparsable value is not fatal */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "error setting hard check for extended "
+                                "attribute");
+                }
+        }
+
+        /* notify related */
+        priv->nodes_down = priv->child_count;
+        this->private = priv;
+
+        return 0;
+
+err:
+        if (dup_str) {
+                FREE (dup_str);
+        }
+        if (stripe_opt) {
+                FREE (stripe_opt);
+        }
+        while (priv->pattern) {
+                temp_stripeopt = priv->pattern;
+                priv->pattern = temp_stripeopt->next;
+                FREE (temp_stripeopt);
+        }
+        FREE (priv->xl_array);
+        LOCK_DESTROY (&priv->lock);
+        FREE (priv);
+
+        return -1;
+}
+
+/**
+ * fini - Free all the private variables
+ * @this - the stripe translator instance being torn down.
+ *
+ * Releases the block-size pattern list, the child array, the lock and the
+ * private structure itself. Does nothing when this->private is not set.
+ */
+void
+fini (xlator_t *this)
+{
+        stripe_private_t      *priv = this->private;
+        struct stripe_options *prev = NULL;
+        struct stripe_options *trav = NULL;
+
+        /* same guard as notify(): there may be no private data to free */
+        if (!priv)
+                return;
+
+        trav = priv->pattern;
+        while (trav) {
+                prev = trav;
+                trav = trav->next;
+                FREE (prev);
+        }
+        FREE (priv->xl_array);
+        LOCK_DESTROY (&priv->lock);
+        FREE (priv);
+
+        /* do not leave a pointer to freed memory behind */
+        this->private = NULL;
+        return;
+}
+
+
+/* File operations implemented by the stripe translator. The members are
+   designated initialisers, so their order here is purely for reading. */
+struct xlator_fops fops = {
+        /* lookup and attributes */
+        .lookup      = stripe_lookup,
+        .stat        = stripe_stat,
+        .fstat       = stripe_fstat,
+        .chmod       = stripe_chmod,
+        .fchmod      = stripe_fchmod,
+        .chown       = stripe_chown,
+        .fchown      = stripe_fchown,
+        .utimens     = stripe_utimens,
+        .access      = stripe_access,
+        .truncate    = stripe_truncate,
+        .ftruncate   = stripe_ftruncate,
+
+        /* namespace changes */
+        .create      = stripe_create,
+        .mknod       = stripe_mknod,
+        .mkdir       = stripe_mkdir,
+        .symlink     = stripe_symlink,
+        .link        = stripe_link,
+        .rename      = stripe_rename,
+        .unlink      = stripe_unlink,
+        .rmdir       = stripe_rmdir,
+        .readlink    = stripe_readlink,
+
+        /* file data */
+        .open        = stripe_open,
+        .readv       = stripe_readv,
+        .writev      = stripe_writev,
+        .flush       = stripe_flush,
+        .fsync       = stripe_fsync,
+        .lk          = stripe_lk,
+
+        /* directories */
+        .opendir     = stripe_opendir,
+        .fsyncdir    = stripe_fsyncdir,
+        .setdents    = stripe_setdents,
+
+        /* extended attributes */
+        .setxattr    = stripe_setxattr,
+        .getxattr    = stripe_getxattr,
+        .removexattr = stripe_removexattr,
+
+        /* filesystem */
+        .statfs      = stripe_statfs,
+};
+
+/* Management operations: only stats is implemented. */
+struct xlator_mops mops = {
+        .stats = stripe_stats,
+};
+
+/* Callbacks for file and directory fd release. */
+struct xlator_cbks cbks = {
+        .releasedir = stripe_releasedir,
+        .release    = stripe_release,
+};
+
+
+/* Volfile options accepted by this translator; init() reads both of them.
+   The table ends with an entry whose key is NULL. */
+struct volume_options options[] = {
+        {
+                .key  = {"block-size"},
+                .type = GF_OPTION_TYPE_ANY
+        },
+        {
+                .key  = {"use-xattr"},
+                .type = GF_OPTION_TYPE_BOOL
+        },
+        {
+                .key  = {NULL}
+        },
+};
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/unify/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/cluster/unify/Makefile.am
@@ -0,0 +1,3 @@
+# Build recurses into src/; no extra files are listed for "make clean" here.
+SUBDIRS = src
+CLEANFILES =
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
new file mode 100644
index 00000000000..b9e6f63e9d7
--- /dev/null
+++ b/xlators/cluster/unify/src/Makefile.am
@@ -0,0 +1,16 @@
+
+xlator_LTLIBRARIES = unify.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+# libtool spells this flag "-avoid-version"; "-avoidversion" is not an option.
+unify_la_LDFLAGS = -module -avoid-version
+
+unify_la_SOURCES = unify.c unify-self-heal.c
+unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = unify.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
new file mode 100644
index 00000000000..4885dd91a35
--- /dev/null
+++ b/xlators/cluster/unify/src/unify-self-heal.c
@@ -0,0 +1,1225 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * unify-self-heal.c :
+ * This file implements a few functions which enable the 'unify' translator
+ * to be consistent in its behaviour when
+ * > a node fails,
+ * > a node gets added,
+ * > a failed node comes back
+ * > a new namespace server is added (i.e., a fresh namespace server).
+ *
+ * This functionality of 'unify' will enable glusterfs to support storage
+ * system failure, and maintain consistency. This works both ways, i.e., when
+ * an entry (either file or directory) is found on the namespace server, and
+ * not on the storage nodes, it is created on the storage nodes and vice versa.
+ *
+ * The two fops where it can be implemented are 'getdents ()' and 'lookup ()'
+ *
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "unify.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "common-utils.h"
+
+/* Forward declarations for getdents callbacks that are referenced by
+   functions defined earlier in this file than they are. */
+int32_t unify_sh_getdents_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this, int32_t op_ret,
+                               int32_t op_errno, dir_entry_t *entry,
+                               int32_t count);
+
+int32_t unify_sh_ns_getdents_cbk (call_frame_t *frame, void *cookie,
+                                  xlator_t *this, int32_t op_ret,
+                                  int32_t op_errno, dir_entry_t *entry,
+                                  int32_t count);
+
+int32_t unify_bgsh_getdents_cbk (call_frame_t *frame, void *cookie,
+                                 xlator_t *this, int32_t op_ret,
+                                 int32_t op_errno, dir_entry_t *entry,
+                                 int32_t count);
+
+int32_t unify_bgsh_ns_getdents_cbk (call_frame_t *frame, void *cookie,
+                                    xlator_t *this, int32_t op_ret,
+                                    int32_t op_errno, dir_entry_t *entry,
+                                    int32_t count);
+
+/**
+ * unify_local_wipe - release what hangs off a unify_local_t.
+ *
+ * Frees local->name, the three self-heal lists (offset, entry, count)
+ * together with local->sh_struct itself, and wipes both loc structures.
+ * The unify_local_t itself is not freed here.
+ */
+static void
+unify_local_wipe (unify_local_t *local)
+{
+        /* the strdup'd name, if one was stored */
+        if (local->name != NULL) {
+                FREE (local->name);
+        }
+
+        /* self-heal bookkeeping: the member arrays first, then the holder */
+        if (local->sh_struct != NULL) {
+                if (local->sh_struct->offset_list != NULL) {
+                        FREE (local->sh_struct->offset_list);
+                }
+                if (local->sh_struct->entry_list != NULL) {
+                        FREE (local->sh_struct->entry_list);
+                }
+                if (local->sh_struct->count_list != NULL) {
+                        FREE (local->sh_struct->count_list);
+                }
+                FREE (local->sh_struct);
+        }
+
+        loc_wipe (&local->loc1);
+        loc_wipe (&local->loc2);
+}
+
+/**
+ * unify_sh_setdents_cbk - reply to the directory-only setdents that
+ * unify_sh_ns_getdents_cbk sends to every storage child.
+ *
+ * The outstanding-call counter is decremented under the frame lock. On the
+ * last reply the namespace entry list stored in slot 0 is freed, and then
+ *  - if the batch just pushed was not the final one (local->flags unset),
+ *    the next batch is requested from the namespace;
+ *  - otherwise self-heal is finished: the fd is dropped, local is wiped and
+ *    the pending lookup is unwound.
+ *
+ * The dict reference is released through the pointer saved before the
+ * unwind, as the other unwind paths in this file do; local must not be
+ * read once STACK_UNWIND has returned.
+ */
+int32_t
+unify_sh_setdents_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno)
+{
+        int32_t        callcnt = -1;
+        unify_local_t *local = frame->local;
+        inode_t       *inode = NULL;
+        dict_t        *tmp_dict = NULL;
+        dir_entry_t   *entry = NULL;
+        dir_entry_t   *trav = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* if local->call_count == 0, that means, setdents on
+                 * storagenodes is still pending.
+                 */
+                if (local->call_count)
+                        callcnt = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (callcnt != 0)
+                return 0;
+
+        /* Last reply: free the list head kept in slot 0 and every entry
+           chained behind it. */
+        entry = local->sh_struct->entry_list[0];
+        if (entry) {
+                trav = entry->next;
+                while (trav) {
+                        entry->next = trav->next;
+                        FREE (trav->name);
+                        if (S_ISLNK (trav->buf.st_mode)) {
+                                FREE (trav->link);
+                        }
+                        FREE (trav);
+                        trav = entry->next;
+                }
+                FREE (entry);
+                /* do not keep a pointer to the freed head */
+                local->sh_struct->entry_list[0] = NULL;
+        }
+
+        if (!local->flags) {
+                if (local->sh_struct->count_list[0] >=
+                    UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                        /* count == size, that means, there are more entries
+                           to read from */
+                        local->sh_struct->offset_list[0] +=
+                                UNIFY_SELF_HEAL_GETDENTS_COUNT;
+                        STACK_WIND (frame,
+                                    unify_sh_ns_getdents_cbk,
+                                    NS(this),
+                                    NS(this)->fops->getdents,
+                                    local->fd,
+                                    UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                    local->sh_struct->offset_list[0],
+                                    GF_GET_DIR_ONLY);
+                }
+                return 0;
+        }
+
+        /* Final batch done: hand the lookup result back. */
+        inode = local->loc1.inode;
+        fd_unref (local->fd);
+        tmp_dict = local->dict;
+
+        unify_local_wipe (local);
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      inode, &local->stbuf, tmp_dict);
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        return 0;
+}
+
+
+/**
+ * unify_sh_ns_getdents_cbk - reply to a getdents sent to the namespace;
+ * pushes the returned directory entries to every storage child.
+ *
+ * A freshly allocated list head takes over the entries chained behind
+ * @entry and is stored, together with @count, in slot 0 of the self-heal
+ * lists. The batch is the final one when @count is below
+ * UNIFY_SELF_HEAL_GETDENTS_COUNT or @entry is NULL; local->flags records
+ * that for unify_sh_setdents_cbk. setdents (GF_SET_DIR_ONLY) is then wound
+ * to each storage child with the child index as cookie.
+ */
+int32_t
+unify_sh_ns_getdents_cbk (call_frame_t *frame,
+                          void *cookie,
+                          xlator_t *this,
+                          int32_t op_ret,
+                          int32_t op_errno,
+                          dir_entry_t *entry,
+                          int32_t count)
+{
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv = this->private;
+        long             index = 0;
+        unsigned long    final = 0;
+        dir_entry_t     *tmp = NULL;
+
+        /* the head is dereferenced right below, so check the allocation
+           the same way the rest of this file does */
+        tmp = CALLOC (1, sizeof (dir_entry_t));
+        ERR_ABORT (tmp);
+
+        local->sh_struct->entry_list[0] = tmp;
+        local->sh_struct->count_list[0] = count;
+        if (entry) {
+                /* take over the chain behind the caller's head */
+                tmp->next = entry->next;
+                entry->next = NULL;
+        }
+
+        if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
+                final = 1;
+        }
+
+        LOCK (&frame->lock);
+        {
+                /* one reply is expected from each storage child; the last
+                   of them continues or finishes the self-heal */
+                local->call_count = priv->child_count;
+                if (final)
+                        local->flags = 1;
+        }
+        UNLOCK (&frame->lock);
+
+        for (index = 0; index < priv->child_count; index++)
+        {
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_setdents_cbk,
+                                   (void *)index,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->setdents,
+                                   local->fd, GF_SET_DIR_ONLY,
+                                   local->sh_struct->entry_list[0], count);
+        }
+
+        return 0;
+}
+/**
+ * unify_sh_ns_setdents_cbk - reply to the setdents sent to the namespace
+ * on behalf of one storage child; the cookie carries that child's index.
+ *
+ * Frees the entry list kept for this child, then either counts the child as
+ * finished (its last batch held fewer than UNIFY_SELF_HEAL_GETDENTS_COUNT
+ * entries) or requests the next batch from it. Once local->call_count
+ * reaches zero, getdents is wound to the namespace from offset 0 with
+ * GF_GET_DIR_ONLY.
+ */
+int32_t
+unify_sh_ns_setdents_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ int32_t callcnt = -1; /* stays -1 unless this child is counted as done */
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ long index = (long)cookie;
+ dir_entry_t *prev, *entry, *trav;
+
+ LOCK (&frame->lock);
+ {
+ if (local->sh_struct->entry_list[index]) {
+ prev = entry = local->sh_struct->entry_list[index]; /* list head */
+ trav = entry->next;
+ while (trav) { /* unlink and free each entry behind the head */
+ prev->next = trav->next;
+ FREE (trav->name);
+ if (S_ISLNK (trav->buf.st_mode))
+ FREE (trav->link);
+ FREE (trav);
+ trav = prev->next;
+ }
+ FREE (entry);
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (local->sh_struct->count_list[index] <
+ UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+ } else {
+ /* count == size, that means, there are more entries
+ to read from */
+ local->sh_struct->offset_list[index] +=
+ UNIFY_SELF_HEAL_GETDENTS_COUNT;
+ STACK_WIND_COOKIE (frame,
+ unify_sh_getdents_cbk,
+ cookie,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ local->sh_struct->offset_list[index],
+ GF_GET_ALL);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir on (%s) with offset %"PRId64"",
+ priv->xl_array[index]->name,
+ local->sh_struct->offset_list[index]);
+ }
+
+ if (!callcnt) {
+ /* All storage nodes have done unified setdents on NS node.
+ * Now, do getdents from NS and do setdents on storage nodes.
+ */
+
+ /* sh_struct->offset_list is no longer required for
+ storage nodes now */
+ local->sh_struct->offset_list[0] = 0; /* reset */
+
+ STACK_WIND (frame,
+ unify_sh_ns_getdents_cbk,
+ NS(this),
+ NS(this)->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ 0, /* In this call, do send '0' as offset */
+ GF_GET_DIR_ONLY);
+ }
+
+ return 0;
+}
+
+
+/**
+ * unify_sh_getdents_cbk - reply to a getdents sent to one storage child;
+ * the cookie carries that child's index.
+ *
+ * When entries were returned, a freshly allocated list head takes over the
+ * chain behind @entry and the batch is sent to the namespace with setdents
+ * (GF_SET_IF_NOT_PRESENT); unify_sh_ns_setdents_cbk continues from there.
+ * Otherwise the child is either counted as finished (@count below
+ * UNIFY_SELF_HEAL_GETDENTS_COUNT) or asked for its next batch. Once every
+ * child is finished, getdents is wound to the namespace from offset 0 with
+ * GF_GET_DIR_ONLY.
+ */
+int32_t
+unify_sh_getdents_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno,
+                       dir_entry_t *entry,
+                       int32_t count)
+{
+        int32_t          callcnt = -1;
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv = this->private;
+        long             index = (long)cookie;
+        dir_entry_t     *tmp = NULL;
+
+        if (op_ret >= 0 && count > 0) {
+                /* There is some dentry found, just send the dentry to NS */
+                tmp = CALLOC (1, sizeof (dir_entry_t));
+                /* the head is dereferenced below, so check the allocation
+                   the same way the rest of this file does */
+                ERR_ABORT (tmp);
+                local->sh_struct->entry_list[index] = tmp;
+                local->sh_struct->count_list[index] = count;
+                if (entry) {
+                        tmp->next = entry->next;
+                        entry->next = NULL;
+                }
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_ns_setdents_cbk,
+                                   cookie,
+                                   NS(this),
+                                   NS(this)->fops->setdents,
+                                   local->fd,
+                                   GF_SET_IF_NOT_PRESENT,
+                                   local->sh_struct->entry_list[index],
+                                   count);
+                return 0;
+        }
+
+        if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                LOCK (&frame->lock);
+                {
+                        callcnt = --local->call_count;
+                }
+                UNLOCK (&frame->lock);
+        } else {
+                /* count == size, that means, there are more entries
+                   to read from */
+                local->sh_struct->offset_list[index] +=
+                        UNIFY_SELF_HEAL_GETDENTS_COUNT;
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_getdents_cbk,
+                                   cookie,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->getdents,
+                                   local->fd,
+                                   UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                   local->sh_struct->offset_list[index],
+                                   GF_GET_ALL);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on (%s) with offset %"PRId64"",
+                        priv->xl_array[index]->name,
+                        local->sh_struct->offset_list[index]);
+        }
+
+        if (!callcnt) {
+                /* All storage nodes have done unified setdents on NS node.
+                 * Now, do getdents from NS and do setdents on storage nodes.
+                 */
+
+                /* sh_struct->offset_list is no longer required for
+                   storage nodes now */
+                local->sh_struct->offset_list[0] = 0; /* reset */
+
+                STACK_WIND (frame,
+                            unify_sh_ns_getdents_cbk,
+                            NS(this),
+                            NS(this)->fops->getdents,
+                            local->fd,
+                            UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                            0, /* In this call, do send '0' as offset */
+                            GF_GET_DIR_ONLY);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_sh_opendir_cbk - collects the opendir replies wound by
+ * unify_sh_checksum_cbk.
+ *
+ * Any failed reply sets local->failed. After the last reply: if nothing
+ * failed, the fd is bound, the per-child offset/entry/count lists are
+ * allocated and getdents (GF_GET_ALL, offset 0) is wound to every storage
+ * child with the child index as cookie. Otherwise (or when there is no
+ * storage child to ask) the fd is dropped, local is wiped and the pending
+ * lookup is unwound with op_ret forced to 0.
+ *
+ * @cookie: not read by this function.
+ */
+int32_t
+unify_sh_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd)
+{
+ int32_t callcnt = 0;
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ int16_t index = 0;
+ inode_t *inode = NULL;
+ dict_t *tmp_dict = NULL;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING, "failed");
+ local->failed = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ local->call_count = priv->child_count + 1;
+
+ if (!local->failed) {
+ /* send getdents() namespace after finishing
+ storage nodes */
+ local->call_count--; /* leaves one expected reply per storage child */
+
+ fd_bind (fd);
+
+ if (local->call_count) {
+ /* Used as the offset index. This list keeps
+ * track of offset sent to each node during
+ * STACK_WIND.
+ */
+ local->sh_struct->offset_list =
+ calloc (priv->child_count,
+ sizeof (off_t));
+ ERR_ABORT (local->sh_struct->offset_list);
+
+ local->sh_struct->entry_list =
+ calloc (priv->child_count,
+ sizeof (dir_entry_t *));
+ ERR_ABORT (local->sh_struct->entry_list);
+
+ local->sh_struct->count_list =
+ calloc (priv->child_count,
+ sizeof (int));
+ ERR_ABORT (local->sh_struct->count_list);
+
+ /* Send getdents on all the fds */
+ for (index = 0;
+ index < priv->child_count; index++) {
+ STACK_WIND_COOKIE (frame,
+ unify_sh_getdents_cbk,
+ (void *)(long)index,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ 0, /* In this call, do send '0' as offset */
+ GF_GET_ALL);
+ }
+
+ /* did stack wind, so no need to unwind here */
+ return 0;
+ } /* (local->call_count) */
+ } /* (!local->failed) */
+
+ /* Opendir failed on one node. */
+ inode = local->loc1.inode;
+ fd_unref (local->fd);
+ tmp_dict = local->dict; /* saved so the reference can be dropped after the unwind */
+
+ unify_local_wipe (local);
+ /* Only 'self-heal' failed, lookup() was successful. */
+ local->op_ret = 0;
+
+ /* This is lookup_cbk ()'s UNWIND. */
+ STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
+ &local->stbuf, local->dict);
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+ }
+
+ return 0;
+}
+
+/**
+ * unify_sh_checksum_cbk - collects checksum replies and decides whether the
+ * directory needs self-heal.
+ *
+ * @frame: frame whose local (unify_local_t) carries the self-heal state.
+ * @cookie: compared against NS(this) to tell the namespace reply from the
+ *          storage-node replies.
+ * @this: pointer to unify xlator.
+ * @file_checksum: checksum array of ZR_FILENAME_MAX bytes for files.
+ * @dir_checksum: checksum array of ZR_FILENAME_MAX bytes for directories.
+ *
+ * Storage-node file checksums are XOR-ed together and each node's directory
+ * checksum is compared with that of the first successful node. After the
+ * last reply the combined values are compared with the namespace's; on any
+ * mismatch opendir is wound to all child_count + 1 entries of xl_array,
+ * otherwise the lookup is unwound right away.
+ */
+int32_t
+unify_sh_checksum_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ uint8_t *file_checksum,
+ uint8_t *dir_checksum)
+{
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ int16_t index = 0;
+ int32_t callcnt = 0;
+ inode_t *inode = NULL;
+ dict_t *tmp_dict = NULL;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret >= 0) {
+ if (NS(this) == (xlator_t *)cookie) {
+ memcpy (local->sh_struct->ns_file_checksum,
+ file_checksum, ZR_FILENAME_MAX);
+ memcpy (local->sh_struct->ns_dir_checksum,
+ dir_checksum, ZR_FILENAME_MAX);
+ } else {
+ if (local->entry_count == 0) {
+ /* Initialize the dir_checksum to be
+ * used for comparison with other
+ * storage nodes. Should be done for
+ * the first successful call *only*.
+ */
+ /* Using 'entry_count' as a flag */
+ local->entry_count = 1;
+ memcpy (local->sh_struct->dir_checksum,
+ dir_checksum, ZR_FILENAME_MAX);
+ }
+
+ /* Reply from the storage nodes */
+ for (index = 0;
+ index < ZR_FILENAME_MAX; index++) {
+ /* Files should be present in
+ only one node */
+ local->sh_struct->file_checksum[index] ^= file_checksum[index];
+
+ /* directory structure should be
+ same across */
+ if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
+ local->failed = 1;
+ }
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ for (index = 0; index < ZR_FILENAME_MAX ; index++) { /* compare the storage-node totals with the namespace */
+ if (local->sh_struct->file_checksum[index] !=
+ local->sh_struct->ns_file_checksum[index]) {
+ local->failed = 1;
+ break;
+ }
+ if (local->sh_struct->dir_checksum[index] !=
+ local->sh_struct->ns_dir_checksum[index]) {
+ local->failed = 1;
+ break;
+ }
+ }
+
+ if (local->failed) {
+ /* Log it, it should be a rare event */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Self-heal triggered on directory %s",
+ local->loc1.path);
+
+ /* Any self heal will be done at directory level */
+ local->call_count = 0;
+ local->op_ret = -1;
+ local->failed = 0; /* reused by unify_sh_opendir_cbk to record opendir failures */
+
+ local->fd = fd_create (local->loc1.inode,
+ frame->root->pid);
+
+ local->call_count = priv->child_count + 1;
+
+ for (index = 0;
+ index < (priv->child_count + 1); index++) {
+ STACK_WIND_COOKIE (frame,
+ unify_sh_opendir_cbk,
+ priv->xl_array[index]->name,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->opendir,
+ &local->loc1,
+ local->fd);
+ }
+ /* opendir can be done on the directory */
+ return 0;
+ }
+
+ /* no mismatch */
+ inode = local->loc1.inode;
+ tmp_dict = local->dict; /* saved so the reference can be dropped after the unwind */
+
+ unify_local_wipe (local);
+
+ /* This is lookup_cbk ()'s UNWIND. */
+ STACK_UNWIND (frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->stbuf,
+ local->dict);
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+ }
+
+ return 0;
+}
+
+/* Foreground self-heal part over */
+
+/* Background self-heal part */
+
+/**
+ * unify_bgsh_setdents_cbk - background self-heal: reply to the
+ * directory-only setdents that unify_bgsh_ns_getdents_cbk sends to every
+ * storage child.
+ *
+ * The outstanding-call counter is decremented under the frame lock. On the
+ * last reply the namespace entry list in slot 0 is freed; then either the
+ * next batch is requested from the namespace (local->flags unset and the
+ * last batch was full) or, for the final batch, the fd is dropped, local is
+ * wiped and the background call stack is destroyed.
+ */
+int32_t
+unify_bgsh_setdents_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno)
+{
+        unify_local_t *local   = frame->local;
+        int32_t        pending = -1;
+        dir_entry_t   *head    = NULL;
+        dir_entry_t   *node    = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* a counter that is already zero is left alone, and
+                   pending then stays at -1 */
+                if (local->call_count)
+                        pending = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending != 0)
+                return 0;
+
+        /* release the list head and everything chained behind it */
+        head = local->sh_struct->entry_list[0];
+        if (head) {
+                for (node = head->next; node; node = head->next) {
+                        head->next = node->next;
+                        FREE (node->name);
+                        if (S_ISLNK (node->buf.st_mode)) {
+                                FREE (node->link);
+                        }
+                        FREE (node);
+                }
+                FREE (head);
+        }
+
+        if (local->flags) {
+                /* final batch has been pushed: tear everything down */
+                fd_unref (local->fd);
+                unify_local_wipe (local);
+                STACK_DESTROY (frame->root);
+                return 0;
+        }
+
+        if (local->sh_struct->count_list[0] >=
+            UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                /* a full batch came back, so the namespace may hold more */
+                local->sh_struct->offset_list[0] +=
+                        UNIFY_SELF_HEAL_GETDENTS_COUNT;
+                STACK_WIND (frame,
+                            unify_bgsh_ns_getdents_cbk,
+                            NS(this),
+                            NS(this)->fops->getdents,
+                            local->fd,
+                            UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                            local->sh_struct->offset_list[0],
+                            GF_GET_DIR_ONLY);
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_bgsh_ns_getdents_cbk - background self-heal: reply to a getdents
+ * sent to the namespace; pushes the returned directory entries to every
+ * storage child.
+ *
+ * A freshly allocated list head takes over the entries chained behind
+ * @entry and is stored, together with @count, in slot 0 of the self-heal
+ * lists. The batch is the final one when @count is below
+ * UNIFY_SELF_HEAL_GETDENTS_COUNT or @entry is NULL; local->flags records
+ * that for unify_bgsh_setdents_cbk. setdents (GF_SET_DIR_ONLY) is then
+ * wound to each storage child with the child index as cookie.
+ */
+int32_t
+unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
+                            void *cookie,
+                            xlator_t *this,
+                            int32_t op_ret,
+                            int32_t op_errno,
+                            dir_entry_t *entry,
+                            int32_t count)
+{
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv = this->private;
+        long             index = 0;
+        unsigned long    final = 0;
+        dir_entry_t     *tmp = NULL;
+
+        /* the head is dereferenced right below, so check the allocation
+           the same way the rest of this file does */
+        tmp = CALLOC (1, sizeof (dir_entry_t));
+        ERR_ABORT (tmp);
+
+        local->sh_struct->entry_list[0] = tmp;
+        local->sh_struct->count_list[0] = count;
+        if (entry) {
+                /* take over the chain behind the caller's head */
+                tmp->next = entry->next;
+                entry->next = NULL;
+        }
+
+        if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
+                final = 1;
+        }
+
+        LOCK (&frame->lock);
+        {
+                /* one reply is expected from each storage child; the last
+                   of them continues or finishes the self-heal */
+                local->call_count = priv->child_count;
+                if (final)
+                        local->flags = 1;
+        }
+        UNLOCK (&frame->lock);
+
+        for (index = 0; index < priv->child_count; index++)
+        {
+                STACK_WIND_COOKIE (frame,
+                                   unify_bgsh_setdents_cbk,
+                                   (void *)index,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->setdents,
+                                   local->fd, GF_SET_DIR_ONLY,
+                                   local->sh_struct->entry_list[0], count);
+        }
+
+        return 0;
+}
+/**
+ * unify_bgsh_ns_setdents_cbk - background self-heal: reply to the setdents
+ * sent to the namespace on behalf of one storage child; the cookie carries
+ * that child's index.
+ *
+ * Frees the entry list kept for this child, then either counts the child as
+ * finished (its last batch held fewer than UNIFY_SELF_HEAL_GETDENTS_COUNT
+ * entries) or requests the next batch from it. Once local->call_count
+ * reaches zero, getdents is wound to the namespace from offset 0 with
+ * GF_GET_DIR_ONLY.
+ */
+int32_t
+unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ int32_t callcnt = -1; /* stays -1 unless this child is counted as done */
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ long index = (long)cookie;
+ dir_entry_t *prev, *entry, *trav;
+
+ if (local->sh_struct->entry_list[index]) {
+ prev = entry = local->sh_struct->entry_list[index]; /* list head */
+ if (!entry) /* cannot be true: the same pointer was tested non-NULL just above */
+ return 0;
+ trav = entry->next;
+ while (trav) { /* unlink and free each entry behind the head */
+ prev->next = trav->next;
+ FREE (trav->name);
+ if (S_ISLNK (trav->buf.st_mode))
+ FREE (trav->link);
+ FREE (trav);
+ trav = prev->next;
+ }
+ FREE (entry);
+ }
+
+ if (local->sh_struct->count_list[index] <
+ UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+ } else {
+ /* count == size, that means, there are more entries
+ to read from */
+ local->sh_struct->offset_list[index] +=
+ UNIFY_SELF_HEAL_GETDENTS_COUNT;
+ STACK_WIND_COOKIE (frame,
+ unify_bgsh_getdents_cbk,
+ cookie,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ local->sh_struct->offset_list[index],
+ GF_GET_ALL);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir on (%s) with offset %"PRId64"",
+ priv->xl_array[index]->name,
+ local->sh_struct->offset_list[index]);
+ }
+
+ if (!callcnt) {
+ /* All storage nodes have done unified setdents on NS node.
+ * Now, do getdents from NS and do setdents on storage nodes.
+ */
+
+ /* sh_struct->offset_list is no longer required for
+ storage nodes now */
+ local->sh_struct->offset_list[0] = 0; /* reset */
+
+ STACK_WIND (frame,
+ unify_bgsh_ns_getdents_cbk,
+ NS(this),
+ NS(this)->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ 0, /* In this call, do send '0' as offset */
+ GF_GET_DIR_ONLY);
+ }
+
+ return 0;
+}
+
+
+/**
+ * unify_bgsh_getdents_cbk - background self-heal: reply to a getdents sent
+ * to one storage child; the cookie carries that child's index.
+ *
+ * When entries were returned, a freshly allocated list head takes over the
+ * chain behind @entry and the batch is sent to the namespace with setdents
+ * (GF_SET_IF_NOT_PRESENT); unify_bgsh_ns_setdents_cbk continues from there.
+ * Otherwise the child is either counted as finished (@count below
+ * UNIFY_SELF_HEAL_GETDENTS_COUNT) or asked for its next batch. Once every
+ * child is finished, getdents is wound to the namespace from offset 0 with
+ * GF_GET_DIR_ONLY.
+ */
+int32_t
+unify_bgsh_getdents_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         dir_entry_t *entry,
+                         int32_t count)
+{
+        int32_t          callcnt = -1;
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv = this->private;
+        long             index = (long)cookie;
+        dir_entry_t     *tmp = NULL;
+
+        if (op_ret >= 0 && count > 0) {
+                /* There is some dentry found, just send the dentry to NS */
+                tmp = CALLOC (1, sizeof (dir_entry_t));
+                /* the head is dereferenced below, so check the allocation
+                   the same way the rest of this file does */
+                ERR_ABORT (tmp);
+                local->sh_struct->entry_list[index] = tmp;
+                local->sh_struct->count_list[index] = count;
+                if (entry) {
+                        tmp->next = entry->next;
+                        entry->next = NULL;
+                }
+                STACK_WIND_COOKIE (frame,
+                                   unify_bgsh_ns_setdents_cbk,
+                                   cookie,
+                                   NS(this),
+                                   NS(this)->fops->setdents,
+                                   local->fd,
+                                   GF_SET_IF_NOT_PRESENT,
+                                   local->sh_struct->entry_list[index],
+                                   count);
+                return 0;
+        }
+
+        if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                LOCK (&frame->lock);
+                {
+                        callcnt = --local->call_count;
+                }
+                UNLOCK (&frame->lock);
+        } else {
+                /* count == size, that means, there are more entries to
+                   read from */
+                local->sh_struct->offset_list[index] +=
+                        UNIFY_SELF_HEAL_GETDENTS_COUNT;
+
+                STACK_WIND_COOKIE (frame,
+                                   unify_bgsh_getdents_cbk,
+                                   cookie,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->getdents,
+                                   local->fd,
+                                   UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                   local->sh_struct->offset_list[index],
+                                   GF_GET_ALL);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on (%s) with offset %"PRId64"",
+                        priv->xl_array[index]->name,
+                        local->sh_struct->offset_list[index]);
+        }
+
+        if (!callcnt) {
+                /* All storage nodes have done unified setdents on NS node.
+                 * Now, do getdents from NS and do setdents on storage nodes.
+                 */
+
+                /* sh_struct->offset_list is no longer required for
+                   storage nodes now */
+                local->sh_struct->offset_list[0] = 0; /* reset */
+
+                STACK_WIND (frame,
+                            unify_bgsh_ns_getdents_cbk,
+                            NS(this),
+                            NS(this)->fops->getdents,
+                            local->fd,
+                            UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                            0, /* In this call, do send '0' as offset */
+                            GF_GET_DIR_ONLY);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_bgsh_opendir_cbk - background self-heal: collects opendir replies.
+ *
+ * Any failed reply sets local->failed. After the last reply: if nothing
+ * failed, the fd is bound, the per-child offset/entry/count lists are
+ * allocated and getdents (GF_GET_ALL, offset 0) is wound to every storage
+ * child with the child index as cookie. Otherwise (or when there is no
+ * storage child to ask) the fd is dropped, local is wiped and the call
+ * stack is destroyed.
+ *
+ * @cookie: not read by this function.
+ */
+int32_t
+unify_bgsh_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd)
+{
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ int32_t callcnt = 0;
+ int16_t index = 0;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ } else {
+ local->failed = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ local->call_count = priv->child_count + 1;
+
+ if (!local->failed) {
+ /* send getdents() namespace after finishing
+ storage nodes */
+ local->call_count--; /* leaves one expected reply per storage child */
+ callcnt = local->call_count; /* this value is not read again in this function */
+
+ fd_bind (fd);
+
+ if (local->call_count) {
+ /* Used as the offset index. This list keeps
+ track of offset sent to each node during
+ STACK_WIND. */
+ local->sh_struct->offset_list =
+ calloc (priv->child_count,
+ sizeof (off_t));
+ ERR_ABORT (local->sh_struct->offset_list);
+
+ local->sh_struct->entry_list =
+ calloc (priv->child_count,
+ sizeof (dir_entry_t *));
+ ERR_ABORT (local->sh_struct->entry_list);
+
+ local->sh_struct->count_list =
+ calloc (priv->child_count,
+ sizeof (int));
+ ERR_ABORT (local->sh_struct->count_list);
+
+ /* Send getdents on all the fds */
+ for (index = 0;
+ index < priv->child_count; index++) {
+ STACK_WIND_COOKIE (frame,
+ unify_bgsh_getdents_cbk,
+ (void *)(long)index,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->getdents,
+ local->fd,
+ UNIFY_SELF_HEAL_GETDENTS_COUNT,
+ 0, /* In this call, do send '0' as offset */
+ GF_GET_ALL);
+ }
+ /* did a stack wind, so no need to unwind here */
+ return 0;
+ } /* (local->call_count) */
+ } /* (!local->failed) */
+
+ /* Opendir failed on one node. */
+ fd_unref (local->fd);
+
+ unify_local_wipe (local);
+ STACK_DESTROY (frame->root); /* destroys the call stack; no STACK_UNWIND is issued on this path */
+ }
+
+ return 0;
+}
+
+/**
+ * unify_bgsh_checksum_cbk - collects the checksum() replies of the
+ * background self-heal and opens the directory on every node when the
+ * checksums disagree.
+ *
+ * @frame: background self-heal frame; frame->local is the unify_local_t.
+ * @cookie: xlator that replied (compared against the namespace).
+ * @this: pointer to unify xlator.
+ * @op_ret: result of checksum() on one node; replies < 0 are ignored.
+ * @op_errno: not read by this callback.
+ * @file_checksum: file checksum from the node, ZR_FILENAME_MAX bytes read.
+ * @dir_checksum: directory checksum from the node, ZR_FILENAME_MAX bytes read.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_bgsh_checksum_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ uint8_t *file_checksum,
+ uint8_t *dir_checksum)
+{
+ unify_local_t *local = frame->local;
+ unify_private_t *priv = this->private;
+ int16_t index = 0;
+ int32_t callcnt = 0;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret >= 0) {
+ if (NS(this) == (xlator_t *)cookie) {
+ /* Namespace reply: keep both checksums as the
+ reference values. */
+ memcpy (local->sh_struct->ns_file_checksum,
+ file_checksum, ZR_FILENAME_MAX);
+ memcpy (local->sh_struct->ns_dir_checksum,
+ dir_checksum, ZR_FILENAME_MAX);
+ } else {
+ if (local->entry_count == 0) {
+ /* Initialize the dir_checksum to be
+ * used for comparison with other
+ * storage nodes. Should be done for
+ * the first successful call *only*.
+ */
+ /* Using 'entry_count' as a flag */
+ local->entry_count = 1;
+ memcpy (local->sh_struct->dir_checksum,
+ dir_checksum, ZR_FILENAME_MAX);
+ }
+
+ /* Reply from the storage nodes */
+ for (index = 0;
+ index < ZR_FILENAME_MAX; index++) {
+ /* Files should be present in only
+ one node */
+ /* The storage nodes' file checksums are
+ XOR-ed together byte by byte. */
+ local->sh_struct->file_checksum[index] ^= file_checksum[index];
+
+ /* directory structure should be same
+ across */
+ if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
+ local->failed = 1;
+ }
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ /* Last reply: compare the accumulated storage-node checksums with
+ the namespace's. */
+ if (!callcnt) {
+ for (index = 0; index < ZR_FILENAME_MAX ; index++) {
+ if (local->sh_struct->file_checksum[index] !=
+ local->sh_struct->ns_file_checksum[index]) {
+ local->failed = 1;
+ break;
+ }
+ if (local->sh_struct->dir_checksum[index] !=
+ local->sh_struct->ns_dir_checksum[index]) {
+ local->failed = 1;
+ break;
+ }
+ }
+
+ if (local->failed) {
+ /* Log it, it should be a rare event */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Self-heal triggered on directory %s",
+ local->loc1.path);
+
+ /* Any self heal will be done at the directory level */
+ local->op_ret = -1;
+ local->failed = 0;
+
+ /* NOTE(review): the result of fd_create() is used
+ without a NULL check -- confirm it cannot fail. */
+ local->fd = fd_create (local->loc1.inode,
+ frame->root->pid);
+ local->call_count = priv->child_count + 1;
+
+ /* opendir on every xl_array entry, index
+ priv->child_count included. */
+ for (index = 0;
+ index < (priv->child_count + 1); index++) {
+ STACK_WIND_COOKIE (frame,
+ unify_bgsh_opendir_cbk,
+ priv->xl_array[index]->name,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->opendir,
+ &local->loc1,
+ local->fd);
+ }
+
+ /* opendir can be done on the directory */
+ return 0;
+ }
+
+ /* no mismatch */
+ unify_local_wipe (local);
+ STACK_DESTROY (frame->root);
+ }
+
+ return 0;
+}
+
+/* Background self-heal part over */
+
+
+
+
+/**
+ * zr_unify_self_heal - start the directory checksum comparison when the
+ * inode's generation is older than the xlator's, then finish the lookup.
+ *
+ * @frame: frame used in lookup. Foreground self-heal winds on it directly;
+ *         background self-heal works on a copy of it.
+ * @this: pointer to unify xlator.
+ * @local: lookup state; local->loc1 names the directory to be checked.
+ *
+ * In foreground mode (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) the function
+ * returns after winding checksum() and does not unwind @frame itself. In
+ * every other case it unwinds @frame with the lookup result held in @local.
+ *
+ * Always returns 0.
+ */
+int32_t
+zr_unify_self_heal (call_frame_t *frame,
+                    xlator_t *this,
+                    unify_local_t *local)
+{
+        unify_private_t *priv      = this->private;
+        call_frame_t    *bg_frame  = NULL;
+        unify_local_t   *bg_local  = NULL;
+        inode_t         *tmp_inode = NULL;
+        dict_t          *tmp_dict  = NULL;
+        int16_t          index     = 0;
+
+        if (local->inode_generation < priv->inode_generation) {
+                /* Any self heal will be done at the directory level */
+                /* Update the inode's generation to the current generation
+                   value. */
+                local->inode_generation = priv->inode_generation;
+                inode_ctx_put (local->loc1.inode, this,
+                               (uint64_t)(long)local->inode_generation);
+
+                if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
+                        local->op_ret = 0;
+                        local->failed = 0;
+                        local->call_count = priv->child_count + 1;
+                        local->sh_struct =
+                                calloc (1, sizeof (struct unify_self_heal_struct));
+                        /* The checksum callbacks dereference sh_struct,
+                           so a failed allocation must not go unnoticed. */
+                        ERR_ABORT (local->sh_struct);
+
+                        /* +1 is for NS */
+                        for (index = 0;
+                             index < (priv->child_count + 1); index++) {
+                                STACK_WIND_COOKIE (frame,
+                                                   unify_sh_checksum_cbk,
+                                                   priv->xl_array[index],
+                                                   priv->xl_array[index],
+                                                   priv->xl_array[index]->fops->checksum,
+                                                   &local->loc1,
+                                                   0);
+                        }
+
+                        /* Self-heal in foreground, hence no need
+                           to UNWIND here */
+                        return 0;
+                }
+
+                /* Self Heal done in background */
+                bg_frame = copy_frame (frame);
+                INIT_LOCAL (bg_frame, bg_local);
+                loc_copy (&bg_local->loc1, &local->loc1);
+                bg_local->op_ret = 0;
+                bg_local->failed = 0;
+                bg_local->call_count = priv->child_count + 1;
+                bg_local->sh_struct =
+                        calloc (1, sizeof (struct unify_self_heal_struct));
+                /* Same reasoning as in the foreground branch. */
+                ERR_ABORT (bg_local->sh_struct);
+
+                /* +1 is for NS */
+                for (index = 0; index < (priv->child_count + 1); index++) {
+                        STACK_WIND_COOKIE (bg_frame,
+                                           unify_bgsh_checksum_cbk,
+                                           priv->xl_array[index],
+                                           priv->xl_array[index],
+                                           priv->xl_array[index]->fops->checksum,
+                                           &bg_local->loc1,
+                                           0);
+                }
+        }
+
+        /* generation number matches, self heal already done or
+         * self heal done in background: just do STACK_UNWIND
+         */
+        /* Save what the unwind needs before loc1 is wiped. */
+        tmp_inode = local->loc1.inode;
+        tmp_dict  = local->dict;
+
+        unify_local_wipe (local);
+
+        /* This is lookup_cbk ()'s UNWIND. */
+        STACK_UNWIND (frame,
+                      local->op_ret,
+                      local->op_errno,
+                      tmp_inode,
+                      &local->stbuf,
+                      local->dict);
+
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        return 0;
+}
+
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
new file mode 100644
index 00000000000..e2a5e14b191
--- /dev/null
+++ b/xlators/cluster/unify/src/unify.c
@@ -0,0 +1,4451 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ * xlators/cluster/unify:
+ * - This xlator is one of the main translators in GlusterFS, which
+ * actually does the clustering work of the file system. One needs to
+ * understand that unify assumes a file to exist on only one of
+ * the child nodes, and directories to be present on all the nodes.
+ *
+ * NOTE:
+ * Now, unify has support for a global namespace, which is used to keep a
+ * global view of the fs's namespace tree. The stat for directories is taken
+ * just from the namespace, whereas for files, just 'st_ino' is taken from
+ * the namespace node, and other stat info is taken from the actual storage
+ * node. Also, the namespace node helps to keep a consistent inode for files
+ * across glusterfs (re-)mounts.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "unify.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include <signal.h>
+#include <libgen.h>
+#include "compat-errno.h"
+#include "compat.h"
+
+/*
+ * Argument-checking helpers for the fop entry points. Each one expands to
+ * a statement that unwinds with an error and returns 0 from the *calling*
+ * function, so it relies on `frame` (and, for the fd-ctx variant, `this`)
+ * being in scope at the point of use. The macro parameter is parenthesized
+ * so that any pointer expression can be passed safely.
+ */
+
+/* Fails with EINVAL when the loc or its inode is missing. */
+#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do {           \
+        if (!((_loc) && (_loc)->inode)) {                            \
+                STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);  \
+                return 0;                                            \
+        }                                                            \
+} while(0)
+
+
+/* Fails with EBADFD when the fd is missing or fd_ctx_get() returns non-zero. */
+#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do {               \
+        if (!((_fd) && !fd_ctx_get ((_fd), this, NULL))) {           \
+                STACK_UNWIND (frame, -1, EBADFD, NULL, NULL);        \
+                return 0;                                            \
+        }                                                            \
+} while(0)
+
+/* Fails with EBADFD when the fd is missing. */
+#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do {                   \
+        if (!(_fd)) {                                                \
+                STACK_UNWIND (frame, -1, EBADFD, NULL, NULL);        \
+                return 0;                                            \
+        }                                                            \
+} while(0)
+
+/**
+ * unify_local_wipe - release the heap members hanging off a unify_local_t.
+ *
+ * @local: per-call state; its 'name' string and both loc_t members are
+ *         released. The structure itself is not freed here.
+ */
+static void
+unify_local_wipe (unify_local_t *local)
+{
+        /* 'name' is only set by some fops, so it may still be NULL */
+        if (local->name != NULL) {
+                FREE (local->name);
+        }
+
+        loc_wipe (&local->loc1);
+        loc_wipe (&local->loc2);
+}
+
+
+
+/*
+ * unify_normalize_stats - re-express the block counters of a statvfs
+ * result in different unit sizes.
+ *
+ * @buf: statvfs result, rewritten in place.
+ * @bsize: block size that f_bfree and f_bavail are converted to.
+ * @frsize: fragment size that f_blocks is converted to.
+ *
+ * A counter is only touched when its current unit differs from the
+ * requested one; the scaled values are truncated to fsblkcnt_t.
+ */
+void
+unify_normalize_stats (struct statvfs *buf,
+                       unsigned long bsize,
+                       unsigned long frsize)
+{
+        double scale;
+
+        if (bsize != buf->f_bsize) {
+                scale = ((double) buf->f_bsize) / bsize;
+                buf->f_bfree  = (fsblkcnt_t) (scale * buf->f_bfree);
+                buf->f_bavail = (fsblkcnt_t) (scale * buf->f_bavail);
+                buf->f_bsize  = bsize;
+        }
+
+        if (frsize != buf->f_frsize) {
+                scale = ((double) buf->f_frsize) / frsize;
+                buf->f_blocks = (fsblkcnt_t) (scale * buf->f_blocks);
+                buf->f_frsize = frsize;
+        }
+}
+
+
+/**
+ * unify_loc_subvol - pick the subvolume that serves a loc.
+ *
+ * @loc: location to resolve; loc->inode must be valid.
+ * @this: pointer to unify xlator.
+ *
+ * Returns the namespace for directories and for inodes without a list in
+ * their ctx. Otherwise returns the first entry of the inode's -1 terminated
+ * index list that is not the namespace, or the namespace if there is none.
+ */
+xlator_t *
+unify_loc_subvol (loc_t *loc, xlator_t *this)
+{
+        unify_private_t *priv     = this->private;
+        xlator_t        *subvol_i = NULL;
+        int16_t         *list     = NULL;
+        uint64_t         tmp_list = 0;
+        long             index    = 0;
+
+        if (S_ISDIR (loc->inode->st_mode))
+                return NS (this);
+
+        /* tmp_list starts at 0, so the NULL check below also covers an
+           inode whose ctx was never filled in. */
+        inode_ctx_get (loc->inode, this, &tmp_list);
+        list = (int16_t *)(long)tmp_list;
+        if (!list)
+                return NS (this);
+
+        for (index = 0; list[index] != -1; index++) {
+                subvol_i = priv->xl_array[list[index]];
+                if (subvol_i != NS (this))
+                        return subvol_i;
+        }
+
+        return NS (this);
+}
+
+
+
+/**
+ * unify_statfs_cbk - adds one child's statfs() reply to the running total
+ * in local->statvfs_buf and unwinds after the last reply.
+ *
+ * @cookie: frame of the child that replied (only used for logging).
+ * @op_ret: result of statfs() on the child; a negative value marks failure.
+ * @op_errno: errno of a failed call; stored in local->op_errno.
+ * @stbuf: the child's statvfs result; may be rescaled in place.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_statfs_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct statvfs *stbuf)
+{
+ int32_t callcnt = 0;
+ struct statvfs *dict_buf = NULL;
+ unsigned long bsize;
+ unsigned long frsize;
+ unify_local_t *local = (unify_local_t *)frame->local;
+ call_frame_t *prev_frame = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret >= 0) {
+ /* when a call is successful, add it to local->dict */
+ dict_buf = &local->statvfs_buf;
+
+ if (dict_buf->f_bsize != 0) {
+ /* Not the first success: bring the total and
+ the new reply to the larger of the two unit
+ sizes before adding them up. */
+ bsize = max (dict_buf->f_bsize,
+ stbuf->f_bsize);
+
+ frsize = max (dict_buf->f_frsize,
+ stbuf->f_frsize);
+ unify_normalize_stats(dict_buf, bsize, frsize);
+ unify_normalize_stats(stbuf, bsize, frsize);
+ } else {
+ /* First success: adopt this child's unit sizes. */
+ dict_buf->f_bsize = stbuf->f_bsize;
+ dict_buf->f_frsize = stbuf->f_frsize;
+ }
+
+ /* Counters are summed; fsid, flag and namemax are simply
+ taken from the latest successful reply. */
+ dict_buf->f_blocks += stbuf->f_blocks;
+ dict_buf->f_bfree += stbuf->f_bfree;
+ dict_buf->f_bavail += stbuf->f_bavail;
+ dict_buf->f_files += stbuf->f_files;
+ dict_buf->f_ffree += stbuf->f_ffree;
+ dict_buf->f_favail += stbuf->f_favail;
+ dict_buf->f_fsid = stbuf->f_fsid;
+ dict_buf->f_flag = stbuf->f_flag;
+ dict_buf->f_namemax = stbuf->f_namemax;
+ local->op_ret = op_ret;
+ } else {
+ /* fop on storage node has failed due to some error */
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "child(%s): %s",
+ prev_frame->this->name,
+ strerror (op_errno));
+ }
+ local->op_errno = op_errno;
+ }
+ callcnt = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ /* Last reply: hand the accumulated result to the caller. */
+ if (!callcnt) {
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->statvfs_buf);
+ }
+
+ return 0;
+}
+
+/**
+ * unify_statfs - fan the statfs() call out to every entry of
+ * this->children; unify_statfs_cbk merges the replies.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: location passed through to the children unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_statfs (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc)
+{
+        unify_private_t *priv  = this->private;
+        unify_local_t   *local = NULL;
+        xlator_list_t   *child = NULL;
+
+        INIT_LOCAL (frame, local);
+
+        /* one reply is expected per child */
+        local->call_count = priv->child_count;
+
+        for (child = this->children; child; child = child->next) {
+                STACK_WIND (frame,
+                            unify_statfs_cbk,
+                            child->xlator,
+                            child->xlator->fops->statfs,
+                            loc);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_buf_cbk - common callback for fops that return a 'struct stat'.
+ * Merges the namespace and storage-node replies into local->stbuf and
+ * unwinds after the last reply.
+ *
+ * @cookie: frame of the child that replied; its xlator tells namespace
+ *          and storage node apart.
+ * @op_ret: result on the child; -1 marks failure.
+ * @op_errno: errno of a failed call; stored in local->op_errno.
+ * @buf: stat returned by the child (read only when op_ret >= 0).
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_buf_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *buf)
+{
+ int32_t callcnt = 0;
+ unify_private_t *priv = this->private;
+ unify_local_t *local = frame->local;
+ call_frame_t *prev_frame = cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s(): child(%s): path(%s): %s",
+ gf_fop_list[frame->root->op],
+ prev_frame->this->name,
+ (local->loc1.path)?local->loc1.path:"",
+ strerror (op_errno));
+
+ local->op_errno = op_errno;
+ /* With the 'optimist' option a missing entry on one
+ child is not treated as a failure. */
+ if ((op_errno == ENOENT) && priv->optimist)
+ local->op_ret = 0;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = 0;
+
+ if (NS (this) == prev_frame->this) {
+ local->st_ino = buf->st_ino;
+ /* If the entry is directory, get the stat
+ from NS node */
+ /* The namespace stat is also kept when no stat
+ has been stored yet (st_blksize still 0). */
+ if (S_ISDIR (buf->st_mode) ||
+ !local->stbuf.st_blksize) {
+ local->stbuf = *buf;
+ }
+ }
+
+ if ((!S_ISDIR (buf->st_mode)) &&
+ (NS (this) != prev_frame->this)) {
+ /* If file, take the stat info from Storage
+ node. */
+ local->stbuf = *buf;
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ /* If the inode number is not filled, operation should
+ fail */
+ if (!local->st_ino)
+ local->op_ret = -1;
+
+ /* The reported inode number is always local->st_ino. */
+ local->stbuf.st_ino = local->st_ino;
+ /* Only the heap members are wiped; 'local' itself is still
+ read by the unwind below. */
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+ }
+
+ return 0;
+}
+
+/* True when the mode bits of stat 's', with the file-type bits masked
+   off, are exactly the sticky bit; unify_lookup_cbk logs such an entry as
+   a possible DHT link file. The parameter is parenthesized so any pointer
+   expression can be passed. */
+#define check_if_dht_linkfile(s) (((s)->st_mode & ~S_IFMT) == S_ISVTX)
+
+/**
+ * unify_lookup_cbk - gathers the lookup() replies of the namespace and the
+ * storage nodes; after the last reply it either unwinds the lookup or
+ * hands over to zr_unify_self_heal().
+ *
+ * @frame: lookup frame; frame->local is the unify_local_t.
+ * @cookie: index into priv->xl_array of the node that replied; the
+ *          namespace has index priv->child_count.
+ * @this: pointer to unify xlator.
+ * @op_ret: 0 on success, -1 on failure.
+ * @op_errno: errno of a failed lookup.
+ * @inode: inode passed back by the node.
+ * @buf: stat of the entry on that node (read only when op_ret == 0).
+ * @dict: xattr dictionary returned by the node, may be NULL.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_lookup_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  inode_t *inode,
+                  struct stat *buf,
+                  dict_t *dict)
+{
+        int32_t          callcnt    = 0;
+        unify_private_t *priv       = this->private;
+        unify_local_t   *local      = frame->local;
+        inode_t         *tmp_inode  = NULL;
+        dict_t          *local_dict = NULL;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret == -1) {
+                        if ((op_errno != ENOTCONN) && (op_errno != ENOENT)) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "child(%s): path(%s): %s",
+                                        priv->xl_array[(long)cookie]->name,
+                                        local->loc1.path, strerror (op_errno));
+                                local->op_errno = op_errno;
+                                local->failed = 1;
+
+                        } else if (local->revalidate &&
+                                   !(priv->optimist && (op_errno == ENOENT))) {
+
+                                gf_log (this->name,
+                                        (op_errno == ENOTCONN) ?
+                                        GF_LOG_DEBUG:GF_LOG_ERROR,
+                                        "child(%s): path(%s): %s",
+                                        priv->xl_array[(long)cookie]->name,
+                                        local->loc1.path, strerror (op_errno));
+                                local->op_errno = op_errno;
+                                local->failed = 1;
+                        }
+                }
+
+                if (op_ret == 0) {
+                        local->op_ret = 0;
+
+                        if (check_if_dht_linkfile(buf)) {
+                                gf_log (this->name, GF_LOG_CRITICAL,
+                                        "file %s may be DHT link file on %s, "
+                                        "make sure the backend is not shared "
+                                        "between unify and DHT",
+                                        local->loc1.path,
+                                        priv->xl_array[(long)cookie]->name);
+                        }
+
+                        if (local->stbuf.st_mode && local->stbuf.st_blksize) {
+                                /* make sure we already have a stbuf
+                                   stored in local->stbuf */
+                                if (S_ISDIR (local->stbuf.st_mode) &&
+                                    !S_ISDIR (buf->st_mode)) {
+                                        gf_log (this->name, GF_LOG_CRITICAL,
+                                                "[CRITICAL] '%s' is directory "
+                                                "on namespace, non-directory "
+                                                "on node '%s', returning EIO",
+                                                local->loc1.path,
+                                                priv->xl_array[(long)cookie]->name);
+                                        local->return_eio = 1;
+                                }
+                                if (!S_ISDIR (local->stbuf.st_mode) &&
+                                    S_ISDIR (buf->st_mode)) {
+                                        gf_log (this->name, GF_LOG_CRITICAL,
+                                                "[CRITICAL] '%s' is directory "
+                                                "on node '%s', non-directory "
+                                                "on namespace, returning EIO",
+                                                local->loc1.path,
+                                                priv->xl_array[(long)cookie]->name);
+                                        local->return_eio = 1;
+                                }
+                        }
+
+                        if (!local->revalidate && !S_ISDIR (buf->st_mode)) {
+                                /* This is the first time lookup on file*/
+                                if (!local->list) {
+                                        /* list is not allocated, allocate
+                                           the max possible range */
+                                        local->list = CALLOC (1, 2 * (priv->child_count + 2));
+                                        if (!local->list) {
+                                                gf_log (this->name,
+                                                        GF_LOG_CRITICAL,
+                                                        "Not enough memory");
+                                                /* Never leave with the frame
+                                                   lock held, and release it
+                                                   before the unwind touches
+                                                   the frame.
+                                                   NOTE(review): replies that
+                                                   are still pending will reach
+                                                   this frame after the unwind;
+                                                   confirm how that should be
+                                                   handled. */
+                                                UNLOCK (&frame->lock);
+                                                STACK_UNWIND (frame, -1,
+                                                              ENOMEM, inode,
+                                                              NULL, NULL);
+                                                return 0;
+                                        }
+                                }
+                                /* update the index of the list */
+                                local->list [local->index++] =
+                                        (int16_t)(long)cookie;
+                        }
+
+                        /* keep the first dictionary that comes from a
+                           storage node */
+                        if ((!local->dict) && dict &&
+                            (priv->xl_array[(long)cookie] != NS(this))) {
+                                local->dict = dict_ref (dict);
+                        }
+
+                        /* index of NS node is == total child count */
+                        if (priv->child_count == (int16_t)(long)cookie) {
+                                /* Take the inode number from namespace */
+                                local->st_ino = buf->st_ino;
+                                if (S_ISDIR (buf->st_mode) ||
+                                    !(local->stbuf.st_blksize)) {
+                                        local->stbuf = *buf;
+                                }
+                        } else if (!S_ISDIR (buf->st_mode)) {
+                                /* If file, then get the stat from
+                                   storage node */
+                                local->stbuf = *buf;
+                        }
+
+                        /* report the highest link count seen */
+                        if (local->st_nlink < buf->st_nlink) {
+                                local->st_nlink = buf->st_nlink;
+                        }
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                local_dict = local->dict;
+                if (local->return_eio) {
+                        gf_log (this->name, GF_LOG_CRITICAL,
+                                "[CRITICAL] Unable to fix the path (%s) with "
+                                "self-heal, try manual verification. "
+                                "returning EIO.", local->loc1.path);
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
+                        if (local_dict) {
+                                dict_unref (local_dict);
+                        }
+                        return 0;
+                }
+
+                if (!local->stbuf.st_blksize) {
+                        /* Inode not present */
+                        local->op_ret = -1;
+                } else {
+                        if (!local->revalidate &&
+                            !S_ISDIR (local->stbuf.st_mode)) {
+                                /* If its a file, big array is useless,
+                                   allocate the smaller one */
+                                int16_t *list = NULL;
+                                list = CALLOC (1, 2 * (local->index + 1));
+                                ERR_ABORT (list);
+                                memcpy (list, local->list, 2 * local->index);
+                                /* Make the end of the list as -1 */
+                                FREE (local->list);
+                                local->list = list;
+                                local->list [local->index] = -1;
+                                /* Update the inode's ctx with proper array */
+                                /* TODO: log on failure */
+                                inode_ctx_put (local->loc1.inode, this,
+                                               (uint64_t)(long)local->list);
+                        }
+
+                        if (S_ISDIR(local->loc1.inode->st_mode)) {
+                                /* lookup is done for directory */
+                                if (local->failed && priv->self_heal) {
+                                        /* Triggering self-heal */
+                                        /* means, self-heal required for this
+                                           inode */
+                                        local->inode_generation = 0;
+                                        priv->inode_generation++;
+                                }
+                        } else {
+                                local->stbuf.st_ino = local->st_ino;
+                        }
+
+                        local->stbuf.st_nlink = local->st_nlink;
+                }
+                if (local->op_ret == -1) {
+                        if (!local->revalidate && local->list)
+                                FREE (local->list);
+                }
+
+                if ((local->op_ret >= 0) && local->failed &&
+                    local->revalidate) {
+                        /* Done revalidate, but it failed */
+                        /* Report the errno recorded with the failure; the
+                           op_errno argument belongs to whichever reply
+                           happened to arrive last. */
+                        if (local->op_errno != ENOTCONN) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Revalidate failed for path(%s): %s",
+                                        local->loc1.path,
+                                        strerror (local->op_errno));
+                        }
+                        local->op_ret = -1;
+                }
+
+                if ((priv->self_heal && !priv->optimist) &&
+                    (!local->revalidate && (local->op_ret == 0) &&
+                     S_ISDIR(local->stbuf.st_mode))) {
+                        /* Let the self heal be done here */
+                        zr_unify_self_heal (frame, this, local);
+                        /* zr_unify_self_heal() drops the dict reference
+                           itself */
+                        local_dict = NULL;
+                } else {
+                        /* either no self heal, or op_ret == -1 (failure) */
+                        tmp_inode = local->loc1.inode;
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                                      tmp_inode, &local->stbuf, local->dict);
+                }
+                if (local_dict) {
+                        dict_unref (local_dict);
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * unify_lookup - lookup() entry point.
+ *
+ * When the inode already carries a node list in its ctx (a revalidate of a
+ * non-directory), the lookup is sent only to the nodes in that list.
+ * Otherwise it is sent to every entry of priv->xl_array, namespace included.
+ * unify_lookup_cbk merges the replies.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: location to look up; loc and loc->inode must be set.
+ * @xattr_req: passed through to the children unchanged.
+ *
+ * Always returns 0; errors are reported through STACK_UNWIND.
+ */
+int32_t
+unify_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req)
+{
+ unify_local_t *local = NULL;
+ unify_private_t *priv = this->private;
+ int16_t *list = NULL;
+ long index = 0;
+
+ if (!(loc && loc->inode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: Argument not right", loc?loc->path:"(null)");
+ STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ /* Initialization */
+ INIT_LOCAL (frame, local);
+ loc_copy (&local->loc1, loc);
+ if (local->loc1.path == NULL) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+ STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL);
+ return 0;
+ }
+
+ /* A known non-directory inode with a ctx value: fetch the stored
+ node list. */
+ if (!inode_ctx_get (loc->inode, this, NULL) &&
+ loc->inode->st_mode &&
+ !S_ISDIR (loc->inode->st_mode)) {
+ uint64_t tmp_list = 0;
+ /* check if revalidate or fresh lookup */
+ inode_ctx_get (loc->inode, this, &tmp_list);
+ local->list = (int16_t *)(long)tmp_list;
+ }
+
+ if (local->list) {
+ list = local->list;
+ /* Count the entries of the -1 terminated list; exactly 2 are
+ expected. */
+ for (index = 0; list[index] != -1; index++);
+ if (index != 2) {
+ if (index < 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "returning ESTALE for %s: file "
+ "count is %ld", loc->path, index);
+ /* Print where all the file is present */
+ for (index = 0;
+ local->list[index] != -1; index++) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: found on %s", loc->path,
+ priv->xl_array[list[index]]->name);
+ }
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, -1, ESTALE,
+ NULL, NULL, NULL);
+ return 0;
+ } else {
+ /* There are more than 2 presences */
+ /* Just log and continue */
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: file count is %ld",
+ loc->path, index);
+ /* Print where all the file is present */
+ for (index = 0;
+ local->list[index] != -1; index++) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: found on %s", loc->path,
+ priv->xl_array[list[index]]->name);
+ }
+ }
+ }
+
+ /* is revalidate */
+ local->revalidate = 1;
+
+ /* The expected reply count is fixed before the first wind. */
+ for (index = 0; list[index] != -1; index++)
+ local->call_count++;
+
+ for (index = 0; list[index] != -1; index++) {
+ /* The end-of-list test is evaluated before the wind.
+ NOTE(review): presumably so the list is not read again
+ after the last call has been wound -- confirm. */
+ char need_break = (list[index+1] == -1);
+ STACK_WIND_COOKIE (frame,
+ unify_lookup_cbk,
+ (void *)(long)list[index], //cookie
+ priv->xl_array [list[index]],
+ priv->xl_array [list[index]]->fops->lookup,
+ loc,
+ xattr_req);
+ if (need_break)
+ break;
+ }
+ } else {
+ if (loc->inode->st_mode) {
+ /* NOTE(review): the ctx value is read into
+ local->inode_generation only when the first
+ inode_ctx_get() returns non-zero -- confirm the
+ intended sense of this check. */
+ if (inode_ctx_get (loc->inode, this, NULL)) {
+ inode_ctx_get (loc->inode, this,
+ &local->inode_generation);
+ }
+ }
+ /* This is first call, there is no list */
+ /* call count should be all child + 1 namespace */
+ local->call_count = priv->child_count + 1;
+
+ /* The cookie is the index into priv->xl_array. */
+ for (index = 0; index <= priv->child_count; index++) {
+ STACK_WIND_COOKIE (frame,
+ unify_lookup_cbk,
+ (void *)index, //cookie
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->lookup,
+ loc,
+ xattr_req);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * unify_stat - if directory, get the stat directly from NameSpace child.
+ *              if file, send it to every node in the inode's ctx list.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: location to stat; loc and loc->inode must be set.
+ *
+ * The calls are wound with STACK_WIND, so unify_buf_cbk receives the
+ * child's frame as its cookie.
+ *
+ * Always returns 0; errors are reported through STACK_UNWIND. A file
+ * whose inode has no (or an empty) node list is answered with EINVAL.
+ */
+int32_t
+unify_stat (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+{
+        unify_local_t   *local    = NULL;
+        unify_private_t *priv     = this->private;
+        int16_t          index    = 0;
+        int16_t         *list     = NULL;
+        uint64_t         tmp_list = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+        if (local->loc1.path == NULL) {
+                gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        local->st_ino = loc->inode->ino;
+
+        if (S_ISDIR (loc->inode->st_mode)) {
+                /* Directory */
+                local->call_count = 1;
+                STACK_WIND (frame, unify_buf_cbk, NS(this),
+                            NS(this)->fops->stat, loc);
+                return 0;
+        }
+
+        /* File */
+        inode_ctx_get (loc->inode, this, &tmp_list);
+        list = (int16_t *)(long)tmp_list;
+
+        /* Without a list there is nothing to dereference, and with an
+           empty one nothing would be wound and the frame would never be
+           unwound; fail the call explicitly in both cases. */
+        if (!list || (list[0] == -1)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: no node list found in inode ctx", loc->path);
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, -1, EINVAL, NULL);
+                return 0;
+        }
+
+        /* The expected reply count is fixed before the first wind. */
+        for (index = 0; list[index] != -1; index++)
+                local->call_count++;
+
+        for (index = 0; list[index] != -1; index++) {
+                /* evaluated before the wind, as in the other fops */
+                char need_break = (list[index+1] == -1);
+                STACK_WIND (frame,
+                            unify_buf_cbk,
+                            priv->xl_array[list[index]],
+                            priv->xl_array[list[index]]->fops->stat,
+                            loc);
+                if (need_break)
+                        break;
+        }
+
+        return 0;
+}
+
+/**
+ * unify_access_cbk - relays the namespace's access() reply to the caller.
+ *
+ * @op_ret: result of access() on the namespace, passed on unchanged.
+ * @op_errno: errno of the call, passed on unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        /* single reply, nothing to merge */
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/**
+ * unify_access - access() entry point; the request goes to the namespace
+ * only, which has all the attributes set for the file.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: location to check; loc and loc->inode must be set.
+ * @mask: access mask, passed through unchanged.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_access (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              int32_t mask)
+{
+        xlator_t *ns = NS (this);
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        STACK_WIND (frame, unify_access_cbk, ns, ns->fops->access, loc, mask);
+
+        return 0;
+}
+
+/**
+ * unify_mkdir_cbk - collects the storage nodes' mkdir() replies and unwinds
+ * after the last one with the stat saved from the namespace reply.
+ *
+ * @cookie: index into priv->xl_array of the node that replied.
+ * @op_ret: result of mkdir() on the node; -1 marks failure.
+ * @op_errno: errno of a failed call.
+ * @inode: not read by this callback.
+ * @buf: not read by this callback.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_mkdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *buf)
+{
+ int32_t callcnt = 0;
+ unify_private_t *priv = this->private;
+ unify_local_t *local = frame->local;
+ inode_t *tmp_inode = NULL;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ /* With the 'optimist' option, ENOENT and EEXIST from a node
+ are neither logged nor recorded. */
+ if ((op_ret == -1) && !(priv->optimist &&
+ (op_errno == ENOENT ||
+ op_errno == EEXIST))) {
+ /* TODO: Decrement the inode_generation of
+ * this->inode's parent inode, hence the missing
+ * directory is created properly by self-heal.
+ * Currently, there is no way to get the parent
+ * inode directly.
+ */
+ gf_log (this->name, GF_LOG_ERROR,
+ "child(%s): path(%s): %s",
+ priv->xl_array[(long)cookie]->name,
+ local->loc1.path, strerror (op_errno));
+ /* EEXIST is logged but does not mark the call failed. */
+ if (op_errno != EEXIST)
+ local->failed = 1;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret >= 0)
+ local->op_ret = 0;
+
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ /* Stamp the new directory with the current generation only
+ when no node failed. */
+ if (!local->failed) {
+ inode_ctx_put (local->loc1.inode, this,
+ priv->inode_generation);
+ }
+
+ /* Keep the inode pointer; loc1 is wiped before the unwind. */
+ tmp_inode = local->loc1.inode;
+ unify_local_wipe (local);
+
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ tmp_inode, &local->stbuf);
+ }
+
+ return 0;
+}
+
+/**
+ * unify_ns_mkdir_cbk - namespace reply for mkdir(). On success the mkdir
+ * is repeated on every storage node; on failure the error is returned to
+ * the caller right away.
+ *
+ * @op_ret: result of mkdir() on the namespace; -1 marks failure.
+ * @op_errno: errno of a failed call.
+ * @inode: inode passed back by the namespace.
+ * @buf: stat of the new directory; saved for the final unwind.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_ns_mkdir_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    inode_t *inode,
+                    struct stat *buf)
+{
+        unify_private_t *priv  = this->private;
+        unify_local_t   *local = frame->local;
+        long             index = 0;
+
+        if (op_ret == -1) {
+                /* No need to send mkdir request to other servers,
+                 * as namespace action failed
+                 */
+                /* unify_mkdir() stores the path in loc1 and never sets
+                   local->name, so loc1.path is what must be logged. */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "namespace: path(%s): %s",
+                        local->loc1.path, strerror (op_errno));
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, inode, NULL);
+                return 0;
+        }
+
+        /* Create one inode for this entry */
+        local->op_ret = 0;
+        local->stbuf = *buf;
+
+        local->call_count = priv->child_count;
+
+        /* Send mkdir request to all the nodes now */
+        for (index = 0; index < priv->child_count; index++) {
+                STACK_WIND_COOKIE (frame,
+                                   unify_mkdir_cbk,
+                                   (void *)index, //cookie
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->mkdir,
+                                   &local->loc1,
+                                   local->mode);
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_mkdir - mkdir() entry point. The directory is created on the
+ * namespace first; unify_ns_mkdir_cbk continues with the storage nodes.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: location of the directory to create.
+ * @mode: mode of the new directory; remembered for the storage nodes.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_mkdir (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             mode_t mode)
+{
+        unify_local_t *local = NULL;
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        local->mode = mode;
+        loc_copy (&local->loc1, loc);
+
+        if (local->loc1.path != NULL) {
+                STACK_WIND (frame, unify_ns_mkdir_cbk, NS(this),
+                            NS(this)->fops->mkdir, loc, mode);
+                return 0;
+        }
+
+        /* the copy of loc has no path */
+        gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+        STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
+
+        return 0;
+}
+
+/**
+ * unify_rmdir_cbk - collects the storage nodes' rmdir() replies and
+ * unwinds after the last one.
+ *
+ * @op_ret: result of rmdir() on one node.
+ * @op_errno: errno of the call; recorded when op_ret is -1.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_rmdir_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno)
+{
+        unify_local_t   *local   = frame->local;
+        unify_private_t *priv    = this->private;
+        int32_t          pending = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                }
+
+                /* a missing directory counts as success in optimist mode */
+                if ((op_ret == 0) ||
+                    (priv->optimist && (op_errno == ENOENT))) {
+                        local->op_ret = 0;
+                }
+
+                pending = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending) {
+                return 0;
+        }
+
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+/**
+ * unify_ns_rmdir_cbk - namespace reply for rmdir(). On success the rmdir
+ * is repeated on every storage node; on failure the error goes straight
+ * back to the caller.
+ *
+ * @op_ret: result of rmdir() on the namespace; -1 marks failure.
+ * @op_errno: errno of a failed call.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_ns_rmdir_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno)
+{
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv  = this->private;
+        int16_t          child = 0;
+
+        if (op_ret != -1) {
+                /* namespace is done, now remove it on the storage nodes */
+                local->call_count = priv->child_count;
+
+                for (child = 0; child < priv->child_count; child++) {
+                        STACK_WIND (frame,
+                                    unify_rmdir_cbk,
+                                    priv->xl_array[child],
+                                    priv->xl_array[child]->fops->rmdir,
+                                    &local->loc1);
+                }
+
+                return 0;
+        }
+
+        /* Namespace action failed, so the other servers are left alone.
+         * ENOTEMPTY is logged at debug level only.
+         */
+        gf_log (this->name,
+                ((op_errno != ENOTEMPTY) ?
+                 GF_LOG_ERROR : GF_LOG_DEBUG),
+                "namespace: path(%s): %s",
+                local->loc1.path, strerror (op_errno));
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * unify_rmdir - rmdir() entry point. The directory is removed from the
+ * namespace first; unify_ns_rmdir_cbk continues with the storage nodes.
+ *
+ * @frame: call frame of the request.
+ * @this: pointer to unify xlator.
+ * @loc: directory to remove; loc and loc->inode must be set.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_rmdir (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc)
+{
+        unify_local_t *local = NULL;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+
+        if (local->loc1.path != NULL) {
+                STACK_WIND (frame, unify_ns_rmdir_cbk, NS(this),
+                            NS(this)->fops->rmdir, loc);
+                return 0;
+        }
+
+        /* the copy of loc has no path */
+        gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+        STACK_UNWIND (frame, -1, ENOMEM);
+
+        return 0;
+}
+
+/**
+ * unify_open_cbk - collects the open() replies and unwinds after the last
+ * one; the open is reported as failed when any of the nodes failed.
+ *
+ * @cookie: xlator that replied (compared against the namespace).
+ * @op_ret: result of open() on the node; -1 marks failure.
+ * @op_errno: errno of a failed call.
+ * @fd: fd being opened; the storage node's xlator is stored in its ctx.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_open_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd)
+{
+ int32_t callcnt = 0;
+ unify_local_t *local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ if (NS(this) != (xlator_t *)cookie) {
+ /* Store child node's ptr, used in
+ all the f*** / FileIO calls */
+ fd_ctx_set (fd, this, (uint64_t)(long)cookie);
+ }
+ }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->failed = 1;
+ }
+ callcnt = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ /* Some node succeeded and some node failed. */
+ if ((local->failed == 1) && (local->op_ret >= 0)) {
+ local->call_count = 1;
+ /* return -1 to user */
+ local->op_ret = -1;
+ //local->op_errno = EIO;
+
+ /* The fd ctx is only set by a storage-node success, so
+ its presence tells which side failed. */
+ if (!fd_ctx_get (local->fd, this, NULL)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Open success on child node, "
+ "failed on namespace");
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Open success on namespace, "
+ "failed on child node");
+ }
+ }
+
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, local->op_ret,
+ local->op_errno, local->fd);
+ }
+
+ return 0;
+}
+
+#ifdef GF_DARWIN_HOST_OS
+/**
+ * unify_open_lookup_cbk - collects the lookup() replies sent by
+ * unify_open_readlink_cbk and, after the last one, winds open() to the
+ * namespace and to the storage node that has the file.
+ *
+ * @cookie: index into priv->xl_array of the node that replied.
+ * @op_ret: result of lookup() on the node; -1 marks failure.
+ * @op_errno: errno of a failed call.
+ * @inode: not read by this callback.
+ * @buf: stat of the entry on that node (read only when op_ret >= 0).
+ * @dict: not read by this callback.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_open_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *buf,
+ dict_t *dict)
+{
+ int32_t callcnt = 0;
+ int16_t index = 0;
+ unify_private_t *priv = this->private;
+ unify_local_t *local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "child(%s): path(%s): %s",
+ priv->xl_array[(long)cookie]->name,
+ local->loc1.path, strerror (op_errno));
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ /* local->index counts the nodes that have the entry. */
+ local->index++;
+ /* Slot 0 holds the namespace, slot 1 a storage node. */
+ if (NS(this) == priv->xl_array[(long)cookie]) {
+ local->list[0] = (int16_t)(long)cookie;
+ } else {
+ local->list[1] = (int16_t)(long)cookie;
+ }
+ /* 'failed' doubles as the "entry is a directory" flag. */
+ if (S_ISDIR (buf->st_mode))
+ local->failed = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ int16_t file_list[3] = {0,};
+ local->op_ret = -1;
+
+ /* Private, -1 terminated copy of the two slots. */
+ file_list[0] = local->list[0];
+ file_list[1] = local->list[1];
+ file_list[2] = -1;
+
+ if (local->index != 2) {
+ /* Lookup failed, can't do open */
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: present on %d nodes",
+ local->name, local->index);
+
+ if (local->index < 2) {
+ unify_local_wipe (local);
+ gf_log (this->name, GF_LOG_ERROR,
+ "returning as file found on less "
+ "than 2 nodes");
+ STACK_UNWIND (frame, local->op_ret,
+ local->op_errno, local->fd);
+ return 0;
+ }
+ }
+
+ if (local->failed) {
+ /* Open on directory, return EISDIR */
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, -1, EISDIR, local->fd);
+ return 0;
+ }
+
+ /* Everything is perfect :) */
+ local->call_count = 2;
+
+ /* The cookie of each open() is the target xlator itself, as
+ unify_open_cbk expects. */
+ for (index = 0; file_list[index] != -1; index++) {
+ char need_break = (file_list[index+1] == -1);
+ STACK_WIND_COOKIE (frame,
+ unify_open_cbk,
+ priv->xl_array[file_list[index]],
+ priv->xl_array[file_list[index]],
+ priv->xl_array[file_list[index]]->fops->open,
+ &local->loc1,
+ local->flags,
+ local->fd);
+ if (need_break)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/**
+ * unify_open_readlink_cbk - readlink() reply used by the open path on
+ * Darwin. Builds the link target in local->name and sends a lookup() to
+ * every node, namespace included; unify_open_lookup_cbk merges the replies.
+ *
+ * @op_ret: result of readlink(); -1 marks failure.
+ * @op_errno: not read by this callback.
+ * @path: link target. A relative target is resolved against the directory
+ *        part of local->loc1.path.
+ *
+ * Always returns 0. Unwinds with ENOENT when readlink failed and with
+ * ENAMETOOLONG when the resolved target does not fit in ZR_PATH_MAX.
+ */
+int32_t
+unify_open_readlink_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         const char *path)
+{
+        int16_t          index = 0;
+        unify_private_t *priv  = this->private;
+        unify_local_t   *local = frame->local;
+
+        if (op_ret == -1) {
+                STACK_UNWIND (frame, -1, ENOENT);
+                return 0;
+        }
+
+        if (path[0] == '/') {
+                local->name = strdup (path);
+                ERR_ABORT (local->name);
+        } else {
+                char *tmp_str  = strdup (local->loc1.path);
+                char *tmp_base = NULL;
+
+                ERR_ABORT (tmp_str);
+                /* dirname() may modify its argument, hence the copy */
+                tmp_base = dirname (tmp_str);
+
+                /* "<dir>/<target>" plus the terminating NUL has to fit
+                   into the ZR_PATH_MAX bytes allocated below. */
+                if ((strlen (tmp_base) + 1 + strlen (path)) >= ZR_PATH_MAX) {
+                        FREE (tmp_str);
+                        STACK_UNWIND (frame, -1, ENAMETOOLONG);
+                        return 0;
+                }
+
+                local->name = CALLOC (1, ZR_PATH_MAX);
+                ERR_ABORT (local->name);
+                strcpy (local->name, tmp_base);
+                strcat (local->name, "/");
+                strcat (local->name, path);
+                FREE (tmp_str);
+        }
+
+        /* slot 0: namespace, slot 1: storage node (see
+           unify_open_lookup_cbk) */
+        local->list = CALLOC (1, sizeof (int16_t) * 3);
+        ERR_ABORT (local->list);
+        local->call_count = priv->child_count + 1;
+        local->op_ret = -1;
+        for (index = 0; index <= priv->child_count; index++) {
+                /* Send the lookup to all the nodes including namespace */
+                STACK_WIND_COOKIE (frame,
+                                   unify_open_lookup_cbk,
+                                   (void *)(long)index,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->lookup,
+                                   &local->loc1,
+                                   NULL);
+        }
+
+        return 0;
+}
+#endif /* GF_DARWIN_HOST_OS */
+
+/**
+ * unify_open - open a file on the namespace and on its storage node.
+ *
+ * Reads the -1 terminated int16_t node list from the inode context,
+ * counts its entries and remembers the last non-namespace index. When
+ * the file is known on fewer than two nodes the call fails with EIO.
+ * Otherwise open is wound to the namespace (index priv->child_count)
+ * and to the storage node; replies go to unify_open_cbk, with the
+ * target xlator as cookie.
+ *
+ * On GF_DARWIN_HOST_OS a symlink is first resolved through readlink on
+ * the namespace (see unify_open_readlink_cbk).
+ *
+ * Always returns 0; the result is delivered through the frame.
+ */
+int32_t
+unify_open (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            int32_t flags,
+            fd_t *fd)
+{
+        unify_private_t *priv = this->private;
+        unify_local_t   *local = NULL;
+        int16_t         *list = NULL;
+        int16_t          index = 0;
+        int16_t          file_list[3] = {0,};
+        uint64_t         tmp_list = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Init */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+        local->fd = fd;
+        local->flags = flags;
+        inode_ctx_get (loc->inode, this, &tmp_list);
+        list = (int16_t *)(long)tmp_list;
+
+        local->list = list;
+        file_list[0] = priv->child_count; /* Thats namespace */
+        file_list[2] = -1;
+        for (index = 0; list[index] != -1; index++) {
+                local->call_count++;
+                if (list[index] != priv->child_count)
+                        file_list[1] = list[index];
+        }
+
+        if (local->call_count != 2) {
+                /* If the lookup was done for file */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: entry_count is %d",
+                        loc->path, local->call_count);
+                for (index = 0; local->list[index] != -1; index++)
+                        gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
+                                loc->path, priv->xl_array[list[index]]->name);
+
+                if (local->call_count < 2) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "returning EIO as file found on onlyone node");
+                        /* drop the loc copied above; nothing else will
+                           release it once the frame is unwound */
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, -1, EIO, fd);
+                        return 0;
+                }
+        }
+
+#ifdef GF_DARWIN_HOST_OS
+        /* Handle symlink here */
+        if (S_ISLNK (loc->inode->st_mode)) {
+                /* Callcount doesn't matter here */
+                STACK_WIND (frame,
+                            unify_open_readlink_cbk,
+                            NS(this),
+                            NS(this)->fops->readlink,
+                            loc, ZR_PATH_MAX);
+                return 0;
+        }
+#endif /* GF_DARWIN_HOST_OS */
+
+        local->call_count = 2;
+        for (index = 0; file_list[index] != -1; index++) {
+                /* decide before winding: the frame must not be touched
+                   again after the last wind */
+                char need_break = (file_list[index+1] == -1);
+                STACK_WIND_COOKIE (frame,
+                                   unify_open_cbk,
+                                   priv->xl_array[file_list[index]], //cookie
+                                   priv->xl_array[file_list[index]],
+                                   priv->xl_array[file_list[index]]->fops->open,
+                                   loc,
+                                   flags,
+                                   fd);
+                if (need_break)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/*
+ * unify_create_unlink_cbk - reply to the clean-up unlink sent after a
+ * create could not be completed. The unlink's own op_ret/op_errno are
+ * ignored; the result saved in local is reported to the caller.
+ */
+int32_t
+unify_create_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno)
+{
+        unify_local_t *saved = frame->local;
+        inode_t       *created_inode = NULL;
+
+        /* pick up the inode pointer before the loc is wiped */
+        created_inode = saved->loc1.inode;
+        unify_local_wipe (saved);
+
+        STACK_UNWIND (frame, saved->op_ret, saved->op_errno, saved->fd,
+                      created_inode, &saved->stbuf);
+
+        return 0;
+}
+
+/**
+ * unify_create_open_cbk - open reply for a create() that turned into an
+ * open because the file already existed.
+ *
+ * @cookie: the xlator the open was wound to.
+ *
+ * A successful reply from a storage node records that node in the fd
+ * context, where the fd based calls in this file read it back. A
+ * successful reply from the namespace clears local->failed; any failed
+ * reply sets it and records op_errno.
+ *
+ * After the last reply: if one side succeeded and the other failed, the
+ * result becomes -1/EIO and an unlink is wound to the side that
+ * succeeded (the storage node when the fd context holds one, else the
+ * namespace), finishing in unify_create_unlink_cbk. Otherwise the saved
+ * result is unwound.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_create_open_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno,
+                       fd_t *fd)
+{
+        int            ret = 0;
+        int32_t        callcnt = 0;
+        unify_local_t *local = frame->local;
+        inode_t       *inode = NULL;
+        xlator_t      *child = NULL;
+        uint64_t       tmp_value = 0;
+
+        LOCK (&frame->lock);
+        {
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                        if (NS(this) != (xlator_t *)cookie) {
+                                /* Store child node's ptr, used in all
+                                   the f*** / FileIO calls */
+                                ret = fd_ctx_set (fd, this,
+                                                  (uint64_t)(long)cookie);
+                                if (ret != 0)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "child(%s): path(%s): failed "
+                                                "to set fd context",
+                                                ((xlator_t *)cookie)->name,
+                                                local->loc1.path);
+                        } else {
+                                /* NOTE: open successful on namespace.
+                                 * fd's ctx can be used to identify open
+                                 * failure on storage subvolume. cool
+                                 * ide ;) */
+                                local->failed = 0;
+                        }
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "child(%s): path(%s): %s",
+                                ((xlator_t *)cookie)->name,
+                                local->loc1.path, strerror (op_errno));
+                        local->op_errno = op_errno;
+                        local->failed = 1;
+                }
+                callcnt = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                if (local->failed == 1 && (local->op_ret >= 0)) {
+                        /* return -1 to user */
+                        local->op_ret = -1;
+                        local->op_errno = EIO;
+                        local->fd = fd;
+                        local->call_count = 1;
+
+                        if (!fd_ctx_get (local->fd, this, &tmp_value)) {
+                                child = (xlator_t *)(long)tmp_value;
+
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Create success on child node, "
+                                        "failed on namespace");
+
+                                STACK_WIND (frame,
+                                            unify_create_unlink_cbk,
+                                            child,
+                                            child->fops->unlink,
+                                            &local->loc1);
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Create success on namespace, "
+                                        "failed on child node");
+
+                                STACK_WIND (frame,
+                                            unify_create_unlink_cbk,
+                                            NS(this),
+                                            NS(this)->fops->unlink,
+                                            &local->loc1);
+                        }
+                        return 0;
+                }
+                /* keep the inode pointer; the loc is wiped next */
+                inode = local->loc1.inode;
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
+                              inode, &local->stbuf);
+        }
+        return 0;
+}
+
+/**
+ * unify_create_lookup_cbk - collect the lookup replies that are sent
+ * when create() finds the file already present on the namespace.
+ *
+ * @cookie: index into priv->xl_array of the node that replied.
+ *
+ * Every successful reply appends the node index to local->list; the
+ * namespace reply supplies st_ino and any other reply supplies stbuf.
+ * After the last reply the list is terminated with -1 and stored in
+ * the inode context. If fewer than two nodes answered successfully the
+ * frame is unwound with EIO; otherwise open is wound to the entries of
+ * file_list and answered in unify_create_open_cbk.
+ *
+ * NOTE(review): local->list is written at local->index with no bound
+ * check here, so its capacity must cover every possible successful
+ * reply plus the terminator -- confirm against the allocation site.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_create_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *buf,
+ dict_t *dict)
+{
+ int32_t callcnt = 0;
+ int16_t index = 0;
+ unify_private_t *priv = this->private;
+ unify_local_t *local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "child(%s): path(%s): %s",
+ priv->xl_array[(long)cookie]->name,
+ local->loc1.path, strerror (op_errno));
+ local->op_errno = op_errno;
+ local->failed = 1;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->list[local->index++] = (int16_t)(long)cookie; /* remember which node has the file */
+ if (NS(this) == priv->xl_array[(long)cookie]) {
+ local->st_ino = buf->st_ino;
+ } else {
+ local->stbuf = *buf;
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) { /* last reply: decide what to do next */
+ int16_t *list = local->list;
+ int16_t file_list[3] = {0,};
+ local->op_ret = -1;
+
+ local->list [local->index] = -1; /* terminate the node list */
+ file_list[0] = list[0];
+ file_list[1] = list[1];
+ file_list[2] = -1;
+
+ local->stbuf.st_ino = local->st_ino; /* inode number comes from the namespace reply */
+ /* TODO: log on failure */
+ inode_ctx_put (local->loc1.inode, this,
+ (uint64_t)(long)local->list);
+
+ if (local->index != 2) {
+ /* Lookup failed, can't do open */
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: present on %d nodes",
+ local->loc1.path, local->index);
+ file_list[0] = priv->child_count; /* open the namespace plus the last non-namespace node found */
+ for (index = 0; list[index] != -1; index++) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: found on %s", local->loc1.path,
+ priv->xl_array[list[index]]->name);
+ if (list[index] != priv->child_count)
+ file_list[1] = list[index];
+ }
+
+ if (local->index < 2) {
+ unify_local_wipe (local);
+ gf_log (this->name, GF_LOG_ERROR,
+ "returning EIO as file found on "
+ "only one node");
+ STACK_UNWIND (frame, -1, EIO,
+ local->fd, inode, NULL);
+ return 0;
+ }
+ }
+ /* Everything is perfect :) */
+ local->call_count = 2;
+
+ for (index = 0; file_list[index] != -1; index++) {
+ char need_break = (file_list[index+1] == -1); /* computed before the wind so the loop can stop right after the last one */
+ STACK_WIND_COOKIE (frame,
+ unify_create_open_cbk,
+ priv->xl_array[file_list[index]],
+ priv->xl_array[file_list[index]],
+ priv->xl_array[file_list[index]]->fops->open,
+ &local->loc1,
+ local->flags,
+ local->fd);
+ if (need_break)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/**
+ * unify_create_cbk - reply from the storage node chosen for a create.
+ *
+ * A failure (op_ret == -1) is logged and the entry already made on the
+ * namespace is removed again through unify_create_unlink_cbk. On
+ * success the stat is saved with the namespace inode number, the
+ * replying xlator is stored in the fd context and the frame is unwound.
+ */
+int32_t
+unify_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        unify_local_t *local = frame->local;
+        call_frame_t  *prev = cookie;
+        inode_t       *saved_inode = NULL;
+        int            ret = 0;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "create failed on %s (file %s, error %s), "
+                        "sending unlink to namespace",
+                        prev->this->name,
+                        local->loc1.path, strerror (op_errno));
+
+                /* send unlink () on Namespace */
+                local->op_ret = -1;
+                local->op_errno = op_errno;
+                local->call_count = 1;
+
+                STACK_WIND (frame, unify_create_unlink_cbk,
+                            NS(this), NS(this)->fops->unlink,
+                            &local->loc1);
+                return 0;
+        }
+
+        if (op_ret >= 0) {
+                local->op_ret = op_ret;
+
+                /* Just inode number should be from NS node */
+                local->stbuf = *buf;
+                local->stbuf.st_ino = local->st_ino;
+
+                /* TODO: log on failure */
+                ret = fd_ctx_set (fd, this, (uint64_t)(long)prev->this);
+        }
+
+        saved_inode = local->loc1.inode;
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
+                      saved_inode, &local->stbuf);
+
+        return 0;
+}
+
+/**
+ * unify_ns_create_cbk - reply from the namespace to the exclusive
+ * create sent by unify_create.
+ *
+ * - Failure other than "EEXIST without O_EXCL from the caller": the
+ *   error is unwound as-is.
+ * - Success: the namespace inode number is kept, a three slot node list
+ *   {namespace, scheduled node, -1} is stored in the inode context and
+ *   the create is wound to the node picked by the scheduler
+ *   (unify_create_cbk). If no node can be scheduled, the namespace
+ *   entry is unlinked again and ENOTCONN is reported.
+ * - EEXIST and the caller did not ask for O_EXCL: a lookup is wound to
+ *   every node including the namespace (unify_create_lookup_cbk) so the
+ *   existing file can be opened instead.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_ns_create_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     fd_t *fd,
+                     inode_t *inode,
+                     struct stat *buf)
+{
+        struct sched_ops *sched_ops = NULL;
+        xlator_t         *sched_xl = NULL;
+        unify_local_t    *local = frame->local;
+        unify_private_t  *priv = this->private;
+        int16_t          *list = NULL;
+        int16_t           index = 0;
+
+        if (op_ret == -1) {
+                /* No need to send create request to other servers, as
+                   namespace action failed. Handle exclusive create here. */
+                if ((op_errno != EEXIST) ||
+                    ((local->flags & O_EXCL) == O_EXCL)) {
+                        /* If its just a create call without O_EXCL,
+                           don't do this */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "namespace: path(%s): %s",
+                                local->loc1.path, strerror (op_errno));
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+                        return 0;
+                }
+        }
+
+        if (op_ret >= 0) {
+                /* Get the inode number from the NS node */
+                local->st_ino = buf->st_ino;
+
+                local->op_ret = -1;
+
+                /* Start the mapping list */
+                list = CALLOC (1, sizeof (int16_t) * 3);
+                ERR_ABORT (list);
+                inode_ctx_put (inode, this, (uint64_t)(long)list);
+                list[0] = priv->child_count;
+                list[2] = -1;
+
+                /* This means, file doesn't exist anywhere in the Filesystem */
+                sched_ops = priv->sched_ops;
+
+                /* Send create request to the scheduled node now */
+                sched_xl = sched_ops->schedule (this, local->loc1.path);
+                if (sched_xl == NULL)
+                {
+                        /* send unlink () on Namespace */
+                        local->op_errno = ENOTCONN;
+                        local->op_ret = -1;
+                        local->call_count = 1;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no node online to schedule create:(file %s) "
+                                "sending unlink to namespace",
+                                (local->loc1.path)?local->loc1.path:"");
+
+                        STACK_WIND (frame,
+                                    unify_create_unlink_cbk,
+                                    NS(this),
+                                    NS(this)->fops->unlink,
+                                    &local->loc1);
+
+                        return 0;
+                }
+
+                for (index = 0; index < priv->child_count; index++)
+                        if (sched_xl == priv->xl_array[index])
+                                break;
+                list[1] = index;
+
+                STACK_WIND (frame, unify_create_cbk,
+                            sched_xl, sched_xl->fops->create,
+                            &local->loc1, local->flags, local->mode, fd);
+        } else {
+                /* File already exists, and there is no O_EXCL flag */
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "File(%s) already exists on namespace, sending "
+                        "open instead", local->loc1.path);
+
+                /* One slot for each of the child_count + 1 nodes that
+                   may answer the lookup, plus the -1 terminator: the
+                   lookup callback appends an entry per successful
+                   reply. */
+                local->list = CALLOC (priv->child_count + 2,
+                                      sizeof (int16_t));
+                ERR_ABORT (local->list);
+                local->call_count = priv->child_count + 1;
+                local->op_ret = -1;
+                for (index = 0; index <= priv->child_count; index++) {
+                        /* Send lookup() to all nodes including namespace */
+                        STACK_WIND_COOKIE (frame,
+                                           unify_create_lookup_cbk,
+                                           (void *)(long)index,
+                                           priv->xl_array[index],
+                                           priv->xl_array[index]->fops->lookup,
+                                           &local->loc1,
+                                           NULL);
+                }
+        }
+        return 0;
+}
+
+/**
+ * unify_create - entry point for create(). The file is first created
+ * exclusively (O_EXCL is forced) on the namespace; the reply is handled
+ * by unify_ns_create_cbk, which continues on a storage node.
+ */
+int32_t
+unify_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              int32_t flags, mode_t mode, fd_t *fd)
+{
+        unify_local_t *local = NULL;
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        local->fd = fd;
+        local->flags = flags;
+        local->mode = mode;
+
+        loc_copy (&local->loc1, loc);
+        if (local->loc1.path != NULL) {
+                STACK_WIND (frame, unify_ns_create_cbk,
+                            NS(this), NS(this)->fops->create,
+                            loc, flags | O_EXCL, mode, fd);
+                return 0;
+        }
+
+        /* the copy of the path could not be made */
+        gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+        STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL);
+
+        return 0;
+}
+
+
+/**
+ * unify_opendir_cbk - pass the opendir reply back to the caller
+ * unchanged.
+ */
+int32_t
+unify_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/**
+ * unify_opendir - directories are opened on the namespace node only.
+ */
+int32_t
+unify_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+        xlator_t *ns = NULL;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        ns = NS(this);
+        STACK_WIND (frame, unify_opendir_cbk, ns, ns->fops->opendir, loc, fd);
+
+        return 0;
+}
+
+
+/**
+ * unify_chmod - wind chmod for loc to the nodes that hold it.
+ *
+ * If loc->inode is a directory the call goes to every entry of
+ * priv->xl_array, indices 0..priv->child_count. Otherwise it goes to
+ * each node whose index appears in the -1 terminated int16_t list read
+ * from the inode context. local->call_count is set to the number of
+ * winds and every reply is delivered to unify_buf_cbk.
+ *
+ * NOTE(review): the file branch leaves the loop through callcnt rather
+ * than by re-reading local->list, presumably because local may already
+ * be released once the last reply has been handled -- confirm against
+ * unify_buf_cbk.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_chmod (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ mode_t mode)
+{
+ unify_local_t *local = NULL;
+ unify_private_t *priv = this->private;
+ int32_t index = 0;
+ int32_t callcnt = 0;
+ uint64_t tmp_list = 0;
+
+ UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+ /* Initialization */
+ INIT_LOCAL (frame, local);
+
+ loc_copy (&local->loc1, loc);
+ local->st_ino = loc->inode->ino;
+
+ if (S_ISDIR (loc->inode->st_mode)) {
+ local->call_count = priv->child_count + 1;
+
+ for (index = 0; index < (priv->child_count + 1); index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->chmod,
+ loc, mode);
+ }
+ } else {
+ inode_ctx_get (loc->inode, this, &tmp_list);
+ local->list = (int16_t *)(long)tmp_list;
+
+ for (index = 0; local->list[index] != -1; index++) { /* count the nodes first so call_count is final before any wind */
+ local->call_count++;
+ callcnt++;
+ }
+
+ for (index = 0; local->list[index] != -1; index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[local->list[index]],
+ priv->xl_array[local->list[index]]->fops->chmod,
+ loc,
+ mode);
+ if (!--callcnt) /* all counted entries wound; do not read local->list again */
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * unify_chown - wind chown for loc to the nodes that hold it.
+ *
+ * If loc->inode is a directory the call goes to every entry of
+ * priv->xl_array, indices 0..priv->child_count. Otherwise it goes to
+ * each node whose index appears in the -1 terminated int16_t list read
+ * from the inode context. local->call_count is set to the number of
+ * winds and every reply is delivered to unify_buf_cbk.
+ *
+ * NOTE(review): the file branch leaves the loop through callcnt rather
+ * than by re-reading local->list, presumably because local may already
+ * be released once the last reply has been handled -- confirm against
+ * unify_buf_cbk.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_chown (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ uid_t uid,
+ gid_t gid)
+{
+ unify_local_t *local = NULL;
+ unify_private_t *priv = this->private;
+ int32_t index = 0;
+ int32_t callcnt = 0;
+ uint64_t tmp_list = 0;
+
+ UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+ /* Initialization */
+ INIT_LOCAL (frame, local);
+ loc_copy (&local->loc1, loc);
+ local->st_ino = loc->inode->ino;
+
+ if (S_ISDIR (loc->inode->st_mode)) {
+ local->call_count = priv->child_count + 1;
+
+ for (index = 0; index < (priv->child_count + 1); index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->chown,
+ loc, uid, gid);
+ }
+ } else {
+ inode_ctx_get (loc->inode, this, &tmp_list);
+ local->list = (int16_t *)(long)tmp_list;
+
+ for (index = 0; local->list[index] != -1; index++) { /* count the nodes first so call_count is final before any wind */
+ local->call_count++;
+ callcnt++;
+ }
+
+ for (index = 0; local->list[index] != -1; index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[local->list[index]],
+ priv->xl_array[local->list[index]]->fops->chown,
+ loc, uid, gid);
+ if (!--callcnt) /* all counted entries wound; do not read local->list again */
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/**
+ * unify_truncate_cbk - merge the replies of a truncate/ftruncate.
+ *
+ * @cookie: the frame of the node that replied (its ->this is compared
+ *          with the namespace xlator).
+ *
+ * A failed reply is logged and its op_errno kept; it turns the overall
+ * result into -1 unless the error is ENOENT and priv->optimist is set.
+ * A successful namespace reply supplies st_ino (and the whole stat for
+ * a directory, or when no stat has been stored yet); a successful reply
+ * for a non-directory from any other node supplies the stat.
+ *
+ * After the last reply the namespace inode number is put into the stat
+ * (the result becomes -1 if none was recorded), local is wiped and the
+ * frame is unwound. Always returns 0.
+ */
+int32_t
+unify_truncate_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *buf)
+{
+ int32_t callcnt = 0;
+ unify_private_t *priv = this->private;
+ unify_local_t *local = frame->local;
+ call_frame_t *prev_frame = cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "child(%s): path(%s): %s",
+ prev_frame->this->name,
+ (local->loc1.path)?local->loc1.path:"",
+ strerror (op_errno));
+ local->op_errno = op_errno;
+ if (!((op_errno == ENOENT) && priv->optimist)) /* ENOENT is tolerated in optimist mode */
+ local->op_ret = -1;
+ }
+
+ if (op_ret >= 0) {
+ if (NS (this) == prev_frame->this) {
+ local->st_ino = buf->st_ino;
+ /* If the entry is directory, get the
+ stat from NS node */
+ if (S_ISDIR (buf->st_mode) ||
+ !local->stbuf.st_blksize) { /* st_blksize == 0 is used as "no stat stored yet" */
+ local->stbuf = *buf;
+ }
+ }
+
+ if ((!S_ISDIR (buf->st_mode)) &&
+ (NS (this) != prev_frame->this)) {
+ /* If file, take the stat info from
+ Storage node. */
+ local->stbuf = *buf;
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->st_ino)
+ local->stbuf.st_ino = local->st_ino;
+ else
+ local->op_ret = -1; /* no inode number recorded: report failure */
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ &local->stbuf);
+ }
+
+ return 0;
+}
+
+/**
+ * unify_truncate - truncate a file on its storage node(s).
+ *
+ * For a directory inode only a stat is wound to the namespace, with
+ * unify_buf_cbk as callback. For anything else the node list is read
+ * from the inode context, a stat is wound to the namespace and a
+ * truncate to every listed node that is not the namespace; those
+ * replies are merged in unify_truncate_cbk.
+ *
+ * NOTE(review): local->call_count counts the entries of the list, while
+ * the winds are "one stat to the namespace plus one truncate per
+ * non-namespace entry". The two only match when the list contains the
+ * namespace -- confirm that this always holds.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset)
+{
+ unify_local_t *local = NULL;
+ unify_private_t *priv = this->private;
+ int32_t index = 0;
+ int32_t callcnt = 0;
+ uint64_t tmp_list = 0;
+
+ UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+ /* Initialization */
+ INIT_LOCAL (frame, local);
+ loc_copy (&local->loc1, loc);
+ local->st_ino = loc->inode->ino;
+
+ if (S_ISDIR (loc->inode->st_mode)) {
+ local->call_count = 1;
+
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ NS(this),
+ NS(this)->fops->stat,
+ loc);
+ } else {
+ local->op_ret = 0;
+ inode_ctx_get (loc->inode, this, &tmp_list);
+ local->list = (int16_t *)(long)tmp_list;
+
+ for (index = 0; local->list[index] != -1; index++) {
+ local->call_count++;
+ callcnt++;
+ }
+
+ /* Don't send truncate to NS node */
+ STACK_WIND (frame, unify_truncate_cbk, NS(this),
+ NS(this)->fops->stat, loc);
+ callcnt--; /* the namespace stat above accounts for one of the counted calls */
+
+ for (index = 0; local->list[index] != -1; index++) {
+ if (NS(this) != priv->xl_array[local->list[index]]) {
+ STACK_WIND (frame,
+ unify_truncate_cbk,
+ priv->xl_array[local->list[index]],
+ priv->xl_array[local->list[index]]->fops->truncate,
+ loc,
+ offset);
+ if (!--callcnt) /* all counted calls wound; do not read local->list again */
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * unify_utimens - wind utimens for loc to the nodes that hold it.
+ *
+ * If loc->inode is a directory the call goes to every entry of
+ * priv->xl_array, indices 0..priv->child_count. Otherwise it goes to
+ * each node whose index appears in the -1 terminated int16_t list read
+ * from the inode context. local->call_count is set to the number of
+ * winds and every reply is delivered to unify_buf_cbk.
+ *
+ * NOTE(review): the file branch leaves the loop through callcnt rather
+ * than by re-reading local->list, presumably because local may already
+ * be released once the last reply has been handled -- confirm against
+ * unify_buf_cbk.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_utimens (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct timespec tv[2])
+{
+ unify_local_t *local = NULL;
+ unify_private_t *priv = this->private;
+ int32_t index = 0;
+ int32_t callcnt = 0;
+ uint64_t tmp_list = 0;
+
+ UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+ /* Initialization */
+ INIT_LOCAL (frame, local);
+ loc_copy (&local->loc1, loc);
+ local->st_ino = loc->inode->ino;
+
+ if (S_ISDIR (loc->inode->st_mode)) {
+ local->call_count = priv->child_count + 1;
+
+ for (index = 0; index < (priv->child_count + 1); index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[index],
+ priv->xl_array[index]->fops->utimens,
+ loc, tv);
+ }
+ } else {
+ inode_ctx_get (loc->inode, this, &tmp_list);
+ local->list = (int16_t *)(long)tmp_list;
+
+ for (index = 0; local->list[index] != -1; index++) { /* count the nodes first so call_count is final before any wind */
+ local->call_count++;
+ callcnt++;
+ }
+
+ for (index = 0; local->list[index] != -1; index++) {
+ STACK_WIND (frame,
+ unify_buf_cbk,
+ priv->xl_array[local->list[index]],
+ priv->xl_array[local->list[index]]->fops->utimens,
+ loc,
+ tv);
+ if (!--callcnt) /* all counted entries wound; do not read local->list again */
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * unify_readlink_cbk - pass the readlink reply back to the caller
+ * unchanged.
+ */
+int32_t
+unify_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, const char *path)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, path);
+
+        return 0;
+}
+
+/**
+ * unify_readlink - read a symlink from the first storage node in the
+ * inode's node list (the namespace entry is skipped). With fewer than
+ * two nodes in the list the call fails with ENOENT.
+ */
+int32_t
+unify_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+{
+        unify_private_t *priv = this->private;
+        uint64_t         ctx_value = 0;
+        int16_t         *nodes = NULL;
+        int16_t          pos = 0;
+        int32_t          node_count = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        inode_ctx_get (loc->inode, this, &ctx_value);
+        nodes = (int16_t *)(long)ctx_value;
+
+        /* the list ends with -1 */
+        while (nodes[node_count] != -1)
+                node_count++;
+
+        if (node_count < 2) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "returning ENOENT, no softlink files found "
+                        "on storage node");
+                STACK_UNWIND (frame, -1, ENOENT, NULL);
+                return 0;
+        }
+
+        for (pos = 0; nodes[pos] != -1; pos++) {
+                if (priv->xl_array[nodes[pos]] == NS(this))
+                        continue;
+
+                /* first storage node wins */
+                STACK_WIND (frame,
+                            unify_readlink_cbk,
+                            priv->xl_array[nodes[pos]],
+                            priv->xl_array[nodes[pos]]->fops->readlink,
+                            loc,
+                            size);
+                break;
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_unlink_cbk - gather unlink replies. One success (or an ENOENT
+ * while priv->optimist is set) makes the overall result 0; the errno of
+ * a failed reply is kept. The frame is unwound after the last reply.
+ */
+int32_t
+unify_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv = this->private;
+        int32_t          pending = 0;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret == 0) {
+                        local->op_ret = 0;
+                } else if ((op_errno == ENOENT) && priv->optimist) {
+                        local->op_ret = 0;
+                }
+
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending != 0)
+                return 0;
+
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno);
+
+        return 0;
+}
+
+
+/**
+ * unify_unlink - remove loc from every node that holds it.
+ *
+ * The -1 terminated node list is read from the inode context and an
+ * unlink is wound to each listed node; replies are gathered in
+ * unify_unlink_cbk. An empty list fails with ENOENT.
+ *
+ * Always returns 0; the result is delivered through the frame.
+ */
+int32_t
+unify_unlink (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc)
+{
+        unify_private_t *priv = this->private;
+        unify_local_t   *local = NULL;
+        int16_t         *list = NULL;
+        int16_t          index = 0;
+        uint64_t         tmp_list = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+
+        inode_ctx_get (loc->inode, this, &tmp_list);
+        list = (int16_t *)(long)tmp_list;
+
+        for (index = 0; list[index] != -1; index++)
+                local->call_count++;
+
+        if (local->call_count) {
+                for (index = 0; list[index] != -1; index++) {
+                        /* decide before winding: the frame must not be
+                           touched again after the last wind */
+                        char need_break = (list[index+1] == -1);
+                        STACK_WIND (frame,
+                                    unify_unlink_cbk,
+                                    priv->xl_array[list[index]],
+                                    priv->xl_array[list[index]]->fops->unlink,
+                                    loc);
+                        if (need_break)
+                                break;
+                }
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: returning ENOENT", loc->path);
+                /* no callback will run, so release the copied loc here */
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, -1, ENOENT);
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_readv_cbk - pass the readv reply back to the caller unchanged.
+ */
+int32_t
+unify_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+        return 0;
+}
+
+/**
+ * unify_readv - read from the storage node recorded in the fd context.
+ */
+int32_t
+unify_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             size_t size, off_t offset)
+{
+        uint64_t  ctx_value = 0;
+        xlator_t *subvol = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_value);
+        subvol = (xlator_t *)(long)ctx_value;
+
+        STACK_WIND (frame, unify_readv_cbk,
+                    subvol, subvol->fops->readv,
+                    fd, size, offset);
+
+        return 0;
+}
+
+/**
+ * unify_writev_cbk - pass the writev reply back to the caller unchanged.
+ */
+int32_t
+unify_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+        return 0;
+}
+
+/**
+ * unify_writev - write to the storage node recorded in the fd context.
+ */
+int32_t
+unify_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t off)
+{
+        uint64_t  ctx_value = 0;
+        xlator_t *subvol = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_value);
+        subvol = (xlator_t *)(long)ctx_value;
+
+        STACK_WIND (frame, unify_writev_cbk,
+                    subvol, subvol->fops->writev,
+                    fd, vector, count, off);
+
+        return 0;
+}
+
+/**
+ * unify_ftruncate - ftruncate on the storage node recorded in the fd
+ * context, plus an fstat on the namespace; both replies are merged by
+ * unify_truncate_cbk.
+ */
+int32_t
+unify_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+{
+        unify_local_t *local = NULL;
+        xlator_t      *storage = NULL;
+        uint64_t       ctx_value = 0;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        local->op_ret = 0;
+
+        fd_ctx_get (fd, this, &ctx_value);
+        storage = (xlator_t *)(long)ctx_value;
+
+        /* one reply from the storage node, one from the namespace */
+        local->call_count = 2;
+
+        STACK_WIND (frame, unify_truncate_cbk,
+                    storage, storage->fops->ftruncate, fd, offset);
+
+        STACK_WIND (frame, unify_truncate_cbk,
+                    NS(this), NS(this)->fops->fstat, fd);
+
+        return 0;
+}
+
+
+/**
+ * unify_fchmod - fchmod on an open fd. When the fd context holds a
+ * storage node the call goes to that node and to the namespace;
+ * otherwise (a directory fd) to the namespace only. Replies are handled
+ * by unify_buf_cbk.
+ */
+int32_t
+unify_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+{
+        unify_local_t *local = NULL;
+        xlator_t      *storage = NULL;
+        uint64_t       ctx_value = 0;
+        int            no_ctx = 0;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        local->st_ino = fd->inode->ino;
+
+        no_ctx = fd_ctx_get (fd, this, &ctx_value);
+        if (no_ctx) {
+                /* this is an directory */
+                local->call_count = 1;
+
+                STACK_WIND (frame, unify_buf_cbk,
+                            NS(this), NS(this)->fops->fchmod, fd, mode);
+                return 0;
+        }
+
+        /* If its set, then its file */
+        storage = (xlator_t *)(long)ctx_value;
+        local->call_count = 2;
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    storage, storage->fops->fchmod, fd, mode);
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    NS(this), NS(this)->fops->fchmod, fd, mode);
+
+        return 0;
+}
+
+/**
+ * unify_fchown - fchown on an open fd. When the fd context holds a
+ * storage node the call goes to that node and to the namespace;
+ * otherwise to the namespace only. Replies are handled by
+ * unify_buf_cbk.
+ */
+int32_t
+unify_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              uid_t uid, gid_t gid)
+{
+        unify_local_t *local = NULL;
+        xlator_t      *storage = NULL;
+        uint64_t       ctx_value = 0;
+        int            no_ctx = 0;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        local->st_ino = fd->inode->ino;
+
+        no_ctx = fd_ctx_get (fd, this, &ctx_value);
+        if (no_ctx) {
+                /* nothing stored in the fd: namespace only */
+                local->call_count = 1;
+
+                STACK_WIND (frame, unify_buf_cbk,
+                            NS(this), NS(this)->fops->fchown,
+                            fd, uid, gid);
+                return 0;
+        }
+
+        /* If its set, then its file */
+        storage = (xlator_t *)(long)ctx_value;
+        local->call_count = 2;
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    storage, storage->fops->fchown, fd, uid, gid);
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    NS(this), NS(this)->fops->fchown, fd, uid, gid);
+
+        return 0;
+}
+
+/**
+ * unify_flush_cbk - pass the flush reply back to the caller unchanged.
+ */
+int32_t
+unify_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * unify_flush - flush on the storage node recorded in the fd context.
+ */
+int32_t
+unify_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        uint64_t  ctx_value = 0;
+        xlator_t *subvol = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_value);
+        subvol = (xlator_t *)(long)ctx_value;
+
+        STACK_WIND (frame, unify_flush_cbk,
+                    subvol, subvol->fops->flush, fd);
+
+        return 0;
+}
+
+
+/**
+ * unify_fsync_cbk - pass the fsync reply back to the caller unchanged.
+ */
+int32_t
+unify_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * unify_fsync - fsync on the storage node recorded in the fd context.
+ */
+int32_t
+unify_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+{
+        uint64_t  ctx_value = 0;
+        xlator_t *subvol = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_value);
+        subvol = (xlator_t *)(long)ctx_value;
+
+        STACK_WIND (frame, unify_fsync_cbk,
+                    subvol, subvol->fops->fsync, fd, flags);
+
+        return 0;
+}
+
+/**
+ * unify_fstat - fstat on an open fd. A file fd (storage node present in
+ * the fd context) is stat'ed on that node and on the namespace; a
+ * directory fd on the namespace only. Replies are handled by
+ * unify_buf_cbk.
+ */
+int32_t
+unify_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        unify_local_t *local = NULL;
+        xlator_t      *storage = NULL;
+        uint64_t       ctx_value = 0;
+        int            no_ctx = 0;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
+
+        INIT_LOCAL (frame, local);
+        local->st_ino = fd->inode->ino;
+
+        no_ctx = fd_ctx_get (fd, this, &ctx_value);
+        if (no_ctx) {
+                /* this is an directory */
+                local->call_count = 1;
+                STACK_WIND (frame, unify_buf_cbk,
+                            NS(this), NS(this)->fops->fstat, fd);
+                return 0;
+        }
+
+        /* If its set, then its file */
+        storage = (xlator_t *)(long)ctx_value;
+        local->call_count = 2;
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    storage, storage->fops->fstat, fd);
+
+        STACK_WIND (frame, unify_buf_cbk,
+                    NS(this), NS(this)->fops->fstat, fd);
+
+        return 0;
+}
+
+/**
+ * unify_getdents_cbk - pass the getdents reply back to the caller
+ * unchanged.
+ */
+int32_t
+unify_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    dir_entry_t *entry, int32_t count)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, entry, count);
+
+        return 0;
+}
+
+/**
+ * unify_getdents - directory entries are fetched from the namespace
+ * node only.
+ */
+int32_t
+unify_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                size_t size, off_t offset, int32_t flag)
+{
+        xlator_t *ns = NULL;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
+
+        ns = NS(this);
+        STACK_WIND (frame, unify_getdents_cbk,
+                    ns, ns->fops->getdents,
+                    fd, size, offset, flag);
+
+        return 0;
+}
+
+
+/**
+ * unify_readdir_cbk - pass the readdir reply back to the caller
+ * unchanged.
+ */
+int32_t
+unify_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/**
+ * unify_readdir - directories are read from the namespace node only.
+ */
+int32_t
+unify_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               size_t size, off_t offset)
+{
+        xlator_t *ns = NULL;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
+
+        ns = NS(this);
+        STACK_WIND (frame, unify_readdir_cbk,
+                    ns, ns->fops->readdir,
+                    fd, size, offset);
+
+        return 0;
+}
+
+
+/**
+ * unify_fsyncdir_cbk - pass the fsyncdir reply back to the caller
+ * unchanged.
+ */
+int32_t
+unify_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * unify_fsyncdir - directories are synced on the namespace node only.
+ */
+int32_t
+unify_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+{
+        xlator_t *ns = NULL;
+
+        UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
+
+        ns = NS(this);
+        STACK_WIND (frame, unify_fsyncdir_cbk,
+                    ns, ns->fops->fsyncdir, fd, flags);
+
+        return 0;
+}
+
+/**
+ * unify_lk_cbk - pass the lk reply, including the returned lock
+ * description, back to the caller unchanged.
+ */
+int32_t
+unify_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, lock);
+
+        return 0;
+}
+
+/**
+ * unify_lk - forward the lock request to the storage node recorded in
+ * the fd context.
+ */
+int32_t
+unify_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+          int32_t cmd, struct flock *lock)
+{
+        uint64_t  ctx_value = 0;
+        xlator_t *subvol = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_value);
+        subvol = (xlator_t *)(long)ctx_value;
+
+        STACK_WIND (frame, unify_lk_cbk,
+                    subvol, subvol->fops->lk, fd, cmd, lock);
+
+        return 0;
+}
+
+
+int32_t
+unify_setxattr_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno);
+
+/*
+ * unify_setxattr_file_cbk - reply from the namespace to the
+ * XATTR_CREATE probe made for a file-content setxattr.
+ *
+ * On failure the error is unwound; it is logged unless it is ENOTSUP.
+ * On success the saved setxattr (local->dict, local->flags) is wound to
+ * the node chosen by the scheduler for local->name and answered in
+ * unify_setxattr_cbk. When no node can be scheduled the call fails with
+ * ENOTCONN.
+ *
+ * Always returns 0.
+ */
+static int32_t
+unify_setxattr_file_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno)
+{
+        unify_private_t  *private = this->private;
+        unify_local_t    *local = frame->local;
+        xlator_t         *sched_xl = NULL;
+        struct sched_ops *sched_ops = NULL;
+
+        if (op_ret == -1) {
+                /* ENOTSUP is an expected answer and not worth an
+                   error log */
+                if (op_errno != ENOTSUP)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "setxattr with XATTR_CREATE on ns: "
+                                "path(%s) key(%s): %s",
+                                local->loc1.path, local->name,
+                                strerror (op_errno));
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno);
+                return 0;
+        }
+
+        LOCK (&frame->lock);
+        {
+                local->failed = 0;
+                local->op_ret = 0;
+                local->op_errno = 0;
+                local->call_count = 1;
+        }
+        UNLOCK (&frame->lock);
+
+        /* schedule XATTR_CREATE on one of the child node */
+        sched_ops = private->sched_ops;
+
+        /* Send create request to the scheduled node now */
+        sched_xl = sched_ops->schedule (this, local->name);
+        if (!sched_xl) {
+                /* nothing was wound, so release local's contents here */
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, -1, ENOTCONN);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    unify_setxattr_cbk,
+                    sched_xl,
+                    sched_xl->fops->setxattr,
+                    &local->loc1,
+                    local->dict,
+                    local->flags);
+        return 0;
+}
+
+/**
+ * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
+ *
+ * @cookie: the frame of the node that replied; only its ->this->name
+ *          is used, for logging.
+ *
+ * A failed reply is logged (at DEBUG level for ENOENT and ENOTSUP, at
+ * ERROR level otherwise), records op_errno and moves local->failed
+ * from -1 to 1. A successful reply clears local->failed and stores
+ * op_ret.
+ *
+ * After the last reply, if local->failed is non-zero and local->name
+ * is a file-content request key, a new dict holding only the first key
+ * of local->dict with an empty value is sent to the namespace with
+ * XATTR_CREATE; that reply is handled by unify_setxattr_file_cbk.
+ * Otherwise local is wiped and the saved result is unwound.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ int32_t callcnt = 0;
+ unify_local_t *local = frame->local;
+ call_frame_t *prev_frame = cookie;
+ dict_t *dict = NULL;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, (((op_errno == ENOENT) ||
+ (op_errno == ENOTSUP))?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "child(%s): path(%s): %s",
+ prev_frame->this->name,
+ (local->loc1.path)?local->loc1.path:"",
+ strerror (op_errno));
+ if (local->failed == -1) { /* -1 means no reply has been recorded yet */
+ local->failed = 1;
+ }
+ local->op_errno = op_errno;
+ } else {
+ local->failed = 0;
+ local->op_ret = op_ret;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed && local->name &&
+ ZR_FILE_CONTENT_REQUEST(local->name)) {
+ dict = get_new_dict ();
+ dict_set (dict, local->dict->members_list->key,
+ data_from_dynptr(NULL, 0));
+ dict_ref (dict); /* hold a reference across the wind; dropped right after it */
+
+ local->call_count = 1;
+
+ STACK_WIND (frame,
+ unify_setxattr_file_cbk,
+ NS(this),
+ NS(this)->fops->setxattr,
+ &local->loc1,
+ dict,
+ XATTR_CREATE);
+
+ dict_unref (dict);
+ return 0;
+ }
+
+ unify_local_wipe (local);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ }
+
+ return 0;
+}
+
+/**
+ * unify_setxattr - Sends setxattr to the storage nodes holding the entry.
+ *
+ * For a directory the request goes to every child node.  If the first
+ * key of 'dict' is a file-content request, the original flags, dict and
+ * key are remembered in 'local' and XATTR_REPLACE is added to the flags
+ * that are sent down.
+ *
+ * For any other inode the request goes to every node named in the
+ * inode's context list, except the namespace.  When the list names no
+ * storage node the frame is unwound with -1 / ENOENT.
+ *
+ * @frame - call frame; 'local' is allocated on it by INIT_LOCAL
+ * @this  - this xlator
+ * @loc   - location of the entry
+ * @dict  - extended attributes to set
+ * @flags - setxattr flags from the caller
+ */
+int32_t
+unify_setxattr (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                dict_t *dict,
+                int32_t flags)
+{
+        unify_private_t *priv = this->private;
+        unify_local_t *local = NULL;
+        int16_t *list = NULL;
+        int16_t index = 0;
+        int32_t call_count = 0;
+        uint64_t tmp_list = 0;
+        data_pair_t *trav = dict->members_list;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        /* -1: no reply received yet (see unify_setxattr_cbk) */
+        local->failed = -1;
+        loc_copy (&local->loc1, loc);
+
+        if (S_ISDIR (loc->inode->st_mode)) {
+
+                if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
+                        /* direct the storage xlators to change file
+                           content only if file exists */
+                        local->flags = flags;
+                        local->dict = dict;
+                        local->name = strdup (trav->key);
+                        flags |= XATTR_REPLACE;
+                }
+
+                local->call_count = priv->child_count;
+                for (index = 0; index < priv->child_count; index++) {
+                        STACK_WIND (frame,
+                                    unify_setxattr_cbk,
+                                    priv->xl_array[index],
+                                    priv->xl_array[index]->fops->setxattr,
+                                    loc, dict, flags);
+                }
+                return 0;
+        }
+
+        inode_ctx_get (loc->inode, this, &tmp_list);
+        list = (int16_t *)(long)tmp_list;
+
+        /* Count the storage nodes (everything but the namespace) */
+        for (index = 0; list[index] != -1; index++) {
+                if (NS(this) != priv->xl_array[list[index]]) {
+                        local->call_count++;
+                        call_count++;
+                }
+        }
+
+        if (local->call_count) {
+                for (index = 0; list[index] != -1; index++) {
+                        if (priv->xl_array[list[index]] != NS(this)) {
+                                STACK_WIND (frame,
+                                            unify_setxattr_cbk,
+                                            priv->xl_array[list[index]],
+                                            priv->xl_array[list[index]]->fops->setxattr,
+                                            loc,
+                                            dict,
+                                            flags);
+                                /* stop right after the last wind; the
+                                   private counter is used instead of
+                                   local->call_count, which the callbacks
+                                   decrement */
+                                if (!--call_count)
+                                        break;
+                        }
+                }
+                return 0;
+        }
+
+        /* No entry in storage nodes */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "returning ENOENT, file not found on storage node.");
+        /* release the copy taken by loc_copy () above, as every other
+           unwind path of the setxattr family does */
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, -1, ENOENT);
+
+        return 0;
+}
+
+
+/**
+ * unify_getxattr_cbk - Collects the getxattr replies of the child nodes.
+ *
+ * The first successful reply's dict is kept (with a reference) in
+ * local->dict.  Once the last reply is in, the frame is unwound with the
+ * gathered op_ret / op_errno and that dict, which is then unreferenced.
+ */
+int32_t
+unify_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *value)
+{
+        unify_local_t *local   = frame->local;
+        call_frame_t  *prev    = cookie;
+        dict_t        *reply   = NULL;
+        int32_t        pending = 0;
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+
+                if (op_ret != -1) {
+                        if (!local->dict)
+                                local->dict = dict_ref (value);
+                        local->op_ret = op_ret;
+                } else {
+                        /* these three errnos are only worth a debug line */
+                        int benign = ((op_errno == ENOENT) ||
+                                      (op_errno == ENODATA) ||
+                                      (op_errno == ENOTSUP));
+
+                        local->op_errno = op_errno;
+                        gf_log (this->name,
+                                (benign ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                                "child(%s): path(%s): %s",
+                                prev->this->name,
+                                (local->loc1.path) ? local->loc1.path : "",
+                                strerror (op_errno));
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        /* detach the dict from 'local' before unwinding */
+        reply = local->dict;
+        local->dict = NULL;
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno, reply);
+
+        if (reply)
+                dict_unref (reply);
+
+        return 0;
+}
+
+
+/**
+ * unify_getxattr - Sends getxattr to the storage nodes.
+ *
+ * A directory is queried on every child node.  Any other inode is
+ * queried on each node of its context list except the namespace; when
+ * that leaves no node, the frame is unwound with -1 / ENODATA and an
+ * empty dict.
+ */
+int32_t
+unify_getxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name)
+{
+        unify_private_t *priv    = this->private;
+        unify_local_t   *local   = NULL;
+        int16_t         *list    = NULL;
+        dict_t          *empty   = NULL;
+        uint64_t         ctx_val = 0;
+        int16_t          idx     = 0;
+        int16_t          pending = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+        INIT_LOCAL (frame, local);
+
+        if (S_ISDIR (loc->inode->st_mode)) {
+                local->call_count = priv->child_count;
+                for (idx = 0; idx < priv->child_count; idx++) {
+                        STACK_WIND (frame,
+                                    unify_getxattr_cbk,
+                                    priv->xl_array[idx],
+                                    priv->xl_array[idx]->fops->getxattr,
+                                    loc,
+                                    name);
+                }
+                return 0;
+        }
+
+        inode_ctx_get (loc->inode, this, &ctx_val);
+        list = (int16_t *)(long)ctx_val;
+
+        /* count every listed node that is not the namespace */
+        for (idx = 0; list[idx] != -1; idx++) {
+                if (NS(this) != priv->xl_array[list[idx]]) {
+                        local->call_count++;
+                        pending++;
+                }
+        }
+
+        if (!pending) {
+                empty = get_new_dict ();
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s: returning ENODATA, no file found on storage node",
+                        loc->path);
+                STACK_UNWIND (frame, -1, ENODATA, empty);
+                dict_destroy (empty);
+                return 0;
+        }
+
+        for (idx = 0; list[idx] != -1; idx++) {
+                if (priv->xl_array[list[idx]] == NS(this))
+                        continue;
+
+                STACK_WIND (frame,
+                            unify_getxattr_cbk,
+                            priv->xl_array[list[idx]],
+                            priv->xl_array[list[idx]]->fops->getxattr,
+                            loc,
+                            name);
+                /* leave as soon as the last request has been sent */
+                if (!--pending)
+                        break;
+        }
+
+        return 0;
+}
+
+/**
+ * unify_removexattr_cbk - Waits until all the child nodes have answered
+ * and then unwinds to the layer above.
+ *
+ * A failure stores op_errno in 'local' and is logged unless it is
+ * ENOTSUP; a success stores op_ret.  The frame is unwound with the
+ * values gathered in 'local' when the last reply arrives.
+ */
+int32_t
+unify_removexattr_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno)
+{
+        int32_t callcnt = 0;
+        unify_local_t *local = frame->local;
+        call_frame_t *prev_frame = cookie;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                if (op_ret == -1) {
+                        local->op_errno = op_errno;
+                        /* unify_removexattr () never fills local->loc1,
+                           so guard the path the same way the setxattr
+                           and getxattr callbacks do instead of handing
+                           a NULL pointer to "%s" */
+                        if (op_errno != ENOTSUP)
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "child(%s): path(%s): %s",
+                                        prev_frame->this->name,
+                                        (local->loc1.path)?local->loc1.path:"",
+                                        strerror (op_errno));
+                } else {
+                        local->op_ret = op_ret;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                STACK_UNWIND (frame, local->op_ret, local->op_errno);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_removexattr - Sends removexattr to the child nodes that hold the
+ * entry.
+ *
+ * A directory is handled on every child node.  Any other inode is
+ * handled on each node of its context list except the namespace; when
+ * that leaves no node, the frame is unwound with -1 / ENOENT.
+ */
+int32_t
+unify_removexattr (call_frame_t *frame, xlator_t *this,
+                   loc_t *loc, const char *name)
+{
+        unify_private_t *priv    = this->private;
+        unify_local_t   *local   = NULL;
+        int16_t         *list    = NULL;
+        uint64_t         ctx_val = 0;
+        int32_t          pending = 0;
+        int16_t          idx     = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+
+        if (S_ISDIR (loc->inode->st_mode)) {
+                local->call_count = priv->child_count;
+                for (idx = 0; idx < priv->child_count; idx++) {
+                        STACK_WIND (frame,
+                                    unify_removexattr_cbk,
+                                    priv->xl_array[idx],
+                                    priv->xl_array[idx]->fops->removexattr,
+                                    loc,
+                                    name);
+                }
+                return 0;
+        }
+
+        inode_ctx_get (loc->inode, this, &ctx_val);
+        list = (int16_t *)(long)ctx_val;
+
+        /* count every listed node that is not the namespace */
+        for (idx = 0; list[idx] != -1; idx++) {
+                if (NS(this) != priv->xl_array[list[idx]]) {
+                        local->call_count++;
+                        pending++;
+                }
+        }
+
+        if (!local->call_count) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s: returning ENOENT, not found on storage node.", loc->path);
+                STACK_UNWIND (frame, -1, ENOENT);
+                return 0;
+        }
+
+        for (idx = 0; list[idx] != -1; idx++) {
+                if (priv->xl_array[list[idx]] == NS(this))
+                        continue;
+
+                STACK_WIND (frame,
+                            unify_removexattr_cbk,
+                            priv->xl_array[list[idx]],
+                            priv->xl_array[list[idx]]->fops->removexattr,
+                            loc,
+                            name);
+                /* leave as soon as the last request has been sent */
+                if (!--pending)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_mknod_unlink_cbk - Callback of the unlink sent to the namespace
+ * when mknod could not be completed on a storage node.  Always unwinds
+ * with -1 and the errno saved earlier in local->op_errno.
+ */
+int32_t
+unify_mknod_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        unify_local_t *local = frame->local;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: %s", local->loc1.path, strerror (op_errno));
+        }
+
+        unify_local_wipe (local);
+        /* Nothing to log for this -1: it reports the failed mknod */
+        STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
+        return 0;
+}
+
+/**
+ * unify_mknod_cbk - Callback of the mknod sent to the scheduled storage
+ * node.
+ *
+ * On success the stat is unwound with st_ino replaced by the value kept
+ * in local->st_ino.  On failure the errno is saved and an unlink is sent
+ * to the namespace.
+ */
+int32_t
+unify_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 inode_t *inode, struct stat *buf)
+{
+        unify_local_t *local = frame->local;
+
+        if (op_ret != -1) {
+                local->stbuf = *buf;
+                local->stbuf.st_ino = local->st_ino;
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "mknod failed on storage node, sending unlink to "
+                "namespace");
+        local->op_errno = op_errno;
+        STACK_WIND (frame,
+                    unify_mknod_unlink_cbk,
+                    NS(this),
+                    NS(this)->fops->unlink,
+                    &local->loc1);
+        return 0;
+}
+
+/**
+ * unify_ns_mknod_cbk - Callback of the mknod sent to the namespace.
+ *
+ * On failure the frame is unwound at once.  On success a three-slot
+ * list { namespace index, storage node index, -1 } is attached to the
+ * inode context and mknod is sent to the node picked by the scheduler.
+ * When the scheduler returns no node, the namespace entry is unlinked
+ * with ENOTCONN saved as the errno to report.
+ */
+int32_t
+unify_ns_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    inode_t *inode, struct stat *buf)
+{
+        unify_local_t    *local     = frame->local;
+        unify_private_t  *priv      = this->private;
+        call_frame_t     *prev      = cookie;
+        struct sched_ops *ops       = NULL;
+        xlator_t         *target    = NULL;
+        int16_t          *ctx_list  = NULL;
+        int16_t           slot      = 0;
+
+        if (op_ret == -1) {
+                /* The namespace refused the entry, so the storage
+                 * nodes are not contacted at all
+                 */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "child(%s): path(%s): %s",
+                        prev->this->name, local->loc1.path,
+                        strerror (op_errno));
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+                return 0;
+        }
+
+        /* Remember the namespace's view of the new entry */
+        local->op_ret = 0;
+        local->stbuf  = *buf;
+        local->st_ino = buf->st_ino;
+
+        ctx_list = CALLOC (1, sizeof (int16_t) * 3);
+        ERR_ABORT (ctx_list);
+        ctx_list[0] = priv->child_count;
+        ctx_list[2] = -1;
+        inode_ctx_put (inode, this, (uint64_t)(long)ctx_list);
+
+        ops = priv->sched_ops;
+
+        /* Ask the scheduler which node gets the entry */
+        target = ops->schedule (this, local->loc1.path);
+        if (!target) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "mknod failed on storage node, no node online "
+                        "at the moment, sending unlink to NS");
+                local->op_errno = ENOTCONN;
+                STACK_WIND (frame,
+                            unify_mknod_unlink_cbk,
+                            NS(this),
+                            NS(this)->fops->unlink,
+                            &local->loc1);
+
+                return 0;
+        }
+
+        /* Record the position of the chosen node in xl_array */
+        while (slot < priv->child_count) {
+                if (target == priv->xl_array[slot])
+                        break;
+                slot++;
+        }
+        ctx_list[1] = slot;
+
+        STACK_WIND (frame, unify_mknod_cbk,
+                    target, target->fops->mknod,
+                    &local->loc1, local->mode, local->dev);
+
+        return 0;
+}
+
+/**
+ * unify_mknod - Creates a device node on the namespace first; the
+ * storage node is handled from unify_ns_mknod_cbk.
+ *
+ * Unwinds with -1 / ENOMEM when the path could not be copied into
+ * 'local'.
+ */
+int32_t
+unify_mknod (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode, dev_t rdev)
+{
+        unify_local_t *local = NULL;
+
+        /* Keep everything the callbacks need in 'local' */
+        INIT_LOCAL (frame, local);
+        local->mode = mode;
+        local->dev = rdev;
+        loc_copy (&local->loc1, loc);
+
+        if (!local->loc1.path) {
+                gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+                STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, unify_ns_mknod_cbk,
+                    NS(this), NS(this)->fops->mknod,
+                    loc, mode, rdev);
+
+        return 0;
+}
+
+/**
+ * unify_symlink_unlink_cbk - Callback of the unlink sent to the
+ * namespace when symlink could not be completed on a storage node.
+ * Always unwinds with -1 and the errno saved in local->op_errno.
+ */
+int32_t
+unify_symlink_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        unify_local_t *local = frame->local;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s: %s", local->loc1.path, strerror (op_errno));
+        }
+
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
+        return 0;
+}
+
+/**
+ * unify_symlink_cbk - Callback of the symlink sent to the scheduled
+ * storage node.
+ *
+ * On success the stat is unwound with st_ino replaced by the value kept
+ * in local->st_ino.  On failure the errno is saved and an unlink is sent
+ * to the namespace.
+ */
+int32_t
+unify_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   inode_t *inode, struct stat *buf)
+{
+        unify_local_t *local = frame->local;
+
+        if (op_ret != -1) {
+                local->stbuf = *buf;
+                local->stbuf.st_ino = local->st_ino;
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
+
+                return 0;
+        }
+
+        /* The storage node failed: undo the entry made on the
+           namespace */
+        local->op_errno = op_errno;
+        gf_log (this->name, GF_LOG_ERROR,
+                "symlink on storage node failed, sending unlink "
+                "to namespace");
+
+        STACK_WIND (frame,
+                    unify_symlink_unlink_cbk,
+                    NS(this),
+                    NS(this)->fops->unlink,
+                    &local->loc1);
+
+        return 0;
+}
+
+/**
+ * unify_ns_symlink_cbk - Callback of the symlink sent to the namespace.
+ *
+ * On failure the frame is unwound at once.  On success a three-slot
+ * list { namespace index, storage node index, -1 } is attached to the
+ * inode context and symlink is sent to the node picked by the
+ * scheduler.  When the scheduler returns no node, the namespace entry
+ * is unlinked with ENOTCONN saved as the errno to report.
+ */
+int32_t
+unify_ns_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno,
+                      inode_t *inode, struct stat *buf)
+{
+        unify_local_t    *local    = frame->local;
+        unify_private_t  *priv     = this->private;
+        struct sched_ops *ops      = NULL;
+        xlator_t         *target   = NULL;
+        int16_t          *ctx_list = NULL;
+        int16_t           slot     = 0;
+
+        if (op_ret == -1) {
+                /* The namespace refused the entry, so the storage
+                 * nodes are not contacted at all
+                 */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "namespace: path(%s): %s",
+                        local->loc1.path, strerror (op_errno));
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, NULL, buf);
+                return 0;
+        }
+
+        /* Remember the namespace's inode number for the entry */
+        local->op_ret = 0;
+        local->st_ino = buf->st_ino;
+
+        /* Start the mapping list; slot 0 holds the namespace's index */
+        ctx_list = CALLOC (1, sizeof (int16_t) * 3);
+        ERR_ABORT (ctx_list);
+        ctx_list[0] = priv->child_count;
+        ctx_list[2] = -1;
+        inode_ctx_put (inode, this, (uint64_t)(long)ctx_list);
+
+        ops = priv->sched_ops;
+
+        /* Ask the scheduler which node gets the symlink */
+        target = ops->schedule (this, local->loc1.path);
+        if (!target) {
+                /* No node available: undo the entry made on the
+                   namespace */
+                local->op_errno = ENOTCONN;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "symlink on storage node failed, no node online, "
+                        "sending unlink to namespace");
+
+                STACK_WIND (frame,
+                            unify_symlink_unlink_cbk,
+                            NS(this),
+                            NS(this)->fops->unlink,
+                            &local->loc1);
+
+                return 0;
+        }
+
+        /* Record the position of the chosen node in xl_array */
+        while (slot < priv->child_count) {
+                if (target == priv->xl_array[slot])
+                        break;
+                slot++;
+        }
+        ctx_list[1] = slot;
+
+        STACK_WIND (frame,
+                    unify_symlink_cbk,
+                    target,
+                    target->fops->symlink,
+                    local->name,
+                    &local->loc1);
+
+        return 0;
+}
+
+/**
+ * unify_symlink - Creates the symlink on the namespace first; the
+ * storage node is handled from unify_ns_symlink_cbk.
+ *
+ * The link target is duplicated into local->name and the location into
+ * local->loc1.  Unwinds with -1 / ENOMEM if either copy is missing.
+ */
+int32_t
+unify_symlink (call_frame_t *frame, xlator_t *this,
+               const char *linkpath, loc_t *loc)
+{
+        unify_local_t *local = NULL;
+
+        /* Keep everything the callbacks need in 'local' */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+        local->name = strdup (linkpath);
+
+        if (!local->name || !local->loc1.path) {
+                gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+                STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, unify_ns_symlink_cbk,
+                    NS(this), NS(this)->fops->symlink,
+                    linkpath, loc);
+
+        return 0;
+}
+
+
+/**
+ * unify_rename_unlink_cbk - Callback of the unlinks sent from
+ * unify_rename_cbk.  Failures are only logged; when the last reply
+ * arrives the frame is unwound with the result stored in 'local'.
+ */
+int32_t
+unify_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno)
+{
+        unify_local_t *local   = frame->local;
+        call_frame_t  *prev    = cookie;
+        int32_t        pending = 0;
+
+        if (op_ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "child(%s): path(%s -> %s): %s",
+                        prev->this->name,
+                        local->loc1.path, local->loc2.path,
+                        strerror (op_errno));
+        }
+
+        LOCK (&frame->lock);
+        {
+                pending = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (pending)
+                return 0;
+
+        /* report the inode number kept in local->st_ino */
+        local->stbuf.st_ino = local->st_ino;
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      &local->stbuf);
+        return 0;
+}
+
+/**
+ * unify_ns_rename_undo_cbk - Callback of the rename sent to the
+ * namespace to revert an earlier rename.  A failure is only logged; the
+ * frame is unwound with the result already stored in 'local'.
+ */
+int32_t
+unify_ns_rename_undo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno,
+                          struct stat *buf)
+{
+        unify_local_t *local = frame->local;
+
+        if (op_ret == -1)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "namespace: path(%s -> %s): %s",
+                        local->loc1.path, local->loc2.path,
+                        strerror (op_errno));
+
+        /* report the inode number kept in local->st_ino */
+        local->stbuf.st_ino = local->st_ino;
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
+        return 0;
+}
+
+/**
+ * unify_rename_cbk - Callback of the renames sent to the storage nodes.
+ *
+ * Each reply updates 'local' under the frame lock.  Once the last reply
+ * is in:
+ *  - for a directory the frame is unwound with the gathered result;
+ *  - if the rename failed (local->op_ret == -1) and local->index is 0,
+ *    the rename is reverted on the namespace;
+ *  - if it succeeded and the destination inode has a context list, an
+ *    unlink of the destination is sent to every listed storage node
+ *    that did not take part in the rename;
+ *  - otherwise the frame is unwound with the gathered result.
+ */
+int32_t
+unify_rename_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct stat *buf)
+{
+        int32_t index = 0;
+        int32_t callcnt = 0;
+        int16_t *list = NULL;
+        unify_private_t *priv = this->private;
+        unify_local_t *local = frame->local;
+        call_frame_t *prev_frame = cookie;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                if (op_ret >= 0) {
+                        if (!S_ISDIR (buf->st_mode))
+                                local->stbuf = *buf;
+                        local->op_ret = op_ret;
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "child(%s): path(%s -> %s): %s",
+                                prev_frame->this->name,
+                                local->loc1.path, local->loc2.path,
+                                strerror (op_errno));
+                        local->op_errno = op_errno;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (!callcnt) {
+                local->stbuf.st_ino = local->st_ino;
+                if (S_ISDIR (local->loc1.inode->st_mode)) {
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
+                        return 0;
+                }
+
+                if (local->op_ret == -1) {
+                        /* TODO: check this logic */
+
+                        /* Rename failed in storage node, successful on NS,
+                         * hence, rename back the entries in NS */
+                        /* NOTE: this will be done only if the destination
+                         * doesn't exists, if the destination exists, the
+                         * job of correcting NS is left to self-heal
+                         */
+                        if (!local->index) {
+                                loc_t tmp_oldloc = {
+                                        /* its actual 'newloc->path' */
+                                        .path = local->loc2.path,
+                                        .inode = local->loc1.inode,
+                                        .parent = local->loc2.parent
+                                };
+
+                                loc_t tmp_newloc = {
+                                        /* Actual 'oldloc->path' */
+                                        .path = local->loc1.path,
+                                        .parent = local->loc1.parent
+                                };
+
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "rename succussful on namespace, on "
+                                        "stroage node failed, reverting back");
+
+                                STACK_WIND (frame,
+                                            unify_ns_rename_undo_cbk,
+                                            NS(this),
+                                            NS(this)->fops->rename,
+                                            &tmp_oldloc,
+                                            &tmp_newloc);
+                                return 0;
+                        }
+                } else {
+                        /* Rename successful on storage nodes */
+
+                        int32_t idx = 0;
+                        int16_t *tmp_list = NULL;
+                        uint64_t tmp_list_int64 = 0;
+                        if (local->loc2.inode) {
+                                inode_ctx_get (local->loc2.inode,
+                                               this, &tmp_list_int64);
+                                list = (int16_t *)(long)tmp_list_int64;
+
+                        }
+
+                        if (list) {
+                                for (index = 0; list[index] != -1; index++);
+                                /* Copy the entries together with the -1
+                                 * terminator: the wind loop below is
+                                 * bounded by 'tmp_list[index] != -1'
+                                 */
+                                tmp_list = CALLOC (index + 1,
+                                                   sizeof (int16_t));
+                                ERR_ABORT (tmp_list);
+                                memcpy (tmp_list, list,
+                                        (index + 1) * sizeof (int16_t));
+
+                                for (index = 0; list[index] != -1; index++) {
+                                        /* TODO: Check this logic. */
+                                        /* If the destination file exists in
+                                         * the same storage node where we sent
+                                         * 'rename' call, no need to send
+                                         * unlink
+                                         */
+                                        for (idx = 0;
+                                             local->list[idx] != -1; idx++) {
+                                                if (tmp_list[index] == local->list[idx]) {
+                                                        /* child_count is the
+                                                         * namespace's slot, so
+                                                         * the entry is skipped
+                                                         * below; nothing more
+                                                         * to compare
+                                                         */
+                                                        tmp_list[index] = priv->child_count;
+                                                        break;
+                                                }
+                                        }
+
+                                        if (NS(this) != priv->xl_array[tmp_list[index]]) {
+                                                local->call_count++;
+                                                callcnt++;
+                                        }
+                                }
+
+                                if (local->call_count) {
+                                        if (callcnt > 1)
+                                                gf_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "%s->%s: more (%d) "
+                                                        "subvolumes have the "
+                                                        "newloc entry",
+                                                        local->loc1.path,
+                                                        local->loc2.path,
+                                                        callcnt);
+
+                                        for (index=0;
+                                             tmp_list[index] != -1; index++) {
+                                                if (NS(this) != priv->xl_array[tmp_list[index]]) {
+                                                        STACK_WIND (frame,
+                                                                    unify_rename_unlink_cbk,
+                                                                    priv->xl_array[tmp_list[index]],
+                                                                    priv->xl_array[tmp_list[index]]->fops->unlink,
+                                                                    &local->loc2);
+                                                        /* stop right after
+                                                         * the last wind
+                                                         */
+                                                        if (!--callcnt)
+                                                                break;
+                                                }
+                                        }
+
+                                        FREE (tmp_list);
+                                        return 0;
+                                }
+                                if (tmp_list)
+                                        FREE (tmp_list);
+                        }
+                }
+
+                /* Need not send 'unlink' to storage node */
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, local->op_ret,
+                              local->op_errno, &local->stbuf);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_ns_rename_cbk - Callback of the rename sent to the namespace.
+ *
+ * On failure the frame is unwound at once.  On success the rename is
+ * sent to every child node for a directory, or to each non-namespace
+ * node of local->list otherwise.  If that list names no storage node
+ * the frame is unwound with -1 / EIO.
+ */
+int32_t
+unify_ns_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     struct stat *buf)
+{
+        unify_private_t *priv    = this->private;
+        unify_local_t   *local   = frame->local;
+        int16_t         *nodes   = NULL;
+        int32_t          pending = 0;
+        int32_t          idx     = 0;
+
+        if (op_ret == -1) {
+                /* Free local->new_inode */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "namespace: path(%s -> %s): %s",
+                        local->loc1.path, local->loc2.path,
+                        strerror (op_errno));
+
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, buf);
+                return 0;
+        }
+
+        local->stbuf  = *buf;
+        local->st_ino = buf->st_ino;
+
+        /* A directory is renamed on every child node */
+        if (S_ISDIR (buf->st_mode)) {
+                local->call_count = priv->child_count;
+                for (idx = 0; idx < priv->child_count; idx++) {
+                        STACK_WIND (frame,
+                                    unify_rename_cbk,
+                                    priv->xl_array[idx],
+                                    priv->xl_array[idx]->fops->rename,
+                                    &local->loc1,
+                                    &local->loc2);
+                }
+
+                return 0;
+        }
+
+        /* Count the storage nodes that hold the source entry */
+        local->call_count = 0;
+        nodes = local->list;
+        for (idx = 0; nodes[idx] != -1; idx++) {
+                if (NS(this) != priv->xl_array[nodes[idx]]) {
+                        local->call_count++;
+                        pending++;
+                }
+        }
+
+        if (!local->call_count) {
+                /* file doesn't seem to be present in storage nodes */
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "CRITICAL: source file not in storage node, "
+                        "rename successful on namespace :O");
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, -1, EIO, NULL);
+                return 0;
+        }
+
+        for (idx = 0; nodes[idx] != -1; idx++) {
+                if (NS(this) == priv->xl_array[nodes[idx]])
+                        continue;
+
+                STACK_WIND (frame,
+                            unify_rename_cbk,
+                            priv->xl_array[nodes[idx]],
+                            priv->xl_array[nodes[idx]]->fops->rename,
+                            &local->loc1,
+                            &local->loc2);
+                /* leave as soon as the last request has been sent */
+                if (!--pending)
+                        break;
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_rename - Entry point of rename.  Both locations are copied into
+ * 'local', the source inode's context list is remembered in
+ * local->list, and the rename is sent to the namespace first; the
+ * storage nodes are handled from unify_ns_rename_cbk.
+ *
+ * Unwinds with -1 / ENOMEM if either path could not be copied.
+ */
+int32_t
+unify_rename (call_frame_t *frame, xlator_t *this,
+              loc_t *oldloc, loc_t *newloc)
+{
+        unify_local_t *local   = NULL;
+        uint64_t       ctx_val = 0;
+
+        /* Keep everything the callbacks need in 'local' */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, oldloc);
+        loc_copy (&local->loc2, newloc);
+
+        if (!local->loc1.path || !local->loc2.path) {
+                gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        inode_ctx_get (oldloc->inode, this, &ctx_val);
+        local->list = (int16_t *)(long)ctx_val;
+
+        STACK_WIND (frame, unify_ns_rename_cbk,
+                    NS(this), NS(this)->fops->rename,
+                    oldloc, newloc);
+        return 0;
+}
+
+/**
+ * unify_link_cbk - Callback of the link sent to a storage node.  Unwinds
+ * with the node's result; the stat handed up always carries the inode
+ * number kept in local->st_ino.
+ */
+int32_t
+unify_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                inode_t *inode, struct stat *buf)
+{
+        unify_local_t *local = frame->local;
+
+        /* 'buf' is copied only on success */
+        if (op_ret >= 0) {
+                local->stbuf = *buf;
+        }
+        local->stbuf.st_ino = local->st_ino;
+
+        unify_local_wipe (local);
+        STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
+
+        return 0;
+}
+
+/**
+ * unify_ns_link_cbk - Callback of the link sent to the namespace.
+ *
+ * On failure the frame is unwound at once.  On success the link is sent
+ * to each non-namespace node of local->list.
+ */
+int32_t
+unify_ns_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   inode_t *inode, struct stat *buf)
+{
+        unify_private_t *priv  = this->private;
+        unify_local_t   *local = frame->local;
+        int16_t         *nodes = local->list;
+        int16_t          idx   = 0;
+        char             is_last = 0;
+
+        if (op_ret == -1) {
+                /* The namespace refused the link, so the storage
+                 * nodes are not contacted at all
+                 */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "namespace: path(%s -> %s): %s",
+                        local->loc1.path, local->loc2.path,
+                        strerror (op_errno));
+                unify_local_wipe (local);
+                STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+                return 0;
+        }
+
+        /* Remember the namespace's inode number for the entry */
+        local->op_ret = 0;
+        local->st_ino = buf->st_ino;
+
+        for (idx = 0; nodes[idx] != -1; idx++) {
+                /* decided before winding, so the list is not read
+                   again once the last request has been sent */
+                is_last = (nodes[idx+1] == -1);
+
+                if (priv->xl_array[nodes[idx]] != NS (this)) {
+                        STACK_WIND (frame,
+                                    unify_link_cbk,
+                                    priv->xl_array[nodes[idx]],
+                                    priv->xl_array[nodes[idx]]->fops->link,
+                                    &local->loc1,
+                                    &local->loc2);
+                }
+
+                if (is_last)
+                        break;
+        }
+
+        return 0;
+}
+
+/**
+ * unify_link - Entry point of link.  Both locations are copied into
+ * 'local', the source inode's context list is remembered in
+ * local->list, and the link is sent to the namespace first; the
+ * storage nodes are handled from unify_ns_link_cbk.
+ */
+int32_t
+unify_link (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc)
+{
+        unify_local_t *local   = NULL;
+        uint64_t       ctx_val = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
+
+        /* Keep everything the callbacks need in 'local' */
+        INIT_LOCAL (frame, local);
+
+        loc_copy (&local->loc1, oldloc);
+        loc_copy (&local->loc2, newloc);
+
+        inode_ctx_get (oldloc->inode, this, &ctx_val);
+        local->list = (int16_t *)(long)ctx_val;
+
+        STACK_WIND (frame, unify_ns_link_cbk,
+                    NS(this), NS(this)->fops->link,
+                    oldloc, newloc);
+
+        return 0;
+}
+
+
+/**
+ * unify_checksum_cbk - Hands the namespace's checksum reply up
+ * unchanged.
+ */
+int32_t
+unify_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    uint8_t *fchecksum, uint8_t *dchecksum)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+
+        return 0;
+}
+
+/**
+ * unify_checksum - Forwards the checksum request to the namespace
+ * only.
+ */
+int32_t
+unify_checksum (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int32_t flag)
+{
+        STACK_WIND (frame, unify_checksum_cbk,
+                    NS(this), NS(this)->fops->checksum,
+                    loc, flag);
+
+        return 0;
+}
+
+
+/**
+ * unify_finodelk_cbk - Hands the child's finodelk reply up unchanged.
+ */
+int
+unify_finodelk_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * unify_finodelk - Forwards finodelk to the child xlator stored in the
+ * fd context.
+ */
+int
+unify_finodelk (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, int cmd, struct flock *flock)
+{
+        uint64_t  ctx_val = 0;
+        xlator_t *target  = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_val);
+        target = (xlator_t *)(long)ctx_val;
+
+        STACK_WIND (frame,
+                    unify_finodelk_cbk,
+                    target,
+                    target->fops->finodelk,
+                    fd, cmd, flock);
+
+        return 0;
+}
+
+
+
+/**
+ * unify_fentrylk_cbk - Hands the child's fentrylk reply up unchanged.
+ */
+int
+unify_fentrylk_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * unify_fentrylk - Forwards fentrylk to the child xlator stored in the
+ * fd context.
+ */
+int
+unify_fentrylk (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, const char *basename,
+                entrylk_cmd cmd, entrylk_type type)
+{
+        uint64_t  ctx_val = 0;
+        xlator_t *target  = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_val);
+        target = (xlator_t *)(long)ctx_val;
+
+        STACK_WIND (frame,
+                    unify_fentrylk_cbk,
+                    target,
+                    target->fops->fentrylk,
+                    fd, basename, cmd, type);
+
+        return 0;
+}
+
+
+
+/**
+ * unify_fxattrop_cbk - Hands the child's fxattrop reply, including the
+ * returned dict, up unchanged.
+ */
+int
+unify_fxattrop_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    dict_t *xattr)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, xattr);
+        return 0;
+}
+
+/**
+ * unify_fxattrop - Forwards fxattrop to the child xlator stored in the
+ * fd context.
+ */
+int
+unify_fxattrop (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        uint64_t  ctx_val = 0;
+        xlator_t *target  = NULL;
+
+        UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
+
+        fd_ctx_get (fd, this, &ctx_val);
+        target = (xlator_t *)(long)ctx_val;
+
+        STACK_WIND (frame,
+                    unify_fxattrop_cbk,
+                    target,
+                    target->fops->fxattrop,
+                    fd, optype, xattr);
+
+        return 0;
+}
+
+
+/**
+ * unify_inodelk_cbk - Hands the child's inodelk reply up unchanged.
+ */
+int
+unify_inodelk_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/**
+ * unify_inodelk - Forwards inodelk to the subvolume that
+ * unify_loc_subvol () returns for 'loc'.
+ */
+int
+unify_inodelk (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, int cmd, struct flock *flock)
+{
+        xlator_t *target = unify_loc_subvol (loc, this);
+
+        STACK_WIND (frame,
+                    unify_inodelk_cbk,
+                    target,
+                    target->fops->inodelk,
+                    loc, cmd, flock);
+
+        return 0;
+}
+
+
+
+/**
+ * unify_entrylk_cbk - Hands the child's entrylk reply up unchanged.
+ */
+int
+unify_entrylk_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * unify_entrylk - Forwards entrylk to the subvolume that
+ * unify_loc_subvol () returns for 'loc'.
+ */
+int
+unify_entrylk (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, const char *basename,
+               entrylk_cmd cmd, entrylk_type type)
+{
+        xlator_t *target = unify_loc_subvol (loc, this);
+
+        STACK_WIND (frame,
+                    unify_entrylk_cbk,
+                    target,
+                    target->fops->entrylk,
+                    loc, basename, cmd, type);
+
+        return 0;
+}
+
+
+
+/**
+ * unify_xattrop_cbk - Hands the child's xattrop reply, including the
+ * returned dict, up unchanged.
+ */
+int
+unify_xattrop_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   dict_t *xattr)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, xattr);
+        return 0;
+}
+
+/**
+ * unify_xattrop - Forwards xattrop to the subvolume that
+ * unify_loc_subvol () returns for 'loc'.
+ */
+int
+unify_xattrop (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        xlator_t *target = unify_loc_subvol (loc, this);
+
+        STACK_WIND (frame,
+                    unify_xattrop_cbk,
+                    target,
+                    target->fops->xattrop,
+                    loc, optype, xattr);
+
+        return 0;
+}
+
+
+/**
+ * notify - Handles the events reported by the child nodes.
+ *
+ * Events from the namespace are passed to the scheduler only when they
+ * are CHILD_UP, and never go further.  For the other children:
+ *  - CHILD_UP informs the scheduler, bumps the inode generation and the
+ *    count of live children, and is propagated if unify was not marked
+ *    up yet;
+ *  - CHILD_DOWN informs the scheduler, lowers the count of live
+ *    children, and is propagated once that count reaches zero;
+ *  - any other event goes straight to default_notify ().
+ *
+ * Without a scheduler SIGTERM is raised.  Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        unify_private_t  *priv = this->private;
+        struct sched_ops *ops  = NULL;
+
+        if (!priv)
+                return 0;
+
+        ops = priv->sched_ops;
+        if (!ops) {
+                gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
+                raise (SIGTERM);
+                return 0;
+        }
+
+        if (priv->namespace == data) {
+                if (event == GF_EVENT_CHILD_UP)
+                        ops->notify (this, event, data);
+                return 0;
+        }
+
+        if (event == GF_EVENT_CHILD_UP) {
+                /* Let the scheduler enable the node for scheduling */
+                ops->notify (this, event, data);
+
+                LOCK (&priv->lock);
+                {
+                        /* The inode generation is what self-heal
+                           looks at */
+                        ++priv->inode_generation;
+                        ++priv->num_child_up;
+                }
+                UNLOCK (&priv->lock);
+
+                if (!priv->is_up) {
+                        default_notify (this, event, data);
+                        priv->is_up = 1;
+                }
+        } else if (event == GF_EVENT_CHILD_DOWN) {
+                /* Let the scheduler disable the node for scheduling */
+                ops->notify (this, event, data);
+
+                LOCK (&priv->lock);
+                {
+                        --priv->num_child_up;
+                }
+                UNLOCK (&priv->lock);
+
+                if (priv->num_child_up == 0) {
+                        /* The last child is gone: tell the upper
+                           layer */
+                        default_notify (this, event, data);
+                        priv->is_up = 0;
+                }
+        } else {
+                default_notify (this, event, data);
+        }
+
+        return 0;
+}
+
+/**
+ * init - This function is called first in the xlator, while initializing.
+ * All the config file options are checked and appropriate flags are set.
+ *
+ * @this -
+ */
+int32_t
+init (xlator_t *this)
+{
+ int32_t ret = 0;
+ int32_t count = 0;
+ data_t *scheduler = NULL;
+ data_t *data = NULL;
+ xlator_t *ns_xl = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_list_t *xlparent = NULL;
+ xlator_list_t *parent = NULL;
+ unify_private_t *_private = NULL;
+
+ /* Check for number of child nodes, if there is no child nodes, exit */
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "No child nodes specified. check \"subvolumes \" "
+ "option in volfile");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ /* Check for 'scheduler' in volume */
+ scheduler = dict_get (this->options, "scheduler");
+ if (!scheduler) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"option scheduler <x>\" is missing in volfile");
+ return -1;
+ }
+
+ /* Setting "option namespace <node>" */
+ data = dict_get (this->options, "namespace");
+ if(!data) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "namespace option not specified, Exiting");
+ return -1;
+ }
+ /* Search namespace in the child node, if found, exit */
+ trav = this->children;
+ while (trav) {
+ if (strcmp (trav->xlator->name, data->data) == 0)
+ break;
+ trav = trav->next;
+ }
+ if (trav) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "namespace node used as a subvolume, Exiting");
+ return -1;
+ }
+
+ /* Search for the namespace node, if found, continue */
+ ns_xl = this->next;
+ while (ns_xl) {
+ if (strcmp (ns_xl->name, data->data) == 0)
+ break;
+ ns_xl = ns_xl->next;
+ }
+ if (!ns_xl) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "namespace node not found in volfile, Exiting");
+ return -1;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "namespace node specified as %s", data->data);
+
+ _private = CALLOC (1, sizeof (*_private));
+ ERR_ABORT (_private);
+ _private->sched_ops = get_scheduler (this, scheduler->data);
+ if (!_private->sched_ops) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Error while loading scheduler. Exiting");
+ FREE (_private);
+ return -1;
+ }
+
+ if (ns_xl->parents) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Namespace node should not be a child of any other node. Exiting");
+ FREE (_private);
+ return -1;
+ }
+
+ _private->namespace = ns_xl;
+
+ /* update _private structure */
+ {
+ count = 0;
+ trav = this->children;
+ /* Get the number of child count */
+ while (trav) {
+ count++;
+ trav = trav->next;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Child node count is %d", count);
+
+ _private->child_count = count;
+ if (count == 1) {
+ /* TODO: Should I error out here? */
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "WARNING: You have defined only one "
+ "\"subvolumes\" for unify volume. It may not "
+ "be the desired config, review your volume "
+ "volfile. If this is how you are testing it,"
+ " you may hit some performance penalty");
+ }
+
+ _private->xl_array = CALLOC (1,
+ sizeof (xlator_t) * (count + 1));
+ ERR_ABORT (_private->xl_array);
+
+ count = 0;
+ trav = this->children;
+ while (trav) {
+ _private->xl_array[count++] = trav->xlator;
+ trav = trav->next;
+ }
+ _private->xl_array[count] = _private->namespace;
+
+ /* self-heal part, start with generation '1' */
+ _private->inode_generation = 1;
+ /* Because, Foreground part is tested well */
+ _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
+ data = dict_get (this->options, "self-heal");
+ if (data) {
+ if (strcasecmp (data->data, "off") == 0)
+ _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
+
+ if (strcasecmp (data->data, "foreground") == 0)
+ _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
+
+ if (strcasecmp (data->data, "background") == 0)
+ _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
+ }
+
+ /* optimist - ask bulde for more about it */
+ data = dict_get (this->options, "optimist");
+ if (data) {
+ if (gf_string2boolean (data->data,
+ &_private->optimist) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "optimist excepts only boolean "
+ "options");
+ }
+ }
+
+ LOCK_INIT (&_private->lock);
+ }
+
+ /* Now that everything is fine. */
+ this->private = (void *)_private;
+ {
+ /* Initialize scheduler, if everything else is successful */
+ ret = _private->sched_ops->init (this);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Initializing scheduler failed, Exiting");
+ FREE (_private);
+ return -1;
+ }
+
+ ret = 0;
+
+ /* This section is required because some fops may look
+ * for 'xl->parent' variable
+ */
+ xlparent = CALLOC (1, sizeof (*xlparent));
+ xlparent->xlator = this;
+ if (!ns_xl->parents) {
+ ns_xl->parents = xlparent;
+ } else {
+ parent = ns_xl->parents;
+ while (parent->next)
+ parent = parent->next;
+ parent->next = xlparent;
+ }
+ /* Initialize the namespace volume */
+ if (!ns_xl->ready) {
+ ret = xlator_tree_init (ns_xl);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "initializing namespace node failed, "
+ "Exiting");
+ FREE (_private);
+ return -1;
+ }
+ }
+ }
+
+ /* Tell namespace node that init is done */
+ ns_xl->notify (ns_xl, GF_EVENT_PARENT_UP, this);
+
+ return 0;
+}
+
+/**
+ * fini - Free all the allocated memory
+ *
+ * Shuts down the scheduler, detaches the private structure from the
+ * translator, destroys its lock and releases xl_array and the structure
+ * itself.  Does nothing when no private structure is attached.
+ *
+ * @this: the unify translator being torn down.
+ */
+void
+fini (xlator_t *this)
+{
+        unify_private_t *priv = this->private;
+
+        /* Nothing was attached, so there is nothing to release. */
+        if (!priv)
+                return;
+
+        /* The scheduler is finalized while this->private is still set. */
+        if (priv->sched_ops)
+                priv->sched_ops->fini (this);
+        this->private = NULL;
+        LOCK_DESTROY (&priv->lock);
+        FREE (priv->xl_array);
+        FREE (priv);
+        return;
+}
+
+
+/*
+ * File-operation table of the unify translator: every listed fop is
+ * routed to the unify_* handler of the same name.
+ */
+struct xlator_fops fops = {
+ .stat = unify_stat,
+ .chmod = unify_chmod,
+ .readlink = unify_readlink,
+ .mknod = unify_mknod,
+ .mkdir = unify_mkdir,
+ .unlink = unify_unlink,
+ .rmdir = unify_rmdir,
+ .symlink = unify_symlink,
+ .rename = unify_rename,
+ .link = unify_link,
+ .chown = unify_chown,
+ .truncate = unify_truncate,
+ .create = unify_create,
+ .open = unify_open,
+ .readv = unify_readv,
+ .writev = unify_writev,
+ .statfs = unify_statfs,
+ .flush = unify_flush,
+ .fsync = unify_fsync,
+ .setxattr = unify_setxattr,
+ .getxattr = unify_getxattr,
+ .removexattr = unify_removexattr,
+ .opendir = unify_opendir,
+ .readdir = unify_readdir,
+ .fsyncdir = unify_fsyncdir,
+ .access = unify_access,
+ .ftruncate = unify_ftruncate,
+ .fstat = unify_fstat,
+ .lk = unify_lk,
+ .fchown = unify_fchown,
+ .fchmod = unify_fchmod,
+ .utimens = unify_utimens,
+ .lookup = unify_lookup,
+ .getdents = unify_getdents,
+ .checksum = unify_checksum,
+ .inodelk = unify_inodelk,
+ .finodelk = unify_finodelk,
+ .entrylk = unify_entrylk,
+ .fentrylk = unify_fentrylk,
+ .xattrop = unify_xattrop,
+ .fxattrop = unify_fxattrop
+};
+
+/* Management-operation table: unify registers no handlers here. */
+struct xlator_mops mops = {
+};
+
+/* Callback table: unify registers no handlers here. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Volfile options understood by unify.  The table is terminated by an
+ * entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ /* Name of the translator that acts as the namespace node. */
+ { .key = { "namespace" },
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+ /* Scheduler to load; restricted to the listed names. */
+ { .key = { "scheduler" },
+ .value = { "alu", "rr", "random", "nufa", "switch" },
+ .type = GF_OPTION_TYPE_STR
+ },
+ /* Self-heal mode; init() maps it to a ZR_UNIFY_*_SELF_HEAL value. */
+ { .key = {"self-heal"},
+ .value = { "foreground", "background", "off" },
+ .type = GF_OPTION_TYPE_STR
+ },
+ /* TODO: remove it some time later */
+ { .key = {"optimist"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
new file mode 100644
index 00000000000..bc18dc53f52
--- /dev/null
+++ b/xlators/cluster/unify/src/unify.h
@@ -0,0 +1,132 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _UNIFY_H
+#define _UNIFY_H
+
+#include "scheduler.h"
+#include "list.h"
+
+/* 32 KiB; NOTE(review): presumably a buffer bound for directory entries - confirm at the use site. */
+#define MAX_DIR_ENTRY_STRING (32 * 1024)
+
+/* Values stored in unify_private.self_heal (selected by "option self-heal"). */
+#define ZR_UNIFY_SELF_HEAL_OFF 0
+#define ZR_UNIFY_FG_SELF_HEAL 1
+#define ZR_UNIFY_BG_SELF_HEAL 2
+
+/* Sometimes one should use completely random numbers.. it's good :p */
+#define UNIFY_SELF_HEAL_GETDENTS_COUNT 1024
+
+/*
+ * NS - namespace translator recorded in the unify private structure of
+ * 'xl'.  The argument is parenthesized so that any pointer expression
+ * (not only a plain identifier) expands correctly.
+ */
+#define NS(xl) (((unify_private_t *)(xl)->private)->namespace)
+
+/*
+ * INIT_LOCAL - allocate a zeroed unify_local_t into 'loc' and attach it
+ * to frame 'fr' as fr->local, with op_ret = -1 and op_errno = ENOENT.
+ *
+ * On allocation failure the macro unwinds 'fr' with (-1, ENOMEM) and
+ * executes 'return 0' in the calling function, so it may only be used
+ * inside functions returning an integer.
+ *
+ * NOTE(review): ERR_ABORT is defined elsewhere; if it aborts on NULL the
+ * explicit NULL branch below is never reached - confirm.
+ *
+ * 'fr' and 'loc' are parenthesized wherever they are used as operands so
+ * that non-trivial expressions expand correctly.
+ */
+#define INIT_LOCAL(fr, loc)                             \
+do {                                                    \
+        (loc) = CALLOC (1, sizeof (unify_local_t));     \
+        ERR_ABORT (loc);                                \
+        if (!(loc)) {                                   \
+                STACK_UNWIND (fr, -1, ENOMEM);          \
+                return 0;                               \
+        }                                               \
+        (fr)->local = (loc);                            \
+        (loc)->op_ret = -1;                             \
+        (loc)->op_errno = ENOENT;                       \
+} while (0)
+
+
+
+/*
+ * Per-translator state of unify.  init() allocates it and stores it in
+ * this->private; fini() releases it.
+ *
+ * xl_array holds the child translators in order, followed by the
+ * namespace translator at index child_count.  self_heal holds one of the
+ * ZR_UNIFY_*_SELF_HEAL values, and inode_generation starts at 1.
+ */
+struct unify_private {
+ /* Update this structure depending on requirement */
+ void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
+ if xlator is using scheduler */
+ struct sched_ops *sched_ops; /* Scheduler options */
+ xlator_t *namespace; /* ptr to namespace xlator */
+ xlator_t **xl_array;
+ gf_boolean_t optimist;
+ int16_t child_count;
+ int16_t num_child_up;
+ uint8_t self_heal;
+ uint8_t is_up;
+ uint64_t inode_generation;
+ gf_lock_t lock;
+};
+typedef struct unify_private unify_private_t;
+
+/*
+ * Self-heal working state, reachable through unify_local_t.sh_struct.
+ * It keeps four checksum buffers of ZR_FILENAME_MAX bytes each (directory
+ * and file, each with an "ns_" counterpart) and three pointer members.
+ * NOTE(review): the "ns_" prefix presumably means the namespace node and
+ * the lists are presumably per-child - confirm in the self-heal code.
+ */
+struct unify_self_heal_struct {
+ uint8_t dir_checksum[ZR_FILENAME_MAX];
+ uint8_t ns_dir_checksum[ZR_FILENAME_MAX];
+ uint8_t file_checksum[ZR_FILENAME_MAX];
+ uint8_t ns_file_checksum[ZR_FILENAME_MAX];
+ off_t *offset_list;
+ int *count_list;
+ dir_entry_t **entry_list;
+};
+
+
+/*
+ * Per-call state of a unify operation.  INIT_LOCAL allocates one zeroed
+ * instance, stores it in frame->local and presets op_ret to -1 and
+ * op_errno to ENOENT.
+ */
+struct _unify_local_t {
+ int32_t call_count;
+ int32_t op_ret;
+ int32_t op_errno;
+ mode_t mode;
+ off_t offset;
+ dev_t dev;
+ uid_t uid;
+ gid_t gid;
+ int32_t flags;
+ int32_t entry_count;
+ int32_t count; // dir_entry_t count;
+ fd_t *fd;
+ struct stat stbuf;
+ struct statvfs statvfs_buf;
+ struct timespec tv[2];
+ char *name;
+ int32_t revalidate;
+
+ ino_t st_ino;
+ nlink_t st_nlink;
+
+ dict_t *dict;
+
+ int16_t *list;
+ int16_t *new_list; /* Used only in case of rename */
+ int16_t index;
+
+ int32_t failed;
+ int32_t return_eio; /* Used in case of different st-mode
+ present for a given path */
+
+ uint64_t inode_generation; /* used to store the per directory
+ * inode_generation. Got from inode's ctx
+ * of directory inodes
+ */
+
+ struct unify_self_heal_struct *sh_struct;
+ loc_t loc1, loc2;
+};
+typedef struct _unify_local_t unify_local_t;
+
+/*
+ * Self-heal entry point; the definition is not part of this header.
+ * Takes the call frame, the unify translator and the per-call local
+ * state.
+ */
+int32_t zr_unify_self_heal (call_frame_t *frame,
+ xlator_t *this,
+ unify_local_t *local);
+
+#endif /* _UNIFY_H */
diff --git a/xlators/debug/Makefile.am b/xlators/debug/Makefile.am
new file mode 100644
index 00000000000..16cf893a11c
--- /dev/null
+++ b/xlators/debug/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = trace error-gen
+
+CLEANFILES =
diff --git a/xlators/debug/error-gen/Makefile.am b/xlators/debug/error-gen/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/debug/error-gen/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
new file mode 100644
index 00000000000..1bd7f332ca0
--- /dev/null
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -0,0 +1,14 @@
+
+xlator_LTLIBRARIES = error-gen.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
+
+# libtool spells the option "-avoid-version"; "-avoidversion" is not recognized.
+error_gen_la_LDFLAGS = -module -avoid-version
+
+error_gen_la_SOURCES = error-gen.c
+error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
new file mode 100644
index 00000000000..9c0b3253e65
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -0,0 +1,1780 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+
+/* Private state of error-gen: op_count is the counter advanced by error_gen(). */
+typedef struct {
+ int op_count;
+} eg_t;
+
+/*
+ * error_gen - decide whether the current operation should fail.
+ *
+ * Advances the operation counter kept in this->private.  Every tenth
+ * call returns an errno: ENOTCONN when the count divided by ten is even,
+ * EIO when it is odd.  All other calls return 0.
+ */
+int error_gen (xlator_t *this)
+{
+        eg_t *state = this->private;
+        int   ops   = ++state->op_count;
+
+        if ((ops % 10) != 0)
+                return 0;
+
+        return (((ops / 10) % 2) == 0) ? ENOTCONN : EIO;
+}
+
+/* lookup callback: hands the child's result to STACK_UNWIND unchanged. */
+static int32_t
+error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, inode_t *inode,
+                      struct stat *buf, dict_t *dict)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
+        return 0;
+}
+
+/* lookup: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  dict_t *xattr_req)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_lookup_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+        return 0;
+}
+
+
+/* forget: nothing is kept per inode, so this only reports success. */
+int32_t
+error_gen_forget (xlator_t *this, inode_t *inode)
+{
+        return 0;
+}
+
+/* stat callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* stat: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_stat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->stat, loc);
+        return 0;
+}
+
+/* chmod callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* chmod: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 mode_t mode)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_chmod_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->chmod, loc, mode);
+        return 0;
+}
+
+
+/* fchmod callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* fchmod: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  mode_t mode)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fchmod_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fchmod, fd, mode);
+        return 0;
+}
+
+/* chown callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* chown: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_chown (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 uid_t uid, gid_t gid)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_chown_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->chown, loc, uid, gid);
+        return 0;
+}
+
+/* fchown callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* fchown: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  uid_t uid, gid_t gid)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fchown_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fchown, fd, uid, gid);
+        return 0;
+}
+
+/* truncate callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* truncate: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    off_t offset)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_truncate_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->truncate, loc, offset);
+        return 0;
+}
+
+/* ftruncate callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* ftruncate: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                     off_t offset)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_ftruncate_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->ftruncate, fd, offset);
+        return 0;
+}
+
+/* utimens callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/* utimens: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   struct timespec tv[2])
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_utimens_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->utimens, loc, tv);
+        return 0;
+}
+
+/* access callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* access: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  int32_t mask)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_access_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->access, loc, mask);
+        return 0;
+}
+
+
+/* readlink callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, const char *path)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, path);
+        return 0;
+}
+
+/* readlink: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    size_t size)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_readlink_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->readlink, loc, size);
+        return 0;
+}
+
+
+/* mknod callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, inode_t *inode,
+                     struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/* mknod: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 mode_t mode, dev_t rdev)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_mknod_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->mknod, loc, mode, rdev);
+        return 0;
+}
+
+/* mkdir callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, inode_t *inode,
+                     struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/* mkdir: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 mode_t mode)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_mkdir_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->mkdir, loc, mode);
+        return 0;
+}
+
+/* unlink callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/* unlink: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_unlink_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->unlink, loc);
+        return 0;
+}
+
+/* rmdir callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* rmdir: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_rmdir_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->rmdir, loc);
+        return 0;
+}
+
+
+/* symlink callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, inode_t *inode,
+                       struct stat *buf)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      inode,
+                      buf);
+        return 0;
+}
+
+/* symlink: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_symlink (call_frame_t *frame, xlator_t *this,
+                   const char *linkpath, loc_t *loc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_symlink_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->symlink, linkpath, loc);
+        return 0;
+}
+
+
+/* rename callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      buf);
+        return 0;
+}
+
+/* rename: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+                  loc_t *newloc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_rename_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->rename, oldloc, newloc);
+        return 0;
+}
+
+
+/* link callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct stat *buf)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      inode,
+                      buf);
+        return 0;
+}
+
+/* link: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+                loc_t *newloc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_link_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->link, oldloc, newloc);
+        return 0;
+}
+
+
+/* create callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, fd_t *fd,
+                      inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      fd,
+                      inode,
+                      buf);
+        return 0;
+}
+
+/* create: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                  int32_t flags, mode_t mode, fd_t *fd)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_create_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->create,
+                    loc,
+                    flags,
+                    mode,
+                    fd);
+        return 0;
+}
+
+/* open callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/* open: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                int32_t flags, fd_t *fd)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_open_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->open, loc, flags, fd);
+        return 0;
+}
+
+/* readv callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iovec *vector,
+                     int32_t count, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+/* readv: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 size_t size, off_t offset)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, 0, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_readv_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->readv, fd, size, offset);
+        return 0;
+}
+
+
+/* writev callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+/* writev: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  struct iovec *vector, int32_t count, off_t off)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_writev_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->writev, fd, vector, count, off);
+        return 0;
+}
+
+/* flush callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* flush: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_flush_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->flush, fd);
+        return 0;
+}
+
+
+/* fsync callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* fsync: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 int32_t flags)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fsync_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsync, fd, flags);
+        return 0;
+}
+
+/* fstat callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* fstat: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fstat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fstat, fd);
+        return 0;
+}
+
+/* opendir callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/* opendir: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   fd_t *fd)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_opendir_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->opendir, loc, fd);
+        return 0;
+}
+
+
+/* getdents callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        dir_entry_t *entries, int32_t count)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+        return 0;
+}
+
+/* getdents: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    size_t size, off_t offset, int32_t flag)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL, 0);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_getdents_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->getdents, fd, size, offset,
+                    flag);
+        return 0;
+}
+
+
+/* setdents callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * setdents: fail with the errno chosen by error_gen(), else wind to the
+ * first child.
+ *
+ * The error unwind carries only (op_ret, op_errno), which is the argument
+ * list error_gen_setdents_cbk accepts; the previous trailing "NULL, 0"
+ * did not correspond to any setdents callback parameter.
+ *
+ * Always returns 0; the outcome is delivered through the frame.
+ */
+int32_t
+error_gen_setdents (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    int32_t flags,
+                    dir_entry_t *entries,
+                    int32_t count)
+{
+        int op_errno = 0;
+
+        op_errno = error_gen (this);
+        if (op_errno) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (op_errno));
+                STACK_UNWIND (frame, -1, op_errno);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_setdents_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setdents,
+                    fd,
+                    flags,
+                    entries,
+                    count);
+        return 0;
+}
+
+
+/* fsyncdir callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* fsyncdir: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    int32_t flags)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fsyncdir_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsyncdir, fd, flags);
+        return 0;
+}
+
+
+/* statfs callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* statfs: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_statfs_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->statfs, loc);
+        return 0;
+}
+
+
+/* setxattr callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* setxattr: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    dict_t *dict, int32_t flags)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_setxattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->setxattr, loc, dict, flags);
+        return 0;
+}
+
+/* getxattr callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/* getxattr: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    const char *name)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_getxattr_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->getxattr, loc, name);
+        return 0;
+}
+
+/* xattrop callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      dict);
+        return 0;
+}
+
+/* xattrop: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                   gf_xattrop_flags_t flags, dict_t *dict)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_xattrop_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->xattrop, loc, flags, dict);
+        return 0;
+}
+
+/* fxattrop callback: hands the child's result to STACK_UNWIND unchanged. */
+int32_t
+error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      dict);
+        return 0;
+}
+
+/* fxattrop: fail with the errno chosen by error_gen(), else wind to the first child. */
+int32_t
+error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    gf_xattrop_flags_t flags, dict_t *dict)
+{
+        int err = error_gen (this);
+
+        if (err != 0) {
+                GF_ERROR (this, "unwind(-1, %s)", strerror (err));
+                STACK_UNWIND (frame, -1, err, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, error_gen_fxattrop_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fxattrop, fd, flags, dict);
+        return 0;
+}
+
+/*
+ * removexattr callback: relays op_ret and op_errno from the child to the
+ * caller; this callback carries no further arguments.
+ */
+int32_t
+error_gen_removexattr_cbk (call_frame_t *frame, void *cookie,
+                           xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * removexattr fop: asks error_gen() for an errno.  Zero forwards the call
+ * to the first child; non-zero is logged and unwound as a failure.
+ *
+ * The failure unwind passes only op_ret and op_errno, matching the
+ * argument list of error_gen_removexattr_cbk (the removexattr callback
+ * has no payload argument).
+ */
+int32_t
+error_gen_removexattr (call_frame_t *frame,
+                       xlator_t *this,
+                       loc_t *loc,
+                       const char *name)
+{
+        int op_errno = 0;
+
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                STACK_UNWIND (frame, -1, op_errno);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_removexattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr,
+                    loc,
+                    name);
+        return 0;
+}
+
+/*
+ * lk callback: passes the child's result and lock description back to
+ * the caller unchanged.
+ */
+int32_t
+error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, lock);
+        return 0;
+}
+
+/*
+ * lk fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally
+ * with a NULL lock.
+ */
+int32_t
+error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              int32_t cmd, struct flock *lock)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_lk_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->lk,
+                            fd, cmd, lock);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected, NULL);
+        return 0;
+}
+
+
+/* inodelk callback: relays op_ret and op_errno to the caller. */
+int32_t
+error_gen_inodelk_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+
+/*
+ * inodelk fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally.
+ */
+int32_t
+error_gen_inodelk (call_frame_t *frame,
+                   xlator_t *this,
+                   loc_t *loc,
+                   int32_t cmd,
+                   struct flock *lock)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_inodelk_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->inodelk,
+                            loc, cmd, lock);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected);
+        return 0;
+}
+
+
+/* finodelk callback: relays op_ret and op_errno to the caller. */
+int32_t
+error_gen_finodelk_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+
+/*
+ * finodelk fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally.
+ */
+int32_t
+error_gen_finodelk (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    int32_t cmd,
+                    struct flock *lock)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_finodelk_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->finodelk,
+                            fd, cmd, lock);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected);
+        return 0;
+}
+
+
+/* entrylk callback: relays op_ret and op_errno to the caller. */
+int32_t
+error_gen_entrylk_cbk (call_frame_t *frame,
+                       void *cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * entrylk fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally.
+ */
+int32_t
+error_gen_entrylk (call_frame_t *frame,
+                   xlator_t *this,
+                   loc_t *loc,
+                   const char *basename,
+                   entrylk_cmd cmd,
+                   entrylk_type type)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_entrylk_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->entrylk,
+                            loc, basename, cmd, type);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected);
+        return 0;
+}
+
+/* fentrylk callback: relays op_ret and op_errno to the caller. */
+int32_t
+error_gen_fentrylk_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * fentrylk fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally.
+ */
+int32_t
+error_gen_fentrylk (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    const char *basename,
+                    entrylk_cmd cmd,
+                    entrylk_type type)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_fentrylk_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->fentrylk,
+                            fd, basename, cmd, type);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected);
+        return 0;
+}
+
+
+/* Management operations */
+
+/*
+ * stats callback: passes the child's result and stats structure back to
+ * the caller unchanged.
+ */
+int32_t
+error_gen_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     struct xlator_stats *stats)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stats);
+        return 0;
+}
+
+
+/*
+ * stats mop: forwards to the first child's mops->stats unless error_gen()
+ * returns a non-zero errno, in which case the call is logged and failed
+ * locally with NULL stats.
+ */
+int32_t
+error_gen_stats (call_frame_t *frame, xlator_t *this, int32_t flags)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_stats_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->mops->stats,
+                            flags);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected, NULL);
+        return 0;
+}
+
+
+
+/*
+ * getspec callback: passes the child's result and spec_data back to the
+ * caller unchanged.
+ */
+int32_t
+error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, char *spec_data)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, spec_data);
+        return 0;
+}
+
+
+/*
+ * getspec mop: forwards to the first child's mops->getspec unless
+ * error_gen() returns a non-zero errno, in which case the call is logged
+ * and failed locally with NULL spec data.
+ */
+int32_t
+error_gen_getspec (call_frame_t *frame, xlator_t *this,
+                   const char *key, int32_t flags)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_getspec_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->mops->getspec,
+                            key, flags);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected, NULL);
+        return 0;
+}
+
+
+/*
+ * checksum callback: passes the child's result and both checksum buffers
+ * back to the caller unchanged.
+ */
+int32_t
+error_gen_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno,
+                        uint8_t *file_checksum, uint8_t *dir_checksum)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
+        return 0;
+}
+
+
+/*
+ * checksum fop: forwards to the first child's fops->checksum unless
+ * error_gen() returns a non-zero errno, in which case the call is logged
+ * and failed locally with both checksum pointers NULL.
+ */
+int32_t
+error_gen_checksum (call_frame_t *frame, xlator_t *this,
+                    loc_t *loc, int32_t flag)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_checksum_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->checksum,
+                            loc, flag);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected, NULL, NULL);
+        return 0;
+}
+
+/*
+ * readdir callback: passes the child's result and entry list back to the
+ * caller unchanged.
+ */
+int32_t
+error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      entries);
+        return 0;
+}
+
+
+/*
+ * readdir fop: forwards to the first child unless error_gen() returns a
+ * non-zero errno, in which case the call is logged and failed locally
+ * with a NULL entry list.
+ */
+int32_t
+error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   size_t size, off_t off)
+{
+        int injected = error_gen (this);
+
+        if (!injected) {
+                STACK_WIND (frame, error_gen_readdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readdir,
+                            fd, size, off);
+                return 0;
+        }
+
+        GF_ERROR(this, "unwind(-1, %s)", strerror (injected));
+        STACK_UNWIND (frame, -1, injected, NULL);
+        return 0;
+}
+
+/* releasedir handler: does nothing and reports success. */
+int32_t
+error_gen_closedir (xlator_t *this, fd_t *fd)
+{
+        return 0;
+}
+
+/* release handler: does nothing and reports success. */
+int32_t
+error_gen_close (xlator_t *this, fd_t *fd)
+{
+        return 0;
+}
+
+/*
+ * Translator initialisation.
+ *
+ * Requires exactly one subvolume (returns -1 otherwise), warns when the
+ * translator has no parent, then allocates the zero-filled private state
+ * and stores it in this->private.
+ *
+ * Returns 0 on success, -1 on misconfiguration or allocation failure.
+ * A failed allocation is reported instead of leaving this->private NULL
+ * behind a successful return.
+ */
+int
+init (xlator_t *this)
+{
+        eg_t *pvt = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error-gen not configured with one subvolume");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        pvt = CALLOC (1, sizeof (eg_t));
+        if (!pvt) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory");
+                return -1;
+        }
+
+        this->private = pvt;
+        return 0;
+}
+
+/*
+ * Translator teardown: only emits a debug log line.
+ * NOTE(review): the private data stored by init() is not released here;
+ * confirm whether the framework frees it.
+ */
+void
+fini (xlator_t *this)
+{
+        gf_log (this->name, GF_LOG_DEBUG, "fini called");
+}
+
+
+/* File-operation table: maps each fop slot to its error_gen_* handler. */
+struct xlator_fops fops = {
+        .lookup      = error_gen_lookup,
+        .stat        = error_gen_stat,
+        .readlink    = error_gen_readlink,
+        .mknod       = error_gen_mknod,
+        .mkdir       = error_gen_mkdir,
+        .unlink      = error_gen_unlink,
+        .rmdir       = error_gen_rmdir,
+        .symlink     = error_gen_symlink,
+        .rename      = error_gen_rename,
+        .link        = error_gen_link,
+        .chmod       = error_gen_chmod,
+        .chown       = error_gen_chown,
+        .truncate    = error_gen_truncate,
+        .utimens     = error_gen_utimens,
+        .create      = error_gen_create,
+        .open        = error_gen_open,
+        .readv       = error_gen_readv,
+        .writev      = error_gen_writev,
+        .statfs      = error_gen_statfs,
+        .flush       = error_gen_flush,
+        .fsync       = error_gen_fsync,
+        .setxattr    = error_gen_setxattr,
+        .getxattr    = error_gen_getxattr,
+        .removexattr = error_gen_removexattr,
+        .opendir     = error_gen_opendir,
+        .readdir     = error_gen_readdir,
+        .getdents    = error_gen_getdents,
+        .fsyncdir    = error_gen_fsyncdir,
+        .access      = error_gen_access,
+        .ftruncate   = error_gen_ftruncate,
+        .fstat       = error_gen_fstat,
+        .lk          = error_gen_lk,
+        .fchmod      = error_gen_fchmod,
+        .fchown      = error_gen_fchown,
+        .setdents    = error_gen_setdents,
+        .lookup_cbk  = error_gen_lookup_cbk,
+        .checksum    = error_gen_checksum,
+        .xattrop     = error_gen_xattrop,
+        .fxattrop    = error_gen_fxattrop,
+        .inodelk     = error_gen_inodelk,
+        .finodelk    = error_gen_finodelk,
+        .entrylk     = error_gen_entrylk,
+        .fentrylk    = error_gen_fentrylk,
+};
+
+struct xlator_mops mops = {
+ .stats = error_gen_stats,
+ .getspec = error_gen_getspec,
+};
+
+/* Release callbacks; both handlers are no-ops that return 0. */
+struct xlator_cbks cbks = {
+        .release    = error_gen_close,
+        .releasedir = error_gen_closedir,
+};
diff --git a/xlators/debug/trace/Makefile.am b/xlators/debug/trace/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/debug/trace/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
new file mode 100644
index 00000000000..0f1679a049d
--- /dev/null
+++ b/xlators/debug/trace/src/Makefile.am
@@ -0,0 +1,14 @@
+
+xlator_LTLIBRARIES = trace.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
+
+# The libtool link-mode option is spelled -avoid-version; it builds the
+# module without a version suffix.
+trace_la_LDFLAGS = -module -avoid-version
+
+trace_la_SOURCES = trace.c
+trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
new file mode 100644
index 00000000000..3ccf11a837a
--- /dev/null
+++ b/xlators/debug/trace/src/trace.c
@@ -0,0 +1,2321 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/**
+ * xlators/debug/trace :
+ * This translator logs all the arguments passed to the fops/mops and to
+ * their _cbk functions, and then passes each call on to the next layer.
+ * A very helpful translator for debugging.
+ */
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+
+/*
+ * Logs an error naming the file, function and condition text when
+ * `cond` is true.  It only logs: control continues after the macro.
+ */
+#define ERR_EINVAL_NORETURN(cond)                                       \
+        do {                                                            \
+                if ((cond)) {                                           \
+                        gf_log ("ERROR", GF_LOG_ERROR,                  \
+                                "%s: %s: (%s) is true",                 \
+                                __FILE__, __FUNCTION__, #cond);         \
+                }                                                       \
+        } while (0)
+
+/* Private state of the trace translator. */
+typedef struct trace_private {
+ int32_t debug_flag; /* NOTE(review): not used in the visible code - confirm meaning */
+} trace_private_t;
+
+/*
+ * Per-operation trace switches, indexed by GF_FOP_* constant.  Each
+ * callback logs only when its entry's `enabled` field is non-zero.
+ */
+struct {
+ char *name; /* presumably the operation's printable name - confirm */
+ int enabled; /* non-zero: log this operation */
+} trace_fop_names[GF_FOP_MAXVALUE];
+
+/*
+ * create callback.  When tracing of GF_FOP_CREATE is enabled it logs the
+ * fd, inode number and stat fields on success, or op_ret/op_errno on
+ * failure; it then unwinds to the caller with the arguments unchanged.
+ *
+ * Timestamps are broken down with localtime_r() into a local struct tm:
+ * localtime() returns shared static storage, which is unsafe if
+ * callbacks can run concurrently on several threads.
+ */
+int32_t
+trace_create_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  fd_t *fd,
+                  inode_t *inode,
+                  struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_CREATE].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, fd=%p, ino=%"PRIu64"), "
+                                "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", "
+                                "st_mode=%d, st_nlink=%"GF_PRI_NLINK", st_uid=%d, "
+                                "st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
+                                "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
+                                "st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, fd, inode->ino, buf->st_dev,
+                                buf->st_ino, buf->st_mode, buf->st_nlink,
+                                buf->st_uid, buf->st_gid, buf->st_rdev, buf->st_size,
+                                buf->st_blksize,
+                                buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+/*
+ * open callback: logs op_ret, op_errno and the fd pointer when tracing
+ * of GF_FOP_OPEN is enabled, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int trace_on = trace_fop_names[GF_FOP_OPEN].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, *fd=%p)",
+                        frame->root->unique, op_ret, op_errno, fd);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/*
+ * stat callback.  When tracing of GF_FOP_STAT is enabled it logs the
+ * stat fields on success or op_ret/op_errno on failure, then unwinds
+ * with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_stat_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_STAT].enabled) {
+
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, buf {st_dev=%"GF_PRI_DEV", "
+                                "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", "
+                                "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
+                                ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
+                                "st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
+                                buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                                buf->st_rdev, buf->st_size, buf->st_blksize,
+                                buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * readv callback.  When tracing of GF_FOP_READ is enabled it logs the
+ * stat fields on success or op_ret/op_errno on failure, then unwinds
+ * with vector, count and buf unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_readv_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct iovec *vector,
+                 int32_t count,
+                 struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_READ].enabled) {
+
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
+                                "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", "
+                                "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", "
+                                "st_size=%"PRId64", st_blksize=%"GF_PRI_BLKSIZE", "
+                                "st_blocks=%"PRId64", st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
+                                buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                                buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
+                                atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf);
+        return 0;
+}
+
+/*
+ * writev callback.  When tracing of GF_FOP_WRITE is enabled it logs
+ * inode, size, block count and timestamps on success or op_ret/op_errno
+ * on failure, then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_writev_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_WRITE].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", "
+                                "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
+                                "st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_ino, buf->st_size,
+                                buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * getdents callback: logs op_ret, op_errno and the entry count when
+ * tracing of GF_FOP_GETDENTS is enabled, then unwinds unchanged.
+ */
+int32_t
+trace_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    dir_entry_t *entries, int32_t count)
+{
+        int trace_on = trace_fop_names[GF_FOP_GETDENTS].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, count=%d)",
+                        frame->root->unique, op_ret, op_errno, count);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+        return 0;
+}
+
+/*
+ * readdir callback: logs op_ret and op_errno when tracing of
+ * GF_FOP_READDIR is enabled, then unwinds with the arguments unchanged.
+ *
+ * The log line uses the same "<unique>: (" prefix as the other
+ * callbacks in this file.
+ */
+int32_t
+trace_readdir_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   gf_dirent_t *buf)
+{
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_READDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * fsync callback: logs op_ret and op_errno when tracing of GF_FOP_FSYNC
+ * is enabled, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        int trace_on = trace_fop_names[GF_FOP_FSYNC].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * chown callback.  When tracing of GF_FOP_CHOWN is enabled it logs
+ * inode, mode, owner and timestamps on success or op_ret/op_errno on
+ * failure, then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_chown_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_CHOWN].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, "
+                                "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
+                                buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * chmod callback.  When tracing of GF_FOP_CHMOD is enabled it logs
+ * inode, mode and timestamps on success or op_ret/op_errno on failure,
+ * then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_chmod_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_CHMOD].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, "
+                                "st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
+                                atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * fchmod callback.  When tracing of GF_FOP_FCHMOD is enabled it logs
+ * inode, mode and timestamps on success or op_ret/op_errno on failure,
+ * then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_fchmod_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, "
+                                "st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
+                                atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * fchown callback.  When tracing of GF_FOP_FCHOWN is enabled it logs
+ * inode, mode, owner and timestamps on success or op_ret/op_errno on
+ * failure, then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_fchown_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, "
+                                "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
+                                buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * unlink callback: logs op_ret and op_errno when tracing of
+ * GF_FOP_UNLINK is enabled, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        int trace_on = trace_fop_names[GF_FOP_UNLINK].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * rename callback: logs op_ret, op_errno and the inode number (0 when
+ * buf is NULL) when tracing of GF_FOP_RENAME is enabled, then unwinds
+ * with the arguments unchanged.
+ */
+int32_t
+trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        int trace_on = trace_fop_names[GF_FOP_RENAME].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, buf {st_ino=%"PRIu64"})",
+                        frame->root->unique, op_ret, op_errno,
+                        (buf? buf->st_ino : 0));
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * readlink callback: logs op_ret, op_errno and the link target when
+ * tracing of GF_FOP_READLINK is enabled, then unwinds with the
+ * arguments unchanged.
+ *
+ * A NULL buf is logged as "(null)" rather than being handed to "%s",
+ * which is undefined behaviour for a NULL pointer.
+ */
+int32_t
+trace_readlink_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    const char *buf)
+{
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_READLINK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s)",
+                        frame->root->unique, op_ret, op_errno,
+                        (buf ? buf : "(null)"));
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * lookup callback: when tracing of GF_FOP_LOOKUP is enabled, logs the
+ * inode number and stat fields on success or op_ret/op_errno on
+ * failure, then unwinds with inode, buf and xattr unchanged.
+ */
+int32_t
+trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        int trace_on = trace_fop_names[GF_FOP_LOOKUP].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
+                        "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", st_mode=%d, "
+                        "st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
+                        "st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
+                        "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
+                        frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
+                        buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                        buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks);
+        } else if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+        return 0;
+}
+
+/*
+ * symlink callback.  When tracing of GF_FOP_SYMLINK is enabled it logs
+ * the inode number and stat fields on success or op_ret/op_errno on
+ * failure, then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_symlink_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   inode_t *inode,
+                   struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_ino=%"PRIu64", "
+                                "st_mode=%d, st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
+                                "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
+                                "st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, inode->ino, buf->st_ino,
+                                buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                                buf->st_size, buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * mknod callback.  When tracing of GF_FOP_MKNOD is enabled it logs the
+ * inode number and stat fields on success or op_ret/op_errno on
+ * failure, then unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_mknod_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 inode_t *inode,
+                 struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_dev=%"GF_PRI_DEV
+                                ", st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", "
+                                "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
+                                ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
+                                "st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
+                                buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                                buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
+                                atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/*
+ * mkdir callback: logs op_ret, op_errno and the inode number (0 when
+ * inode is NULL) when tracing of GF_FOP_MKDIR is enabled, then unwinds
+ * with the arguments unchanged.
+ *
+ * The log format closes its parenthesis, like the other callbacks.
+ */
+int32_t
+trace_mkdir_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 inode_t *inode,
+                 struct stat *buf)
+{
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, ino=%"PRIu64")",
+                        frame->root->unique, op_ret, op_errno,
+                        (inode? inode->ino : 0));
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * link callback: when tracing of GF_FOP_LINK is enabled, logs the inode
+ * number and link count on success or op_ret/op_errno on failure, then
+ * unwinds with the arguments unchanged.
+ */
+int32_t
+trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                inode_t *inode, struct stat *buf)
+{
+        int trace_on = trace_fop_names[GF_FOP_LINK].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
+                        "*buf {st_nlink=%"GF_PRI_NLINK"})",
+                        frame->root->unique, op_ret, inode->ino, buf->st_nlink);
+        } else if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * flush callback: logs op_ret and op_errno when tracing of GF_FOP_FLUSH
+ * is enabled, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        int trace_on = trace_fop_names[GF_FOP_FLUSH].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * opendir callback: logs op_ret, op_errno and the fd pointer when
+ * tracing of GF_FOP_OPENDIR is enabled, then unwinds unchanged.
+ */
+int32_t
+trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int trace_on = trace_fop_names[GF_FOP_OPENDIR].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, fd=%p)",
+                        frame->root->unique, op_ret, op_errno, fd);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/*
+ * rmdir callback: logs op_ret and op_errno when tracing of GF_FOP_RMDIR
+ * is enabled, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        int trace_on = trace_fop_names[GF_FOP_RMDIR].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * truncate callback: when tracing of GF_FOP_TRUNCATE is enabled, logs
+ * size, block size and block count on success or op_ret/op_errno on
+ * failure, then unwinds with the arguments unchanged.
+ */
+int32_t
+trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        int trace_on = trace_fop_names[GF_FOP_TRUNCATE].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", st_blksize=%"
+                        GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
+                        frame->root->unique, op_ret, buf->st_size, buf->st_blksize,
+                        buf->st_blocks);
+        } else if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * utimens callback.  When tracing of GF_FOP_UTIMENS is enabled it logs
+ * the three timestamps on success or op_ret/op_errno on failure, then
+ * unwinds with the arguments unchanged.
+ *
+ * Uses localtime_r() with a local struct tm because localtime() returns
+ * shared static storage and is unsafe under concurrent callbacks.
+ */
+int32_t
+trace_utimens_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
+                if (op_ret >= 0) {
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_atime=%s, st_mtime=%s, "
+                                "st_ctime=%s})",
+                                frame->root->unique, op_ret, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * statfs callback: when tracing of GF_FOP_STATFS is enabled, logs the
+ * statvfs fields on success or op_ret/op_errno on failure, then unwinds
+ * with the arguments unchanged.
+ */
+int32_t
+trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+{
+        int trace_on = trace_fop_names[GF_FOP_STATFS].enabled;
+
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_on && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
+                        ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
+                        "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
+                        GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
+                        frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
+                        buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
+                        buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
+        } else if (trace_on) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* setxattr callback: logs op_ret/op_errno when GF_FOP_SETXATTR tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/* getxattr callback: logs op_ret, op_errno and the dict pointer when
+   GF_FOP_GETXATTR tracing is enabled, then unwinds with the same dict. */
+int32_t
+trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        ERR_EINVAL_NORETURN (!this || !dict);
+
+        if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d, dict=%p)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno,
+                        dict);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        return 0;
+}
+
+/* removexattr callback: logs op_ret/op_errno when GF_FOP_REMOVEXATTR tracing
+   is enabled, then unwinds the frame. */
+int32_t
+trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/* fsyncdir callback: logs op_ret/op_errno when GF_FOP_FSYNCDIR tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/* access callback: logs op_ret/op_errno when GF_FOP_ACCESS tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * ftruncate callback.  With GF_FOP_FTRUNCATE tracing enabled, logs the size,
+ * block size and block count of *buf on success, or op_ret/op_errno on
+ * failure; always unwinds afterwards.
+ */
+int32_t
+trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_FTRUNCATE].enabled && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", "
+                        "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
+                        frame->root->unique, op_ret,
+                        buf->st_size, buf->st_blksize, buf->st_blocks);
+        } else if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * fstat callback.  When tracing of GF_FOP_FSTAT is enabled, logs the whole
+ * *buf (with formatted atime/mtime/ctime) on success, or op_ret/op_errno on
+ * failure, then unwinds the frame with the arguments unchanged.
+ */
+int32_t
+trace_fstat_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct stat *buf)
+{
+        char      atime_buf[256], mtime_buf[256], ctime_buf[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this );
+
+        if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+                if (op_ret >= 0) {
+                        /* localtime_r() writes into tm_buf instead of the
+                           static storage that localtime() hands out, so
+                           callbacks running concurrently cannot clobber each
+                           other's broken-down time. */
+                        strftime (atime_buf, sizeof (atime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_atime, &tm_buf));
+                        strftime (mtime_buf, sizeof (mtime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_mtime, &tm_buf));
+                        strftime (ctime_buf, sizeof (ctime_buf), "[%b %d %H:%M:%S]",
+                                  localtime_r (&buf->st_ctime, &tm_buf));
+
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
+                                "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", "
+                                "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
+                                "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
+                                "st_mtime=%s, st_ctime=%s})",
+                                frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
+                                buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
+                                buf->st_rdev, buf->st_size, buf->st_blksize,
+                                buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
+                } else {
+                        gf_log (this->name, GF_LOG_NORMAL,
+                                "%"PRId64": (op_ret=%d, op_errno=%d)",
+                                frame->root->unique, op_ret, op_errno);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * lk callback.  With GF_FOP_LK tracing enabled, logs the fields of *lock on
+ * success or op_ret/op_errno on failure; always unwinds afterwards.
+ */
+int32_t
+trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_LK].enabled && op_ret >= 0) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, {l_type=%d, l_whence=%d, "
+                        "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
+                        frame->root->unique, op_ret,
+                        lock->l_type, lock->l_whence,
+                        lock->l_start, lock->l_len, lock->l_pid);
+        } else if (trace_fop_names[GF_FOP_LK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique, op_ret, op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, lock);
+
+        return 0;
+}
+
+
+/* setdents callback: logs op_ret/op_errno when GF_FOP_SETDENTS tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": op_ret=%d, op_errno=%d",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/* entrylk callback: logs op_ret/op_errno when GF_FOP_ENTRYLK tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": op_ret=%d, op_errno=%d",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/* xattrop callback: logs op_ret/op_errno when GF_FOP_XATTROP tracing is
+   enabled, then unwinds the frame together with the returned dict. */
+int32_t
+trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        ERR_EINVAL_NORETURN (!this || !dict);
+
+        if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        return 0;
+}
+
+/* fxattrop callback: logs op_ret/op_errno when GF_FOP_FXATTROP tracing is
+   enabled, then unwinds the frame together with the returned dict. */
+int32_t
+trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        ERR_EINVAL_NORETURN (!this || !dict);
+
+        if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (op_ret=%d, op_errno=%d)",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        return 0;
+}
+
+/* inodelk callback: logs op_ret/op_errno when GF_FOP_INODELK tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_INODELK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": op_ret=%d, op_errno=%d",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * entrylk fop.  When GF_FOP_ENTRYLK tracing is enabled, logs the location,
+ * basename, lock command and lock type; then winds the call to the first
+ * child's entrylk with trace_entrylk_cbk as the callback.
+ */
+int32_t
+trace_entrylk (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               const char *basename,
+               entrylk_cmd cmd,
+               entrylk_type type)
+{
+        ERR_EINVAL_NORETURN (!this || !loc || !basename);
+
+        if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+                /* each field is reported with one of exactly two names */
+                const char *cmd_name  = "ENTRYLK_UNLOCK";
+                const char *type_name = "ENTRYLK_WRLCK";
+
+                if (cmd == ENTRYLK_LOCK)
+                        cmd_name = "ENTRYLK_LOCK";
+                if (type == ENTRYLK_RDLCK)
+                        type_name = "ENTRYLK_RDLCK";
+
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc= {path=%s, ino=%"PRIu64"} basename=%s, cmd=%s, type=%s)",
+                        frame->root->unique, loc->path, loc->inode->ino,
+                        basename, cmd_name, type_name);
+        }
+
+        STACK_WIND (frame, trace_entrylk_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->entrylk,
+                    loc, basename, cmd, type);
+
+        return 0;
+}
+
+/*
+ * inodelk fop.  When GF_FOP_INODELK tracing is enabled, logs the location and
+ * the symbolic name of the lock command; then winds the call to the first
+ * child's inodelk with trace_inodelk_cbk as the callback.
+ */
+int32_t
+trace_inodelk (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc, int32_t cmd, struct flock *flock)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_INODELK].enabled) {
+                /* Name all three fcntl-style lock commands; previously every
+                   command other than F_SETLK was logged as "unknown". */
+                const char *cmd_name = "unknown";
+
+                if (cmd == F_GETLK)
+                        cmd_name = "F_GETLK";
+                else if (cmd == F_SETLK)
+                        cmd_name = "F_SETLK";
+                else if (cmd == F_SETLKW)
+                        cmd_name = "F_SETLKW";
+
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, cmd=%s)",
+                        frame->root->unique, loc->path, loc->inode->ino,
+                        cmd_name);
+        }
+
+        STACK_WIND (frame,
+                    trace_inodelk_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->inodelk,
+                    loc, cmd, flock);
+        return 0;
+}
+
+
+/* finodelk callback: logs op_ret/op_errno when GF_FOP_FINODELK tracing is
+   enabled, then unwinds the frame. */
+int32_t
+trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": op_ret=%d, op_errno=%d",
+                        frame->root->unique,
+                        op_ret,
+                        op_errno);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * finodelk fop.  When GF_FOP_FINODELK tracing is enabled, logs the fd pointer
+ * and the symbolic name of the lock command; then winds the call to the first
+ * child's finodelk with trace_finodelk_cbk as the callback.
+ */
+int32_t
+trace_finodelk (call_frame_t *frame,
+                xlator_t *this,
+                fd_t *fd, int32_t cmd, struct flock *flock)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+                /* Name all three fcntl-style lock commands; previously every
+                   command other than F_SETLK was logged as "unknown". */
+                const char *cmd_name = "unknown";
+
+                if (cmd == F_GETLK)
+                        cmd_name = "F_GETLK";
+                else if (cmd == F_SETLK)
+                        cmd_name = "F_SETLK";
+                else if (cmd == F_SETLKW)
+                        cmd_name = "F_SETLKW";
+
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (fd=%p, cmd=%s)",
+                        frame->root->unique, fd, cmd_name);
+        }
+
+        STACK_WIND (frame,
+                    trace_finodelk_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->finodelk,
+                    fd, cmd, flock);
+        return 0;
+}
+
+
+/*
+ * xattrop fop.  Logs path, inode number and flags when GF_FOP_XATTROP tracing
+ * is enabled, then winds to the first child's xattrop with trace_xattrop_cbk.
+ */
+int32_t
+trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+               gf_xattrop_flags_t flags, dict_t *dict)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (path=%s, ino=%"PRIu64" flags=%d)",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        flags);
+        }
+
+        STACK_WIND (frame,
+                    trace_xattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->xattrop,
+                    loc,
+                    flags,
+                    dict);
+        return 0;
+}
+
+/*
+ * fxattrop fop.  Logs the fd pointer and flags when GF_FOP_FXATTROP tracing is
+ * enabled, then winds to the first child's fxattrop with trace_fxattrop_cbk.
+ */
+int32_t
+trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                gf_xattrop_flags_t flags, dict_t *dict)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (fd=%p, flags=%d)",
+                        frame->root->unique,
+                        fd,
+                        flags);
+        }
+
+        STACK_WIND (frame,
+                    trace_fxattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fxattrop,
+                    fd,
+                    flags,
+                    dict);
+        return 0;
+}
+
+/*
+ * lookup fop.  Logs the location when GF_FOP_LOOKUP tracing is enabled, then
+ * winds to the first child's lookup with trace_lookup_cbk as the callback.
+ */
+int32_t
+trace_lookup (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr_req)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+                /* TODO: print all the keys mentioned in xattr_req */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique, loc->path, loc->inode->ino);
+        }
+
+        STACK_WIND (frame,
+                    trace_lookup_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup,
+                    loc,
+                    xattr_req);
+        return 0;
+}
+
+/*
+ * stat fop.  Logs the location when GF_FOP_STAT tracing is enabled, then
+ * winds to the first child's stat with trace_stat_cbk as the callback.
+ */
+int32_t
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_STAT].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino);
+        }
+
+        STACK_WIND (frame, trace_stat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc);
+        return 0;
+}
+
+/*
+ * readlink fop.  Logs the location and requested size when GF_FOP_READLINK
+ * tracing is enabled, then winds to the first child's readlink with
+ * trace_readlink_cbk as the callback.
+ */
+int32_t
+trace_readlink (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, size_t size)
+{
+        ERR_EINVAL_NORETURN (!this || !loc || (size < 1));
+
+        if (trace_fop_names[GF_FOP_READLINK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, size=%"GF_PRI_SIZET")",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        size);
+        }
+
+        STACK_WIND (frame, trace_readlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+                    loc, size);
+        return 0;
+}
+
+/*
+ * mknod fop.  Logs the location, mode and device number when GF_FOP_MKNOD
+ * tracing is enabled, then winds to the first child's mknod with
+ * trace_mknod_cbk as the callback.
+ */
+int32_t
+trace_mknod (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             mode_t mode,
+             dev_t dev)
+{
+        /* test loc itself before loc->path so that the check expression
+           never dereferences a NULL loc (same form as trace_mkdir) */
+        ERR_EINVAL_NORETURN (!this || !loc || !loc->path);
+
+        if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+                /* report ino as 0 when no inode is attached to loc, as the
+                   other entry-creating fops in this file do */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%d, dev=%"GF_PRI_DEV")",
+                        frame->root->unique, loc->path,
+                        ((loc->inode)? loc->inode->ino : 0), mode, dev);
+        }
+
+        STACK_WIND (frame,
+                    trace_mknod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod,
+                    loc,
+                    mode,
+                    dev);
+
+        return 0;
+}
+
+/*
+ * mkdir fop.  Logs path, inode number (0 when loc has no inode) and mode when
+ * GF_FOP_MKDIR tracing is enabled, then winds to the first child's mkdir with
+ * trace_mkdir_cbk as the callback.
+ */
+int32_t
+trace_mkdir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode)
+{
+        ERR_EINVAL_NORETURN (!this || !loc || !loc->path);
+
+        if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (path=%s, ino=%"PRIu64", mode=%d)",
+                        frame->root->unique,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0),
+                        mode);
+        }
+
+        STACK_WIND (frame, trace_mkdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode);
+
+        return 0;
+}
+
+/*
+ * unlink fop.  Logs the location when GF_FOP_UNLINK tracing is enabled, then
+ * winds to the first child's unlink with trace_unlink_cbk as the callback.
+ */
+int32_t
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino);
+        }
+
+        STACK_WIND (frame, trace_unlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc);
+
+        return 0;
+}
+
+/*
+ * rmdir fop.  Logs the location when GF_FOP_RMDIR tracing is enabled, then
+ * winds to the first child's rmdir with trace_rmdir_cbk as the callback.
+ */
+int32_t
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino);
+        }
+
+        STACK_WIND (frame, trace_rmdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+                    loc);
+        return 0;
+}
+
+/*
+ * symlink fop.  Logs the link target and the location (ino 0 when loc has no
+ * inode) when GF_FOP_SYMLINK tracing is enabled, then winds to the first
+ * child's symlink with trace_symlink_cbk as the callback.
+ */
+int32_t
+trace_symlink (call_frame_t *frame, xlator_t *this,
+               const char *linkpath, loc_t *loc)
+{
+        ERR_EINVAL_NORETURN (!this || !linkpath || !loc || !loc->path);
+
+        if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (linkpath=%s, loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        linkpath,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0));
+        }
+
+        STACK_WIND (frame, trace_symlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                    linkpath, loc);
+        return 0;
+}
+
+/*
+ * rename fop.  Logs path and ino of both locations when GF_FOP_RENAME tracing
+ * is enabled, then winds to the first child's rename with trace_rename_cbk as
+ * the callback.
+ */
+int32_t
+trace_rename (call_frame_t *frame, xlator_t *this,
+              loc_t *oldloc, loc_t *newloc)
+{
+        ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
+
+        if (trace_fop_names[GF_FOP_RENAME].enabled) {
+                /* unlike trace_link, the numbers come from loc->ino rather
+                   than loc->inode->ino */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
+                        "newloc{path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        oldloc->path, oldloc->ino,
+                        newloc->path, newloc->ino);
+        }
+
+        STACK_WIND (frame, trace_rename_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc);
+        return 0;
+}
+
+/*
+ * link fop.  Logs path and inode number of both locations when GF_FOP_LINK
+ * tracing is enabled, then winds to the first child's link with
+ * trace_link_cbk as the callback.
+ */
+int32_t
+trace_link (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc)
+{
+        ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
+
+        if (trace_fop_names[GF_FOP_LINK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
+                        "newloc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        oldloc->path, oldloc->inode->ino,
+                        newloc->path, newloc->inode->ino);
+        }
+
+        STACK_WIND (frame, trace_link_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc);
+
+        return 0;
+}
+
+/*
+ * chmod fop.  Logs the location and the mode (octal) when GF_FOP_CHMOD tracing
+ * is enabled, then winds to the first child's chmod with trace_chmod_cbk.
+ */
+int32_t
+trace_chmod (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_CHMOD].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%o)",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        mode);
+        }
+
+        STACK_WIND (frame, trace_chmod_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->chmod,
+                    loc, mode);
+        return 0;
+}
+
+/*
+ * chown fop.  Logs the location, uid and gid when GF_FOP_CHOWN tracing is
+ * enabled, then winds to the first child's chown with trace_chown_cbk.
+ */
+int32_t
+trace_chown (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, uid_t uid, gid_t gid)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_CHOWN].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, uid=%d, gid=%d)",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        uid,
+                        gid);
+        }
+
+        STACK_WIND (frame, trace_chown_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->chown,
+                    loc, uid, gid);
+        return 0;
+}
+
+/*
+ * truncate fop.  Logs the location and target offset when GF_FOP_TRUNCATE
+ * tracing is enabled, then winds to the first child's truncate with
+ * trace_truncate_cbk as the callback.
+ */
+int32_t
+trace_truncate (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, off_t offset)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, offset=%"PRId64")",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        offset);
+        }
+
+        STACK_WIND (frame, trace_truncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                    loc, offset);
+        return 0;
+}
+
+/*
+ * utimens fop.  When GF_FOP_UTIMENS tracing is enabled, logs the location and
+ * the formatted access (tv[0]) and modification (tv[1]) times; then winds to
+ * the first child's utimens with trace_utimens_cbk as the callback.
+ */
+int32_t
+trace_utimens (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               struct timespec tv[2])
+{
+        char      actime_str[256];
+        char      modtime_str[256];
+        struct tm tm_buf;
+
+        ERR_EINVAL_NORETURN (!this || !loc || !tv);
+
+        if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
+                /* localtime_r() writes into tm_buf instead of the static
+                   storage that localtime() hands out, so fops running
+                   concurrently cannot clobber each other's result. */
+                strftime (actime_str, sizeof (actime_str), "[%b %d %H:%M:%S]",
+                          localtime_r (&tv[0].tv_sec, &tm_buf));
+                strftime (modtime_str, sizeof (modtime_str), "[%b %d %H:%M:%S]",
+                          localtime_r (&tv[1].tv_sec, &tm_buf));
+
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, "
+                        "*tv=%p {actime=%s, modtime=%s})",
+                        frame->root->unique, loc->path, loc->inode->ino,
+                        tv, actime_str, modtime_str);
+        }
+
+        STACK_WIND (frame,
+                    trace_utimens_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->utimens,
+                    loc,
+                    tv);
+
+        return 0;
+}
+
+/*
+ * open fop.  Logs the location, flags and fd pointer when GF_FOP_OPEN tracing
+ * is enabled, then winds to the first child's open with trace_open_cbk.
+ */
+int32_t
+trace_open (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, fd_t *fd)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_OPEN].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=%d, fd=%p)",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        flags,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+
+        return 0;
+}
+
+/*
+ * create fop.  Logs the location, flags and mode (both octal) when
+ * GF_FOP_CREATE tracing is enabled, then winds to the first child's create
+ * with trace_create_cbk as the callback.
+ */
+int32_t
+trace_create (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              int32_t flags,
+              mode_t mode,
+              fd_t *fd)
+{
+        /* test loc itself before loc->path so that the check expression
+           never dereferences a NULL loc (same form as trace_mkdir) */
+        ERR_EINVAL_NORETURN (!this || !loc || !loc->path);
+
+        if (trace_fop_names[GF_FOP_CREATE].enabled) {
+                /* report ino as 0 when no inode is attached to loc, as the
+                   other entry-creating fops in this file do */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=0%o mode=0%o)",
+                        frame->root->unique, loc->path,
+                        ((loc->inode)? loc->inode->ino : 0), flags, mode);
+        }
+
+        STACK_WIND (frame,
+                    trace_create_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->create,
+                    loc,
+                    flags,
+                    mode,
+                    fd);
+        return 0;
+}
+
+/*
+ * readv fop.  Logs the fd pointer, size and offset when GF_FOP_READ tracing is
+ * enabled, then winds to the first child's readv with trace_readv_cbk.
+ */
+int32_t
+trace_readv (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t offset)
+{
+        ERR_EINVAL_NORETURN (!this || !fd || (size < 1));
+
+        if (trace_fop_names[GF_FOP_READ].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+                        frame->root->unique,
+                        fd,
+                        size,
+                        offset);
+        }
+
+        STACK_WIND (frame, trace_readv_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset);
+
+        return 0;
+}
+
+/*
+ * writev fop.  Logs the fd and vector pointers, count and offset when
+ * GF_FOP_WRITE tracing is enabled, then winds to the first child's writev with
+ * trace_writev_cbk as the callback.
+ */
+int32_t
+trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t offset)
+{
+        ERR_EINVAL_NORETURN (!this || !fd || !vector || (count < 1));
+
+        if (trace_fop_names[GF_FOP_WRITE].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p, *vector=%p, count=%d, offset=%"PRId64")",
+                        frame->root->unique,
+                        fd,
+                        vector,
+                        count,
+                        offset);
+        }
+
+        STACK_WIND (frame, trace_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, offset);
+
+        return 0;
+}
+
+/*
+ * statfs fop.  Logs the location (ino 0 when loc has no inode) when
+ * GF_FOP_STATFS tracing is enabled, then winds to the first child's statfs
+ * with trace_statfs_cbk as the callback.
+ */
+int32_t
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_STATFS].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
+                        frame->root->unique,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0));
+        }
+
+        STACK_WIND (frame,
+                    trace_statfs_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->statfs,
+                    loc);
+
+        return 0;
+}
+
+/*
+ * flush fop.  Logs the fd pointer when GF_FOP_FLUSH tracing is enabled, then
+ * winds to the first child's flush with trace_flush_cbk as the callback.
+ */
+int32_t
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p)", frame->root->unique, fd);
+        }
+
+        STACK_WIND (frame, trace_flush_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
+                    fd);
+
+        return 0;
+}
+
+
+/*
+ * fsync fop.  Logs the flags and fd pointer when GF_FOP_FSYNC tracing is
+ * enabled, then winds to the first child's fsync with trace_fsync_cbk.
+ */
+int32_t
+trace_fsync (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, int32_t flags)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (flags=%d, *fd=%p)",
+                        frame->root->unique,
+                        flags,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_fsync_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+                    fd, flags);
+
+        return 0;
+}
+
+/*
+ * setxattr fop.  Logs the location (ino 0 when loc has no inode), the dict
+ * pointer and flags when GF_FOP_SETXATTR tracing is enabled, then winds to
+ * the first child's setxattr with trace_setxattr_cbk as the callback.
+ */
+int32_t
+trace_setxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, dict_t *dict, int32_t flags)
+{
+        ERR_EINVAL_NORETURN (!this || !loc || !dict);
+
+        if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, dict=%p, flags=%d)",
+                        frame->root->unique,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0),
+                        dict,
+                        flags);
+        }
+
+        STACK_WIND (frame, trace_setxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                    loc, dict, flags);
+
+        return 0;
+}
+
+/*
+ * getxattr fop.  Logs the location (ino 0 when loc has no inode) and the
+ * requested attribute name when GF_FOP_GETXATTR tracing is enabled, then
+ * winds to the first child's getxattr with trace_getxattr_cbk as the callback.
+ */
+int32_t
+trace_getxattr (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                const char *name)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+                /* name is not covered by the check above, so it may be NULL
+                   here; handing NULL to %s is undefined behaviour, therefore
+                   substitute a printable placeholder.  The closing paren of
+                   the record now follows name=, like trace_removexattr. */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)",
+                        frame->root->unique, loc->path,
+                        ((loc->inode)? loc->inode->ino : 0),
+                        (name ? name : "(null)"));
+        }
+
+        STACK_WIND (frame,
+                    trace_getxattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr,
+                    loc,
+                    name);
+        return 0;
+}
+
+/*
+ * removexattr fop.  Logs the location (ino 0 when loc has no inode) and the
+ * attribute name when GF_FOP_REMOVEXATTR tracing is enabled, then winds to
+ * the first child's removexattr with trace_removexattr_cbk as the callback.
+ */
+int32_t
+trace_removexattr (call_frame_t *frame, xlator_t *this,
+                   loc_t *loc, const char *name)
+{
+        ERR_EINVAL_NORETURN (!this || !loc || !name);
+
+        if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)",
+                        frame->root->unique,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0),
+                        name);
+        }
+
+        STACK_WIND (frame, trace_removexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+                    loc, name);
+        return 0;
+}
+
+/*
+ * opendir fop.  Logs the location and fd pointer when GF_FOP_OPENDIR tracing
+ * is enabled, then winds to the first child's opendir with trace_opendir_cbk.
+ */
+int32_t
+trace_opendir (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, fd_t *fd)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64":( loc {path=%s, ino=%"PRIu64"}, fd=%p)",
+                        frame->root->unique,
+                        loc->path,
+                        loc->inode->ino,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_opendir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                    loc, fd);
+
+        return 0;
+}
+
+/*
+ * getdents fop.  Logs the fd pointer, size, offset and flag when
+ * GF_FOP_GETDENTS tracing is enabled, then winds to the first child's
+ * getdents with trace_getdents_cbk as the callback.
+ */
+int32_t
+trace_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                size_t size, off_t offset, int32_t flag)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_GETDENTS].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64", flag=0x%x)",
+                        frame->root->unique,
+                        fd,
+                        size,
+                        offset,
+                        flag);
+        }
+
+        STACK_WIND (frame, trace_getdents_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->getdents,
+                    fd, size, offset, flag);
+
+        return 0;
+}
+
+
+/*
+ * readdir fop.  Logs the fd pointer, size and offset when GF_FOP_READDIR
+ * tracing is enabled, then winds to the first child's readdir with
+ * trace_readdir_cbk as the callback.
+ */
+int32_t
+trace_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t offset)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_READDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+                        frame->root->unique,
+                        fd,
+                        size,
+                        offset);
+        }
+
+        STACK_WIND (frame, trace_readdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+                    fd, size, offset);
+        return 0;
+}
+
+
+/*
+ * fsyncdir fop.  Logs the datasync flag and fd pointer when GF_FOP_FSYNCDIR
+ * tracing is enabled, then winds to the first child's fsyncdir with
+ * trace_fsyncdir_cbk as the callback.
+ */
+int32_t
+trace_fsyncdir (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, int32_t datasync)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (datasync=%d, *fd=%p)",
+                        frame->root->unique,
+                        datasync,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_fsyncdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir,
+                    fd, datasync);
+
+        return 0;
+}
+
+/*
+ * access fop.  Logs the location (ino 0 when loc has no inode) and the mask
+ * (octal) when GF_FOP_ACCESS tracing is enabled, then winds to the first
+ * child's access with trace_access_cbk as the callback.
+ */
+int32_t
+trace_access (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t mask)
+{
+        ERR_EINVAL_NORETURN (!this || !loc);
+
+        if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*loc {path=%s, ino=%"PRIu64"}, mask=0%o)",
+                        frame->root->unique,
+                        loc->path,
+                        ((loc->inode)? loc->inode->ino : 0),
+                        mask);
+        }
+
+        STACK_WIND (frame, trace_access_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+                    loc, mask);
+
+        return 0;
+}
+
+/*
+ * ftruncate fop.  Logs the offset and fd pointer when GF_FOP_FTRUNCATE tracing
+ * is enabled, then winds to the first child's ftruncate with
+ * trace_ftruncate_cbk as the callback.
+ */
+int32_t
+trace_ftruncate (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, off_t offset)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (offset=%"PRId64", *fd=%p)",
+                        frame->root->unique,
+                        offset,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_ftruncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset);
+        return 0;
+}
+
+/*
+ * fchown fop.  Logs the fd pointer, uid and gid when GF_FOP_FCHOWN tracing is
+ * enabled, then winds to the first child's fchown with trace_fchown_cbk.
+ */
+int32_t
+trace_fchown (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, uid_t uid, gid_t gid)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p, uid=%d, gid=%d)",
+                        frame->root->unique,
+                        fd,
+                        uid,
+                        gid);
+        }
+
+        STACK_WIND (frame, trace_fchown_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fchown,
+                    fd, uid, gid);
+
+        return 0;
+}
+
+/*
+ * fchmod fop.  Logs the mode (octal) and fd pointer when GF_FOP_FCHMOD tracing
+ * is enabled, then winds to the first child's fchmod with trace_fchmod_cbk.
+ */
+int32_t
+trace_fchmod (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, mode_t mode)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (mode=%o, *fd=%p)",
+                        frame->root->unique,
+                        mode,
+                        fd);
+        }
+
+        STACK_WIND (frame, trace_fchmod_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fchmod,
+                    fd, mode);
+
+        return 0;
+}
+
+/*
+ * fstat fop.  Logs the fd pointer when GF_FOP_FSTAT tracing is enabled, then
+ * winds to the first child's fstat with trace_fstat_cbk as the callback.
+ */
+int32_t
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p)", frame->root->unique, fd);
+        }
+
+        STACK_WIND (frame, trace_fstat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd);
+
+        return 0;
+}
+
+/*
+ * lk fop.  Logs the fd pointer, command and the fields of *lock when
+ * GF_FOP_LK tracing is enabled, then winds to the first child's lk with
+ * trace_lk_cbk as the callback.
+ */
+int32_t
+trace_lk (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_LK].enabled) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
+                        "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
+                        frame->root->unique, fd, cmd,
+                        lock->l_type, lock->l_whence,
+                        lock->l_start, lock->l_len, lock->l_pid);
+        }
+
+        STACK_WIND (frame, trace_lk_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, lock);
+
+        return 0;
+}
+
+/*
+ * setdents fop.  Logs the fd pointer, flags and entry count when
+ * GF_FOP_SETDENTS tracing is enabled, then winds to the first child's
+ * setdents with trace_setdents_cbk as the callback.
+ */
+int32_t
+trace_setdents (call_frame_t *frame,
+                xlator_t *this,
+                fd_t *fd,
+                int32_t flags,
+                dir_entry_t *entries,
+                int32_t count)
+{
+        /* same argument check as the other fd-based fops in this file */
+        ERR_EINVAL_NORETURN (!this || !fd);
+
+        if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
+                /* the record was missing its closing ')' */
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "%"PRId64": (*fd=%p, flags=%d, count=%d)",
+                        frame->root->unique, fd, flags, count);
+        }
+
+        STACK_WIND (frame,
+                    trace_setdents_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setdents,
+                    fd,
+                    flags,
+                    entries,
+                    count);
+        return 0;
+}
+
+
+/* checksum callback: always logs op_ret/op_errno (no per-fop enable check),
+   then unwinds with both checksum buffers. */
+int32_t
+trace_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    uint8_t *fchecksum, uint8_t *dchecksum)
+{
+        gf_log (this->name, GF_LOG_NORMAL,
+                "%"PRId64": op_ret (%d), op_errno(%d)",
+                frame->root->unique,
+                op_ret,
+                op_errno);
+
+        STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+        return 0;
+}
+
+/* checksum fop: always logs the path and flag (no per-fop enable check), then
+   winds to the first child's checksum with trace_checksum_cbk. */
+int32_t
+trace_checksum (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int32_t flag)
+{
+        gf_log (this->name, GF_LOG_NORMAL,
+                "%"PRId64": loc->path (%s) flag (%d)",
+                frame->root->unique,
+                loc->path,
+                flag);
+
+        STACK_WIND (frame, trace_checksum_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->checksum,
+                    loc, flag);
+        return 0;
+}
+
+
+/* stats callback: always logs op_ret/op_errno (no enable check), then unwinds
+   with the stats pointer. */
+int32_t
+trace_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct xlator_stats *stats)
+{
+        gf_log (this->name, GF_LOG_NORMAL,
+                "%"PRId64": op_ret (%d), op_errno(%d)",
+                frame->root->unique,
+                op_ret,
+                op_errno);
+
+        STACK_UNWIND (frame, op_ret, op_errno, stats);
+
+        return 0;
+}
+
+/* stats management op: always logs the flags (no enable check), then winds to
+   the first child's mops->stats with trace_stats_cbk. */
+int32_t
+trace_stats (call_frame_t *frame, xlator_t *this, int32_t flags)
+{
+        ERR_EINVAL_NORETURN (!this);
+
+        gf_log (this->name, GF_LOG_NORMAL,
+                "%"PRId64": (flags=%d)", frame->root->unique, flags);
+
+        STACK_WIND (frame, trace_stats_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->mops->stats,
+                    flags);
+        return 0;
+}
+
+/* Set the 'enabled' flag of every trace_fop_names[] entry to the given value. */
+void
+enable_all_calls (int enabled)
+{
+        int fop = 0;
+
+        while (fop < GF_FOP_MAXVALUE) {
+                trace_fop_names[fop].enabled = enabled;
+                fop++;
+        }
+}
+
+/* Set the 'enabled' flag of every trace_fop_names[] entry whose name equals
+   'name' (compared case-insensitively) to the given value. */
+void
+enable_call (const char *name, int enabled)
+{
+        int fop;
+
+        for (fop = 0; fop < GF_FOP_MAXVALUE; fop++) {
+                if (strcasecmp(trace_fop_names[fop].name, name) != 0)
+                        continue;
+
+                trace_fop_names[fop].enabled = enabled;
+        }
+}
+
+
+/*
+  Apply a comma-separated list of fop names.
+
+  include = 1 for "include-ops": everything is disabled first and only the
+              listed calls are enabled
+          = 0 for "exclude-ops": everything is enabled first and the listed
+              calls are disabled
+
+  The list is tokenised in place with strsep().
+*/
+void
+process_call_list (const char *list, int include)
+{
+        char *cursor = (char *) list;
+        char *call   = NULL;
+
+        enable_all_calls (!include);
+
+        for (call = strsep (&cursor, ",");
+             call != NULL;
+             call = strsep (&cursor, ","))
+                enable_call (call, include);
+}
+
+
+/*
+ * Translator init.  Requires exactly one child, seeds trace_fop_names[] from
+ * gf_fop_list[] with every fop enabled, applies "include-ops" or
+ * "exclude-ops" (the two are mutually exclusive), sets the log level to
+ * GF_LOG_NORMAL and shares the child's inode table.
+ *
+ * Returns 0 on success, -1 when 'this' is NULL, the child count is not one,
+ * or both option lists are given.
+ */
+int32_t
+init (xlator_t *this)
+{
+        dict_t *options  = NULL;
+        char   *includes = NULL, *excludes = NULL;
+        int     i        = 0;
+
+        /* 'this' must be validated before any of its members is read;
+           options used to be fetched from it ahead of this check */
+        if (!this)
+                return -1;
+
+        options = this->options;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "trace translator requires one subvolume");
+                return -1;
+        }
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        includes = data_to_str (dict_get (options, "include-ops"));
+        excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+        /* default: every fop is named and traced; ":O" stands in for fops
+           that have no entry in gf_fop_list[] */
+        for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+                trace_fop_names[i].name = (gf_fop_list[i] ?
+                                           gf_fop_list[i] : ":O");
+                trace_fop_names[i].enabled = 1;
+        }
+
+        if (includes && excludes) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "must specify only one of 'include-ops' and 'exclude-ops'");
+                return -1;
+        }
+        if (includes)
+                process_call_list (includes, 1);
+        if (excludes)
+                process_call_list (excludes, 0);
+
+        gf_log_set_loglevel (GF_LOG_NORMAL);
+
+        /* Set this translator's inode table pointer to child node's pointer. */
+        this->itable = FIRST_CHILD (this)->itable;
+
+        return 0;
+}
+
+/* Translator teardown: logs that the trace translator was unloaded.  Does
+   nothing when 'this' is NULL. */
+void
+fini (xlator_t *this)
+{
+        if (this) {
+                gf_log (this->name, GF_LOG_NORMAL,
+                        "trace translator unloaded");
+        }
+}
+
+/* File-operation table of the trace translator: each fop is routed to its
+   trace_* wrapper.  Entries are grouped by kind. */
+struct xlator_fops fops = {
+        /* namespace / path based */
+        .lookup      = trace_lookup,
+        .stat        = trace_stat,
+        .access      = trace_access,
+        .readlink    = trace_readlink,
+        .mknod       = trace_mknod,
+        .mkdir       = trace_mkdir,
+        .unlink      = trace_unlink,
+        .rmdir       = trace_rmdir,
+        .symlink     = trace_symlink,
+        .rename      = trace_rename,
+        .link        = trace_link,
+        .checksum    = trace_checksum,
+
+        /* attributes by path */
+        .chmod       = trace_chmod,
+        .chown       = trace_chown,
+        .truncate    = trace_truncate,
+        .utimens     = trace_utimens,
+        .statfs      = trace_statfs,
+
+        /* file descriptors */
+        .open        = trace_open,
+        .create      = trace_create,
+        .readv       = trace_readv,
+        .writev      = trace_writev,
+        .flush       = trace_flush,
+        .fsync       = trace_fsync,
+        .ftruncate   = trace_ftruncate,
+        .fstat       = trace_fstat,
+        .fchown      = trace_fchown,
+        .fchmod      = trace_fchmod,
+
+        /* directories */
+        .opendir     = trace_opendir,
+        .readdir     = trace_readdir,
+        .getdents    = trace_getdents,
+        .setdents    = trace_setdents,
+        .fsyncdir    = trace_fsyncdir,
+
+        /* extended attributes */
+        .setxattr    = trace_setxattr,
+        .getxattr    = trace_getxattr,
+        .removexattr = trace_removexattr,
+        .xattrop     = trace_xattrop,
+        .fxattrop    = trace_fxattrop,
+
+        /* locking */
+        .lk          = trace_lk,
+        .inodelk     = trace_inodelk,
+        .finodelk    = trace_finodelk,
+        .entrylk     = trace_entrylk,
+};
+
+/* Management-operation table: only stats is wrapped. */
+struct xlator_mops mops = { .stats = trace_stats };
+
+/* Callback table: intentionally empty, the trace translator registers no
+   callbacks here. */
+struct xlator_cbks cbks = {
+};
+
+/* Volume options of the trace translator; both are string valued and the
+   table is terminated by an entry with a NULL key. */
+struct volume_options options[] = {
+        {
+                .key  = {"include-ops", "include"},
+                .type = GF_OPTION_TYPE_STR,
+        },
+        {
+                .key  = {"exclude-ops", "exclude"},
+                .type = GF_OPTION_TYPE_STR,
+        },
+        {
+                .key  = {NULL},
+        },
+};
+
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
new file mode 100644
index 00000000000..2cbde680fac
--- /dev/null
+++ b/xlators/encryption/Makefile.am
@@ -0,0 +1,3 @@
+# Sub-directories automake descends into for the encryption translators.
+SUBDIRS = rot-13
+
+CLEANFILES =
diff --git a/xlators/encryption/rot-13/Makefile.am b/xlators/encryption/rot-13/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/encryption/rot-13/Makefile.am
@@ -0,0 +1,3 @@
+# The rot-13 translator keeps all of its sources under src/.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
new file mode 100644
index 00000000000..ba5e623d8e2
--- /dev/null
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -0,0 +1,14 @@
+# Build rot-13 as a loadable translator module installed under
+# $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption.
+xlator_LTLIBRARIES = rot-13.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+# libtool spells this option "-avoid-version"; "-avoidversion" is not a
+# libtool link-mode flag.
+rot_13_la_LDFLAGS = -module -avoid-version
+
+rot_13_la_SOURCES = rot-13.c
+rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = rot-13.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
new file mode 100644
index 00000000000..7cae46134c2
--- /dev/null
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+
+#include "rot-13.h"
+
+/*
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
+ * This xlator is meant as an example, NOT FOR PRODUCTION
+ * USE ;) (hence no error-checking)
+ */
+
+/*
+ * rot13 - apply the ROT13 substitution in place to the first @len bytes
+ * of @buf.
+ *
+ * ASCII letters are rotated by 13 positions within their own case; every
+ * other byte is left untouched. A @len of zero or less does nothing.
+ */
+void
+rot13 (char *buf, int len)
+{
+        int pos = 0;
+
+        while (pos < len) {
+                char c    = buf[pos];
+                char base = 0;
+
+                if (c >= 'a' && c <= 'z')
+                        base = 'a';
+                else if (c >= 'A' && c <= 'Z')
+                        base = 'A';
+
+                /* base stays 0 for anything that is not an ASCII letter */
+                if (base)
+                        buf[pos] = base + ((c - base + 13) % 26);
+
+                pos++;
+        }
+}
+
+/*
+ * rot13_iovec - run rot13() over each of the @count segments of @vector,
+ * transforming every segment's buffer in place.
+ */
+void
+rot13_iovec (struct iovec *vector, int count)
+{
+        struct iovec *seg = NULL;
+        int           n   = 0;
+
+        for (n = 0; n < count; n++) {
+                seg = &vector[n];
+                rot13 (seg->iov_base, seg->iov_len);
+        }
+}
+
+/*
+ * rot13_readv_cbk - completion callback for rot13_readv().
+ *
+ * When the translator's decrypt_read flag is set, the returned buffers are
+ * passed through rot13_iovec() in place. The result is then unwound to the
+ * caller with all arguments otherwise unchanged.
+ */
+int32_t
+rot13_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        rot_13_private_t *conf = this->private;
+
+        if (conf->decrypt_read) {
+                rot13_iovec (vector, count);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+/*
+ * rot13_readv - forward a read request unchanged to the first child;
+ * the reply is handled by rot13_readv_cbk().
+ */
+int32_t
+rot13_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+             size_t size, off_t offset)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, rot13_readv_cbk,
+                    child, child->fops->readv,
+                    fd, size, offset);
+        return 0;
+}
+
+/*
+ * rot13_writev_cbk - completion callback for rot13_writev(); hands the
+ * child's result back to the caller without modification.
+ */
+int32_t
+rot13_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+/*
+ * rot13_writev - write path of the rot13 translator.
+ *
+ * When the encrypt_write flag is set, the caller's buffers are transformed
+ * in place with rot13_iovec() before the request is forwarded to the first
+ * child. Note that this modifies the memory referenced by @vector.
+ */
+int32_t
+rot13_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t offset)
+{
+        rot_13_private_t *conf  = this->private;
+        xlator_t         *child = NULL;
+
+        if (conf->encrypt_write) {
+                rot13_iovec (vector, count);
+        }
+
+        child = FIRST_CHILD (this);
+        STACK_WIND (frame, rot13_writev_cbk,
+                    child, child->fops->writev,
+                    fd, vector, count, offset);
+        return 0;
+}
+
+/*
+ * init - setup hook of the rot13 translator.
+ *
+ * Requires exactly one child volume and warns when the volume has no
+ * parent. Allocates the private state with both encrypt_write and
+ * decrypt_read enabled, then lets the "encrypt-write" and "decrypt-read"
+ * options override those defaults.
+ *
+ * Returns 0 on success and -1 on a bad child count or when an option value
+ * is not a boolean. On failure the private state is released and
+ * this->private is left untouched.
+ */
+int32_t
+init (xlator_t *this)
+{
+        data_t           *data = NULL;
+        rot_13_private_t *priv = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log ("rot13", GF_LOG_ERROR,
+                        "FATAL: rot13 should have exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = CALLOC (1, sizeof (*priv));
+        ERR_ABORT (priv);
+
+        /* both directions are on unless the volfile says otherwise */
+        priv->decrypt_read  = 1;
+        priv->encrypt_write = 1;
+
+        data = dict_get (this->options, "encrypt-write");
+        if (data &&
+            gf_string2boolean (data->data, &priv->encrypt_write) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "encrypt-write takes only boolean options");
+                goto err;
+        }
+
+        data = dict_get (this->options, "decrypt-read");
+        if (data &&
+            gf_string2boolean (data->data, &priv->decrypt_read) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "decrypt-read takes only boolean options");
+                goto err;
+        }
+
+        this->private = priv;
+        gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
+        return 0;
+
+err:
+        /* priv was never published through this->private: free it here */
+        FREE (priv);
+        return -1;
+}
+
+/*
+ * fini - teardown hook of the rot13 translator; releases the private
+ * state that init() attached to @this. this->private itself is not
+ * reassigned here.
+ */
+void
+fini (xlator_t *this)
+{
+        rot_13_private_t *conf = this->private;
+
+        FREE (conf);
+}
+
+/* Only readv and writev are given rot13-specific handlers in this table. */
+struct xlator_fops fops = {
+ .readv = rot13_readv,
+ .writev = rot13_writev
+};
+
+/* Management-operation table: intentionally empty. */
+struct xlator_mops mops = {
+};
+
+
+/*
+ * Volume options read by init(): both are booleans and both default to
+ * enabled when absent. The table ends with an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"encrypt-write"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
new file mode 100644
index 00000000000..43e60c32615
--- /dev/null
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __ROT_13_H__
+#define __ROT_13_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* Per-translator state of rot13, stored in xlator_t::private. */
+typedef struct {
+ /* when true, buffers are rot13'ed before being written to the child */
+ gf_boolean_t encrypt_write;
+ /* when true, buffers read from the child are rot13'ed before return */
+ gf_boolean_t decrypt_read;
+} rot_13_private_t;
+
+#endif /* __ROT_13_H__ */
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
new file mode 100644
index 00000000000..9ac9b6f19de
--- /dev/null
+++ b/xlators/features/Makefile.am
@@ -0,0 +1,3 @@
+# Feature translators automake descends into.
+SUBDIRS = locks trash path-convertor filter quota
+
+CLEANFILES =
diff --git a/xlators/features/filter/Makefile.am b/xlators/features/filter/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/features/filter/Makefile.am
@@ -0,0 +1,3 @@
+# The filter translator keeps all of its sources under src/.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
new file mode 100644
index 00000000000..fa0b92214a9
--- /dev/null
+++ b/xlators/features/filter/src/Makefile.am
@@ -0,0 +1,13 @@
+# Build filter as a loadable translator module installed under
+# $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features.
+xlator_LTLIBRARIES = filter.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool spells this option "-avoid-version"; "-avoidversion" is not a
+# libtool link-mode flag.
+filter_la_LDFLAGS = -module -avoid-version
+
+filter_la_SOURCES = filter.c
+filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
new file mode 100644
index 00000000000..67ea45d3a23
--- /dev/null
+++ b/xlators/features/filter/src/filter.c
@@ -0,0 +1,1768 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+
+/* Well-known ids; the ROOT pair is what update_stat() exempts from mapping. */
+#define GF_FILTER_NOBODY_UID 65534
+#define GF_FILTER_NOBODY_GID 65534
+#define GF_FILTER_ROOT_UID 0
+#define GF_FILTER_ROOT_GID 0
+
+/* Capacity of each translate/filter range table in struct gf_filter. */
+#define GF_MAXIMUM_FILTERING_ALLOWED 32
+
+/*
+ option root-filtering on (off by default)
+ option translate-uid <uid-range=newuid,uid=newuid>
+ option translate-gid <gid-range=newgid,gid=newgid>
+ option read-only <yes|true>
+ option fixed-uid <uid>
+ option fixed-gid <gid>
+ option filter-uid <uid-range,uid>
+ option filter-gid <gid-range,gid> // not supported yet
+
+*/
+
+/*
+ * Private configuration of the filter translator.
+ *
+ * Range tables hold pairs: index [0] is the lower bound and index [1] the
+ * upper bound, both compared inclusively by update_frame()/update_stat().
+ */
+struct gf_filter {
+ /* Flags */
+ gf_boolean_t complete_read_only; /* update_frame() reports GF_FILTER_RO_FS */
+ char fixed_uid_set; /* update_stat() overwrites st_uid with fixed_uid */
+ char fixed_gid_set; /* update_stat() overwrites st_gid with fixed_gid */
+ char partial_filter; /* enables the filter_input_uid check */
+
+ /* Options */
+ /* Mapping/Filtering/Translate whatever you want to call */
+ int translate_num_uid_entries; /* used entries in the uid tables below */
+ int translate_num_gid_entries; /* used entries in the gid tables below */
+ int translate_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2];
+ int translate_output_uid[GF_MAXIMUM_FILTERING_ALLOWED];
+ int translate_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2];
+ int translate_output_gid[GF_MAXIMUM_FILTERING_ALLOWED];
+
+ /* Fixed uid/gid */
+ int fixed_uid;
+ int fixed_gid;
+
+ /* Filter */
+ int filter_num_uid_entries; /* used entries in filter_input_uid */
+ int filter_num_gid_entries;
+ int filter_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2];
+ int filter_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2];
+
+};
+
+/* update_frame: The main logic of the whole translator.
+ Return values:
+ 0: no change
+ // TRANSLATE
+ 1: only uid changed
+ 2: only gid changed
+ 3: both uid/gid changed
+ // FILTER
+ 4: uid in filter range
+ 5: gid in filter range // not supported yet
+ 6: complete fs is readonly
+*/
+
+/* Result codes of update_frame(); the fop handlers switch on these. */
+#define GF_FILTER_NO_CHANGE 0 /* caller matched no rule */
+#define GF_FILTER_MAP_UID 1 /* only the uid matched a translate range */
+#define GF_FILTER_MAP_GID 2 /* only the gid matched a translate range */
+#define GF_FILTER_MAP_BOTH 3 /* uid and gid both matched */
+#define GF_FILTER_FILTER_UID 4 /* stored uid lies in a filter range */
+#define GF_FILTER_FILTER_GID 5 /* never returned by update_frame() here */
+#define GF_FILTER_RO_FS 6 /* complete_read_only is set */
+
+/*
+ * update_frame - classify the caller of @frame against the configured
+ * translate and filter rules.
+ *
+ * The translate tables are consulted first: the first uid range containing
+ * the caller's uid yields GF_FILTER_MAP_UID unless the uid stored in the
+ * inode context equals the caller's uid; a matching gid range then turns
+ * the result into GF_FILTER_MAP_GID or GF_FILTER_MAP_BOTH.
+ *
+ * Two conditions override that result: a completely read-only filter gives
+ * GF_FILTER_RO_FS, and with partial filtering enabled a stored uid inside a
+ * filter range gives GF_FILTER_FILTER_UID.
+ *
+ * Returns one of the GF_FILTER_* codes.
+ */
+static int32_t
+update_frame (call_frame_t *frame,
+              inode_t *inode,
+              struct gf_filter *filter)
+{
+        int32_t  verdict = GF_FILTER_NO_CHANGE;
+        int32_t  i       = 0;
+        int32_t  ctx_ret = 0;
+        uint64_t ctx_val = 0;
+        uid_t    owner   = 0;
+
+        /* uid translation: the first matching range decides */
+        for (i = 0; i < filter->translate_num_uid_entries; i++) {
+                if ((frame->root->uid < filter->translate_input_uid[i][0]) ||
+                    (frame->root->uid > filter->translate_input_uid[i][1]))
+                        continue;
+
+                ctx_ret = inode_ctx_get (inode, frame->this, &ctx_val);
+                owner = (uid_t)ctx_val;
+                /* no stored uid, or a stored uid that is not the caller */
+                if ((ctx_ret != 0) || (frame->root->uid != owner))
+                        verdict = GF_FILTER_MAP_UID;
+                break;
+        }
+
+        /* gid translation: the first matching range decides */
+        for (i = 0; i < filter->translate_num_gid_entries; i++) {
+                if ((frame->root->gid < filter->translate_input_gid[i][0]) ||
+                    (frame->root->gid > filter->translate_input_gid[i][1]))
+                        continue;
+
+                verdict = (verdict == GF_FILTER_NO_CHANGE)
+                        ? GF_FILTER_MAP_GID
+                        : GF_FILTER_MAP_BOTH;
+                break;
+        }
+
+        if (filter->complete_read_only)
+                return GF_FILTER_RO_FS;
+
+        if (!filter->partial_filter)
+                return verdict;
+
+        ctx_ret = inode_ctx_get (inode, frame->this, &ctx_val);
+        owner = (uid_t)ctx_val;
+        if (ctx_ret == -1)
+                return verdict;
+
+        for (i = 0; i < filter->filter_num_uid_entries; i++) {
+                if ((owner >= filter->filter_input_uid[i][0]) &&
+                    (owner <= filter->filter_input_uid[i][1]))
+                        return GF_FILTER_FILTER_UID;
+        }
+
+        return verdict;
+}
+
+/*
+ * update_stat - rewrite the owner fields of @stbuf according to @filter.
+ *
+ * A uid/gid that falls into a translate range is replaced by the matching
+ * output id (first match wins); root's uid and gid are never translated.
+ * Afterwards a configured fixed uid/gid unconditionally overrides the
+ * respective field. Always returns 0.
+ */
+static int32_t
+update_stat (struct stat *stbuf,
+             struct gf_filter *filter)
+{
+        int32_t i = 0;
+
+        if (stbuf->st_uid != GF_FILTER_ROOT_UID) {
+                for (i = 0; i < filter->translate_num_uid_entries; i++) {
+                        if ((stbuf->st_uid <
+                             filter->translate_input_uid[i][0]) ||
+                            (stbuf->st_uid >
+                             filter->translate_input_uid[i][1]))
+                                continue;
+                        stbuf->st_uid = filter->translate_output_uid[i];
+                        break;
+                }
+        }
+
+        if (stbuf->st_gid != GF_FILTER_ROOT_GID) {
+                for (i = 0; i < filter->translate_num_gid_entries; i++) {
+                        if ((stbuf->st_gid <
+                             filter->translate_input_gid[i][0]) ||
+                            (stbuf->st_gid >
+                             filter->translate_input_gid[i][1]))
+                                continue;
+                        stbuf->st_gid = filter->translate_output_gid[i];
+                        break;
+                }
+        }
+
+        if (filter->fixed_uid_set)
+                stbuf->st_uid = filter->fixed_uid;
+
+        if (filter->fixed_gid_set)
+                stbuf->st_gid = filter->fixed_gid;
+
+        return 0;
+}
+
+/*
+ * filter_lookup_cbk - completion callback for filter_lookup().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the inode context, where update_frame() later reads it. A
+ * failure to store the context is only logged.
+ */
+static int32_t
+filter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct stat *buf, dict_t *dict)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
+        return 0;
+}
+
+/*
+ * filter_lookup - pass the lookup straight to the first child; the reply
+ * is post-processed by filter_lookup_cbk().
+ */
+int32_t
+filter_lookup (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, dict_t *xattr_req)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_lookup_cbk,
+                    child, child->fops->lookup,
+                    loc, xattr_req);
+        return 0;
+}
+
+
+/*
+ * filter_stat_cbk - completion callback for filter_stat(); on success the
+ * returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_stat - pass the stat request to the first child without any
+ * access check; the reply goes through filter_stat_cbk().
+ */
+int32_t
+filter_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_stat_cbk,
+                    child, child->fops->stat,
+                    loc);
+        return 0;
+}
+
+/*
+ * filter_chmod_cbk - completion callback for filter_chmod(); on success
+ * the returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_chmod - gate a chmod on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the inode is group- or
+ * other-writable; when both ids were mapped only other-writable counts.
+ * Otherwise the call is unwound with EPERM. Filtered uids/gids and a
+ * read-only filter unwind with EROFS. Everything else is forwarded to the
+ * first child.
+ */
+int32_t
+filter_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        int32_t verdict = update_frame (frame, loc->inode, this->private);
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                int allowed = 0;
+
+                if ((verdict == GF_FILTER_MAP_UID) &&
+                    (loc->inode->st_mode & S_IWGRP))
+                        allowed = 1;
+                else if (loc->inode->st_mode & S_IWOTH)
+                        allowed = 1;
+
+                if (!allowed) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        } else if ((verdict == GF_FILTER_FILTER_UID) ||
+                   (verdict == GF_FILTER_FILTER_GID) ||
+                   (verdict == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_chmod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->chmod,
+                    loc, mode);
+        return 0;
+}
+
+
+/*
+ * filter_fchmod_cbk - completion callback for filter_fchmod(); on success
+ * the returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_fchmod - pass the fchmod to the first child without any access
+ * check; the reply goes through filter_fchmod_cbk().
+ */
+int32_t
+filter_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_fchmod_cbk,
+                    child, child->fops->fchmod,
+                    fd, mode);
+        return 0;
+}
+
+/*
+ * filter_chown_cbk - completion callback for filter_chown(); on success
+ * the returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_chown - gate a chown on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the inode is group- or
+ * other-writable; when both ids were mapped only other-writable counts.
+ * Otherwise the call is unwound with EPERM. Filtered uids/gids and a
+ * read-only filter unwind with EROFS. Everything else is forwarded to the
+ * first child.
+ */
+int32_t
+filter_chown (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              uid_t uid, gid_t gid)
+{
+        int32_t verdict = update_frame (frame, loc->inode, this->private);
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                int allowed = 0;
+
+                if ((verdict == GF_FILTER_MAP_UID) &&
+                    (loc->inode->st_mode & S_IWGRP))
+                        allowed = 1;
+                else if (loc->inode->st_mode & S_IWOTH)
+                        allowed = 1;
+
+                if (!allowed) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        } else if ((verdict == GF_FILTER_FILTER_UID) ||
+                   (verdict == GF_FILTER_FILTER_GID) ||
+                   (verdict == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_chown_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->chown,
+                    loc, uid, gid);
+        return 0;
+}
+
+/*
+ * filter_fchown_cbk - completion callback for filter_fchown(); on success
+ * the returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_fchown - pass the fchown to the first child without any access
+ * check; the reply goes through filter_fchown_cbk().
+ */
+int32_t
+filter_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               uid_t uid, gid_t gid)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_fchown_cbk,
+                    child, child->fops->fchown,
+                    fd, uid, gid);
+        return 0;
+}
+
+/*
+ * filter_truncate_cbk - completion callback for filter_truncate(); on
+ * success the returned stat has its owner fields rewritten by
+ * update_stat().
+ */
+static int32_t
+filter_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_truncate - gate a truncate on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the inode is group- or
+ * other-writable; when both ids were mapped only other-writable counts.
+ * Otherwise the call is unwound with EPERM. Filtered uids/gids and a
+ * read-only filter unwind with EROFS. Everything else is forwarded to the
+ * first child.
+ */
+int32_t
+filter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 off_t offset)
+{
+        int32_t verdict = update_frame (frame, loc->inode, this->private);
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                int allowed = 0;
+
+                if ((verdict == GF_FILTER_MAP_UID) &&
+                    (loc->inode->st_mode & S_IWGRP))
+                        allowed = 1;
+                else if (loc->inode->st_mode & S_IWOTH)
+                        allowed = 1;
+
+                if (!allowed) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        } else if ((verdict == GF_FILTER_FILTER_UID) ||
+                   (verdict == GF_FILTER_FILTER_GID) ||
+                   (verdict == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate,
+                    loc, offset);
+        return 0;
+}
+
+/*
+ * filter_ftruncate_cbk - completion callback for filter_ftruncate(); on
+ * success the returned stat has its owner fields rewritten by
+ * update_stat().
+ */
+static int32_t
+filter_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_ftruncate - pass the ftruncate to the first child without any
+ * access check; the reply goes through filter_ftruncate_cbk().
+ */
+int32_t
+filter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_ftruncate_cbk,
+                    child, child->fops->ftruncate,
+                    fd, offset);
+        return 0;
+}
+
+/*
+ * filter_utimens_cbk - completion callback for filter_utimens(); on
+ * success the returned stat has its owner fields rewritten by
+ * update_stat().
+ */
+int32_t
+filter_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * filter_utimens - gate a timestamp update on the result of
+ * update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the inode is group- or
+ * other-writable; when both ids were mapped only other-writable counts.
+ * Otherwise the call is unwound with EPERM. Filtered uids/gids and a
+ * read-only filter unwind with EROFS. Everything else is forwarded to the
+ * first child.
+ */
+int32_t
+filter_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                struct timespec tv[2])
+{
+        int32_t verdict = update_frame (frame, loc->inode, this->private);
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                int allowed = 0;
+
+                if ((verdict == GF_FILTER_MAP_UID) &&
+                    (loc->inode->st_mode & S_IWGRP))
+                        allowed = 1;
+                else if (loc->inode->st_mode & S_IWOTH)
+                        allowed = 1;
+
+                if (!allowed) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        } else if ((verdict == GF_FILTER_FILTER_UID) ||
+                   (verdict == GF_FILTER_FILTER_GID) ||
+                   (verdict == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_utimens_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->utimens,
+                    loc, tv);
+        return 0;
+}
+
+/*
+ * filter_readlink_cbk - completion callback for filter_readlink(); hands
+ * the child's result back unchanged.
+ */
+static int32_t
+filter_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, const char *path)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, path);
+        return 0;
+}
+
+/*
+ * filter_readlink - gate a readlink on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the inode is group- or
+ * other-readable; when both ids were mapped only other-readable counts.
+ * Otherwise the call is unwound with EPERM. Every other result, including
+ * the filter/read-only codes, is forwarded to the first child.
+ */
+int32_t
+filter_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 size_t size)
+{
+        int32_t verdict = update_frame (frame, loc->inode, this->private);
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                int allowed = 0;
+
+                if ((verdict == GF_FILTER_MAP_UID) &&
+                    (loc->inode->st_mode & S_IRGRP))
+                        allowed = 1;
+                else if (loc->inode->st_mode & S_IROTH)
+                        allowed = 1;
+
+                if (!allowed) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        }
+
+        STACK_WIND (frame, filter_readlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readlink,
+                    loc, size);
+        return 0;
+}
+
+
+/*
+ * filter_mknod_cbk - completion callback for filter_mknod().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the new inode's context; a failure to store it is only
+ * logged.
+ */
+static int32_t
+filter_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * filter_mknod - gate a mknod on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the parent directory is
+ * group- or other-writable; when both ids were mapped only other-writable
+ * counts. Otherwise the call is unwound with EPERM. Filtered uids/gids and
+ * a read-only filter unwind with EROFS. Everything else is forwarded to
+ * the first child.
+ *
+ * Every error unwind passes NULL for the (inode, buf) arguments that
+ * filter_mknod_cbk-style callbacks expect.
+ */
+int32_t
+filter_mknod (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              mode_t mode,
+              dev_t rdev)
+{
+        int      ret    = 0;
+        inode_t *parent = loc->parent;
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent->st_mode & S_IWGRP)
+                        break;
+                /* fall through: an other-writable parent also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                /* match the callback arity, as the EROFS path does */
+                STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_mknod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, rdev);
+        return 0;
+}
+
+/*
+ * filter_mkdir_cbk - completion callback for filter_mkdir().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the new inode's context; a failure to store it is only
+ * logged.
+ */
+static int32_t
+filter_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * filter_mkdir - gate a mkdir on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the parent directory is
+ * group- or other-writable; when both ids were mapped only other-writable
+ * counts. Otherwise the call is unwound with EPERM. Filtered uids/gids and
+ * a read-only filter unwind with EROFS. Everything else is forwarded to
+ * the first child.
+ *
+ * Every error unwind passes NULL for the (inode, buf) arguments that the
+ * mkdir callback expects.
+ */
+int32_t
+filter_mkdir (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              mode_t mode)
+{
+        int      ret    = 0;
+        inode_t *parent = loc->parent;
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent->st_mode & S_IWGRP)
+                        break;
+                /* fall through: an other-writable parent also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                /* match the callback arity, as the EROFS path does */
+                STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_mkdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode);
+        return 0;
+}
+
+/*
+ * filter_unlink_cbk - completion callback for filter_unlink(); hands the
+ * child's result back unchanged.
+ */
+static int32_t
+filter_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * filter_unlink - gate an unlink on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when either the parent or the
+ * inode itself is group- or other-writable; when both ids were mapped only
+ * the other-writable bits count. Otherwise the call is unwound with EPERM.
+ * Filtered uids/gids and a read-only filter unwind with EROFS. Everything
+ * else is forwarded to the first child.
+ *
+ * When no parent inode can be determined, the parent checks are skipped
+ * instead of dereferencing a NULL pointer.
+ */
+int32_t
+filter_unlink (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc)
+{
+        int32_t  ret    = 0;
+        inode_t *parent = loc->parent;
+
+        /* NOTE(review): if inode_parent() hands back a referenced inode,
+         * that reference is never dropped here - confirm and unref. */
+        if (!parent)
+                parent = inode_parent (loc->inode, 0, NULL);
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent && (parent->st_mode & S_IWGRP))
+                        break;
+                if (loc->inode->st_mode & S_IWGRP)
+                        break;
+                /* fall through: other-writable also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent && (parent->st_mode & S_IWOTH))
+                        break;
+                if (loc->inode->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                STACK_UNWIND (frame, -1, EPERM);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_unlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    loc);
+        return 0;
+}
+
+/*
+ * filter_rmdir_cbk - completion callback for filter_rmdir(); hands the
+ * child's result back unchanged.
+ */
+static int32_t
+filter_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * filter_rmdir - gate an rmdir on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when either the parent or the
+ * directory itself is group- or other-writable; when both ids were mapped
+ * only the other-writable bits count. Otherwise the call is unwound with
+ * EPERM. Filtered uids/gids and a read-only filter unwind with EROFS.
+ * Everything else is forwarded to the first child.
+ *
+ * When no parent inode can be determined, the parent checks are skipped
+ * instead of dereferencing a NULL pointer.
+ */
+int32_t
+filter_rmdir (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc)
+{
+        int32_t  ret    = 0;
+        inode_t *parent = loc->parent;
+
+        /* NOTE(review): if inode_parent() hands back a referenced inode,
+         * that reference is never dropped here - confirm and unref. */
+        if (!parent)
+                parent = inode_parent (loc->inode, 0, NULL);
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent && (parent->st_mode & S_IWGRP))
+                        break;
+                if (loc->inode->st_mode & S_IWGRP)
+                        break;
+                /* fall through: other-writable also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent && (parent->st_mode & S_IWOTH))
+                        break;
+                if (loc->inode->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                STACK_UNWIND (frame, -1, EPERM);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_rmdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rmdir,
+                    loc);
+        return 0;
+}
+
+/*
+ * filter_symlink_cbk - completion callback for filter_symlink().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the new inode's context; a failure to store it is only
+ * logged.
+ */
+static int32_t
+filter_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno,
+                    inode_t *inode, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * filter_symlink - gate a symlink creation on the result of
+ * update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the parent directory is
+ * group- or other-writable; when both ids were mapped only other-writable
+ * counts. Otherwise the call is unwound with EPERM. Filtered uids/gids and
+ * a read-only filter unwind with EROFS. Everything else is forwarded to
+ * the first child.
+ *
+ * Every error unwind passes NULL for the (inode, buf) arguments that the
+ * symlink callback expects.
+ */
+int32_t
+filter_symlink (call_frame_t *frame,
+                xlator_t *this,
+                const char *linkpath,
+                loc_t *loc)
+{
+        int      ret    = 0;
+        inode_t *parent = loc->parent;
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent->st_mode & S_IWGRP)
+                        break;
+                /* fall through: an other-writable parent also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                /* match the callback arity, as the EROFS path does */
+                STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_symlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->symlink,
+                    linkpath, loc);
+        return 0;
+}
+
+
+/*
+ * filter_rename_cbk - completion callback for filter_rename(); on success
+ * the returned stat has its owner fields rewritten by update_stat().
+ */
+static int32_t
+filter_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret >= 0)
+                update_stat (buf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/*
+ * filter_rename - gate a rename on the result of update_frame() for the
+ * source location.
+ *
+ * A caller whose uid was mapped may proceed when either the source's
+ * parent or the source inode is group- or other-writable; when both ids
+ * were mapped only the other-writable bits count. Otherwise the call is
+ * unwound with EPERM. Filtered uids/gids and a read-only filter unwind
+ * with EROFS. Everything else is forwarded to the first child.
+ *
+ * Every error unwind passes NULL for the stat argument that the rename
+ * callback expects. When no parent inode can be determined, the parent
+ * checks are skipped instead of dereferencing a NULL pointer.
+ */
+int32_t
+filter_rename (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *oldloc,
+               loc_t *newloc)
+{
+        int32_t  ret    = 0;
+        inode_t *parent = oldloc->parent;
+
+        /* NOTE(review): if inode_parent() hands back a referenced inode,
+         * that reference is never dropped here - confirm and unref. */
+        if (!parent)
+                parent = inode_parent (oldloc->inode, 0, NULL);
+
+        ret = update_frame (frame, oldloc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent && (parent->st_mode & S_IWGRP))
+                        break;
+                if (oldloc->inode->st_mode & S_IWGRP)
+                        break;
+                /* fall through: other-writable also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent && (parent->st_mode & S_IWOTH))
+                        break;
+                if (oldloc->inode->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s -> %s: returning permission denied", oldloc->path, newloc->path);
+                /* match the callback arity, as the EROFS path does */
+                STACK_UNWIND (frame, -1, EPERM, NULL);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame,
+                    filter_rename_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename,
+                    oldloc, newloc);
+        return 0;
+}
+
+
+/*
+ * filter_link_cbk - completion callback for filter_link().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the inode's context; a failure to store it is only logged.
+ */
+static int32_t
+filter_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 inode_t *inode, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/*
+ * filter_link - refuse hard-link creation with EROFS when update_frame()
+ * reports a filtered uid/gid or a read-only filter for the source inode;
+ * any other result is forwarded to the first child.
+ */
+int32_t
+filter_link (call_frame_t *frame, xlator_t *this,
+             loc_t *oldloc, loc_t *newloc)
+{
+        int verdict = update_frame (frame, oldloc->inode, this->private);
+
+        if ((verdict == GF_FILTER_FILTER_UID) ||
+            (verdict == GF_FILTER_FILTER_GID) ||
+            (verdict == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_link_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->link,
+                    oldloc, newloc);
+        return 0;
+}
+
+
+/*
+ * filter_create_cbk - completion callback for filter_create().
+ *
+ * On success the stat is rewritten by update_stat() and the resulting uid
+ * is stored in the new inode's context; a failure to store it is only
+ * logged.
+ */
+static int32_t
+filter_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+        if (inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "couldn't set context");
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+/*
+ * filter_create - gate a file creation on the result of update_frame().
+ *
+ * A caller whose uid was mapped may proceed when the parent directory is
+ * group- or other-writable; when both ids were mapped only other-writable
+ * counts. Otherwise the call is unwound with EPERM. Filtered uids/gids and
+ * a read-only filter unwind with EROFS. Everything else is forwarded to
+ * the first child.
+ *
+ * Every error unwind passes NULL for the (fd, inode, buf) arguments that
+ * the create callback expects.
+ */
+int32_t
+filter_create (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               int32_t flags,
+               mode_t mode, fd_t *fd)
+{
+        int      ret    = 0;
+        inode_t *parent = loc->parent;
+
+        ret = update_frame (frame, loc->inode, this->private);
+        switch (ret) {
+        case GF_FILTER_MAP_UID:
+                if (parent->st_mode & S_IWGRP)
+                        break;
+                /* fall through: an other-writable parent also suffices */
+        case GF_FILTER_MAP_BOTH:
+                if (parent->st_mode & S_IWOTH)
+                        break;
+                gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                /* match the callback arity, as the EROFS path does */
+                STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL);
+                return 0;
+
+        case GF_FILTER_FILTER_UID:
+        case GF_FILTER_FILTER_GID:
+        case GF_FILTER_RO_FS:
+                STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL);
+                return 0;
+
+        default:
+                break;
+        }
+
+        STACK_WIND (frame, filter_create_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, fd);
+        return 0;
+}
+
+/*
+ * filter_open_cbk - completion callback for filter_open(); hands the
+ * child's result back unchanged.
+ */
+static int32_t
+filter_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/*
+ * filter_open - gate an open on the result of update_frame().
+ *
+ * For a mapped uid the group bits are tried first and the other bits
+ * second; for mapped uid and gid only the other bits are tried. A write
+ * bit always grants access, a read bit only when the open does not ask
+ * for O_WRONLY or O_RDWR. Without a grant the call is unwound with EPERM.
+ *
+ * Filtered uids/gids and a read-only filter unwind with EROFS only when
+ * the open asks for write access. Everything else is forwarded to the
+ * first child.
+ */
+int32_t
+filter_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             int32_t flags, fd_t *fd)
+{
+        int32_t verdict  = update_frame (frame, loc->inode, this->private);
+        int     wants_wr = ((flags & O_WRONLY) || (flags & O_RDWR));
+        int     granted  = 0;
+
+        if ((verdict == GF_FILTER_MAP_UID) ||
+            (verdict == GF_FILTER_MAP_BOTH)) {
+                if (verdict == GF_FILTER_MAP_UID) {
+                        if (loc->inode->st_mode & S_IWGRP)
+                                granted = 1;
+                        else if (!wants_wr &&
+                                 (loc->inode->st_mode & S_IRGRP))
+                                granted = 1;
+                }
+
+                if (!granted) {
+                        if (loc->inode->st_mode & S_IWOTH)
+                                granted = 1;
+                        else if (!wants_wr &&
+                                 (loc->inode->st_mode & S_IROTH))
+                                granted = 1;
+                }
+
+                if (!granted) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s: returning permission denied (mode: 0%o, flag=0%o)",
+                                loc->path, loc->inode->st_mode, flags);
+                        STACK_UNWIND (frame, -1, EPERM, fd);
+                        return 0;
+                }
+        } else if (((verdict == GF_FILTER_FILTER_UID) ||
+                    (verdict == GF_FILTER_FILTER_GID) ||
+                    (verdict == GF_FILTER_RO_FS)) && wants_wr) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_open_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+        return 0;
+}
+
+/*
+ * filter_readv_cbk - completion callback for filter_readv(); on success
+ * the returned stat has its owner fields rewritten by update_stat(). The
+ * data vector is passed through untouched.
+ */
+static int32_t
+filter_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        if (op_ret >= 0)
+                update_stat (stbuf, this->private);
+
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+/*
+ * filter_readv - pass the read to the first child without any access
+ * check; the reply goes through filter_readv_cbk().
+ */
+int32_t
+filter_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              size_t size, off_t offset)
+{
+        xlator_t *child = FIRST_CHILD (this);
+
+        STACK_WIND (frame, filter_readv_cbk,
+                    child, child->fops->readv,
+                    fd, size, offset);
+        return 0;
+}
+
+
+/*
+ * Completion callback for filter_writev.  On success (op_ret >= 0) the stat
+ * buffer is passed through update_stat() before being unwound.
+ */
+static int32_t
+filter_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (stbuf, this->private);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+        return 0;
+}
+
+/*
+ * writev: refuse the write with EROFS when update_frame() reports one of
+ * GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID or GF_FILTER_RO_FS; otherwise
+ * wind the request to the first child.
+ */
+int32_t
+filter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+               struct iovec *vector, int32_t count, off_t off)
+{
+        int32_t action = 0;
+
+        action = update_frame (frame, fd->inode, this->private);
+
+        if ((action == GF_FILTER_FILTER_UID)
+            || (action == GF_FILTER_FILTER_GID)
+            || (action == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, off);
+
+        return 0;
+}
+
+/*
+ * Completion callback for filter_fstat.  On success (op_ret >= 0) the stat
+ * buffer is passed through update_stat() before being unwound.
+ */
+static int32_t
+filter_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        if (op_ret < 0)
+                goto unwind;
+
+        update_stat (buf, this->private);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * fstat: wound unchanged to the first child; the reply is handled by
+ * filter_fstat_cbk.
+ */
+int32_t
+filter_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        STACK_WIND (frame, filter_fstat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd);
+
+        return 0;
+}
+
+/*
+ * Completion callback for filter_opendir: forwards op_ret, op_errno and fd
+ * to the caller unchanged.
+ */
+static int32_t
+filter_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}
+
+/*
+ * opendir: for GF_FILTER_MAP_UID the directory must carry a group read or
+ * write bit, or failing that an "other" read or write bit; for
+ * GF_FILTER_MAP_BOTH only the "other" bits are consulted.  If none is set
+ * the call is unwound with EPERM.  All other action codes are wound to the
+ * first child.
+ */
+int32_t
+filter_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+        int32_t action    = 0;
+        int     permitted = 0;
+
+        action = update_frame (frame, loc->inode, this->private);
+
+        if ((action == GF_FILTER_MAP_UID) || (action == GF_FILTER_MAP_BOTH)) {
+                if (action == GF_FILTER_MAP_UID) {
+                        if (loc->inode->st_mode & S_IWGRP)
+                                permitted = 1;
+                        else if (loc->inode->st_mode & S_IRGRP)
+                                permitted = 1;
+                }
+
+                if (!permitted) {
+                        if (loc->inode->st_mode & S_IWOTH)
+                                permitted = 1;
+                        else if (loc->inode->st_mode & S_IROTH)
+                                permitted = 1;
+                }
+
+                if (!permitted) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, fd);
+                        return 0;
+                }
+        }
+
+        STACK_WIND (frame, filter_opendir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                    loc, fd);
+
+        return 0;
+}
+
+
+/*
+ * Completion callback for filter_setxattr: forwards op_ret and op_errno to
+ * the caller unchanged.
+ */
+static int32_t
+filter_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * setxattr: a modifying operation.
+ *
+ *  - GF_FILTER_MAP_UID: allowed if the group write bit is set, else falls
+ *    back to the "other" write bit.
+ *  - GF_FILTER_MAP_BOTH: allowed only if the "other" write bit is set.
+ *    When not allowed the call is unwound with EPERM.
+ *  - GF_FILTER_FILTER_UID / _GID / GF_FILTER_RO_FS: unwound with EROFS.
+ *
+ * Everything else is wound to the first child.
+ */
+int32_t
+filter_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 dict_t *dict, int32_t flags)
+{
+        int32_t action    = 0;
+        int     permitted = 0;
+
+        action = update_frame (frame, loc->inode, this->private);
+
+        if ((action == GF_FILTER_MAP_UID) || (action == GF_FILTER_MAP_BOTH)) {
+                if ((action == GF_FILTER_MAP_UID)
+                    && (loc->inode->st_mode & S_IWGRP))
+                        permitted = 1;
+
+                if (!permitted && (loc->inode->st_mode & S_IWOTH))
+                        permitted = 1;
+
+                if (!permitted) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM);
+                        return 0;
+                }
+        } else if ((action == GF_FILTER_FILTER_UID)
+                   || (action == GF_FILTER_FILTER_GID)
+                   || (action == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_setxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+                    loc, dict, flags);
+
+        return 0;
+}
+
+/*
+ * Completion callback for filter_getxattr: forwards op_ret, op_errno and
+ * the returned dict to the caller unchanged.
+ */
+static int32_t
+filter_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        return 0;
+}
+
+/*
+ * getxattr: a read-only operation.
+ *
+ *  - GF_FILTER_MAP_UID: allowed if the group read bit is set, else falls
+ *    back to the "other" read bit.
+ *  - GF_FILTER_MAP_BOTH: allowed only if the "other" read bit is set.
+ *    When not allowed the call is unwound with EPERM and a NULL dict.
+ *
+ * Everything else is wound to the first child.
+ */
+int32_t
+filter_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                 const char *name)
+{
+        int32_t action    = 0;
+        int     permitted = 0;
+
+        action = update_frame (frame, loc->inode, this->private);
+
+        if ((action == GF_FILTER_MAP_UID) || (action == GF_FILTER_MAP_BOTH)) {
+                if ((action == GF_FILTER_MAP_UID)
+                    && (loc->inode->st_mode & S_IRGRP))
+                        permitted = 1;
+
+                if (!permitted && (loc->inode->st_mode & S_IROTH))
+                        permitted = 1;
+
+                if (!permitted) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM, NULL);
+                        return 0;
+                }
+        }
+
+        STACK_WIND (frame, filter_getxattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+                    loc, name);
+
+        return 0;
+}
+
+/*
+ * Completion callback for filter_removexattr: forwards op_ret and op_errno
+ * to the caller unchanged.
+ */
+static int32_t
+filter_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+
+        return 0;
+}
+
+/*
+ * removexattr: a modifying operation, checked exactly like setxattr.
+ *
+ *  - GF_FILTER_MAP_UID: group write bit, else "other" write bit.
+ *  - GF_FILTER_MAP_BOTH: "other" write bit only.
+ *    When not allowed the call is unwound with EPERM.
+ *  - GF_FILTER_FILTER_UID / _GID / GF_FILTER_RO_FS: unwound with EROFS.
+ *
+ * Everything else is wound to the first child.
+ */
+int32_t
+filter_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                    const char *name)
+{
+        int32_t action    = 0;
+        int     permitted = 0;
+
+        action = update_frame (frame, loc->inode, this->private);
+
+        if ((action == GF_FILTER_MAP_UID) || (action == GF_FILTER_MAP_BOTH)) {
+                if ((action == GF_FILTER_MAP_UID)
+                    && (loc->inode->st_mode & S_IWGRP))
+                        permitted = 1;
+
+                if (!permitted && (loc->inode->st_mode & S_IWOTH))
+                        permitted = 1;
+
+                if (!permitted) {
+                        gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
+                        STACK_UNWIND (frame, -1, EPERM);
+                        return 0;
+                }
+        } else if ((action == GF_FILTER_FILTER_UID)
+                   || (action == GF_FILTER_FILTER_GID)
+                   || (action == GF_FILTER_RO_FS)) {
+                STACK_UNWIND (frame, -1, EROFS);
+                return 0;
+        }
+
+        STACK_WIND (frame, filter_removexattr_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+                    loc, name);
+
+        return 0;
+}
+
+/*
+ * Parse one id range of the form "n" or "n-m" from @spec.
+ * On success *lo holds n and *hi holds m (or n when no "-m" part is given)
+ * and 0 is returned.  On malformed input or allocation failure an error is
+ * logged and -1 is returned.  @spec itself is not modified.
+ */
+static int
+filter_opt_parse_range (xlator_t *this, const char *spec,
+                        int32_t *lo, int32_t *hi)
+{
+        char *copy = NULL;
+        char *save = NULL;
+        char *tok  = NULL;
+        int   ret  = -1;
+
+        copy = strdup (spec);
+        if (!copy) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                return -1;
+        }
+
+        /* strtok_r returns NULL for input made only of '-' characters */
+        tok = strtok_r (copy, "-", &save);
+        if (!tok || (gf_string2int (tok, lo) != 0)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "invalid number format \"%s\"",
+                        tok ? tok : spec);
+                goto out;
+        }
+        *hi = *lo;
+
+        tok = strtok_r (NULL, "-", &save);
+        if (tok && (gf_string2int (tok, hi) != 0)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "invalid number format \"%s\"",
+                        tok);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        FREE (copy);
+        return ret;
+}
+
+/*
+ * Parse one mapping of the form "n=o" or "n-m=o" from @pair into
+ * *lo / *hi (input range) and *out (output id).  @errmsg is logged when the
+ * "=" structure is missing.  Returns 0 on success, -1 on error.
+ * @pair itself is not modified.
+ */
+static int
+filter_opt_parse_mapping (xlator_t *this, const char *pair,
+                          const char *errmsg,
+                          int32_t *lo, int32_t *hi, int32_t *out)
+{
+        char *copy = NULL;
+        char *save = NULL;
+        char *lhs  = NULL;
+        char *rhs  = NULL;
+        int   ret  = -1;
+
+        copy = strdup (pair);
+        if (!copy) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                return -1;
+        }
+
+        lhs = strtok_r (copy, "=", &save);
+        if (!lhs) {
+                gf_log (this->name, GF_LOG_ERROR, "%s", errmsg);
+                goto done;
+        }
+
+        if (filter_opt_parse_range (this, lhs, lo, hi) != 0)
+                goto done;
+
+        rhs = strtok_r (NULL, "=", &save);
+        if (!rhs) {
+                gf_log (this->name, GF_LOG_ERROR, "%s", errmsg);
+                goto done;
+        }
+
+        if (gf_string2int (rhs, out) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "invalid number format \"%s\"",
+                        rhs);
+                goto done;
+        }
+
+        ret = 0;
+done:
+        FREE (copy);
+        return ret;
+}
+
+/*
+ * Translator entry point: validate the graph position (exactly one child),
+ * allocate the private struct gf_filter and populate it from the volume
+ * options "read-only", "root-squashing", "translate-uid", "translate-gid",
+ * "filter-uid", "filter-gid", "fixed-uid" and "fixed-gid".
+ *
+ * Returns 0 with this->private set on success.  On any option error it logs
+ * the problem, frees everything allocated here and returns -1, leaving
+ * this->private untouched.
+ *
+ * Note: the comma-separated option strings are tokenised in place with
+ * strtok_r, so the option values stored in this->options are modified.
+ */
+int32_t
+init (xlator_t *this)
+{
+        char             *value        = NULL;
+        char             *tmp_str      = NULL;
+        int32_t           input_lo     = 0;
+        int32_t           input_hi     = 0;
+        int32_t           input_value  = 0;
+        int32_t           output_value = 0;
+        data_t           *option_data  = NULL;
+        struct gf_filter *filter       = NULL;
+        gf_boolean_t      tmp_bool     = 0;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "translator not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        filter = CALLOC (1, sizeof (*filter));
+        ERR_ABORT (filter);
+
+        if (dict_get (this->options, "read-only")) {
+                value = data_to_str (dict_get (this->options, "read-only"));
+                if (gf_string2boolean (value, &filter->complete_read_only) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "wrong value provided for 'read-only'");
+                        goto err;
+                }
+        }
+
+        if (dict_get (this->options, "root-squashing")) {
+                value = data_to_str (dict_get (this->options, "root-squashing"));
+                if (gf_string2boolean (value, &tmp_bool) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "wrong value provided for 'root-squashing'");
+                        goto err;
+                }
+                if (tmp_bool) {
+                        /* entry 0 maps root:root to nobody:nobody */
+                        filter->translate_num_uid_entries = 1;
+                        filter->translate_num_gid_entries = 1;
+                        filter->translate_input_uid[0][0] = GF_FILTER_ROOT_UID; /* root */
+                        filter->translate_input_uid[0][1] = GF_FILTER_ROOT_UID; /* root */
+                        filter->translate_input_gid[0][0] = GF_FILTER_ROOT_GID; /* root */
+                        filter->translate_input_gid[0][1] = GF_FILTER_ROOT_GID; /* root */
+                        filter->translate_output_uid[0] = GF_FILTER_NOBODY_UID;
+                        filter->translate_output_gid[0] = GF_FILTER_NOBODY_GID;
+                }
+        }
+
+        if (dict_get (this->options, "translate-uid")) {
+                option_data = dict_get (this->options, "translate-uid");
+                tmp_str = NULL;
+                value = strtok_r (option_data->data, ",", &tmp_str);
+                /* entries beyond GF_MAXIMUM_FILTERING_ALLOWED are ignored */
+                while (value &&
+                       (filter->translate_num_uid_entries < GF_MAXIMUM_FILTERING_ALLOWED)) {
+                        if (filter_opt_parse_mapping (this, value,
+                                                      "mapping string not valid",
+                                                      &input_lo, &input_hi,
+                                                      &output_value) != 0)
+                                goto err;
+
+                        filter->translate_input_uid[filter->translate_num_uid_entries][0] = input_lo;
+                        filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_hi;
+                        filter->translate_output_uid[filter->translate_num_uid_entries] = output_value;
+
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "pair %d: input uid '%d' will be changed to uid '%d'",
+                                filter->translate_num_uid_entries, input_hi, output_value);
+
+                        filter->translate_num_uid_entries++;
+                        value = strtok_r (NULL, ",", &tmp_str);
+                }
+        }
+
+        if (dict_get (this->options, "translate-gid")) {
+                option_data = dict_get (this->options, "translate-gid");
+                tmp_str = NULL;
+                value = strtok_r (option_data->data, ",", &tmp_str);
+                while (value &&
+                       (filter->translate_num_gid_entries < GF_MAXIMUM_FILTERING_ALLOWED)) {
+                        if (filter_opt_parse_mapping (this, value,
+                                                      "translate-gid value not valid",
+                                                      &input_lo, &input_hi,
+                                                      &output_value) != 0)
+                                goto err;
+
+                        filter->translate_input_gid[filter->translate_num_gid_entries][0] = input_lo;
+                        filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_hi;
+                        filter->translate_output_gid[filter->translate_num_gid_entries] = output_value;
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "pair %d: input gid '%d' will be changed to gid '%d'",
+                                filter->translate_num_gid_entries, input_hi, output_value);
+
+                        filter->translate_num_gid_entries++;
+                        value = strtok_r (NULL, ",", &tmp_str);
+                }
+        }
+
+        if (dict_get (this->options, "filter-uid")) {
+                option_data = dict_get (this->options, "filter-uid");
+                tmp_str = NULL;
+                value = strtok_r (option_data->data, ",", &tmp_str);
+                while (value &&
+                       (filter->filter_num_uid_entries < GF_MAXIMUM_FILTERING_ALLOWED)) {
+                        if (filter_opt_parse_range (this, value,
+                                                    &input_lo, &input_hi) != 0)
+                                goto err;
+
+                        filter->filter_input_uid[filter->filter_num_uid_entries][0] = input_lo;
+                        filter->filter_input_uid[filter->filter_num_uid_entries][1] = input_hi;
+
+                        /* log the whole "n-m" token, not a truncated copy */
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "filter [%d]: input uid(s) '%s' will be filtered",
+                                filter->filter_num_uid_entries, value);
+
+                        filter->filter_num_uid_entries++;
+                        value = strtok_r (NULL, ",", &tmp_str);
+                }
+                filter->partial_filter = 1;
+        }
+
+        if (dict_get (this->options, "filter-gid")) {
+                option_data = dict_get (this->options, "filter-gid");
+                tmp_str = NULL;
+                value = strtok_r (option_data->data, ",", &tmp_str);
+                while (value &&
+                       (filter->filter_num_gid_entries < GF_MAXIMUM_FILTERING_ALLOWED)) {
+                        if (filter_opt_parse_range (this, value,
+                                                    &input_lo, &input_hi) != 0)
+                                goto err;
+
+                        filter->filter_input_gid[filter->filter_num_gid_entries][0] = input_lo;
+                        filter->filter_input_gid[filter->filter_num_gid_entries][1] = input_hi;
+
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "filter [%d]: input gid(s) '%s' will be filtered",
+                                filter->filter_num_gid_entries, value);
+
+                        filter->filter_num_gid_entries++;
+                        value = strtok_r (NULL, ",", &tmp_str);
+                }
+                /* the option is validated but deliberately rejected */
+                gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. exiting");
+                goto err;
+        }
+
+        if (dict_get (this->options, "fixed-uid")) {
+                option_data = dict_get (this->options, "fixed-uid");
+                if (gf_string2int (option_data->data, &input_value) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\"",
+                                option_data->data);
+                        goto err;
+                }
+                filter->fixed_uid = input_value;
+                filter->fixed_uid_set = 1;
+        }
+
+        if (dict_get (this->options, "fixed-gid")) {
+                option_data = dict_get (this->options, "fixed-gid");
+                if (gf_string2int (option_data->data, &input_value) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\"",
+                                option_data->data);
+                        goto err;
+                }
+                filter->fixed_gid = input_value;
+                filter->fixed_gid_set = 1;
+        }
+
+        this->private = filter;
+        return 0;
+
+err:
+        /* nothing has been published in this->private yet */
+        FREE (filter);
+        return -1;
+}
+
+
+/*
+ * Translator teardown: release the struct gf_filter stored in
+ * this->private.
+ */
+void
+fini (xlator_t *this)
+{
+        struct gf_filter *priv = NULL;
+
+        priv = this->private;
+        FREE (priv);
+}
+
+
+/*
+ * File-operation table for this translator: each listed fop is routed to
+ * the corresponding filter_* handler.
+ */
+struct xlator_fops fops = {
+ .lookup = filter_lookup,
+ .stat = filter_stat,
+ .fstat = filter_fstat,
+ .chmod = filter_chmod,
+ .fchmod = filter_fchmod,
+ .readlink = filter_readlink,
+ .mknod = filter_mknod,
+ .mkdir = filter_mkdir,
+ .unlink = filter_unlink,
+ .rmdir = filter_rmdir,
+ .symlink = filter_symlink,
+ .rename = filter_rename,
+ .link = filter_link,
+ .chown = filter_chown,
+ .fchown = filter_fchown,
+ .truncate = filter_truncate,
+ .ftruncate = filter_ftruncate,
+ .create = filter_create,
+ .open = filter_open,
+ .readv = filter_readv,
+ .writev = filter_writev,
+ .setxattr = filter_setxattr,
+ .getxattr = filter_getxattr,
+ .removexattr = filter_removexattr,
+ .opendir = filter_opendir,
+ .utimens = filter_utimens,
+};
+
+/* Management-operation table: this translator sets no entries. */
+struct xlator_mops mops = {
+};
+
+/* Callback table: this translator sets no entries. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Volume options understood by this translator, with their declared types.
+ * The list is terminated by an entry whose key is NULL.
+ * The translate-* and filter-* options are typed ANY; their comma-separated
+ * syntax is parsed by init().
+ */
+struct volume_options options[] = {
+ { .key = { "root-squashing" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = { "read-only" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = { "fixed-uid" },
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = { "fixed-gid" },
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = { "translate-uid" },
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = { "translate-gid" },
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = { "filter-uid" },
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = { "filter-gid" },
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/locks/Makefile.am b/xlators/features/locks/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/features/locks/Makefile.am
@@ -0,0 +1,3 @@
+# Recurse into src/; nothing is built in this directory itself.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
new file mode 100644
index 00000000000..ec4a953eb91
--- /dev/null
+++ b/xlators/features/locks/src/Makefile.am
@@ -0,0 +1,20 @@
+# Build the "locks" translator as a libtool module installed under
+# xlator/features.
+xlator_LTLIBRARIES = locks.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool's option is spelled -avoid-version; "-avoidversion" is not a
+# libtool flag.
+locks_la_LDFLAGS = -module -avoid-version
+
+locks_la_SOURCES = common.c posix.c internal.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = locks.h common.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+
+CLEANFILES =
+
+# posix-locks.so is installed as a symlink to locks.so and removed again
+# on uninstall.
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
+
+install-data-hook:
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
new file mode 100644
index 00000000000..9ac1250cc57
--- /dev/null
+++ b/xlators/features/locks/src/common.c
@@ -0,0 +1,561 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+
+
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+static void
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+
+
+/*
+ * Return the pl_inode_t stored in @inode's context for translator @this,
+ * creating and storing a new one if none exists yet.  The lookup and the
+ * creation both happen while holding inode->lock.
+ *
+ * A newly created pl_inode is marked mandatory when the inode's mode has
+ * S_ISGID set and S_IXGRP clear.  Returns NULL if allocation fails.
+ */
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode)
+{
+        pl_inode_t *ctx    = NULL;
+        uint64_t    stored = 0;
+        mode_t      mode   = 0;
+        int         ret    = 0;
+
+        LOCK (&inode->lock);
+        {
+                ret = inode_ctx_get (inode, this, &stored);
+                if (ret == 0) {
+                        /* already present: reuse it */
+                        ctx = (pl_inode_t *)(long)stored;
+                } else {
+                        ctx = CALLOC (1, sizeof (*ctx));
+                        if (ctx) {
+                                mode = inode->st_mode;
+                                if ((mode & S_ISGID) && !(mode & S_IXGRP))
+                                        ctx->mandatory = 1;
+
+                                pthread_mutex_init (&ctx->mutex, NULL);
+
+                                INIT_LIST_HEAD (&ctx->dir_list);
+                                INIT_LIST_HEAD (&ctx->ext_list);
+                                INIT_LIST_HEAD (&ctx->int_list);
+                                INIT_LIST_HEAD (&ctx->rw_list);
+
+                                ret = inode_ctx_put (inode, this,
+                                                     (uint64_t)(long)ctx);
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                        }
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        return ctx;
+}
+
+
+/*
+ * Allocate a posix_lock_t describing @flock for the owner identified by
+ * (@transport, @client_pid).  The byte range is stored as an inclusive
+ * [fl_start, fl_end]; an l_len of 0 is stored as fl_end == LLONG_MAX.
+ * Returns NULL if allocation fails.
+ */
+posix_lock_t *
+new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
+{
+        posix_lock_t *created = CALLOC (1, sizeof (posix_lock_t));
+
+        if (created == NULL)
+                return NULL;
+
+        created->fl_start = flock->l_start;
+        created->fl_type  = flock->l_type;
+
+        if (flock->l_len != 0)
+                created->fl_end = flock->l_start + flock->l_len - 1;
+        else
+                created->fl_end = LLONG_MAX;
+
+        created->transport  = transport;
+        created->client_pid = client_pid;
+
+        INIT_LIST_HEAD (&created->list);
+
+        return created;
+}
+
+
+/*
+ * Unlink @lock from whatever list it is on and leave its list head
+ * re-initialised.  The lock itself is not freed.
+ */
+void
+__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+        list_del_init (&lock->list);
+
+        return;
+}
+
+
+/* Release the memory of @lock; it is not unlinked from any list first. */
+void
+__destroy_lock (posix_lock_t *lock)
+{
+        free (lock);
+
+        return;
+}
+
+
+/*
+ * Convert a posix_lock_t back to a struct flock.
+ *
+ * This is the inverse of new_posix_lock(): there an l_len of 0 ("until end
+ * of file") is stored as fl_end == LLONG_MAX, so here fl_end == LLONG_MAX
+ * must be reported as l_len == 0.  Any other range is inclusive, hence the
+ * "+ 1".  Only l_pid, l_type, l_start and l_len are written; l_whence is
+ * left as the caller set it.
+ */
+void
+posix_lock_to_flock (posix_lock_t *lock, struct flock *flock)
+{
+        flock->l_pid   = lock->client_pid;
+        flock->l_type  = lock->fl_type;
+        flock->l_start = lock->fl_start;
+
+        if (lock->fl_end == LLONG_MAX)
+                flock->l_len = 0;
+        else
+                flock->l_len = lock->fl_end - lock->fl_start + 1;
+}
+
+
+/* Append @lock to the tail of the lock list selected by @dom on @pl_inode. */
+void
+pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+{
+        struct list_head *head = DOMAIN_HEAD (pl_inode, dom);
+
+        list_add_tail (&lock->list, head);
+}
+
+
+/*
+ * Return 1 if the inclusive byte ranges of @l1 and @l2 share at least one
+ * byte, 0 otherwise.
+ *
+ * Note: FUSE always gives us absolute offsets, so there is no need to
+ * worry about SEEK_CUR or SEEK_END here.
+ */
+int
+locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
+{
+        if (l1->fl_end < l2->fl_start)
+                return 0;       /* l1 lies entirely before l2 */
+
+        if (l2->fl_end < l1->fl_start)
+                return 0;       /* l2 lies entirely before l1 */
+
+        return 1;
+}
+
+
+/*
+ * Return 1 if @l1 and @l2 belong to the same owner, i.e. both client_pid
+ * and transport match; 0 otherwise.
+ */
+int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+        if (l1->client_pid != l2->client_pid)
+                return 0;
+
+        if (l1->transport != l2->transport)
+                return 0;
+
+        return 1;
+}
+
+
+/*
+ * Remove and free every lock of type F_UNLCK from the list selected by
+ * @dom on @pl_inode.
+ */
+void
+__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
+{
+        posix_lock_t *cur  = NULL;
+        posix_lock_t *next = NULL;
+
+        list_for_each_entry_safe (cur, next, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (cur->fl_type != F_UNLCK)
+                        continue;
+
+                __delete_lock (pl_inode, cur);
+                __destroy_lock (cur);
+        }
+}
+
+
+/*
+ * Add two locks: allocate a new lock whose range is the smallest range
+ * covering both @l1 and @l2.
+ *
+ * Only fl_start, fl_end and the list head are set up; the caller fills in
+ * fl_type and the owner fields.  The list head is initialised so that the
+ * result can safely be handed to __delete_lock()/list_del_init() before it
+ * has been inserted into any list.  Returns NULL if allocation fails.
+ */
+static posix_lock_t *
+add_locks (posix_lock_t *l1, posix_lock_t *l2)
+{
+        posix_lock_t *sum = NULL;
+
+        sum = CALLOC (1, sizeof (posix_lock_t));
+        if (!sum)
+                return NULL;
+
+        sum->fl_start = min (l1->fl_start, l2->fl_start);
+        sum->fl_end   = max (l1->fl_end, l2->fl_end);
+
+        INIT_LIST_HEAD (&sum->list);
+
+        return sum;
+}
+
+/* Result of subtract_locks(): up to three pieces; unused slots are NULL. */
+struct _values {
+        posix_lock_t *locks[3];
+};
+
+/*
+ * Subtract two locks: split {big} around {small}.
+ * {small} must always be contained inside {big}.
+ *
+ * The result holds freshly allocated copies:
+ *   - identical ranges:      one piece, a copy of big with small's type;
+ *   - small strictly inside: big's left part, small, big's right part;
+ *   - shared start or end:   the remaining part of big, then small.
+ * If none of these cases applies, a debug message is logged and all slots
+ * stay NULL.
+ *
+ * The pieces are memcpy'd from their sources, so their list heads are
+ * re-initialised before returning: none of them is on any list, and they
+ * must not carry pointers into the source lock's list.
+ */
+static struct _values
+subtract_locks (posix_lock_t *big, posix_lock_t *small)
+{
+        struct _values v = { .locks = {0, 0, 0} };
+        int            i = 0;
+
+        if ((big->fl_start == small->fl_start) &&
+            (big->fl_end == small->fl_end)) {
+                /* both edges coincide with big */
+                v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[0]);
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_type = small->fl_type;
+        }
+        else if ((small->fl_start > big->fl_start) &&
+                 (small->fl_end < big->fl_end)) {
+                /* both edges lie inside big */
+                v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[0]);
+                v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[1]);
+                v.locks[2] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[2]);
+
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_end = small->fl_start - 1;
+
+                memcpy (v.locks[1], small, sizeof (posix_lock_t));
+                memcpy (v.locks[2], big, sizeof (posix_lock_t));
+                v.locks[2]->fl_start = small->fl_end + 1;
+        }
+        /* one edge coincides with big */
+        else if (small->fl_start == big->fl_start) {
+                v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[0]);
+                v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[1]);
+
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_start = small->fl_end + 1;
+
+                memcpy (v.locks[1], small, sizeof (posix_lock_t));
+        }
+        else if (small->fl_end == big->fl_end) {
+                v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[0]);
+                v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
+                ERR_ABORT (v.locks[1]);
+
+                memcpy (v.locks[0], big, sizeof (posix_lock_t));
+                v.locks[0]->fl_end = small->fl_start - 1;
+
+                memcpy (v.locks[1], small, sizeof (posix_lock_t));
+        }
+        else {
+                gf_log ("posix-locks", GF_LOG_DEBUG,
+                        "unexpected case in subtract_locks");
+        }
+
+        /* detach the copies from whatever list their source was on */
+        for (i = 0; i < 3; i++) {
+                if (v.locks[i]) {
+                        INIT_LIST_HEAD (&v.locks[i]->list);
+                }
+        }
+
+        return v;
+}
+
+/*
+ * Walk the lock list selected by {dom} from its head and return the first
+ * non-blocked lock whose range overlaps {lock}; NULL if there is none.
+ */
+static posix_lock_t *
+first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock,
+               gf_lk_domain_t dom)
+{
+        posix_lock_t *cur = NULL;
+
+        list_for_each_entry (cur, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (!cur->blocked && locks_overlap (cur, lock))
+                        return cur;
+        }
+
+        return NULL;
+}
+
+
+
+/*
+ * Return 1 if @lock can be granted right now, 0 otherwise.
+ *
+ * A held (non-blocked) lock in the @dom list conflicts with @lock when the
+ * ranges overlap, at least one of the two is a write lock, @lock is not an
+ * unlock request, and the two have different owners.
+ */
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+                      gf_lk_domain_t dom)
+{
+        posix_lock_t *held = NULL;
+
+        list_for_each_entry (held, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (held->blocked)
+                        continue;
+
+                if (!locks_overlap (lock, held))
+                        continue;
+
+                if ((held->fl_type != F_WRLCK) && (lock->fl_type != F_WRLCK))
+                        continue;       /* read vs. read never conflicts */
+
+                if (lock->fl_type == F_UNLCK)
+                        continue;
+
+                if (same_owner (held, lock))
+                        continue;
+
+                return 0;
+        }
+
+        return 1;
+}
+
+
+extern void do_blocked_rw (pl_inode_t *);
+
+
+/*
+ * Insert @lock into the @dom lock list of @pl_inode, merging or splitting
+ * it against overlapping locks of the same owner.  Ownership of @lock
+ * passes to this function: it is either linked into the list or destroyed.
+ *
+ * For each overlapping lock {conf} in the list:
+ *   - same owner, same type: both are replaced by their union, which is
+ *     re-inserted recursively;
+ *   - same owner, different type: the union (with conf's type) is split
+ *     around @lock by subtract_locks(); non-F_UNLCK pieces are re-inserted
+ *     recursively and leftover F_UNLCK entries are purged;
+ *   - different owner: an F_UNLCK request is skipped; two read locks may
+ *     coexist, so @lock is inserted as is.
+ * If no case applies, @lock is inserted unless it is an unlock request, in
+ * which case it is destroyed.
+ *
+ * Locks built here are never on a list when they are recursed on, so their
+ * list heads are (re)initialised first; __delete_lock() may then be called
+ * on them safely.  Allocation failure of the union lock is treated with
+ * ERR_ABORT, the same policy subtract_locks() applies to its allocations.
+ */
+static void
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
+                    gf_lk_domain_t dom)
+{
+        posix_lock_t  *conf = NULL;
+        posix_lock_t  *t = NULL;
+        posix_lock_t  *sum = NULL;
+        int            i = 0;
+        struct _values v = { .locks = {0, 0, 0} };
+
+        list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) {
+                if (!locks_overlap (conf, lock))
+                        continue;
+
+                if (same_owner (conf, lock)) {
+                        if (conf->fl_type == lock->fl_type) {
+                                sum = add_locks (lock, conf);
+                                ERR_ABORT (sum);
+
+                                sum->fl_type    = lock->fl_type;
+                                sum->transport  = lock->transport;
+                                sum->client_pid = lock->client_pid;
+
+                                __delete_lock (pl_inode, conf);
+                                __destroy_lock (conf);
+
+                                __destroy_lock (lock);
+
+                                /* sum is not on any list yet */
+                                INIT_LIST_HEAD (&sum->list);
+                                __insert_and_merge (pl_inode, sum, dom);
+
+                                return;
+                        } else {
+                                sum = add_locks (lock, conf);
+                                ERR_ABORT (sum);
+
+                                sum->fl_type    = conf->fl_type;
+                                sum->transport  = conf->transport;
+                                sum->client_pid = conf->client_pid;
+
+                                v = subtract_locks (sum, lock);
+
+                                __delete_lock (pl_inode, conf);
+                                __destroy_lock (conf);
+
+                                __delete_lock (pl_inode, lock);
+                                __destroy_lock (lock);
+
+                                __destroy_lock (sum);
+
+                                for (i = 0; i < 3; i++) {
+                                        if (!v.locks[i])
+                                                continue;
+
+                                        if (v.locks[i]->fl_type == F_UNLCK) {
+                                                __destroy_lock (v.locks[i]);
+                                                continue;
+                                        }
+
+                                        /* pieces are memcpy'd copies: drop
+                                           any inherited list pointers */
+                                        INIT_LIST_HEAD (&v.locks[i]->list);
+                                        __insert_and_merge (pl_inode,
+                                                            v.locks[i], dom);
+                                }
+
+                                __delete_unlck_locks (pl_inode, dom);
+                                return;
+                        }
+                }
+
+                if (lock->fl_type == F_UNLCK) {
+                        continue;
+                }
+
+                if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+                        pl_insert_lock (pl_inode, lock, dom);
+                        return;
+                }
+        }
+
+        /* no conflicts, so just insert */
+        if (lock->fl_type != F_UNLCK) {
+                pl_insert_lock (pl_inode, lock, dom);
+        } else {
+                __destroy_lock (lock);
+        }
+}
+
+
+/*
+ * Try to grant the blocked locks in the @dom list of @pl_inode.
+ *
+ * Pass 1 moves every blocked lock that overlaps no non-blocked lock onto a
+ * temporary list and clears its blocked flag.  Pass 2 re-checks each
+ * candidate with pl_is_lock_grantable(): grantable locks are merged into
+ * the domain list, and a separate record carrying the waiting frame and the
+ * resulting flock is added to @granted for the caller to unwind; the rest
+ * are marked blocked again and re-queued.
+ *
+ * NOTE(review): the leading "__" and the caller grant_blocked_locks()
+ * suggest pl_inode->mutex is expected to be held here -- confirm.
+ */
+void
+__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode,
+ gf_lk_domain_t dom, struct list_head *granted)
+{
+ struct list_head tmp_list;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *conf = NULL;
+
+ INIT_LIST_HEAD (&tmp_list);
+
+ /* pass 1: collect blocked locks that no held lock overlaps */
+ list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
+ if (l->blocked) {
+ conf = first_overlap (pl_inode, l, dom);
+ if (conf)
+ continue;
+
+ l->blocked = 0;
+ list_move_tail (&l->list, &tmp_list);
+ }
+ }
+
+ /* pass 2: grant or re-queue each candidate */
+ list_for_each_entry_safe (l, tmp, &tmp_list, list) {
+ list_del_init (&l->list);
+
+ if (pl_is_lock_grantable (pl_inode, l, dom)) {
+ /* conf is reused here as the record handed back via @granted */
+ conf = CALLOC (1, sizeof (*conf));
+
+ if (!conf) {
+ /* cannot report the grant: leave the lock blocked */
+ l->blocked = 1;
+ pl_insert_lock (pl_inode, l, dom);
+ continue;
+ }
+
+ conf->frame = l->frame;
+ l->frame = NULL;
+
+ posix_lock_to_flock (l, &conf->user_flock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (pid=%d) %"PRId64" - %"PRId64" => Granted",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid,
+ l->user_flock.l_start,
+ l->user_flock.l_len);
+
+ /* l is consumed here (linked or destroyed); do not touch it afterwards */
+ __insert_and_merge (pl_inode, l, dom);
+
+ list_add (&conf->list, granted);
+ } else {
+ l->blocked = 1;
+ pl_insert_lock (pl_inode, l, dom);
+ }
+ }
+}
+
+
+/*
+ * Grant whatever blocked locks of @dom have become grantable.
+ *
+ * The decision is made by __grant_blocked_locks() under pl_inode->mutex;
+ * the waiting frames are unwound (op_ret 0, op_errno 0) only after the
+ * mutex has been released, and each grant record is freed afterwards.
+ */
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom)
+{
+        struct list_head  ready;
+        posix_lock_t     *entry = NULL;
+        posix_lock_t     *next  = NULL;
+
+        INIT_LIST_HEAD (&ready);
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                __grant_blocked_locks (this, pl_inode, dom, &ready);
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (entry, next, &ready, list) {
+                list_del_init (&entry->list);
+
+                STACK_UNWIND (entry->frame, 0, 0, &entry->user_flock);
+
+                FREE (entry);
+        }
+}
+
+
+/*
+ * Attempt to set @lock on @pl_inode in domain @dom.
+ *
+ * Under pl_inode->mutex:
+ *   - grantable:            the lock is merged into the list, returns 0;
+ *   - not grantable, @can_block set: the lock is queued as blocked,
+ *                           returns -1 with errno left at 0;
+ *   - not grantable, otherwise: returns -1 with errno == EAGAIN.
+ *
+ * After releasing the mutex, blocked locks and blocked reads/writes are
+ * re-examined in every case.  Once the lock has been granted it may have
+ * been freed by the merge, so it is not touched afterwards.
+ */
+int
+pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+          int can_block, gf_lk_domain_t dom)
+{
+        int ret       = 0;
+        int grantable = 0;
+
+        errno = 0;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        {
+                grantable = pl_is_lock_grantable (pl_inode, lock, dom);
+
+                if (!grantable && !can_block) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
+                                lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                                lock->client_pid,
+                                lock->user_flock.l_start,
+                                lock->user_flock.l_len);
+                        errno = EAGAIN;
+                        ret = -1;
+                } else if (!grantable) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked",
+                                lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                                lock->client_pid,
+                                lock->user_flock.l_start,
+                                lock->user_flock.l_len);
+                        lock->blocked = 1;
+                        pl_insert_lock (pl_inode, lock, dom);
+                        ret = -1;
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
+                                lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+                                lock->client_pid,
+                                lock->user_flock.l_start,
+                                lock->user_flock.l_len);
+                        __insert_and_merge (pl_inode, lock, dom);
+                }
+        }
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        grant_blocked_locks (this, pl_inode, dom);
+
+        do_blocked_rw (pl_inode);
+
+        return ret;
+}
+
+
+/*
+ * Query for a lock overlapping @lock in domain @dom.
+ *
+ * Returns the first non-blocked overlapping lock if there is one.
+ * Otherwise @lock's type is set to F_UNLCK and @lock itself is returned.
+ */
+posix_lock_t *
+pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+{
+        posix_lock_t *found = first_overlap (pl_inode, lock, dom);
+
+        if (found != NULL)
+                return found;
+
+        lock->fl_type = F_UNLCK;
+        return lock;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
new file mode 100644
index 00000000000..135f33011bf
--- /dev/null
+++ b/xlators/features/locks/src/common.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+/* Allocate a lock describing @flock for owner (@transport, @client_pid);
+   NULL on allocation failure. */
+posix_lock_t *
+new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid);
+
+/* Fetch the per-inode lock context, creating it on first use; NULL on
+   allocation failure. */
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode);
+
+/* Return the first non-blocked lock overlapping @lock, or @lock itself with
+   its type set to F_UNLCK when there is none. */
+posix_lock_t *
+pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain);
+
+/* Try to set @lock: 0 when granted, -1 when queued as blocked (@can_block)
+   or refused (errno set to EAGAIN). */
+int
+pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock,
+ int can_block, gf_lk_domain_t domain);
+
+/* Non-zero if no held lock of another owner conflicts with @lock. */
+int
+pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
+ gf_lk_domain_t dom);
+
+/* Append @lock to the tail of the @dom lock list. */
+void
+pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom);
+
+/* Grant blocked locks that have become grantable and unwind their frames. */
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain);
+
+/* Fill @flock (pid, type, start, len) from @lock. */
+void
+posix_lock_to_flock (posix_lock_t *lock, struct flock *flock);
+
+/* Non-zero if the inclusive byte ranges of the two locks intersect. */
+int
+locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
+
+/* Non-zero if both client_pid and transport of the two locks match. */
+int
+same_owner (posix_lock_t *l1, posix_lock_t *l2);
+
+/* Unlink a lock from its list without freeing it. */
+void __delete_lock (pl_inode_t *, posix_lock_t *);
+
+/* Free a lock without unlinking it. */
+void __destroy_lock (posix_lock_t *);
+
+#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c
new file mode 100644
index 00000000000..7f454a78e22
--- /dev/null
+++ b/xlators/features/locks/src/internal.c
@@ -0,0 +1,762 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+
+
+/**
+ * delete_locks_of_transport - drop every internal (inodelk) lock that
+ * was taken through a given transport
+ * @pinode: lock state of the inode
+ * @trans: transport whose locks are released
+ *
+ * Internal locks are posix_lock_t entries linked through ->list on
+ * pinode->int_list. (pinode->dir_list holds pl_entry_lock_t entries and
+ * must not be walked as posix_lock_t.)
+ *
+ * Takes pinode->mutex itself, so the caller must not hold it.
+ *
+ * Always returns 0.
+ */
+
+static int
+delete_locks_of_transport (pl_inode_t *pinode, transport_t *trans)
+{
+        posix_lock_t *tmp = NULL;
+        posix_lock_t *l   = NULL;
+
+        pthread_mutex_lock (&pinode->mutex);
+        {
+                list_for_each_entry_safe (l, tmp, &pinode->int_list, list) {
+                        if (l->transport == trans) {
+                                /* release the matching entry 'l'; 'tmp'
+                                   is only the iterator's look-ahead */
+                                __delete_lock (pinode, l);
+                                __destroy_lock (l);
+                        }
+                }
+        }
+        pthread_mutex_unlock (&pinode->mutex);
+
+        return 0;
+}
+
+
+/*
+ * Scan the internal-domain lock list of @pinode for a lock that has the
+ * same owner as @lock and exactly the same byte range (identical
+ * fl_start and fl_end). Returns the first such lock, or NULL if the
+ * list holds none.
+ */
+static posix_lock_t *
+__find_exact_matching_lock (pl_inode_t *pinode, posix_lock_t *lock)
+{
+        posix_lock_t *cur = NULL;
+
+        list_for_each_entry (cur, DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL), list) {
+                if (!same_owner (cur, lock))
+                        continue;
+                if (cur->fl_start != lock->fl_start)
+                        continue;
+                if (cur->fl_end != lock->fl_end)
+                        continue;
+
+                return cur;
+        }
+
+        return NULL;
+}
+
+/**
+ * pl_inodelk_common - shared implementation of the inodelk/finodelk fops
+ * @frame: call frame of the request
+ * @this: this translator
+ * @inode: inode to lock
+ * @cmd: F_SETLK or F_SETLKW (anything else is refused with ENOTSUP)
+ * @flock: requested range; l_type must be F_WRLCK or F_UNLCK
+ *
+ * These fops provide fcntl-style locking on files for internal
+ * purposes. Locks held through them reside in a domain (GF_LOCK_INTERNAL)
+ * different from the one used by applications. They exist for the use
+ * of AFR.
+ *
+ * A request whose frame->root->pid is 0 is a special case: it releases
+ * all internal locks held through the request's transport.
+ *
+ * The frame is unwound with (op_ret, op_errno) unless an F_SETLKW
+ * request had to be queued as a blocked lock; in that case the frame
+ * stays with the queued lock. Always returns 0.
+ */
+
+
+static int
+pl_inodelk_common (call_frame_t *frame, xlator_t *this,
+                   inode_t *inode, int32_t cmd, struct flock *flock)
+{
+        int32_t op_ret      = -1;
+        int32_t op_errno    = 0;
+        int     can_block   = 0;
+        int     need_unwind = 1;  /* cleared when the request is queued */
+        int     released    = 0;  /* set when a lock was dropped */
+
+        posix_locks_private_t *priv       = NULL;
+        transport_t           *transport  = NULL;
+        pid_t                  client_pid = -1;
+        pl_inode_t            *pinode     = NULL;
+
+        posix_lock_t *reqlock   = NULL;
+        posix_lock_t *matchlock = NULL;
+
+        VALIDATE_OR_GOTO (frame, unwind);
+        VALIDATE_OR_GOTO (inode, unwind);
+        VALIDATE_OR_GOTO (flock, unwind);
+
+        if ((flock->l_start < 0) || (flock->l_len < 0)) {
+                op_errno = EINVAL;
+                goto unwind;
+        }
+
+        transport  = frame->root->trans;
+        client_pid = frame->root->pid;
+
+        priv = (posix_locks_private_t *) this->private;
+
+        VALIDATE_OR_GOTO (priv, unwind);
+
+        pinode = pl_inode_get (this, inode);
+        if (!pinode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (client_pid == 0) {
+                /*
+                  special case: this means release all locks
+                  from this transport
+                */
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "releasing all locks from transport %p", transport);
+
+                delete_locks_of_transport (pinode, transport);
+
+                /* the release itself cannot fail: report success */
+                op_ret   = 0;
+                released = 1;
+                goto unwind;
+        }
+
+        reqlock = new_posix_lock (flock, transport, client_pid);
+        if (!reqlock) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* NOTE(review): pl_insert_lock is called below with pinode->mutex
+           held; confirm that it does not try to take the mutex itself. */
+        pthread_mutex_lock (&pinode->mutex);
+        {
+                switch (cmd) {
+                case F_SETLKW:
+                        can_block = 1;
+                        reqlock->frame = frame;
+                        reqlock->this  = this;
+
+                        /* fall through */
+
+                case F_SETLK:
+                        memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+
+                        switch (flock->l_type) {
+
+                        case F_WRLCK:
+                                if (pl_is_lock_grantable (pinode, reqlock, GF_LOCK_INTERNAL)) {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
+                                                reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                                reqlock->client_pid,
+                                                reqlock->user_flock.l_start,
+                                                reqlock->user_flock.l_len);
+                                        pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL);
+
+                                        break;
+                                }
+
+                                if (can_block) {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "%s (pid=%d) %"PRId64" - %"PRId64" => blocked",
+                                                reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                                reqlock->client_pid,
+                                                reqlock->user_flock.l_start,
+                                                reqlock->user_flock.l_len);
+
+                                        /* queue it as a waiting lock so it
+                                           is not mistaken for a granted
+                                           one */
+                                        reqlock->blocked = 1;
+                                        pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL);
+
+                                        /* the frame now belongs to the
+                                           queued lock; unwinding it here
+                                           would unwind it twice */
+                                        need_unwind = 0;
+                                        goto unlock;
+                                }
+
+                                /* log first: reqlock is gone after
+                                   __destroy_lock */
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
+                                        reqlock->fl_type == F_UNLCK ? "unlock" : "lock",
+                                        reqlock->client_pid, reqlock->user_flock.l_start,
+                                        reqlock->user_flock.l_len);
+
+                                __destroy_lock (reqlock);
+                                op_errno = EAGAIN;
+
+                                goto unlock;
+
+                        case F_UNLCK:
+                                matchlock = __find_exact_matching_lock (pinode, reqlock);
+
+                                __destroy_lock (reqlock);
+                                if (!matchlock) {
+                                        op_errno = EINVAL;
+                                        goto unlock;
+                                }
+
+                                __delete_lock (pinode, matchlock);
+                                __destroy_lock (matchlock);
+                                released = 1;
+
+                                break;
+
+                        default:
+                                op_errno = ENOTSUP;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "lock type %d not supported for [F]INODELK",
+                                        flock->l_type);
+                                /* the request lock was never inserted */
+                                __destroy_lock (reqlock);
+                                goto unlock;
+                        }
+
+
+                        break;
+
+                default:
+                        op_errno = ENOTSUP;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lock command F_GETLK not supported for [F]INODELK (cmd=%d)",
+                                cmd);
+                        /* the request lock was never inserted */
+                        __destroy_lock (reqlock);
+                        goto unlock;
+                }
+
+                op_ret = 0;
+        }
+unlock:
+        pthread_mutex_unlock (&pinode->mutex);
+
+unwind:
+        /* a released lock may allow waiting requests to proceed; this is
+           done outside pinode->mutex, the way pl_flush does it */
+        if (released)
+                grant_blocked_locks (this, pinode, GF_LOCK_INTERNAL);
+
+        if (need_unwind) {
+                STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+
+/* inodelk entry point: takes the inode from @loc and hands the request
+   to pl_inodelk_common, whose return value is passed through. */
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t cmd, struct flock *flock)
+{
+ return pl_inodelk_common (frame, this, loc->inode, cmd, flock);
+}
+
+
+/* finodelk entry point: takes the inode from @fd and hands the request
+   to pl_inodelk_common, whose return value is passed through. */
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct flock *flock)
+{
+ return pl_inodelk_common (frame, this, fd->inode, cmd, flock);
+}
+
+
+/**
+ * types_conflict - do two entry-lock types exclude each other?
+ * @t1: type of the first lock
+ * @t2: type of the second lock
+ *
+ * Returns 0 only when both types are ENTRYLK_RDLCK; every other
+ * combination yields 1.
+ */
+
+static int
+types_conflict (entrylk_type t1, entrylk_type t2)
+{
+        if (t1 != ENTRYLK_RDLCK)
+                return 1;
+
+        if (t2 != ENTRYLK_RDLCK)
+                return 1;
+
+        return 0;
+}
+
+/**
+ * all_names - does a basename stand for every name in the directory?
+ * @basename: name to check; NULL denotes "all names"
+ *
+ * Evaluates to 1 when @basename is NULL and to 0 otherwise. The argument
+ * is parenthesized so that any pointer expression can be passed.
+ */
+
+#define all_names(basename) (((basename) == NULL) ? 1 : 0)
+
+/**
+ * names_conflict - can locks on these two names collide?
+ * @n1: first name, NULL meaning all names
+ * @n2: second name, NULL meaning all names
+ *
+ * Returns 1 when either name is the all-names wildcard or when both
+ * strings are equal, 0 otherwise.
+ */
+
+static int
+names_conflict (const char *n1, const char *n2)
+{
+        if (all_names (n1))
+                return 1;
+
+        if (all_names (n2))
+                return 1;
+
+        return strcmp (n1, n2) == 0;
+}
+
+
+/*
+ * Strict name equality: 1 when both names are NULL or when both are
+ * non-NULL and compare equal as strings, 0 otherwise. Unlike
+ * names_conflict, a NULL name matches only another NULL name.
+ */
+static int
+names_equal (const char *n1, const char *n2)
+{
+        if (n1 == NULL || n2 == NULL)
+                return n1 == n2;
+
+        return strcmp (n1, n2) == 0;
+}
+
+/**
+ * __lock_grantable - look for a held entry lock that blocks a request
+ * @pinode: inode whose entry locks (dir_list) are examined
+ * @basename: name the request wants to lock, NULL meaning all names
+ * @type: type of the requested lock
+ *
+ * Returns the first lock on the list whose name and type both conflict
+ * with the request, or NULL when nothing stands in the way (i.e. the
+ * request is grantable).
+ */
+
+static pl_entry_lock_t *
+__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type)
+{
+        pl_entry_lock_t *held = NULL;
+
+        /* an empty list simply yields no iterations */
+        list_for_each_entry (held, &pinode->dir_list, inode_list) {
+                if (!names_conflict (held->basename, basename))
+                        continue;
+
+                if (!types_conflict (held->type, type))
+                        continue;
+
+                return held;
+        }
+
+        return NULL;
+}
+
+/**
+ * __find_most_matching_lock - pick the held lock closest to a name
+ * @pinode: inode whose entry locks (dir_list) are searched
+ * @basename: name to search for
+ *
+ * Preference order:
+ *   1. a lock whose basename equals @basename,
+ *   2. an all-names lock (basename == NULL).
+ * When several locks qualify for a category, the one found last on the
+ * list is kept. Returns NULL when neither kind exists.
+ */
+
+static pl_entry_lock_t *
+__find_most_matching_lock (pl_inode_t *pinode, const char *basename)
+{
+        pl_entry_lock_t *cur       = NULL;
+        pl_entry_lock_t *wildcard  = NULL;
+        pl_entry_lock_t *same_name = NULL;
+
+        list_for_each_entry (cur, &pinode->dir_list, inode_list) {
+                if (all_names (cur->basename)) {
+                        wildcard = cur;
+                        continue;
+                }
+
+                if (names_equal (cur->basename, basename))
+                        same_name = cur;
+        }
+
+        if (same_name)
+                return same_name;
+
+        return wildcard;
+}
+
+
+/**
+ * new_entrylk_lock - allocate and initialise an entry-lock record
+ * @pinode: inode the lock is meant for (not modified here; the caller
+ *          links the record into a list)
+ * @basename: name to lock; copied. NULL means "all names"
+ * @type: type of the lock
+ * @trans: transport the request arrived on
+ *
+ * Returns the new record with both list heads initialised, or NULL when
+ * memory could not be allocated.
+ */
+
+static pl_entry_lock_t *
+new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
+                  transport_t *trans)
+{
+        pl_entry_lock_t *newlock = NULL;
+
+        newlock = CALLOC (1, sizeof (pl_entry_lock_t));
+        if (!newlock) {
+                goto out;
+        }
+
+        if (basename) {
+                newlock->basename = strdup (basename);
+                if (!newlock->basename) {
+                        /* a NULL basename would silently turn this into
+                           an all-names lock: fail instead */
+                        FREE (newlock);
+                        newlock = NULL;
+                        goto out;
+                }
+        }
+
+        newlock->type  = type;
+        newlock->trans = trans;
+
+        if (type == ENTRYLK_RDLCK)
+                newlock->read_count = 1;
+
+        INIT_LIST_HEAD (&newlock->inode_list);
+        INIT_LIST_HEAD (&newlock->blocked_locks);
+
+out:
+        return newlock;
+}
+
+/**
+ * __lock_name - lock a name in a directory
+ * @pinode: lock state of the directory inode; the callers in this file
+ *          hold pinode->mutex
+ * @basename: name of the entry to lock; if NULL, lock the entire
+ *            directory
+ * @type: ENTRYLK_RDLCK or ENTRYLK_WRLCK
+ * @frame: frame of the request (remembered when the request blocks)
+ * @this: this translator
+ * @nonblock: non-zero to fail instead of queueing on conflict
+ *
+ * The entire directory being locked is represented as a single
+ * pl_entry_lock_t on pinode->dir_list with basename == NULL.
+ *
+ * Returns:
+ *   0        the lock was taken,
+ *   -EAGAIN  a conflicting lock exists; when @nonblock is zero a record
+ *            holding @frame has been queued on that lock's blocked_locks,
+ *   -ENOMEM  a lock record could not be allocated,
+ *   -EINVAL  @type is not a known lock type.
+ */
+
+int
+__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
+             call_frame_t *frame, xlator_t *this, int nonblock)
+{
+        pl_entry_lock_t *lock = NULL;
+        pl_entry_lock_t *conf = NULL;
+
+        transport_t *trans = NULL;
+
+        int ret = -EINVAL;
+
+        trans = frame->root->trans;
+
+        conf = __lock_grantable (pinode, basename, type);
+        if (conf) {
+                ret = -EAGAIN;
+                if (nonblock)
+                        goto out;
+
+                lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                if (!lock) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "blocking lock: {pinode=%p, basename=%s}",
+                        pinode, basename);
+
+                lock->frame   = frame;
+                lock->this    = this;
+                lock->blocked = 1;
+
+                /* wait on the lock that is in the way */
+                list_add (&lock->blocked_locks, &conf->blocked_locks);
+
+                goto out;
+        }
+
+        switch (type) {
+        case ENTRYLK_RDLCK:
+                lock = __find_most_matching_lock (pinode, basename);
+
+                if (lock && names_equal (lock->basename, basename)) {
+                        /* Share the read lock that is already held on
+                           this name. The record stays linked on
+                           pinode->dir_list, so it must not be freed
+                           here; __unlock_name releases it when
+                           read_count drops to zero. */
+                        lock->read_count++;
+                        break;
+                }
+
+                lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                if (!lock) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                list_add (&lock->inode_list, &pinode->dir_list);
+                break;
+
+        case ENTRYLK_WRLCK:
+                lock = new_entrylk_lock (pinode, basename, type, trans);
+
+                if (!lock) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                list_add (&lock->inode_list, &pinode->dir_list);
+                break;
+
+        default:
+                /* unknown type: nothing was locked, ret is -EINVAL */
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/**
+ * __unlock_name - release a lock on a name in a directory
+ * @pinode: lock state of the directory inode
+ * @basename: name of the entry to unlock; if NULL, the lock on the
+ *            entire directory
+ * @type: type of the lock being released
+ *
+ * A read lock only has its read_count decremented until the count
+ * reaches zero. Returns the lock record once it has been unlinked from
+ * pinode->dir_list (the caller then owns it), or NULL when the lock is
+ * still held by other readers or no lock with this name and type exists.
+ */
+
+pl_entry_lock_t *
+__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type)
+{
+        pl_entry_lock_t *held = NULL;
+
+        held = __find_most_matching_lock (pinode, basename);
+        if (held == NULL) {
+                gf_log ("locks", GF_LOG_DEBUG,
+                        "unlock on %s (type=%s) attempted but no matching lock found",
+                        basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+                        "ENTRYLK_WRLCK");
+                return NULL;
+        }
+
+        if (!names_equal (held->basename, basename) || held->type != type) {
+                gf_log ("locks", GF_LOG_ERROR,
+                        "unlock for a non-existing lock!");
+                return NULL;
+        }
+
+        if (type == ENTRYLK_RDLCK)
+                held->read_count--;
+
+        /* other readers still hold it */
+        if (type != ENTRYLK_WRLCK && held->read_count != 0)
+                return NULL;
+
+        list_del (&held->inode_list);
+
+        return held;
+}
+
+
+/*
+ * Retry every request queued on lock->blocked_locks now that @lock is
+ * gone from the inode. Both callers in this file invoke it with
+ * pl_inode->mutex held.
+ *
+ * Each queued record is unlinked and re-submitted through __lock_name in
+ * blocking mode (nonblock = 0):
+ *  - on success (0) the record is moved to @granted; the caller unwinds
+ *    its frame and frees it once the mutex has been dropped;
+ *  - otherwise the record is freed here. In the -EAGAIN case
+ *    __lock_name has queued a fresh record (carrying the same frame) on
+ *    the lock that is now in the way.
+ *
+ * NOTE(review): when __lock_name fails with -ENOMEM the record is freed
+ * while its frame is never unwound.
+ * @this is not used by this function.
+ */
+void
+__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *lock,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (bl, tmp, &lock->blocked_locks,
+ blocked_locks) {
+ list_del_init (&bl->blocked_locks);
+
+ /* TODO: error checking */
+
+ gf_log ("locks", GF_LOG_DEBUG,
+ "trying to unblock: {pinode=%p, basename=%s}",
+ pl_inode, bl->basename);
+
+ bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
+ bl->frame, bl->this, 0);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ } else {
+ if (bl->basename)
+ FREE (bl->basename);
+ FREE (bl);
+ }
+ }
+ return;
+}
+
+
+/*
+ * Hand the locks that were waiting on @unlocked to their requesters.
+ *
+ * The waiting requests are retried under pl_inode->mutex; the frames of
+ * those that succeeded are unwound with (0, 0) after the mutex has been
+ * released. Finally @unlocked itself (already unlinked by the caller)
+ * is freed.
+ */
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+                           pl_entry_lock_t *unlocked)
+{
+        struct list_head  ready;
+        pl_entry_lock_t  *cur  = NULL;
+        pl_entry_lock_t  *next = NULL;
+
+        INIT_LIST_HEAD (&ready);
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        __grant_blocked_entry_locks (this, pl_inode, unlocked, &ready);
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (cur, next, &ready, blocked_locks) {
+                list_del_init (&cur->blocked_locks);
+
+                STACK_UNWIND (cur->frame, 0, 0);
+
+                FREE (cur->basename);
+                FREE (cur);
+        }
+
+        FREE (unlocked->basename);
+        FREE (unlocked);
+}
+
+
+/**
+ * release_entry_locks_for_transport - drop all entry locks that a
+ * transport holds on an inode
+ * @this: this translator
+ * @pinode: lock state of the inode
+ * @trans: transport whose locks are released
+ *
+ * Under pinode->mutex every lock on dir_list that belongs to @trans is
+ * unlinked, the requests waiting on it are retried and the lock is
+ * freed. The frames of requests that got their lock are unwound with
+ * (0, 0) after the mutex has been released. Always returns 0.
+ */
+
+static int
+release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
+                                   transport_t *trans)
+{
+        pl_entry_lock_t  *cur;
+        pl_entry_lock_t  *next;
+        struct list_head  ready;
+
+        INIT_LIST_HEAD (&ready);
+
+        pthread_mutex_lock (&pinode->mutex);
+
+        /* an empty dir_list simply yields no iterations */
+        list_for_each_entry_safe (cur, next, &pinode->dir_list, inode_list) {
+                if (cur->trans == trans) {
+                        list_del_init (&cur->inode_list);
+                        __grant_blocked_entry_locks (this, pinode, cur,
+                                                     &ready);
+
+                        FREE (cur->basename);
+                        FREE (cur);
+                }
+        }
+
+        pthread_mutex_unlock (&pinode->mutex);
+
+        list_for_each_entry_safe (cur, next, &ready, blocked_locks) {
+                list_del_init (&cur->blocked_locks);
+
+                STACK_UNWIND (cur->frame, 0, 0);
+
+                FREE (cur->basename);
+                FREE (cur);
+        }
+
+        return 0;
+}
+
+
+/**
+ * pl_entrylk_common - shared implementation of the entrylk/fentrylk fops
+ * @frame: call frame of the request
+ * @this: this translator
+ * @inode: directory inode
+ * @basename: name to (un)lock, NULL meaning the whole directory
+ * @cmd: ENTRYLK_LOCK, ENTRYLK_LOCK_NB or ENTRYLK_UNLOCK
+ * @type: ENTRYLK_RDLCK or ENTRYLK_WRLCK
+ *
+ * Locking on names (directory entries).
+ *
+ * A request whose frame->root->pid is 0 is a special case: it releases
+ * all entry locks held through the request's transport.
+ *
+ * The frame is unwound with (op_ret, op_errno) unless a blocking
+ * ENTRYLK_LOCK had to wait; in that case __lock_name has queued the
+ * frame and it is unwound when the lock is granted. An unknown @cmd
+ * fails with EINVAL. Always returns 0.
+ */
+
+int
+pl_entrylk_common (call_frame_t *frame, xlator_t *this,
+                   inode_t *inode, const char *basename,
+                   entrylk_cmd cmd, entrylk_type type)
+{
+        int32_t op_ret   = -1;
+        int32_t op_errno = 0;
+
+        transport_t *transport = NULL;
+        pid_t        pid       = -1;
+
+        pl_inode_t      *pinode   = NULL;
+        int              ret      = -1;
+        int              nonblock = 0;
+        pl_entry_lock_t *unlocked = NULL;
+        char             unwind   = 1;
+
+        pinode = pl_inode_get (this, inode);
+        if (!pinode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        pid       = frame->root->pid;
+        transport = frame->root->trans;
+
+        if (pid == 0) {
+                /*
+                  this is a special case that means release
+                  all locks from this transport
+                */
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "releasing locks for transport %p", transport);
+
+                release_entry_locks_for_transport (this, pinode, transport);
+                op_ret = 0;
+
+                goto out;
+        }
+
+        switch (cmd) {
+        case ENTRYLK_LOCK:
+        case ENTRYLK_LOCK_NB:
+                nonblock = (cmd == ENTRYLK_LOCK_NB);
+
+                pthread_mutex_lock (&pinode->mutex);
+                {
+                        ret = __lock_name (pinode, basename, type,
+                                           frame, this, nonblock);
+                }
+                pthread_mutex_unlock (&pinode->mutex);
+
+                if (ret < 0) {
+                        /* a blocking request that has to wait keeps its
+                           frame; it is unwound when the lock is granted */
+                        if (!nonblock && ret == -EAGAIN)
+                                unwind = 0;
+                        op_errno = -ret;
+                        goto out;
+                }
+
+                break;
+
+        case ENTRYLK_UNLOCK:
+                pthread_mutex_lock (&pinode->mutex);
+                {
+                        unlocked = __unlock_name (pinode, basename, type);
+                }
+                pthread_mutex_unlock (&pinode->mutex);
+
+                if (unlocked)
+                        grant_blocked_entry_locks (this, pinode, unlocked);
+
+                break;
+
+        default:
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unexpected case!");
+                /* do not report a failure with errno 0 */
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        if (unwind) {
+                STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+
+/* entrylk entry point: takes the directory inode from @loc and hands
+   the request to pl_entrylk_common, whose return value is passed
+   through. */
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ return pl_entrylk_common (frame, this, loc->inode, basename, cmd, type);
+}
+
+
+/* fentrylk entry point: takes the directory inode from @fd and hands
+   the request to pl_entrylk_common, whose return value is passed
+   through. */
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ return pl_entrylk_common (frame, this, fd->inode, basename, cmd, type);
+}
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
new file mode 100644
index 00000000000..8ed7bb63f1c
--- /dev/null
+++ b/xlators/features/locks/src/locks.h
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __POSIX_LOCKS_H__
+#define __POSIX_LOCKS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "transport.h"
+#include "stack.h"
+#include "call-stub.h"
+
+struct __pl_fd;
+
+/* A byte-range (fcntl-style) lock. Records of this type are linked
+   through ->list on the ext_list or int_list of a pl_inode_t. */
+struct __posix_lock {
+ struct list_head list;
+
+ short fl_type;
+ off_t fl_start;
+ off_t fl_end;
+
+ short blocked; /* waiting to acquire */
+ struct flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ fd_t *fd;
+
+ call_frame_t *frame;
+
+ /* These two together serve to uniquely identify each process
+ across nodes */
+
+ transport_t *transport; /* to identify client node */
+ pid_t client_pid; /* pid of client process */
+};
+typedef struct __posix_lock posix_lock_t;
+
+/* A read or write request that is waiting for a conflicting mandatory
+   lock to go away. Queued on pl_inode_t.rw_list by pl_readv/pl_writev
+   and resumed by do_blocked_rw. */
+struct __pl_rw_req_t {
+ struct list_head list;
+ call_stub_t *stub; /* resumed with call_resume once allowed */
+ posix_lock_t region; /* byte range and owner of the request */
+};
+typedef struct __pl_rw_req_t pl_rw_req_t;
+
+
+/* A lock on a name (directory entry), or on the whole directory when
+   basename is NULL. */
+struct __entry_lock {
+ struct list_head inode_list; /* list_head back to pl_inode_t */
+ struct list_head blocked_locks; /* locks blocked due to this lock */
+
+ call_frame_t *frame;
+ xlator_t *this;
+ int blocked;
+
+ /* NOTE(review): declared const, but internal.c fills it with strdup
+    and releases it with FREE */
+ const char *basename;
+ entrylk_type type;
+ unsigned int read_count; /* number of read locks */
+ transport_t *trans;
+};
+typedef struct __entry_lock pl_entry_lock_t;
+
+
+/* The "simulated" inode. This contains a list of all the locks associated
+ with this file */
+
+/* Per-inode lock state. The code in internal.c and posix.c takes
+   ->mutex around accesses to the lists below. */
+struct __pl_inode {
+ pthread_mutex_t mutex;
+
+ struct list_head dir_list; /* list of entry locks */
+ struct list_head ext_list; /* list of fcntl locks */
+ struct list_head int_list; /* list of internal locks */
+ struct list_head rw_list; /* list of waiting r/w requests */
+ int mandatory; /* if mandatory locking is enabled */
+};
+typedef struct __pl_inode pl_inode_t;
+
+/* Head of the lock list for a lock domain: ext_list for GF_LOCK_POSIX,
+   int_list for every other domain. Both arguments are parenthesized so
+   that arbitrary expressions may be passed. */
+#define DOMAIN_HEAD(pl_inode, dom) (((dom) == GF_LOCK_POSIX) \
+ ? &(pl_inode)->ext_list \
+ : &(pl_inode)->int_list)
+
+
+/* Per-fd state of the locks translator. */
+struct __pl_fd {
+ gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+};
+typedef struct __pl_fd pl_fd_t;
+
+
+/* Translator-wide state, reached through this->private. */
+typedef struct {
+ gf_boolean_t mandatory; /* if mandatory locking is enabled */
+} posix_locks_private_t;
+
+
+#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
new file mode 100644
index 00000000000..e2b336607c4
--- /dev/null
+++ b/xlators/features/locks/src/posix.c
@@ -0,0 +1,834 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+
+#ifndef LLONG_MAX
+#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
+#endif /* LLONG_MAX */
+
+/* Forward declarations */
+
+
+void do_blocked_rw (pl_inode_t *);
+static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+
+/* Per-call state of a truncate/ftruncate request, kept in frame->local
+   while the preceding stat/fstat is in flight. */
+struct _truncate_ops {
+ loc_t loc; /* copy of the path; set by pl_truncate only */
+ fd_t *fd; /* set by pl_ftruncate only */
+ off_t offset; /* requested new size */
+ enum {TRUNCATE, FTRUNCATE} op; /* which of the two fops this is */
+};
+
+
+/* Callback of the truncate/ftruncate wound by truncate_stat_cbk:
+   releases the loc copied by pl_truncate (TRUNCATE only) and passes the
+   result up unchanged. */
+int
+pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ struct _truncate_ops *local = NULL;
+
+ local = frame->local;
+
+ if (local->op == TRUNCATE)
+ loc_wipe (&local->loc);
+
+ STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+}
+
+
+/*
+ * May the owner (transport, client_pid) truncate the file to @offset?
+ *
+ * The truncate is treated as touching the range [offset, LLONG_MAX].
+ * Returns 0 if a granted (non-blocked) fcntl lock of a different owner
+ * overlaps that range, 1 otherwise. Takes pl_inode->mutex while the
+ * lock list is examined.
+ */
+static int
+truncate_allowed (pl_inode_t *pl_inode,
+                  transport_t *transport, pid_t client_pid,
+                  off_t offset)
+{
+        posix_lock_t *held    = NULL;
+        posix_lock_t  region  = {.list = {0, }, };
+        int           allowed = 1;
+
+        region.fl_start   = offset;
+        region.fl_end     = LLONG_MAX;
+        region.transport  = transport;
+        region.client_pid = client_pid;
+
+        pthread_mutex_lock (&pl_inode->mutex);
+
+        list_for_each_entry (held, &pl_inode->ext_list, list) {
+                if (held->blocked)
+                        continue;
+
+                if (!locks_overlap (&region, held))
+                        continue;
+
+                if (same_owner (&region, held))
+                        continue;
+
+                allowed = 0;
+                break;
+        }
+
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        return allowed;
+}
+
+
+/*
+ * Callback of the stat/fstat issued by pl_truncate/pl_ftruncate.
+ *
+ * If the stat succeeded and no mandatory lock of another owner covers
+ * the range that would be cut off, the actual truncate/ftruncate is
+ * wound to the child. Otherwise the request is unwound with -1 and the
+ * errno that describes the failure:
+ *   - the child's op_errno when the stat failed,
+ *   - ENOMEM when the pl_inode could not be obtained,
+ *   - EAGAIN when a conflicting mandatory lock exists.
+ */
+static int
+truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        posix_locks_private_t *priv     = NULL;
+        struct _truncate_ops  *local    = NULL;
+        inode_t               *inode    = NULL;
+        pl_inode_t            *pl_inode = NULL;
+
+
+        priv  = this->private;
+        local = frame->local;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "got error (errno=%d, stderror=%s) from child",
+                        op_errno, strerror (op_errno));
+                goto unwind;
+        }
+
+        if (local->op == TRUNCATE)
+                inode = local->loc.inode;
+        else
+                inode = local->fd->inode;
+
+        pl_inode = pl_inode_get (this, inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unable to get pl_inode from %p", inode);
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (priv->mandatory
+            && pl_inode->mandatory
+            && !truncate_allowed (pl_inode, frame->root->trans,
+                                  frame->root->pid, local->offset)) {
+                op_errno = EAGAIN;
+                goto unwind;
+        }
+
+        switch (local->op) {
+        case TRUNCATE:
+                STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->truncate,
+                            &local->loc, local->offset);
+                break;
+        case FTRUNCATE:
+                STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->ftruncate,
+                            local->fd, local->offset);
+                break;
+        }
+
+        return 0;
+
+unwind:
+        if (local->op == TRUNCATE)
+                loc_wipe (&local->loc);
+
+        /* report the errno chosen above instead of a blanket ENOMEM */
+        STACK_UNWIND (frame, -1, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * truncate fop. Records the request in frame->local and first winds a
+ * stat to the child; truncate_stat_cbk continues from there. Unwinds
+ * with (-1, ENOMEM) if the per-call state cannot be allocated.
+ */
+int
+pl_truncate (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, off_t offset)
+{
+        struct _truncate_ops *ops = NULL;
+
+        ops = CALLOC (1, sizeof (struct _truncate_ops));
+        if (ops == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        ops->op     = TRUNCATE;
+        ops->offset = offset;
+        loc_copy (&ops->loc, loc);
+
+        frame->local = ops;
+
+        STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->stat, loc);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop. Records the request in frame->local and first winds an
+ * fstat to the child; truncate_stat_cbk continues from there. Unwinds
+ * with (-1, ENOMEM) if the per-call state cannot be allocated.
+ */
+int
+pl_ftruncate (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset)
+{
+        struct _truncate_ops *ops = NULL;
+
+        ops = CALLOC (1, sizeof (struct _truncate_ops));
+        if (ops == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        ops->op     = FTRUNCATE;
+        ops->offset = offset;
+        ops->fd     = fd;
+
+        frame->local = ops;
+
+        STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd);
+
+        return 0;
+}
+
+
+/*
+ * Remove every lock owned by (transport, pid) from the inode, first
+ * from the fcntl list (ext_list) and then from the internal list
+ * (int_list). The caller in this file holds pl_inode->mutex.
+ */
+static void
+__delete_locks_of_owner (pl_inode_t *pl_inode,
+                         transport_t *transport, pid_t pid)
+{
+        struct list_head *domains[2];
+        posix_lock_t     *next = NULL;
+        posix_lock_t     *cur  = NULL;
+        int               i    = 0;
+
+        /* TODO: what if it is a blocked lock with pending l->frame */
+
+        domains[0] = &pl_inode->ext_list;
+        domains[1] = &pl_inode->int_list;
+
+        for (i = 0; i < 2; i++) {
+                list_for_each_entry_safe (cur, next, domains[i], list) {
+                        if (cur->transport != transport)
+                                continue;
+                        if (cur->client_pid != pid)
+                                continue;
+
+                        __delete_lock (pl_inode, cur);
+                        __destroy_lock (cur);
+                }
+        }
+}
+
+
+/* Callback of the flush wound by pl_flush: passes the child's result up
+   unchanged. */
+int
+pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ STACK_UNWIND (frame, op_ret, op_errno);
+
+ return 0;
+}
+
+
+/*
+ * flush fop. Drops every lock that the calling owner (transport + pid)
+ * holds on the file, gives waiting locks and waiting read/write
+ * requests a chance to proceed, then winds the flush to the child.
+ * Unwinds with (-1, EBADFD) when no pl_inode can be obtained.
+ */
+int
+pl_flush (call_frame_t *frame, xlator_t *this,
+          fd_t *fd)
+{
+        pl_inode_t *pl_inode = NULL;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (pl_inode == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        pthread_mutex_lock (&pl_inode->mutex);
+        __delete_locks_of_owner (pl_inode, frame->root->trans,
+                                 frame->root->pid);
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX);
+        grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL);
+
+        do_blocked_rw (pl_inode);
+
+        STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->flush, fd);
+        return 0;
+}
+
+
+/* Callback of the open wound by pl_open: passes the child's result up
+   unchanged. */
+int
+pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+ return 0;
+}
+
+
+/* open fop: winds the open to the child with O_TRUNC cleared from
+   @flags; all other flags are passed through. */
+int
+pl_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, fd_t *fd)
+{
+ /* why isn't O_TRUNC being handled ? */
+ /* NOTE(review): the flag is dropped here and nothing in this function
+    performs the truncation -- confirm where it is meant to happen */
+ STACK_WIND (frame, pl_open_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags & ~O_TRUNC, fd);
+
+ return 0;
+}
+
+
+/* Callback of the create wound by pl_create: passes the child's result
+   up unchanged. */
+int
+pl_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct stat *buf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+
+ return 0;
+}
+
+
+/* create fop: winds the request to the child with its arguments
+   unchanged. */
+int
+pl_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+ STACK_WIND (frame, pl_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, fd);
+ return 0;
+}
+
+
+/* Callback of a readv wound by pl_readv or pl_readv_cont: passes the
+   child's result up unchanged. */
+int
+pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+ return 0;
+}
+
+/* Callback of a writev wound by pl_writev or pl_writev_cont: passes the
+   child's result up unchanged. */
+int
+pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+ return 0;
+}
+
+
+/*
+ * Resume the queued read/write requests that no longer conflict with a
+ * lock.
+ *
+ * Under pl_inode->mutex the requests that have become allowable are
+ * moved, in order, from rw_list to a private list; they are resumed and
+ * freed only after the mutex has been released.
+ */
+void
+do_blocked_rw (pl_inode_t *pl_inode)
+{
+        struct list_head  runnable;
+        pl_rw_req_t      *req  = NULL;
+        pl_rw_req_t      *next = NULL;
+
+        INIT_LIST_HEAD (&runnable);
+
+        pthread_mutex_lock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (req, next, &pl_inode->rw_list, list) {
+                if (!__rw_allowable (pl_inode, &req->region,
+                                     req->stub->fop))
+                        continue;
+
+                list_del_init (&req->list);
+                list_add_tail (&req->list, &runnable);
+        }
+
+        pthread_mutex_unlock (&pl_inode->mutex);
+
+        list_for_each_entry_safe (req, next, &runnable, list) {
+                list_del_init (&req->list);
+                call_resume (req->stub);
+                free (req);
+        }
+}
+
+
+/*
+ * Is a read or write of @region permitted by the fcntl locks on the
+ * inode?
+ *
+ * Returns 0 when a lock of a different owner overlaps the region,
+ * except that a read (GF_FOP_READ) is only stopped by a write lock
+ * (F_WRLCK). Returns 1 otherwise. The callers in this file hold
+ * pl_inode->mutex.
+ */
+static int
+__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
+                glusterfs_fop_t op)
+{
+        posix_lock_t *held = NULL;
+
+        list_for_each_entry (held, &pl_inode->ext_list, list) {
+                if (!locks_overlap (held, region) || same_owner (held, region))
+                        continue;
+
+                /* readers may share a region with a non-write lock */
+                if (op == GF_FOP_READ && held->fl_type != F_WRLCK)
+                        continue;
+
+                return 0;
+        }
+
+        return 1;
+}
+
+
+/* Continuation stored in the stub of a queued read (see pl_readv):
+   winds the readv to the child once the request is resumed. */
+int
+pl_readv_cont (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset)
+{
+ STACK_WIND (frame, pl_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset);
+
+ return 0;
+}
+
+
+/*
+ * readv fop.
+ *
+ * Without mandatory locking (translator-wide and on the inode) the read
+ * is wound straight to the child. With mandatory locking:
+ *   - if no conflicting lock exists, the read is wound to the child;
+ *   - otherwise, if the fd is O_NONBLOCK, the request fails with
+ *     EWOULDBLOCK;
+ *   - otherwise the request is queued on pl_inode->rw_list and resumed
+ *     by do_blocked_rw.
+ * Fails with ENOMEM when the pl_inode or the queue entry cannot be
+ * allocated. Always returns 0.
+ */
+int
+pl_readv (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, size_t size, off_t offset)
+{
+        posix_locks_private_t *priv        = NULL;
+        pl_inode_t            *pl_inode    = NULL;
+        pl_rw_req_t           *rw          = NULL;
+        posix_lock_t           region      = {.list = {0, }, };
+        int                    op_ret      = 0;
+        int                    op_errno    = 0;
+        int                    wind_needed = 1;
+
+
+        priv = this->private;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (priv->mandatory && pl_inode->mandatory) {
+                region.fl_start   = offset;
+                region.fl_end     = offset + size - 1;
+                region.transport  = frame->root->trans;
+                region.client_pid = frame->root->pid;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        /* no conflict: fall through to the wind below */
+                        if (__rw_allowable (pl_inode, &region,
+                                            GF_FOP_READ))
+                                goto unlock;
+
+                        /* from here on the request either fails or is
+                           queued; it must not be wound now */
+                        wind_needed = 0;
+
+                        if (fd->flags & O_NONBLOCK) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "returning EWOULDBLOCK");
+                                op_errno = EWOULDBLOCK;
+                                op_ret   = -1;
+                                goto unlock;
+                        }
+
+                        rw = CALLOC (1, sizeof (*rw));
+                        if (!rw) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret   = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_readv_stub (frame, pl_readv_cont,
+                                                   fd, size, offset);
+                        if (!rw->stub) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret   = -1;
+                                free (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+        if (wind_needed) {
+                STACK_WIND (frame, pl_readv_cbk,
+                            FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+                            fd, size, offset);
+                return 0;
+        }
+
+unwind:
+        if (op_ret == -1)
+                STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/* Continuation stored in the stub of a queued write (see pl_writev):
+   winds the writev to the child once the request is resumed. */
+int
+pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t offset)
+{
+ STACK_WIND (frame, pl_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset);
+
+ return 0;
+}
+
+
+/*
+ * writev fop.
+ *
+ * Without mandatory locking (translator-wide and on the inode) the
+ * write is wound straight to the child. With mandatory locking:
+ *   - if no conflicting lock exists, the write is wound to the child;
+ *   - otherwise, if the fd is O_NONBLOCK, the request fails with
+ *     EWOULDBLOCK;
+ *   - otherwise the request is queued on pl_inode->rw_list and resumed
+ *     by do_blocked_rw.
+ * Fails with ENOMEM when the pl_inode or the queue entry cannot be
+ * allocated. Always returns 0.
+ */
+int
+pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           struct iovec *vector, int32_t count, off_t offset)
+{
+        posix_locks_private_t *priv        = NULL;
+        pl_inode_t            *pl_inode    = NULL;
+        pl_rw_req_t           *rw          = NULL;
+        posix_lock_t           region      = {.list = {0, }, };
+        int                    op_ret      = 0;
+        int                    op_errno    = 0;
+        int                    wind_needed = 1;
+
+
+        priv = this->private;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        if (priv->mandatory && pl_inode->mandatory) {
+                region.fl_start   = offset;
+                region.fl_end     = offset + iov_length (vector, count) - 1;
+                region.transport  = frame->root->trans;
+                region.client_pid = frame->root->pid;
+
+                pthread_mutex_lock (&pl_inode->mutex);
+                {
+                        /* no conflict: fall through to the wind below */
+                        if (__rw_allowable (pl_inode, &region,
+                                            GF_FOP_WRITE))
+                                goto unlock;
+
+                        /* from here on the request either fails or is
+                           queued; it must not be wound now */
+                        wind_needed = 0;
+
+                        if (fd->flags & O_NONBLOCK) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "returning EWOULDBLOCK");
+                                op_errno = EWOULDBLOCK;
+                                op_ret   = -1;
+                                goto unlock;
+                        }
+
+                        rw = CALLOC (1, sizeof (*rw));
+                        if (!rw) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret   = -1;
+                                goto unlock;
+                        }
+
+                        rw->stub = fop_writev_stub (frame, pl_writev_cont,
+                                                    fd, vector, count, offset);
+                        if (!rw->stub) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                op_errno = ENOMEM;
+                                op_ret   = -1;
+                                free (rw);
+                                goto unlock;
+                        }
+
+                        rw->region = region;
+
+                        list_add_tail (&rw->list, &pl_inode->rw_list);
+                }
+        unlock:
+                pthread_mutex_unlock (&pl_inode->mutex);
+        }
+
+        if (wind_needed) {
+                STACK_WIND (frame, pl_writev_cbk,
+                            FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+                            fd, vector, count, offset);
+                return 0;
+        }
+
+unwind:
+        /* same argument list as pl_writev_cbk: (op_ret, op_errno, stbuf) */
+        if (op_ret == -1)
+                STACK_UNWIND (frame, -1, op_errno, NULL);
+
+        return 0;
+}
+
+
+/*
+ * lk fop: fcntl-style locking for applications (GF_LOCK_POSIX domain).
+ *
+ *   F_GETLK   report a conflicting lock through @flock;
+ *   F_SETLK   try to take or release the lock; EAGAIN if it cannot be
+ *             granted;
+ *   F_SETLKW  like F_SETLK, but a lock that cannot be granted keeps the
+ *             frame and the request is not unwound here.
+ * (The *64 variants are accepted where they are distinct values.)
+ * Any other command fails with EINVAL.
+ *
+ * Fails with ENOMEM when the pl_inode or the lock record cannot be
+ * allocated. Always returns 0.
+ */
+int
+pl_lk (call_frame_t *frame, xlator_t *this,
+       fd_t *fd, int32_t cmd, struct flock *flock)
+{
+        transport_t  *transport  = NULL;
+        pid_t         client_pid = 0;
+        pl_inode_t   *pl_inode   = NULL;
+        int           op_ret     = 0;
+        int           op_errno   = 0;
+        int           can_block  = 0;
+        posix_lock_t *reqlock    = NULL;
+        posix_lock_t *conf       = NULL;
+        int           ret        = 0;
+
+        transport  = frame->root->trans;
+        client_pid = frame->root->pid;
+
+        pl_inode = pl_inode_get (this, fd->inode);
+        if (!pl_inode) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        reqlock = new_posix_lock (flock, transport, client_pid);
+        if (!reqlock) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        switch (cmd) {
+
+#if F_GETLK != F_GETLK64
+        case F_GETLK64:
+#endif
+        case F_GETLK:
+                /* NOTE(review): conf is passed on unchecked; confirm
+                   that pl_getlk never returns NULL */
+                conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX);
+                posix_lock_to_flock (conf, flock);
+                __destroy_lock (reqlock);
+
+                break;
+
+#if F_SETLKW != F_SETLKW64
+        case F_SETLKW64:
+#endif
+        case F_SETLKW:
+                can_block = 1;
+                reqlock->frame = frame;
+                reqlock->this  = this;
+                reqlock->fd    = fd;
+
+                /* fall through */
+
+#if F_SETLK != F_SETLK64
+        case F_SETLK64:
+#endif
+        case F_SETLK:
+                memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
+                ret = pl_setlk (this, pl_inode, reqlock,
+                                can_block, GF_LOCK_POSIX);
+
+                if (ret == -1) {
+                        /* a blocking request keeps its frame */
+                        if (can_block)
+                                goto out;
+
+                        gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+                        op_ret   = -1;
+                        op_errno = EAGAIN;
+                        __destroy_lock (reqlock);
+                }
+
+                break;
+
+        default:
+                /* unknown command: nothing took ownership of reqlock */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "unsupported lock command %d", cmd);
+                op_ret   = -1;
+                op_errno = EINVAL;
+                __destroy_lock (reqlock);
+
+                break;
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, flock);
+out:
+        return 0;
+}
+
+
+/**
+ * pl_forget - forget callback: release the per-inode lock bookkeeping.
+ *
+ * Logs at CRITICAL level for every list in the pl_inode_t that is still
+ * non-empty (pending R/W requests, fcntl locks, internal locks, entry
+ * locks) and then frees the structure. The leftover list entries are only
+ * reported, not released.
+ *
+ * TODO: this function just logs, no action required??
+ */
+int
+pl_forget (xlator_t *this,
+           inode_t *inode)
+{
+        pl_inode_t *pl_inode = NULL;
+
+        pl_inode = pl_inode_get (this, inode);
+        /* pl_lk treats a NULL result as out-of-memory; do not dereference */
+        if (!pl_inode)
+                return 0;
+
+        if (!list_empty (&pl_inode->rw_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "pending R/W requests found!");
+        }
+
+        if (!list_empty (&pl_inode->ext_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending fcntl locks found!");
+        }
+
+        if (!list_empty (&pl_inode->int_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending internal locks found!");
+        }
+
+        if (!list_empty (&pl_inode->dir_list)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "Pending entry locks found!");
+        }
+
+        FREE (pl_inode);
+
+        return 0;
+}
+
+
+/**
+ * init - set up the posix-locks translator.
+ *
+ * Requires exactly one child, and the bottom of the child chain must be a
+ * translator whose type starts with "storage/". Reads the boolean option
+ * "mandatory-locks" into priv->mandatory.
+ *
+ * Returns 0 on success and -1 on a configuration or allocation error.
+ */
+int
+init (xlator_t *this)
+{
+        posix_locks_private_t *priv      = NULL;
+        xlator_list_t         *trav      = NULL;
+        data_t                *mandatory = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: posix-locks should have exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        /* walk down the first-child chain to the bottom-most translator */
+        trav = this->children;
+        while (trav->xlator->children)
+                trav = trav->xlator->children;
+
+        if (strncmp ("storage/", trav->xlator->type, 8)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'posix-locks' not loaded over storage translator");
+                return -1;
+        }
+
+        priv = CALLOC (1, sizeof (*priv));
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                return -1;
+        }
+
+        mandatory = dict_get (this->options, "mandatory-locks");
+        if (mandatory) {
+                if (gf_string2boolean (mandatory->data,
+                                       &priv->mandatory) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'mandatory-locks' takes only boolean "
+                                "options");
+                        /* priv is not published yet, so release it here */
+                        FREE (priv);
+                        return -1;
+                }
+        }
+
+        this->private = priv;
+        return 0;
+}
+
+
+/* fini - release the private structure that init () stored in this->private. */
+int
+fini (xlator_t *this)
+{
+        free (this->private);
+
+        return 0;
+}
+
+
+/*
+ * Prototypes for the inode-lock and entry-lock fops referenced by the fops
+ * table below; their definitions are not part of this section of the file.
+ */
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t cmd, struct flock *flock);
+
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct flock *flock);
+
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+/* File operations handled by the posix-locks translator. */
+struct xlator_fops fops = {
+ .create = pl_create,
+ .truncate = pl_truncate,
+ .ftruncate = pl_ftruncate,
+ .open = pl_open,
+ .readv = pl_readv,
+ .writev = pl_writev,
+ .lk = pl_lk,
+ .inodelk = pl_inodelk,
+ .finodelk = pl_finodelk,
+ .entrylk = pl_entrylk,
+ .fentrylk = pl_fentrylk,
+ .flush = pl_flush,
+};
+
+
+/* No management operations are provided. */
+struct xlator_mops mops = {
+};
+
+
+/* Callbacks: pl_forget frees the pl_inode_t attached to an inode. */
+struct xlator_cbks cbks = {
+ .forget = pl_forget,
+};
+
+
+/* Declared volume options; init () reads the "mandatory-locks" key. */
+struct volume_options options[] = {
+ { .key = { "mandatory-locks", "mandatory" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
new file mode 100644
index 00000000000..6a1bfbf6871
--- /dev/null
+++ b/xlators/features/locks/tests/unit-test.c
@@ -0,0 +1,75 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+#define expect(cond) if (!(cond)) { goto out; }
+
+extern int lock_name (pl_inode_t *, const char *, entrylk_type);
+extern int unlock_name (pl_inode_t *, const char *, entrylk_type);
+
+/*
+ * Drives lock_name () / unlock_name () on one freshly allocated pl_inode_t
+ * and checks the return code of every step.
+ *
+ * Exit status is 0 when every expectation held and 1 otherwise.
+ */
+int main (int argc, char **argv)
+{
+        int ret = 1;
+        int r = -1;
+
+        pl_inode_t *pinode = CALLOC (1, sizeof (*pinode));
+        /* the mutex and list head below live inside the allocation */
+        if (!pinode)
+                return ret;
+
+        pthread_mutex_init (&pinode->dir_lock_mutex, NULL);
+        INIT_LIST_HEAD (&pinode->gf_dir_locks);
+
+        /* a write lock taken with a NULL name makes a named write lock fail */
+        r = lock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0);
+        {
+                r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN);
+        }
+        r = unlock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0);
+
+        /* read locks on one name stack; a write lock on it is refused */
+        r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
+        {
+                r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
+                {
+                        r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN);
+                }
+                r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
+        }
+        r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
+
+        /* once the read locks are gone the write lock succeeds */
+        r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0);
+        r = unlock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0);
+
+        /* a held write lock refuses a read lock on the same name */
+        r = lock_name (pinode, "baz", ENTRYLK_WRLCK); expect (r == 0);
+        r = lock_name (pinode, "baz", ENTRYLK_RDLCK); expect (r == -EAGAIN);
+
+        ret = 0;
+out:
+        return ret;
+}
diff --git a/xlators/features/path-convertor/Makefile.am b/xlators/features/path-convertor/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/features/path-convertor/Makefile.am
@@ -0,0 +1,3 @@
+# Recurse into src/, which holds the translator sources.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
new file mode 100644
index 00000000000..1fde1935238
--- /dev/null
+++ b/xlators/features/path-convertor/src/Makefile.am
@@ -0,0 +1,14 @@
+
+# Loadable module for the path translator, installed with the other
+# "features" translators.
+xlator_LTLIBRARIES = path-converter.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool spells the flag "-avoid-version"; "-avoidversion" is not a libtool
+# option.
+path_converter_la_LDFLAGS = -module -avoid-version
+
+path_converter_la_SOURCES = path.c
+path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
new file mode 100644
index 00000000000..41ef1d8a823
--- /dev/null
+++ b/xlators/features/path-convertor/src/path.c
@@ -0,0 +1,1217 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/* TODO: add gf_log to all the cases returning errors */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/**
+ * xlators/features/path-convertor:
+ * This translator converts the path it gets into user specified targets.
+ */
+
+#include <sys/types.h>
+#include <regex.h>
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+
+/*
+ * Per-translator configuration, stored in xlator_t->private by init ().
+ * Only start_off, end_off, that and preg are assigned in this file; the
+ * remaining members stay zeroed from CALLOC.
+ */
+typedef struct path_private
+{
+ int32_t this_len;
+ int32_t start_off; /* option "start-offset": first byte of the range to convert */
+ int32_t end_off; /* option "end-offset": one past the last byte of that range */
+ char *this;
+ char *that; /* option "replace-with", or "" when only "regex" is given */
+ char *path;
+ regex_t *preg; /* compiled form of option "regex"; NULL when not configured */
+} path_private_t;
+
+/**
+ * name_this_to_that - convert the path carried inside a file-content key.
+ *
+ * @xl:   this translator; xl->private holds start_off / end_off.
+ * @path: path of the file the key applies to.
+ * @name: key whose first ZR_FILE_CONTENT_STRLEN bytes are skipped; the rest
+ *        is appended to @path.
+ *
+ * Every '/' between start_off and end_off of the combined string is
+ * dropped, and the result is returned with ZR_FILE_CONTENT_STR prepended.
+ *
+ * Returns @name itself when no conversion applies (including when the
+ * combined string would not fit the ZR_PATH_MAX scratch buffer), otherwise
+ * a newly allocated string that the caller must FREE.
+ */
+static char *
+name_this_to_that (xlator_t *xl, const char *path, const char *name)
+{
+        path_private_t *priv = xl->private;
+        char            priv_path[ZR_PATH_MAX] = {0,};
+        char           *tmp_name = NULL;
+        int32_t         path_len = strlen (path);
+        int32_t         name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
+        int32_t         total_len = path_len + name_len;
+        int32_t         i = 0, j = 0;
+
+        if (path_len >= priv->end_off)
+                return (char *)name;
+
+        if (!priv->end_off || (total_len <= priv->end_off))
+                return (char *)name;
+
+        /*
+         * priv_path is a fixed-size scratch buffer and the copy below
+         * assumes start_off <= end_off; refuse anything that would write
+         * past the end of it.
+         */
+        if ((total_len >= ZR_PATH_MAX) ||
+            (priv->start_off < 0) || (priv->start_off > priv->end_off))
+                return (char *)name;
+
+        j = priv->start_off;
+
+        /*
+         * Room for the prefix, the whole combined path and the NUL; the
+         * final strcpy/strcat pair can need all of it when no '/' is dropped.
+         * (Assumes ZR_FILE_CONTENT_STRLEN is the length of
+         * ZR_FILE_CONTENT_STR - TODO confirm.)
+         */
+        tmp_name = CALLOC (1, (total_len + ZR_FILE_CONTENT_STRLEN + 1));
+        ERR_ABORT (tmp_name);
+
+        /* Get the complete path for the file first */
+        strcpy (tmp_name, path);
+        strcat (tmp_name, name + ZR_FILE_CONTENT_STRLEN);
+
+        strncpy (priv_path, tmp_name, priv->start_off);
+        for (i = priv->start_off; i < priv->end_off; i++) {
+                if (tmp_name[i] == '/')
+                        continue;
+                priv_path[j++] = tmp_name[i];
+        }
+        memcpy ((priv_path + j),
+                (tmp_name + priv->end_off),
+                (total_len - priv->end_off));
+        priv_path[(total_len - (priv->end_off - j))] = '\0';
+
+        strcpy (tmp_name, ZR_FILE_CONTENT_STR);
+        strcat (tmp_name, priv_path);
+
+        return tmp_name;
+}
+
+/**
+ * path_this_to_that - build the converted form of @path.
+ *
+ * When end_off is configured and @path is longer than start_off, a new
+ * buffer is returned in which every '/' between start_off and end_off has
+ * been dropped; the caller must FREE it. Otherwise @path itself is
+ * returned, so callers compare the result with their input to decide
+ * whether to free.
+ *
+ * NOTE(review): for a path longer than start_off but not longer than
+ * end_off only the first start_off bytes are copied, i.e. the result is a
+ * truncated path. That behaviour is kept as found - confirm it is intended.
+ *
+ * Callers also test for NULL, but no path through this function returns it.
+ */
+static char *
+path_this_to_that (xlator_t *xl, const char *path)
+{
+        path_private_t *priv = xl->private;
+        char           *priv_path = NULL;
+        int32_t         path_len = strlen (path);
+        int32_t         i = 0, j = 0;
+
+        if (priv->end_off && (path_len > priv->start_off)) {
+                /*
+                 * +1 for the terminator: when no '/' is dropped the result
+                 * is as long as the input and the NUL lands on index path_len.
+                 */
+                priv_path = CALLOC (1, path_len + 1);
+                ERR_ABORT (priv_path);
+
+                if (priv->start_off && (path_len > priv->start_off))
+                        memcpy (priv_path, path, priv->start_off);
+                if (path_len > priv->end_off) {
+                        j = priv->start_off;
+                        for (i = priv->start_off; i < priv->end_off; i++) {
+                                if (path[i] == '/')
+                                        continue;
+                                priv_path[j++] = path[i];
+                        }
+                        memcpy ((priv_path + j),
+                                (path + priv->end_off),
+                                (path_len - priv->end_off));
+                        priv_path[(path_len - (priv->end_off - j))] = '\0';
+                }
+                return priv_path;
+        }
+        return (char *)path;
+}
+
+/* create callback: pass fd, inode and stat buffer up unchanged. */
+int32_t
+path_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+/* open callback: pass the fd up unchanged. */
+int32_t
+path_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/* getdents callback: pass the entry list and its count up unchanged. */
+int32_t
+path_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   dir_entry_t *entries, int32_t count)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+        return 0;
+}
+
+/* readdir callback: pass the dirent buffer up unchanged. */
+int32_t
+path_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/* readlink callback: pass the link target buffer up unchanged. */
+int32_t
+path_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, const char *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* lookup callback: pass inode, stat buffer and xattr dict up unchanged. */
+int32_t
+path_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+        return 0;
+}
+
+
+/* symlink callback: pass inode and stat buffer up unchanged. */
+int32_t
+path_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/* mknod callback: pass inode and stat buffer up unchanged. */
+int32_t
+path_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/* mkdir callback: pass inode and stat buffer up unchanged. */
+int32_t
+path_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/* link callback: pass inode and stat buffer up unchanged. */
+int32_t
+path_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               inode_t *inode, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+/* opendir callback: pass the directory fd up unchanged. */
+int32_t
+path_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/* Shared callback for fops whose only result is a stat buffer. */
+int32_t
+path_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* Shared callback for fops whose only result is a dict. */
+int32_t
+path_common_dict_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, dict_t *dict)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/* Shared callback for fops that report only op_ret / op_errno. */
+int32_t
+path_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * path_lookup - lookup fop: wind to the child with loc->path converted.
+ *
+ * loc->path points at the converted path only while STACK_WIND runs; it is
+ * restored afterwards and a converted copy is freed here.
+ */
+int32_t
+path_lookup (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             dict_t *xattr_req)
+{
+        char *loc_path = (char *)loc->path;
+        char *tmp_path = NULL;
+
+        if (!(tmp_path = path_this_to_that (this, loc->path))) {
+                /*
+                 * One NULL per result argument (inode, buf, xattr - the
+                 * shape path_lookup_cbk has); the call used to be one short.
+                 */
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
+                return 0;
+        }
+        loc->path = tmp_path;
+
+        STACK_WIND (frame, path_lookup_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup,
+                    loc, xattr_req);
+
+        loc->path = loc_path;
+        if (tmp_path != loc_path)
+                FREE (tmp_path);
+
+        return 0;
+}
+
+/* stat fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_buf_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* readlink fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_readlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+                    loc, size);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* mknod fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            mode_t mode, dev_t dev)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_mknod_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+                    loc, mode, dev);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* mkdir fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_mkdir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+                    loc, mode);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* unlink fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* rmdir fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+                    loc);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/*
+ * symlink fop: wind to the child with loc->path converted for the call.
+ * linkpath (the link's target text) is passed through untouched.
+ */
+int32_t
+path_symlink (call_frame_t *frame, xlator_t *this,
+              const char *linkpath, loc_t *loc)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_symlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+                    linkpath, loc);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/**
+ * path_rename - rename fop: wind with both oldloc->path and newloc->path
+ * converted for the duration of the call.
+ *
+ * Both loc paths are restored before returning, on the error path too, and
+ * any converted copies are freed here.
+ */
+int32_t
+path_rename (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *oldloc,
+             loc_t *newloc)
+{
+        char *oldloc_path = (char *)oldloc->path;
+        char *tmp_oldloc_path = NULL;
+
+        char *newloc_path = (char *)newloc->path;
+        char *tmp_newloc_path = NULL;
+
+        if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        oldloc->path = tmp_oldloc_path;
+
+        if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) {
+                /*
+                 * Undo the first conversion: oldloc must not be left
+                 * pointing at our temporary, and the temporary must not leak.
+                 */
+                oldloc->path = oldloc_path;
+                if (tmp_oldloc_path != oldloc_path)
+                        FREE (tmp_oldloc_path);
+
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        newloc->path = tmp_newloc_path;
+
+        STACK_WIND (frame,
+                    path_common_buf_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename,
+                    oldloc,
+                    newloc);
+
+        oldloc->path = oldloc_path;
+        if (tmp_oldloc_path != oldloc_path)
+                FREE (tmp_oldloc_path);
+
+        newloc->path = newloc_path;
+        if (tmp_newloc_path != newloc_path)
+                FREE (tmp_newloc_path);
+
+        return 0;
+}
+
+/**
+ * path_link - link fop: wind with both oldloc->path and newloc->path
+ * converted for the duration of the call.
+ *
+ * Both loc paths are restored before returning, on the error path too, and
+ * any converted copies are freed here.
+ */
+int32_t
+path_link (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *oldloc,
+           loc_t *newloc)
+{
+        char *oldloc_path = (char *)oldloc->path;
+        char *tmp_oldloc_path = NULL;
+
+        char *newloc_path = (char *)newloc->path;
+        char *tmp_newloc_path = NULL;
+
+        if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        oldloc->path = tmp_oldloc_path;
+
+        if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) {
+                /*
+                 * Undo the first conversion: oldloc must not be left
+                 * pointing at our temporary, and the temporary must not leak.
+                 */
+                oldloc->path = oldloc_path;
+                if (tmp_oldloc_path != oldloc_path)
+                        FREE (tmp_oldloc_path);
+
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        newloc->path = tmp_newloc_path;
+
+        STACK_WIND (frame,
+                    path_link_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->link,
+                    oldloc,
+                    newloc);
+
+        oldloc->path = oldloc_path;
+        if (tmp_oldloc_path != oldloc_path)
+                FREE (tmp_oldloc_path);
+
+        newloc->path = newloc_path;
+        if (tmp_newloc_path != newloc_path)
+                FREE (tmp_newloc_path);
+
+        return 0;
+}
+
+/* chmod fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_buf_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->chmod,
+                    loc, mode);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* chown fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_chown (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            uid_t uid, gid_t gid)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_buf_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->chown,
+                    loc, uid, gid);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* truncate fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_buf_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                    loc, offset);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* utimens fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              struct timespec tv[2])
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_buf_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->utimens,
+                    loc, tv);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* open fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+           int32_t flags, fd_t *fd)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/**
+ * path_create - create fop: wind to the child with loc->path converted.
+ *
+ * loc->path points at the converted path only while STACK_WIND runs; it is
+ * restored afterwards and a converted copy is freed here.
+ */
+int32_t
+path_create (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             int32_t flags,
+             mode_t mode,
+             fd_t *fd)
+{
+        char *loc_path = (char *)loc->path;
+        char *tmp_path = NULL;
+
+        if (!(tmp_path = path_this_to_that (this, loc->path))) {
+                /*
+                 * One NULL per result argument (fd, inode, buf - the shape
+                 * path_create_cbk has); the call used to be one short.
+                 */
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
+                return 0;
+        }
+        loc->path = tmp_path;
+
+        STACK_WIND (frame,
+                    path_create_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->create,
+                    loc,
+                    flags,
+                    mode,
+                    fd);
+
+        loc->path = loc_path;
+        if (tmp_path != loc_path)
+                FREE (tmp_path);
+
+        return 0;
+}
+
+/**
+ * path_setxattr - setxattr fop: wind with loc->path converted and, when the
+ * first key of @dict is a file-content request, with that key converted too.
+ *
+ * Only the first pair in dict->members_list is examined. Both loc->path and
+ * the dict key are put back to their original values after the wind, and
+ * the temporaries are freed here.
+ */
+int32_t
+path_setxattr (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               dict_t *dict,
+               int32_t flags)
+{
+        char        *tmp_name = NULL;
+        char        *orig_key = NULL;
+        data_pair_t *trav     = NULL;
+        char        *loc_path = (char *)loc->path;
+        char        *tmp_path = NULL;
+
+        if (!(tmp_path = path_this_to_that (this, loc->path))) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        loc->path = tmp_path;
+
+        /* an absent or empty dict has no first pair to inspect */
+        if (dict)
+                trav = dict->members_list;
+
+        if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
+                tmp_name = name_this_to_that (this, loc->path, trav->key);
+                if (tmp_name != trav->key) {
+                        orig_key  = trav->key;
+                        trav->key = tmp_name;
+                } else {
+                        tmp_name = NULL;
+                }
+        }
+
+        STACK_WIND (frame,
+                    path_common_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setxattr,
+                    loc,
+                    dict,
+                    flags);
+
+        loc->path = loc_path;
+        if (tmp_path != loc_path)
+                FREE (tmp_path);
+
+        if (tmp_name) {
+                /*
+                 * Give the dict its own key back before freeing ours;
+                 * otherwise the pair keeps a dangling key and the original
+                 * key is lost.
+                 */
+                trav->key = orig_key;
+                FREE (tmp_name);
+        }
+
+        return 0;
+}
+
+/**
+ * path_getxattr - getxattr fop: wind with loc->path converted and, when
+ * @name is a file-content request, with the key converted as well.
+ *
+ * A NULL @name is forwarded as is. loc->path is restored after the wind and
+ * the temporaries are freed here.
+ */
+int32_t
+path_getxattr (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               const char *name)
+{
+        char *tmp_name = (char *)name;
+        char *loc_path = (char *)loc->path;
+        char *tmp_path = NULL;
+
+        if (!(tmp_path = path_this_to_that (this, loc->path))) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        loc->path = tmp_path;
+
+        /* guard first: the key test must not be handed a NULL name */
+        if (name && ZR_FILE_CONTENT_REQUEST(name)) {
+                tmp_name = name_this_to_that (this, loc->path, name);
+        }
+
+        STACK_WIND (frame,
+                    path_common_dict_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getxattr,
+                    loc,
+                    tmp_name);
+
+        loc->path = loc_path;
+        if (tmp_path != loc_path)
+                FREE (tmp_path);
+
+        if (tmp_name != name)
+                FREE (tmp_name);
+
+        return 0;
+}
+
+/**
+ * path_removexattr - removexattr fop: wind with loc->path converted and,
+ * when @name is a file-content request, with the key converted as well.
+ *
+ * A NULL @name is forwarded as is. loc->path is restored after the wind and
+ * the temporaries are freed here.
+ */
+int32_t
+path_removexattr (call_frame_t *frame,
+                  xlator_t *this,
+                  loc_t *loc,
+                  const char *name)
+{
+        char *tmp_name = (char *)name;
+        char *loc_path = (char *)loc->path;
+        char *tmp_path = NULL;
+
+        if (!(tmp_path = path_this_to_that (this, loc->path))) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+        loc->path = tmp_path;
+
+        /* guard first: the key test must not be handed a NULL name */
+        if (name && ZR_FILE_CONTENT_REQUEST(name)) {
+                tmp_name = name_this_to_that (this, loc->path, name);
+        }
+
+        STACK_WIND (frame,
+                    path_common_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr,
+                    loc,
+                    tmp_name);
+
+        loc->path = loc_path;
+        if (tmp_path != loc_path)
+                FREE (tmp_path);
+
+        if (tmp_name != name)
+                FREE (tmp_name);
+
+        return 0;
+}
+
+/* opendir fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_opendir_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                    loc, fd);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* access fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+                    loc, mask);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* checksum callback: pass both checksum buffers up unchanged. */
+int32_t
+path_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno,
+                   uint8_t *fchecksum, uint8_t *dchecksum)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+        return 0;
+}
+
+/* checksum fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_checksum_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->checksum,
+                    loc, flag);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+
+/*
+ * entrylk fop: wind to the child with loc->path converted for the call.
+ * basename, cmd and type are forwarded untouched.
+ */
+int32_t
+path_entrylk (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, const char *basename,
+              entrylk_cmd cmd, entrylk_type type)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+                    loc, basename, cmd, type);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+/* inodelk fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_inodelk (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t cmd, struct flock *lock)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk,
+                    loc, cmd, lock);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+
+/* xattrop fop: wind to the child with loc->path converted for the call. */
+int32_t
+path_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+              gf_xattrop_flags_t flags, dict_t *dict)
+{
+        char *orig_path = (char *)loc->path;
+        char *conv_path = path_this_to_that (this, loc->path);
+
+        if (conv_path == NULL) {
+                STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
+                return 0;
+        }
+
+        /* the child sees the converted path only during the wind */
+        loc->path = conv_path;
+        STACK_WIND (frame, path_common_dict_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
+                    loc, flags, dict);
+        loc->path = orig_path;
+
+        if (conv_path != orig_path)
+                FREE (conv_path);
+
+        return 0;
+}
+
+
+/**
+ * init - set up the path translator.
+ *
+ * Requires exactly one subvolume. Reads the options "start-offset" and
+ * "end-offset" into the private structure; when "regex" is given it is
+ * compiled with REG_EXTENDED and "replace-with" (default "") is recorded.
+ *
+ * Returns 0 on success and -1 on a configuration error.
+ */
+int32_t
+init (xlator_t *this)
+{
+        dict_t         *options = this->options;
+        path_private_t *priv    = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "path translator requires exactly one subvolume");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = CALLOC (1, sizeof (*priv));
+        ERR_ABORT (priv);
+        if (dict_get (options, "start-offset")) {
+                priv->start_off = data_to_int32 (dict_get (options,
+                                                           "start-offset"));
+        }
+        if (dict_get (options, "end-offset")) {
+                priv->end_off = data_to_int32 (dict_get (options,
+                                                         "end-offset"));
+        }
+
+        if (dict_get (options, "regex")) {
+                int32_t ret = 0;
+                priv->preg = CALLOC (1, sizeof (regex_t));
+                ERR_ABORT (priv->preg);
+                ret = regcomp (priv->preg,
+                               data_to_str (dict_get (options, "regex")),
+                               REG_EXTENDED);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to compile the 'option regex'");
+                        /* release the regex buffer too, not just priv */
+                        FREE (priv->preg);
+                        FREE (priv);
+                        return -1;
+                }
+                if (dict_get (options, "replace-with")) {
+                        priv->that = data_to_str (dict_get (options,
+                                                            "replace-with"));
+                } else {
+                        priv->that = "";
+                }
+        }
+
+        this->private = priv;
+        return 0;
+}
+
+/**
+ * fini - release what init () allocated: the compiled regex (if any) and
+ * the private structure. priv->that is not freed because init () only ever
+ * points it at dict-owned data or a string literal.
+ */
+void
+fini (xlator_t *this)
+{
+        path_private_t *priv = this->private;
+
+        if (!priv)
+                return;
+
+        if (priv->preg) {
+                regfree (priv->preg);
+                FREE (priv->preg);
+        }
+
+        FREE (priv);
+        this->private = NULL;
+
+        return;
+}
+
+/* Path-based file operations handled by the path translator. */
+struct xlator_fops fops = {
+ .stat = path_stat,
+ .readlink = path_readlink,
+ .mknod = path_mknod,
+ .mkdir = path_mkdir,
+ .unlink = path_unlink,
+ .rmdir = path_rmdir,
+ .symlink = path_symlink,
+ .rename = path_rename,
+ .link = path_link,
+ .chmod = path_chmod,
+ .chown = path_chown,
+ .truncate = path_truncate,
+ .utimens = path_utimens,
+ .open = path_open,
+ .setxattr = path_setxattr,
+ .getxattr = path_getxattr,
+ .removexattr = path_removexattr,
+ .opendir = path_opendir,
+ .access = path_access,
+ .create = path_create,
+ .lookup = path_lookup,
+ .checksum = path_checksum,
+ .xattrop = path_xattrop,
+ .entrylk = path_entrylk,
+ .inodelk = path_inodelk,
+};
+
+
+/* No management operations are provided. */
+struct xlator_mops mops = {
+};
+
+
+/* No callbacks are registered. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Volume options read by init (). "regex" is declared here because init ()
+ * looks it up; it was missing from the table.
+ */
+struct volume_options options[] = {
+        { .key  = {"start-offset"},
+          .type = GF_OPTION_TYPE_INT,
+          .min  = 0,
+          .max  = 4095
+        },
+        { .key  = {"end-offset"},
+          .type = GF_OPTION_TYPE_INT,
+          .min  = 1,
+          .max  = 4096
+        },
+        { .key  = {"regex"},
+          .type = GF_OPTION_TYPE_ANY
+        },
+        { .key  = {"replace-with"},
+          .type = GF_OPTION_TYPE_ANY
+        },
+        { .key  = {NULL} },
+};
diff --git a/xlators/features/quota/Makefile.am b/xlators/features/quota/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/features/quota/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
new file mode 100644
index 00000000000..886d839643c
--- /dev/null
+++ b/xlators/features/quota/src/Makefile.am
@@ -0,0 +1,13 @@
+# Build quota.la as a libtool module installed under xlator/features.
+xlator_LTLIBRARIES = quota.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool's documented spelling of this flag is -avoid-version.
+quota_la_LDFLAGS = -module -avoid-version
+
+quota_la_SOURCES = quota.c
+quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
new file mode 100644
index 00000000000..c898899b5e3
--- /dev/null
+++ b/xlators/features/quota/src/quota.c
@@ -0,0 +1,1056 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/time.h>
+
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+
+/*
+ * Per-call state kept in frame->local while a quota-accounted operation
+ * first stats the target and then performs the real operation.
+ */
+struct quota_local {
+ struct stat stbuf; /* stat result saved before the operation runs */
+ inode_t *inode; /* not referenced elsewhere in this file */
+ char *path; /* not referenced elsewhere in this file */
+ fd_t *fd; /* referenced fd for ftruncate/writev */
+ off_t offset; /* offset argument of the pending call */
+ int32_t count; /* number of entries in vector (writev) */
+ struct iovec *vector; /* caller's iovec array (writev) */
+ dict_t *refs; /* reference taken on frame->root->req_refs (writev) */
+ loc_t loc; /* copy of the caller's loc (truncate/unlink/rmdir) */
+};
+
+
+/*
+ * Translator-wide quota state, stored in this->private by init ().
+ */
+struct quota_priv {
+ char only_first_time; /* Used to make sure a call is done only one time */
+ gf_lock_t lock; /* Used while updating variables */
+
+ uint64_t disk_usage_limit; /* Used for Disk usage quota */
+ uint64_t current_disk_usage; /* Keep the current usage value */
+
+ uint32_t min_free_disk_limit; /* user specified limit, in %*/
+ uint32_t current_free_disk; /* current free disk space available, in % */
+ uint32_t refresh_interval; /* interval in seconds */
+ uint32_t min_disk_last_updated_time; /* used for interval calculation */
+};
+
+
+/*
+ * quota_statvfs_cbk - completion of the internal statfs wound by
+ * gf_quota_update_current_free_disk ().
+ *
+ * On success the percentage of available blocks (f_bavail relative to
+ * f_blocks) is cached in priv->current_free_disk.  The frame was created
+ * by this translator, so its stack is destroyed instead of being unwound.
+ */
+int
+quota_statvfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+{
+        struct quota_priv *priv = this->private;
+
+        /* A reply with zero total blocks would otherwise divide by zero. */
+        if ((op_ret >= 0) && stbuf && stbuf->f_blocks) {
+                priv->current_free_disk =
+                        (stbuf->f_bavail * 100) / stbuf->f_blocks;
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * build_root_loc - fill @loc so that it names the root path "/".
+ *
+ * Callers in this file pass an uninitialised stack loc_t, so every other
+ * member is cleared here rather than left holding stack garbage.
+ */
+static void
+build_root_loc (xlator_t *this, loc_t *loc)
+{
+        *loc = (loc_t) { .path = "/" };
+}
+
+
+/*
+ * gf_quota_usage_subtract - lower the cached disk usage by @size bytes
+ * under priv->lock, clamping the result at zero.
+ */
+void
+gf_quota_usage_subtract (xlator_t *this, size_t size)
+{
+        struct quota_priv *conf = this->private;
+
+        LOCK (&conf->lock);
+        {
+                if (size <= conf->current_disk_usage)
+                        conf->current_disk_usage -= size;
+                else
+                        conf->current_disk_usage = 0;
+        }
+        UNLOCK (&conf->lock);
+}
+
+
+/*
+ * gf_quota_usage_add - raise the cached disk usage by @size bytes under
+ * priv->lock.
+ */
+void
+gf_quota_usage_add (xlator_t *this, size_t size)
+{
+        struct quota_priv *conf = NULL;
+
+        conf = this->private;
+
+        LOCK (&conf->lock);
+        {
+                conf->current_disk_usage = conf->current_disk_usage + size;
+        }
+        UNLOCK (&conf->lock);
+}
+
+
+/*
+ * gf_quota_update_current_free_disk - wind an internal statfs on "/" to
+ * the child; quota_statvfs_cbk () stores the resulting free percentage.
+ */
+void
+gf_quota_update_current_free_disk (xlator_t *this)
+{
+        call_frame_t *frame = NULL;
+        loc_t         loc   = {0, };    /* never hand stack garbage downstream */
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create a frame for statfs");
+                return;
+        }
+
+        build_root_loc (this, &loc);
+
+        STACK_WIND (frame, quota_statvfs_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->statfs, &loc);
+
+        return ;
+}
+
+
+/*
+ * gf_quota_check_free_disk - enforce the min-free-disk limit.
+ *
+ * When a limit is configured, a refresh of the cached free percentage is
+ * started once more than refresh_interval seconds have passed since the
+ * last one.  Returns -1 when the cached free percentage is at or below
+ * the limit, 0 otherwise (including when no limit is configured).
+ */
+int
+gf_quota_check_free_disk (xlator_t *this)
+{
+        struct quota_priv *conf = this->private;
+        struct timeval     now  = {0, 0};
+
+        if (!conf->min_free_disk_limit)
+                return 0;
+
+        gettimeofday (&now, NULL);
+        if (now.tv_sec > (conf->refresh_interval +
+                          conf->min_disk_last_updated_time)) {
+                conf->min_disk_last_updated_time = now.tv_sec;
+                gf_quota_update_current_free_disk (this);
+        }
+
+        return (conf->current_free_disk <= conf->min_free_disk_limit) ? -1 : 0;
+}
+
+
+/*
+ * quota_truncate_cbk - completion of truncate.
+ *
+ * frame->local is only set when a usage limit is configured.  On success
+ * the cached usage is adjusted by the change in allocated blocks between
+ * the stat taken before the truncate and @buf.  The loc copy held in
+ * frame->local is released on success and on failure.
+ */
+int
+quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_priv  *priv  = this->private;
+        struct quota_local *local = frame->local;
+
+        if (local) {
+                if ((op_ret >= 0) && priv->disk_usage_limit) {
+                        /*
+                         * Compare first: a negative block delta converted
+                         * to size_t would become a huge value.
+                         */
+                        if (local->stbuf.st_blocks >= buf->st_blocks) {
+                                gf_quota_usage_subtract (this,
+                                        (local->stbuf.st_blocks -
+                                         buf->st_blocks) * 512);
+                        } else {
+                                gf_quota_usage_add (this,
+                                        (buf->st_blocks -
+                                         local->stbuf.st_blocks) * 512);
+                        }
+                }
+                loc_wipe (&local->loc);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * quota_truncate_stat_cbk - completion of the stat issued ahead of a
+ * truncate.  A successful result is saved in frame->local; the truncate
+ * is then wound whether or not the stat succeeded.
+ */
+int
+quota_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_local *ctx = frame->local;
+
+        if (op_ret >= 0)
+                ctx->stbuf = *buf;
+
+        STACK_WIND (frame, quota_truncate_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+                    &ctx->loc, ctx->offset);
+        return 0;
+}
+
+
+/*
+ * quota_truncate - truncate entry point.
+ *
+ * Without a usage limit the call is passed straight to the child.  With
+ * one, the file is stat'ed first so quota_truncate_cbk () can account for
+ * the change in allocated blocks.  Unwinds with ENOMEM when the per-call
+ * state cannot be allocated.
+ */
+int
+quota_truncate (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, off_t offset)
+{
+        struct quota_local *local = NULL;
+        struct quota_priv  *priv  = this->private;
+
+        if (!priv->disk_usage_limit) {
+                STACK_WIND (frame, quota_truncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->truncate,
+                            loc, offset);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (struct quota_local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        frame->local = local;
+
+        loc_copy (&local->loc, loc);
+        local->offset = offset;
+
+        STACK_WIND (frame, quota_truncate_stat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc);
+        return 0;
+}
+
+
+/*
+ * quota_ftruncate_cbk - completion of ftruncate.
+ *
+ * frame->local is only set when a usage limit is configured.  On success
+ * the cached usage is adjusted by the change in allocated blocks; the fd
+ * reference taken in quota_ftruncate () is dropped on success and on
+ * failure.
+ */
+int
+quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_priv  *priv  = this->private;
+        struct quota_local *local = frame->local;
+
+        if (local) {
+                if ((op_ret >= 0) && priv->disk_usage_limit) {
+                        /*
+                         * Compare first: a negative block delta converted
+                         * to size_t would become a huge value.
+                         */
+                        if (local->stbuf.st_blocks >= buf->st_blocks) {
+                                gf_quota_usage_subtract (this,
+                                        (local->stbuf.st_blocks -
+                                         buf->st_blocks) * 512);
+                        } else {
+                                gf_quota_usage_add (this,
+                                        (buf->st_blocks -
+                                         local->stbuf.st_blocks) * 512);
+                        }
+                }
+                if (local->fd)
+                        fd_unref (local->fd);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+
+/*
+ * quota_ftruncate_fstat_cbk - completion of the fstat issued ahead of an
+ * ftruncate.  A successful result is saved in frame->local; the ftruncate
+ * is then wound whether or not the fstat succeeded.
+ */
+int
+quota_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_local *ctx = frame->local;
+
+        if (op_ret >= 0)
+                ctx->stbuf = *buf;
+
+        STACK_WIND (frame, quota_ftruncate_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+                    ctx->fd, ctx->offset);
+        return 0;
+}
+
+
+/*
+ * quota_ftruncate - ftruncate entry point.
+ *
+ * Without a usage limit the call is passed straight to the child.  With
+ * one, the fd is fstat'ed first so quota_ftruncate_cbk () can account for
+ * the change in allocated blocks.  Unwinds with ENOMEM when the per-call
+ * state cannot be allocated.
+ */
+int
+quota_ftruncate (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, off_t offset)
+{
+        struct quota_local *local = NULL;
+        struct quota_priv  *priv  = this->private;
+
+        if (!priv->disk_usage_limit) {
+                STACK_WIND (frame, quota_ftruncate_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->ftruncate,
+                            fd, offset);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (struct quota_local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        frame->local = local;
+
+        local->fd = fd_ref (fd);
+        local->offset = offset;
+
+        STACK_WIND (frame, quota_ftruncate_fstat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd);
+        return 0;
+}
+
+
+/*
+ * quota_mknod_cbk - completion of mknod.  With a usage limit configured,
+ * a successful result adds the new node's allocated blocks (512-byte
+ * units) to the cached usage.
+ */
+int
+quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 inode_t *inode, struct stat *buf)
+{
+        struct quota_priv *conf = this->private;
+
+        if (conf->disk_usage_limit && (op_ret >= 0))
+                gf_quota_usage_add (this, buf->st_blocks * 512);
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/*
+ * quota_mknod - mknod entry point.  Unwinds with ENOSPC when the
+ * min-free-disk check fails or the cached usage exceeds the usage limit;
+ * otherwise winds the call to the child.
+ */
+int
+quota_mknod (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode, dev_t rdev)
+{
+        struct quota_priv *conf = this->private;
+
+        if (-1 == gf_quota_check_free_disk (this)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "min-free-disk limit (%u) crossed, current available is %u",
+                        conf->min_free_disk_limit, conf->current_free_disk);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else if (conf->current_disk_usage > conf->disk_usage_limit) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
+                        conf->disk_usage_limit, conf->current_disk_usage);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else {
+                STACK_WIND (frame, quota_mknod_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mknod,
+                            loc, mode, rdev);
+        }
+
+        return 0;
+}
+
+
+/*
+ * quota_mkdir_cbk - completion of mkdir.  With a usage limit configured,
+ * a successful result adds the new directory's allocated blocks (512-byte
+ * units) to the cached usage, matching the mknod, symlink and create
+ * callbacks; quota_rmdir_cbk () later subtracts them again.
+ */
+int
+quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, inode_t *inode,
+                 struct stat *buf)
+{
+        struct quota_priv *priv = NULL;
+
+        priv = this->private;
+
+        if ((op_ret >= 0) && priv->disk_usage_limit) {
+                /* Creating a directory consumes space: add, not subtract. */
+                gf_quota_usage_add (this, buf->st_blocks * 512);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/*
+ * quota_mkdir - mkdir entry point.  Unwinds with ENOSPC when the
+ * min-free-disk check fails or the cached usage exceeds the usage limit;
+ * otherwise winds the call to the child.
+ */
+int
+quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+{
+        struct quota_priv *conf = this->private;
+
+        if (-1 == gf_quota_check_free_disk (this)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "min-free-disk limit (%u) crossed, current available is %u",
+                        conf->min_free_disk_limit, conf->current_free_disk);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else if (conf->current_disk_usage > conf->disk_usage_limit) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
+                        conf->disk_usage_limit, conf->current_disk_usage);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else {
+                STACK_WIND (frame, quota_mkdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->mkdir,
+                            loc, mode);
+        }
+
+        return 0;
+}
+
+
+/*
+ * quota_unlink_cbk - completion of unlink.  When per-call state exists,
+ * a successful unlink subtracts the blocks recorded by the earlier stat
+ * from the cached usage, and the saved loc is released either way.
+ */
+int
+quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        struct quota_local *ctx = frame->local;
+
+        if (ctx != NULL) {
+                if (op_ret >= 0)
+                        gf_quota_usage_subtract (this,
+                                                 ctx->stbuf.st_blocks * 512);
+                loc_wipe (&ctx->loc);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * quota_unlink_stat_cbk - completion of the stat issued ahead of an
+ * unlink.  The stat result is recorded only when the file has exactly one
+ * link; the unlink is then wound regardless of the stat outcome.
+ */
+int
+quota_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_local *ctx = frame->local;
+
+        if ((op_ret >= 0) && (buf->st_nlink == 1))
+                ctx->stbuf = *buf;
+
+        STACK_WIND (frame, quota_unlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    &ctx->loc);
+
+        return 0;
+}
+
+
+/*
+ * quota_unlink - unlink entry point.
+ *
+ * Without a usage limit the call is passed straight to the child.  With
+ * one, the file is stat'ed first so quota_unlink_cbk () knows how many
+ * blocks are being released.  Unwinds with ENOMEM when the per-call state
+ * cannot be allocated.
+ */
+int
+quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        struct quota_local *local = NULL;
+        struct quota_priv  *priv  = this->private;
+
+        if (!priv->disk_usage_limit) {
+                STACK_WIND (frame, quota_unlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->unlink,
+                            loc);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (struct quota_local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+        frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame,
+                    quota_unlink_stat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat,
+                    loc);
+        return 0;
+}
+
+
+/*
+ * quota_rmdir_cbk - completion of rmdir.  When per-call state exists, a
+ * successful rmdir subtracts the blocks recorded by the earlier stat from
+ * the cached usage, and the saved loc is released either way.
+ */
+int
+quota_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        struct quota_local *ctx = frame->local;
+
+        if (ctx != NULL) {
+                if (op_ret >= 0)
+                        gf_quota_usage_subtract (this,
+                                                 ctx->stbuf.st_blocks * 512);
+                loc_wipe (&ctx->loc);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * quota_rmdir_stat_cbk - completion of the stat issued ahead of an rmdir.
+ * A successful result is saved in frame->local; the rmdir is then wound
+ * regardless of the stat outcome.
+ */
+int
+quota_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_local *ctx = frame->local;
+
+        if (op_ret >= 0)
+                ctx->stbuf = *buf;
+
+        STACK_WIND (frame, quota_rmdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rmdir,
+                    &ctx->loc);
+
+        return 0;
+}
+
+
+/*
+ * quota_rmdir - rmdir entry point.
+ *
+ * Without a usage limit the call is passed straight to the child.  With
+ * one, the directory is stat'ed first so quota_rmdir_cbk () knows how
+ * many blocks are being released.  Unwinds with ENOMEM when the per-call
+ * state cannot be allocated.
+ */
+int
+quota_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        struct quota_local *local = NULL;
+        struct quota_priv  *priv  = this->private;
+
+        if (!priv->disk_usage_limit) {
+                STACK_WIND (frame, quota_rmdir_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->rmdir,
+                            loc);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (struct quota_local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+        frame->local = local;
+
+        loc_copy (&local->loc, loc);
+
+        STACK_WIND (frame, quota_rmdir_stat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat, loc);
+        return 0;
+}
+
+
+/*
+ * quota_symlink_cbk - completion of symlink.  With a usage limit
+ * configured, a successful result adds the link's allocated blocks
+ * (512-byte units) to the cached usage.
+ */
+int
+quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, inode_t *inode,
+                   struct stat *buf)
+{
+        struct quota_priv *conf = this->private;
+
+        if (conf->disk_usage_limit && (op_ret >= 0))
+                gf_quota_usage_add (this, buf->st_blocks * 512);
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+        return 0;
+}
+
+
+/*
+ * quota_symlink - symlink entry point.  Unwinds with ENOSPC when the
+ * min-free-disk check fails or the cached usage exceeds the usage limit;
+ * otherwise winds the call to the child.
+ */
+int
+quota_symlink (call_frame_t *frame, xlator_t *this,
+               const char *linkpath, loc_t *loc)
+{
+        struct quota_priv *conf = this->private;
+
+        if (-1 == gf_quota_check_free_disk (this)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "min-free-disk limit (%u) crossed, current available is %u",
+                        conf->min_free_disk_limit, conf->current_free_disk);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else if (conf->current_disk_usage > conf->disk_usage_limit) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
+                        conf->disk_usage_limit, conf->current_disk_usage);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
+        } else {
+                STACK_WIND (frame, quota_symlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->symlink,
+                            linkpath, loc);
+        }
+
+        return 0;
+}
+
+
+/*
+ * quota_create_cbk - completion of create.  With a usage limit
+ * configured, a successful result adds the new file's allocated blocks to
+ * the cached usage and stores the value 1 in the fd context for this
+ * translator.  The fd_ctx_set () result is not acted upon.
+ */
+int
+quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        struct quota_priv *conf   = this->private;
+        int                status = 0;
+
+        if (conf->disk_usage_limit && (op_ret >= 0)) {
+                gf_quota_usage_add (this, buf->st_blocks * 512);
+                status = fd_ctx_set (fd, this, 1);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+
+/*
+ * quota_create - create entry point.  Unwinds with ENOSPC when the
+ * min-free-disk check fails or the cached usage exceeds the usage limit;
+ * otherwise winds the call to the child.
+ */
+int
+quota_create (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+        struct quota_priv *conf = this->private;
+
+        if (-1 == gf_quota_check_free_disk (this)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "min-free-disk limit (%u) crossed, current available is %u",
+                        conf->min_free_disk_limit, conf->current_free_disk);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL);
+        } else if (conf->current_disk_usage > conf->disk_usage_limit) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
+                        conf->disk_usage_limit, conf->current_disk_usage);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL);
+        } else {
+                STACK_WIND (frame, quota_create_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->create,
+                            loc, flags, mode, fd);
+        }
+
+        return 0;
+}
+
+
+/*
+ * quota_open_cbk - completion of open.  On success the value 1 is stored
+ * in the fd context for this translator; the fd_ctx_set () result is not
+ * acted upon.
+ */
+int
+quota_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int status = 0;
+
+        if (op_ret >= 0) {
+                status = fd_ctx_set (fd, this, 1);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/*
+ * quota_open - open entry point; winds the call unchanged to the child
+ * with quota_open_cbk () as the callback.
+ */
+int
+quota_open (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, fd_t *fd)
+{
+        STACK_WIND (frame, quota_open_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
+                    loc, flags, fd);
+
+        return 0;
+}
+
+
+/*
+ * quota_writev_cbk - completion of writev.  With a usage limit
+ * configured, a successful write adds the growth in allocated blocks
+ * (post-write stat minus the saved pre-write stat) to the cached usage,
+ * and the fd and request-dict references held in frame->local are
+ * dropped.
+ */
+int
+quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        struct quota_priv  *conf = this->private;
+        struct quota_local *ctx  = frame->local;
+
+        if (!conf->disk_usage_limit)
+                goto unwind;
+
+        if (op_ret >= 0) {
+                gf_quota_usage_add (this, (stbuf->st_blocks -
+                                           ctx->stbuf.st_blocks) * 512);
+        }
+        fd_unref (ctx->fd);
+        dict_unref (ctx->refs);
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+
+/*
+ * quota_writev_fstat_cbk - completion of the fstat issued ahead of a
+ * writev.
+ *
+ * When the fstat succeeded and the cached usage already exceeds the
+ * limit, the write is refused with ENOSPC if its length is larger than
+ * the space left in the file's last block (st_blksize minus st_size
+ * modulo st_blksize).  Otherwise the stat is saved and the writev is
+ * wound; the writev is also wound when the fstat failed.
+ */
+int
+quota_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        struct quota_local *ctx    = frame->local;
+        struct quota_priv  *conf   = this->private;
+        int                 nbytes = 0;
+
+        if (op_ret < 0)
+                goto wind;
+
+        if (conf->current_disk_usage > conf->disk_usage_limit) {
+                nbytes = iov_length (ctx->vector, ctx->count);
+
+                if (nbytes > (buf->st_blksize - (buf->st_size % buf->st_blksize))) {
+                        fd_unref (ctx->fd);
+                        dict_unref (ctx->refs);
+                        STACK_UNWIND (frame, -1, ENOSPC, NULL);
+                        return 0;
+                }
+        }
+        ctx->stbuf = *buf;
+
+wind:
+        STACK_WIND (frame, quota_writev_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev,
+                    ctx->fd, ctx->vector, ctx->count, ctx->offset);
+
+        return 0;
+}
+
+
+/*
+ * quota_writev - writev entry point.
+ *
+ * Unwinds with ENOSPC when the min-free-disk check fails.  Without a
+ * usage limit the write is passed straight to the child.  With one, the
+ * fd is fstat'ed first so the callbacks can enforce the limit and account
+ * for newly allocated blocks.  Unwinds with ENOMEM when the per-call
+ * state cannot be allocated.
+ */
+int
+quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+              struct iovec *vector, int32_t count, off_t off)
+{
+        struct quota_local *local = NULL;
+        struct quota_priv  *priv  = this->private;
+
+        if (gf_quota_check_free_disk (this) == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "min-free-disk limit (%u) crossed, current available is %u",
+                        priv->min_free_disk_limit, priv->current_free_disk);
+                STACK_UNWIND (frame, -1, ENOSPC, NULL);
+                return 0;
+        }
+
+        if (!priv->disk_usage_limit) {
+                STACK_WIND (frame, quota_writev_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->writev,
+                            fd, vector, count, off);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (struct quota_local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        /* References are released in the writev callbacks. */
+        local->fd     = fd_ref (fd);
+        local->refs   = dict_ref (frame->root->req_refs);
+        local->vector = vector;
+        local->count  = count;
+        local->offset = off;
+        frame->local  = local;
+
+        STACK_WIND (frame, quota_writev_fstat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat, fd);
+        return 0;
+}
+
+
+/*
+ * quota_removexattr_cbk - completion of an internal removexattr.  Logs a
+ * critical message on failure and destroys the call stack.
+ */
+int
+quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        if (-1 == op_ret)
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "failed to remove the disk-usage value: %s",
+                        strerror (op_errno));
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * quota_setxattr_cbk - completion of the internal setxattr wound by
+ * gf_quota_cache_sync ().  Logs a critical message on failure and
+ * destroys the call stack.
+ */
+int
+quota_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno)
+{
+        if (-1 == op_ret)
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "failed to set the disk-usage value: %s",
+                        strerror (op_errno));
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * quota_statfs_cbk - completion of statfs.
+ *
+ * When a usage limit is configured the reply is rewritten so that the
+ * volume appears no larger than the limit and its free/available block
+ * counts reflect the cached usage.  When no usage limit is configured, or
+ * the reply carries a zero fragment size, the child's numbers are passed
+ * through untouched.
+ */
+int
+quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct statvfs *statvfs)
+{
+        struct quota_priv *priv     = this->private;
+        uint64_t           f_blocks = 0;
+        uint64_t           f_bused  = 0;
+
+        if (op_ret != 0)
+                goto unwind;
+
+        /*
+         * Without a limit both quotients below would be zero and the free
+         * count would be overwritten with the total block count.
+         */
+        if (!priv->disk_usage_limit || !statvfs->f_frsize)
+                goto unwind;
+
+        f_blocks = priv->disk_usage_limit / statvfs->f_frsize;
+        f_bused  = priv->current_disk_usage / statvfs->f_frsize;
+
+        if (f_blocks && (f_blocks < statvfs->f_blocks))
+                statvfs->f_blocks = f_blocks;
+
+        if (f_bused < statvfs->f_blocks)
+                statvfs->f_bfree = statvfs->f_bavail =
+                        statvfs->f_blocks - f_bused;
+        else
+                statvfs->f_bfree = statvfs->f_bavail = 0;
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, statvfs);
+        return 0;
+}
+
+
+/*
+ * quota_statfs - statfs entry point; winds the call unchanged to the
+ * child with quota_statfs_cbk () as the callback.
+ */
+int
+quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        STACK_WIND (frame, quota_statfs_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->statfs,
+                    loc);
+        return 0;
+}
+
+
+/*
+ * quota_getxattr_cbk - completion of the internal getxattr wound by
+ * gf_quota_get_disk_usage ().
+ *
+ * When the reply carries "trusted.glusterfs-quota-du", its value becomes
+ * the cached disk usage.  The frame was created by this translator, so
+ * its stack is destroyed on every path.
+ */
+int
+quota_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *value)
+{
+        data_t            *data = NULL;
+        struct quota_priv *priv = this->private;
+
+        if (op_ret >= 0) {
+                data = dict_get (value, "trusted.glusterfs-quota-du");
+                if (data) {
+                        LOCK (&priv->lock);
+                        {
+                                priv->current_disk_usage = data_to_uint64 (data);
+                        }
+                        UNLOCK (&priv->lock);
+                }
+        }
+
+        /* No early return above: the stack must always be released. */
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * gf_quota_get_disk_usage - wind an internal getxattr for
+ * "trusted.glusterfs-quota-du" on "/" to the child; the reply is handled
+ * by quota_getxattr_cbk ().
+ */
+void
+gf_quota_get_disk_usage (xlator_t *this)
+{
+        call_frame_t *frame = NULL;
+        loc_t         loc   = {0, };    /* never hand stack garbage downstream */
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create a frame for getxattr");
+                return;
+        }
+
+        build_root_loc (this, &loc);
+
+        STACK_WIND (frame, quota_getxattr_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->getxattr,
+                    &loc,
+                    "trusted.glusterfs-quota-du");
+        return ;
+}
+
+
+/*
+ * gf_quota_cache_sync - store the cached disk usage in the
+ * "trusted.glusterfs-quota-du" extended attribute of "/" through an
+ * internal setxattr; completion is handled by quota_setxattr_cbk ().
+ */
+void
+gf_quota_cache_sync (xlator_t *this)
+{
+        struct quota_priv *priv  = this->private;
+        call_frame_t      *frame = NULL;
+        dict_t            *dict  = NULL;
+        loc_t              loc   = {0, };
+
+        dict = get_new_dict ();
+        if (!dict) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                return;
+        }
+        /* Hold a reference for the duration of the wind. */
+        dict_ref (dict);
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to create a frame for setxattr");
+                dict_unref (dict);
+                return;
+        }
+
+        build_root_loc (this, &loc);
+
+        dict_set (dict, "trusted.glusterfs-quota-du",
+                  data_from_uint64 (priv->current_disk_usage));
+
+        STACK_WIND (frame, quota_setxattr_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->setxattr,
+                    &loc, dict, 0);
+
+        /* Drop our reference so the dict does not leak. */
+        dict_unref (dict);
+}
+
+
+/*
+ * quota_release - fd release callback; pushes the cached disk usage to
+ * the backend via gf_quota_cache_sync () and always returns 0.
+ */
+int
+quota_release (xlator_t *this, fd_t *fd)
+{
+        (void) fd;
+
+        gf_quota_cache_sync (this);
+        return 0;
+}
+
+
+/*
+ * notify - event hook.  On the first GF_EVENT_CHILD_UP, and only when a
+ * usage limit is configured, the stored disk usage is fetched from the
+ * child.  Every event, CHILD_UP included, is then handed to
+ * default_notify ().
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        struct quota_priv *conf = this->private;
+
+        if ((event == GF_EVENT_CHILD_UP) && conf->only_first_time) {
+                conf->only_first_time = 0;
+                if (conf->disk_usage_limit)
+                        gf_quota_get_disk_usage (this);
+        }
+
+        default_notify (this, event, data);
+
+        return 0;
+}
+
+
+/*
+ * init - translator setup.
+ *
+ * Requires exactly one child.  Parses the "disk-usage-limit",
+ * "min-free-disk-limit" and "refresh-interval" options into a freshly
+ * allocated quota_priv and stores it in this->private.  Returns 0 on
+ * success and -1 on failure; on failure the partially built private
+ * structure is released.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int                ret      = -1;
+        data_t            *data     = NULL;
+        struct quota_priv *_private = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: quota should have exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        /* CALLOC zeroes every limit and counter. */
+        _private = CALLOC (1, sizeof (struct quota_priv));
+        if (!_private) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                return -1;
+        }
+        LOCK_INIT (&_private->lock);
+
+        data = dict_get (this->options, "disk-usage-limit");
+        if (data) {
+                if (gf_string2bytesize (data->data, &_private->disk_usage_limit) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number '%s' for disk-usage limit", data->data);
+                        goto out;
+                }
+        }
+
+        data = dict_get (this->options, "min-free-disk-limit");
+        if (data) {
+                if (gf_string2percent (data->data, &_private->min_free_disk_limit) != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid percent '%s' for min-free-disk limit", data->data);
+                        goto out;
+                }
+                _private->refresh_interval = 20; /* 20seconds is default */
+                data = dict_get (this->options, "refresh-interval");
+                if (data) {
+                        if (gf_string2time (data->data,
+                                            &_private->refresh_interval)!= 0) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "invalid time '%s' for refresh "
+                                        "interval", data->data);
+                                goto out;
+                        }
+                }
+        }
+
+        _private->only_first_time = 1;
+        this->private = (void *)_private;
+        _private = NULL;        /* ownership moved to this->private */
+        ret = 0;
+ out:
+        if (_private) {
+                /* Error path: do not leak the private structure. */
+                LOCK_DESTROY (&_private->lock);
+                FREE (_private);
+        }
+        return ret;
+}
+
+/*
+ * fini - translator teardown.  Pushes the cached disk usage to the
+ * backend one last time, then detaches and frees the private structure
+ * allocated by init ().
+ */
+void
+fini (xlator_t *this)
+{
+        struct quota_priv *_private = this->private;
+
+        if (_private) {
+                gf_quota_cache_sync (this);
+                this->private = NULL;
+                FREE (_private);
+        }
+
+        return ;
+}
+
+/*
+ * File-operation table: the operations that create, grow, shrink or
+ * remove data, plus open and statfs, are routed through the quota_*
+ * handlers above.
+ */
+struct xlator_fops fops = {
+ .create = quota_create,
+ .open = quota_open,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .writev = quota_writev,
+ .unlink = quota_unlink,
+ .rmdir = quota_rmdir,
+ .mknod = quota_mknod,
+ .mkdir = quota_mkdir,
+ .symlink = quota_symlink,
+ .statfs = quota_statfs,
+};
+
+/* Management-operation table: this translator registers no handlers. */
+struct xlator_mops mops = {
+};
+
+/* Callback table: fd release triggers quota_release (). */
+struct xlator_cbks cbks = {
+ .release = quota_release
+};
+
+/*
+ * Volume options read by init (); the list is terminated by an entry
+ * whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"min-free-disk-limit"},
+ .type = GF_OPTION_TYPE_PERCENT
+ },
+ { .key = {"refresh-interval"},
+ .type = GF_OPTION_TYPE_TIME
+ },
+ { .key = {"disk-usage-limit"},
+ .type = GF_OPTION_TYPE_SIZET
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/trash/Makefile.am b/xlators/features/trash/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/features/trash/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
new file mode 100644
index 00000000000..d61f608aaa8
--- /dev/null
+++ b/xlators/features/trash/src/Makefile.am
@@ -0,0 +1,13 @@
+# Build trash.la as a libtool module installed under xlator/features.
+xlator_LTLIBRARIES = trash.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+# libtool's documented spelling of this flag is -avoid-version.
+trash_la_LDFLAGS = -module -avoid-version
+
+trash_la_SOURCES = trash.c
+trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
new file mode 100644
index 00000000000..c8e7357ee08
--- /dev/null
+++ b/xlators/features/trash/src/trash.c
@@ -0,0 +1,596 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+
+#include <libgen.h>
+
+/* TODO: currently it can work only above posix, no other translators
+ * between them. Not a good thing. Try making more reliable methods.
+ */
+
+/*
+ * Per-call state kept in frame->local while an unlink or rename is
+ * redirected into the trash directory.
+ */
+struct trash_struct {
+ inode_t *inode; /* NOTE(review): use not visible in this part of the file */
+ loc_t loc1; /* source of the final rename in the rename path */
+ loc_t loc2; /* copy of the entry that is being moved to the trash */
+ char origpath[ZR_PATH_MAX]; /* NOTE(review): use not visible in this part of the file */
+ char newpath[ZR_PATH_MAX]; /* destination path inside the trash directory */
+ char oldpath[ZR_PATH_MAX]; // used only in case of rename
+};
+typedef struct trash_struct trash_local_t;
+
+/* Translator-wide state: the path of the trash directory. */
+struct trash_priv {
+ char trash_dir[ZR_PATH_MAX];
+};
+typedef struct trash_priv trash_private_t;
+
+int32_t
+trash_unlink_rename_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *buf);
+int32_t
+trash_rename_rename_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *buf);
+
+/**
+ * trash_common_unwind_cbk - release any loc copies held in frame->local
+ * and unwind with the child's result.
+ *
+ * frame->local may be NULL: trash_unlink () winds straight to this
+ * callback, without allocating local state, when the target already lives
+ * inside the trash directory.
+ */
+int32_t
+trash_common_unwind_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno)
+{
+        trash_local_t *local = frame->local;
+
+        if (local) {
+                if (local->loc1.path)
+                        loc_wipe (&local->loc1);
+
+                if (local->loc2.path)
+                        loc_wipe (&local->loc2);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/**
+ * trash_common_unwind_buf_cbk - release any loc copies held in
+ * frame->local and unwind with the child's result and stat buffer.
+ *
+ * A NULL frame->local is tolerated, in line with
+ * trash_common_unwind_cbk ().
+ */
+int32_t
+trash_common_unwind_buf_cbk (call_frame_t *frame,
+                             void *cookie,
+                             xlator_t *this,
+                             int32_t op_ret,
+                             int32_t op_errno,
+                             struct stat *buf)
+{
+        trash_local_t *local = frame->local;
+
+        if (local) {
+                if (local->loc1.path)
+                        loc_wipe (&local->loc1);
+
+                if (local->loc2.path)
+                        loc_wipe (&local->loc2);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/**
+ * trash_mkdir_cbk - mkdir callback used while creating the trash
+ * destination directory for an unlink.
+ *
+ * cookie is the heap-allocated path string that was handed to
+ * STACK_WIND_COOKIE for the mkdir that just completed; it is freed here.
+ *
+ * On ENOENT a mkdir is wound for each '/'-delimited prefix of
+ * local->newpath, with this function as the callback again.  Otherwise,
+ * when cookie equals dirname (local->newpath), the rename of local->loc2
+ * to local->newpath is retried through trash_unlink_rename_cbk.
+ *
+ * NOTE(review): the strdup () result is not checked, and when cookie does
+ * not match the final directory name this callback neither winds nor
+ * unwinds -- confirm that another invocation always completes the frame.
+ */
+int32_t
+trash_mkdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stbuf)
+{
+ trash_local_t *local = frame->local;
+ char *tmp_str = strdup (local->newpath);
+ int32_t count = 0;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+
+ if (op_ret == -1 && op_errno == ENOENT) {
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ /* a '/' at offset 0: keep it as a one-character prefix */
+ if (count == 0)
+ count = 1;
+ tmp_path = CALLOC (1, count + 1);
+ ERR_ABORT (tmp_path);
+ memcpy (tmp_path, local->newpath, count);
+ loc_t tmp_loc = {
+ .inode = NULL,
+ .path = tmp_path,
+ };
+
+ /* TODO:create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame,
+ trash_mkdir_cbk,
+ tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc,
+ 0777);
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ free (cookie);
+ free (tmp_str);
+ return 0;
+ }
+ /* dirname () may modify tmp_str, which is a private copy */
+ char *dir_name = dirname (tmp_str);
+ if (strcmp((char*)cookie, dir_name) == 0) {
+ loc_t new_loc = {
+ .inode = NULL,
+ .path = local->newpath
+ };
+ STACK_WIND (frame,
+ trash_unlink_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc2,
+ &new_loc);
+
+ }
+ free (cookie); /* strdup (dir_name) was sent here :) */
+ free (tmp_str);
+ return 0;
+}
+
+/**
+ * trash_unlink_rename_cbk - completion of the rename that moves an
+ * unlinked file into the trash directory.
+ *
+ *  - ENOENT: the destination directory is missing; create
+ *    dirname (local->newpath) and let trash_mkdir_cbk retry the rename.
+ *  - ENOTDIR / EISDIR: the copy cannot be kept; really unlink the file.
+ *  - anything else: release the saved loc and report the rename's own
+ *    result to the caller, so that a failed rename is not reported as a
+ *    successful unlink.
+ */
+int32_t
+trash_unlink_rename_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         struct stat *buf)
+{
+        trash_local_t *local        = frame->local;
+        char          *tmp_str      = NULL;
+        char          *dir_name     = NULL;
+        char          *mkdir_cookie = NULL;
+
+        if (op_ret == -1 && op_errno == ENOENT) {
+                /* check for the errno, if its ENOENT create directory and call
+                 * rename later
+                 */
+                tmp_str = strdup (local->newpath);
+                if (tmp_str) {
+                        /* dirname () may modify its argument: use the copy */
+                        dir_name = dirname (tmp_str);
+                        mkdir_cookie = strdup (dir_name);
+                }
+                if (!mkdir_cookie) {
+                        gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                        free (tmp_str);
+                        loc_wipe (&local->loc2);
+                        STACK_UNWIND (frame, -1, ENOMEM);
+                        return 0;
+                }
+
+                loc_t tmp_loc = {
+                        .inode = NULL,
+                        .path = dir_name,
+                };
+                /* TODO: create the directory with proper permissions */
+                STACK_WIND_COOKIE (frame,
+                                   trash_mkdir_cbk,
+                                   mkdir_cookie, /* freed by trash_mkdir_cbk */
+                                   this->children->xlator,
+                                   this->children->xlator->fops->mkdir,
+                                   &tmp_loc,
+                                   0777);
+                free (tmp_str);
+        } else if (op_ret == -1 && op_errno == ENOTDIR) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Target exists, cannot keep the copy, deleting");
+                STACK_WIND (frame,
+                            trash_common_unwind_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->unlink,
+                            &local->loc2);
+        } else if (op_ret == -1 && op_errno == EISDIR) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Target exists as a directory, cannot keep the copy, "
+                        "deleting");
+                STACK_WIND (frame,
+                            trash_common_unwind_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->unlink,
+                            &local->loc2);
+        } else {
+                /* Done, successfully or not: release the loc and pass the
+                 * real result up. */
+                loc_wipe (&local->loc2);
+                STACK_UNWIND (frame, op_ret, op_errno);
+        }
+
+        return 0;
+}
+
+
+/**
+ * trash_unlink - unlink entry point.
+ *
+ * A path that already starts with the trash directory is really
+ * unlinked.  Any other path is renamed to
+ * <trash_dir><path>.<YYYYmmddHHMMSS>; trash_unlink_rename_cbk handles the
+ * outcome.  Unwinds with ENAMETOOLONG when the trash path would not fit
+ * in local->newpath and with ENOMEM when the per-call state cannot be
+ * allocated.
+ */
+int32_t
+trash_unlink (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc)
+{
+        trash_private_t *priv     = this->private;
+        trash_local_t   *local    = NULL;
+        time_t           utime    = 0;
+        struct tm        tm_buf;
+        char             timestr[256] = {0, };
+        size_t           dir_len  = 0;
+        size_t           path_len = 0;
+        size_t           time_len = 0;
+
+        dir_len = strlen (priv->trash_dir);
+
+        if (strncmp (loc->path, priv->trash_dir, dir_len) == 0) {
+                /* Trying to rename from the trash can dir, do the
+                   actual unlink */
+                STACK_WIND (frame,
+                            trash_common_unwind_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->unlink,
+                            loc);
+                return 0;
+        }
+
+        /* Reentrant conversion; timestr stays empty if it fails. */
+        utime = time (NULL);
+        if (localtime_r (&utime, &tm_buf))
+                time_len = strftime (timestr, sizeof (timestr),
+                                     ".%Y%m%d%H%M%S", &tm_buf);
+
+        /* Refuse paths that would overflow the fixed-size buffer. */
+        path_len = strlen (loc->path);
+        if ((dir_len + path_len + time_len) >= sizeof (local->newpath)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "trash path for %s is too long", loc->path);
+                STACK_UNWIND (frame, -1, ENAMETOOLONG);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (trash_local_t));
+        if (!local) {
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+        frame->local = local;
+
+        loc_copy (&local->loc2, loc);
+
+        /* Lengths were validated above, so these copies cannot overflow. */
+        strcpy (local->newpath, priv->trash_dir);
+        strcat (local->newpath, loc->path);
+        strcat (local->newpath, timestr);
+
+        {
+                loc_t new_loc = {
+                        .inode = NULL,
+                        .path = local->newpath
+                };
+                STACK_WIND (frame,
+                            trash_unlink_rename_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->rename,
+                            loc,
+                            &new_loc);
+        }
+        return 0;
+}
+
+/**
+ * trash_rename_mkdir_cbk - mkdir callback used while creating the trash
+ * destination directory for a rename.
+ *
+ * cookie is the heap-allocated path string that was handed to
+ * STACK_WIND_COOKIE for the mkdir that just completed; it is freed here.
+ *
+ * On ENOENT a mkdir is wound for each '/'-delimited prefix of
+ * local->newpath, with this function as the callback again.  Otherwise,
+ * when cookie equals dirname (local->newpath), the rename of local->loc2
+ * to local->newpath is retried through trash_rename_rename_cbk.
+ *
+ * NOTE(review): the strdup () result is not checked, and when cookie does
+ * not match the final directory name this callback neither winds nor
+ * unwinds -- confirm that another invocation always completes the frame.
+ */
+int32_t
+trash_rename_mkdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stbuf)
+{
+ trash_local_t *local = frame->local;
+ char *tmp_str = strdup (local->newpath);
+
+ if (op_ret == -1 && op_errno == ENOENT) {
+ int32_t count = 0;
+ char *tmp_path = NULL;
+ char *tmp_dirname = strchr (tmp_str, '/');
+
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ /* a '/' at offset 0: keep it as a one-character prefix */
+ if (count == 0)
+ count = 1;
+ tmp_path = CALLOC (1, count + 2);
+ ERR_ABORT (tmp_path);
+ memcpy (tmp_path, local->newpath, count);
+ loc_t tmp_loc = {
+ .inode = NULL,
+ .path = tmp_path,
+ };
+
+ /* TODO:create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame,
+ trash_rename_mkdir_cbk,
+ tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc,
+ 0777);
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ free (cookie);
+ free (tmp_str);
+ return 0;
+ }
+ /* dirname () may modify tmp_str, which is a private copy */
+ char *dir_name = dirname (tmp_str);
+ if (strcmp((char*)cookie, dir_name) == 0) {
+ loc_t new_loc = {
+ .inode = NULL,
+ .path = local->newpath
+ };
+ STACK_WIND (frame,
+ trash_rename_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc2,
+ &new_loc);
+
+ }
+ free (cookie); /* strdup (dir_name) was sent here :) */
+ free (tmp_str);
+ return 0;
+}
+
+
+/**
+ * trash_rename_rename_cbk - callback of the rename that moves the existing
+ * destination (local->loc2) to its trash location (local->newpath).
+ *
+ * - ENOENT: the trash directory for the file does not exist yet, so a mkdir
+ *   of dirname (local->newpath) is wound; trash_rename_mkdir_cbk receives a
+ *   heap copy of that directory name as its cookie and frees it.
+ * - ENOTDIR / EISDIR: a warning is logged because no copy can be kept.
+ * - In every case other than ENOENT the rename originally requested
+ *   (local->loc1 -> local->loc2) is wound with trash_common_unwind_buf_cbk.
+ *
+ * If the directory name cannot be duplicated the frame is unwound with
+ * ENOMEM instead of handing a NULL cookie to the mkdir callback.
+ */
+int32_t
+trash_rename_rename_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         struct stat *buf)
+{
+        trash_local_t *local = frame->local;
+
+        if (op_ret == -1 && op_errno == ENOENT) {
+                /* check for the errno, if its ENOENT create directory and
+                 * call rename later
+                 */
+                char  *tmp_str      = strdup (local->newpath);
+                char  *dir_name     = NULL;
+                char  *mkdir_cookie = NULL;
+                loc_t  tmp_loc      = { .inode = NULL, };
+
+                if (tmp_str) {
+                        dir_name     = dirname (tmp_str);
+                        mkdir_cookie = strdup (dir_name);
+                }
+                if (!mkdir_cookie) {
+                        free (tmp_str);
+                        STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                        return 0;
+                }
+                tmp_loc.path = dir_name;
+
+                /* TODO: create the directory with proper permissions */
+                STACK_WIND_COOKIE (frame,
+                                   trash_rename_mkdir_cbk,
+                                   mkdir_cookie,
+                                   this->children->xlator,
+                                   this->children->xlator->fops->mkdir,
+                                   &tmp_loc,
+                                   0777);
+                free (tmp_str);
+                return 0;
+        } else if (op_ret == -1 && op_errno == ENOTDIR) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Target exists, cannot keep the dest entry %s, "
+                        "renaming",
+                        local->loc2.path);
+        } else if (op_ret == -1 && op_errno == EISDIR) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Target exists as a directory, cannot keep the "
+                        "copy %s, renaming",
+                        local->loc2.path);
+        }
+
+        /* the destination is out of the way (or could not be kept):
+           perform the rename the caller asked for */
+        loc_t new_loc = {
+                .inode = NULL,
+                .parent = local->loc2.parent,
+                .path = local->loc2.path,
+        };
+        STACK_WIND (frame,
+                    trash_common_unwind_buf_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->rename,
+                    &local->loc1,
+                    &new_loc);
+
+        return 0;
+}
+
+/**
+ * trash_rename_lookup_cbk - callback of the lookup done on the rename target.
+ *
+ * A failed lookup winds the requested rename (local->loc1 -> local->loc2)
+ * directly.  Otherwise the entry found at local->loc2 is first renamed to
+ * local->newpath, and trash_rename_rename_cbk continues from there.
+ */
+int32_t
+trash_rename_lookup_cbk (call_frame_t *frame,
+                         void *cookie,
+                         xlator_t *this,
+                         int32_t op_ret,
+                         int32_t op_errno,
+                         inode_t *inode,
+                         struct stat *buf,
+                         dict_t *xattr)
+{
+        trash_local_t *local  = frame->local;
+        loc_t          oldloc = { .inode = NULL, };
+        loc_t          newloc = { .inode = NULL, };
+
+        if (op_ret == -1) {
+                STACK_WIND (frame, trash_common_unwind_buf_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->rename,
+                            &local->loc1, &local->loc2);
+                return 0;
+        }
+
+        /* source of the move to trash: the entry that was just looked up */
+        oldloc.parent = local->loc2.parent;
+        oldloc.inode  = inode;
+        oldloc.path   = local->loc2.path;
+
+        /* destination of the move to trash */
+        newloc.path   = local->newpath;
+
+        STACK_WIND (frame, trash_rename_rename_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->rename,
+                    &oldloc, &newloc);
+
+        return 0;
+}
+
+
+/**
+ * trash_rename - rename fop: keep a copy of an overwritten destination.
+ *
+ * @frame:  call frame of the request
+ * @this:   this translator
+ * @oldloc: source of the rename
+ * @newloc: destination of the rename
+ *
+ * Renames whose source already lies inside the trash directory are passed
+ * straight down.  For any other rename the trash path
+ * "<trash_dir><newloc path>.<YYYYmmddHHMMSS>" is prepared in local->newpath
+ * and a lookup is sent on @newloc; trash_rename_lookup_cbk decides what to do
+ * next.  When the trash path would not fit into local->newpath the rename is
+ * passed straight down (a warning is logged) rather than overflowing the
+ * buffer.
+ *
+ * Always returns 0; results are delivered through the frame.
+ */
+int32_t
+trash_rename (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *oldloc,
+              loc_t *newloc)
+{
+        trash_private_t *priv = this->private;
+        trash_local_t   *local = NULL;
+        time_t           utime = 0;
+        struct tm        tm_buf;
+        char             timestr[256];
+
+        if (strncmp (oldloc->path, priv->trash_dir,
+                     strlen (priv->trash_dir)) == 0) {
+                /* Trying to rename from the trash can dir,
+                   do the actual rename */
+                STACK_WIND (frame,
+                            trash_common_unwind_buf_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->rename,
+                            oldloc,
+                            newloc);
+                return 0;
+        }
+
+        /* localtime_r() writes into our own buffer (localtime() hands out a
+           shared static one); fall back to no suffix if the time cannot be
+           formatted. */
+        utime = time (NULL);
+        if (localtime_r (&utime, &tm_buf) == NULL ||
+            strftime (timestr, sizeof (timestr),
+                      ".%Y%m%d%H%M%S", &tm_buf) == 0)
+                timestr[0] = '\0';
+
+        /* Trying to rename a regular file from GlusterFS */
+        local = CALLOC (1, sizeof (trash_local_t));
+        if (!local) {
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        /* newpath is written straight after CALLOC, i.e. it is a char array
+           embedded in trash_local_t, so sizeof gives its capacity. */
+        if (strlen (priv->trash_dir) + strlen (newloc->path) +
+            strlen (timestr) >= sizeof (local->newpath)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "trash path for %s is too long, renaming without "
+                        "keeping a copy",
+                        newloc->path);
+                FREE (local);
+                STACK_WIND (frame,
+                            trash_common_unwind_buf_cbk,
+                            this->children->xlator,
+                            this->children->xlator->fops->rename,
+                            oldloc,
+                            newloc);
+                return 0;
+        }
+
+        frame->local = local;
+        loc_copy (&local->loc1, oldloc);
+        loc_copy (&local->loc2, newloc);
+
+        strcpy (local->newpath, priv->trash_dir);
+        strcat (local->newpath, newloc->path);
+        strcat (local->newpath, timestr);
+
+        /* Send a lookup call on newloc, to ensure we are not
+           overwriting */
+        STACK_WIND (frame,
+                    trash_rename_lookup_cbk,
+                    this->children->xlator,
+                    this->children->xlator->fops->lookup,
+                    newloc,
+                    0);
+        return 0;
+}
+
+/**
+ * trash_init - validate the volume layout and set up the private data.
+ *
+ * The translator must have exactly one child, and the bottom of its child
+ * chain must be a "storage/" translator.  The trash directory comes from the
+ * "trash-dir" option (a leading '/' is added when missing) and defaults to
+ * "/.trashcan".
+ *
+ * Returns 0 on success and -1 when the configuration is not usable,
+ * including a "trash-dir" value too long for the private buffer.
+ */
+int32_t
+init (xlator_t *this)
+{
+        data_t          *trash_dir = NULL;
+        xlator_list_t   *trav = NULL;
+        trash_private_t *_priv = NULL;
+        const char      *dir = "/.trashcan";
+        size_t           need_slash = 0;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "not configured with exactly one child. exiting");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        /* walk down the first-child chain to the bottom-most translator */
+        trav = this->children;
+        while (trav->xlator->children)
+                trav = trav->xlator->children;
+
+        if (strncmp ("storage/", trav->xlator->type, 8))
+        {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'trash' translator not loaded over storage "
+                        "translator, not a supported setup");
+                return -1;
+        }
+
+        _priv = CALLOC (1, sizeof (*_priv));
+        ERR_ABORT (_priv);
+
+        trash_dir = dict_get (this->options, "trash-dir");
+        if (!trash_dir || !trash_dir->data) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "no option specified for 'trash-dir', "
+                        "using \"/.trashcan\"");
+        } else {
+                /* Need a path with '/' as the first char, if not
+                   given, prepend it */
+                dir = trash_dir->data;
+                need_slash = (dir[0] != '/') ? 1 : 0;
+        }
+
+        /* trash_dir is filled straight after CALLOC, i.e. it is a char array
+           embedded in trash_private_t, so sizeof gives its capacity. */
+        if (strlen (dir) + need_slash >= sizeof (_priv->trash_dir)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "value of option 'trash-dir' is too long");
+                FREE (_priv);
+                return -1;
+        }
+
+        _priv->trash_dir[0] = '\0';
+        if (need_slash)
+                strcpy (_priv->trash_dir, "/");
+        strcat (_priv->trash_dir, dir);
+
+        this->private = (void *)_priv;
+        return 0;
+}
+
+/* fini: release the private data set up by init (). */
+void
+fini (xlator_t *this)
+{
+        trash_private_t *priv = NULL;
+
+        priv = this->private;
+        FREE (priv);
+}
+
+
+/* File operations handled by the trash translator: only unlink and rename
+   are set here. */
+struct xlator_fops fops = {
+ .unlink = trash_unlink,
+ .rename = trash_rename,
+};
+
+/* Management operations table: no entries are set by this translator. */
+struct xlator_mops mops = {
+
+};
+
+/* Callback table: no entries are set by this translator. */
+struct xlator_cbks cbks = {
+};
+
+/* Options accepted by this translator.  The entry with a NULL key is
+   presumably the end-of-list marker - confirm against the option parser. */
+struct volume_options options[] = {
+ /* "trash-dir": read by init () as the directory files are moved into */
+ { .key = { "trash-dir" },
+ .type = GF_OPTION_TYPE_PATH
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/meta/Makefile.am b/xlators/meta/Makefile.am
new file mode 100644
index 00000000000..e1c45f3051c
--- /dev/null
+++ b/xlators/meta/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS=src \ No newline at end of file
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
new file mode 100644
index 00000000000..385ff553f59
--- /dev/null
+++ b/xlators/meta/src/Makefile.am
@@ -0,0 +1,10 @@
+xlator_PROGRAMS = meta.so
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
+
+meta_so_SOURCES = meta.c tree.c misc.c view.c
+noinst_HEADERS = meta.h tree.h misc.h view.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+
+CLEANFILES =
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
new file mode 100644
index 00000000000..ce49ed2c459
--- /dev/null
+++ b/xlators/meta/src/meta.c
@@ -0,0 +1,1285 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "xlator.h"
+
+#include "meta.h"
+#include "view.h"
+
+/* getattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_getattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      buf);
+        return 0;
+}
+
+/*
+ * getattr: paths found in the meta tree are answered here - through the
+ * entry's own getattr fop when it has one, otherwise from the entry's stored
+ * stat buffer.  Every other path is forwarded to the first child.
+ */
+int32_t
+meta_getattr (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        meta_private_t *priv  = (meta_private_t *) this->private;
+        meta_dirent_t  *entry = lookup_meta_entry (priv->tree, path, NULL);
+
+        if (!entry) {
+                /* not a meta entry */
+                STACK_WIND (frame, meta_getattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getattr,
+                            path);
+                return 0;
+        }
+
+        if (entry->fops && entry->fops->getattr) {
+                STACK_WIND (frame, meta_getattr_cbk,
+                            this, entry->fops->getattr, path);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, 0, 0, entry->stbuf);
+        return 0;
+}
+
+/* chmod callback: relay the result unchanged to the caller. */
+int32_t
+meta_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* chmod: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_chmod (call_frame_t *frame, xlator_t *this,
+            const char *path, mode_t mode)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_chmod_cbk,
+                    subvol, subvol->fops->chmod,
+                    path, mode);
+        return 0;
+}
+
+/* chown callback: relay the result unchanged to the caller. */
+int32_t
+meta_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* chown: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_chown (call_frame_t *frame, xlator_t *this,
+            const char *path, uid_t uid, gid_t gid)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_chown_cbk,
+                    subvol, subvol->fops->chown,
+                    path, uid, gid);
+        return 0;
+}
+
+
+/* truncate callback: relay the result unchanged to the caller. */
+int32_t
+meta_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* truncate: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_truncate (call_frame_t *frame, xlator_t *this,
+               const char *path, off_t offset)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_truncate_cbk,
+                    subvol, subvol->fops->truncate,
+                    path, offset);
+        return 0;
+}
+
+
+/* ftruncate callback: relay the result unchanged to the caller. */
+int32_t
+meta_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* ftruncate: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_ftruncate (call_frame_t *frame, xlator_t *this,
+                dict_t *fd, off_t offset)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_ftruncate_cbk,
+                    subvol, subvol->fops->ftruncate,
+                    fd, offset);
+        return 0;
+}
+
+
+/* utimes callback: relay the result unchanged to the caller. */
+int32_t
+meta_utimes_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* utimes: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_utimes (call_frame_t *frame, xlator_t *this,
+             const char *path, struct timespec *buf)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_utimes_cbk,
+                    subvol, subvol->fops->utimes,
+                    path, buf);
+        return 0;
+}
+
+
+/* access callback: relay the result unchanged to the caller. */
+int32_t
+meta_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* access: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_access (call_frame_t *frame, xlator_t *this,
+             const char *path, mode_t mode)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_access_cbk,
+                    subvol, subvol->fops->access,
+                    path, mode);
+        return 0;
+}
+
+/* readlink callback: relay the result unchanged to the caller. */
+int32_t
+meta_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, char *dest)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, dest);
+        return 0;
+}
+
+/* readlink: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_readlink (call_frame_t *frame, xlator_t *this,
+               const char *path, size_t size)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_readlink_cbk,
+                    subvol, subvol->fops->readlink,
+                    path, size);
+        return 0;
+}
+
+/* mknod callback: relay the result unchanged to the caller. */
+int32_t
+meta_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* mknod: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_mknod (call_frame_t *frame, xlator_t *this,
+            const char *path, mode_t mode, dev_t dev)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_mknod_cbk,
+                    subvol, subvol->fops->mknod,
+                    path, mode, dev);
+        return 0;
+}
+
+/* mkdir callback: relay the result unchanged to the caller. */
+int32_t
+meta_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* mkdir: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_mkdir (call_frame_t *frame, xlator_t *this,
+            const char *path, mode_t mode)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_mkdir_cbk,
+                    subvol, subvol->fops->mkdir,
+                    path, mode);
+        return 0;
+}
+
+/* unlink callback: relay the result unchanged to the caller. */
+int32_t
+meta_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* unlink: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_unlink (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_unlink_cbk,
+                    subvol, subvol->fops->unlink,
+                    path);
+        return 0;
+}
+
+/* rmdir callback: relay the result unchanged to the caller. */
+int32_t
+meta_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* rmdir: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_rmdir (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_rmdir_cbk,
+                    subvol, subvol->fops->rmdir,
+                    path);
+        return 0;
+}
+
+/* symlink callback: relay the result unchanged to the caller. */
+int32_t
+meta_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* symlink: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_symlink (call_frame_t *frame, xlator_t *this,
+              const char *oldpath, const char *newpath)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_symlink_cbk,
+                    subvol, subvol->fops->symlink,
+                    oldpath, newpath);
+        return 0;
+}
+
+/* rename callback: relay the result unchanged to the caller. */
+int32_t
+meta_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* rename: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_rename (call_frame_t *frame, xlator_t *this,
+             const char *oldpath, const char *newpath)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_rename_cbk,
+                    subvol, subvol->fops->rename,
+                    oldpath, newpath);
+        return 0;
+}
+
+/* link callback: relay the result unchanged to the caller. */
+int32_t
+meta_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* link: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_link (call_frame_t *frame, xlator_t *this,
+           const char *oldpath, const char *newpath)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_link_cbk,
+                    subvol, subvol->fops->link,
+                    oldpath, newpath);
+        return 0;
+}
+
+/* Per-call state of an open/create routed to a meta entry's own fop:
+   carries the path so meta_open_cbk can record it in the fd context. */
+struct _open_local {
+ /* strdup ()ed copy of the path being opened */
+ const char *path;
+};
+
+/*
+ * open/create callback.
+ *
+ * When the call was routed to a meta entry (frame->local is set) and it
+ * succeeded, the entry's path is stored in the returned fd context under this
+ * translator's name, so that later fd-based fops can find the entry again.
+ * Failed calls, and calls that return no context, are relayed untouched.
+ */
+int32_t
+meta_open_cbk (call_frame_t *frame, void *cookie,
+               xlator_t *this, int32_t op_ret, int32_t op_errno,
+               dict_t *ctx, struct stat *buf)
+{
+        struct _open_local *local = frame->local;
+
+        /* only tag a context that really exists and belongs to an open fd */
+        if (local && local->path && ctx && op_ret >= 0)
+                dict_set (ctx, this->name, str_to_data (local->path));
+
+        STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
+        return 0;
+}
+
+/*
+ * open: paths outside the meta tree go to the first child.  A meta entry with
+ * its own open fop is opened through that fop (the path travels in
+ * frame->local for meta_open_cbk); a meta entry without one is answered here
+ * with a new fd context that holds a copy of the path under this translator's
+ * name.
+ */
+int32_t
+meta_open (call_frame_t *frame, xlator_t *this,
+           const char *path, int32_t flags, mode_t mode)
+{
+        meta_private_t     *priv  = (meta_private_t *) this->private;
+        meta_dirent_t      *entry = lookup_meta_entry (priv->tree, path, NULL);
+        struct _open_local *local = NULL;
+        dict_t             *ctx   = NULL;
+
+        if (!entry) {
+                STACK_WIND (frame, meta_open_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                            path, flags, mode);
+                return 0;
+        }
+
+        if (entry->fops && entry->fops->open) {
+                local = CALLOC (1, sizeof (struct _open_local));
+                ERR_ABORT (local);
+                local->path = strdup (path);
+                frame->local = local;
+                STACK_WIND (frame, meta_open_cbk,
+                            this, entry->fops->open,
+                            path, flags, mode);
+                return 0;
+        }
+
+        ctx = get_new_dict ();
+        dict_ref (ctx);
+        dict_set (ctx, this->name, str_to_data (strdup (path)));
+        STACK_UNWIND (frame, 0, 0, ctx, entry->stbuf);
+        return 0;
+}
+
+/*
+ * create: paths outside the meta tree go to the first child.  A meta entry
+ * with its own create fop is created through that fop (the path travels in
+ * frame->local for meta_open_cbk).  A meta entry without a create fop cannot
+ * be created: the call fails with EPERM instead of reporting failure with an
+ * errno of 0.
+ */
+int32_t
+meta_create (call_frame_t *frame, xlator_t *this,
+             const char *path, int32_t flags, mode_t mode)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        meta_dirent_t *root = priv->tree;
+        meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
+
+        if (!file) {
+                STACK_WIND (frame, meta_open_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                            path, flags, mode);
+                return 0;
+        }
+
+        if (file->fops && file->fops->create) {
+                struct _open_local *local = CALLOC (1, sizeof (struct _open_local));
+                ERR_ABORT (local);
+                local->path = strdup (path);
+                frame->local = local;
+                STACK_WIND (frame, meta_open_cbk,
+                            this, file->fops->create,
+                            path, flags, mode);
+                return 0;
+        }
+
+        /* a failure must carry a real errno for the caller to report */
+        STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
+        return 0;
+}
+
+/* readv callback: relay the result unchanged to the caller. */
+int32_t
+meta_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iovec *vector, int32_t count)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count);
+        return 0;
+}
+
+/*
+ * readv: an fd whose context carries a path under this translator's name
+ * belongs to a meta entry and is read through that entry's readv fop; any
+ * other fd is forwarded to the first child.
+ *
+ * A meta fd whose entry can no longer be found fails with EBADF, and one
+ * whose entry has no readv fop fails with EINVAL, so the frame is always
+ * answered and the function always returns a value.
+ */
+int32_t
+meta_readv (call_frame_t *frame,
+            xlator_t *this,
+            dict_t *fd,
+            size_t size,
+            off_t offset)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        meta_dirent_t *root = priv->tree;
+        data_t *path_data = dict_get (fd, this->name);
+        meta_dirent_t *file = NULL;
+
+        if (!path_data) {
+                STACK_WIND (frame, meta_readv_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                            fd, size, offset);
+                return 0;
+        }
+
+        file = lookup_meta_entry (root, data_to_str (path_data), NULL);
+        if (file && file->fops && file->fops->readv) {
+                STACK_WIND (frame, meta_readv_cbk,
+                            this, file->fops->readv,
+                            fd, size, offset);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, -1, file ? EINVAL : EBADF, NULL, 0);
+        return 0;
+}
+
+/* writev callback: relay the result unchanged to the caller. */
+int32_t
+meta_writev_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * writev: an fd whose context carries a path under this translator's name
+ * belongs to a meta entry and is written through that entry's writev fop;
+ * any other fd is forwarded to the first child.
+ *
+ * The forwarded call uses meta_writev_cbk: the readv callback takes two
+ * extra arguments that a writev reply does not supply.  A meta fd whose entry
+ * can no longer be found fails with EBADF, and one whose entry has no writev
+ * fop fails with EINVAL, so the frame is always answered.
+ */
+int32_t
+meta_writev (call_frame_t *frame, xlator_t *this,
+             dict_t *fd,
+             struct iovec *vector, int32_t count, off_t offset)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        meta_dirent_t *root = priv->tree;
+        data_t *path_data = dict_get (fd, this->name);
+        meta_dirent_t *file = NULL;
+
+        if (!path_data) {
+                STACK_WIND (frame, meta_writev_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                            fd, vector, count, offset);
+                return 0;
+        }
+
+        file = lookup_meta_entry (root, data_to_str (path_data), NULL);
+        if (file && file->fops && file->fops->writev) {
+                STACK_WIND (frame, meta_writev_cbk,
+                            this, file->fops->writev,
+                            fd, vector, count, offset);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, -1, file ? EINVAL : EBADF);
+        return 0;
+}
+
+/* flush callback: relay the result unchanged to the caller. */
+int32_t
+meta_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * flush: an fd whose context carries a path under this translator's name
+ * belongs to a meta entry.  It is flushed through the entry's flush fop when
+ * there is one and reported as success otherwise.  Any other fd is forwarded
+ * to the first child.
+ *
+ * A meta fd whose entry can no longer be found fails with EBADF, so the
+ * frame is always answered and the function always returns a value.
+ */
+int32_t
+meta_flush (call_frame_t *frame,
+            xlator_t *this,
+            dict_t *fd)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        meta_dirent_t *root = priv->tree;
+        data_t *path_data = dict_get (fd, this->name);
+        meta_dirent_t *file = NULL;
+
+        if (!path_data) {
+                STACK_WIND (frame, meta_flush_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
+                            fd);
+                return 0;
+        }
+
+        file = lookup_meta_entry (root, data_to_str (path_data), NULL);
+        if (!file) {
+                STACK_UNWIND (frame, -1, EBADF);
+                return 0;
+        }
+
+        if (file->fops && file->fops->flush) {
+                STACK_WIND (frame, meta_flush_cbk,
+                            this, file->fops->flush,
+                            fd);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, 0, 0);
+        return 0;
+}
+
+/* release callback: relay the result unchanged to the caller. */
+int32_t
+meta_release_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * release: an fd whose context carries a path under this translator's name
+ * was handed out for a meta entry; its context is unreferenced and success is
+ * reported.  Any other fd is forwarded to the first child.
+ *
+ * The context is dropped even when the entry can no longer be found in the
+ * tree, so the frame is always answered and the context is not leaked.
+ */
+int32_t
+meta_release (call_frame_t *frame,
+              xlator_t *this,
+              dict_t *fd)
+{
+        data_t *path_data = dict_get (fd, this->name);
+
+        if (!path_data) {
+                STACK_WIND (frame, meta_release_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->release,
+                            fd);
+                return 0;
+        }
+
+        /* the presence of our key marks the fd as ours, whether or not the
+           entry still resolves */
+        dict_unref (fd);
+        STACK_UNWIND (frame, 0, 0);
+        return 0;
+}
+
+/* fsync callback: relay the result unchanged to the caller. */
+int32_t
+meta_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* fsync: forwarded as-is to the first child. */
+int32_t
+meta_fsync (call_frame_t *frame, xlator_t *this,
+            dict_t *fd, int32_t flags)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_fsync_cbk,
+                    subvol, subvol->fops->fsync,
+                    fd, flags);
+        return 0;
+}
+
+/* fgetattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_fgetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* fgetattr: forwarded as-is to the first child. */
+int32_t
+meta_fgetattr (call_frame_t *frame, xlator_t *this, dict_t *fd)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_fgetattr_cbk,
+                    subvol, subvol->fops->fgetattr,
+                    fd);
+        return 0;
+}
+
+/* opendir callback: relay the result unchanged to the caller. */
+int32_t
+meta_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *fd)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+/*
+ * opendir: a path found in the meta tree is answered here with a new fd
+ * context that holds a copy of the path under this translator's name; any
+ * other path is forwarded to the first child.
+ *
+ * NOTE(review): unlike meta_open, no dict_ref () is taken on the new
+ * context - confirm which of the two is intended.
+ */
+int32_t
+meta_opendir (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        meta_private_t *priv  = (meta_private_t *) this->private;
+        meta_dirent_t  *entry = lookup_meta_entry (priv->tree, path, NULL);
+        dict_t         *ctx   = NULL;
+
+        if (!entry) {
+                STACK_WIND (frame, meta_opendir_cbk,
+                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+                            path);
+                return 0;
+        }
+
+        ctx = get_new_dict ();
+        dict_set (ctx, this->name, str_to_data (strdup (path)));
+        STACK_UNWIND (frame, 0, 0, ctx);
+        return 0;
+}
+
+/*
+ * readdir callback.
+ *
+ * A cookie value of 1 marks a listing of "/" that came from the first child:
+ * a ".meta" entry carrying the stat data of the meta tree root is inserted
+ * right after the list header and the entry count is raised by one.  All
+ * other replies are relayed unchanged.
+ *
+ * The extra entry is only added to a successful reply that actually carries
+ * a list; a failed readdir may arrive without one.
+ */
+int32_t
+meta_readdir_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  dir_entry_t *entries,
+                  int32_t count)
+{
+        meta_private_t *priv = (meta_private_t *)this->private;
+
+        /* the cookie is a pointer-sized integer flag, not an address */
+        if ((long) cookie == 1 && op_ret >= 0 && entries) {
+                dir_entry_t *dir = CALLOC (1, sizeof (dir_entry_t));
+                ERR_ABORT (dir);
+
+                dir->name = strdup (".meta");
+                ERR_ABORT (dir->name);
+                memcpy (&dir->buf, priv->tree->stbuf, sizeof (struct stat));
+                dir->next = entries->next;
+                entries->next = dir;
+
+                STACK_UNWIND (frame, op_ret, op_errno, entries, count+1);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+        return 0;
+}
+
+/*
+ * readdir: paths outside the meta tree are forwarded to the first child; for
+ * "/" the call carries cookie 1 so that meta_readdir_cbk adds the ".meta"
+ * entry.  A meta directory with its own readdir fop is listed through that
+ * fop.  Otherwise the listing is built here from the entry's children: a
+ * header element followed by one element per child, each sharing the child's
+ * name pointer and holding a copy of its stat data.
+ */
+int32_t
+meta_readdir (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        meta_private_t *priv    = (meta_private_t *) this->private;
+        meta_dirent_t  *dir     = lookup_meta_entry (priv->tree, path, NULL);
+        meta_dirent_t  *kid     = NULL;
+        dir_entry_t    *entries = NULL;
+        dir_entry_t    *header  = NULL;
+        int             count   = 0;
+
+        if (!dir) {
+                if (!strcmp (path, "/")) {
+                        STACK_WIND_COOKIE (frame, meta_readdir_cbk,
+                                           (int) 1, /* cookie to tell _cbk to add .meta entry */
+                                           FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+                                           path);
+                } else {
+                        STACK_WIND (frame, meta_readdir_cbk,
+                                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+                                    path);
+                }
+                return 0;
+        }
+
+        if (dir->fops && dir->fops->readdir) {
+                STACK_WIND (frame, meta_readdir_cbk,
+                            this, dir->fops->readdir, path);
+                return 0;
+        }
+
+        /* each child is pushed at the front of the list */
+        for (kid = dir->children; kid; kid = kid->next) {
+                dir_entry_t *d = CALLOC (1, sizeof (dir_entry_t));
+                ERR_ABORT (d);
+                d->name = kid->name;
+                d->buf = *kid->stbuf;
+                d->next = entries;
+                entries = d;
+                count++;
+        }
+
+        header = CALLOC (1, sizeof (dir_entry_t));
+        ERR_ABORT (header);
+        header->next = entries;
+        STACK_UNWIND (frame, 0, 0, header, count);
+        return 0;
+}
+
+/* releasedir callback: relay the result unchanged to the caller. */
+int32_t
+meta_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * releasedir: forwarded as-is to the first child.
+ *
+ * NOTE(review): contexts created by meta_opendir for meta directories are
+ * forwarded too - confirm the child can cope with them.
+ */
+int32_t
+meta_releasedir (call_frame_t *frame, xlator_t *this, dict_t *fd)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_releasedir_cbk,
+                    subvol, subvol->fops->releasedir,
+                    fd);
+        return 0;
+}
+
+/* fsyncdir callback: relay the result unchanged to the caller. */
+int32_t
+meta_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* fsyncdir: forwarded as-is to the first child. */
+int32_t
+meta_fsyncdir (call_frame_t *frame, xlator_t *this,
+               dict_t *fd, int32_t flags)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_fsyncdir_cbk,
+                    subvol, subvol->fops->fsyncdir,
+                    fd, flags);
+        return 0;
+}
+
+/* statfs callback: relay the result unchanged to the caller. */
+int32_t
+meta_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+        return 0;
+}
+
+/* statfs: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_statfs (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_statfs_cbk,
+                    subvol, subvol->fops->statfs,
+                    path);
+        return 0;
+}
+
+/* setxattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* setxattr: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_setxattr (call_frame_t *frame, xlator_t *this,
+               const char *path, const char *name,
+               const char *value, size_t size, int32_t flags)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_setxattr_cbk,
+                    subvol, subvol->fops->setxattr,
+                    path, name, value, size, flags);
+        return 0;
+}
+
+/* getxattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, char *value)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, value);
+        return 0;
+}
+
+/* getxattr: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_getxattr (call_frame_t *frame, xlator_t *this,
+               const char *path, const char *name, size_t size)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_getxattr_cbk,
+                    subvol, subvol->fops->getxattr,
+                    path, name, size);
+        return 0;
+}
+
+/* listxattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, char *value)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, value);
+        return 0;
+}
+
+/* listxattr: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_listxattr (call_frame_t *frame, xlator_t *this,
+                const char *path, size_t size)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_listxattr_cbk,
+                    subvol, subvol->fops->listxattr,
+                    path, size);
+        return 0;
+}
+
+/* removexattr callback: relay the result unchanged to the caller. */
+int32_t
+meta_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* removexattr: not handled by meta, forwarded as-is to the first child. */
+int32_t
+meta_removexattr (call_frame_t *frame, xlator_t *this,
+                  const char *path, const char *name)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_removexattr_cbk,
+                    subvol, subvol->fops->removexattr,
+                    path, name);
+        return 0;
+}
+
+/* lk callback: relay the result unchanged to the caller. */
+int32_t
+meta_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, lock);
+        return 0;
+}
+
+/* lk: forwarded as-is to the first child. */
+int32_t
+meta_lk (call_frame_t *frame, xlator_t *this,
+         dict_t *file, int32_t cmd, struct flock *lock)
+{
+        xlator_t *subvol = FIRST_CHILD(this);
+
+        STACK_WIND (frame, meta_lk_cbk,
+                    subvol, subvol->fops->lk,
+                    file, cmd, lock);
+        return 0;
+}
+
+/*
+ * add_xlator_to_tree - add the meta entries describing one translator, then
+ * recurse into its children.
+ *
+ * @tree:   root of the meta tree
+ * @this:   translator to describe
+ * @prefix: path of the directory the translator's own directory goes into
+ *
+ * Creates "<prefix>/<name>" with the sub-entries "subvolumes" (directory),
+ * "view" (directory, meta_xlator_view_fops) and "type" (file,
+ * meta_xlator_type_fops); children of @this are added below "subvolumes".
+ *
+ * Allocation failures abort through ERR_ABORT, like the other allocations in
+ * this file.
+ *
+ * NOTE(review): the path strings are not freed here because it is not
+ * visible whether insert_meta_entry () keeps references to them.
+ */
+static void
+add_xlator_to_tree (meta_dirent_t *tree, xlator_t *this,
+                    const char *prefix)
+{
+        char *dir = NULL;
+        char *children = NULL;
+        char *type = NULL;
+        char *view = NULL;
+        meta_dirent_t *v = NULL;
+        meta_dirent_t *t = NULL;
+        xlator_list_t *trav = NULL;
+
+        /* asprintf () leaves the pointer undefined on failure: reset it so
+           the check below is meaningful */
+        if (asprintf (&dir, "%s/%s", prefix, this->name) == -1)
+                dir = NULL;
+        ERR_ABORT (dir);
+
+        if (asprintf (&children, "%s/%s", dir, "subvolumes") == -1)
+                children = NULL;
+        ERR_ABORT (children);
+
+        if (asprintf (&type, "%s/%s", dir, "type") == -1)
+                type = NULL;
+        ERR_ABORT (type);
+
+        if (asprintf (&view, "%s/%s", dir, "view") == -1)
+                view = NULL;
+        ERR_ABORT (view);
+
+        insert_meta_entry (tree, dir, S_IFDIR, NULL, NULL);
+        insert_meta_entry (tree, children, S_IFDIR, NULL, NULL);
+
+        v = insert_meta_entry (tree, view, S_IFDIR, NULL,
+                               &meta_xlator_view_fops);
+        if (v)
+                v->view_xlator = this;
+
+        t = insert_meta_entry (tree, type, S_IFREG, NULL,
+                               &meta_xlator_type_fops);
+        if (t)
+                t->view_xlator = this;
+
+        for (trav = this->children; trav; trav = trav->next)
+                add_xlator_to_tree (tree, trav->xlator, children);
+}
+
+/*
+ * build_meta_tree - create the root of the meta tree and populate it.
+ *
+ * The root is named ".meta" and is a directory readable and searchable by
+ * everyone.  It receives "/.meta/version" (meta_version_fops) and
+ * "/.meta/xlators", below which every child of @this is added through
+ * add_xlator_to_tree ().
+ *
+ * Allocation failures abort through ERR_ABORT, so the root's name and stat
+ * buffer are never used while NULL.
+ */
+static void
+build_meta_tree (xlator_t *this)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        xlator_list_t *trav = NULL;
+
+        priv->tree = CALLOC (1, sizeof (meta_dirent_t));
+        ERR_ABORT (priv->tree);
+
+        priv->tree->name = strdup (".meta");
+        ERR_ABORT (priv->tree->name);
+
+        priv->tree->stbuf = new_stbuf ();
+        ERR_ABORT (priv->tree->stbuf);
+        priv->tree->stbuf->st_mode = S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH |
+                S_IXUSR | S_IXGRP | S_IXOTH;
+
+        insert_meta_entry (priv->tree, "/.meta/version",
+                           S_IFREG, NULL, &meta_version_fops);
+
+        insert_meta_entry (priv->tree, "/.meta/xlators",
+                           S_IFDIR, NULL, NULL);
+
+        for (trav = this->children; trav; trav = trav->next)
+                add_xlator_to_tree (priv->tree, trav->xlator, "/.meta/xlators");
+}
+
+/*
+ * init - set up the meta translator.
+ *
+ * Refuses to load unless it is the root of the translator tree.  Stores the
+ * "directory" option (default ".meta") in the private data and builds the
+ * meta tree.  priv->directory is always heap-allocated, whichever source the
+ * value came from, so it can be released uniformly.
+ *
+ * Returns 0 on success, -1 when the translator is not the root.
+ */
+int32_t
+init (xlator_t *this)
+{
+        meta_private_t *priv = NULL;
+        data_t *directory = NULL;
+
+        if (this->parent != NULL) {
+                gf_log ("meta", GF_LOG_ERROR, "FATAL: meta should be the root of the xlator tree");
+                return -1;
+        }
+
+        priv = CALLOC (1, sizeof (meta_private_t));
+        ERR_ABORT (priv);
+
+        directory = dict_get (this->options, "directory");
+        priv->directory = strdup (directory ? data_to_str (directory)
+                                  : ".meta");
+        ERR_ABORT (priv->directory);
+
+        this->private = priv;
+        build_meta_tree (this);
+
+        return 0;
+}
+
+/* fini: nothing is done here; always returns 0.
+   NOTE(review): the private data and the meta tree allocated by init () are
+   not released - confirm whether that is intended. */
+int32_t
+fini (xlator_t *this)
+{
+ return 0;
+}
+
+/* File operations exported by the meta translator.  Every slot points at the
+   meta_* function of the same name defined in this file. */
+struct xlator_fops fops = {
+ .getattr = meta_getattr,
+ .readlink = meta_readlink,
+ .mknod = meta_mknod,
+ .mkdir = meta_mkdir,
+ .unlink = meta_unlink,
+ .rmdir = meta_rmdir,
+ .symlink = meta_symlink,
+ .rename = meta_rename,
+ .link = meta_link,
+ .chmod = meta_chmod,
+ .chown = meta_chown,
+ .truncate = meta_truncate,
+ .utimes = meta_utimes,
+ .open = meta_open,
+ .readv = meta_readv,
+ .writev = meta_writev,
+ .statfs = meta_statfs,
+ .flush = meta_flush,
+ .release = meta_release,
+ .fsync = meta_fsync,
+ .setxattr = meta_setxattr,
+ .getxattr = meta_getxattr,
+ .listxattr = meta_listxattr,
+ .removexattr = meta_removexattr,
+ .opendir = meta_opendir,
+ .readdir = meta_readdir,
+ .releasedir = meta_releasedir,
+ .fsyncdir = meta_fsyncdir,
+ .access = meta_access,
+ .ftruncate = meta_ftruncate,
+ .fgetattr = meta_fgetattr,
+ .create = meta_create,
+ .lk = meta_lk,
+};
+
+/* Management operations table: no entries are set by this translator. */
+struct xlator_mops mops = {
+};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
new file mode 100644
index 00000000000..6823ef85bee
--- /dev/null
+++ b/xlators/meta/src/meta.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __META_H__
+#define __META_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/*
+ * One node of the virtual meta tree.
+ * This header uses struct stat, xlator_t and struct xlator_fops without
+ * including their definitions, so it relies on the including file.
+ */
+struct _meta_dirent {
+ /* name of this entry */
+ const char *name;
+ /* NOTE(review): presumably the S_IF* file type passed on insertion - confirm in tree.c */
+ int type;
+ /* first child entry; siblings are chained through next */
+ struct _meta_dirent *children;
+ /* NOTE(review): presumably the containing directory entry - confirm in tree.c */
+ struct _meta_dirent *parent;
+ /* next sibling in the parent's list of children */
+ struct _meta_dirent *next;
+ /* stat data reported for this entry */
+ struct stat *stbuf;
+ /* translator this entry describes (set for the "view" and "type" entries) */
+ xlator_t *view_xlator;
+ /* entry-specific fops; may be NULL, and individual slots may be unset */
+ struct xlator_fops *fops;
+};
+typedef struct _meta_dirent meta_dirent_t;
+
+/* Private data of the meta translator. */
+typedef struct {
+ /* value of the "directory" option, ".meta" when the option is absent */
+ const char *directory;
+ /* root of the virtual meta tree */
+ meta_dirent_t *tree;
+} meta_private_t;
+
+#include "tree.h"
+#include "misc.h"
+
+#endif /* __META_H__ */
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
new file mode 100644
index 00000000000..9c2f50d3426
--- /dev/null
+++ b/xlators/meta/src/misc.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "meta.h"
+
+/* Smaller of two values; an argument may be evaluated twice, so avoid side effects. */
+#define min(x,y) ((x) < (y) ? (x) : (y))
+
+/* /.meta/version */
+/* Text served by the version file: package name, a space, package version, newline. */
+static const char *version_str = PACKAGE_NAME " " PACKAGE_VERSION "\n";
+
+/*
+ * readv handler for /.meta/version.
+ *
+ * Replies with one iovec that points into the static version string,
+ * starting at `offset` and holding at most `size` bytes. An offset that
+ * is negative or at/after the end of the string produces an empty read
+ * rather than a pointer outside the string. `this` and `fd` are unused.
+ *
+ * Always returns 0; the result is delivered through STACK_UNWIND, with
+ * the byte count as op_ret.
+ */
+int32_t
+meta_version_readv (call_frame_t *frame, xlator_t *this,
+                    dict_t *fd, size_t size, off_t offset)
+{
+        size_t       version_size = strlen (version_str);
+        struct iovec vec;
+
+        vec.iov_base = (void *) version_str;
+        vec.iov_len  = 0;
+
+        /* only compute a length when the offset lies inside the string;
+           otherwise the subtraction would wrap to a huge value */
+        if (offset >= 0 && offset < (off_t) version_size) {
+                size_t available = version_size - (size_t) offset;
+
+                vec.iov_base = (void *) (version_str + offset);
+                vec.iov_len  = min (available, size);
+        }
+
+        STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
+        return 0;
+}
+
+/*
+ * getattr handler for /.meta/version.
+ *
+ * Looks the entry up in the meta tree, refreshes its st_size to the
+ * length of the version string and unwinds with the entry's stat buffer.
+ * Always returns 0 (the original fell off the end of a non-void function).
+ */
+int32_t
+meta_version_getattr (call_frame_t *frame,
+                      xlator_t *this,
+                      const char *path)
+{
+        meta_private_t *priv = (meta_private_t *) this->private;
+        meta_dirent_t  *root = priv->tree;
+        meta_dirent_t  *file = lookup_meta_entry (root, path, NULL);
+
+        file->stbuf->st_size = strlen (version_str);
+        STACK_UNWIND (frame, 0, 0, file->stbuf);
+        return 0;
+}
+
+/* fops table for the version file: only readv and getattr are provided. */
+struct xlator_fops meta_version_fops = {
+ .readv = meta_version_readv,
+ .getattr = meta_version_getattr
+};
+
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
new file mode 100644
index 00000000000..433c604ebc3
--- /dev/null
+++ b/xlators/meta/src/misc.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __MISC_H__
+#define __MISC_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+/* fops table for /.meta/version; declared here, defined once in misc.c. */
+extern struct xlator_fops meta_version_fops;
+
+#endif /* __MISC_H__ */
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
new file mode 100644
index 00000000000..ec88c42a084
--- /dev/null
+++ b/xlators/meta/src/tree.c
@@ -0,0 +1,176 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+
+#include "meta.h"
+
+/*
+ * Return 1 when the first component of `path` (after any leading
+ * slashes) is exactly ".meta", 0 otherwise. A longer component that
+ * merely starts with ".meta" (e.g. "/.metadata") is rejected.
+ */
+static int
+is_meta_path (const char *path)
+{
+        const size_t len = strlen (".meta");
+
+        while (*path == '/')
+                path++;
+
+        if (strncmp (path, ".meta", len) != 0)
+                return 0;
+
+        /* the component must end right after ".meta" */
+        return (path[len] == '\0' || path[len] == '/');
+}
+
+/*
+ * Allocate a stat buffer with default attributes for a meta entry:
+ * read-only for user/group/other, one link, owned by uid/gid 0, empty,
+ * with all three timestamps set to the current time. Each call hands
+ * out the next value of a function-local inode counter.
+ *
+ * The allocation is checked with ERR_ABORT; the caller owns the result.
+ */
+struct stat *
+new_stbuf (void)
+{
+        static int   next_inode = 0;
+        struct stat *stbuf = CALLOC (1, sizeof (struct stat));
+
+        ERR_ABORT (stbuf);
+
+        stbuf->st_dev = 0;
+        stbuf->st_ino = next_inode++;
+        stbuf->st_mode = S_IRUSR | S_IRGRP | S_IROTH;
+        stbuf->st_nlink = 1;
+        stbuf->st_uid = 0;
+        stbuf->st_gid = 0;
+        stbuf->st_rdev = 0;
+        stbuf->st_size = 0;
+        stbuf->st_blksize = 0;
+        stbuf->st_blocks = 0;
+        stbuf->st_atime = time (NULL);
+        stbuf->st_atim.tv_nsec = 0;
+        stbuf->st_mtime = stbuf->st_atime;
+        stbuf->st_mtim.tv_nsec = 0;
+        /* was a self-assignment, which left ctime at 0 */
+        stbuf->st_ctime = stbuf->st_atime;
+        stbuf->st_ctim.tv_nsec = 0;
+
+        return stbuf;
+}
+
+/*
+ * Walk the sibling chain that starts at `node` and return the first
+ * entry whose name equals `dir`, or NULL when no sibling matches.
+ */
+static meta_dirent_t *
+find_entry (meta_dirent_t *node, const char *dir)
+{
+        meta_dirent_t *sibling;
+
+        for (sibling = node; sibling != NULL; sibling = sibling->next) {
+                if (strcmp (sibling->name, dir) == 0)
+                        return sibling;
+        }
+
+        return NULL;
+}
+
+/*
+ * Return the meta_dirent_t corresponding to the pathname.
+ *
+ * If pathname does not exist in the meta tree, return its deepest
+ * ancestor that does exist. The part of the pathname that is left
+ * over is returned in the value-result variable {remain}.
+ * For example, for "/.meta/xlators/brick1/view/foo/bar/baz",
+ * return the entry for "/.meta/xlators/brick1/view"
+ * and set remain to "/foo/bar/baz".
+ *
+ * {remain} may be NULL when the caller does not need the leftover.
+ * Otherwise *remain must be NULL or a heap string on entry; leftover
+ * components are appended to it and the result is a newly allocated
+ * string owned by the caller (the previous buffer is freed).
+ *
+ * Returns NULL when {root} or {path} is NULL or {path} is not below
+ * ".meta". Allocation failures are handled with ERR_ABORT.
+ */
+
+meta_dirent_t *
+lookup_meta_entry (meta_dirent_t *root, const char *path,
+                   char **remain)
+{
+        char          *_path   = NULL;
+        char          *saveptr = NULL;
+        char          *dir     = NULL;
+        meta_dirent_t *trav    = root;
+
+        /* validate before allocating so the early return cannot leak */
+        if (!root || !path || !is_meta_path (path))
+                return NULL;
+
+        _path = strdup (path);
+        ERR_ABORT (_path);
+
+        /* the first component is ".meta" itself, which {root} stands for */
+        dir = strtok_r (_path, "/", &saveptr);
+        dir = strtok_r (NULL, "/", &saveptr);
+
+        while (dir) {
+                meta_dirent_t *ntrav = find_entry (trav->children, dir);
+
+                if (!ntrav) {
+                        /* we have reached bottom of the meta tree.
+                           Unknown dragons lie further below */
+                        char *piece = dir;
+
+                        while (remain && piece) {
+                                char *old    = *remain;
+                                char *joined = NULL;
+
+                                /* the accumulated prefix already starts
+                                   with '/', so only one separator is added */
+                                if (asprintf (&joined, "%s/%s",
+                                              old ? old : "", piece) < 0)
+                                        joined = NULL;
+                                ERR_ABORT (joined);
+
+                                free (old);
+                                *remain = joined;
+                                piece = strtok_r (NULL, "/", &saveptr);
+                        }
+                        break;
+                }
+
+                trav = ntrav;
+                dir = strtok_r (NULL, "/", &saveptr);
+        }
+
+        free (_path);
+        return trav;
+}
+
+/*
+ * Create a new entry for `path` and push it onto the front of its
+ * parent's child list.
+ *
+ * The parent is whatever lookup_meta_entry() returns for the directory
+ * part of `path`. `type` is stored in the entry and OR-ed into
+ * st_mode. When `stbuf` is NULL a default one is created with
+ * new_stbuf(); otherwise the entry keeps the pointer that was passed
+ * in. `fops` is stored as given.
+ *
+ * Returns the new entry, or NULL when `root`/`path` is NULL, `path` is
+ * not below ".meta", has no '/' separator, or no parent can be found.
+ */
+meta_dirent_t *
+insert_meta_entry (meta_dirent_t *root, const char *path,
+                   int type, struct stat *stbuf, struct xlator_fops *fops)
+{
+        const char    *slashpos = NULL;
+        char          *dir      = NULL;
+        meta_dirent_t *parent   = NULL;
+        meta_dirent_t *new      = NULL;
+
+        if (!root || !path || !is_meta_path (path))
+                return NULL;
+
+        slashpos = strrchr (path, '/');
+        if (!slashpos)
+                return NULL;
+
+        dir = strndup (path, slashpos - path);
+        if (!dir)
+                return NULL;
+
+        parent = lookup_meta_entry (root, dir, NULL);
+        free (dir);     /* only needed for the lookup */
+        if (!parent)
+                return NULL;
+
+        new = CALLOC (1, sizeof (meta_dirent_t));
+        ERR_ABORT (new);
+        new->name = strdup (slashpos + 1);
+        new->type = type;
+        new->parent = parent;
+        new->next = parent->children;
+        parent->children = new;
+        if (stbuf)
+                new->stbuf = stbuf;
+        else
+                new->stbuf = new_stbuf ();
+
+        new->stbuf->st_mode |= type;
+        new->fops = fops;
+        return new;
+}
+
+/*
+ * Small driver: builds a root entry named ".meta" and inserts a
+ * regular file "/.meta/version" below it.
+ */
+int main (void)
+{
+        meta_dirent_t *top = NULL;
+
+        top = CALLOC (1, sizeof (meta_dirent_t));
+        ERR_ABORT (top);
+
+        top->name = strdup (".meta");
+        insert_meta_entry (top, "/.meta/version", S_IFREG, NULL, NULL);
+
+        return 0;
+}
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
new file mode 100644
index 00000000000..eb2cf0220ff
--- /dev/null
+++ b/xlators/meta/src/tree.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TREE_H__
+#define __TREE_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* Add an entry for `path` below `root`; `stbuf` may be NULL to get default attributes. */
+meta_dirent_t *
+insert_meta_entry (meta_dirent_t *root, const char *path,
+ int type, struct stat *stbuf, struct xlator_fops *fops);
+/* Find the entry for `path`, or its deepest existing ancestor; the leftover path goes to *remain when remain is non-NULL. */
+meta_dirent_t *
+lookup_meta_entry (meta_dirent_t *root, const char *path,
+ char **remain);
+
+#endif /* __TREE_H__ */
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
new file mode 100644
index 00000000000..7104d10e912
--- /dev/null
+++ b/xlators/meta/src/view.c
@@ -0,0 +1,258 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+
+#include "meta.h"
+
+/*
+ * This file contains fops for the files and directories in
+ * an xlator directory
+ */
+
+/* /.meta/xlators/.../type */
+
+/*
+ * readv handler for an xlator's "type" file.
+ *
+ * The path of the open file is read from `fd` under the key
+ * this->name. The reply is one iovec pointing into the type string of
+ * the entry's view_xlator, starting at `offset` and at most `size`
+ * bytes long; an offset outside the string gives an empty read.
+ *
+ * When `fd` carries no path the call unwinds with -1/EBADFD instead of
+ * falling off the end of the function without replying.
+ * Always returns 0.
+ */
+int32_t
+meta_xlator_type_readv (call_frame_t *frame, xlator_t *this,
+                        dict_t *fd, size_t size, off_t offset)
+{
+        meta_private_t *priv      = (meta_private_t *) this->private;
+        meta_dirent_t  *root      = priv->tree;
+        data_t         *path_data = dict_get (fd, this->name);
+        meta_dirent_t  *file      = NULL;
+        const char     *type      = NULL;
+        size_t          type_size = 0;
+        struct iovec    vec;
+
+        if (!path_data) {
+                STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
+                return 0;
+        }
+
+        file = lookup_meta_entry (root, data_to_str (path_data), NULL);
+        type = file->view_xlator->type;
+        type_size = strlen (type);
+
+        vec.iov_base = (void *) type;
+        vec.iov_len  = 0;
+
+        /* clamp by hand: no min() macro is defined in this file */
+        if (offset >= 0 && offset < (off_t) type_size) {
+                size_t available = type_size - (size_t) offset;
+
+                vec.iov_base = (void *) (type + offset);
+                vec.iov_len  = (available < size) ? available : size;
+        }
+
+        STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
+        return 0;
+}
+
+/*
+ * getattr handler for an xlator's "type" file: sets st_size to the
+ * length of the view xlator's type string and unwinds with the entry's
+ * stat buffer. Always returns 0.
+ */
+int32_t
+meta_xlator_type_getattr (call_frame_t *frame,
+                          xlator_t *this,
+                          const char *path)
+{
+        meta_private_t *conf  = (meta_private_t *) this->private;
+        meta_dirent_t  *entry = lookup_meta_entry (conf->tree, path, NULL);
+
+        entry->stbuf->st_size = strlen (entry->view_xlator->type);
+
+        STACK_UNWIND (frame, 0, 0, entry->stbuf);
+        return 0;
+}
+
+/* fops table for an xlator's "type" file: only readv and getattr are provided. */
+struct xlator_fops meta_xlator_type_fops = {
+ .readv = meta_xlator_type_readv,
+ .getattr = meta_xlator_type_getattr
+};
+
+/*
+ * fops for the "view" directory
+ * {xlator}/view shows the filesystem as it appears
+ * to {xlator}
+ */
+
+/* Callback for the wound getattr: passes op_ret, op_errno and buf up unchanged. */
+static int32_t
+meta_xlator_view_getattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct stat *buf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+}
+
+/*
+ * getattr inside a "view" directory. When the path continues below the
+ * view entry, the leftover path is wound to the view xlator's getattr;
+ * otherwise the view entry's own stat buffer is returned directly.
+ * Always returns 0.
+ */
+int32_t
+meta_xlator_view_getattr (call_frame_t *frame,
+                          xlator_t *this,
+                          const char *path)
+{
+        meta_private_t *conf    = (meta_private_t *) this->private;
+        char           *subpath = NULL;
+        meta_dirent_t  *entry   = lookup_meta_entry (conf->tree, path,
+                                                     &subpath);
+
+        if (!subpath) {
+                STACK_UNWIND (frame, 0, 0, entry->stbuf);
+                return 0;
+        }
+
+        STACK_WIND (frame, meta_xlator_view_getattr_cbk, entry->view_xlator,
+                    entry->view_xlator->fops->getattr,
+                    subpath);
+        return 0;
+}
+
+/* Callback for the wound readdir: passes the entries and their count up unchanged. */
+static int32_t
+meta_xlator_view_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dir_entry_t *entries, int32_t count)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+ return 0;
+}
+
+/*
+ * readdir inside a "view" directory: winds to the view xlator's
+ * readdir with the leftover path, or "/" when nothing is left over.
+ * Always returns 0.
+ */
+int32_t
+meta_xlator_view_readdir (call_frame_t *frame,
+                          xlator_t *this,
+                          const char *path)
+{
+        meta_private_t *conf    = (meta_private_t *) this->private;
+        char           *subpath = NULL;
+        meta_dirent_t  *entry   = NULL;
+
+        entry = lookup_meta_entry (conf->tree, path, &subpath);
+        if (!subpath)
+                subpath = "/";
+
+        STACK_WIND (frame, meta_xlator_view_readdir_cbk,
+                    entry->view_xlator, entry->view_xlator->fops->readdir,
+                    subpath);
+        return 0;
+}
+
+/* Callback shared by the wound open and create: passes ctx and buf up unchanged. */
+static int32_t
+meta_xlator_view_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *ctx, struct stat *buf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
+ return 0;
+}
+
+/*
+ * open inside a "view" directory: winds to the view xlator's open with
+ * the leftover path and the caller's flags and mode. Always returns 0.
+ */
+int32_t
+meta_xlator_view_open (call_frame_t *frame, xlator_t *this,
+                       const char *path, int32_t flags, mode_t mode)
+{
+        meta_private_t *conf    = (meta_private_t *) this->private;
+        char           *subpath = NULL;
+        meta_dirent_t  *entry   = NULL;
+
+        entry = lookup_meta_entry (conf->tree, path, &subpath);
+
+        STACK_WIND (frame, meta_xlator_view_open_cbk,
+                    entry->view_xlator, entry->view_xlator->fops->open,
+                    subpath, flags, mode);
+        return 0;
+}
+
+/*
+ * create inside a "view" directory: winds to the view xlator's create
+ * with the leftover path; the reply is handled by the open callback.
+ * Always returns 0.
+ */
+int32_t
+meta_xlator_view_create (call_frame_t *frame, xlator_t *this,
+                         const char *path, int32_t flags, mode_t mode)
+{
+        meta_private_t *conf    = (meta_private_t *) this->private;
+        char           *subpath = NULL;
+        meta_dirent_t  *entry   = NULL;
+
+        entry = lookup_meta_entry (conf->tree, path, &subpath);
+
+        STACK_WIND (frame, meta_xlator_view_open_cbk,
+                    entry->view_xlator, entry->view_xlator->fops->create,
+                    subpath, flags, mode);
+        return 0;
+}
+
+/* Callback for the wound readv: passes the iovec array and its count up unchanged. */
+static int32_t
+meta_xlator_view_readv_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector,
+ int32_t count)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, vector, count);
+ return 0;
+}
+
+/*
+ * readv on a file opened through a "view" directory. The file's path
+ * is read from `fd` under the key this->name; without it the call
+ * unwinds with -1/EBADFD. Otherwise the read is wound to the view
+ * xlator with the same fd, size and offset. Always returns 0.
+ */
+int32_t
+meta_xlator_view_readv (call_frame_t *frame, xlator_t *this,
+                        dict_t *fd, size_t size, off_t offset)
+{
+        meta_private_t *conf     = (meta_private_t *) this->private;
+        meta_dirent_t  *top      = conf->tree;
+        data_t         *stored   = dict_get (fd, this->name);
+        meta_dirent_t  *entry    = NULL;
+
+        if (!stored) {
+                STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
+                return 0;
+        }
+
+        entry = lookup_meta_entry (top, data_to_str (stored), NULL);
+
+        STACK_WIND (frame, meta_xlator_view_readv_cbk,
+                    entry->view_xlator, entry->view_xlator->fops->readv,
+                    fd, size, offset);
+        return 0;
+}
+
+/* Callback for the wound writev: passes only op_ret and op_errno up. */
+static int32_t
+meta_xlator_view_writev_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno)
+{
+ STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+/*
+ * writev on a file opened through a "view" directory. The file's path
+ * is read from `fd` under the key this->name and the write is wound to
+ * the view xlator with the same fd, vector, count and offset.
+ *
+ * Without a stored path the call unwinds with -1/EBADFD. The error
+ * reply carries only op_ret and op_errno, matching what
+ * meta_xlator_view_writev_cbk sends on the normal path (the original
+ * passed two extra readv-style arguments). Always returns 0.
+ */
+int32_t
+meta_xlator_view_writev (call_frame_t *frame, xlator_t *this,
+                         dict_t *fd,
+                         struct iovec *vector, int32_t count, off_t offset)
+{
+        meta_private_t *priv      = (meta_private_t *) this->private;
+        meta_dirent_t  *root      = priv->tree;
+        data_t         *path_data = dict_get (fd, this->name);
+
+        if (path_data) {
+                const char    *path = data_to_str (path_data);
+                meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
+
+                STACK_WIND (frame, meta_xlator_view_writev_cbk,
+                            file->view_xlator, file->view_xlator->fops->writev,
+                            fd, vector, count, offset);
+                return 0;
+        }
+
+        STACK_UNWIND (frame, -1, EBADFD);
+        return 0;
+}
+
+/* fops table for the "view" directory and everything reached through it. */
+struct xlator_fops meta_xlator_view_fops = {
+ .getattr = meta_xlator_view_getattr,
+ .readdir = meta_xlator_view_readdir,
+ .open = meta_xlator_view_open,
+ .create = meta_xlator_view_create,
+ .readv = meta_xlator_view_readv,
+ .writev = meta_xlator_view_writev
+};
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
new file mode 100644
index 00000000000..2e1ac3ebf44
--- /dev/null
+++ b/xlators/meta/src/view.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __VIEW_H__
+#define __VIEW_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+/* fops tables for the xlator "type" file and "view" directory; defined once in view.c. */
+extern struct xlator_fops meta_xlator_type_fops;
+extern struct xlator_fops meta_xlator_view_fops;
+
+#endif /* __VIEW_H__ */
diff --git a/xlators/mount/Makefile.am b/xlators/mount/Makefile.am
new file mode 100644
index 00000000000..945982d95b4
--- /dev/null
+++ b/xlators/mount/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = @FUSE_CLIENT_SUBDIR@
+
+CLEANFILES =
diff --git a/xlators/mount/fuse/Makefile.am b/xlators/mount/fuse/Makefile.am
new file mode 100644
index 00000000000..3b344b1d7da
--- /dev/null
+++ b/xlators/mount/fuse/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src utils
+
+CLEANFILES =
diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am
new file mode 100644
index 00000000000..9d8d45e4f02
--- /dev/null
+++ b/xlators/mount/fuse/src/Makefile.am
@@ -0,0 +1,14 @@
+
+noinst_HEADERS = fuse-extra.h
+
+xlator_LTLIBRARIES = fuse.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+fuse_la_SOURCES = fuse-bridge.c fuse-extra.c
+fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles $(GF_FUSE_LDADD)
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
+ -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -DFUSE_USE_VERSION=26
+
+
+CLEANFILES =
+
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
new file mode 100644
index 00000000000..8e70558782d
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -0,0 +1,2859 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * TODO:
+ * Need to free_state() when fuse_reply_err() + return.
+ * Check loc->path for "" after fuse_loc_fill in all fops
+ * (now being done in getattr, lookup) or better - make
+ * fuse_loc_fill() and inode_path() return success/failure.
+ */
+
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "xlator.h"
+#include "glusterfs.h"
+#include "defaults.h"
+#include "common-utils.h"
+
+#include <fuse/fuse_lowlevel.h>
+
+#include "fuse-extra.h"
+#include "list.h"
+#include "dict.h"
+
+#include "compat.h"
+#include "compat-errno.h"
+
+/* TODO: when supporting posix acl, remove this definition */
+#define DISABLE_POSIX_ACL
+
+#define ZR_MOUNTPOINT_OPT "mountpoint"
+#define ZR_DIRECT_IO_OPT "direct-io-mode"
+
+#define BIG_FUSE_CHANNEL_SIZE 1048576
+
+/* Private data of the fuse mount translator (reached through this->private). */
+struct fuse_private {
+ int fd;
+ struct fuse *fuse;
+ struct fuse_session *se;
+ struct fuse_chan *ch;
+ char *volfile;
+ size_t volfile_size;
+ char *mount_point;
+ data_t *buf; /* stored in each request's req_refs dict by get_call_frame_for_req() */
+ pthread_t fuse_thread;
+ char fuse_thread_started;
+ uint32_t direct_io_mode; /* when non-zero, fuse_fd_cbk may set fi.direct_io */
+ uint32_t entry_timeout; /* copied into fuse_entry_param.entry_timeout */
+ uint32_t attribute_timeout; /* attr timeout used in entry and attr replies */
+
+};
+typedef struct fuse_private fuse_private_t;
+
+/* Raw fd_t pointer stored in a fuse_file_info's fh field (no reference taken). */
+#define _FI_TO_FD(fi) ((fd_t *)((long)(fi)->fh))
+
+/* Same pointer with a reference taken via fd_ref(), or a null fd_t pointer
+   when fh is 0. The argument is evaluated more than once. */
+#define FI_TO_FD(fi) ((_FI_TO_FD (fi))?(fd_ref (_FI_TO_FD(fi))):((fd_t *) 0))
+
+/*
+ * Create a call frame for `state`, record the state and op number in
+ * the frame root, and wind `fop` with `args` to the first child of the
+ * frame's translator, with `ret` as the callback. The request-refs
+ * dict is captured before the wind and unreferenced after it.
+ * NOTE(review): xl is NULL when the translator has no children, and is
+ * then dereferenced by the wind.
+ */
+#define FUSE_FOP(state, ret, op_num, fop, args ...) \
+ do { \
+ call_frame_t *frame = get_call_frame_for_req (state, 1); \
+ xlator_t *xl = frame->this->children ? \
+ frame->this->children->xlator : NULL; \
+ dict_t *refs = frame->root->req_refs; \
+ frame->root->state = state; \
+ frame->root->op = op_num; \
+ STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
+ dict_unref (refs); \
+ } while (0)
+
+
+/* Per-request state carried from a fuse handler to its callback via frame->root->state. */
+typedef struct {
+ void *pool; /* call pool used to create the request's frame */
+ xlator_t *this; /* the fuse translator */
+ inode_table_t *itable; /* inode table used for inode lookups */
+ loc_t loc; /* primary location; wiped by free_state() */
+ loc_t loc2; /* second location; wiped by free_state() */
+ fuse_req_t req; /* fuse request to reply to */
+ int32_t flags; /* copied into fi.flags by fuse_fd_cbk() */
+ off_t off;
+ size_t size;
+ unsigned long nlookup;
+ fd_t *fd; /* unreferenced by free_state() when set */
+ dict_t *dict; /* unreferenced by free_state() when set */
+ char *name; /* freed by free_state() when set */
+ char is_revalidate; /* 1: known inode, -1: new inode, 2: fresh lookup after failed revalidate */
+} fuse_state_t;
+
+/* NOTE(review): prototype only; presumably declared here because no included header provides it - confirm. */
+int fuse_chan_receive (struct fuse_chan *ch,
+ char *buf,
+ int32_t size);
+
+
+/*
+ * Release everything a fuse_state_t owns and then the state itself:
+ * both locs are wiped, the dict and fd are unreferenced and the name
+ * is freed when set. The dict and fd pointers are overwritten with
+ * poison values after release. With DEBUG defined the whole structure
+ * is filled with 0x90 before it is freed.
+ */
+static void
+free_state (fuse_state_t *state)
+{
+        loc_wipe (&state->loc);
+        loc_wipe (&state->loc2);
+
+        if (state->dict != NULL) {
+                dict_unref (state->dict);
+                state->dict = (void *)0xaaaaeeee;
+        }
+
+        if (state->name != NULL) {
+                FREE (state->name);
+                state->name = NULL;
+        }
+
+        if (state->fd != NULL) {
+                fd_unref (state->fd);
+                state->fd = (void *)0xfdfdfdfd;
+        }
+
+#ifdef DEBUG
+        memset (state, 0x90, sizeof (*state));
+#endif
+        FREE (state);
+}
+
+
+/*
+ * Allocate a zeroed fuse_state_t for `req` and fill in the call pool,
+ * inode table, request and translator taken from the request's
+ * userdata. The allocation is checked with ERR_ABORT.
+ */
+fuse_state_t *
+state_from_req (fuse_req_t req)
+{
+        xlator_t     *xl    = fuse_req_userdata (req);
+        fuse_state_t *fresh = (void *)calloc (1, sizeof (*fresh));
+
+        ERR_ABORT (fresh);
+
+        fresh->this   = xl;
+        fresh->req    = req;
+        fresh->itable = xl->itable;
+        fresh->pool   = xl->ctx->pool;
+
+        return fresh;
+}
+
+/* Return the pid recorded in the fuse context of `req`. */
+static pid_t
+get_pid_from_req (fuse_req_t req)
+{
+        return fuse_req_ctx (req)->pid;
+}
+
+/*
+ * Create a call frame for the request held in `state`.
+ *
+ * The translator comes from the fuse request's userdata when a request
+ * is present, otherwise from state->this. With a request, the frame
+ * root gets the caller's uid, gid, pid and the request's call id.
+ * When `d` is non-zero a new referenced dict is attached as req_refs
+ * and the private buffer is stored in it under a NULL key.
+ */
+static call_frame_t *
+get_call_frame_for_req (fuse_state_t *state, char d)
+{
+        fuse_req_t      req   = state->req;
+        xlator_t       *this  = req ? fuse_req_userdata (req) : state->this;
+        fuse_private_t *priv  = this->private;
+        call_frame_t   *frame = create_frame (this, state->pool);
+
+        if (req) {
+                const struct fuse_ctx *ctx = fuse_req_ctx (req);
+
+                frame->root->uid    = ctx->uid;
+                frame->root->gid    = ctx->gid;
+                frame->root->pid    = ctx->pid;
+                frame->root->unique = req_callid (req);
+        }
+
+        if (d) {
+                frame->root->req_refs = dict_ref (get_new_dict ());
+                dict_set (frame->root->req_refs, NULL, priv->buf);
+        }
+
+        frame->root->type = GF_OP_TYPE_FOP_REQUEST;
+
+        return frame;
+}
+
+
+/*
+ * Fill `loc` (inode, ino, parent, path, name) from an inode number
+ * and/or a parent inode number plus entry name.
+ *
+ * Fields already set in `loc` (inode, parent) are kept, so a repeated
+ * call does not search again. Returns 0 on success and a value <= 0
+ * from inode_path(), or -1, on failure; the caller must check it.
+ */
+GF_MUST_CHECK static int32_t
+fuse_loc_fill (loc_t *loc,
+ fuse_state_t *state,
+ ino_t ino,
+ ino_t par,
+ const char *name)
+{
+ inode_t *inode = NULL, *parent = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+
+ /* resistance against multiple invocation of loc_fill not to get
+ reference leaks via inode_search() */
+
+ inode = loc->inode;
+
+ if (!inode) {
+ /* by inode number first; a (parent, name) pair, when given, overrides the result */
+ if (ino)
+ inode = inode_search (state->itable, ino, NULL);
+ if (par && name)
+ inode = inode_search (state->itable, par, name);
+
+ loc->inode = inode;
+ if (inode)
+ loc->ino = inode->ino;
+ }
+
+ parent = loc->parent;
+ if (!parent) {
+ if (inode)
+ parent = inode_parent (inode, par, name);
+ else
+ parent = inode_search (state->itable, par, NULL);
+ loc->parent = parent;
+ }
+
+ /* build the path from parent + name when possible, else from the inode alone */
+ if (name && parent) {
+ ret = inode_path (parent, name, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "inode_path failed for %"PRId64"/%s",
+ parent->ino, name);
+ goto fail;
+ } else {
+ loc->path = path;
+ }
+ } else if (inode) {
+ ret = inode_path (inode, NULL, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "inode_path failed for %"PRId64,
+ inode->ino);
+ goto fail;
+ } else {
+ loc->path = path;
+ }
+ }
+ /* name points at the last path component, or "" when the path has no '/' */
+ if (loc->path) {
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ else loc->name = "";
+ }
+
+ /* only inode number 1 is allowed to have no parent */
+ if ((ino != 1) &&
+ (parent == NULL)) {
+ gf_log ("fuse-bridge", GF_LOG_ERROR,
+ "failed to search parent for %"PRId64"/%s (%"PRId64")",
+ (ino_t)par, name, (ino_t)ino);
+ ret = -1;
+ goto fail;
+ }
+ ret = 0;
+fail:
+ return ret;
+}
+
+
+/*
+ * Decide whether a revalidate result requires a fresh lookup.
+ * Returns 1 when the revalidate failed, when the inode number in `buf`
+ * differs from the cached one, or when the file-type bits changed;
+ * returns 0 otherwise. Each case is logged.
+ */
+static int
+need_fresh_lookup (int32_t op_ret, int32_t op_errno,
+                   loc_t *loc, struct stat *buf)
+{
+        if (op_ret == -1) {
+                gf_log ("fuse-bridge",
+                        (op_errno == ENOENT)? GF_LOG_DEBUG: GF_LOG_WARNING,
+                        "revalidate of %s failed (%s)",
+                        loc->path, strerror (op_errno));
+                return 1;
+        }
+
+        if (buf->st_ino != loc->inode->ino) {
+                gf_log ("fuse-bridge", GF_LOG_WARNING,
+                        "inode num of %s changed %"PRId64" -> %"PRId64,
+                        loc->path, loc->inode->ino, buf->st_ino);
+                return 1;
+        }
+
+        if ((loc->inode->st_mode & S_IFMT) != (buf->st_mode & S_IFMT)) {
+                gf_log ("fuse-bridge", GF_LOG_WARNING,
+                        "inode mode of %s changed 0%o -> 0%o",
+                        loc->path, loc->inode->st_mode, buf->st_mode);
+                return 1;
+        }
+
+        return 0;
+}
+
+
+/* Forward declaration: fuse_entry_cbk() winds a lookup with this callback before it is defined. */
+static int
+fuse_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stat,
+ dict_t *dict);
+
+/*
+ * Common completion handler for fops that produce an inode and a stat
+ * buffer.
+ *
+ * If the call was a revalidate (is_revalidate == 1) and
+ * need_fresh_lookup() says the cached inode can no longer be trusted,
+ * the old inode is dropped, a new one is created and a lookup is wound
+ * again on the same frame. Otherwise the result is sent to fuse: an
+ * entry reply when the loc has a parent, an attr reply when it does
+ * not, or an error reply on failure. The state and the call stack are
+ * released afterwards. Always returns 0.
+ */
+static int
+fuse_entry_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *buf)
+{
+ fuse_state_t *state;
+ fuse_req_t req;
+ struct fuse_entry_param e = {0, };
+ fuse_private_t *priv = this->private;
+
+ state = frame->root->state;
+ req = state->req;
+
+ /* a successful reply for loc.ino 1 always reports inode number 1 */
+ if (!op_ret && state->loc.ino == 1) {
+ buf->st_ino = 1;
+ }
+
+ if (state->is_revalidate == 1
+ && need_fresh_lookup (op_ret, op_errno, &state->loc, buf)) {
+ inode_unref (state->loc.inode);
+ state->loc.inode = inode_new (state->itable);
+ /* 2 marks the retry, so this branch is not taken a second time */
+ state->is_revalidate = 2;
+
+ STACK_WIND (frame, fuse_lookup_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup,
+ &state->loc, state->dict);
+
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRId64": %s() %s => %"PRId64" (%"PRId64")",
+ frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.path, buf->st_ino, state->loc.ino);
+
+ inode_link (inode, state->loc.parent, state->loc.name, buf);
+
+ inode_lookup (inode);
+
+ /* TODO: make these timeouts configurable (via meta?) */
+ e.ino = inode->ino;
+
+#ifdef GF_DARWIN_HOST_OS
+ e.generation = 0;
+#else
+ e.generation = buf->st_ctime;
+#endif
+
+ e.entry_timeout = priv->entry_timeout;
+ e.attr_timeout = priv->attribute_timeout;
+ e.attr = *buf;
+ e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE;
+
+ /* an inode number of 0 is only logged; the reply is still sent */
+ if (!e.ino || !buf->st_ino) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRId64": %s() %s returning inode 0",
+ frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path);
+ }
+
+ if (state->loc.parent)
+ fuse_reply_entry (req, &e);
+ else
+ fuse_reply_attr (req, buf, priv->attribute_timeout);
+ } else {
+ gf_log ("glusterfs-fuse",
+ (op_errno == ENOENT ? GF_LOG_DEBUG : GF_LOG_ERROR),
+ "%"PRId64": %s() %s => -1 (%s)", frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ strerror (op_errno));
+ fuse_reply_err (req, op_errno);
+ }
+
+ free_state (state);
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+
+/* Lookup callback: drops the returned dict and hands everything else to fuse_entry_cbk(). */
+static int
+fuse_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stat,
+ dict_t *dict)
+{
+ fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat);
+ return 0;
+}
+
+
+/*
+ * fuse LOOKUP handler: resolve `name` under parent inode `par`.
+ *
+ * Replies EINVAL when the loc cannot be filled. Otherwise winds a
+ * lookup with a new dict; when the inode is not known yet a new one is
+ * created, and is_revalidate records which case applies.
+ */
+static void
+fuse_lookup (fuse_req_t req,
+ fuse_ino_t par,
+ const char *name)
+{
+ fuse_state_t *state;
+ int32_t ret = -1;
+
+ state = state_from_req (req);
+
+ ret = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRId64": LOOKUP %"PRId64"/%s (fuse_loc_fill() failed)",
+ req_callid (req), (ino_t)par, name);
+ free_state (state);
+ fuse_reply_err (req, EINVAL);
+ return;
+ }
+
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRId64": LOOKUP %s", req_callid (req),
+ state->loc.path);
+
+ state->loc.inode = inode_new (state->itable);
+ /* to differentiate in entry_cbk what kind of call it is */
+ state->is_revalidate = -1;
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRId64": LOOKUP %s(%"PRId64")", req_callid (req),
+ state->loc.path, state->loc.inode->ino);
+ state->is_revalidate = 1;
+ }
+
+ state->dict = dict_new();
+
+ FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP,
+ lookup, &state->loc, state->dict);
+}
+
+
+/*
+ * fuse FORGET handler. Inode number 1 is acknowledged without any
+ * work. For other inodes the inode is searched in the table and, when
+ * found, `nlookup` lookups are forgotten and the search reference is
+ * dropped. The request is always answered with fuse_reply_none().
+ */
+static void
+fuse_forget (fuse_req_t req,
+             fuse_ino_t ino,
+             unsigned long nlookup)
+{
+        fuse_state_t *state  = NULL;
+        inode_t      *target = NULL;
+
+        if (ino == 1) {
+                fuse_reply_none (req);
+                return;
+        }
+
+        state  = state_from_req (req);
+        target = inode_search (state->itable, ino, NULL);
+
+        if (!target) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "got forget, but inode (%lu) not found", ino);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "got forget on inode (%lu)", ino);
+                inode_forget (target, nlookup);
+                inode_unref (target);
+        }
+
+        free_state (state);
+        fuse_reply_none (req);
+}
+
+
+/*
+ * Completion handler for fops that return only a stat buffer.
+ * On success the block size is overridden with BIG_FUSE_CHANNEL_SIZE
+ * and an attr reply is sent with the configured attribute timeout; on
+ * failure the errno is sent back. The state and the call stack are
+ * released in both cases. Always returns 0.
+ */
+static int
+fuse_attr_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct stat *buf)
+{
+        fuse_private_t *priv  = this->private;
+        fuse_state_t   *state = frame->root->state;
+        fuse_req_t      req   = state->req;
+
+        if (op_ret != 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": %s() %s => -1 (%s)", frame->root->unique,
+                        gf_fop_list[frame->root->op],
+                        state->loc.path ? state->loc.path : "ERR",
+                        strerror (op_errno));
+
+                fuse_reply_err (req, op_errno);
+        } else {
+                gf_log ("glusterfs-fuse",
+                        (buf->st_ino ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                        "%"PRId64": %s() %s => %"PRId64, frame->root->unique,
+                        gf_fop_list[frame->root->op],
+                        state->loc.path ? state->loc.path : "ERR",
+                        buf->st_ino);
+
+                /* TODO: make these timeouts configurable via meta */
+                /* TODO: what if the inode number has changed by now */
+                buf->st_blksize = BIG_FUSE_CHANNEL_SIZE;
+
+                fuse_reply_attr (req, buf, priv->attribute_timeout);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * fuse GETATTR handler.
+ *
+ * Inode number 1 is served with a lookup, so the reply goes through
+ * fuse_lookup_cbk(). For any other inode: when an fd opened by the
+ * calling pid exists and the inode is not a directory, an fstat is
+ * wound on that fd; otherwise a stat is wound on the loc.
+ *
+ * Every error return replies EINVAL and releases the request state;
+ * the NULL-inode path used to return without freeing it.
+ */
+static void
+fuse_getattr (fuse_req_t req,
+              fuse_ino_t ino,
+              struct fuse_file_info *fi)
+{
+        fuse_state_t *state;
+        fd_t         *fd = NULL;
+        int32_t       ret = -1;
+
+        state = state_from_req (req);
+
+        if (ino == 1) {
+                ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+                if (ret < 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)",
+                                req_callid(req), (ino_t)ino);
+                        fuse_reply_err (req, EINVAL);
+                        free_state (state);
+                        return;
+                }
+
+                if (state->loc.inode)
+                        state->is_revalidate = 1;
+                else
+                        state->is_revalidate = -1;
+
+                state->dict = dict_new();
+
+                FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP,
+                          lookup, &state->loc, state->dict);
+                return;
+        }
+
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+
+        if (!state->loc.inode) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": GETATTR %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
+                        req_callid (req), (int64_t)ino, state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                /* release the state on this error path as well */
+                free_state (state);
+                return;
+        }
+
+        fd = fd_lookup (state->loc.inode, get_pid_from_req (req));
+        state->fd = fd;
+        if (!fd || S_ISDIR (state->loc.inode->st_mode)) {
+                /* this is the @ret of fuse_loc_fill, checked here
+                   to permit fstat() to happen even when fuse_loc_fill fails
+                */
+                if (ret < 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)",
+                                req_callid(req), (ino_t)ino);
+                        fuse_reply_err (req, EINVAL);
+                        free_state (state);
+                        return;
+                }
+
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": GETATTR %"PRId64" (%s)",
+                        req_callid (req), (int64_t)ino, state->loc.path);
+
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT,
+                          stat, &state->loc);
+        } else {
+
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": FGETATTR %"PRId64" (%s/%p)",
+                        req_callid (req), (int64_t)ino, state->loc.path, fd);
+
+                FUSE_FOP (state,fuse_attr_cbk, GF_FOP_FSTAT,
+                          fstat, fd);
+        }
+}
+
+
+/*
+ * Callback shared by OPEN and OPENDIR.
+ *
+ * On success the fd pointer is handed to the kernel as the file handle and
+ * an extra reference is taken on it; that reference is dropped again if
+ * fuse_reply_open() returns -ENOENT.  On failure op_errno is sent back.
+ * The request state and the call stack are destroyed in all cases.
+ */
+static int
+fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        fuse_state_t          *state = frame->root->state;
+        fuse_req_t             req   = state->req;
+        fuse_private_t        *priv  = this->private;
+        struct fuse_file_info  fi    = {0, };
+
+        if (op_ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": %s() %s => -1 (%s)", frame->root->unique,
+                        gf_fop_list[frame->root->op], state->loc.path,
+                        strerror (op_errno));
+
+                fuse_reply_err (req, op_errno);
+                goto out;
+        }
+
+        fi.fh    = (unsigned long) fd;
+        fi.flags = state->flags;
+
+        /* direct I/O only for non-directories with an access mode in the
+           low two flag bits set, and only when the mount enables it */
+        if (!S_ISDIR (fd->inode->st_mode)
+            && (fi.flags & 3)
+            && priv->direct_io_mode) {
+                fi.direct_io = 1;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": %s() %s => %p", frame->root->unique,
+                gf_fop_list[frame->root->op], state->loc.path, fd);
+
+        fd_ref (fd);
+        if (fuse_reply_open (req, &fi) == -ENOENT) {
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "open() got EINTR");
+                fd_unref (fd);
+                goto out;
+        }
+
+        fd_bind (fd);
+
+out:
+        free_state (state);
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+
+/*
+ * Mode-change half of setattr.
+ *
+ * Winds FCHMOD when @fi yields an fd, otherwise resolves @ino into a loc
+ * and winds CHMOD.  The reply is produced by fuse_attr_cbk, or directly
+ * with EINVAL when the loc cannot be filled.
+ */
+static void
+do_chmod (fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+          struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = NULL;
+        int32_t       ret   = -1;
+
+        if (fi != NULL) {
+                fd = FI_TO_FD (fi);
+                state->fd = fd;
+        }
+
+        if (fd != NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": FCHMOD %p", req_callid (req), fd);
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHMOD,
+                          fchmod, fd, attr->st_mode);
+                return;
+        }
+
+        /* no fd: operate on the path resolved from the inode number */
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": CHMOD %"PRId64" (%s) (fuse_loc_fill() failed)",
+                        req_callid (req), (int64_t)ino,
+                        state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": CHMOD %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHMOD,
+                  chmod, &state->loc, attr->st_mode);
+}
+
+
+/*
+ * Ownership-change half of setattr.
+ *
+ * An id whose FUSE_SET_ATTR_* bit is absent from @valid is passed down as
+ * (uid_t) -1 / (gid_t) -1.  Winds FCHOWN when @fi yields an fd, otherwise
+ * resolves @ino into a loc and winds CHOWN.
+ */
+static void
+do_chown (fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+          int valid, struct fuse_file_info *fi)
+{
+        fuse_state_t *state = NULL;
+        fd_t         *fd    = NULL;
+        int32_t       ret   = -1;
+        uid_t         uid   = (uid_t) -1;
+        gid_t         gid   = (gid_t) -1;
+
+        if (valid & FUSE_SET_ATTR_UID)
+                uid = attr->st_uid;
+        if (valid & FUSE_SET_ATTR_GID)
+                gid = attr->st_gid;
+
+        state = state_from_req (req);
+
+        if (fi != NULL) {
+                fd = FI_TO_FD (fi);
+                state->fd = fd;
+        }
+
+        if (fd != NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": FCHOWN %p", req_callid (req), fd);
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHOWN,
+                          fchown, fd, uid, gid);
+                return;
+        }
+
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": CHOWN %"PRId64" (%s) (fuse_loc_fill() failed)",
+                        req_callid (req), (int64_t)ino,
+                        state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": CHOWN %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHOWN,
+                  chown, &state->loc, uid, gid);
+}
+
+
+/*
+ * Size-change half of setattr.
+ *
+ * Winds FTRUNCATE to attr->st_size when @fi yields an fd, otherwise
+ * resolves @ino into a loc and winds TRUNCATE.
+ */
+static void
+do_truncate (fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+             struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = NULL;
+        int32_t       ret   = -1;
+
+        if (fi != NULL) {
+                fd = FI_TO_FD (fi);
+                state->fd = fd;
+        }
+
+        if (fd != NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": FTRUNCATE %p/%"PRId64, req_callid (req),
+                        fd, attr->st_size);
+
+                FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FTRUNCATE,
+                          ftruncate, fd, attr->st_size);
+                return;
+        }
+
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": TRUNCATE %s/%"PRId64" (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path,
+                        attr->st_size);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": TRUNCATE %s/%"PRId64"(%lu)",
+                req_callid (req),
+                state->loc.path, attr->st_size, ino);
+
+        FUSE_FOP (state, fuse_attr_cbk, GF_FOP_TRUNCATE,
+                  truncate, &state->loc, attr->st_size);
+}
+
+
+/*
+ * Timestamp-change half of setattr.
+ *
+ * Builds a two-element timespec array from @attr (index 0: access time,
+ * index 1: modification time), resolves @ino into a loc and winds UTIMENS.
+ * Replies with EINVAL when the loc cannot be filled.
+ */
+static void
+do_utimes (fuse_req_t req,
+           fuse_ino_t ino,
+           struct stat *attr)
+{
+        fuse_state_t *state;
+
+        struct timespec tv[2];
+        int32_t         ret = -1;
+
+        tv[0].tv_sec  = attr->st_atime;
+        tv[0].tv_nsec = ST_ATIM_NSEC(attr);
+        tv[1].tv_sec  = attr->st_mtime;
+        /* was ST_ATIM_NSEC: the modification time must carry its own
+           nanoseconds, not those of the access time.
+           NOTE(review): assumes ST_MTIM_NSEC is provided by the same
+           header that provides ST_ATIM_NSEC - confirm. */
+        tv[1].tv_nsec = ST_MTIM_NSEC(attr);
+
+        state = state_from_req (req);
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((state->loc.inode == NULL) ||
+            (ret < 0)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": UTIMENS %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": UTIMENS (%lu)%s", req_callid (req),
+                ino, state->loc.path);
+
+        FUSE_FOP (state, fuse_attr_cbk, GF_FOP_UTIMENS,
+                  utimens, &state->loc, tv);
+}
+
+
+/*
+ * FUSE setattr handler.
+ *
+ * Dispatches on the first attribute class found in @valid, in this order:
+ * mode, owner/group, size, access/modification time.  Exactly one helper
+ * is invoked per request; when no known bit is set the request falls
+ * through to fuse_getattr().
+ */
+static void
+fuse_setattr (fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+              int valid, struct fuse_file_info *fi)
+{
+        if (valid & FUSE_SET_ATTR_MODE) {
+                do_chmod (req, ino, attr, fi);
+                return;
+        }
+
+        if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
+                do_chown (req, ino, attr, valid, fi);
+                return;
+        }
+
+        if (valid & FUSE_SET_ATTR_SIZE) {
+                do_truncate (req, ino, attr, fi);
+                return;
+        }
+
+        if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
+                do_utimes (req, ino, attr);
+                return;
+        }
+
+        fuse_getattr (req, ino, fi);
+}
+
+
+/* Incremented each time a failed SETXATTR ends up with ENOTSUP; used to
+   emit the critical log below only on every GF_UNIVERSAL_ANSWER-th hit. */
+static int gf_fuse_xattr_enotsup_log;
+
+/*
+ * Callback for fops whose reply is a bare status code.
+ *
+ * Success replies 0.  A failed SETXATTR is first passed through
+ * gf_compat_setxattr(), which may turn it into a success.  A failed
+ * REMOVEXATTR with ENOATTR is answered without logging.  Any other
+ * failure is logged and answered with op_errno.
+ */
+static int
+fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno)
+{
+        fuse_state_t *state  = frame->root->state;
+        fuse_req_t    req    = state->req;
+        int           silent = 0;
+
+        if (op_ret == 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": %s() %s => 0", frame->root->unique,
+                        gf_fop_list[frame->root->op],
+                        state->loc.path ? state->loc.path : "ERR");
+
+                fuse_reply_err (req, 0);
+        } else if (frame->root->op == GF_FOP_SETXATTR) {
+                op_ret = gf_compat_setxattr (state->dict);
+                if (op_ret == 0)
+                        op_errno = 0;
+
+                if (op_errno == ENOTSUP) {
+                        gf_fuse_xattr_enotsup_log++;
+                        if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER)) {
+                                gf_log ("glusterfs-fuse", GF_LOG_CRITICAL,
+                                        "[ ERROR ] Extended attribute not supported by the backend storage");
+                        }
+                }
+
+                fuse_reply_err (req, op_errno);
+        } else {
+                silent = ((frame->root->op == GF_FOP_REMOVEXATTR)
+                          && (op_errno == ENOATTR));
+
+                if (!silent) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "%"PRId64": %s() %s => -1 (%s)",
+                                frame->root->unique,
+                                gf_fop_list[frame->root->op],
+                                state->loc.path ? state->loc.path : "ERR",
+                                strerror (op_errno));
+                }
+
+                fuse_reply_err (req, op_errno);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+
+/*
+ * Callback shared by UNLINK and RMDIR.
+ *
+ * On success the name is removed from the inode table via inode_unlink()
+ * and 0 is replied.  On failure op_errno is replied; ENOTEMPTY is logged
+ * at debug level, everything else at error level.
+ */
+static int
+fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req   = state->req;
+
+        if (op_ret != 0) {
+                gf_log ("glusterfs-fuse",
+                        (op_errno != ENOTEMPTY ? GF_LOG_ERROR : GF_LOG_DEBUG),
+                        "%"PRId64": %s() %s => -1 (%s)", frame->root->unique,
+                        gf_fop_list[frame->root->op], state->loc.path,
+                        strerror (op_errno));
+
+                fuse_reply_err (req, op_errno);
+        } else {
+                inode_unlink (state->loc.inode, state->loc.parent,
+                              state->loc.name);
+
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": %s() %s => 0", frame->root->unique,
+                        gf_fop_list[frame->root->op], state->loc.path);
+
+                fuse_reply_err (req, 0);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE access handler: resolves @ino into a loc and winds ACCESS with
+ * @mask; fuse_err_cbk sends the reply.  Replies EINVAL when the loc
+ * cannot be filled.
+ */
+static void
+fuse_access (fuse_req_t req, fuse_ino_t ino, int mask)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": ACCESS %"PRId64" (%s) (fuse_loc_fill() failed)",
+                        req_callid (req), (int64_t)ino, state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64" ACCESS %s/%lu mask=%d", req_callid (req),
+                state->loc.path, ino, mask);
+
+        FUSE_FOP (state, fuse_err_cbk,
+                  GF_FOP_ACCESS, access,
+                  &state->loc, mask);
+}
+
+
+
+/*
+ * READLINK callback.
+ *
+ * A positive op_ret is used as the length of the link target: the buffer
+ * is NUL-terminated in place at that offset and sent to the kernel.  Any
+ * other op_ret is treated as failure and op_errno is replied.
+ */
+static int
+fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, const char *linkname)
+{
+        fuse_state_t *state  = frame->root->state;
+        fuse_req_t    req    = state->req;
+        char         *target = (char *)linkname;
+
+        if (op_ret <= 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": %s => -1 (%s)", frame->root->unique,
+                        state->loc.path, strerror(op_errno));
+
+                fuse_reply_err(req, op_errno);
+        } else {
+                /* writes through the const pointer, as the buffer is not
+                   guaranteed to arrive terminated */
+                target[op_ret] = '\0';
+
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": %s => %s", frame->root->unique,
+                        state->loc.path, linkname);
+
+                fuse_reply_readlink(req, linkname);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE readlink handler: resolves @ino into a loc and winds READLINK with
+ * a 4096-byte size argument; fuse_readlink_cbk sends the reply.  Replies
+ * EINVAL when the loc cannot be filled.
+ */
+static void
+fuse_readlink (fuse_req_t req,
+               fuse_ino_t ino)
+{
+        fuse_state_t *state;
+        int32_t       ret = -1;
+
+        state = state_from_req (req);
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((state->loc.inode == NULL) ||
+            (ret < 0)) {
+                /* state->loc.inode may be NULL on this path, so log the
+                   inode number from the request instead of dereferencing it */
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" READLINK %s/%"PRId64" (fuse_loc_fill() returned NULL inode)",
+                        req_callid (req), state->loc.path,
+                        (int64_t)ino);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64" READLINK %s/%"PRId64, req_callid (req),
+                state->loc.path, state->loc.inode->ino);
+
+        FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK,
+                  readlink, &state->loc, 4096);
+
+        return;
+}
+
+
+/*
+ * FUSE mknod handler: fills a loc from parent @par and @name, attaches a
+ * freshly created inode to it and winds MKNOD with @mode and @rdev;
+ * fuse_entry_cbk sends the reply.
+ */
+static void
+fuse_mknod (fuse_req_t req, fuse_ino_t par, const char *name,
+            mode_t mode, dev_t rdev)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" MKNOD %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        /* the entry does not exist yet, so it gets a new inode */
+        state->loc.inode = inode_new (state->itable);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": MKNOD %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKNOD,
+                  mknod, &state->loc, mode, rdev);
+}
+
+
+/*
+ * FUSE mkdir handler: fills a loc from parent @par and @name, attaches a
+ * freshly created inode to it and winds MKDIR with @mode; fuse_entry_cbk
+ * sends the reply.
+ */
+static void
+fuse_mkdir (fuse_req_t req, fuse_ino_t par, const char *name, mode_t mode)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" MKDIR %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        /* the entry does not exist yet, so it gets a new inode */
+        state->loc.inode = inode_new (state->itable);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": MKDIR %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKDIR,
+                  mkdir, &state->loc, mode);
+}
+
+
+/*
+ * FUSE unlink handler: fills a loc from parent @par and @name and winds
+ * UNLINK; fuse_unlink_cbk sends the reply.  Replies EINVAL when the loc
+ * cannot be filled or carries no inode.
+ */
+static void
+fuse_unlink (fuse_req_t req, fuse_ino_t par, const char *name)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": UNLINK %s (fuse_loc_fill() returned NULL inode)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": UNLINK %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK,
+                  unlink, &state->loc);
+}
+
+
+/*
+ * FUSE rmdir handler: fills a loc from parent @par and @name and winds
+ * RMDIR; fuse_unlink_cbk sends the reply.  Replies EINVAL when the loc
+ * cannot be filled or carries no inode (logged at debug level).
+ */
+static void
+fuse_rmdir (fuse_req_t req, fuse_ino_t par, const char *name)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": RMDIR %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": RMDIR %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR,
+                  rmdir, &state->loc);
+}
+
+
+/*
+ * FUSE symlink handler: fills a loc from parent @par and @name, attaches
+ * a freshly created inode to it and winds SYMLINK with @linkname as the
+ * target; fuse_entry_cbk sends the reply.
+ */
+static void
+fuse_symlink (fuse_req_t req, const char *linkname,
+              fuse_ino_t par, const char *name)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 0, par, name);
+
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" SYMLINK %s -> %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path, linkname);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        /* the entry does not exist yet, so it gets a new inode */
+        state->loc.inode = inode_new (state->itable);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": SYMLINK %s -> %s", req_callid (req),
+                state->loc.path, linkname);
+
+        FUSE_FOP (state, fuse_entry_cbk, GF_FOP_SYMLINK,
+                  symlink, linkname, &state->loc);
+}
+
+
+/*
+ * RENAME callback.
+ *
+ * On success the returned stat is overwritten with the inode number and
+ * mode already known for the source loc, the inode table is updated with
+ * inode_rename(), and 0 is replied.  On failure op_errno is replied;
+ * ENOTEMPTY is logged at debug level, everything else at error level.
+ */
+int
+fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req   = state->req;
+
+        if (op_ret != 0) {
+                gf_log ("glusterfs-fuse",
+                        (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                        "%"PRId64": %s -> %s => -1 (%s)", frame->root->unique,
+                        state->loc.path, state->loc2.path,
+                        strerror (op_errno));
+                fuse_reply_err (req, op_errno);
+                goto out;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": %s -> %s => 0 (buf->st_ino=%"PRId64" , loc->ino=%"PRId64")",
+                frame->root->unique, state->loc.path, state->loc2.path,
+                buf->st_ino, state->loc.ino);
+
+        /* ugly ugly - to stay blind to situation where
+           rename happens on a new inode
+        */
+        buf->st_ino  = state->loc.ino;
+        buf->st_mode = state->loc.inode->st_mode;
+
+        inode_rename (state->itable,
+                      state->loc.parent, state->loc.name,
+                      state->loc2.parent, state->loc2.name,
+                      state->loc.inode, buf);
+
+        fuse_reply_err (req, 0);
+
+out:
+        free_state (state);
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * FUSE rename handler.
+ *
+ * Fills state->loc from (@oldpar, @oldname), which must resolve to an
+ * inode, and state->loc2 from (@newpar, @newname), then winds RENAME;
+ * fuse_rename_cbk sends the reply.  Either fill failing yields EINVAL.
+ */
+static void
+fuse_rename (fuse_req_t req, fuse_ino_t oldpar, const char *oldname,
+             fuse_ino_t newpar, const char *newname)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = -1;
+
+        ret = fuse_loc_fill (&state->loc, state, 0, oldpar, oldname);
+        if ((ret < 0) || (state->loc.inode == NULL))
+                goto fill_failed;
+
+        ret = fuse_loc_fill (&state->loc2, state, 0, newpar, newname);
+        if (ret < 0)
+                goto fill_failed;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": RENAME `%s (%"PRId64")' -> `%s (%"PRId64")'",
+                req_callid (req), state->loc.path, state->loc.ino,
+                state->loc2.path, state->loc2.ino);
+
+        FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME,
+                  rename, &state->loc, &state->loc2);
+
+        return;
+
+fill_failed:
+        /* common failure path for both the source and the target loc */
+        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)",
+                state->loc.path, req_callid (req), state->loc.path,
+                state->loc2.path);
+
+        fuse_reply_err (req, EINVAL);
+        free_state (state);
+        return;
+}
+
+
+/*
+ * FUSE link handler.
+ *
+ * state->loc describes the new name (@par, @name) and state->loc2 the
+ * existing inode @ino.  The new loc shares the existing inode (an extra
+ * reference is taken) and LINK is wound; fuse_entry_cbk sends the reply.
+ * Replies EINVAL when either loc cannot be filled or the existing inode
+ * is not found.
+ */
+static void
+fuse_link (fuse_req_t req,
+           fuse_ino_t ino,
+           fuse_ino_t par,
+           const char *name)
+{
+        fuse_state_t *state;
+        int32_t       ret_new = -1;
+        int32_t       ret_old = -1;
+
+        state = state_from_req (req);
+
+        /* keep both results: the first one used to be overwritten by the
+           second, so a failure to fill the new-name loc went unnoticed */
+        ret_new = fuse_loc_fill (&state->loc, state, 0, par, name);
+        ret_old = fuse_loc_fill (&state->loc2, state, ino, 0, NULL);
+
+        if ((state->loc2.inode == NULL) ||
+            (ret_new < 0) ||
+            (ret_old < 0)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "fuse_loc_fill() failed for %s %"PRId64": LINK %s %s",
+                        state->loc2.path, req_callid (req),
+                        state->loc2.path, state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        state->loc.inode = inode_ref (state->loc2.inode);
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": LINK() %s (%"PRId64") -> %s (%"PRId64")",
+                req_callid (req), state->loc2.path, state->loc2.ino,
+                state->loc.path, state->loc.ino);
+
+        FUSE_FOP (state, fuse_entry_cbk, GF_FOP_LINK,
+                  link, &state->loc2, &state->loc);
+
+        return;
+}
+
+
+/*
+ * CREATE callback.
+ *
+ * On success builds both the entry parameters and the file-info reply,
+ * links the new inode under its parent name, takes a reference on the fd
+ * and replies.  If fuse_reply_create() returns -ENOENT the inode lookup
+ * and the fd reference are undone.  On failure op_errno is replied.
+ */
+static int
+fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno,
+                 fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        fuse_state_t            *state = frame->root->state;
+        fuse_req_t               req   = state->req;
+        fuse_private_t          *priv  = this->private;
+        struct fuse_file_info    fi    = {0, };
+        struct fuse_entry_param  e     = {0, };
+
+        fi.flags = state->flags;
+
+        if (op_ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": %s => -1 (%s)", req_callid (req),
+                        state->loc.path, strerror (op_errno));
+                fuse_reply_err (req, op_errno);
+                goto out;
+        }
+
+        fi.fh = (unsigned long) fd;
+
+        if ((fi.flags & 3) && priv->direct_io_mode)
+                fi.direct_io = 1;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": %s() %s => %p (ino=%"PRId64")",
+                frame->root->unique, gf_fop_list[frame->root->op],
+                state->loc.path, fd, buf->st_ino);
+
+        e.ino = buf->st_ino;
+
+#ifdef GF_DARWIN_HOST_OS
+        e.generation = 0;
+#else
+        e.generation = buf->st_ctime;
+#endif
+
+        e.entry_timeout   = priv->entry_timeout;
+        e.attr_timeout    = priv->attribute_timeout;
+        e.attr            = *buf;
+        e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE;
+
+        fi.keep_cache = 0;
+
+        inode_link (inode, state->loc.parent,
+                    state->loc.name, buf);
+
+        inode_lookup (inode);
+
+        fd_ref (fd);
+        if (fuse_reply_create (req, &e, &fi) == -ENOENT) {
+                gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+                        "create() got EINTR");
+                /* the kernel never saw the entry: undo lookup and fd ref */
+                inode_forget (inode, 1);
+                fd_unref (fd);
+                goto out;
+        }
+
+        fd_bind (fd);
+
+out:
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE create handler.
+ *
+ * Fills a loc from parent @par and @name, attaches a freshly created
+ * inode, creates an fd on it carrying the open flags from @fi, and winds
+ * CREATE with @mode; fuse_create_cbk sends the reply.
+ *
+ * Replies EINVAL when the loc cannot be filled and ENOMEM when no fd
+ * could be created.
+ */
+static void
+fuse_create (fuse_req_t req,
+             fuse_ino_t par,
+             const char *name,
+             mode_t mode,
+             struct fuse_file_info *fi)
+{
+        fuse_state_t *state;
+        fd_t         *fd;
+        int32_t       ret = -1;
+
+        state = state_from_req (req);
+        state->flags = fi->flags;
+
+        ret = fuse_loc_fill (&state->loc, state, 0, par, name);
+        if (ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" CREATE %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        state->loc.inode = inode_new (state->itable);
+
+        fd = fd_create (state->loc.inode, get_pid_from_req (req));
+        if (fd == NULL) {
+                /* fd->flags below would dereference NULL otherwise */
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64" CREATE %s (fd_create() failed)",
+                        req_callid (req), state->loc.path);
+                fuse_reply_err (req, ENOMEM);
+                free_state (state);
+                return;
+        }
+        state->fd = fd;
+        fd->flags = state->flags;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": CREATE %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE,
+                  create, &state->loc, state->flags, mode, fd);
+
+        return;
+}
+
+
+/*
+ * FUSE open handler.
+ *
+ * Resolves @ino into a loc, creates an fd on its inode carrying the open
+ * flags from @fi, and winds OPEN; fuse_fd_cbk sends the reply.
+ *
+ * Replies EINVAL when the loc cannot be filled and ENOMEM when no fd
+ * could be created.
+ */
+static void
+fuse_open (fuse_req_t req,
+           fuse_ino_t ino,
+           struct fuse_file_info *fi)
+{
+        fuse_state_t *state;
+        fd_t         *fd;
+        int32_t       ret = -1;
+
+        state = state_from_req (req);
+        state->flags = fi->flags;
+
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((state->loc.inode == NULL) ||
+            (ret < 0)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": OPEN %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        fd = fd_create (state->loc.inode, get_pid_from_req (req));
+        if (fd == NULL) {
+                /* fd->flags below would dereference NULL otherwise */
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": OPEN %s (fd_create() failed)",
+                        req_callid (req), state->loc.path);
+
+                fuse_reply_err (req, ENOMEM);
+                free_state (state);
+                return;
+        }
+        state->fd = fd;
+        fd->flags = fi->flags;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": OPEN %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN,
+                  open, &state->loc, fi->flags, fd);
+
+        return;
+}
+
+
+/*
+ * READ callback: on success (op_ret >= 0) forwards the @count iovecs in
+ * @vector to the kernel, otherwise replies op_errno.
+ */
+static int
+fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno,
+                struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req   = state->req;
+
+        if (op_ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": READ => %d (%s)", frame->root->unique,
+                        op_ret, strerror (op_errno));
+
+                fuse_reply_err (req, op_errno);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+                        frame->root->unique,
+                        op_ret, state->size, state->off, stbuf->st_size);
+
+                fuse_reply_vec (req, vector, count);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+/*
+ * FUSE read handler: records @size and @off in the request state, takes
+ * the fd from @fi and winds READ; fuse_readv_cbk sends the reply.
+ */
+static void
+fuse_readv (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+            struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = NULL;
+
+        state->size = size;
+        state->off  = off;
+
+        fd = FI_TO_FD (fi);
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": READ (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+                req_callid (req), fd, size, off);
+
+        FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ,
+                  readv, fd, size, off);
+}
+
+
+/*
+ * WRITE callback: on success (op_ret >= 0) reports op_ret as the number
+ * of bytes written, otherwise replies op_errno.
+ */
+static int
+fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req   = state->req;
+
+        if (op_ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": WRITE => -1 (%s)", frame->root->unique,
+                        strerror(op_errno));
+
+                fuse_reply_err (req, op_errno);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+                        frame->root->unique,
+                        op_ret, state->size, state->off, stbuf->st_size);
+
+                fuse_reply_write (req, op_ret);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE write handler: wraps the caller's buffer in a single iovec and
+ * winds WRITE at offset @off on the fd taken from @fi; fuse_writev_cbk
+ * sends the reply.
+ */
+static void
+fuse_write (fuse_req_t req, fuse_ino_t ino, const char *buf,
+            size_t size, off_t off, struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = NULL;
+        struct iovec  vector;
+
+        state->size = size;
+        state->off  = off;
+
+        fd = FI_TO_FD (fi);
+        state->fd = fd;
+
+        /* one iovec covering the whole request payload */
+        vector.iov_base = (void *)buf;
+        vector.iov_len  = size;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+                req_callid (req), fd, size, off);
+
+        FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE,
+                  writev, fd, &vector, 1, off);
+}
+
+
+/*
+ * FUSE flush handler: winds FLUSH on the fd taken from @fi; fuse_err_cbk
+ * sends the reply.
+ */
+static void
+fuse_flush (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = FI_TO_FD (fi);
+
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": FLUSH %p", req_callid (req), fd);
+
+        FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
+                  flush, fd);
+}
+
+
+/*
+ * FUSE release handler: drops one reference on the fd taken from @fi and
+ * replies success immediately; no fop is wound.
+ */
+static void
+fuse_release (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+
+        state->fd = FI_TO_FD (fi);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": RELEASE %p", req_callid (req), state->fd);
+
+        fd_unref (state->fd);
+        fuse_reply_err (req, 0);
+        free_state (state);
+}
+
+
+/*
+ * FUSE fsync handler: winds FSYNC with @datasync on the fd taken from
+ * @fi; fuse_err_cbk sends the reply.
+ */
+static void
+fuse_fsync (fuse_req_t req, fuse_ino_t ino, int datasync,
+            struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = FI_TO_FD (fi);
+
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": FSYNC %p", req_callid (req), fd);
+
+        FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNC,
+                  fsync, fd, datasync);
+}
+
+
+/*
+ * FUSE opendir handler.
+ *
+ * Resolves @ino into a loc, creates an fd on its inode and winds OPENDIR;
+ * fuse_fd_cbk sends the reply.
+ *
+ * Replies EINVAL when the loc cannot be filled and ENOMEM when no fd
+ * could be created.
+ */
+static void
+fuse_opendir (fuse_req_t req,
+              fuse_ino_t ino,
+              struct fuse_file_info *fi)
+{
+        fuse_state_t *state;
+        fd_t         *fd;
+        int32_t       ret = -1;
+
+        state = state_from_req (req);
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((state->loc.inode == NULL) ||
+            (ret < 0)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": OPENDIR %s (fuse_loc_fill() failed)",
+                        req_callid (req), state->loc.path);
+
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        fd = fd_create (state->loc.inode, get_pid_from_req (req));
+        if (fd == NULL) {
+                /* do not wind OPENDIR with a NULL fd */
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": OPENDIR %s (fd_create() failed)",
+                        req_callid (req), state->loc.path);
+
+                fuse_reply_err (req, ENOMEM);
+                free_state (state);
+                return;
+        }
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": OPENDIR %s", req_callid (req),
+                state->loc.path);
+
+        FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR,
+                  opendir, &state->loc, fd);
+}
+
+/*
+ * READDIR callback.
+ *
+ * Makes two passes over @entries: the first sums the FUSE dirent size of
+ * every name to size one reply buffer, the second packs each entry (name,
+ * inode number, offset) into that buffer with fuse_add_direntry().  The
+ * buffer is sent with fuse_reply_buf() and freed before returning.
+ *
+ * Replies op_errno when the fop failed and ENOMEM when the reply buffer
+ * cannot be allocated.
+ */
+static int
+fuse_readdir_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  gf_dirent_t *entries)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req = state->req;
+        int           size = 0;
+        int           entry_size = 0;
+        char         *buf = NULL;
+        gf_dirent_t  *entry = NULL;
+        struct stat   stbuf = {0, };
+
+        if (op_ret < 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": READDIR => -1 (%s)", frame->root->unique,
+                        strerror (op_errno));
+
+                fuse_reply_err (req, op_errno);
+                goto out;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": READDIR => %d/%"GF_PRI_SIZET",%"PRId64,
+                frame->root->unique, op_ret, state->size, state->off);
+
+        /* pass 1: total size of the packed reply */
+        list_for_each_entry (entry, &entries->list, list) {
+                size += fuse_dirent_size (strlen (entry->d_name));
+        }
+
+        buf = CALLOC (1, size);
+        if (!buf) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": READDIR => -1 (%s)", frame->root->unique,
+                        strerror (ENOMEM));
+                /* fuse_reply_err() expects a positive errno value; this
+                   used to pass -ENOMEM */
+                fuse_reply_err (req, ENOMEM);
+                goto out;
+        }
+
+        /* pass 2: pack the entries back to back */
+        size = 0;
+        list_for_each_entry (entry, &entries->list, list) {
+                stbuf.st_ino = entry->d_ino;
+                entry_size = fuse_dirent_size (strlen (entry->d_name));
+                fuse_add_direntry (req, buf + size, entry_size,
+                                   entry->d_name, &stbuf,
+                                   entry->d_off);
+                size += entry_size;
+        }
+
+        fuse_reply_buf (req, (void *)buf, size);
+
+out:
+        free_state (state);
+        STACK_DESTROY (frame->root);
+        if (buf)
+                FREE (buf);
+        return 0;
+
+}
+
+/*
+ * FUSE readdir handler: records @size and @off in the request state,
+ * takes the fd from @fi and winds READDIR; fuse_readdir_cbk sends the
+ * reply.
+ */
+static void
+fuse_readdir (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+              struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+        fd_t         *fd    = NULL;
+
+        state->size = size;
+        state->off  = off;
+
+        fd = FI_TO_FD (fi);
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+                req_callid (req), fd, size, off);
+
+        FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR,
+                  readdir, fd, size, off);
+}
+
+
+/*
+ * FUSE releasedir handler: drops one reference on the directory fd taken
+ * from @fi and replies success immediately; no fop is wound.
+ */
+static void
+fuse_releasedir (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
+{
+        fuse_state_t *state = state_from_req (req);
+
+        state->fd = FI_TO_FD (fi);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": RELEASEDIR %p", req_callid (req), state->fd);
+
+        fd_unref (state->fd);
+        fuse_reply_err (req, 0);
+        free_state (state);
+}
+
+
+/*
+ * FUSE fsyncdir handler: winds FSYNCDIR with @datasync on the directory
+ * fd taken from @fi; fuse_err_cbk sends the reply.
+ */
+static void
+fuse_fsyncdir (fuse_req_t req, fuse_ino_t ino, int datasync,
+               struct fuse_file_info *fi)
+{
+        fd_t         *fd    = FI_TO_FD (fi);
+        fuse_state_t *state = state_from_req (req);
+
+        state->fd = fd;
+
+        FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR,
+                  fsyncdir, fd, datasync);
+}
+
+
+/*
+ * STATFS callback.
+ *
+ * Some filesystems (ZFS on Solaris is one) report an ->f_frsize that
+ * differs from ->f_bsize.  ->f_blocks, ->f_bavail and ->f_bfree are
+ * counted in ->f_frsize units, but old coreutils df goes through statfs()
+ * and never looks at ->f_frsize, so it prints wrong numbers.  To
+ * compensate, the three block counts are rescaled to
+ * BIG_FUSE_CHANNEL_SIZE units and both size fields are set to that value.
+ *
+ * TODO: old coreutils take f_bsize from stat()'s st_blksize, so this
+ * still does not help df there.
+ */
+static int
+fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+{
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req   = state->req;
+
+        if (op_ret != 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": ERR => -1 (%s)", frame->root->unique,
+                        strerror(op_errno));
+                fuse_reply_err (req, op_errno);
+                goto out;
+        }
+
+#ifndef GF_DARWIN_HOST_OS
+        /* MacFUSE doesn't respect any of these tweaks */
+        buf->f_blocks *= buf->f_frsize;
+        buf->f_blocks /= BIG_FUSE_CHANNEL_SIZE;
+
+        buf->f_bavail *= buf->f_frsize;
+        buf->f_bavail /= BIG_FUSE_CHANNEL_SIZE;
+
+        buf->f_bfree *= buf->f_frsize;
+        buf->f_bfree /= BIG_FUSE_CHANNEL_SIZE;
+
+        buf->f_frsize = buf->f_bsize = BIG_FUSE_CHANNEL_SIZE;
+#endif /* GF_DARWIN_HOST_OS */
+        fuse_reply_statfs (req, buf);
+
+out:
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE statfs handler: always fills the loc for inode 1 (the @ino
+ * argument is not used) and winds STATFS on it; fuse_statfs_cbk sends
+ * the reply.
+ */
+static void
+fuse_statfs (fuse_req_t req, fuse_ino_t ino)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       ret   = fuse_loc_fill (&state->loc, state, 1, 0, NULL);
+
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": STATFS (fuse_loc_fill() fail)",
+                        req_callid (req));
+
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": STATFS", req_callid (req));
+
+        FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS,
+                  statfs, &state->loc);
+}
+
+
+/*
+ * FUSE setxattr handler.
+ *
+ * Resolves @ino into a loc, stores a private copy of the @size-byte
+ * @value under key @name in a new dict, and winds SETXATTR with @flags;
+ * fuse_err_cbk sends the reply.  When built with DISABLE_POSIX_ACL any
+ * key starting with "system." is refused with EOPNOTSUPP up front.
+ */
+static void
+fuse_setxattr (fuse_req_t req, fuse_ino_t ino, const char *name,
+               const char *value, size_t size, int flags)
+{
+        fuse_state_t *state    = NULL;
+        char         *val_copy = NULL;
+        int32_t       ret      = -1;
+
+#ifdef DISABLE_POSIX_ACL
+        if (!strncmp (name, "system.", 7)) {
+                fuse_reply_err (req, EOPNOTSUPP);
+                return;
+        }
+#endif
+
+        state = state_from_req (req);
+        state->size = size;
+
+        ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+        if ((ret < 0) || (state->loc.inode == NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "%"PRId64": SETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)",
+                        req_callid (req),
+                        state->loc.path, (int64_t)ino, name);
+
+                fuse_reply_err (req, EINVAL);
+                free_state (state);
+                return;
+        }
+
+        state->dict = get_new_dict ();
+
+        /* the dict gets its own copy of the caller's value buffer */
+        val_copy = memdup (value, size);
+        dict_set (state->dict, (char *)name,
+                  data_from_dynptr ((void *)val_copy, size));
+        dict_ref (state->dict);
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": SETXATTR %s/%"PRId64" (%s)", req_callid (req),
+                state->loc.path, (int64_t)ino, name);
+
+        FUSE_FOP (state, fuse_err_cbk, GF_FOP_SETXATTR,
+                  setxattr, &state->loc, state->dict, flags);
+}
+
+
+/*
+ * Reply callback shared by fuse_getxattr() and fuse_listxattr().
+ *
+ * state->name tells the two apart: when it is set this is a getxattr
+ * reply and the single value is looked up in @dict; when it is NULL this
+ * is a listxattr reply and the keys of @dict are returned as a sequence
+ * of NUL-terminated strings.  In both cases state->size == 0 means the
+ * caller only asked for the length.
+ *
+ * Two synthetic keys are answered locally when @dict does not carry them:
+ * "user.glusterfs-booster-volfile" (contents of ctx->specfp, cached in
+ * priv->volfile) and "user.glusterfs-booster-path" (state->loc.path).
+ *
+ * Exactly one FUSE reply is sent per invocation; every early exit jumps
+ * to 'out' so that the state and the call stack are still released.
+ * Always returns 0.
+ */
+static int
+fuse_xattr_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                dict_t *dict)
+{
+        int           need_to_free_dict = 0;
+        int32_t       ret = op_ret;
+        char         *value = "";
+        fuse_state_t *state = frame->root->state;
+        fuse_req_t    req = state->req;
+
+#ifdef GF_DARWIN_HOST_OS
+        /* This is needed in MacFuse, where MacOSX Finder needs some specific
+         * keys to be supported from FS
+         */
+        int32_t dummy_ret = 0;
+        if (state->name) {
+                if (!dict) {
+                        dict = get_new_dict ();
+                        need_to_free_dict = 1;
+                }
+                dummy_ret = gf_compat_getxattr (state->name, dict);
+                if (dummy_ret != -1)
+                        ret = dummy_ret;
+        } else {
+                if (!dict) {
+                        dict = get_new_dict ();
+                        need_to_free_dict = 1;
+                }
+                dummy_ret = gf_compat_listxattr (ret, dict, state->size);
+                if (dummy_ret != -1)
+                        ret = dummy_ret;
+        }
+#endif /* DARWIN */
+
+        if (ret >= 0) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": %s() %s => %d", frame->root->unique,
+                        gf_fop_list[frame->root->op], state->loc.path, op_ret);
+
+                /* if successful */
+                if (state->name) {
+                        /* if callback for getxattr */
+                        data_t *value_data = dict_get (dict, state->name);
+                        if (value_data) {
+                                ret = value_data->len; /* Don't return the value for '\0' */
+                                value = value_data->data;
+
+                                /* linux kernel limits the size of xattr value to 64k */
+                                if (ret > GLUSTERFS_XATTR_LEN_MAX) {
+                                        fuse_reply_err (req, E2BIG);
+                                } else if (state->size) {
+                                        /* if callback for getxattr and asks for value */
+                                        fuse_reply_buf (req, value, ret);
+                                } else {
+                                        /* if callback for getxattr and asks for value length only */
+                                        fuse_reply_xattr (req, ret);
+                                } /* if(ret >...)...else if...else */
+                        } else if (!strcmp (state->name, "user.glusterfs-booster-volfile")) {
+                                fuse_private_t *priv = this->private;
+
+                                if (!priv->volfile) {
+                                        int32_t      fd = -1;
+                                        int32_t      nread = 0;
+                                        size_t       chunk = 0;
+                                        off_t        remaining = 0;
+                                        struct stat  st;
+                                        char        *file = NULL;
+
+                                        memset (&st, 0, sizeof (st));
+                                        fd = fileno (this->ctx->specfp);
+                                        if (fstat (fd, &st) != 0) {
+                                                gf_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "fstat on fd (%d) failed (%s)", fd, strerror (errno));
+                                                fuse_reply_err (req, ENODATA);
+                                                /* the request has been answered; a second
+                                                 * reply below would touch a released req */
+                                                goto out;
+                                        }
+
+                                        file = CALLOC (1, st.st_size);
+                                        if ((file == NULL) && (st.st_size != 0)) {
+                                                fuse_reply_err (req, ENOMEM);
+                                                goto out;
+                                        }
+
+                                        priv->volfile_size = st.st_size;
+                                        priv->volfile = file;
+                                        lseek (fd, 0, SEEK_SET);
+
+                                        /* never read more than was allocated, even if
+                                         * the file grew after the fstat() above */
+                                        remaining = st.st_size;
+                                        while (remaining > 0) {
+                                                chunk = GF_UNIT_KB;
+                                                if (remaining < (off_t) GF_UNIT_KB)
+                                                        chunk = (size_t) remaining;
+                                                nread = read (fd, file, chunk);
+                                                if (nread <= 0)
+                                                        break;
+                                                file += nread;
+                                                remaining -= nread;
+                                        }
+                                }
+
+                                if (priv->volfile_size > GLUSTERFS_XATTR_LEN_MAX) {
+                                        fuse_reply_err (req, E2BIG);
+                                } else if (state->size) {
+                                        /* if callback for getxattr and asks for value */
+                                        fuse_reply_buf (req, priv->volfile, priv->volfile_size);
+                                } else {
+                                        /* if callback for getxattr and asks for value length only */
+                                        fuse_reply_xattr (req, priv->volfile_size);
+                                } /* if(ret >...)...else if...else */
+                        } else if (!strcmp (state->name, "user.glusterfs-booster-path")) {
+                                if (state->size) {
+                                        fuse_reply_buf (req, state->loc.path, strlen (state->loc.path) + 1);
+                                } else {
+                                        fuse_reply_xattr (req, strlen (state->loc.path) + 1);
+                                }
+                        } else {
+                                fuse_reply_err (req, ENODATA);
+                        } /* if(value_data)...else */
+                } else {
+                        /* if callback for listxattr */
+                        int32_t len = 0;
+                        data_pair_t *trav = dict->members_list;
+                        while (trav) {
+                                len += strlen (trav->key) + 1;
+                                trav = trav->next;
+                        } /* while(trav) */
+                        value = alloca (len + 1);
+                        ERR_ABORT (value);
+                        len = 0;
+                        trav = dict->members_list;
+                        while (trav) {
+                                strcpy (value + len, trav->key);
+                                value[len + strlen(trav->key)] = '\0';
+                                len += strlen (trav->key) + 1;
+                                trav = trav->next;
+                        } /* while(trav) */
+                        if (state->size) {
+                                /* if callback for listxattr and asks for list of keys */
+                                fuse_reply_buf (req, value, len);
+                        } else {
+                                /* if callback for listxattr and asks for length of keys only */
+                                fuse_reply_xattr (req, len);
+                        } /* if(state->size)...else */
+                } /* if(state->name)...else */
+        } else {
+                /* if failure - no need to check if listxattr or getxattr */
+                if (op_errno != ENODATA) {
+                        if (op_errno == ENOTSUP)
+                        {
+                                /* rate-limited: only every GF_UNIVERSAL_ANSWER-th
+                                 * occurrence is logged */
+                                gf_fuse_xattr_enotsup_log++;
+                                if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER))
+                                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                                "[ ERROR ] Extended attribute not supported by the backend storage");
+                        }
+                        else
+                        {
+                                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                        "%"PRId64": %s() %s => -1 (%s)",
+                                        frame->root->unique,
+                                        gf_fop_list[frame->root->op],
+                                        state->loc.path, strerror(op_errno));
+                        }
+                } else {
+                        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                                "%"PRId64": %s() %s => -1 (%s)",
+                                frame->root->unique,
+                                gf_fop_list[frame->root->op], state->loc.path,
+                                strerror(op_errno));
+                } /* if(op_errno!= ENODATA)...else */
+
+                fuse_reply_err (req, op_errno);
+        } /* if(op_ret>=0)...else */
+
+out:
+        if (need_to_free_dict)
+                dict_unref (dict);
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE getxattr handler.
+ *
+ * Records the requested buffer size and a private copy of the attribute
+ * name in the request state, resolves the inode into state->loc and
+ * winds a getxattr to the child translator; fuse_xattr_cbk() sends the
+ * reply.  If the location cannot be resolved the request is answered
+ * with EINVAL.  When built with DISABLE_POSIX_ACL, any "system."
+ * attribute is answered with ENODATA straight away.
+ */
+static void
+fuse_getxattr (fuse_req_t req,
+               fuse_ino_t ino,
+               const char *name,
+               size_t size)
+{
+        fuse_state_t *state = NULL;
+        int32_t       fill_ret = -1;
+
+#ifdef DISABLE_POSIX_ACL
+        if (strncmp (name, "system.", 7) == 0) {
+                fuse_reply_err (req, ENODATA);
+                return;
+        }
+#endif
+
+        state = state_from_req (req);
+        state->size = size;
+        state->name = strdup (name);
+
+        fill_ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+
+        if ((fill_ret >= 0) && (state->loc.inode != NULL)) {
+                /* location resolved: hand the request to the child */
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": GETXATTR %s/%"PRId64" (%s)", req_callid (req),
+                        state->loc.path, (int64_t)ino, name);
+
+                FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+                          getxattr, &state->loc, name);
+
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                "%"PRId64": GETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)",
+                req_callid (req), state->loc.path, (int64_t)ino, name);
+
+        fuse_reply_err (req, EINVAL);
+        free_state (state);
+}
+
+
+/*
+ * FUSE listxattr handler.
+ *
+ * Listing is implemented as a getxattr with a NULL name: state->name is
+ * left unset, which is how fuse_xattr_cbk() recognises a list request.
+ * A location that cannot be resolved is answered with EINVAL.
+ */
+static void
+fuse_listxattr (fuse_req_t req,
+                fuse_ino_t ino,
+                size_t size)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       fill_ret = -1;
+
+        state->size = size;
+        fill_ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+
+        if ((fill_ret >= 0) && (state->loc.inode != NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": LISTXATTR %s/%"PRId64, req_callid (req),
+                        state->loc.path, (int64_t)ino);
+
+                FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+                          getxattr, &state->loc, NULL);
+
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                "%"PRId64": LISTXATTR %s/%"PRId64" (fuse_loc_fill() failed)",
+                req_callid (req), state->loc.path, (int64_t)ino);
+
+        fuse_reply_err (req, EINVAL);
+        free_state (state);
+}
+
+
+/*
+ * FUSE removexattr handler.
+ *
+ * Resolves the inode into state->loc and winds a removexattr for @name
+ * to the child translator; fuse_err_cbk() sends the reply.  A location
+ * that cannot be resolved is answered with EINVAL.
+ */
+static void
+fuse_removexattr (fuse_req_t req,
+                  fuse_ino_t ino,
+                  const char *name)
+{
+        fuse_state_t *state = state_from_req (req);
+        int32_t       fill_ret = -1;
+
+        fill_ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL);
+
+        if ((fill_ret >= 0) && (state->loc.inode != NULL)) {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s)", req_callid (req),
+                        state->loc.path, (int64_t)ino, name);
+
+                FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR,
+                          removexattr, &state->loc, name);
+
+                return;
+        }
+
+        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)",
+                req_callid (req), state->loc.path, (int64_t)ino, name);
+
+        fuse_reply_err (req, EINVAL);
+        free_state (state);
+}
+
+
+/* Number of ENOSYS replies seen by the lock callbacks; used to rate-limit
+ * the "load features/posix-locks" hint. */
+static int gf_fuse_lk_enosys_log;
+
+/*
+ * Reply callback for fuse_getlk().
+ *
+ * On success (op_ret == 0) the lock description is returned to the
+ * kernel; otherwise op_errno is returned.  ENOSYS failures are logged
+ * only on every GF_UNIVERSAL_ANSWER-th occurrence.  Releases the request
+ * state and the call stack.  Always returns 0.
+ */
+static int
+fuse_getlk_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                struct flock *lock)
+{
+        fuse_state_t *state = frame->root->state;
+
+        if (op_ret != 0) {
+                if (op_errno != ENOSYS) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "%"PRId64": ERR => -1 (%s)",
+                                frame->root->unique, strerror (op_errno));
+                } else if ((++gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER) == 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "[ ERROR ] loading 'features/posix-locks' on server side may help your application");
+                }
+                fuse_reply_err (state->req, op_errno);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": ERR => 0", frame->root->unique);
+                fuse_reply_lock (state->req, lock);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE getlk handler: winds an lk(F_GETLK) on the fd stored in the
+ * fuse_file_info; fuse_getlk_cbk() sends the reply.
+ */
+static void
+fuse_getlk (fuse_req_t req,
+            fuse_ino_t ino,
+            struct fuse_file_info *fi,
+            struct flock *lock)
+{
+        fd_t         *fd = FI_TO_FD (fi);
+        fuse_state_t *state = state_from_req (req);
+
+        state->req = req;
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": GETLK %p", req_callid (req), fd);
+
+        FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK,
+                  lk, fd, F_GETLK, lock);
+}
+
+
+/*
+ * Reply callback for fuse_setlk().
+ *
+ * Replies with 0 on success and with op_errno otherwise.  ENOSYS is
+ * logged only on every GF_UNIVERSAL_ANSWER-th occurrence; EAGAIN is
+ * logged at DEBUG level, every other error at ERROR level.  Releases the
+ * request state and the call stack.  Always returns 0.
+ */
+static int
+fuse_setlk_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                struct flock *lock)
+{
+        fuse_state_t *state = frame->root->state;
+
+        if (op_ret != 0) {
+                if (op_errno != ENOSYS) {
+                        gf_log ("glusterfs-fuse",
+                                (op_errno == EAGAIN) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+                                "%"PRId64": ERR => -1 (%s)",
+                                frame->root->unique, strerror (op_errno));
+                } else if ((++gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER) == 0) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "[ ERROR ] loading 'features/posix-locks' on server side may help your application");
+                }
+
+                fuse_reply_err (state->req, op_errno);
+        } else {
+                gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                        "%"PRId64": ERR => 0", frame->root->unique);
+                fuse_reply_err (state->req, 0);
+        }
+
+        free_state (state);
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * FUSE setlk handler: winds an lk() on the fd stored in the
+ * fuse_file_info, using F_SETLKW when @sleep is non-zero and F_SETLK
+ * otherwise; fuse_setlk_cbk() sends the reply.
+ */
+static void
+fuse_setlk (fuse_req_t req,
+            fuse_ino_t ino,
+            struct fuse_file_info *fi,
+            struct flock *lock,
+            int sleep)
+{
+        fd_t         *fd = FI_TO_FD (fi);
+        fuse_state_t *state = state_from_req (req);
+        int           lk_cmd = F_SETLK;
+
+        state->req = req;
+        state->fd = fd;
+
+        gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+                "%"PRId64": SETLK %p (sleep=%d)", req_callid (req), fd,
+                sleep);
+
+        if (sleep)
+                lk_cmd = F_SETLKW;
+
+        FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK,
+                  lk, fd, lk_cmd, lock);
+}
+
+
+/*
+ * FUSE lowlevel 'init' hook.  @data is the translator that was passed as
+ * user data to fuse_lowlevel_new(); a fresh inode table is created for
+ * it.  Does nothing when @data is NULL.
+ */
+static void
+fuse_init (void *data, struct fuse_conn_info *conn)
+{
+        xlator_t *this_xl = data;
+
+        if (this_xl != NULL)
+                this_xl->itable = inode_table_new (0, this_xl);
+}
+
+static void
+fuse_destroy (void *data)
+{
+ /* Installed as the .destroy hook in fuse_ops.  The body is empty:
+  * no teardown is performed from this hook. */
+}
+
+static struct fuse_lowlevel_ops fuse_ops = { /* Handler table passed to fuse_lowlevel_new() in init(). */
+ .init = fuse_init, /* creates the translator's inode table */
+ .destroy = fuse_destroy, /* empty hook */
+ .lookup = fuse_lookup,
+ .forget = fuse_forget,
+ .getattr = fuse_getattr,
+ .setattr = fuse_setattr,
+ .opendir = fuse_opendir,
+ .readdir = fuse_readdir,
+ .releasedir = fuse_releasedir,
+ .access = fuse_access,
+ .readlink = fuse_readlink,
+ .mknod = fuse_mknod,
+ .mkdir = fuse_mkdir,
+ .unlink = fuse_unlink,
+ .rmdir = fuse_rmdir,
+ .symlink = fuse_symlink,
+ .rename = fuse_rename,
+ .link = fuse_link,
+ .create = fuse_create,
+ .open = fuse_open,
+ .read = fuse_readv, /* note the name: reads are served by fuse_readv */
+ .write = fuse_write,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ .fsyncdir = fuse_fsyncdir,
+ .statfs = fuse_statfs,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr, /* winds a getxattr with a NULL name */
+ .removexattr = fuse_removexattr,
+ .getlk = fuse_getlk, /* lk (F_GETLK) */
+ .setlk = fuse_setlk /* lk (F_SETLK or F_SETLKW) */
+};
+
+
+/*
+ * Body of the FUSE reader thread started from notify().
+ *
+ * Loops until the FUSE session is marked as exited: receives one request
+ * from the channel into a scratch buffer, copies it into priv->buf
+ * (growing that buffer when needed) and hands it to
+ * fuse_session_process().  After every iteration the reference on the
+ * current priv->buf is dropped and a fresh empty data_t is installed.
+ *
+ * When the loop ends the mountpoint option is removed from the
+ * translator options, the channel and the session are torn down, the
+ * scratch buffer is released and SIGTERM is raised for the process.
+ *
+ * @data is the fuse translator (xlator_t *).  Always returns NULL.
+ */
+static void *
+fuse_thread_proc (void *data)
+{
+        char           *mount_point = NULL;
+        xlator_t       *this = data;
+        fuse_private_t *priv = this->private;
+        int32_t         res = 0;
+        int             saved_errno = 0;
+        data_t         *buf = NULL;
+        size_t          chan_size = fuse_chan_bufsize (priv->ch);
+        char           *recvbuf = CALLOC (1, chan_size);
+        ERR_ABORT (recvbuf);
+
+        while (!fuse_session_exited (priv->se)) {
+                res = fuse_chan_receive (priv->ch,
+                                         recvbuf,
+                                         chan_size);
+
+                if (res == -1) {
+                        /* capture errno before logging can overwrite it */
+                        saved_errno = errno;
+                        if (saved_errno != EINTR) {
+                                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                        "fuse_chan_receive() returned -1 (%d)", saved_errno);
+                        }
+                        if (saved_errno == ENODEV)
+                                break;
+                        continue;
+                }
+
+                buf = priv->buf;
+
+                if (res > 0) {
+                        if (buf->len < (res)) {
+                                if (buf->data) {
+                                        FREE (buf->data);
+                                        buf->data = NULL;
+                                }
+                                buf->data = CALLOC (1, res);
+                                ERR_ABORT (buf->data);
+                                buf->len = res;
+                        }
+                        memcpy (buf->data, recvbuf, res); // evil evil
+
+                        fuse_session_process (priv->se,
+                                              buf->data,
+                                              res,
+                                              priv->ch);
+                }
+
+                /* drop our reference on the buffer just used and start the
+                 * next iteration with an empty one */
+                data_unref (buf);
+                priv->buf = data_ref (data_from_dynptr (NULL, 0));
+        }
+
+        if (dict_get (this->options, ZR_MOUNTPOINT_OPT))
+                mount_point = data_to_str (dict_get (this->options,
+                                                     ZR_MOUNTPOINT_OPT));
+        if (mount_point) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "unmounting %s", mount_point);
+                dict_del (this->options, ZR_MOUNTPOINT_OPT);
+        }
+        fuse_session_remove_chan (priv->ch);
+        fuse_session_destroy (priv->se);
+        // fuse_unmount (priv->mount_point, priv->ch);
+
+        /* the scratch buffer is private to this thread */
+        FREE (recvbuf);
+
+        raise (SIGTERM);
+
+        return NULL;
+}
+
+
+/*
+ * Translator event hook.
+ *
+ * GF_EVENT_CHILD_UP:  starts the FUSE reader thread (fuse_thread_proc)
+ *                     the first time the event is seen.
+ * GF_EVENT_PARENT_UP: forwarded to default_notify().
+ * Anything else is ignored.
+ *
+ * Always returns 0; a failure to create the thread is logged and then
+ * trips the assert.
+ */
+int32_t
+notify (xlator_t *this, int32_t event,
+        void *data, ...)
+{
+
+        switch (event)
+        {
+        case GF_EVENT_CHILD_UP:
+
+#ifndef GF_DARWIN_HOST_OS
+                /*
+                 * This is because macfuse sends statfs() once the fuse thread
+                 * gets activated, and by that time if the client is not
+                 * connected, it give 'Device not configured' error. Hence,
+                 * create thread only when client sends CHILD_UP (ie, client
+                 * is connected).
+                 */
+
+                /* TODO: somehow, try to get the mountpoint active as soon as
+                 * init() is complete, so that the hang effect when the
+                 * server is not started is removed.
+                 */
+
+                /* This code causes problem with 'automount' too */
+                /* case GF_EVENT_CHILD_CONNECTING: */
+#endif /* DARWIN */
+
+        {
+                fuse_private_t *private = this->private;
+                int32_t         ret = 0;
+
+                if (!private->fuse_thread_started)
+                {
+                        private->fuse_thread_started = 1;
+
+                        ret = pthread_create (&private->fuse_thread, NULL,
+                                              fuse_thread_proc, this);
+
+                        /* pthread_create() reports failure through its return
+                         * value; errno is not set by it */
+                        if (ret != 0)
+                                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                        "pthread_create() failed (%s)", strerror (ret));
+                        assert (ret == 0);
+                }
+                break;
+        }
+        case GF_EVENT_PARENT_UP:
+        {
+                default_notify (this, GF_EVENT_PARENT_UP, data);
+                break;
+        }
+        default:
+                break;
+        }
+        return 0;
+}
+
+/*
+ * Translator constructor for the fuse mount translator.
+ *
+ * Allocates the private structure, validates the mandatory mountpoint
+ * option (must exist and be a directory), reads the attribute/entry
+ * timeouts (both default to 1 when unset or zero) and the direct-io
+ * switch (default on), mounts the filesystem with a platform-specific
+ * option list, creates the lowlevel FUSE session and registers the
+ * translator as ctx->top.
+ *
+ * Returns 0 on success and -1 on failure.  Every failure after the
+ * private structure has been allocated goes through the cleanup labels,
+ * which release it and reset this_xl->private to NULL so that no stale
+ * pointer is left behind.
+ */
+int
+init (xlator_t *this_xl)
+{
+        int             ret = 0;
+        dict_t         *options = NULL;
+        char           *value_string = NULL;
+        fuse_private_t *priv = NULL;
+        struct stat     stbuf = {0,};
+
+#ifdef GF_DARWIN_HOST_OS
+        int fuse_argc = 9;
+        char *fuse_argv[] = {"glusterfs",
+                             "-o", "allow_other",
+                             "-o", "default_permissions",
+                             "-o", "fsname=glusterfs",
+                             "-o", "local",
+                             NULL};
+
+#elif GF_LINUX_HOST_OS /* ! DARWIN_OS */
+        int fuse_argc = 19;
+
+        char *fuse_argv[] = {"glusterfs",
+                             "-o", "nonempty",
+                             "-o", "max_readahead=1048576",
+                             "-o", "max_read=1048576",
+                             "-o", "max_write=1048576",
+                             "-o", "allow_other",
+                             "-o", "default_permissions",
+                             "-o", "fsname=glusterfs",
+                             "-o", "dev",
+                             "-o", "suid",
+                             NULL};
+
+#else /* BSD || SOLARIS */
+        /* BSD fuse doesn't support '-o dev', '-o nonempty' option */
+        int fuse_argc = 15;
+
+        char *fuse_argv[] = {"glusterfs",
+                             "-o", "max_readahead=1048576",
+                             "-o", "max_read=1048576",
+                             "-o", "max_write=1048576",
+                             "-o", "allow_other",
+                             "-o", "default_permissions",
+                             "-o", "fsname=glusterfs",
+                             "-o", "suid",
+                             NULL};
+
+#endif /* ! DARWIN_OS || ! LINUX */
+        struct fuse_args args = FUSE_ARGS_INIT (fuse_argc, fuse_argv);
+
+        if (this_xl == NULL)
+                return -1;
+
+        if (this_xl->options == NULL)
+                return -1;
+
+        options = this_xl->options;
+
+        if (this_xl->name == NULL)
+                this_xl->name = strdup ("fuse");
+
+        priv = CALLOC (1, sizeof (*priv));
+        ERR_ABORT (priv);
+        this_xl->private = (void *) priv;
+
+
+#ifdef GF_DARWIN_HOST_OS
+        if (dict_get (options, "macfuse-local")) {
+                /* This way, GlusterFS will be detected as 'servers' instead
+                 * of 'devices'. This method is useful if you want to do
+                 * 'umount <mount_point>' over network, instead of 'eject'ing
+                 * it from desktop. Works better for servers
+                 */
+                /* Make the '-o local' in argv as NULL, so that its not
+                   in effect */
+                fuse_argv[--args.argc] = NULL;
+                fuse_argv[--args.argc] = NULL;
+        }
+#endif /* ! DARWIN */
+
+        /* get options from option dictionary */
+        ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string);
+        if (value_string == NULL) {
+                gf_log ("fuse", GF_LOG_ERROR,
+                        "mandatory option mountpoint is not specified");
+                goto cleanup_exit;
+        }
+
+        if (stat (value_string, &stbuf) != 0) {
+                if (errno == ENOENT) {
+                        gf_log (this_xl->name, GF_LOG_ERROR ,
+                                "%s %s does not exist",
+                                ZR_MOUNTPOINT_OPT, value_string);
+                } else if (errno == ENOTCONN) {
+                        gf_log (this_xl->name, GF_LOG_ERROR ,
+                                "mountpoint %s seems to have a stale "
+                                "mount, run 'umount %s' and try again",
+                                value_string, value_string);
+                } else {
+                        gf_log (this_xl->name, GF_LOG_ERROR ,
+                                "%s %s : stat returned %s",
+                                ZR_MOUNTPOINT_OPT,
+                                value_string, strerror (errno));
+                }
+                goto cleanup_exit;
+        }
+
+        if (S_ISDIR (stbuf.st_mode) == 0) {
+                gf_log (this_xl->name, GF_LOG_ERROR ,
+                        "%s %s is not a directory",
+                        ZR_MOUNTPOINT_OPT, value_string);
+                goto cleanup_exit;
+        }
+        priv->mount_point = strdup (value_string);
+
+
+        ret = dict_get_uint32 (options, "attribute-timeout",
+                               &priv->attribute_timeout);
+        if (!priv->attribute_timeout)
+                priv->attribute_timeout = 1; /* default */
+
+        ret = dict_get_uint32 (options, "entry-timeout",
+                               &priv->entry_timeout);
+        if (!priv->entry_timeout)
+                priv->entry_timeout = 1; /* default */
+
+
+        priv->direct_io_mode = 1;
+        ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string);
+        if (value_string) {
+                ret = gf_string2boolean (value_string, &priv->direct_io_mode);
+        }
+
+        priv->ch = fuse_mount (priv->mount_point, &args);
+        if (priv->ch == NULL) {
+                if (errno == ENOTCONN) {
+                        gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                "A stale mount present on %s. "
+                                "run 'umount %s' and try again",
+                                priv->mount_point,
+                                priv->mount_point);
+                } else {
+                        if (errno == ENOENT) {
+                                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                        "unable to mount on %s. run "
+                                        "'modprobe fuse' and try again",
+                                        priv->mount_point);
+                        } else {
+                                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                                        "fuse_mount() failed with error %s "
+                                        "on mount point %s",
+                                        strerror (errno),
+                                        priv->mount_point);
+                        }
+                }
+
+                goto cleanup_exit;
+        }
+
+        priv->se = fuse_lowlevel_new (&args, &fuse_ops,
+                                      sizeof (fuse_ops), this_xl);
+        if (priv->se == NULL) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "fuse_lowlevel_new() failed with error %s on "
+                        "mount point %s",
+                        strerror (errno), priv->mount_point);
+                goto umount_exit;
+        }
+
+        ret = fuse_set_signal_handlers (priv->se);
+        if (ret == -1) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+                        "fuse_set_signal_handlers() failed on mount point %s",
+                        priv->mount_point);
+                goto umount_exit;
+        }
+
+        fuse_opt_free_args (&args);
+
+        fuse_session_add_chan (priv->se, priv->ch);
+
+        priv->fd = fuse_chan_fd (priv->ch);
+        priv->buf = data_ref (data_from_dynptr (NULL, 0));
+
+        this_xl->ctx->top = this_xl;
+        return 0;
+
+umount_exit:
+        fuse_unmount (priv->mount_point, priv->ch);
+cleanup_exit:
+        fuse_opt_free_args (&args);
+        FREE (priv->mount_point);
+        FREE (priv);
+        /* priv is gone: do not leave the translator pointing at it */
+        this_xl->private = NULL;
+        return -1;
+}
+
+
+/*
+ * Translator destructor.  If the mountpoint option is still present in
+ * the translator options, it is removed, the FUSE session is flagged for
+ * exit and the filesystem is unmounted.  Does nothing when the
+ * translator or its private data is NULL.
+ */
+void
+fini (xlator_t *this_xl)
+{
+        fuse_private_t *priv = NULL;
+        char           *mount_point = NULL;
+
+        if (this_xl == NULL)
+                return;
+
+        priv = this_xl->private;
+        if (priv == NULL)
+                return;
+
+        if (dict_get (this_xl->options, ZR_MOUNTPOINT_OPT))
+                mount_point = data_to_str (dict_get (this_xl->options,
+                                                     ZR_MOUNTPOINT_OPT));
+
+        if (mount_point == NULL)
+                return;
+
+        gf_log (this_xl->name, GF_LOG_WARNING,
+                "unmounting '%s'", mount_point);
+
+        dict_del (this_xl->options, ZR_MOUNTPOINT_OPT);
+        fuse_session_exit (priv->se);
+        fuse_unmount (mount_point, priv->ch);
+}
+
+struct xlator_fops fops = { /* no file operations are exported by this translator */
+};
+
+struct xlator_cbks cbks = { /* no callbacks are registered */
+};
+
+struct xlator_mops mops = { /* no management operations are exported */
+};
+
+struct volume_options options[] = { /* Options accepted by this translator; the list ends with a NULL key. */
+ { .key = {"direct-io-mode"}, /* presumably the key behind ZR_DIRECT_IO_OPT read in init() -- TODO confirm */
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"macfuse-local"}, /* looked up in init() only under GF_DARWIN_HOST_OS */
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"mountpoint", "mount-point"}, /* two accepted spellings of the same option */
+ .type = GF_OPTION_TYPE_PATH
+ },
+ { .key = {"attribute-timeout"}, /* init() falls back to 1 when unset or zero */
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 3600
+ },
+ { .key = {"entry-timeout"}, /* init() falls back to 1 when unset or zero */
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 3600
+ },
+ { .key = {NULL} }, /* terminator */
+};
diff --git a/xlators/mount/fuse/src/fuse-extra.c b/xlators/mount/fuse/src/fuse-extra.c
new file mode 100644
index 00000000000..93574d174d5
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-extra.c
@@ -0,0 +1,137 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "fuse-extra.h"
+#include "common-utils.h"
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include "common-utils.h"
+
+struct fuse_req; /* forward declarations: fuse_req and fuse_ll refer to each other */
+struct fuse_ll;
+/* NOTE(review): the structures below look like local re-declarations of
+ * libfuse-internal types, made so that this file can reach req->unique,
+ * req->ch and the request list; if so, their layout must match the
+ * libfuse version that is linked in -- confirm. */
+struct fuse_req {
+ struct fuse_ll *f; /* owner; free_req() takes f->lock through it */
+ uint64_t unique; /* request id: returned by req_callid(), copied into reply headers */
+ int ctr; /* reference count; free_req() destroys the request when it reaches 0 */
+ pthread_mutex_t lock; /* held by free_req() while it clears u.ni */
+ struct fuse_ctx ctx;
+ struct fuse_chan *ch; /* channel fuse_reply_vec() sends the reply on */
+ int interrupted;
+ union {
+ struct {
+ uint64_t unique;
+ } i;
+ struct {
+ fuse_interrupt_func_t func;
+ void *data;
+ } ni;
+ } u;
+ struct fuse_req *next; /* list links, unlinked by list_del_req() */
+ struct fuse_req *prev;
+};
+
+struct fuse_ll {
+ int debug;
+ int allow_root;
+ struct fuse_lowlevel_ops op;
+ int got_init;
+ void *userdata;
+ uid_t owner;
+ struct fuse_conn_info conn;
+ struct fuse_req list;
+ struct fuse_req interrupts;
+ pthread_mutex_t lock; /* held by free_req() while unlinking a request and dropping its count */
+ int got_destroy;
+};
+
+struct fuse_out_header { /* placed in iov[0] by fuse_reply_vec() */
+ uint32_t len; /* total length of the reply, header included */
+ int32_t error; /* fuse_reply_vec() always stores 0 here */
+ uint64_t unique; /* id of the request being answered */
+};
+
+/* Return the unique id the kernel assigned to @req. */
+uint64_t
+req_callid (fuse_req_t req)
+{
+        uint64_t callid = req->unique;
+
+        return callid;
+}
+
+/* Destroy the request's mutex and release the request itself. */
+static void
+destroy_req (fuse_req_t req)
+{
+        pthread_mutex_destroy (&req->lock);
+        FREE (req);
+}
+
+/* Unlink @req from the doubly-linked list it is on by making its two
+ * neighbours point at each other; @req's own links are left untouched. */
+static void
+list_del_req (struct fuse_req *req)
+{
+        req->prev->next = req->next;
+        req->next->prev = req->prev;
+}
+
+/*
+ * Release one reference on @req after a reply has been sent: clear the
+ * interrupt callback under the request lock, then unlink the request
+ * and decrement its counter under the session lock.  The request is
+ * destroyed when the counter reaches zero.
+ */
+static void
+free_req (fuse_req_t req)
+{
+        struct fuse_ll *session = req->f;
+        int             remaining = 0;
+
+        pthread_mutex_lock (&req->lock);
+        req->u.ni.func = NULL;
+        req->u.ni.data = NULL;
+        pthread_mutex_unlock (&req->lock);
+
+        pthread_mutex_lock (&session->lock);
+        list_del_req (req);
+        req->ctr -= 1;
+        remaining = req->ctr;
+        pthread_mutex_unlock (&session->lock);
+
+        if (remaining == 0)
+                destroy_req (req);
+}
+
+/*
+ * Send a successful reply for @req whose payload is the @count buffers
+ * in @vector.  A fuse_out_header (error = 0) is put in front of the
+ * payload, the whole vector is written to the request's channel and the
+ * request is then released.
+ *
+ * Returns the result of fuse_chan_send().
+ */
+int32_t
+fuse_reply_vec (fuse_req_t req,
+                struct iovec *vector,
+                int32_t count)
+{
+        struct fuse_out_header  hdr;
+        struct iovec           *reply = NULL;
+        int32_t                 total = count + 1;
+        int                     sent = 0;
+
+        /* one extra slot in front for the header */
+        reply = alloca (total * sizeof (*vector));
+
+        hdr.unique = req->unique;
+        hdr.error = 0;
+
+        reply[0].iov_base = &hdr;
+        reply[0].iov_len = sizeof (struct fuse_out_header);
+        memcpy (reply + 1, vector, count * sizeof (*vector));
+
+        hdr.len = iov_length (reply, total);
+
+        sent = fuse_chan_send (req->ch, reply, total);
+        free_req (req);
+
+        return sent;
+}
diff --git a/xlators/mount/fuse/src/fuse-extra.h b/xlators/mount/fuse/src/fuse-extra.h
new file mode 100644
index 00000000000..0e8052b5a34
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-extra.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _FUSE_EXTRA_H
+#define _FUSE_EXTRA_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include <stdlib.h>
+#include <fuse/fuse_lowlevel.h>
+
+#define GLUSTERFS_XATTR_LEN_MAX 65536 /* largest xattr value (64k) the bridge will return; bigger values get E2BIG */
+
+uint64_t req_callid (fuse_req_t req); /* unique id of a FUSE request, used as the call id in log lines */
+
+size_t fuse_dirent_size (size_t dname_len); /* NOTE(review): no definition in fuse-extra.c; presumably provided by libfuse -- confirm */
+
+int32_t
+fuse_reply_vec (fuse_req_t req,
+ struct iovec *vector,
+ int32_t count); /* reply to req with 'count' buffers from 'vector'; returns the channel send result */
+
+#endif /* _FUSE_EXTRA_H */
diff --git a/xlators/mount/fuse/utils/Makefile.am b/xlators/mount/fuse/utils/Makefile.am
new file mode 100644
index 00000000000..1217c30dafa
--- /dev/null
+++ b/xlators/mount/fuse/utils/Makefile.am
@@ -0,0 +1,10 @@
+utildir = $(destdir)/sbin
+
+if GF_DARWIN_HOST_OS
+util_SCRIPTS = mount_glusterfs
+else
+util_SCRIPTS = mount.glusterfs
+endif
+
+CLEANFILES =
+
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
new file mode 100755
index 00000000000..481fd265fff
--- /dev/null
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -0,0 +1,152 @@
+#!/bin/sh
+# (C) 2006, 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301 USA
+
+
+
+_init ()
+{
+ # log level names, in the spelling passed to glusterfs via --log-level
+ LOG_NONE=NONE;
+ LOG_CRITICAL=CRITICAL;
+ LOG_ERROR=ERROR;
+ LOG_WARNING=WARNING;
+ LOG_DEBUG=DEBUG;
+
+ # set default log level to WARNING
+ log_level=$LOG_WARNING;
+}
+
+start_glusterfs ()
+{ # Build the glusterfs command line from the variables set by main() and exec it.
+ prefix="@prefix@"; # presumably needed because @sbindir@ may expand to ${exec_prefix}/sbin -- confirm
+ exec_prefix=@exec_prefix@;
+ cmd_line=$(echo "@sbindir@/glusterfs");
+ # map the user-supplied level name onto one of the LOG_* values
+ if [ -n "$log_level_str" ]; then
+ case "$log_level_str" in
+ "ERROR")
+ log_level=$LOG_ERROR;
+ ;;
+ "DEBUG")
+ log_level=$LOG_DEBUG;
+ ;;
+ "CRITICAL")
+ log_level=$LOG_CRITICAL;
+ ;;
+ "WARNING")
+ log_level=$LOG_WARNING;
+ ;;
+ "NONE")
+ log_level=$LOG_NONE;
+ ;;
+ *)
+ echo "invalid log level $log_level_str, using ERROR";
+ log_level=$LOG_ERROR;
+ ;;
+ esac
+ fi
+ cmd_line=$(echo "$cmd_line --log-level=$log_level");
+ # optional switches are appended only when the matching variable is non-empty
+ if [ -n "$log_file" ]; then
+ cmd_line=$(echo "$cmd_line --log-file=$log_file");
+ fi
+
+ if [ -n "$direct_io_mode" ]; then
+ cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode");
+ fi
+ # no readable volfile: fetch it from server_ip:server_port instead
+ if [ -z "$volfile_loc" ]; then
+ if [ -n "$transport" ]; then
+ cmd_line=$(echo "$cmd_line \
+--volfile-server=$server_ip \
+--volfile-server-port=$server_port \
+--volfile-server-transport=$transport");
+ else
+ cmd_line=$(echo "$cmd_line \
+--volfile-server=$server_ip \
+--volfile-server-port=$server_port");
+ fi
+ else
+ cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
+ fi
+
+ if [ -n "$volume_name" ]; then
+ cmd_line=$(echo "$cmd_line --volume-name=$volume_name");
+ fi
+
+ if [ -n "$volume_id" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+ fi
+ # the mount point is the last argument
+ cmd_line=$(echo "$cmd_line $mount_point");
+ exec $cmd_line; # unquoted on purpose: the string is split into argv here
+}
+
+
+main ()
+{ # Parse "<volfile|server[:port]> <mount-point> [-o opt,opt,...]" and start glusterfs.
+ options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); # text following the last "-o" on the command line
+ new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p');
+ # each value below is pulled out of the comma-separated option list
+ [ -n "$new_log_level" ] && {
+ log_level_str="$new_log_level";
+ }
+ log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p');
+
+ transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p');
+
+ direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p');
+
+ volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p');
+
+ volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p');
+
+ volfile_loc="$1";
+ # if $1 is not a readable file, treat it as server[:port]
+ [ -r "$volfile_loc" ] || {
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
+ server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
+ [ -n "$server_port" ] || {
+ server_port="6996";
+ }
+
+ volfile_loc="";
+ }
+ new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \
+ -e 's/[,]*log-level=[^,]*//' \
+ -e 's/[,]*volume-name=[^,]*//' \
+ -e 's/[,]*direct-io-mode=[^,]*//' \
+ -e 's/[,]*transport=[^,]*//' \
+ -e 's/[,]*volume-id=[^,]*//');
+ # following line is product of love towards sed
+ # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p');
+
+ mount_point="$2";
+
+ # Simple check to avoid multiple identical mounts
+ if grep -q "glusterfs $mount_point fuse" /etc/mtab; then
+ echo "$0: according to mtab, GlusterFS is already mounted on $mount_point"
+ exit 1
+ fi
+ # NOTE(review): fs_options is assembled here but start_glusterfs never reads it
+ fs_options=$(echo "$fs_options,$new_fs_options");
+
+ start_glusterfs;
+}
+
+_init "$@" && main "$@";
diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in
new file mode 100755
index 00000000000..1376a8897ab
--- /dev/null
+++ b/xlators/mount/fuse/utils/mount_glusterfs.in
@@ -0,0 +1,181 @@
+#!/bin/sh
+# (C) 2008 Z RESEARCH Inc. <http://www.zresearch.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301 USA
+
+
+
+_init ()
+{
+ # log level names, in the spelling passed to glusterfs via --log-level
+ LOG_NONE=NONE;
+ LOG_CRITICAL=CRITICAL;
+ LOG_ERROR=ERROR;
+ LOG_WARNING=WARNING;
+ LOG_DEBUG=DEBUG;
+
+ # set default log level to WARNING
+ log_level=$LOG_WARNING;
+}
+
+start_glusterfs ()
+{
+ prefix="@prefix@";
+ exec_prefix=@exec_prefix@;
+ cmd_line=$(echo "@sbindir@/glusterfs");
+
+ if [ -n "$log_level_str" ]; then
+ case "$log_level_str" in
+ "ERROR")
+ log_level=$LOG_ERROR;
+ ;;
+ "DEBUG")
+ log_level=$LOG_DEBUG;
+ ;;
+ "CRITICAL")
+ log_level=$LOG_CRITICAL;
+ ;;
+ "WARNING")
+ log_level=$LOG_WARNING;
+ ;;
+ "NONE")
+ log_level=$LOG_NONE;
+ ;;
+ *)
+ echo "invalid log level $log_level_str, using ERROR";
+ log_level=$LOG_WARNING;
+ ;;
+ esac
+ fi
+ cmd_line=$(echo "$cmd_line --log-level=$log_level");
+
+ if [ -n "$log_file" ]; then
+ cmd_line=$(echo "$cmd_line --log-file=$log_file");
+ fi
+
+ if [ -n "$direct_io_mode" ]; then
+ cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode");
+ fi
+
+ if [ -z "$volfile_loc" ]; then
+ if [ -n "$transport" ]; then
+ cmd_line=$(echo "$cmd_line \
+--volfile-server=$server_ip \
+--volfile-server-port=$server_port \
+--volfile-server-transport=$transport");
+ else
+ cmd_line=$(echo "$cmd_line \
+--volfile-server=$server_ip \
+--volfile-server-port=$server_port");
+ fi
+ else
+ cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
+ fi
+
+ if [ -n "$volume_name" ]; then
+ cmd_line=$(echo "$cmd_line --volume-name=$volume_name");
+ fi
+
+ if [ -n "$volume_id" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+ fi
+
+ cmd_line=$(echo "$cmd_line $mount_point");
+ exec $cmd_line;
+}
+
+
+main ()
+{
+
+ new_log_level=""
+ log_file=""
+ transport=""
+ direct_io_mode=""
+ volume_name=""
+ new_fs_options=""
+
+ while getopts o: opt; do
+ case "$opt" in
+ o)
+ options=$(echo $OPTARG | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p');
+ [ -z $new_log_level ] && {
+ new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p');
+ }
+
+ [ -z $log_file ] && {
+ log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p');
+ }
+
+ [ -z $transport ] && {
+ transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p');
+ }
+
+ [ -z $direct_io_mode ] && {
+ direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p');
+ }
+
+ [ -z $volume_name ] && {
+ volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p');
+ }
+
+ [ -z $volume_id ] && {
+ volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p');
+ }
+
+ this_option=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \
+ -e 's/[,]*log-level=[^,]*//' \
+ -e 's/[,]*volume-name=[^,]*//' \
+ -e 's/[,]*direct-io-mode=[^,]*//' \
+ -e 's/[,]*transport=[^,]*//' \
+ -e 's/[,]*volume-id=[^,]*//');
+ new_fs_options="$new_fs_options $this_option";
+ ;;
+ esac
+ done
+
+ [ -n "$new_log_level" ] && {
+ log_level_str="$new_log_level";
+ }
+
+ # TODO: use getopt. This is very much darwin specific
+ volfile_loc="$1";
+ while [ "$volfile_loc" == "-o" ] ; do
+ shift ;
+ shift ;
+ volfile_loc="$1";
+ done
+
+ [ -r "$volfile_loc" ] || {
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
+ server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
+ [ -n "$server_port" ] || {
+ server_port="6996";
+ }
+
+ volfile_loc="";
+ }
+ # following line is product of love towards sed
+ # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p');
+
+ mount_point="$2";
+
+ fs_options=$(echo "$fs_options,$new_fs_options");
+
+ start_glusterfs;
+}
+
+_init "$@" && main "$@";
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am
new file mode 100644
index 00000000000..f7504bbe8f3
--- /dev/null
+++ b/xlators/performance/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache
+
+CLEANFILES =
diff --git a/xlators/performance/io-cache/Makefile.am b/xlators/performance/io-cache/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/performance/io-cache/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am
new file mode 100644
index 00000000000..b1bf5bfbf71
--- /dev/null
+++ b/xlators/performance/io-cache/src/Makefile.am
@@ -0,0 +1,14 @@
+xlator_LTLIBRARIES = io-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+# libtool spells this link-mode flag "-avoid-version"; the unhyphenated
+# form is not a documented libtool option.
+io_cache_la_LDFLAGS = -module -avoid-version
+
+io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c
+io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = io-cache.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
new file mode 100644
index 00000000000..f367cdb88de
--- /dev/null
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -0,0 +1,1478 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "io-cache.h"
+#include <assert.h>
+#include <sys/time.h>
+
+/* Forward declaration: ioc_get_priority() is defined further down in this
+ * file but is first used in ioc_lookup_cbk(). */
+static uint32_t
+ioc_get_priority (ioc_table_t *table,
+                  const char *path);
+
+/*
+ * ioc_inode_reupdate - append the inode to the tail of its table's LRU
+ * list for the inode's weight
+ *
+ * @ioc_inode: inode to link; its own table and weight select the list
+ *
+ * Returns the same ioc_inode. The caller in this file (ioc_get_inode)
+ * invokes it with the table lock held.
+ */
+static inline ioc_inode_t *
+ioc_inode_reupdate (ioc_inode_t *ioc_inode)
+{
+        struct list_head *lru_head = NULL;
+
+        lru_head = &ioc_inode->table->inode_lru[ioc_inode->weight];
+        list_add_tail (&ioc_inode->inode_lru, lru_head);
+
+        return ioc_inode;
+}
+
+/*
+ * ioc_get_inode - fetch the ioc_inode stored in a dictionary
+ *
+ * @dict: dictionary to search
+ * @name: key under which the ioc_inode pointer is stored
+ *
+ * Returns NULL when the key is absent. Otherwise returns the stored
+ * ioc_inode, first linking it back into its table's LRU list (under the
+ * table lock) if its inode_lru entry is currently empty.
+ */
+static inline ioc_inode_t *
+ioc_get_inode (dict_t *dict,
+               char *name)
+{
+        data_t      *entry = dict_get (dict, name);
+        ioc_inode_t *found = NULL;
+        ioc_table_t *owner = NULL;
+
+        if (!entry)
+                return NULL;
+
+        found = data_to_ptr (entry);
+        owner = found->table;
+
+        ioc_table_lock (owner);
+        {
+                if (list_empty (&found->inode_lru))
+                        found = ioc_inode_reupdate (found);
+        }
+        ioc_table_unlock (owner);
+
+        return found;
+}
+
+/*
+ * ioc_inode_need_revalidate - has the inode's cache outlived the timeout?
+ *
+ * @ioc_inode: inode whose last-revalidation time-stamp (->tv) is checked
+ *
+ * Returns 1 when time_elapsed() between now and ioc_inode->tv is at least
+ * the table's cache_timeout, 0 otherwise.
+ */
+int32_t
+ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
+{
+        int8_t          need_revalidate = 0;
+        struct timeval  tv = {0,};
+        ioc_table_t    *table = ioc_inode->table;
+
+        /* the result is deliberately not inspected: if the call fails,
+         * tv keeps its zero initialiser */
+        gettimeofday (&tv, NULL);
+
+        if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout)
+                need_revalidate = 1;
+
+        return need_revalidate;
+}
+
+/*
+ * __ioc_inode_flush - destroy every page cached for the given inode
+ *
+ * @ioc_inode: inode whose page list is walked
+ *
+ * Assumes the inode lock is held by the caller. Returns the sum of the
+ * values returned by ioc_page_destroy(); a page for which it returns -1
+ * contributes nothing to the total.
+ */
+int32_t
+__ioc_inode_flush (ioc_inode_t *ioc_inode)
+{
+        ioc_page_t *page = NULL;
+        ioc_page_t *following = NULL;
+        int32_t     freed = 0;
+        int32_t     page_ret = 0;
+
+        list_for_each_entry_safe (page, following, &ioc_inode->pages, pages) {
+                page_ret = ioc_page_destroy (page);
+                if (page_ret == -1)
+                        continue;
+
+                freed += page_ret;
+        }
+
+        return freed;
+}
+
+/*
+ * ioc_inode_flush - flush an inode's cached pages and fix the accounting
+ *
+ * @ioc_inode: inode to flush
+ *
+ * Runs __ioc_inode_flush() under the inode lock; if it reports a non-zero
+ * amount, that amount is subtracted from the table's cache_used under the
+ * table lock.
+ */
+void
+ioc_inode_flush (ioc_inode_t *ioc_inode)
+{
+        int32_t freed = 0;
+
+        ioc_inode_lock (ioc_inode);
+        {
+                freed = __ioc_inode_flush (ioc_inode);
+        }
+        ioc_inode_unlock (ioc_inode);
+
+        if (!freed)
+                return;
+
+        ioc_table_lock (ioc_inode->table);
+        {
+                ioc_inode->table->cache_used -= freed;
+        }
+        ioc_table_unlock (ioc_inode->table);
+}
+
+/*
+ * ioc_utimens_cbk - utimens callback; hands the child's reply straight
+ * back up the stack
+ *
+ * @frame: call frame being unwound
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: child's return value, forwarded as is
+ * @op_errno: child's errno, forwarded as is
+ * @stbuf: child's stat buffer, forwarded as is
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_utimens_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *stbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ return 0;
+}
+
+/*
+ * ioc_utimens - utimens fop; drops the inode's cached pages, then winds
+ * the call to the child
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @loc: location whose inode context is looked up
+ * @tv: time values, handed to the child unchanged
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_utimens (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             struct timespec *tv)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+
+        inode_ctx_get (loc->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+
+        if (cached)
+                ioc_inode_flush (cached);
+
+        STACK_WIND (frame, ioc_utimens_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->utimens,
+                    loc, tv);
+        return 0;
+}
+
+/*
+ * ioc_lookup_cbk - lookup callback
+ *
+ * @frame: call frame; frame->local is the ioc_local_t set up by
+ *         ioc_lookup() (NULL when no file content was requested)
+ * @cookie: not used
+ * @this: this translator; this->private is the cache table
+ * @op_ret: child's return value; anything but 0 is unwound untouched
+ * @op_errno: child's errno
+ * @inode: inode that was looked up
+ * @stbuf: stat of the inode
+ * @dict: reply dictionary; may carry the file under "glusterfs.content"
+ *
+ * For an inode that already has an io-cache context, the cache is
+ * checked against stbuf (and flushed if stale), the revalidation
+ * time-stamp is refreshed and the inode is moved to the tail of its LRU
+ * list.
+ *
+ * When local->need_xattr covers the whole file, page 0 of the inode is
+ * either (re)filled from the content in the reply, or, if the reply has
+ * no content, used to add "glusterfs.content" to the reply. If the reply
+ * has no content and no ready page exists, the lookup is wound again.
+ *
+ * NOTE(review): the re-wind passes local->xattr_req, which ioc_lookup()
+ * in this file never assigns - confirm the intended request dictionary.
+ */
+int32_t
+ioc_lookup_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                inode_t *inode,
+                struct stat *stbuf,
+                dict_t *dict)
+{
+        ioc_inode_t *ioc_inode = NULL;
+        ioc_local_t *local = frame->local;
+        ioc_table_t *table = this->private;
+        ioc_page_t  *page = NULL;
+        data_t      *page_data = NULL;
+        data_t      *content_data = NULL;
+        char        *src = NULL;
+        char        *dst = NULL;
+        char         need_unref = 0;
+        uint8_t      cache_still_valid = 0;
+        uint32_t     weight = 0;
+        uint64_t     tmp_ioc_inode = 0;
+        char        *buf = NULL;
+        char        *tmp = NULL;
+        size_t       room = 0;
+        size_t       chunk = 0;
+        int          i = 0;
+
+        if (op_ret != 0)
+                goto out;
+
+        inode_ctx_get (inode, this, &tmp_ioc_inode);
+        ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+        if (ioc_inode) {
+                cache_still_valid = ioc_cache_still_valid (ioc_inode,
+                                                           stbuf);
+
+                if (!cache_still_valid) {
+                        ioc_inode_flush (ioc_inode);
+                }
+                /* update the time-stamp of revalidation */
+                ioc_inode_lock (ioc_inode);
+                {
+                        gettimeofday (&ioc_inode->tv, NULL);
+                }
+                ioc_inode_unlock (ioc_inode);
+
+                ioc_table_lock (ioc_inode->table);
+                {
+                        list_move_tail (&ioc_inode->inode_lru,
+                                        &table->inode_lru[ioc_inode->weight]);
+                }
+                ioc_table_unlock (ioc_inode->table);
+        }
+
+        if (local && stbuf->st_size &&
+            local->need_xattr >= stbuf->st_size) {
+                if (!ioc_inode) {
+                        weight = ioc_get_priority (table,
+                                                   local->file_loc.path);
+                        ioc_inode = ioc_inode_update (table,
+                                                      inode, weight);
+                        inode_ctx_put (inode, this,
+                                       (uint64_t)(long)ioc_inode);
+                }
+
+                ioc_inode_lock (ioc_inode);
+                {
+                        content_data = dict_get (dict, "glusterfs.content");
+                        page = ioc_page_get (ioc_inode, 0);
+
+                        if (content_data) {
+                                /* the reply carries the file: (re)build
+                                 * page 0 from it */
+                                if (page) {
+                                        dict_unref (page->ref);
+                                        free (page->vector);
+                                        page->vector = NULL;
+
+                                        ioc_table_lock (table);
+                                        {
+                                                table->cache_used -=
+                                                        page->size;
+                                        }
+                                        ioc_table_unlock (table);
+                                } else {
+                                        page = ioc_page_create (ioc_inode, 0);
+                                        ERR_ABORT (page);
+                                }
+
+                                dst = CALLOC (1, stbuf->st_size);
+                                ERR_ABORT (dst);
+                                page->ref = dict_ref (get_new_dict ());
+                                page_data = data_from_dynptr (dst,
+                                                              stbuf->st_size);
+                                dict_set (page->ref, NULL, page_data);
+
+                                src = data_to_ptr (content_data);
+                                memcpy (dst, src, stbuf->st_size);
+
+                                page->vector = CALLOC (1,
+                                                       sizeof (*page->vector));
+                                ERR_ABORT (page->vector);
+                                page->vector->iov_base = dst;
+                                page->vector->iov_len = stbuf->st_size;
+                                page->count = 1;
+
+                                page->waitq = NULL;
+                                page->size = stbuf->st_size;
+                                page->ready = 1;
+
+                                ioc_table_lock (table);
+                                {
+                                        table->cache_used += page->size;
+                                }
+                                ioc_table_unlock (table);
+
+                        } else {
+                                if (!(page && page->ready)) {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "page not present");
+
+                                        ioc_inode_unlock (ioc_inode);
+                                        STACK_WIND (frame,
+                                                    ioc_lookup_cbk,
+                                                    FIRST_CHILD (this),
+                                                    FIRST_CHILD (this)->fops->lookup,
+                                                    &local->file_loc,
+                                                    local->xattr_req);
+                                        return 0;
+                                }
+                                buf = CALLOC (1, stbuf->st_size);
+                                ERR_ABORT (buf);
+                                tmp = buf;
+                                room = stbuf->st_size;
+
+                                /* buf holds st_size bytes; never copy
+                                 * more than that, even if the cached
+                                 * page holds more */
+                                for (i = 0; i < page->count && room; i++) {
+                                        chunk = page->vector[i].iov_len;
+                                        if (chunk > room)
+                                                chunk = room;
+                                        memcpy (tmp, page->vector[i].iov_base,
+                                                chunk);
+                                        tmp += chunk;
+                                        room -= chunk;
+                                }
+
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "serving file %s from cache",
+                                        local->file_loc.path);
+
+                                if (!dict) {
+                                        need_unref = 1;
+                                        dict = dict_ref (
+                                                get_new_dict ());
+                                }
+                                dict_set (dict, "glusterfs.content",
+                                          data_from_dynptr (buf,
+                                                            stbuf->st_size));
+                        }
+
+                        ioc_inode->mtime = stbuf->st_mtime;
+                        gettimeofday (&ioc_inode->tv, NULL);
+                }
+                ioc_inode_unlock (ioc_inode);
+
+                if (content_data &&
+                    ioc_need_prune (ioc_inode->table)) {
+                        ioc_prune (ioc_inode->table);
+                }
+        }
+
+ out:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict);
+
+        /* drop the reference taken on a dictionary created above */
+        if (need_unref) {
+                dict_unref (dict);
+        }
+
+        return 0;
+}
+
+/*
+ * ioc_lookup - lookup fop
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @loc: location being looked up
+ * @xattr_req: request dictionary, passed to the child unchanged
+ *
+ * When GF_FILE_CONTENT_REQUESTED() reports a content request, an
+ * ioc_local_t recording the limit, path and inode is attached to the
+ * frame. If the inode already has a ready page 0 and the limit does not
+ * exceed the table's page_size, need_xattr is set to -1 instead.
+ *
+ * NOTE(review): local->xattr_req is left unset here although
+ * ioc_lookup_cbk() passes it on when it re-winds - confirm.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_lookup (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            dict_t *xattr_req)
+{
+        uint64_t content_limit = 0;
+
+        if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) {
+                uint64_t     tmp_ioc_inode = 0;
+                ioc_inode_t *ioc_inode = NULL;
+                ioc_page_t  *page = NULL;
+                ioc_local_t *local = CALLOC (1, sizeof (*local));
+
+                /* same out-of-memory policy as the other fops here */
+                ERR_ABORT (local);
+
+                local->need_xattr = content_limit;
+                local->file_loc.path = loc->path;
+                local->file_loc.inode = loc->inode;
+                frame->local = local;
+
+                inode_ctx_get (loc->inode, this, &tmp_ioc_inode);
+                ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+                if (ioc_inode) {
+                        ioc_inode_lock (ioc_inode);
+                        {
+                                page = ioc_page_get (ioc_inode, 0);
+                                if ((content_limit <=
+                                     ioc_inode->table->page_size) &&
+                                    page && page->ready) {
+                                        local->need_xattr = -1;
+                                }
+                        }
+                        ioc_inode_unlock (ioc_inode);
+                }
+        }
+
+        STACK_WIND (frame,
+                    ioc_lookup_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->lookup,
+                    loc,
+                    xattr_req);
+        return 0;
+}
+
+/*
+ * ioc_forget - forget callback; destroys the inode's io-cache context
+ *
+ * @this: this translator
+ * @inode: inode being forgotten
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_forget (xlator_t *this,
+            inode_t *inode)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *doomed = NULL;
+
+        inode_ctx_get (inode, this, &ctx);
+        doomed = (ioc_inode_t *)(long)ctx;
+
+        if (doomed)
+                ioc_inode_destroy (doomed);
+
+        return 0;
+}
+
+
+/*
+ * ioc_cache_validate_cbk - fstat callback of a cache-validation request
+ *
+ * @frame: validation frame set up by ioc_cache_validate(); its stack is
+ *         destroyed here instead of being unwound
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: fstat result; -1 is treated like a stale cache
+ * @op_errno: not used
+ * @stbuf: stat returned by the child
+ *
+ * If the fstat failed, or succeeded but ioc_cache_still_valid() rejects
+ * the inode against stbuf, the inode's pages are flushed and the table's
+ * cache_used is reduced by the flushed amount. In every case the
+ * revalidation time-stamp is refreshed and ioc_inode_wakeup() is called,
+ * with stbuf when the cache was found valid and NULL otherwise.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_cache_validate_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *stbuf)
+{
+ ioc_local_t *local = frame->local;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct stat *local_stbuf = stbuf;
+
+ ioc_inode = local->inode;
+
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ if (op_ret >= 0) /* stbuf is only trusted on success */
+ ioc_inode->mtime = stbuf->st_mtime;
+ }
+ ioc_inode_unlock (ioc_inode);
+ local_stbuf = NULL; /* tells ioc_inode_wakeup() the cache was stale */
+ }
+
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ if (op_ret < 0)
+ local_stbuf = NULL;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->tv, NULL); /* time-stamp of this revalidation */
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
+
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref (local->fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+/*
+ * ioc_wait_on_inode - record a page on the inode's wait queue, once
+ *
+ * @ioc_inode: inode whose waitq is searched and possibly extended
+ * @page: page to record
+ *
+ * Unless an existing waitq entry already refers to page, a new entry is
+ * pushed on the head of ioc_inode->waitq. Always returns 0.
+ */
+static int32_t
+ioc_wait_on_inode (ioc_inode_t *ioc_inode,
+                   ioc_page_t *page)
+{
+        ioc_waitq_t *entry = NULL;
+        ioc_waitq_t *waiter = NULL;
+
+        for (entry = ioc_inode->waitq; entry; entry = entry->next) {
+                if (entry->data == page)
+                        return 0;
+        }
+
+        waiter = CALLOC (1, sizeof (ioc_waitq_t));
+        ERR_ABORT (waiter);
+        waiter->data = page;
+        waiter->next = ioc_inode->waitq;
+        ioc_inode->waitq = waiter;
+
+        return 0;
+}
+
+/*
+ * ioc_cache_validate - send an fstat to the child to revalidate the cache
+ *
+ * @frame: frame of the read that triggered the validation; it is copied,
+ *         not wound
+ * @ioc_inode: inode to validate, stored in the new frame's local
+ * @fd: fd the fstat is issued on; a reference is taken for the new frame
+ * @page: not used in this function
+ *
+ * The reply is handled by ioc_cache_validate_cbk(). Always returns 0.
+ */
+static int32_t
+ioc_cache_validate (call_frame_t *frame,
+ ioc_inode_t *ioc_inode,
+ fd_t *fd,
+ ioc_page_t *page)
+{
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+
+ validate_local = CALLOC (1, sizeof (ioc_local_t));
+ ERR_ABORT (validate_local);
+ validate_frame = copy_frame (frame);
+ validate_local->fd = fd_ref (fd); /* released with fd_unref() in the callback */
+ validate_local->inode = ioc_inode;
+ validate_frame->local = validate_local;
+
+ STACK_WIND (validate_frame,
+ ioc_cache_validate_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->fstat,
+ fd);
+
+ return 0;
+}
+
+/*
+ * is_match - does path match the wildcard pattern?
+ *
+ * @path: path to test
+ * @pattern: fnmatch(3) pattern
+ *
+ * Returns 1 when fnmatch() reports a match, 0 otherwise (including when
+ * fnmatch() reports an error). FNM_NOESCAPE makes a backslash in the
+ * pattern an ordinary character.
+ */
+static inline uint32_t
+is_match (const char *path,
+          const char *pattern)
+{
+        /* fnmatch() only reads its arguments, so path needs no copy */
+        return (fnmatch (pattern, path, FNM_NOESCAPE) == 0);
+}
+
+/*
+ * ioc_get_priority - look up the cache priority configured for a path
+ *
+ * @table: table holding the pattern/priority list
+ * @path: path to classify
+ *
+ * Every list entry is tested; when several patterns match, the entry
+ * latest in the list decides. Returns 0 if nothing matches.
+ */
+static uint32_t
+ioc_get_priority (ioc_table_t *table,
+                  const char *path)
+{
+        struct ioc_priority *entry = NULL;
+        uint32_t             result = 0;
+
+        list_for_each_entry (entry, &table->priority_list, list) {
+                if (!is_match (path, entry->pattern))
+                        continue;
+
+                result = entry->priority;
+        }
+
+        return result;
+}
+
+/*
+ * ioc_open_cbk - open callback for io cache
+ *
+ * @frame: call frame; frame->local is the ioc_local_t set by ioc_open()
+ *         and is freed here
+ * @cookie: not used
+ * @this: this translator; this->private is the cache table
+ * @op_ret: child's return value; -1 skips all cache set-up
+ * @op_errno: child's errno, forwarded as is
+ * @fd: opened fd
+ *
+ * On success the inode gets an io-cache context if it has none yet,
+ * otherwise its existing context is moved to the tail of its LRU list.
+ * The fd context is set to 1 for files whose mode has S_ISGID without
+ * S_IXGRP and for O_DIRECT opens; per the comments below this disables
+ * caching on that fd.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_open_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd)
+{
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = frame->local;
+ ioc_table_t *table = this->private;
+ ioc_inode_t *ioc_inode = NULL;
+ inode_t *inode = local->file_loc.inode;
+ uint32_t weight = 0;
+ const char *path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* look for ioc_inode corresponding to this fd */
+ LOCK (&fd->inode->lock);
+ //{
+
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ if (!ioc_inode) {
+ /* this is the first time someone is opening this
+ file, assign weight
+ */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+ inode_ctx_put (fd->inode, this,
+ (uint64_t)(long)ioc_inode);
+ } else {
+ /* already known: just refresh its LRU position */
+ ioc_table_lock (ioc_inode->table);
+ //{
+ list_move_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ //}
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ //}
+ UNLOCK (&fd->inode->lock);
+
+ /* If mandatory locking has been enabled on this file,
+ we disable caching on it */
+ if (((inode->st_mode & S_ISGID) &&
+ !(inode->st_mode & S_IXGRP))) {
+ fd_ctx_set (fd, this, 1);
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)){
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set (fd, this, 1);
+ }
+ }
+
+ FREE (local);
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+ return 0;
+}
+
+/*
+ * ioc_create_cbk - create callback for io cache
+ *
+ * @frame: call frame; frame->local is the ioc_local_t set by
+ *         ioc_create() and is freed here
+ * @cookie: not used
+ * @this: this translator; this->private is the cache table
+ * @op_ret: child's return value; -1 skips all cache set-up
+ * @op_errno: child's errno, forwarded as is
+ * @fd: fd of the created file
+ * @inode: inode of the created file
+ * @buf: stat of the created file, forwarded as is
+ *
+ * On success a new io-cache context is created for the inode with the
+ * weight configured for its path. The fd context is set to 1 for files
+ * whose mode has S_ISGID without S_IXGRP and for O_DIRECT opens; per the
+ * comments below this disables caching on that fd.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_create_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct stat *buf)
+{
+ ioc_local_t *local = frame->local;
+ ioc_table_t *table = this->private;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0;
+ const char *path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ {
+ /* assign weight */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+ LOCK (&fd->inode->lock);
+ {
+ inode_ctx_put (fd->inode, this,
+ (uint64_t)(long)ioc_inode);
+ }
+ UNLOCK (&fd->inode->lock);
+ }
+ /* If mandatory locking has been enabled on this file,
+ we disable caching on it */
+ if ((inode->st_mode & S_ISGID) &&
+ !(inode->st_mode & S_IXGRP)) {
+ fd_ctx_set (fd, this, 1);
+ }
+
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT){
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set (fd, this, 1);
+ }
+
+ }
+
+ frame->local = NULL;
+ FREE (local);
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+
+ return 0;
+}
+
+/*
+ * ioc_open - open fop for io cache
+ * @frame: call frame; receives a newly allocated ioc_local_t
+ * @this: this translator
+ * @loc: location being opened; its path and inode are recorded for
+ *       ioc_open_cbk()
+ * @flags: open flags, recorded for ioc_open_cbk() and passed to the child
+ * @fd: fd passed to the child
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ fd_t *fd)
+{
+
+ ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t));
+ ERR_ABORT (local);
+
+ local->flags = flags;
+ local->file_loc.path = loc->path; /* pointer copy, not a duplicate */
+ local->file_loc.inode = loc->inode;
+
+ frame->local = local;
+
+ STACK_WIND (frame,
+ ioc_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc,
+ flags,
+ fd);
+
+ return 0;
+}
+
+/*
+ * ioc_create - create fop for io cache
+ *
+ * @frame: call frame; receives a newly allocated ioc_local_t
+ * @this: this translator
+ * @loc: location being created; only its path is recorded
+ * @flags: open flags, recorded for ioc_create_cbk() and passed on
+ * @mode: creation mode, passed to the child unchanged
+ * @fd: fd passed to the child
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_create (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ mode_t mode,
+ fd_t *fd)
+{
+ ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t));
+ ERR_ABORT (local);
+
+ local->flags = flags;
+ local->file_loc.path = loc->path; /* pointer copy, not a duplicate */
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, fd);
+ return 0;
+}
+
+
+
+
+/*
+ * ioc_release - release callback for io cache
+ *
+ * @this: not used
+ * @fd: not used
+ *
+ * Does nothing; always returns 0.
+ */
+int32_t
+ioc_release (xlator_t *this,
+ fd_t *fd)
+{
+ return 0;
+}
+
+/*
+ * ioc_readv_disabled_cbk - readv callback used when ioc_readv() bypasses
+ * the cache; hands the child's reply straight back up the stack
+ * @frame: call frame being unwound
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: child's return value, forwarded as is
+ * @op_errno: child's errno, forwarded as is
+ * @vector: data vector from the child, forwarded as is
+ * @count: number of entries in vector, forwarded as is
+ * @stbuf: stat buffer from the child, forwarded as is
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_readv_disabled_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vector,
+ int32_t count,
+ struct stat *stbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+ return 0;
+}
+
+
+/*
+ * ioc_need_prune - is the cache above its configured size?
+ *
+ * @table: cache table
+ *
+ * Samples cache_used - cache_size under the table lock. Returns 1 when
+ * that difference is positive, 0 otherwise.
+ */
+int32_t
+ioc_need_prune (ioc_table_t *table)
+{
+        int64_t excess = 0;
+
+        ioc_table_lock (table);
+        {
+                excess = table->cache_used - table->cache_size;
+        }
+        ioc_table_unlock (table);
+
+        return (excess > 0) ? 1 : 0;
+}
+
+/*
+ * dispatch_requests - walk the pages covering a read request and make
+ * the frame wait on each of them
+ *
+ * @frame: read frame; frame->local is its ioc_local_t
+ * @ioc_inode: inode being read
+ * @fd: fd handed to page faults and validation requests
+ * @offset: start of the requested region
+ * @size: length of the requested region
+ *
+ * The region is rounded out to page boundaries (table->page_size) and
+ * visited one page at a time. wait_count is raised once before the walk
+ * and ioc_frame_return() is called once after it. The cache is pruned
+ * at the end if ioc_need_prune() says so.
+ *
+ * NOTE(review): when ioc_page_create() returns NULL the failure is only
+ * logged; trav is still passed to ioc_wait_on_page() and dereferenced.
+ */
+static void
+dispatch_requests (call_frame_t *frame,
+ ioc_inode_t *ioc_inode,
+ fd_t *fd,
+ off_t offset,
+ size_t size)
+{
+ ioc_local_t *local = frame->local;
+ ioc_table_t *table = ioc_inode->table;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /* if a page exists, do we need
+ to validate it? */
+
+ rounded_offset = floor (offset, table->page_size);
+ rounded_end = roof (offset + size, table->page_size);
+ trav_offset = rounded_offset;
+
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate (ioc_inode);
+
+ while (trav_offset < rounded_end) {
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+
+ ioc_inode_lock (ioc_inode);
+ //{
+
+ /* look for requested region in the cache */
+ trav = ioc_page_get (ioc_inode, trav_offset);
+
+ /* part of the request that falls inside this page */
+ local_offset = max (trav_offset, offset);
+ trav_size = min (((offset+size) - local_offset),
+ table->page_size);
+
+ if (!trav) {
+ /* page not in cache, we need to generate page fault */
+ trav = ioc_page_create (ioc_inode, trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_log (frame->this->name, GF_LOG_CRITICAL,
+ "ioc_page_create returned NULL");
+ }
+ }
+
+ ioc_wait_on_page (trav, frame, local_offset, trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate) {
+ /* fresh enough */
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "cache hit for trav_offset=%"PRId64""
+ "/local_offset=%"PRId64"",
+ trav_offset, local_offset);
+ waitq = ioc_page_wakeup (trav);
+ } else {
+ /* if waitq already exists, fstat revalidate is
+ already on the way */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
+ ioc_wait_on_inode (ioc_inode, trav);
+ }
+ }
+
+ //}
+ ioc_inode_unlock (ioc_inode);
+
+ /* the calls below are made after the inode lock is dropped */
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+
+ if (fault) {
+ fault = 0;
+ /* new page created: raise a page fault for it (nothing in
+ * this function touches table->cache_used) */
+ ioc_page_fault (ioc_inode, frame, fd, trav_offset);
+ }
+
+ if (need_validate) {
+ need_validate = 0;
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "sending validate request for "
+ "inode(%"PRId64") at offset=%"PRId64"",
+ fd->inode->ino, trav_offset);
+ ioc_cache_validate (frame, ioc_inode, fd, trav);
+ }
+
+ trav_offset += table->page_size;
+ }
+
+ /* pairs with the wait_count++ above */
+ ioc_frame_return (frame);
+
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
+
+ return;
+}
+
+
+/*
+ * ioc_readv - readv fop for io cache
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @fd: fd to read from
+ * @size: number of bytes requested
+ * @offset: offset to read at
+ *
+ * The cache is bypassed, and the read wound straight to the child, when
+ * the inode has no io-cache context or when fd_ctx_get() returns 0 for
+ * this fd (caching disabled on the fd). Otherwise an ioc_local_t
+ * describing the request is attached to the frame, the inode is moved
+ * to the tail of its LRU list and dispatch_requests() takes over.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_readv (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           size_t size,
+           off_t offset)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *ioc_inode = NULL;
+        ioc_local_t *local = NULL;
+        ioc_table_t *table = NULL;
+
+        inode_ctx_get (fd->inode, this, &ctx);
+        ioc_inode = (ioc_inode_t *)(long)ctx;
+
+        /* caching disabled for the inode or for this fd:
+         * go ahead with normal readv */
+        if (!ioc_inode || !fd_ctx_get (fd, this, NULL)) {
+                STACK_WIND (frame,
+                            ioc_readv_disabled_cbk,
+                            FIRST_CHILD (frame->this),
+                            FIRST_CHILD (frame->this)->fops->readv,
+                            fd,
+                            size,
+                            offset);
+                return 0;
+        }
+
+        local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t));
+        ERR_ABORT (local);
+        INIT_LIST_HEAD (&local->fill_list);
+
+        frame->local = local;
+        local->pending_offset = offset;
+        local->pending_size = size;
+        local->offset = offset;
+        local->size = size;
+        local->inode = ioc_inode;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
+                frame, offset, size);
+
+        table = ioc_inode->table;
+
+        ioc_table_lock (table);
+        {
+                list_move_tail (&ioc_inode->inode_lru,
+                                &table->inode_lru[ioc_inode->weight]);
+        }
+        ioc_table_unlock (table);
+
+        dispatch_requests (frame, ioc_inode, fd, offset, size);
+
+        return 0;
+}
+
+/*
+ * ioc_writev_cbk - writev callback; flushes the inode's cached pages
+ * before unwinding
+ *
+ * @frame: call frame; frame->local carries the fd stored by ioc_writev()
+ * @cookie: not used
+ * @this: this translator
+ * @op_ret: child's return value, forwarded as is
+ * @op_errno: child's errno, forwarded as is
+ * @stbuf: child's stat buffer, forwarded as is
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_writev_cbk (call_frame_t *frame,
+                void *cookie,
+                xlator_t *this,
+                int32_t op_ret,
+                int32_t op_errno,
+                struct stat *stbuf)
+{
+        ioc_local_t *local = frame->local;
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+
+        inode_ctx_get (local->fd->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+
+        if (cached)
+                ioc_inode_flush (cached);
+
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+/*
+ * ioc_writev - writev fop; flushes the inode's cached pages, then winds
+ * the write to the child
+ *
+ * @frame: call frame; receives a newly allocated ioc_local_t
+ * @this: this translator
+ * @fd: fd being written; stored (without a reference) for the callback
+ * @vector: data to write, passed on unchanged
+ * @count: number of entries in vector
+ * @offset: offset to write at
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_writev (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            struct iovec *vector,
+            int32_t count,
+            off_t offset)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+        ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t));
+
+        ERR_ABORT (local);
+
+        /* TODO: why is it not fd_ref'ed */
+        local->fd = fd;
+        frame->local = local;
+
+        inode_ctx_get (fd->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+        if (cached)
+                ioc_inode_flush (cached);
+
+        STACK_WIND (frame,
+                    ioc_writev_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev,
+                    fd,
+                    vector,
+                    count,
+                    offset);
+
+        return 0;
+}
+
+/*
+ * ioc_truncate_cbk - callback shared by ioc_truncate() and
+ * ioc_ftruncate(); hands the child's reply straight back up the stack
+ *
+ * @frame: call frame being unwound
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: child's return value, forwarded as is
+ * @op_errno: child's errno, forwarded as is
+ * @buf: child's stat buffer, forwarded as is
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_truncate_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct stat *buf)
+{
+
+ STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+}
+
+/*
+ * ioc_truncate - truncate fop; flushes the inode's cached pages, then
+ * winds the call to the child
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @loc: location whose inode context is looked up
+ * @offset: new length, passed on unchanged
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_truncate (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              off_t offset)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+
+        inode_ctx_get (loc->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+
+        if (cached)
+                ioc_inode_flush (cached);
+
+        STACK_WIND (frame,
+                    ioc_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate,
+                    loc,
+                    offset);
+        return 0;
+}
+
+/*
+ * ioc_ftruncate - ftruncate fop; flushes the inode's cached pages, then
+ * winds the call to the child
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @fd: fd whose inode context is looked up
+ * @offset: new length, passed on unchanged
+ *
+ * The reply is handled by ioc_truncate_cbk(). Always returns 0.
+ */
+int32_t
+ioc_ftruncate (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               off_t offset)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+
+        inode_ctx_get (fd->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+
+        if (cached)
+                ioc_inode_flush (cached);
+
+        STACK_WIND (frame,
+                    ioc_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->ftruncate,
+                    fd,
+                    offset);
+        return 0;
+}
+
+/*
+ * ioc_lk_cbk - lk callback; hands the child's reply straight back up the
+ * stack
+ *
+ * @frame: call frame being unwound
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: child's return value, forwarded as is
+ * @op_errno: child's errno, forwarded as is
+ * @lock: lock description from the child, forwarded as is
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, lock);
+
+        return 0;
+}
+
+/*
+ * ioc_lk - lk fop; refreshes the inode's revalidation time-stamp, then
+ * winds the call to the child
+ *
+ * @frame: call frame
+ * @this: this translator
+ * @fd: fd the lock request applies to
+ * @cmd: lock command, passed on unchanged
+ * @lock: lock description, passed on unchanged
+ *
+ * If the inode has no io-cache context the call is unwound with -1 and
+ * EBADFD without reaching the child. Always returns 0.
+ */
+int32_t
+ioc_lk (call_frame_t *frame,
+        xlator_t *this,
+        fd_t *fd,
+        int32_t cmd,
+        struct flock *lock)
+{
+        uint64_t     ctx = 0;
+        ioc_inode_t *cached = NULL;
+
+        inode_ctx_get (fd->inode, this, &ctx);
+        cached = (ioc_inode_t *)(long)ctx;
+
+        if (cached) {
+                ioc_inode_lock (cached);
+                {
+                        gettimeofday (&cached->tv, NULL);
+                }
+                ioc_inode_unlock (cached);
+
+                STACK_WIND (frame, ioc_lk_cbk,
+                            FIRST_CHILD (this),
+                            FIRST_CHILD (this)->fops->lk, fd, cmd, lock);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "inode context is NULL: returning EBADFD");
+        STACK_UNWIND (frame, -1, EBADFD, NULL);
+        return 0;
+}
+
+/*
+ * ioc_get_priority_list - parse the "priority" option into a list
+ *
+ * @opt_str: comma separated "pattern:priority" pairs,
+ *           e.g. "*.jpg:1,abc*:2"; not modified
+ * @first: list head that receives one struct ioc_priority per pair
+ *
+ * Returns the highest priority found (0 for an empty string), or -1 when
+ * a pair lacks its pattern or priority, or when the priority is not a
+ * number. On -1 the entries appended so far, including the one being
+ * parsed, are left on the list for the caller to dispose of.
+ */
+int32_t
+ioc_get_priority_list (const char *opt_str, struct list_head *first)
+{
+        int32_t              max_pri = 0;
+        int32_t              ret = -1;
+        char                *tmp_str = NULL;
+        char                *tmp_str1 = NULL;
+        char                *tmp_str2 = NULL;
+        char                *dup_str = NULL;
+        char                *stripe_str = NULL;
+        char                *pattern = NULL;
+        char                *priority = NULL;
+        char                *string = NULL;
+        struct ioc_priority *curr = NULL;
+
+        /* strtok_r() writes into its input, so work on a private copy */
+        string = strdup (opt_str);
+        ERR_ABORT (string);
+
+        /* Get the pattern for cache priority.
+         * "option priority *.jpg:1,abc*:2" etc
+         */
+        /* TODO: inode_lru in table is statically hard-coded to 5,
+         * should be changed to run-time configuration
+         */
+        stripe_str = strtok_r (string, ",", &tmp_str);
+        while (stripe_str) {
+                curr = CALLOC (1, sizeof (struct ioc_priority));
+                ERR_ABORT (curr);
+                list_add_tail (&curr->list, first);
+
+                dup_str = strdup (stripe_str);
+                ERR_ABORT (dup_str);
+                pattern = strtok_r (dup_str, ":", &tmp_str1);
+                if (!pattern)
+                        goto out;
+                priority = strtok_r (NULL, ":", &tmp_str1);
+                if (!priority)
+                        goto out;
+                gf_log ("io-cache",
+                        GF_LOG_DEBUG,
+                        "ioc priority : pattern %s : priority %s",
+                        pattern,
+                        priority);
+                curr->pattern = strdup (pattern);
+                ERR_ABORT (curr->pattern);
+                curr->priority = strtol (priority, &tmp_str2, 0);
+                if (tmp_str2 && (*tmp_str2))
+                        goto out;
+
+                max_pri = max (max_pri, curr->priority);
+
+                /* pattern and priority pointed into dup_str; both have
+                 * been consumed, so the scratch copy can go */
+                free (dup_str);
+                dup_str = NULL;
+
+                stripe_str = strtok_r (NULL, ",", &tmp_str);
+        }
+
+        ret = max_pri;
+
+ out:
+        free (dup_str);
+        free (string);
+
+        return ret;
+}
+
+/*
+ * init - set up the io-cache translator
+ * @this: translator instance; on success this->private is the new table
+ *
+ * Requires exactly one child. Reads the "page-size", "cache-size",
+ * "cache-timeout" and "priority" options, falling back to IOC_PAGE_SIZE,
+ * IOC_CACHE_SIZE and a one second timeout, and allocates one inode LRU
+ * list per priority level.
+ *
+ * Returns 0 on success, -1 on a configuration error; in the latter case
+ * everything allocated here is released again.
+ */
+int32_t
+init (xlator_t *this)
+{
+        ioc_table_t         *table = NULL;
+        dict_t              *options = this->options;
+        uint32_t             index = 0;
+        char                *page_size_string = NULL;
+        char                *cache_size_string = NULL;
+        struct ioc_priority *curr = NULL;
+        struct ioc_priority *tmp = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: io-cache not configured with exactly "
+                        "one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        table = (void *) CALLOC (1, sizeof (*table));
+        ERR_ABORT (table);
+
+        table->xl = this;
+        table->page_size = IOC_PAGE_SIZE;
+        table->cache_size = IOC_CACHE_SIZE;
+        /* initialised before the first failure exit so the cleanup code
+         * can always walk the list */
+        INIT_LIST_HEAD (&table->priority_list);
+
+        if (dict_get (options, "page-size"))
+                page_size_string = data_to_str (dict_get (options,
+                                                          "page-size"));
+
+        if (page_size_string) {
+                if (gf_string2bytesize (page_size_string,
+                                        &table->page_size) != 0) {
+                        gf_log ("io-cache", GF_LOG_ERROR,
+                                "invalid number format \"%s\" of "
+                                "\"option page-size\"",
+                                page_size_string);
+                        goto err;
+                }
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "using page-size %"PRIu64"", table->page_size);
+        }
+
+        if (dict_get (options, "cache-size"))
+                cache_size_string = data_to_str (dict_get (options,
+                                                           "cache-size"));
+        if (cache_size_string) {
+                if (gf_string2bytesize (cache_size_string,
+                                        &table->cache_size) != 0) {
+                        gf_log ("io-cache", GF_LOG_ERROR,
+                                "invalid number format \"%s\" of "
+                                "\"option cache-size\"",
+                                cache_size_string);
+                        goto err;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "using cache-size %"PRIu64"", table->cache_size);
+        }
+
+        table->cache_timeout = 1;
+
+        if (dict_get (options, "cache-timeout")) {
+                table->cache_timeout =
+                        data_to_uint32 (dict_get (options,
+                                                  "cache-timeout"));
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Using %d seconds to revalidate cache",
+                        table->cache_timeout);
+        }
+
+        if (dict_get (options, "priority")) {
+                char *option_list = data_to_str (dict_get (options,
+                                                           "priority"));
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "option path %s", option_list);
+                /* parse the list of pattern:priority */
+                table->max_pri = ioc_get_priority_list (option_list,
+                                                        &table->priority_list);
+
+                if (table->max_pri == -1)
+                        goto err;
+        }
+        /* priorities run from 0 to max_pri inclusive: one LRU list each */
+        table->max_pri ++;
+        INIT_LIST_HEAD (&table->inodes);
+
+        table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head));
+        ERR_ABORT (table->inode_lru);
+        for (index = 0; index < (table->max_pri); index++)
+                INIT_LIST_HEAD (&table->inode_lru[index]);
+
+        pthread_mutex_init (&table->table_lock, NULL);
+        this->private = table;
+        return 0;
+
+ err:
+        /* drop whatever the priority parser managed to append */
+        list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
+                free (curr->pattern);
+                FREE (curr);
+        }
+        FREE (table);
+
+        return -1;
+}
+
+/*
+ * fini - release what init() allocated
+ *
+ * @this: translator instance; this->private is reset to NULL
+ *
+ * Frees the priority list entries, the inode LRU array and the table
+ * itself. Does nothing when this->private is NULL.
+ */
+void
+fini (xlator_t *this)
+{
+        ioc_table_t         *table = this->private;
+        struct ioc_priority *curr = NULL;
+        struct ioc_priority *tmp = NULL;
+
+        if (!table)
+                return;
+
+        list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
+                free (curr->pattern);
+                FREE (curr);
+        }
+
+        FREE (table->inode_lru);
+
+        pthread_mutex_destroy (&table->table_lock);
+        FREE (table);
+
+        this->private = NULL;
+        return;
+}
+
+struct xlator_fops fops = { /* fops implemented in this file */
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .utimens = ioc_utimens,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk
+};
+
+struct xlator_mops mops = { /* intentionally empty: nothing is overridden */
+};
+
+struct xlator_cbks cbks = { /* inode/fd life-cycle callbacks */
+ .forget = ioc_forget,
+ .release = ioc_release
+};
+
+struct volume_options options[] = { /* options read by init() */
+ { .key = {"priority"}, /* "pattern:priority,..." list */
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT, /* init() logs it as seconds */
+ .min = 0,
+ .max = 60
+ },
+ { .key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 16 * GF_UNIT_KB,
+ .max = 4 * GF_UNIT_MB
+ },
+ { .key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = 6 * GF_UNIT_GB
+ },
+ { .key = {NULL} }, /* end-of-table marker */
+};
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
new file mode 100644
index 00000000000..e997f6e7ca6
--- /dev/null
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -0,0 +1,330 @@
+/*
+ Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __IO_CACHE_H
+#define __IO_CACHE_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include "compat-errno.h"
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "call-stub.h"
+#include <sys/time.h>
+#include <fnmatch.h>
+
+#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
+#define IOC_CACHE_SIZE (32 * 1024 * 1024)
+
+struct ioc_table;
+struct ioc_local;
+struct ioc_page;
+struct ioc_inode;
+
+/*
+ * ioc_priority - one pattern/priority pair.
+ * NOTE(review): presumably produced by ioc_get_priority_list() from the
+ * "priority" option and linked on ioc_table.priority_list -- confirm.
+ */
+struct ioc_priority {
+ struct list_head list; /* list linkage */
+ char *pattern; /* pattern string (<fnmatch.h> is included by this header) */
+ uint32_t priority; /* priority associated with the pattern */
+};
+
+/*
+ * ioc_waitq - singly linked list node describing one waiter
+ *
+ * @next: pointer to the next node in the wait queue
+ * @data: opaque payload; ioc_wait_on_page() stores the waiting frame
+ *        here, while ioc_inode_wakeup() reads entries of the inode waitq
+ *        as ioc_page_t pointers
+ * @pending_offset: file offset the waiter still needs (passed to
+ *                  ioc_frame_fill() on wakeup)
+ * @pending_size: number of bytes the waiter still needs from that offset
+ */
+struct ioc_waitq {
+ struct ioc_waitq *next;
+ void *data;
+ off_t pending_offset;
+ size_t pending_size;
+};
+
+/*
+ * ioc_fill - one piece of cached data queued on a frame for its reply
+ *
+ * ioc_frame_fill() creates one of these per page contribution and keeps
+ * the frame's fill_list sorted by @offset; ioc_frame_unwind() flattens the
+ * list into a single iovec array.
+ */
+struct ioc_fill {
+ struct list_head list; /* list of ioc_fill structures of a frame */
+ off_t offset; /* offset of the page the data was taken from */
+ size_t size; /* number of bytes contributed by that page */
+ struct iovec *vector; /* iovec subset of the page covering those bytes */
+ int32_t count; /* number of entries in vector */
+ dict_t *refs; /* reference taken on the page's ref dict */
+};
+
+/*
+ * ioc_local - per-frame state of the io-cache translator.
+ * Only fields whose use is visible in ioc-inode.c/page.c are described;
+ * the remaining ones are used by code outside this chunk.
+ */
+struct ioc_local {
+ mode_t mode;
+ int32_t flags;
+ loc_t file_loc;
+ off_t offset; /* logged by ioc_frame_unwind() as the request offset */
+ size_t size; /* logged by ioc_frame_unwind() as the request size */
+ int32_t op_ret; /* running result; ioc_frame_fill() adds copied bytes, ioc_page_error() overwrites it */
+ int32_t op_errno; /* errno handed to STACK_UNWIND */
+ struct list_head fill_list; /* list of ioc_fill structures */
+ off_t pending_offset; /* offset from this frame should continue */
+ size_t pending_size; /* size of data this frame is waiting on */
+ struct ioc_inode *inode; /* inode this frame operates on */
+ int32_t wait_count; /* number of pages the frame is still waiting on */
+ pthread_mutex_t local_lock; /* protects wait_count and op_ret/op_errno updates */
+ struct ioc_waitq *waitq;
+ void *stub;
+ fd_t *fd; /* fd used for page faults; a fault frame holds its own reference */
+ int32_t need_xattr;
+ dict_t *xattr_req;
+};
+
+/*
+ * ioc_page - structure to store one page of data from a file
+ *
+ * A page is linked on both the pages list and the page_lru list of its
+ * inode (see ioc_page_create()).
+ */
+struct ioc_page {
+ struct list_head pages; /* linkage in ioc_inode.pages */
+ struct list_head page_lru; /* linkage in ioc_inode.page_lru */
+ struct ioc_inode *inode; /* inode this page belongs to */
+ struct ioc_priority *priority;
+ char dirty;
+ char ready; /* set to 1 by ioc_page_wakeup(); cleared before a re-fault in ioc_inode_wakeup() */
+ struct iovec *vector; /* copy (iov_dup) of the iovec returned by the child readv */
+ int32_t count; /* number of entries in vector */
+ off_t offset; /* file offset of the page, rounded with floor() to page_size */
+ size_t size; /* bytes actually held, computed with iov_length() */
+ struct ioc_waitq *waitq; /* frames waiting for this page */
+ dict_t *ref; /* reference on the reply's rsp_refs dict */
+ pthread_mutex_t page_lock;
+};
+
+/*
+ * ioc_inode - io-cache state kept per cached file
+ */
+struct ioc_inode {
+ struct ioc_table *table; /* owning table */
+ struct list_head pages; /* list of pages of this inode */
+ struct list_head inode_list; /* list of inodes, maintained by io-cache translator */
+ struct list_head inode_lru; /* linkage in table->inode_lru[weight] */
+ struct list_head page_lru; /* pages of this inode; hits are moved to the tail */
+ struct ioc_waitq *waitq; /* entries drained by ioc_inode_wakeup() */
+ pthread_mutex_t inode_lock;
+ uint32_t weight; /* weight of the inode, increases on each read */
+ time_t mtime; /* mtime of the server file when last cached */
+ struct timeval tv; /* time-stamp at last re-validate */
+};
+
+/*
+ * ioc_table - translator-wide cache state, stored in xlator->private
+ */
+struct ioc_table {
+ uint64_t page_size; /* size of a page; also the size of each fault readv */
+ uint64_t cache_size; /* limit that ioc_prune() trims cache_used towards */
+ uint64_t cache_used; /* bytes currently accounted to cached pages */
+ struct list_head inodes; /* list of inodes cached */
+ struct list_head active;
+ struct list_head *inode_lru; /* array of max_pri list heads, indexed by inode weight */
+ struct list_head priority_list; /* filled by ioc_get_priority_list() in init() */
+ int32_t readv_count;
+ pthread_mutex_t table_lock;
+ xlator_t *xl; /* translator; its name is used for logging */
+ uint32_t inode_count; /* number of ioc_inode objects linked on inodes */
+ int32_t cache_timeout;
+ int32_t max_pri; /* number of entries in inode_lru */
+};
+
+typedef struct ioc_table ioc_table_t;
+typedef struct ioc_local ioc_local_t;
+typedef struct ioc_page ioc_page_t;
+typedef struct ioc_inode ioc_inode_t;
+typedef struct ioc_waitq ioc_waitq_t;
+typedef struct ioc_fill ioc_fill_t;
+
+void *
+str_to_ptr (char *string);
+
+char *
+ptr_to_str (void *ptr);
+
+int32_t
+ioc_readv_disabled_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vector,
+ int32_t count,
+ struct stat *stbuf);
+
+ioc_page_t *
+ioc_page_get (ioc_inode_t *ioc_inode,
+ off_t offset);
+
+ioc_page_t *
+ioc_page_create (ioc_inode_t *ioc_inode,
+ off_t offset);
+
+void
+ioc_page_fault (ioc_inode_t *ioc_inode,
+ call_frame_t *frame,
+ fd_t *fd,
+ off_t offset);
+void
+ioc_wait_on_page (ioc_page_t *page,
+ call_frame_t *frame,
+ off_t offset,
+ size_t size);
+
+ioc_waitq_t *
+ioc_page_wakeup (ioc_page_t *page);
+
+void
+ioc_page_flush (ioc_page_t *page);
+
+ioc_waitq_t *
+ioc_page_error (ioc_page_t *page,
+ int32_t op_ret,
+ int32_t op_errno);
+void
+ioc_page_purge (ioc_page_t *page);
+
+void
+ioc_frame_return (call_frame_t *frame);
+
+void
+ioc_waitq_return (ioc_waitq_t *waitq);
+
+void
+ioc_frame_fill (ioc_page_t *page,
+ call_frame_t *frame,
+ off_t offset,
+ size_t size);
+
+/*
+ * Lock/unlock helpers for inode, table, local and page objects.
+ *
+ * Each helper writes a GF_LOG_DEBUG line naming the object and then calls
+ * pthread_mutex_lock/unlock on the mutex embedded in it (the message is
+ * emitted before the mutex call).  The argument is expanded more than
+ * once, so it must not have side effects; it is parenthesized so that any
+ * pointer-valued expression can be passed.
+ */
+#define ioc_inode_lock(ioc_inode)                                       \
+        do {                                                            \
+                gf_log ((ioc_inode)->table->xl->name, GF_LOG_DEBUG,     \
+                        "locked inode(%p)", (ioc_inode));               \
+                pthread_mutex_lock (&(ioc_inode)->inode_lock);          \
+        } while (0)
+
+
+#define ioc_inode_unlock(ioc_inode)                                     \
+        do {                                                            \
+                gf_log ((ioc_inode)->table->xl->name, GF_LOG_DEBUG,     \
+                        "unlocked inode(%p)", (ioc_inode));             \
+                pthread_mutex_unlock (&(ioc_inode)->inode_lock);        \
+        } while (0)
+
+
+#define ioc_table_lock(table)                                           \
+        do {                                                            \
+                gf_log ((table)->xl->name, GF_LOG_DEBUG,                \
+                        "locked table(%p)", (table));                   \
+                pthread_mutex_lock (&(table)->table_lock);              \
+        } while (0)
+
+
+#define ioc_table_unlock(table)                                         \
+        do {                                                            \
+                gf_log ((table)->xl->name, GF_LOG_DEBUG,                \
+                        "unlocked table(%p)", (table));                 \
+                pthread_mutex_unlock (&(table)->table_lock);            \
+        } while (0)
+
+
+#define ioc_local_lock(local)                                           \
+        do {                                                            \
+                gf_log ((local)->inode->table->xl->name, GF_LOG_DEBUG,  \
+                        "locked local(%p)", (local));                   \
+                pthread_mutex_lock (&(local)->local_lock);              \
+        } while (0)
+
+
+#define ioc_local_unlock(local)                                         \
+        do {                                                            \
+                gf_log ((local)->inode->table->xl->name, GF_LOG_DEBUG,  \
+                        "unlocked local(%p)", (local));                 \
+                pthread_mutex_unlock (&(local)->local_lock);            \
+        } while (0)
+
+
+#define ioc_page_lock(page)                                             \
+        do {                                                            \
+                gf_log ((page)->inode->table->xl->name, GF_LOG_DEBUG,   \
+                        "locked page(%p)", (page));                     \
+                pthread_mutex_lock (&(page)->page_lock);                \
+        } while (0)
+
+
+#define ioc_page_unlock(page)                                           \
+        do {                                                            \
+                gf_log ((page)->inode->table->xl->name, GF_LOG_DEBUG,   \
+                        "unlocked page(%p)", (page));                   \
+                pthread_mutex_unlock (&(page)->page_lock);              \
+        } while (0)
+
+
+/*
+ * time_elapsed - difference between two timestamps in whole seconds
+ *
+ * @now:  later timestamp
+ * @then: earlier timestamp
+ *
+ * Only the tv_sec members are compared; tv_usec is ignored.  The
+ * difference is converted to uint64_t, so the caller is expected to pass
+ * @now >= @then.
+ */
+static inline uint64_t
+time_elapsed (struct timeval *now,
+              struct timeval *then)
+{
+        uint64_t seconds = now->tv_sec - then->tv_sec;
+
+        return seconds;
+}
+
+ioc_inode_t *
+ioc_inode_search (ioc_table_t *table,
+ inode_t *inode);
+
+void
+ioc_inode_destroy (ioc_inode_t *ioc_inode);
+
+ioc_inode_t *
+ioc_inode_update (ioc_table_t *table,
+ inode_t *inode,
+ uint32_t weight);
+
+int64_t
+ioc_page_destroy (ioc_page_t *page);
+
+int32_t
+__ioc_inode_flush (ioc_inode_t *ioc_inode);
+
+void
+ioc_inode_flush (ioc_inode_t *ioc_inode);
+
+void
+ioc_inode_wakeup (call_frame_t *frame,
+ ioc_inode_t *ioc_inode,
+ struct stat *stbuf);
+
+int8_t
+ioc_cache_still_valid (ioc_inode_t *ioc_inode,
+ struct stat *stbuf);
+
+int32_t
+ioc_prune (ioc_table_t *table);
+
+int32_t
+ioc_need_prune (ioc_table_t *table);
+
+#endif /* __IO_CACHE_H */
diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c
new file mode 100644
index 00000000000..2e2e561dd23
--- /dev/null
+++ b/xlators/performance/io-cache/src/ioc-inode.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "io-cache.h"
+
+
+/*
+ * str_to_ptr - parse the hexadecimal text form of a pointer
+ * @string: string holding the pointer value in base 16
+ *
+ * The text is parsed with strtoul() in base 16 and the numeric value is
+ * cast to a pointer.  No error reporting is done.
+ */
+void *
+str_to_ptr (char *string)
+{
+        unsigned long value = strtoul (string, NULL, 16);
+
+        return (void *) value;
+}
+
+
+/*
+ * ptr_to_str - format a pointer as a newly allocated string
+ * @ptr: pointer
+ *
+ * Returns a heap string produced with the "%p" conversion, which the
+ * caller must free(), or NULL if asprintf() fails.  (asprintf() leaves
+ * its output pointer unspecified on failure, so it is never returned
+ * as-is.)
+ */
+char *
+ptr_to_str (void *ptr)
+{
+        char *str = NULL;
+
+        if (asprintf (&str, "%p", ptr) < 0)
+                return NULL;
+
+        return str;
+}
+
+/*
+ * ioc_inode_wakeup - resume the entries queued on an inode's waitq
+ *
+ * @frame:     frame on whose behalf the wakeup runs; frame->local supplies
+ *             the fd used when a page has to be faulted again
+ * @ioc_inode: inode whose waitq is drained
+ * @stbuf:     fresh stat of the file, or NULL, in which case the cache is
+ *             treated as invalid
+ *
+ * The waitq is detached under the inode lock and the cache validity is
+ * evaluated in the same locked region, as ioc_cache_still_valid() assumes
+ * a locked inode.  Each entry's data is read as an ioc_page_t: if the
+ * cache is still valid the page is woken up and its waiters returned,
+ * otherwise a ready page is marked not ready and faulted again.  Every
+ * waitq entry is freed.
+ */
+void
+ioc_inode_wakeup (call_frame_t *frame,
+                  ioc_inode_t *ioc_inode,
+                  struct stat *stbuf)
+{
+        ioc_waitq_t *waiter = NULL, *waited = NULL;
+        ioc_waitq_t *page_waitq = NULL;
+        int8_t       cache_still_valid = 0;
+        ioc_local_t *local = frame->local;
+        int8_t       need_fault = 0;
+        ioc_page_t  *waiter_page = NULL;
+
+        ioc_inode_lock (ioc_inode);
+        {
+                waiter = ioc_inode->waitq;
+                ioc_inode->waitq = NULL;
+
+                /* without a stat there is nothing to validate against */
+                if (stbuf)
+                        cache_still_valid =
+                                ioc_cache_still_valid (ioc_inode, stbuf);
+        }
+        ioc_inode_unlock (ioc_inode);
+
+        if (!waiter) {
+                gf_log (frame->this->name, GF_LOG_DEBUG,
+                        "cache validate called without any "
+                        "page waiting to be validated");
+        }
+
+        while (waiter) {
+                waiter_page = waiter->data;
+                page_waitq = NULL;
+
+                if (waiter_page) {
+                        if (cache_still_valid) {
+                                /* cache valid, wake up page */
+                                ioc_inode_lock (ioc_inode);
+                                {
+                                        page_waitq =
+                                                ioc_page_wakeup (waiter_page);
+                                }
+                                ioc_inode_unlock (ioc_inode);
+                                if (page_waitq)
+                                        ioc_waitq_return (page_waitq);
+                        } else {
+                                /* cache invalid, generate page fault and set
+                                 * page->ready = 0, to avoid double faults
+                                 */
+                                ioc_inode_lock (ioc_inode);
+
+                                if (waiter_page->ready) {
+                                        waiter_page->ready = 0;
+                                        need_fault = 1;
+                                } else {
+                                        gf_log (frame->this->name,
+                                                GF_LOG_DEBUG,
+                                                "validate frame(%p) is waiting "
+                                                "for in-transit page = %p",
+                                                frame, waiter_page);
+                                }
+
+                                ioc_inode_unlock (ioc_inode);
+
+                                /* the fault itself is issued outside the
+                                 * inode lock */
+                                if (need_fault) {
+                                        need_fault = 0;
+                                        ioc_page_fault (ioc_inode, frame,
+                                                        local->fd,
+                                                        waiter_page->offset);
+                                }
+                        }
+                }
+
+                waited = waiter;
+                waiter = waiter->next;
+
+                waited->data = NULL;
+                free (waited);
+        }
+}
+
+/*
+ * ioc_inode_update - create a new ioc_inode_t structure and add it to
+ *                    the table
+ *
+ * @table:  io-cache table the new object is linked into
+ * @inode:  inode structure (not referenced by the code below)
+ * @weight: index of the table->inode_lru list the object is appended to;
+ *          also stored as the inode's weight
+ *
+ * Returns the new object.  Its mutex, weight and page lists are set up
+ * before it is linked on the table lists, so the object is fully
+ * initialized by the time it becomes reachable through the table.
+ *
+ * NOTE(review): @weight is used as an array index without being compared
+ * against table->max_pri -- callers must pass a value below it.
+ *
+ * not for external reference
+ */
+ioc_inode_t *
+ioc_inode_update (ioc_table_t *table,
+                  inode_t *inode,
+                  uint32_t weight)
+{
+        ioc_inode_t *ioc_inode = CALLOC (1, sizeof (ioc_inode_t));
+        ERR_ABORT (ioc_inode);
+
+        ioc_inode->table = table;
+        ioc_inode->weight = weight;
+        pthread_mutex_init (&ioc_inode->inode_lock, NULL);
+
+        /* initialize the list for pages */
+        INIT_LIST_HEAD (&ioc_inode->pages);
+        INIT_LIST_HEAD (&ioc_inode->page_lru);
+
+        ioc_table_lock (table);
+        {
+                table->inode_count++;
+                list_add (&ioc_inode->inode_list, &table->inodes);
+                list_add_tail (&ioc_inode->inode_lru,
+                               &table->inode_lru[weight]);
+
+                gf_log (table->xl->name,
+                        GF_LOG_DEBUG,
+                        "adding to inode_lru[%u]", weight);
+        }
+        ioc_table_unlock (table);
+
+        return ioc_inode;
+}
+
+
+/*
+ * ioc_inode_destroy - tear down an ioc_inode_t object
+ *
+ * @ioc_inode: object to destroy
+ *
+ * Under the table lock the object is taken off both table lists and the
+ * inode count is decremented; afterwards its pages are flushed, its mutex
+ * destroyed and the memory freed.
+ *
+ * to be called only from ioc_forget.
+ */
+void
+ioc_inode_destroy (ioc_inode_t *ioc_inode)
+{
+        ioc_table_t *owner = ioc_inode->table;
+
+        ioc_table_lock (owner);
+        {
+                owner->inode_count--;
+                list_del (&ioc_inode->inode_list);
+                list_del (&ioc_inode->inode_lru);
+        }
+        ioc_table_unlock (owner);
+
+        ioc_inode_flush (ioc_inode);
+
+        pthread_mutex_destroy (&ioc_inode->inode_lock);
+        free (ioc_inode);
+}
+
+
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
new file mode 100644
index 00000000000..e549f0bb547
--- /dev/null
+++ b/xlators/performance/io-cache/src/page.c
@@ -0,0 +1,778 @@
+/*
+ Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "io-cache.h"
+#include <assert.h>
+#include <sys/time.h>
+
+/*
+ * ioc_page_get - look up the cached page covering a file offset
+ *
+ * @ioc_inode: inode whose page list is searched
+ * @offset:    file offset; rounded with floor() to the table's page size
+ *
+ * Returns the matching page after moving it to the tail of the inode's
+ * page_lru list, or NULL when no page starts at the rounded offset.
+ */
+ioc_page_t *
+ioc_page_get (ioc_inode_t *ioc_inode,
+              off_t offset)
+{
+        ioc_page_t  *candidate = NULL;
+        ioc_table_t *table = ioc_inode->table;
+        off_t        wanted = floor (offset, table->page_size);
+
+        list_for_each_entry (candidate, &ioc_inode->pages, pages) {
+                if (candidate->offset != wanted)
+                        continue;
+
+                /* push the page to the end of the lru list */
+                list_move_tail (&candidate->page_lru, &ioc_inode->page_lru);
+                return candidate;
+        }
+
+        /* empty list or no page at that offset */
+        return NULL;
+}
+
+
+/*
+ * ioc_page_destroy - unlink and free a page that nobody is waiting on
+ *
+ * @page: page to destroy
+ *
+ * Returns -1 and leaves the page untouched while frames are queued on its
+ * waitq.  Otherwise the page is removed from both inode lists, its iovec
+ * copy and dict reference are dropped, the page is freed and the number of
+ * bytes it held is returned.
+ */
+int64_t
+ioc_page_destroy (ioc_page_t *page)
+{
+        int64_t released = 0;
+
+        if (page->waitq) {
+                /* frames waiting on this page, do not destroy this page */
+                return -1;
+        }
+
+        released = page->size;
+
+        list_del (&page->pages);
+        list_del (&page->page_lru);
+
+        gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
+                "destroying page = %p, offset = %"PRId64" "
+                "&& inode = %p",
+                page, page->offset, page->inode);
+
+        if (page->vector) {
+                dict_unref (page->ref);
+                free (page->vector);
+                page->vector = NULL;
+        }
+
+        page->inode = NULL;
+
+        pthread_mutex_destroy (&page->page_lock);
+        free (page);
+
+        return released;
+}
+
+/*
+ * ioc_prune - prune the cache. we have a limit to the number of pages we
+ *             can have in-memory.
+ *
+ * @table: ioc_table_t of this translator
+ *
+ * Walks the inode_lru lists from index 0 upwards and, per inode, the
+ * page_lru list from its head, destroying pages until the bytes actually
+ * released cover the excess of cache_used over cache_size.  Pages that
+ * still have waiters cannot be destroyed and do not count as released.
+ * Nothing is done when the cache is not over its limit.
+ *
+ * Always returns 0.
+ */
+int32_t
+ioc_prune (ioc_table_t *table)
+{
+        ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
+        ioc_page_t  *page = NULL, *next = NULL;
+        int64_t      ret = -1;
+        int32_t      index = 0;
+        uint64_t     size_to_prune = 0;
+        uint64_t     size_pruned = 0;
+
+        ioc_table_lock (table);
+        {
+                /* both counters are unsigned: bail out instead of letting
+                 * the subtraction below wrap around */
+                if (table->cache_used <= table->cache_size)
+                        goto unlock;
+
+                size_to_prune = table->cache_used - table->cache_size;
+
+                /* take out the least recently used inode */
+                for (index = 0; index < table->max_pri; index++) {
+                        list_for_each_entry_safe (curr, next_ioc_inode,
+                                                  &table->inode_lru[index],
+                                                  inode_lru) {
+                                /* prune page-by-page for this inode, till
+                                 * we reach the equilibrium */
+                                ioc_inode_lock (curr);
+                                /* { */
+
+                                list_for_each_entry_safe (page, next,
+                                                          &curr->page_lru,
+                                                          page_lru) {
+                                        ret = ioc_page_destroy (page);
+
+                                        /* -1: page has waiters and was
+                                         * kept, nothing was released */
+                                        if (ret != -1) {
+                                                table->cache_used -= ret;
+                                                size_pruned += ret;
+                                        }
+
+                                        gf_log (table->xl->name,
+                                                GF_LOG_DEBUG,
+                                                "index = %d && table->cache_"
+                                                "used = %"PRIu64" && table->"
+                                                "cache_size = %"PRIu64,
+                                                index, table->cache_used,
+                                                table->cache_size);
+
+                                        if (size_pruned >= size_to_prune)
+                                                break;
+                                } /* list_for_each_entry_safe(page...) */
+
+                                /* done with all pages of this inode: take
+                                 * it off the lru list */
+                                if (list_empty (&curr->pages)) {
+                                        list_del_init (&curr->inode_lru);
+                                }
+
+                                /* } */
+                                ioc_inode_unlock (curr);
+
+                                if (size_pruned >= size_to_prune)
+                                        break;
+                        } /* list_for_each_entry_safe (curr...) */
+
+                        if (size_pruned >= size_to_prune)
+                                break;
+                } /* for(index=0;...) */
+
+        } /* ioc_inode_table locked region end */
+unlock:
+        ioc_table_unlock (table);
+
+        return 0;
+}
+
+/*
+ * ioc_page_create - create a new page.
+ *
+ * @ioc_inode: inode the page is created for; NULL yields NULL
+ * @offset:    file offset; rounded with floor() to the table's page size
+ *
+ * Returns a zeroed page with its offset, owning inode and mutex set up,
+ * appended to the tail of both the inode's page_lru and pages lists.
+ * The NULL check is done before anything is dereferenced or allocated.
+ */
+ioc_page_t *
+ioc_page_create (ioc_inode_t *ioc_inode,
+                 off_t offset)
+{
+        ioc_table_t *table = NULL;
+        ioc_page_t  *newpage = NULL;
+        off_t        rounded_offset = 0;
+
+        if (!ioc_inode)
+                return NULL;
+
+        table = ioc_inode->table;
+        rounded_offset = floor (offset, table->page_size);
+
+        newpage = CALLOC (1, sizeof (*newpage));
+        ERR_ABORT (newpage);
+
+        newpage->offset = rounded_offset;
+        newpage->inode = ioc_inode;
+        pthread_mutex_init (&newpage->page_lock, NULL);
+
+        list_add_tail (&newpage->page_lru, &ioc_inode->page_lru);
+        list_add_tail (&newpage->pages, &ioc_inode->pages);
+
+        gf_log ("io-cache", GF_LOG_DEBUG,
+                "returning new page %p", newpage);
+        return newpage;
+}
+
+/*
+ * ioc_wait_on_page - queue a frame on a page until the page arrives
+ *
+ * @page:   page to wait on
+ * @frame:  call frame that is waiting on the page
+ * @offset: file offset the frame needs from the page
+ * @size:   number of bytes the frame needs
+ *
+ * A new waitq entry carrying the frame, offset and size is pushed on the
+ * head of page->waitq, and the frame's wait_count is incremented under
+ * the local lock.  The frame that triggered the page fault may itself be
+ * queued here.
+ */
+void
+ioc_wait_on_page (ioc_page_t *page,
+                  call_frame_t *frame,
+                  off_t offset,
+                  size_t size)
+{
+        ioc_local_t *local = frame->local;
+        ioc_waitq_t *entry = CALLOC (1, sizeof (*entry));
+
+        ERR_ABORT (entry);
+
+        gf_log (frame->this->name, GF_LOG_DEBUG,
+                "frame(%p) waiting on page = %p, offset=%"PRId64", "
+                "size=%"GF_PRI_SIZET"",
+                frame, page, offset, size);
+
+        entry->pending_size = size;
+        entry->pending_offset = offset;
+        entry->data = frame;
+
+        /* push on the head of the page's wait queue */
+        entry->next = page->waitq;
+        page->waitq = entry;
+
+        /* one frame can wait only once on a given page,
+         * local->wait_count is number of pages a frame is waiting on */
+        ioc_local_lock (local);
+        {
+                local->wait_count++;
+        }
+        ioc_local_unlock (local);
+}
+
+
+/*
+ * ioc_cache_still_valid - see if the cached pages of ioc_inode are still
+ *                         valid against the given stbuf
+ *
+ * @ioc_inode: inode whose recorded mtime is compared
+ * @stbuf:     fresh stat of the file; NULL makes the cache invalid
+ *
+ * Returns 1 when stbuf is non-NULL and its st_mtime equals
+ * ioc_inode->mtime, 0 otherwise.  The two "#if 0" sections are disabled
+ * alternatives and are not compiled.
+ *
+ * assumes ioc_inode is locked
+ */
+int8_t
+ioc_cache_still_valid (ioc_inode_t *ioc_inode,
+ struct stat *stbuf)
+{
+ int8_t cache_still_valid = 1;
+
+#if 0
+ if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime) ||
+ (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
+ cache_still_valid = 0;
+
+#else
+ /* active check: second-granularity mtime comparison only */
+ if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime))
+ cache_still_valid = 0;
+
+#endif
+
+#if 0
+ /* talk with avati@zresearch.com to enable this section */
+ if (!ioc_inode->mtime && stbuf) {
+ cache_still_valid = 1;
+ ioc_inode->mtime = stbuf->st_mtime;
+ }
+#endif
+
+ return cache_still_valid;
+}
+
+
+/*
+ * ioc_waitq_return - hand every frame of a detached wait queue back
+ *
+ * @waitq: head of the queue (may be NULL)
+ *
+ * For each entry the stored frame is passed to ioc_frame_return() and the
+ * entry is freed.  The next pointer is read before the entry is released.
+ */
+void
+ioc_waitq_return (ioc_waitq_t *waitq)
+{
+        ioc_waitq_t *entry = waitq;
+
+        while (entry != NULL) {
+                ioc_waitq_t  *following = entry->next;
+                call_frame_t *waiting = entry->data;
+
+                ioc_frame_return (waiting);
+                free (entry);
+
+                entry = following;
+        }
+}
+
+
+/*
+ * ioc_fault_cbk - readv callback of a page fault issued by ioc_page_fault()
+ *
+ * @frame:    fault frame; frame->local carries the faulted offset, the
+ *            inode and the fd reference taken by ioc_page_fault()
+ * @op_ret:   result of the child readv (-1 on error)
+ * @op_errno: errno of the child readv
+ * @vector:   iovec array holding the data read
+ * @count:    number of entries in vector
+ * @stbuf:    stat returned with the read
+ *
+ * Under the inode lock: flushes the inode's pages if the read failed or
+ * the cache is no longer valid, records the new mtime and revalidation
+ * time, then either fails the page (error) or stores a copy of the data in
+ * it and wakes up its waiters.  After the lock is dropped the waiters are
+ * returned, table->cache_used is adjusted, the cache is pruned if needed
+ * and the fault frame is destroyed.  Always returns 0.
+ *
+ * NOTE(review): destroy_size is int32_t while __ioc_inode_flush() is
+ * declared to return int32_t as well, but page sizes are size_t -- confirm
+ * the flushed total cannot exceed 2GB.
+ * NOTE(review): when frame->root->rsp_refs is NULL, page->ref is left at
+ * whatever it held before (possibly the dict already unref'd above) --
+ * verify.
+ */
+int
+ioc_fault_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vector,
+ int32_t count,
+ struct stat *stbuf)
+{
+ ioc_local_t *local = frame->local;
+ off_t offset = local->pending_offset;
+ ioc_inode_t *ioc_inode = local->inode;
+ ioc_table_t *table = ioc_inode->table;
+ ioc_page_t *page = NULL;
+ off_t trav_offset = 0;
+ size_t payload_size = 0;
+ int32_t destroy_size = 0;
+ size_t page_size = 0;
+ ioc_waitq_t *waitq = NULL;
+
+ trav_offset = offset;
+ payload_size = op_ret;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ /* a failed read or a changed mtime invalidates every cached
+  * page of this inode */
+ if (op_ret == -1 ||
+ (op_ret >= 0 &&
+ !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
+ "cache for inode(%p) is invalid. flushing "
+ "all pages", ioc_inode);
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
+
+ if (op_ret >= 0)
+ ioc_inode->mtime = stbuf->st_mtime;
+
+ /* remember when the inode was last revalidated */
+ gettimeofday (&ioc_inode->tv, NULL);
+
+ if (op_ret < 0) {
+ /* error, readv returned -1 */
+ page = ioc_page_get (ioc_inode, offset);
+ if (page)
+ waitq = ioc_page_error (page, op_ret,
+ op_errno);
+ } else {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
+ "op_ret = %d", op_ret);
+ page = ioc_page_get (ioc_inode, offset);
+ if (!page) {
+ /* page was flushed */
+ /* some serious bug ? */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "wasted copy: %"PRId64"[+%"PRId64"] "
+ "ioc_inode=%p", offset,
+ table->page_size, ioc_inode);
+ } else {
+ /* drop whatever the page held before */
+ if (page->vector) {
+ dict_unref (page->ref);
+ free (page->vector);
+ page->vector = NULL;
+ }
+
+ /* keep a copy of the page for our cache */
+ page->vector = iov_dup (vector, count);
+ page->count = count;
+ if (frame->root->rsp_refs) {
+ dict_ref (frame->root->rsp_refs);
+ page->ref = frame->root->rsp_refs;
+ } else {
+ /* TODO: we have got a response to
+ * our request and no data */
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "frame>root>rsp_refs is null");
+ } /* if(frame->root->rsp_refs) */
+
+ /* page->size should indicate exactly how
+ * much the readv call to the child
+ * translator returned. earlier op_ret
+ * from child translator was used, which
+ * gave rise to a bug where reads from
+ * io-cached volume were resulting in 0
+ * byte replies */
+ page_size = iov_length(vector, count);
+
+ page->size = page_size;
+
+ if (page->waitq) {
+ /* wake up all the frames waiting on
+ * this page, including
+ * the frame which triggered fault */
+ waitq = ioc_page_wakeup (page);
+ } /* if(page->waitq) */
+ } /* if(!page)...else */
+ } /* if(op_ret < 0)...else */
+ } /* ioc_inode locked region end */
+ ioc_inode_unlock (ioc_inode);
+
+ /* waiters are returned outside the inode lock */
+ ioc_waitq_return (waitq);
+
+ /* account for the newly cached bytes */
+ if (page_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used += page_size;
+ }
+ ioc_table_unlock (table);
+ }
+
+ /* account for the bytes released by the flush above */
+ if (destroy_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (table);
+ }
+
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "fault frame %p returned", frame);
+ pthread_mutex_destroy (&local->local_lock);
+
+ /* drops the reference taken in ioc_page_fault() */
+ fd_unref (local->fd);
+
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+/*
+ * ioc_page_fault - issue a readv to the child translator for one page
+ *
+ * @ioc_inode: inode the page belongs to
+ * @frame:     frame on whose behalf the fault is generated; it is copied,
+ *             not used for the wind itself
+ * @fd:        fd to read from
+ * @offset:    file offset of the page
+ *
+ * A copy of @frame gets a fresh ioc_local_t describing the fault (inode,
+ * offset, one page worth of data) and is wound to the first child's readv
+ * with ioc_fault_cbk as callback.
+ */
+void
+ioc_page_fault (ioc_inode_t *ioc_inode,
+                call_frame_t *frame,
+                fd_t *fd,
+                off_t offset)
+{
+        ioc_table_t  *table = ioc_inode->table;
+        call_frame_t *fault_frame = copy_frame (frame);
+        ioc_local_t  *fault_local = NULL;
+
+        fault_local = CALLOC (1, sizeof (ioc_local_t));
+        ERR_ABORT (fault_local);
+
+        /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
+         * are using till now won't be valid till we get reply from server.
+         * we unref this fd, in fault_cbk */
+        fault_local->fd = fd_ref (fd);
+
+        fault_frame->local = fault_local;
+        pthread_mutex_init (&fault_local->local_lock, NULL);
+
+        /* describe the fault: which inode, where, and how much */
+        fault_local->inode = ioc_inode;
+        fault_local->pending_offset = offset;
+        fault_local->pending_size = table->page_size;
+        INIT_LIST_HEAD (&fault_local->fill_list);
+
+        gf_log (frame->this->name, GF_LOG_DEBUG,
+                "stack winding page fault for offset = %"PRId64" with "
+                "frame %p", offset, fault_frame);
+
+        STACK_WIND (fault_frame, ioc_fault_cbk,
+                    FIRST_CHILD(fault_frame->this),
+                    FIRST_CHILD(fault_frame->this)->fops->readv,
+                    fd, table->page_size, offset);
+        return;
+}
+
+/*
+ * ioc_frame_fill - queue the part of a page a frame asked for
+ *
+ * @page:   page holding the cached data
+ * @frame:  frame whose local->fill_list receives the data
+ * @offset: file offset the frame wants data from
+ * @size:   number of bytes the frame wants
+ *
+ * Moves the page to the tail of its inode's page_lru list.  Unless the
+ * frame already failed (op_ret == -1) or the page is empty, an ioc_fill_t
+ * describing the wanted sub-range of the page's iovec is built and
+ * inserted into local->fill_list, which is kept ordered by page offset;
+ * local->op_ret grows by the number of bytes queued.
+ *
+ * NOTE(review): copy_size mixes size_t and off_t operands inside min();
+ * the "copy_size < 0" check depends on how that expression is converted --
+ * verify.
+ */
+void
+ioc_frame_fill (ioc_page_t *page,
+ call_frame_t *frame,
+ off_t offset,
+ size_t size)
+{
+ ioc_local_t *local = frame->local;
+ ioc_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ioc_inode_t *ioc_inode = page->inode;
+
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
+ "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
+ frame, offset, size, page->size, local->wait_count);
+
+ /* immediately move this page to the end of the page_lru list */
+ list_move_tail (&page->page_lru, &ioc_inode->page_lru);
+ /* fill local->pending_size bytes from local->pending_offset */
+ if (local->op_ret != -1 && page->size) {
+ if (offset > page->offset)
+ /* offset is offset in file, convert it to offset in
+ * page */
+ src_offset = offset - page->offset;
+ /*FIXME: since offset is the offset within page is the
+ * else case valid? */
+ else
+ /* local->pending_offset is in previous page. do not
+ * fill until we have filled all previous pages */
+ dst_offset = page->offset - offset;
+
+ /* we have to copy from offset to either end of this page
+ * or till the requested size */
+ copy_size = min (page->size - src_offset,
+ size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
+ "copy_size = %"GF_PRI_SIZET" && src_offset = "
+ "%"PRId64" && dst_offset = %"PRId64"",
+ copy_size, src_offset, dst_offset);
+
+ {
+ ioc_fill_t *new = CALLOC (1, sizeof (*new));
+ ERR_ABORT (new);
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->refs = dict_ref (page->ref);
+ /* first call with a NULL destination only counts the
+  * iovec entries needed for the sub-range */
+ new->count = iov_subset (page->vector,
+ page->count,
+ src_offset,
+ src_offset + copy_size,
+ NULL);
+ new->vector = CALLOC (new->count,
+ sizeof (struct iovec));
+ ERR_ABORT (new->vector);
+ /* second call fills the freshly allocated array */
+ new->count = iov_subset (page->vector,
+ page->count,
+ src_offset,
+ src_offset + copy_size,
+ new->vector);
+
+
+
+ /* add the ioc_fill to fill_list for this frame */
+ if (list_empty (&local->fill_list)) {
+ /* if list is empty, then this is the first
+ * time we are filling frame, add the
+ * ioc_fill_t to the end of list */
+ list_add_tail (&new->list, &local->fill_list);
+ } else {
+ int8_t found = 0;
+ /* list is not empty, we need to look for
+ * where this offset fits in list */
+ list_for_each_entry (fill, &local->fill_list,
+ list) {
+ if (fill->offset > new->offset) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ found = 0;
+ /* insert just before the first entry with
+  * a larger offset */
+ list_add_tail (&new->list,
+ &fill->list);
+ } else {
+ list_add_tail (&new->list,
+ &local->fill_list);
+ }
+ }
+ }
+ local->op_ret += copy_size;
+ }
+}
+
+/*
+ * ioc_frame_unwind - frame unwinds only from here
+ *
+ * @frame: call frame to unwind
+ *
+ * Flattens every ioc_fill_t on local->fill_list into one iovec array,
+ * merges their ref dicts into a new dict that becomes the frame's
+ * rsp_refs, and unwinds with op_ret set to the total length of that
+ * array and a zero-filled stat.  The fill entries, the local and the
+ * temporary array are freed.
+ *
+ * to be used only by ioc_frame_return(), when a frame has
+ * finished waiting on all pages, required
+ *
+ */
+static void
+ioc_frame_unwind (call_frame_t *frame)
+{
+ ioc_local_t *local = frame->local;
+ ioc_fill_t *fill = NULL, *next = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ dict_t *refs = NULL;
+ struct stat stbuf = {0,};
+ int32_t op_ret = 0;
+
+ // ioc_local_lock (local);
+ refs = get_new_dict ();
+
+ /* detach the local from the frame; it is freed at the end of this
+  * function */
+ frame->local = NULL;
+
+ if (list_empty (&local->fill_list)) {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "frame(%p) has 0 entries in local->fill_list "
+ "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
+ frame, local->offset, local->size);
+ }
+
+ /* first pass: total number of iovec entries */
+ list_for_each_entry (fill, &local->fill_list, list) {
+ count += fill->count;
+ }
+
+ vector = CALLOC (count, sizeof (*vector));
+ ERR_ABORT (vector);
+
+ /* second pass: append each fill's entries and release the fill;
+  * 'copied' is a byte offset into the destination array */
+ list_for_each_entry_safe (fill, next, &local->fill_list, list) {
+ memcpy (((char *)vector) + copied,
+ fill->vector,
+ fill->count * sizeof (*vector));
+
+ copied += (fill->count * sizeof (*vector));
+
+ dict_copy (fill->refs, refs);
+
+ list_del (&fill->list);
+ dict_unref (fill->refs);
+ free (fill->vector);
+ free (fill);
+ }
+
+ frame->root->rsp_refs = dict_ref (refs);
+
+ op_ret = iov_length (vector, count);
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "frame(%p) unwinding with op_ret=%d", frame, op_ret);
+
+ // ioc_local_unlock (local);
+
+ STACK_UNWIND (frame,
+ op_ret,
+ local->op_errno,
+ vector,
+ count,
+ &stbuf);
+
+ dict_unref (refs);
+
+ pthread_mutex_destroy (&local->local_lock);
+ free (local);
+ free (vector);
+
+ return;
+}
+
+/*
+ * ioc_frame_return - note that one of the pages a frame waited on is done
+ * @frame: frame whose wait_count is decremented
+ *
+ * The frame is unwound once its wait_count drops to zero.
+ *
+ * to be called only when a frame is waiting on an in-transit page
+ */
+void
+ioc_frame_return (call_frame_t *frame)
+{
+        ioc_local_t *local = frame->local;
+        int32_t      remaining = 0;
+
+        assert (local->wait_count > 0);
+
+        ioc_local_lock (local);
+        {
+                local->wait_count--;
+                remaining = local->wait_count;
+        }
+        ioc_local_unlock (local);
+
+        /* last page arrived: reply to the caller */
+        if (remaining == 0) {
+                ioc_frame_unwind (frame);
+        }
+
+        return;
+}
+
+/*
+ * ioc_page_wakeup - mark a page ready and fill every frame waiting on it
+ * @page: page whose wait queue is detached
+ *
+ * Returns the detached wait queue; the entries are not freed here, the
+ * caller passes the queue on (see ioc_waitq_return()).
+ *
+ * to be called only when a frame is waiting on an in-transit page
+ */
+ioc_waitq_t *
+ioc_page_wakeup (ioc_page_t *page)
+{
+        ioc_waitq_t  *head = page->waitq;
+        ioc_waitq_t  *entry = NULL;
+        call_frame_t *waiting = NULL;
+
+        page->waitq = NULL;
+        page->ready = 1;
+
+        gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
+                "page is %p && waitq = %p", page, head);
+
+        entry = head;
+        while (entry) {
+                waiting = entry->data;
+                ioc_frame_fill (page, waiting, entry->pending_offset,
+                                entry->pending_size);
+                entry = entry->next;
+        }
+
+        return head;
+}
+
+
+/*
+ * ioc_page_error - fail every frame waiting on a page and drop the page
+ * @page:     page whose read failed
+ * @op_ret:   result to record in the waiting frames
+ * @op_errno: errno to record in the waiting frames
+ *
+ * The wait queue is detached first; each waiting frame that has not
+ * already failed gets op_ret/op_errno stored in its local.  The page is
+ * then destroyed and its size taken off table->cache_used.  Returns the
+ * detached wait queue for the caller to return.
+ */
+ioc_waitq_t *
+ioc_page_error (ioc_page_t *page,
+                int32_t op_ret,
+                int32_t op_errno)
+{
+        ioc_waitq_t  *head = page->waitq;
+        ioc_waitq_t  *entry = NULL;
+        call_frame_t *waiting = NULL;
+        ioc_local_t  *local = NULL;
+        ioc_table_t  *table = NULL;
+        int64_t       released = 0;
+
+        page->waitq = NULL;
+
+        gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
+                "page error for page = %p & waitq = %p", page, head);
+
+        entry = head;
+        while (entry != NULL) {
+                waiting = entry->data;
+                local = waiting->local;
+
+                ioc_local_lock (local);
+                {
+                        /* keep the first error a frame has seen */
+                        if (local->op_ret != -1) {
+                                local->op_ret = op_ret;
+                                local->op_errno = op_errno;
+                        }
+                }
+                ioc_local_unlock (local);
+
+                entry = entry->next;
+        }
+
+        /* read the table before the page goes away */
+        table = page->inode->table;
+        released = ioc_page_destroy (page);
+
+        if (released != -1) {
+                table->cache_used -= released;
+        }
+
+        return head;
+}
diff --git a/xlators/performance/io-threads/Makefile.am b/xlators/performance/io-threads/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/performance/io-threads/Makefile.am
@@ -0,0 +1,3 @@
+# Build descends into src/ only; nothing extra is removed on clean.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am
new file mode 100644
index 00000000000..38dea3eb7fc
--- /dev/null
+++ b/xlators/performance/io-threads/src/Makefile.am
@@ -0,0 +1,14 @@
+# io-threads translator, installed as a loadable module under
+# $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance.
+xlator_LTLIBRARIES = io-threads.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+# libtool spells the flag -avoid-version
+io_threads_la_LDFLAGS = -module -avoid-version
+
+io_threads_la_SOURCES = io-threads.c
+io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = io-threads.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
new file mode 100644
index 00000000000..5acdd627da4
--- /dev/null
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -0,0 +1,1254 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "call-stub.h"
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "io-threads.h"
+
+static void
+iot_queue (iot_worker_t *worker,
+ call_stub_t *stub);
+
+static call_stub_t *
+iot_dequeue (iot_worker_t *worker);
+
+/*
+ * iot_schedule - choose the worker for inode number @ino.
+ *
+ * @conf: translator configuration holding the worker ring
+ * @file: optional per-fd context; when non-NULL the chosen worker is
+ *        recorded in file->worker
+ * @ino:  inode number used to select the worker
+ *
+ * Starts at the first worker after the list head and advances
+ * (ino % conf->thread_count) entries. The chosen worker's fd_count is
+ * incremented on every call, whether or not @file is given.
+ *
+ * Returns the chosen worker.
+ */
+static iot_worker_t *
+iot_schedule (iot_conf_t *conf, iot_file_t *file, ino_t ino)
+{
+        int32_t       hops   = (ino % conf->thread_count);
+        iot_worker_t *chosen = conf->workers.next;
+
+        while (hops) {
+                chosen = chosen->next;
+                hops--;
+        }
+
+        if (file)
+                file->worker = chosen;
+
+        chosen->fd_count++;
+
+        return chosen;
+}
+
+/*
+ * iot_open_cbk - completion callback for open.
+ *
+ * On success (op_ret >= 0) a zeroed iot_file_t is allocated, bound to a
+ * worker chosen from the fd's inode number, stored as this translator's
+ * context on @fd and appended to conf->files under files_lock. The reply
+ * is then unwound with the arguments received.
+ */
+int32_t
+iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        iot_conf_t *conf = this->private;
+        iot_file_t *file = NULL;
+        iot_file_t *tail = NULL;
+
+        if (op_ret >= 0) {
+                file = CALLOC (1, sizeof (*file));
+                ERR_ABORT (file);
+
+                iot_schedule (conf, file, fd->inode->ino);
+                file->fd = fd;
+
+                fd_ctx_set (fd, this, (uint64_t)(long)file);
+
+                /* link the new entry just before the list head (tail insert) */
+                pthread_mutex_lock (&conf->files_lock);
+                {
+                        tail = conf->files.prev;
+
+                        file->next = &conf->files;
+                        file->prev = tail;
+                        conf->files.prev = file;
+                        tail->next = file;
+                }
+                pthread_mutex_unlock (&conf->files_lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}
+
+/*
+ * iot_open - open fop. Winds straight to the first child without queueing;
+ * iot_open_cbk sets up the per-fd context on the way back.
+ */
+int32_t
+iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+          int32_t flags, fd_t *fd)
+{
+        STACK_WIND (frame, iot_open_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+
+        return 0;
+}
+
+
+/*
+ * iot_create_cbk - completion callback for create.
+ *
+ * On success (op_ret >= 0) a zeroed iot_file_t is allocated, bound to a
+ * worker chosen from the fd's inode number, stored as this translator's
+ * context on @fd and appended to conf->files under files_lock. The reply
+ * is then unwound with the arguments received.
+ */
+int32_t
+iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd,
+                inode_t *inode, struct stat *stbuf)
+{
+        iot_conf_t *conf = this->private;
+        iot_file_t *file = NULL;
+        iot_file_t *tail = NULL;
+
+        if (op_ret >= 0) {
+                file = CALLOC (1, sizeof (*file));
+                ERR_ABORT (file);
+
+                iot_schedule (conf, file, fd->inode->ino);
+                file->fd = fd;
+
+                fd_ctx_set (fd, this, (uint64_t)(long)file);
+
+                /* link the new entry just before the list head (tail insert) */
+                pthread_mutex_lock (&conf->files_lock);
+                {
+                        tail = conf->files.prev;
+
+                        file->next = &conf->files;
+                        file->prev = tail;
+                        conf->files.prev = file;
+                        tail->next = file;
+                }
+                pthread_mutex_unlock (&conf->files_lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
+
+        return 0;
+}
+
+/*
+ * iot_create - create fop. Winds straight to the first child without
+ * queueing; iot_create_cbk sets up the per-fd context on the way back.
+ */
+int32_t
+iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            int32_t flags, mode_t mode, fd_t *fd)
+{
+        STACK_WIND (frame, iot_create_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, fd);
+
+        return 0;
+}
+
+
+
+/*
+ * iot_readv_cbk - completion callback for readv. Resets the frame-local
+ * frame_size to 0 and unwinds the reply with the arguments received.
+ */
+int32_t
+iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+        iot_local_t *local = NULL;
+
+        local = frame->local;
+        /* the payload length of the reply is deliberately not accounted */
+        local->frame_size = 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+        return 0;
+}
+
+/*
+ * iot_readv_wrapper - stub function for readv; winds the call to the first
+ * child when the queued stub is resumed.
+ */
+static int32_t
+iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   size_t size, off_t offset)
+{
+        STACK_WIND (frame, iot_readv_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset);
+
+        return 0;
+}
+
+/*
+ * iot_readv - readv fop: queue the read on the worker bound to @fd.
+ *
+ * The per-fd context stored by iot_open_cbk/iot_create_cbk supplies the
+ * worker. A zeroed iot_local_t is attached to the frame, a readv call stub
+ * is built around iot_readv_wrapper and handed to iot_queue().
+ *
+ * Failure replies are unwound with op_ret -1 and a full readv argument list
+ * (vector NULL, count 0, stbuf NULL) so the receiving callback never reads
+ * arguments that were not passed:
+ *   EBADFD - no context is stored on @fd for this translator
+ *   ENOMEM - the call stub could not be created
+ *
+ * Always returns 0.
+ */
+int32_t
+iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           size_t size, off_t offset)
+{
+        call_stub_t  *stub     = NULL;
+        iot_local_t  *local    = NULL;
+        iot_file_t   *file     = NULL;
+        iot_worker_t *worker   = NULL;
+        uint64_t      tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
+                return 0;
+        }
+
+        file = (iot_file_t *)(long)tmp_file;
+        worker = file->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cannot get readv call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL, 0, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/* iot_flush_cbk - completion callback for flush; unwinds op_ret/op_errno. */
+int32_t
+iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * iot_flush_wrapper - stub function for flush; winds the call to the first
+ * child when the queued stub is resumed.
+ */
+static int32_t
+iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        STACK_WIND (frame, iot_flush_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
+                    fd);
+
+        return 0;
+}
+
+/*
+ * iot_flush - flush fop: queue the flush on the worker bound to @fd.
+ *
+ * Unwinds -1/EBADFD when no context is stored on @fd for this translator
+ * and -1/ENOMEM when the call stub cannot be created. Always returns 0.
+ */
+int32_t
+iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        uint64_t      tmp_file = 0;
+        iot_worker_t *worker   = NULL;
+        iot_local_t  *local    = NULL;
+        call_stub_t  *stub     = NULL;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        /* the context value is the iot_file_t pointer stored at open time */
+        worker = ((iot_file_t *)(long)tmp_file)->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_flush_stub (frame, iot_flush_wrapper, fd);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get flush_cbk call stub");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/* iot_fsync_cbk - completion callback for fsync; unwinds op_ret/op_errno. */
+int32_t
+iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * iot_fsync_wrapper - stub function for fsync; winds the call to the first
+ * child when the queued stub is resumed.
+ */
+static int32_t
+iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   int32_t datasync)
+{
+        STACK_WIND (frame, iot_fsync_cbk,
+                    FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsync,
+                    fd, datasync);
+
+        return 0;
+}
+
+/*
+ * iot_fsync - fsync fop: queue the fsync on the worker bound to @fd.
+ *
+ * Unwinds -1/EBADFD when no context is stored on @fd for this translator
+ * and -1/ENOMEM when the call stub cannot be created. Always returns 0.
+ */
+int32_t
+iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+{
+        uint64_t      tmp_file = 0;
+        iot_worker_t *worker   = NULL;
+        iot_local_t  *local    = NULL;
+        call_stub_t  *stub     = NULL;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        /* the context value is the iot_file_t pointer stored at open time */
+        worker = ((iot_file_t *)(long)tmp_file)->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fsync_cbk call stub");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/*
+ * iot_writev_cbk - completion callback for writev. Resets the frame-local
+ * frame_size to 0 and unwinds the reply with the arguments received.
+ */
+int32_t
+iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        iot_local_t *local = NULL;
+
+        local = frame->local;
+        local->frame_size = 0;
+
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+        return 0;
+}
+
+/*
+ * iot_writev_wrapper - stub function for writev; winds the call to the
+ * first child when the queued stub is resumed.
+ */
+static int32_t
+iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                    struct iovec *vector, int32_t count, off_t offset)
+{
+        STACK_WIND (frame, iot_writev_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, offset);
+
+        return 0;
+}
+
+/*
+ * iot_writev - writev fop: queue the write on the worker bound to @fd.
+ *
+ * The frame-local frame_size is set to the serialized length of
+ * frame->root->req_refs when that dict is present, otherwise to the total
+ * length of @vector; iot_queue() adds it to conf->current_size.
+ *
+ * Failure replies are unwound with op_ret -1 and a NULL stat buffer so the
+ * argument list matches the writev callback signature:
+ *   EBADFD - no context is stored on @fd for this translator
+ *   ENOMEM - the call stub could not be created
+ *
+ * Always returns 0.
+ */
+int32_t
+iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+            struct iovec *vector, int32_t count, off_t offset)
+{
+        call_stub_t  *stub     = NULL;
+        iot_local_t  *local    = NULL;
+        iot_file_t   *file     = NULL;
+        iot_worker_t *worker   = NULL;
+        uint64_t      tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (iot_file_t *)(long)tmp_file;
+        worker = file->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+
+        if (frame->root->req_refs)
+                local->frame_size = dict_serialized_length (frame->root->req_refs);
+        else
+                local->frame_size = iov_length (vector, count);
+        frame->local = local;
+
+        stub = fop_writev_stub (frame, iot_writev_wrapper,
+                                fd, vector, count, offset);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get writev call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+
+/* iot_lk_cbk - completion callback for lk; unwinds the reply unchanged. */
+int32_t
+iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+            int32_t op_ret, int32_t op_errno, struct flock *flock)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, flock);
+
+        return 0;
+}
+
+
+/*
+ * iot_lk_wrapper - stub function for lk; winds the call to the first child
+ * when the queued stub is resumed.
+ */
+static int32_t
+iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                int32_t cmd, struct flock *flock)
+{
+        STACK_WIND (frame, iot_lk_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+                    fd, cmd, flock);
+
+        return 0;
+}
+
+
+/*
+ * iot_lk - lk fop: queue the lock request on the worker bound to @fd.
+ *
+ * Failure replies are unwound with op_ret -1 and a NULL flock so the
+ * argument list matches the lk callback signature:
+ *   EBADFD - no context is stored on @fd for this translator
+ *   ENOMEM - the call stub could not be created
+ *
+ * Always returns 0.
+ */
+int32_t
+iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+        int32_t cmd, struct flock *flock)
+{
+        call_stub_t  *stub     = NULL;
+        iot_local_t  *local    = NULL;
+        iot_file_t   *file     = NULL;
+        iot_worker_t *worker   = NULL;
+        uint64_t      tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (iot_file_t *)(long)tmp_file;
+        worker = file->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fop_lk call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+
+/* iot_stat_cbk - completion callback for stat; unwinds the reply unchanged. */
+int32_t
+iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+
+/*
+ * iot_stat_wrapper - stub function for stat; winds the call to the first
+ * child when the queued stub is resumed.
+ */
+static int32_t
+iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        STACK_WIND (frame, iot_stat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                    loc);
+
+        return 0;
+}
+
+/*
+ * iot_stat - stat fop.
+ *
+ * When fd_lookup() finds no fd for the inode and the caller's pid, the call
+ * is wound to the first child directly from the calling thread. Otherwise
+ * the looked-up fd reference is dropped and the call is queued on the
+ * worker selected from the inode number.
+ *
+ * Unwinds -1/ENOMEM when the call stub cannot be created.
+ * Always returns 0.
+ */
+int32_t
+iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        iot_conf_t   *conf   = this->private;
+        iot_local_t  *local  = NULL;
+        iot_worker_t *worker = NULL;
+        call_stub_t  *stub   = NULL;
+        fd_t         *fd     = NULL;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        fd = fd_lookup (loc->inode, frame->root->pid);
+        if (fd != NULL) {
+                fd_unref (fd);
+
+                worker = iot_schedule (conf, NULL, loc->inode->ino);
+
+                stub = fop_stat_stub (frame, iot_stat_wrapper, loc);
+                if (!stub) {
+                        gf_log (this->name, GF_LOG_ERROR, "cannot get fop_stat call stub");
+                        STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                        return 0;
+                }
+
+                iot_queue (worker, stub);
+                return 0;
+        }
+
+        /* no fd found for this inode/pid: bypass the worker queue */
+        STACK_WIND(frame, iot_stat_cbk,
+                   FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+                   loc);
+
+        return 0;
+}
+
+
+/* iot_fstat_cbk - completion callback for fstat; unwinds the reply unchanged. */
+int32_t
+iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * iot_fstat_wrapper - stub function for fstat; winds the call to the first
+ * child when the queued stub is resumed.
+ */
+static int32_t
+iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        STACK_WIND (frame, iot_fstat_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+                    fd);
+
+        return 0;
+}
+
+/*
+ * iot_fstat - fstat fop: queue the request on the worker bound to @fd.
+ *
+ * Failure replies are unwound with op_ret -1 and a NULL stat buffer so the
+ * argument list matches the fstat callback signature:
+ *   EBADFD - no context is stored on @fd for this translator
+ *   ENOMEM - the call stub could not be created
+ *
+ * Always returns 0.
+ */
+int32_t
+iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        call_stub_t  *stub     = NULL;
+        iot_local_t  *local    = NULL;
+        iot_file_t   *file     = NULL;
+        iot_worker_t *worker   = NULL;
+        uint64_t      tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (iot_file_t *)(long)tmp_file;
+        worker = file->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fop_fstat call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/* iot_truncate_cbk - completion callback for truncate; unwinds the reply unchanged. */
+int32_t
+iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * iot_truncate_wrapper - stub function for truncate; winds the call to the
+ * first child when the queued stub is resumed.
+ */
+static int32_t
+iot_truncate_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      off_t offset)
+{
+        STACK_WIND (frame, iot_truncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                    loc, offset);
+
+        return 0;
+}
+
+/*
+ * iot_truncate - truncate fop.
+ *
+ * When fd_lookup() finds no fd for the inode and the caller's pid, the call
+ * is wound to the first child directly from the calling thread. Otherwise
+ * the looked-up fd reference is dropped and the call is queued on the
+ * worker selected from the inode number.
+ *
+ * Unwinds -1/ENOMEM when the call stub cannot be created (the log message
+ * now names the truncate stub instead of the stat stub).
+ * Always returns 0.
+ */
+int32_t
+iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+{
+        call_stub_t  *stub   = NULL;
+        iot_local_t  *local  = NULL;
+        iot_worker_t *worker = NULL;
+        iot_conf_t   *conf   = NULL;
+        fd_t         *fd     = NULL;
+
+        conf = this->private;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        fd = fd_lookup (loc->inode, frame->root->pid);
+
+        if (fd == NULL) {
+                /* no fd found for this inode/pid: bypass the worker queue */
+                STACK_WIND(frame, iot_truncate_cbk,
+                           FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+                           loc, offset);
+                return 0;
+        }
+
+        fd_unref (fd);
+
+        worker = iot_schedule (conf, NULL, loc->inode->ino);
+
+        stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "cannot get fop_truncate call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/* iot_ftruncate_cbk - completion callback for ftruncate; unwinds the reply unchanged. */
+int32_t
+iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * iot_ftruncate_wrapper - stub function for ftruncate; winds the call to
+ * the first child when the queued stub is resumed.
+ */
+static int32_t
+iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                       off_t offset)
+{
+        STACK_WIND (frame, iot_ftruncate_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+                    fd, offset);
+
+        return 0;
+}
+
+/*
+ * iot_ftruncate - ftruncate fop: queue the request on the worker bound to
+ * @fd.
+ *
+ * Failure replies are unwound with op_ret -1 and a NULL stat buffer so the
+ * argument list matches the ftruncate callback signature:
+ *   EBADFD - no context is stored on @fd for this translator
+ *   ENOMEM - the call stub could not be created
+ *
+ * Always returns 0.
+ */
+int32_t
+iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+{
+        call_stub_t  *stub     = NULL;
+        iot_local_t  *local    = NULL;
+        iot_file_t   *file     = NULL;
+        iot_worker_t *worker   = NULL;
+        uint64_t      tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd context is NULL, returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (iot_file_t *)(long)tmp_file;
+        worker = file->worker;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fop_ftruncate call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/* iot_utimens_cbk - completion callback for utimens; unwinds the reply unchanged. */
+int32_t
+iot_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * iot_utimens_wrapper - stub function for utimens; winds the call to the
+ * first child when the queued stub is resumed.
+ */
+static int32_t
+iot_utimens_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                     struct timespec tv[2])
+{
+        STACK_WIND (frame, iot_utimens_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->utimens,
+                    loc, tv);
+
+        return 0;
+}
+
+/*
+ * iot_utimens - utimens fop.
+ *
+ * When fd_lookup() finds no fd for the inode and the caller's pid, the call
+ * is wound to the first child directly from the calling thread. Otherwise
+ * the looked-up fd reference is dropped and the call is queued on the
+ * worker selected from the inode number.
+ *
+ * Unwinds -1/ENOMEM when the call stub cannot be created.
+ * Always returns 0.
+ */
+int32_t
+iot_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+             struct timespec tv[2])
+{
+        iot_conf_t   *conf   = this->private;
+        iot_local_t  *local  = NULL;
+        iot_worker_t *worker = NULL;
+        call_stub_t  *stub   = NULL;
+        fd_t         *fd     = NULL;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        fd = fd_lookup (loc->inode, frame->root->pid);
+        if (fd != NULL) {
+                fd_unref (fd);
+
+                worker = iot_schedule (conf, NULL, loc->inode->ino);
+
+                stub = fop_utimens_stub (frame, iot_utimens_wrapper, loc, tv);
+                if (!stub) {
+                        gf_log (this->name, GF_LOG_ERROR, "cannot get fop_utimens call stub");
+                        STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                        return 0;
+                }
+
+                iot_queue (worker, stub);
+                return 0;
+        }
+
+        /* no fd found for this inode/pid: bypass the worker queue */
+        STACK_WIND(frame, iot_utimens_cbk,
+                   FIRST_CHILD(this), FIRST_CHILD(this)->fops->utimens,
+                   loc, tv);
+
+        return 0;
+}
+
+
+/* iot_checksum_cbk - completion callback for checksum; unwinds the reply unchanged. */
+int32_t
+iot_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  uint8_t *file_checksum, uint8_t *dir_checksum)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
+
+        return 0;
+}
+
+/*
+ * iot_checksum_wrapper - stub function for checksum; winds the call to the
+ * first child when the queued stub is resumed.
+ */
+static int32_t
+iot_checksum_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                      int32_t flags)
+{
+        STACK_WIND (frame, iot_checksum_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->checksum,
+                    loc, flags);
+
+        return 0;
+}
+
+/*
+ * iot_checksum - checksum fop: queue the request on a worker picked from
+ * conf->misc_thread_index, which is post-incremented on every call.
+ *
+ * The frame-local allocation is checked with ERR_ABORT like in the other
+ * fops of this file, because iot_queue() dereferences frame->local.
+ *
+ * Unwinds -1/ENOMEM when the call stub cannot be created.
+ * Always returns 0.
+ */
+int32_t
+iot_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags)
+{
+        call_stub_t  *stub   = NULL;
+        iot_local_t  *local  = NULL;
+        iot_worker_t *worker = NULL;
+        iot_conf_t   *conf   = NULL;
+
+        conf = this->private;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        worker = iot_schedule (conf, NULL, conf->misc_thread_index++);
+
+        stub = fop_checksum_stub (frame, iot_checksum_wrapper, loc, flags);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fop_checksum call stub");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+
+/* iot_unlink_cbk - completion callback for unlink; unwinds op_ret/op_errno. */
+int32_t
+iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * iot_unlink_wrapper - stub function for unlink; winds the call to the
+ * first child when the queued stub is resumed.
+ */
+static int32_t
+iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        STACK_WIND (frame, iot_unlink_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+                    loc);
+
+        return 0;
+}
+
+/*
+ * iot_unlink - unlink fop: queue the request on a worker picked from
+ * conf->misc_thread_index, which is post-incremented on every call.
+ *
+ * The frame-local allocation is checked with ERR_ABORT like in the other
+ * fops of this file, because iot_queue() dereferences frame->local.
+ *
+ * Unwinds -1/ENOMEM when the call stub cannot be created.
+ * Always returns 0.
+ */
+int32_t
+iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        call_stub_t  *stub   = NULL;
+        iot_local_t  *local  = NULL;
+        iot_worker_t *worker = NULL;
+        iot_conf_t   *conf   = NULL;
+
+        conf = this->private;
+
+        local = CALLOC (1, sizeof (*local));
+        ERR_ABORT (local);
+        frame->local = local;
+
+        worker = iot_schedule (conf, NULL, conf->misc_thread_index++);
+
+        stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc);
+        if (!stub) {
+                gf_log (this->name, GF_LOG_ERROR, "cannot get fop_unlink call stub");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+
+        iot_queue (worker, stub);
+
+        return 0;
+}
+
+/*
+ * iot_release - release callback: drop this translator's context for @fd.
+ *
+ * Removes the context stored on @fd; when none is found nothing else is
+ * done. Otherwise the iot_file_t is unlinked from conf->files under
+ * files_lock and freed. Always returns 0.
+ */
+int32_t
+iot_release (xlator_t *this, fd_t *fd)
+{
+        iot_conf_t *conf     = this->private;
+        iot_file_t *file     = NULL;
+        iot_file_t *before   = NULL;
+        iot_file_t *after    = NULL;
+        uint64_t    tmp_file = 0;
+
+        if (fd_ctx_del (fd, this, &tmp_file))
+                return 0;
+
+        file = (iot_file_t *)(long)tmp_file;
+
+        pthread_mutex_lock (&conf->files_lock);
+        {
+                before = file->prev;
+                after  = file->next;
+
+                before->next = after;
+                after->prev  = before;
+        }
+        pthread_mutex_unlock (&conf->files_lock);
+
+        FREE (file);
+
+        return 0;
+}
+
+
+/*
+ * iot_queue - append a call stub to a worker's queue and wake the worker.
+ *
+ * @worker: worker that will resume the stub
+ * @stub:   call stub; stub->frame->local must point to an iot_local_t,
+ *          whose frame_size is read here
+ *
+ * Runs under conf->lock. When conf->cache_size is non-zero and the
+ * request has a non-zero frame_size, the caller blocks on conf->q_cond
+ * until conf->current_size drops below conf->cache_size.
+ */
+static void
+iot_queue (iot_worker_t *worker,
+ call_stub_t *stub)
+{
+ iot_queue_t *queue;
+ iot_conf_t *conf = worker->conf;
+ iot_local_t *local = stub->frame->local;
+ size_t frame_size = local->frame_size;
+
+ queue = CALLOC (1, sizeof (*queue));
+ ERR_ABORT (queue);
+ queue->stub = stub;
+
+ pthread_mutex_lock (&conf->lock);
+
+ /*
+ while (worker->queue_size >= worker->queue_limit)
+ pthread_cond_wait (&worker->q_cond, &worker->lock);
+ */
+ /* throttle: only requests that carry a payload size wait here */
+ if (conf->cache_size) {
+ while (frame_size && (conf->current_size >= conf->cache_size))
+ pthread_cond_wait (&conf->q_cond, &conf->lock);
+ }
+
+ /* insert just before the queue head, i.e. at the tail */
+ queue->next = &worker->queue;
+ queue->prev = worker->queue.prev;
+
+ queue->next->prev = queue;
+ queue->prev->next = queue;
+
+ /* dq_cond */
+ worker->queue_size++;
+ worker->q++;
+
+ /* account the payload; iot_dequeue subtracts it again */
+ conf->current_size += local->frame_size;
+
+ /* wake the worker thread waiting in iot_dequeue */
+ pthread_cond_broadcast (&worker->dq_cond);
+
+ pthread_mutex_unlock (&conf->lock);
+}
+
+/*
+ * iot_dequeue - remove and return the oldest stub from a worker's queue.
+ *
+ * @worker: worker whose queue is drained
+ *
+ * Blocks on worker->dq_cond (with conf->lock held) while the queue is
+ * empty. After unlinking the first entry, the stub's frame_size is
+ * subtracted from conf->current_size and conf->q_cond is broadcast so
+ * that callers waiting in iot_queue can proceed. The queue node is freed;
+ * the stub itself is returned to the caller.
+ */
+static call_stub_t *
+iot_dequeue (iot_worker_t *worker)
+{
+ call_stub_t *stub = NULL;
+ iot_queue_t *queue = NULL;
+ iot_conf_t *conf = worker->conf;
+ iot_local_t *local = NULL;
+
+
+ pthread_mutex_lock (&conf->lock);
+
+ while (!worker->queue_size)
+ /*
+ pthread_cond_wait (&worker->dq_cond, &worker->lock);
+ */
+ pthread_cond_wait (&worker->dq_cond, &conf->lock);
+
+ /* take the entry at the head of the queue */
+ queue = worker->queue.next;
+
+ queue->next->prev = queue->prev;
+ queue->prev->next = queue->next;
+
+ stub = queue->stub;
+ local = stub->frame->local;
+
+ worker->queue_size--;
+ worker->dq++;
+
+ /* q_cond */
+ conf->current_size -= local->frame_size;
+
+ pthread_cond_broadcast (&conf->q_cond);
+
+ pthread_mutex_unlock (&conf->lock);
+
+ /* the node is no longer reachable from the list; free it outside the lock */
+ FREE (queue);
+
+ return stub;
+}
+
+/*
+ * iot_worker - thread entry point for one worker.
+ *
+ * @arg: the iot_worker_t this thread serves
+ *
+ * Loops forever: takes the next stub with iot_dequeue() (which blocks while
+ * the queue is empty) and resumes it with call_resume(). Never returns.
+ */
+static void *
+iot_worker (void *arg)
+{
+        iot_worker_t *worker = arg;
+        call_stub_t  *stub   = NULL;
+
+        for (;;) {
+                stub = iot_dequeue (worker);
+                call_resume (stub);
+        }
+}
+
+#if 0
+static void *
+iot_reply (void *arg)
+{
+ iot_worker_t *reply = arg;
+
+ while (1) {
+ call_stub_t *stub;
+
+ stub = iot_dequeue (reply);
+ FREE (stub->frame->local);
+ stub->frame->local = NULL;
+ call_resume (stub);
+ }
+}
+#endif
+
+/*
+ * workers_init - build the worker ring and start one thread per worker.
+ *
+ * @conf: translator configuration; conf->thread_count workers are created
+ *
+ * Each worker is appended to the circular list headed by conf->workers,
+ * gets an empty queue and a dq_cond, and is served by a thread running
+ * iot_worker(). A pthread_create() failure is logged instead of being
+ * silently ignored. The worker stays linked in that case so that the
+ * modulo walk in iot_schedule() still matches thread_count, but no thread
+ * will service requests queued on it.
+ */
+static void
+workers_init (iot_conf_t *conf)
+{
+        int i   = 0;
+        int ret = 0;
+
+        conf->workers.next = &conf->workers;
+        conf->workers.prev = &conf->workers;
+
+        for (i = 0; i < conf->thread_count; i++) {
+
+                iot_worker_t *worker = CALLOC (1, sizeof (*worker));
+                ERR_ABORT (worker);
+
+                /* append to the tail of the worker ring */
+                worker->next = &conf->workers;
+                worker->prev = conf->workers.prev;
+                worker->next->prev = worker;
+                worker->prev->next = worker;
+
+                /* empty request queue: head points at itself */
+                worker->queue.next = &worker->queue;
+                worker->queue.prev = &worker->queue;
+
+                pthread_cond_init (&worker->dq_cond, NULL);
+
+                worker->conf = conf;
+
+                ret = pthread_create (&worker->thread, NULL, iot_worker, worker);
+                if (ret != 0) {
+                        gf_log ("io-threads", GF_LOG_ERROR,
+                                "cannot create worker thread %d (error %d)",
+                                i, ret);
+                }
+        }
+}
+
+/*
+ * init - translator constructor.
+ *
+ * Requires exactly one child (returns -1 otherwise) and warns when the
+ * translator has no parents. Allocates the configuration, reads the
+ * optional "thread-count" option (default 1), initialises the locks,
+ * condition variable and open-file list, starts the worker threads and
+ * stores the configuration in this->private.
+ *
+ * A thread-count below 1 is replaced by 1: iot_schedule() computes
+ * ino % thread_count and expects at least one worker in the ring.
+ *
+ * Returns 0 on success, -1 on a mis-configured volume.
+ */
+int32_t
+init (xlator_t *this)
+{
+        iot_conf_t *conf    = NULL;
+        dict_t     *options = this->options;
+
+        if (!this->children || this->children->next) {
+                gf_log ("io-threads",
+                        GF_LOG_ERROR,
+                        "FATAL: iot not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = (void *) CALLOC (1, sizeof (*conf));
+        ERR_ABORT (conf);
+
+        conf->thread_count = 1;
+
+        if (dict_get (options, "thread-count")) {
+                conf->thread_count = data_to_int32 (dict_get (options,
+                                                              "thread-count"));
+                if (conf->thread_count < 1) {
+                        gf_log ("io-threads",
+                                GF_LOG_WARNING,
+                                "thread-count %d is out of range, using 1",
+                                conf->thread_count);
+                        conf->thread_count = 1;
+                }
+                gf_log ("io-threads",
+                        GF_LOG_DEBUG,
+                        "Using conf->thread_count = %d",
+                        conf->thread_count);
+        }
+
+        pthread_mutex_init (&conf->lock, NULL);
+        pthread_cond_init (&conf->q_cond, NULL);
+
+        /* empty open-file list: head points at itself */
+        conf->files.next = &conf->files;
+        conf->files.prev = &conf->files;
+        pthread_mutex_init (&conf->files_lock, NULL);
+
+        workers_init (conf);
+
+        this->private = conf;
+        return 0;
+}
+
+/*
+ * fini - translator destructor: frees the configuration stored in
+ * this->private and clears the pointer.
+ *
+ * NOTE(review): the worker threads started by workers_init, the worker
+ * structures, the per-fd iot_file_t entries and the mutexes/condition
+ * variables are not stopped, freed or destroyed here; workers keep a
+ * pointer to conf. Confirm fini is only reached when that is harmless.
+ */
+void
+fini (xlator_t *this)
+{
+ iot_conf_t *conf = this->private;
+
+ FREE (conf);
+
+ this->private = NULL;
+ return;
+}
+
+/*
+ * File operations implemented by io-threads. open and create wind
+ * directly and set up the per-fd context in their callbacks; the other
+ * entries queue the call on a worker thread (stat, truncate and utimens
+ * wind directly when fd_lookup finds no fd for the inode).
+ */
+struct xlator_fops fops = {
+ .open = iot_open,
+ .create = iot_create,
+ .readv = iot_readv,
+ .writev = iot_writev,
+ .flush = iot_flush,
+ .fsync = iot_fsync,
+ .lk = iot_lk,
+ .stat = iot_stat,
+ .fstat = iot_fstat,
+ .truncate = iot_truncate,
+ .ftruncate = iot_ftruncate,
+ .utimens = iot_utimens,
+ .checksum = iot_checksum,
+ .unlink = iot_unlink,
+};
+
+/* No management operations are implemented by this translator. */
+struct xlator_mops mops = {
+};
+
+/* Callbacks: iot_release frees the per-fd context created at open/create. */
+struct xlator_cbks cbks = {
+ .release = iot_release,
+};
+
+/*
+ * Volume options accepted by io-threads. "thread-count" is the number of
+ * worker threads (integer, declared range 1..32); init() uses 1 when the
+ * option is absent. The table ends with a NULL-key entry.
+ */
+struct volume_options options[] = {
+ { .key = {"thread-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 32
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
new file mode 100644
index 00000000000..6595d3e277b
--- /dev/null
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __IOT_H
+#define __IOT_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "compat-errno.h"
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "common-utils.h"
+
+#define min(a,b) ((a)<(b)?(a):(b))
+#define max(a,b) ((a)>(b)?(a):(b))
+
+struct iot_conf;
+struct iot_worker;
+struct iot_queue;
+struct iot_local;
+struct iot_file;
+
+/* Per-request state hung off frame->local by the io-threads fops. */
+struct iot_local {
+ struct iot_file *file; /* NOTE(review): not assigned or read in io-threads.c */
+ size_t frame_size; /* payload bytes charged to iot_conf.current_size while queued */
+};
+
+/* One entry in a worker's circular, doubly linked request queue. */
+struct iot_queue {
+ struct iot_queue *next, *prev;
+ call_stub_t *stub; /* queued call, resumed by the worker thread */
+};
+
+/* One worker thread together with its request queue. */
+struct iot_worker {
+ struct iot_worker *next, *prev; /* ring of workers headed by iot_conf.workers */
+ struct iot_queue queue; /* list head of pending requests */
+ struct iot_conf *conf; /* owning configuration; its lock guards the queue */
+ int64_t q,dq; /* running totals of enqueued / dequeued requests */
+ pthread_cond_t dq_cond; /* broadcast when a request is queued */
+ /*
+ pthread_cond_t q_cond;
+ pthread_mutex_t lock;
+ */
+ int32_t fd_count; /* incremented on every iot_schedule() that picks this worker */
+ int32_t queue_size; /* number of requests currently queued */
+ /*
+ int32_t queue_limit;
+ */
+ pthread_t thread; /* thread running iot_worker() */
+};
+
+/* Per-fd context stored on the fd at open/create and freed at release. */
+struct iot_file {
+ struct iot_file *next, *prev; /* all open files via this xlator */
+ struct iot_worker *worker; /* worker that serves every fd-based call on this fd */
+ fd_t *fd; /* the fd this context belongs to */
+ int32_t pending_ops; /* NOTE(review): not referenced in io-threads.c */
+};
+
+/* Translator-wide state, stored in xlator->private. */
+struct iot_conf {
+ int32_t thread_count; /* number of workers; from the "thread-count" option, default 1 */
+ int32_t misc_thread_index; /* Used to schedule the miscellaneous calls like checksum */
+ struct iot_worker workers; /* list head of the worker ring */
+ struct iot_file files; /* list head of open-file contexts */
+ pthread_mutex_t files_lock; /* guards the files list */
+
+ /* NOTE(review): cache_size is never assigned in io-threads.c, so it stays 0 from CALLOC */
+ uint64_t cache_size; /* when non-zero, iot_queue blocks while current_size >= cache_size */
+ off_t current_size; /* sum of frame_size over queued requests */
+ pthread_cond_t q_cond; /* broadcast by iot_dequeue after current_size shrinks */
+ pthread_mutex_t lock; /* guards all worker queues and current_size */
+};
+
+typedef struct iot_file iot_file_t;
+typedef struct iot_conf iot_conf_t;
+typedef struct iot_local iot_local_t;
+typedef struct iot_worker iot_worker_t;
+typedef struct iot_queue iot_queue_t;
+
+#endif /* __IOT_H */
diff --git a/xlators/performance/read-ahead/Makefile.am b/xlators/performance/read-ahead/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/performance/read-ahead/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
new file mode 100644
index 00000000000..7bb90228227
--- /dev/null
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -0,0 +1,14 @@
+# Builds the read-ahead translator as a loadable libtool module.
+xlator_LTLIBRARIES = read-ahead.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+# libtool spells the flag "-avoid-version"; "-avoidversion" is not a libtool option.
+read_ahead_la_LDFLAGS = -module -avoid-version
+
+read_ahead_la_SOURCES = read-ahead.c page.c
+read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = read-ahead.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
new file mode 100644
index 00000000000..3b8d4d2093e
--- /dev/null
+++ b/xlators/performance/read-ahead/src/page.c
@@ -0,0 +1,487 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "read-ahead.h"
+#include <assert.h>
+
+
+/*
+ * ra_page_get - look up the cached page for @offset.
+ * @file:   read-ahead context whose page list is searched
+ * @offset: byte offset; rounded with floor() to file->page_size first
+ *
+ * Walks the page list from its head until an entry at or beyond the rounded
+ * offset is reached.  Returns that entry when its offset matches exactly,
+ * NULL otherwise.  No locking is performed here.
+ */
+ra_page_t *
+ra_page_get (ra_file_t *file,
+             off_t offset)
+{
+        ra_page_t *cursor = NULL;
+        off_t      wanted = 0;
+
+        cursor = file->pages.next;
+        wanted = floor (offset, file->page_size);
+
+        for (; cursor != &file->pages; cursor = cursor->next) {
+                if (cursor->offset >= wanted)
+                        break;
+        }
+
+        if (cursor != &file->pages && cursor->offset == wanted)
+                return cursor;
+
+        return NULL;
+}
+
+
+/*
+ * ra_page_create - return the page at the rounded @offset, adding it if absent.
+ * @file:   read-ahead context owning the page list
+ * @offset: byte offset; rounded with floor() to file->page_size first
+ *
+ * The list is scanned from the head for the first page whose offset is not
+ * below the rounded offset.  When that page is not an exact match, a zeroed
+ * page is allocated and linked in immediately before it.
+ *
+ * Returns the existing or newly linked page, or NULL when CALLOC fails.
+ */
+ra_page_t *
+ra_page_create (ra_file_t *file, off_t offset)
+{
+        ra_page_t *successor = NULL;
+        ra_page_t *fresh     = NULL;
+        off_t      wanted    = 0;
+
+        successor = file->pages.next;
+        wanted    = floor (offset, file->page_size);
+
+        for (; successor != &file->pages; successor = successor->next) {
+                if (successor->offset >= wanted)
+                        break;
+        }
+
+        /* exact hit: nothing to allocate */
+        if (successor != &file->pages && successor->offset == wanted)
+                return successor;
+
+        fresh = CALLOC (1, sizeof (*fresh));
+        if (fresh == NULL)
+                return NULL;
+
+        fresh->offset = wanted;
+        fresh->file   = file;
+
+        /* splice the new page in front of its successor */
+        fresh->prev = successor->prev;
+        fresh->next = successor;
+        successor->prev->next = fresh;
+        successor->prev       = fresh;
+
+        return fresh;
+}
+
+
+/*
+ * ra_wait_on_page - queue @frame on @page until the page's data arrives.
+ * @page:  page to wait on
+ * @frame: request frame; frame->local must be its ra_local_t
+ *
+ * A wait-queue node pointing at @frame is pushed onto page->waitq and the
+ * request's wait_count is incremented under the local lock.
+ *
+ * If the node cannot be allocated the frame is not queued.  The request is
+ * then marked failed with ENOMEM, because it would otherwise be unwound
+ * without the bytes of this page and without any error indication.
+ */
+void
+ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
+{
+        ra_waitq_t *waitq = NULL;
+        ra_local_t *local = NULL;
+
+        local = frame->local;
+
+        waitq = CALLOC (1, sizeof (*waitq));
+        if (!waitq) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                local->op_ret   = -1;
+                local->op_errno = ENOMEM;
+                return;
+        }
+
+        waitq->data = frame;
+        waitq->next = page->waitq;
+        page->waitq = waitq;
+
+        ra_local_lock (local);
+        {
+                local->wait_count++;
+        }
+        ra_local_unlock (local);
+}
+
+
+/*
+ * ra_waitq_return - hand every frame on a detached wait queue to
+ * ra_frame_return() and free the queue nodes.
+ * @waitq: head of the singly linked queue; may be NULL
+ */
+void
+ra_waitq_return (ra_waitq_t *waitq)
+{
+        ra_waitq_t *node = waitq;
+        ra_waitq_t *rest = NULL;
+
+        while (node != NULL) {
+                /* remember the successor before the node is freed */
+                rest = node->next;
+
+                ra_frame_return ((call_frame_t *) node->data);
+                free (node);
+
+                node = rest;
+        }
+}
+
+
+/*
+ * ra_fault_cbk - readv callback for a read issued by ra_page_fault().
+ * @frame:    the private fault frame; frame->local is the fault ra_local_t
+ * @op_ret:   readv result; negative on failure
+ * @op_errno: errno accompanying a failure
+ * @vector:   data returned by the child
+ * @count:    number of entries in @vector
+ * @stbuf:    stat data, copied into the file on success
+ *
+ * Under the file lock the page at local->pending_offset is looked up and
+ * either filled (vector duplicated, rsp_refs referenced, marked ready) or,
+ * on error, failed through ra_page_error().  Waiters collected from the page
+ * are resumed after the lock is dropped.  The fault frame, its local and the
+ * fd reference taken by ra_page_fault() are released before returning.
+ *
+ * Always returns 0.
+ */
+int
+ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+              int32_t op_ret, int32_t op_errno, struct iovec *vector,
+              int32_t count, struct stat *stbuf)
+{
+        ra_local_t *local          = NULL;
+        off_t       pending_offset = 0;
+        ra_file_t  *file           = NULL;
+        ra_page_t  *page           = NULL;
+        ra_waitq_t *waitq          = NULL;
+        fd_t       *fd             = NULL;
+        uint64_t    tmp_file       = 0;
+
+        local = frame->local;
+        fd    = local->fd;
+
+        fd_ctx_get (fd, this, &tmp_file);
+
+        file = (ra_file_t *)(long)tmp_file;
+        pending_offset = local->pending_offset;
+
+        ra_file_lock (file);
+        {
+                if (op_ret >= 0)
+                        file->stbuf = *stbuf;
+
+                if (op_ret < 0) {
+                        page = ra_page_get (file, pending_offset);
+                        if (page)
+                                waitq = ra_page_error (page, op_ret, op_errno);
+                        goto unlock;
+                }
+
+                page = ra_page_get (file, pending_offset);
+                if (!page) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
+                                pending_offset, file->page_size, file);
+                        goto unlock;
+                }
+
+                if (page->vector) {
+                        dict_unref (page->ref);
+                        free (page->vector);
+                        /* do not leave stale pointers behind: the error
+                           path below purges the page, which releases both
+                           fields again */
+                        page->ref    = NULL;
+                        page->vector = NULL;
+                }
+
+                page->vector = iov_dup (vector, count);
+                if (page->vector == NULL) {
+                        /* a ready page without a vector would be
+                           dereferenced when its waiters are filled */
+                        waitq = ra_page_error (page, -1, ENOMEM);
+                        goto unlock;
+                }
+
+                page->count = count;
+                page->ref   = dict_ref (frame->root->rsp_refs);
+                page->ready = 1;
+
+                page->size = iov_length (vector, count);
+
+                waitq = ra_page_wakeup (page);
+        }
+unlock:
+        ra_file_unlock (file);
+
+        /* frames are resumed outside the file lock */
+        ra_waitq_return (waitq);
+
+        fd_unref (local->fd);
+
+        free (frame->local);
+        frame->local = NULL;
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * ra_page_fault - issue a background readv for one page.
+ * @file:   read-ahead context; the read goes to file->fd
+ * @frame:  request frame that is copied to carry the read
+ * @offset: file offset of the page; file->page_size bytes are requested
+ *
+ * The copied frame gets its own ra_local_t recording the pending offset and
+ * size plus a reference on the fd, and is wound to the first child with
+ * ra_fault_cbk() as callback.
+ *
+ * If the frame copy or the local cannot be allocated, no read is issued.
+ * The page at @offset (if present) is then failed with ENOMEM and its
+ * waiters are resumed, so that no request stays blocked on a read that will
+ * never complete.  Must be called without the file lock held.
+ */
+void
+ra_page_fault (ra_file_t *file,
+               call_frame_t *frame,
+               off_t offset)
+{
+        call_frame_t *fault_frame = NULL;
+        ra_local_t   *fault_local = NULL;
+        ra_page_t    *page        = NULL;
+        ra_waitq_t   *waitq       = NULL;
+
+        fault_frame = copy_frame (frame);
+        if (fault_frame == NULL)
+                goto err;
+
+        fault_local = CALLOC (1, sizeof (ra_local_t));
+        if (fault_local == NULL) {
+                STACK_DESTROY (fault_frame->root);
+                goto err;
+        }
+
+        fault_frame->local = fault_local;
+        fault_local->pending_offset = offset;
+        fault_local->pending_size = file->page_size;
+
+        fault_local->fd = fd_ref (file->fd);
+
+        STACK_WIND (fault_frame, ra_fault_cbk,
+                    FIRST_CHILD (fault_frame->this),
+                    FIRST_CHILD (fault_frame->this)->fops->readv,
+                    file->fd, file->page_size, offset);
+        return;
+
+err:
+        gf_log (frame->this->name, GF_LOG_ERROR,
+                "out of memory :(");
+
+        ra_file_lock (file);
+        {
+                page = ra_page_get (file, offset);
+                if (page)
+                        waitq = ra_page_error (page, -1, ENOMEM);
+        }
+        ra_file_unlock (file);
+
+        ra_waitq_return (waitq);
+        return;
+}
+
+/*
+ * ra_frame_fill - attach the part of @page that overlaps a request to it.
+ * @page:  ready page providing the data
+ * @frame: request frame; frame->local is its ra_local_t
+ *
+ * Nothing is done when the request already failed (op_ret == -1) or the
+ * page holds no bytes.  Otherwise a ra_fill_t describing the overlapping
+ * iovec range is built, inserted into local->fill before the first entry
+ * whose offset is greater than the page offset, and local->op_ret grows by
+ * the number of bytes contributed.
+ *
+ * On allocation failure the request is marked failed with ENOMEM instead of
+ * dereferencing a NULL pointer.
+ */
+void
+ra_frame_fill (ra_page_t *page, call_frame_t *frame)
+{
+        ra_local_t *local      = NULL;
+        ra_fill_t  *fill       = NULL;
+        off_t       src_offset = 0;
+        off_t       dst_offset = 0;
+        ssize_t     copy_size  = 0;
+        ra_fill_t  *new        = NULL;
+
+        local = frame->local;
+        fill  = &local->fill;
+
+        if (local->op_ret == -1 || !page->size)
+                return;
+
+        if (local->offset > page->offset)
+                src_offset = local->offset - page->offset;
+        else
+                dst_offset = page->offset - local->offset;
+
+        copy_size = min (page->size - src_offset,
+                         local->size - dst_offset);
+
+        if (copy_size < 0) {
+                /* if page contains fewer bytes and the required offset
+                   is beyond the page size in the page */
+                copy_size = src_offset = 0;
+        }
+
+        /* find the insertion point: first fill beyond this page */
+        fill = fill->next;
+        while (fill != &local->fill) {
+                if (fill->offset > page->offset) {
+                        break;
+                }
+                fill = fill->next;
+        }
+
+        new = CALLOC (1, sizeof (*new));
+        if (new == NULL)
+                goto enomem;
+
+        new->offset = page->offset;
+        new->size   = copy_size;
+
+        /* first pass with a NULL destination only counts the iovecs */
+        new->count = iov_subset (page->vector, page->count,
+                                 src_offset, src_offset+copy_size,
+                                 NULL);
+
+        new->vector = CALLOC (new->count, sizeof (struct iovec));
+        if (new->vector == NULL && new->count > 0) {
+                free (new);
+                goto enomem;
+        }
+
+        new->count = iov_subset (page->vector, page->count,
+                                 src_offset, src_offset+copy_size,
+                                 new->vector);
+
+        /* take the reference only once nothing can fail any more */
+        new->refs = dict_ref (page->ref);
+
+        new->next = fill;
+        new->prev = new->next->prev;
+        new->next->prev = new;
+        new->prev->next = new;
+
+        local->op_ret += copy_size;
+        return;
+
+enomem:
+        gf_log (frame->this->name, GF_LOG_ERROR,
+                "out of memory :(");
+        local->op_ret   = -1;
+        local->op_errno = ENOMEM;
+        return;
+}
+
+
+/*
+ * ra_frame_unwind - reply to a read request from its collected fills.
+ * @frame: request frame; frame->local is detached and freed here
+ *
+ * All fill vectors are concatenated into one iovec array, their reference
+ * dicts are copied into a fresh dict that becomes frame->root->rsp_refs, and
+ * the frame is unwound with local->op_ret / local->op_errno and the stat
+ * buffer cached in the fd's read-ahead context.  Every fill, the array and
+ * the local are released afterwards.
+ *
+ * If the combined array cannot be allocated the request is unwound with
+ * -1/ENOMEM and no vector.
+ */
+void
+ra_frame_unwind (call_frame_t *frame)
+{
+        ra_local_t   *local    = NULL;
+        ra_fill_t    *fill     = NULL;
+        int32_t       count    = 0;
+        struct iovec *vector   = NULL;
+        int32_t       copied   = 0;
+        dict_t       *refs     = NULL;
+        ra_fill_t    *next     = NULL;
+        fd_t         *fd       = NULL;
+        ra_file_t    *file     = NULL;
+        uint64_t      tmp_file = 0;
+
+        local = frame->local;
+        fill  = local->fill.next;
+
+        refs = get_new_dict ();
+
+        frame->local = NULL;
+
+        while (fill != &local->fill) {
+                count += fill->count;
+                fill = fill->next;
+        }
+
+        vector = CALLOC (count, sizeof (*vector));
+        if (vector == NULL && count > 0) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                local->op_ret   = -1;
+                local->op_errno = ENOMEM;
+                count = 0;
+        }
+
+        fill = local->fill.next;
+
+        while (fill != &local->fill) {
+                next = fill->next;
+
+                if (vector != NULL) {
+                        /* 'copied' is a byte offset into the array */
+                        memcpy (((char *)vector) + copied, fill->vector,
+                                fill->count * sizeof (*vector));
+
+                        copied += (fill->count * sizeof (*vector));
+                }
+                dict_copy (fill->refs, refs);
+
+                /* unlink; the predecessor must skip to the successor */
+                fill->next->prev = fill->prev;
+                fill->prev->next = fill->next;
+
+                dict_unref (fill->refs);
+                free (fill->vector);
+                free (fill);
+
+                fill = next;
+        }
+
+        frame->root->rsp_refs = dict_ref (refs);
+
+        fd = local->fd;
+        fd_ctx_get (fd, frame->this, &tmp_file);
+        file = (ra_file_t *)(long)tmp_file;
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      vector, count, &file->stbuf);
+
+        dict_unref (refs);
+        pthread_mutex_destroy (&local->local_lock);
+        free (local);
+        free (vector);
+
+        return;
+}
+
+/*
+ * ra_frame_return - drop one pending wait from a request.
+ * @frame: request frame; frame->local is its ra_local_t
+ *
+ * wait_count is decremented under the local lock; the caller that brings it
+ * to zero unwinds the frame through ra_frame_unwind().
+ */
+void
+ra_frame_return (call_frame_t *frame)
+{
+        ra_local_t *local     = frame->local;
+        int32_t     remaining = 0;
+
+        assert (local->wait_count > 0);
+
+        ra_local_lock (local);
+        {
+                local->wait_count--;
+                remaining = local->wait_count;
+        }
+        ra_local_unlock (local);
+
+        if (remaining == 0)
+                ra_frame_unwind (frame);
+
+        return;
+}
+
+/*
+ * ra_page_wakeup - feed a page's data to every frame waiting on it.
+ * @page: page whose wait queue is detached
+ *
+ * page->waitq is cleared and ra_frame_fill() is called for each queued
+ * frame.  The detached queue is returned; the frames are not resumed here.
+ */
+ra_waitq_t *
+ra_page_wakeup (ra_page_t *page)
+{
+        ra_waitq_t *detached = page->waitq;
+        ra_waitq_t *node     = NULL;
+
+        page->waitq = NULL;
+
+        node = detached;
+        while (node != NULL) {
+                ra_frame_fill (page, (call_frame_t *) node->data);
+                node = node->next;
+        }
+
+        return detached;
+}
+
+/*
+ * ra_page_purge - unlink a page from its list and release it.
+ * @page: page to drop
+ *
+ * The page's dict reference (when set), its iovec array and the page itself
+ * are released.  page->waitq is not examined.
+ */
+void
+ra_page_purge (ra_page_t *page)
+{
+        ra_page_t *before = page->prev;
+        ra_page_t *after  = page->next;
+
+        before->next = after;
+        after->prev  = before;
+
+        if (page->ref != NULL)
+                dict_unref (page->ref);
+
+        free (page->vector);
+        free (page);
+}
+
+/*
+ * ra_page_error - fail a page and record the error on its waiters.
+ * @page:     page to fail; it is purged before returning
+ * @op_ret:   result stored into each waiting request
+ * @op_errno: errno stored into each waiting request
+ *
+ * Requests whose op_ret is already -1 keep their existing error.  The
+ * detached wait queue is returned; the frames are not resumed here.
+ */
+ra_waitq_t *
+ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
+{
+        ra_waitq_t *detached = page->waitq;
+        ra_waitq_t *node     = NULL;
+        ra_local_t *local    = NULL;
+
+        page->waitq = NULL;
+
+        node = detached;
+        while (node != NULL) {
+                local = ((call_frame_t *) node->data)->local;
+
+                if (local->op_ret != -1) {
+                        local->op_ret   = op_ret;
+                        local->op_errno = op_errno;
+                }
+
+                node = node->next;
+        }
+
+        ra_page_purge (page);
+
+        return detached;
+}
+
+/*
+ * ra_file_destroy - tear down a read-ahead file context.
+ * @file: context to destroy
+ *
+ * The context is unlinked from the conf's file list under the conf lock,
+ * every remaining page is failed with EINVAL (which purges it), and finally
+ * the file lock and the context itself are released.
+ */
+void
+ra_file_destroy (ra_file_t *file)
+{
+        ra_conf_t *conf = file->conf;
+
+        ra_conf_lock (conf);
+        {
+                file->next->prev = file->prev;
+                file->prev->next = file->next;
+        }
+        ra_conf_unlock (conf);
+
+        /* ra_page_error() purges the head page, so the list shrinks */
+        while (file->pages.next != &file->pages)
+                ra_page_error (file->pages.next, -1, EINVAL);
+
+        pthread_mutex_destroy (&file->file_lock);
+        free (file);
+}
+
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
new file mode 100644
index 00000000000..0060e00fd41
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -0,0 +1,890 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ TODO:
+ - handle O_DIRECT
+ - maintain offset, flush on lseek
+ - ensure efficient memory management in case of random seek
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "read-ahead.h"
+#include <assert.h>
+#include <sys/time.h>
+
+
+/* Forward declaration; the definition follows ra_release() below. */
+static void
+read_ahead (call_frame_t *frame,
+ ra_file_t *file);
+
+
+/*
+ * ra_open_cbk - open callback: attach a read-ahead context to the new fd.
+ * @frame:    frame to unwind to the caller
+ * @this:     this translator; this->private is the ra_conf_t
+ * @op_ret:   result from the child; -1 is passed straight through
+ * @op_errno: errno from the child
+ * @fd:       the opened fd
+ *
+ * On success a ra_file_t is allocated, fully initialised, linked into the
+ * conf's file list and only then stored in the fd context, so no other
+ * caller can observe a half-built context.  Caching is disabled for files
+ * with mandatory locking enabled and for O_DIRECT or write-only opens.
+ *
+ * If the context cannot be allocated or stored, the open is failed with
+ * ENOMEM rather than reported as successful without a context.
+ *
+ * Always returns 0.
+ */
+int
+ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        ra_conf_t *conf = NULL;
+        ra_file_t *file = NULL;
+        int        ret  = 0;
+
+        conf = this->private;
+
+        if (op_ret == -1) {
+                goto unwind;
+        }
+
+        file = CALLOC (1, sizeof (*file));
+        if (!file) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* If mandatory locking has been enabled on this file,
+           we disable caching on it */
+
+        if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
+                file->disabled = 1;
+
+        /* If O_DIRECT or write-only open, we disable caching on it.
+           The access mode is a field, not a flag bit, hence O_ACCMODE. */
+
+        if ((fd->flags & O_DIRECT)
+            || ((fd->flags & O_ACCMODE) == O_WRONLY))
+                file->disabled = 1;
+
+        file->offset = (unsigned long long) 0;
+        file->conf = conf;
+        file->pages.next = &file->pages;
+        file->pages.prev = &file->pages;
+        file->pages.offset = (unsigned long long) 0;
+        file->pages.file = file;
+
+        file->fd = fd;
+        file->page_count = conf->page_count;
+        file->page_size = conf->page_size;
+        pthread_mutex_init (&file->file_lock, NULL);
+
+        if (!file->disabled) {
+                file->page_count = 1;
+        }
+
+        ra_conf_lock (conf);
+        {
+                file->next = conf->files.next;
+                conf->files.next = file;
+                file->next->prev = file;
+                file->prev = &conf->files;
+        }
+        ra_conf_unlock (conf);
+
+        ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set read-ahead context on fd");
+                /* unlinks from conf->files and releases the context */
+                ra_file_destroy (file);
+                op_ret   = -1;
+                op_errno = ENOMEM;
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}
+
+
+/*
+ * ra_create_cbk - create callback: attach a read-ahead context to the new fd.
+ * @frame:    frame to unwind to the caller
+ * @this:     this translator; this->private is the ra_conf_t
+ * @op_ret:   result from the child; -1 is passed straight through
+ * @op_errno: errno from the child
+ * @fd:       the created fd
+ * @inode:    inode of the new file, passed through
+ * @buf:      stat of the new file, passed through
+ *
+ * On success a ra_file_t is allocated, fully initialised, linked into the
+ * conf's file list and only then stored in the fd context.  Caching is
+ * disabled for files with mandatory locking enabled and for O_DIRECT or
+ * write-only opens.
+ *
+ * If the context cannot be allocated or stored, the call is failed with
+ * ENOMEM rather than reported as successful without a context.
+ *
+ * Always returns 0.
+ */
+int
+ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno,
+               fd_t *fd, inode_t *inode, struct stat *buf)
+{
+        ra_conf_t *conf = NULL;
+        ra_file_t *file = NULL;
+        int        ret  = 0;
+
+        conf = this->private;
+
+        if (op_ret == -1) {
+                goto unwind;
+        }
+
+        file = CALLOC (1, sizeof (*file));
+        if (!file) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_ret   = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        /* If mandatory locking has been enabled on this file,
+           we disable caching on it */
+
+        if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
+                file->disabled = 1;
+
+        /* If O_DIRECT or write-only open, we disable caching on it.
+           The access mode is a field, not a flag bit, hence O_ACCMODE. */
+
+        if ((fd->flags & O_DIRECT)
+            || ((fd->flags & O_ACCMODE) == O_WRONLY))
+                file->disabled = 1;
+
+        file->offset = (unsigned long long) 0;
+        //file->size = fd->inode->buf.st_size;
+        file->conf = conf;
+        file->pages.next = &file->pages;
+        file->pages.prev = &file->pages;
+        file->pages.offset = (unsigned long long) 0;
+        file->pages.file = file;
+
+        file->fd = fd;
+        file->page_count = conf->page_count;
+        file->page_size = conf->page_size;
+        pthread_mutex_init (&file->file_lock, NULL);
+
+        ra_conf_lock (conf);
+        {
+                file->next = conf->files.next;
+                conf->files.next = file;
+                file->next->prev = file;
+                file->prev = &conf->files;
+        }
+        ra_conf_unlock (conf);
+
+        ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "could not set read-ahead context on fd");
+                /* unlinks from conf->files and releases the context */
+                ra_file_destroy (file);
+                op_ret   = -1;
+                op_errno = ENOMEM;
+        }
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+
+        return 0;
+}
+
+
+/*
+ * ra_open - open fop: forwards the call unchanged to the first child and
+ * arranges for ra_open_cbk() to receive the reply.  Always returns 0.
+ */
+int
+ra_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, fd_t *fd)
+{
+ STACK_WIND (frame, ra_open_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ loc, flags, fd);
+
+ return 0;
+}
+
+/*
+ * ra_create - create fop: forwards the call unchanged to the first child and
+ * arranges for ra_create_cbk() to receive the reply.  Always returns 0.
+ */
+int
+ra_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+{
+ STACK_WIND (frame, ra_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, fd);
+
+ return 0;
+}
+
+/*
+ * flush_region - drop cached pages whose offset lies in
+ * [offset, offset + size).
+ * @frame:  unused here
+ * @file:   read-ahead context whose page list is trimmed
+ * @offset: start of the region
+ * @size:   length of the region
+ *
+ * Runs under the file lock.  Pages that still have frames waiting on them
+ * are left alone.
+ */
+static void
+flush_region (call_frame_t *frame,
+              ra_file_t *file,
+              off_t offset,
+              off_t size)
+{
+        ra_page_t *page  = NULL;
+        ra_page_t *after = NULL;
+
+        ra_file_lock (file);
+        {
+                for (page = file->pages.next;
+                     page != &file->pages && page->offset < (offset + size);
+                     page = after) {
+                        /* fetch the successor first: purge frees 'page' */
+                        after = page->next;
+
+                        if (page->offset < offset || page->waitq)
+                                continue;
+
+                        ra_page_purge (page);
+                }
+        }
+        ra_file_unlock (file);
+}
+
+
+
+/*
+ * ra_release - release callback: remove this translator's context from @fd
+ * and destroy it when one was stored.  Always returns 0.
+ */
+int
+ra_release (xlator_t *this,
+            fd_t *fd)
+{
+        uint64_t ctx_value = 0;
+
+        if (fd_ctx_del (fd, this, &ctx_value) == 0)
+                ra_file_destroy ((ra_file_t *)(long)ctx_value);
+
+        return 0;
+}
+
+
+/*
+ * read_ahead - fault in pages ahead of the file's current offset.
+ * @frame: request frame; copied by ra_page_fault() for each read issued
+ * @file:  read-ahead context
+ *
+ * Does nothing when file->page_count is 0.  Otherwise the window of
+ * page_size * page_count bytes starting at file->offset is scanned for the
+ * first page that is not cached.  If the scan ends on a cached page the
+ * function returns; if not, missing pages from that point on are created,
+ * marked dirty and requested from the child.  The file lock is held only
+ * around the page list accesses, never across ra_page_fault().
+ */
+void
+read_ahead (call_frame_t *frame, ra_file_t *file)
+{
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
+
+ if (!file->page_count)
+ return;
+
+ ra_size = file->page_size * file->page_count;
+ ra_offset = floor (file->offset, file->page_size);
+ /* a non-zero file->size limits the window; otherwise use the window end */
+ cap = file->size ? file->size : file->offset + ra_size;
+
+ /* pass 1: advance ra_offset to the first page missing from the cache */
+ while (ra_offset < min (file->offset + ra_size, cap)) {
+
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, ra_offset);
+ }
+ ra_file_unlock (file);
+
+ if (!trav)
+ break;
+
+ ra_offset += file->page_size;
+ }
+
+ if (trav)
+ /* comfortable enough */
+ return;
+
+ trav_offset = ra_offset;
+
+ trav = file->pages.next;
+ cap = file->size ? file->size : ra_offset + ra_size;
+
+ /* pass 2: create and request every page still missing in the next window */
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create (file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
+ }
+ ra_file_unlock (file);
+
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
+
+ /* the read is issued after the file lock has been dropped */
+ if (fault) {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "RA at offset=%"PRId64, trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+ trav_offset += file->page_size;
+ }
+
+ return;
+}
+
+
+/*
+ * ra_need_atime_cbk - callback of the one-byte readv sent by
+ * dispatch_requests() to force an atime update.  The reply is ignored; only
+ * the private frame's stack is destroyed.  Always returns 0.
+ */
+int
+ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct stat *stbuf)
+{
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+
+/*
+ * dispatch_requests - serve a read from the page cache, page by page.
+ * @frame: request frame; frame->local holds the offset and size wanted
+ * @file:  read-ahead context of the fd being read
+ *
+ * For every page overlapping the request the page is looked up (or created)
+ * under the file lock.  Ready pages are copied into the request at once;
+ * pages still in transit get the frame queued on them.  Newly created pages
+ * are requested from the child after the lock has been released.
+ *
+ * If a page cannot be created the request is marked failed with ENOMEM and
+ * dispatching stops: continuing would return data with a hole in it, and a
+ * read issued for a page that does not exist would only be discarded.
+ *
+ * When every page was a cache hit and force_atime_update is set, a one-byte
+ * readv is sent to the child so that the file's atime still gets updated.
+ */
+static void
+dispatch_requests (call_frame_t *frame,
+                   ra_file_t *file)
+{
+        ra_local_t   *local             = NULL;
+        ra_conf_t    *conf              = NULL;
+        off_t         rounded_offset    = 0;
+        off_t         rounded_end       = 0;
+        off_t         trav_offset       = 0;
+        ra_page_t    *trav              = NULL;
+        call_frame_t *ra_frame          = NULL;
+        char          need_atime_update = 1;
+        char          fault             = 0;
+        char          oom               = 0;
+
+        local = frame->local;
+        conf  = file->conf;
+
+        rounded_offset = floor (local->offset, file->page_size);
+        rounded_end    = roof (local->offset + local->size, file->page_size);
+
+        trav_offset = rounded_offset;
+
+        while (trav_offset < rounded_end) {
+                fault = 0;
+
+                ra_file_lock (file);
+                {
+                        trav = ra_page_get (file, trav_offset);
+                        if (!trav) {
+                                trav = ra_page_create (file, trav_offset);
+                                need_atime_update = 0;
+
+                                if (!trav) {
+                                        local->op_ret   = -1;
+                                        local->op_errno = ENOMEM;
+                                        oom = 1;
+                                        goto unlock;
+                                }
+
+                                /* only a page that exists can be faulted */
+                                fault = 1;
+                        }
+
+                        if (trav->ready) {
+                                gf_log (frame->this->name, GF_LOG_DEBUG,
+                                        "HIT at offset=%"PRId64".",
+                                        trav_offset);
+                                ra_frame_fill (trav, frame);
+                        } else {
+                                gf_log (frame->this->name, GF_LOG_DEBUG,
+                                        "IN-TRANSIT at offset=%"PRId64".",
+                                        trav_offset);
+                                ra_wait_on_page (trav, frame);
+                                need_atime_update = 0;
+                        }
+                }
+        unlock:
+                ra_file_unlock (file);
+
+                if (oom) {
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        break;
+                }
+
+                if (fault) {
+                        gf_log (frame->this->name, GF_LOG_DEBUG,
+                                "MISS at offset=%"PRId64".",
+                                trav_offset);
+                        ra_page_fault (file, frame, trav_offset);
+                }
+
+                trav_offset += file->page_size;
+        }
+
+        if (need_atime_update && conf->force_atime_update) {
+                /* TODO: use utimens() since readv() can confuse underlying
+                   io-cache and others */
+                ra_frame = copy_frame (frame);
+                if (ra_frame != NULL) {
+                        STACK_WIND (ra_frame, ra_need_atime_cbk,
+                                    FIRST_CHILD (frame->this),
+                                    FIRST_CHILD (frame->this)->fops->readv,
+                                    file->fd, 1, 1);
+                }
+        }
+
+        return ;
+}
+
+
+/*
+ * ra_readv_disabled_cbk - readv callback used when caching is disabled for
+ * the file: the child's reply is handed to the caller unchanged.
+ * Always returns 0.
+ */
+int
+ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct stat *stbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+
+ return 0;
+}
+
+
+/*
+ * ra_readv - readv fop: serve the read through the read-ahead page cache.
+ * @frame:  request frame
+ * @this:   this translator; this->private is the ra_conf_t
+ * @fd:     fd being read
+ * @size:   number of bytes requested
+ * @offset: file offset of the request
+ *
+ * A read that starts where the previous one ended grows the read-ahead
+ * window (up to conf->page_count pages); any other offset resets the window
+ * and drops the cached pages.  Files with caching disabled, and fds that
+ * carry no read-ahead context at all, are read straight from the child.
+ *
+ * Returns 0; the result is delivered by unwinding @frame.  Allocation
+ * failure of the request state unwinds with -1/ENOMEM.
+ */
+int
+ra_readv (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, size_t size, off_t offset)
+{
+        ra_file_t  *file            = NULL;
+        ra_local_t *local           = NULL;
+        ra_conf_t  *conf            = NULL;
+        int         op_errno        = 0;
+        char        expected_offset = 1;
+        uint64_t    tmp_file        = 0;
+
+        conf = this->private;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
+                offset, size);
+
+        fd_ctx_get (fd, this, &tmp_file);
+        file = (ra_file_t *)(long)tmp_file;
+
+        if (file == NULL) {
+                /* no context on this fd: nothing to cache with, so read
+                   through instead of dereferencing a NULL context */
+                STACK_WIND (frame, ra_readv_disabled_cbk,
+                            FIRST_CHILD (frame->this),
+                            FIRST_CHILD (frame->this)->fops->readv,
+                            fd, size, offset);
+                return 0;
+        }
+
+        if (file->offset != offset) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "unexpected offset (%"PRId64" != %"PRId64") resetting",
+                        file->offset, offset);
+
+                expected_offset = file->expected = file->page_count = 0;
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "expected offset (%"PRId64") when page_count=%d",
+                        offset, file->page_count);
+
+                if (file->expected < (conf->page_size * conf->page_count)) {
+                        file->expected += size;
+                        file->page_count = min ((file->expected / file->page_size),
+                                                conf->page_count);
+                }
+        }
+
+        if (!expected_offset) {
+                flush_region (frame, file, 0, file->pages.prev->offset + 1);
+        }
+
+        if (file->disabled) {
+                STACK_WIND (frame, ra_readv_disabled_cbk,
+                            FIRST_CHILD (frame->this),
+                            FIRST_CHILD (frame->this)->fops->readv,
+                            file->fd, size, offset);
+                return 0;
+        }
+
+        local = (void *) CALLOC (1, sizeof (*local));
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+
+        local->fd         = fd;
+        local->offset     = offset;
+        local->size       = size;
+        /* one reference held by this function until dispatch is done */
+        local->wait_count = 1;
+
+        local->fill.next = &local->fill;
+        local->fill.prev = &local->fill;
+
+        pthread_mutex_init (&local->local_lock, NULL);
+
+        frame->local = local;
+
+        dispatch_requests (frame, file);
+
+        flush_region (frame, file, 0, floor (offset, file->page_size));
+
+        /* read_ahead() still needs the offset of the current request */
+        read_ahead (frame, file);
+
+        /* record the next expected offset before the frame can unwind:
+           once the reply is out the fd may be released and 'file' freed */
+        file->offset = offset + size;
+
+        ra_frame_return (frame);
+
+        return 0;
+
+unwind:
+        STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * ra_flush_cbk - callback shared by ra_flush() and ra_fsync(): passes the
+ * child's result to the caller unchanged.  Always returns 0.
+ */
+int
+ra_flush_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno)
+{
+ STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+
+/*
+ * ra_flush - flush fop: drop the fd's cached pages (when it has a
+ * read-ahead context), then forward the flush to the first child.
+ * Always returns 0.
+ */
+int
+ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        uint64_t   ctx_value = 0;
+        ra_file_t *file      = NULL;
+
+        fd_ctx_get (fd, this, &ctx_value);
+        file = (ra_file_t *)(long)ctx_value;
+
+        /* the region ends just past the offset of the last listed page */
+        if (file != NULL)
+                flush_region (frame, file, 0, file->pages.prev->offset+1);
+
+        STACK_WIND (frame, ra_flush_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->flush,
+                    fd);
+        return 0;
+}
+
+
+/*
+ * ra_fsync - fsync fop: drop the fd's cached pages (when it has a
+ * read-ahead context), then forward the fsync to the first child.  The reply
+ * is handled by ra_flush_cbk().  Always returns 0.
+ */
+int
+ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+          int32_t datasync)
+{
+        uint64_t   ctx_value = 0;
+        ra_file_t *file      = NULL;
+
+        fd_ctx_get (fd, this, &ctx_value);
+        file = (ra_file_t *)(long)ctx_value;
+
+        /* the region ends just past the offset of the last listed page */
+        if (file != NULL)
+                flush_region (frame, file, 0, file->pages.prev->offset+1);
+
+        STACK_WIND (frame, ra_flush_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fsync,
+                    fd, datasync);
+        return 0;
+}
+
+
+/*
+ * ra_writev_cbk - writev callback: drop the pages cached for the fd that
+ * ra_writev() stashed in frame->local, then pass the child's result to the
+ * caller.  Always returns 0.
+ */
+int
+ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        fd_t      *written_fd = frame->local;
+        uint64_t   ctx_value  = 0;
+        ra_file_t *file       = NULL;
+
+        fd_ctx_get (written_fd, this, &ctx_value);
+        file = (ra_file_t *)(long)ctx_value;
+
+        if (file != NULL)
+                flush_region (frame, file, 0, file->pages.prev->offset+1);
+
+        /* frame->local holds a borrowed fd pointer, so just clear it */
+        frame->local = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+        return 0;
+}
+
+
+/*
+ * ra_writev - writev fop: drop the fd's cached pages, reset its read-ahead
+ * counters and forward the write to the first child.  The fd is kept in
+ * frame->local for ra_writev_cbk().  Always returns 0.
+ */
+int
+ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+           struct iovec *vector, int32_t count, off_t offset)
+{
+        uint64_t   ctx_value = 0;
+        ra_file_t *file      = NULL;
+
+        fd_ctx_get (fd, this, &ctx_value);
+        file = (ra_file_t *)(long)ctx_value;
+
+        if (file != NULL) {
+                flush_region (frame, file, 0, file->pages.prev->offset+1);
+
+                /* the sequential-read history restarts after a write */
+                file->page_count = 0;
+                file->expected   = 0;
+        }
+
+        frame->local = fd;
+
+        STACK_WIND (frame, ra_writev_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->writev,
+                    fd, vector, count, offset);
+
+        return 0;
+}
+
+
+/*
+ * ra_attr_cbk - callback shared by the truncate, ftruncate, fstat and fchown
+ * fops: passes the child's result and stat buffer to the caller unchanged.
+ * Always returns 0.
+ */
+int
+ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+}
+
+
+/*
+ * ra_truncate - truncate fop: under the inode lock, drop the cached pages of
+ * every fd on the inode's fd list that has a read-ahead context, then
+ * forward the call to the first child.  Always returns 0.
+ */
+int
+ra_truncate (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, off_t offset)
+{
+        inode_t   *inode     = loc->inode;
+        fd_t      *open_fd   = NULL;
+        ra_file_t *file      = NULL;
+        uint64_t   ctx_value = 0;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (open_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (open_fd, this, &ctx_value);
+                        file = (ra_file_t *)(long)ctx_value;
+
+                        if (file != NULL)
+                                flush_region (frame, file, 0,
+                                              file->pages.prev->offset + 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_attr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->truncate,
+                    loc, offset);
+        return 0;
+}
+
+
+/*
+ * ra_fstat - fstat fop: under the inode lock, drop the cached pages of every
+ * fd on the inode's fd list that has a read-ahead context, then forward the
+ * call to the first child.  Always returns 0.
+ */
+int
+ra_fstat (call_frame_t *frame, xlator_t *this,
+          fd_t *fd)
+{
+        inode_t   *inode     = fd->inode;
+        fd_t      *open_fd   = NULL;
+        ra_file_t *file      = NULL;
+        uint64_t   ctx_value = 0;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (open_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (open_fd, this, &ctx_value);
+                        file = (ra_file_t *)(long)ctx_value;
+
+                        if (file != NULL)
+                                flush_region (frame, file, 0,
+                                              file->pages.prev->offset + 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_attr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fstat,
+                    fd);
+        return 0;
+}
+
+
+/*
+ * ra_fchown - fchown fop: under the inode lock, drop the cached pages of
+ * every fd on the inode's fd list that has a read-ahead context, then
+ * forward the call to the first child.  Always returns 0.
+ */
+int
+ra_fchown (call_frame_t *frame, xlator_t *this,
+           fd_t *fd, uid_t uid, gid_t gid)
+{
+        inode_t   *inode     = fd->inode;
+        fd_t      *open_fd   = NULL;
+        ra_file_t *file      = NULL;
+        uint64_t   ctx_value = 0;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (open_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (open_fd, this, &ctx_value);
+                        file = (ra_file_t *)(long)ctx_value;
+
+                        if (file != NULL)
+                                flush_region (frame, file, 0,
+                                              file->pages.prev->offset + 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_attr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->fchown,
+                    fd, uid, gid);
+        return 0;
+}
+
+
+/*
+ * ra_ftruncate - ftruncate fop: under the inode lock, drop the cached pages
+ * of every fd on the inode's fd list that has a read-ahead context, then
+ * forward the call to the first child.  Always returns 0.
+ */
+int
+ra_ftruncate (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset)
+{
+        inode_t   *inode     = fd->inode;
+        fd_t      *open_fd   = NULL;
+        ra_file_t *file      = NULL;
+        uint64_t   ctx_value = 0;
+
+        LOCK (&inode->lock);
+        {
+                list_for_each_entry (open_fd, &inode->fd_list, inode_list) {
+                        fd_ctx_get (open_fd, this, &ctx_value);
+                        file = (ra_file_t *)(long)ctx_value;
+
+                        if (file != NULL)
+                                flush_region (frame, file, 0,
+                                              file->pages.prev->offset + 1);
+                }
+        }
+        UNLOCK (&inode->lock);
+
+        STACK_WIND (frame, ra_attr_cbk,
+                    FIRST_CHILD (this),
+                    FIRST_CHILD (this)->fops->ftruncate,
+                    fd, offset);
+        return 0;
+}
+
+
+/*
+ * init - translator entry point: validate the graph position and build the
+ * read-ahead configuration.
+ * @this: this translator; this->options supplies the settings
+ *
+ * Requires exactly one child.  Defaults are a 256KB page size and 2 pages;
+ * "page-size", "page-count" and "force-atime-update" override them.
+ *
+ * Returns 0 with this->private set to the new ra_conf_t, or -1 on a
+ * configuration error.  The partially built configuration is released on
+ * every error path.
+ */
+int
+init (xlator_t *this)
+{
+        ra_conf_t *conf = NULL;
+        dict_t    *options = this->options;
+        char      *page_size_string = NULL;
+        char      *page_count_string = NULL;
+        char      *force_atime_update_str = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: read-ahead not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        conf = (void *) CALLOC (1, sizeof (*conf));
+        ERR_ABORT (conf);
+        conf->page_size = 256 * 1024;
+        conf->page_count = 2;
+
+        if (dict_get (options, "page-size"))
+                page_size_string = data_to_str (dict_get (options,
+                                                          "page-size"));
+        if (page_size_string)
+        {
+                if (gf_string2bytesize (page_size_string, &conf->page_size) != 0)
+                {
+                        gf_log ("read-ahead",
+                                GF_LOG_ERROR,
+                                "invalid number format \"%s\" of \"option page-size\"",
+                                page_size_string);
+                        goto err;
+                }
+
+                gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_size = %"PRIu64"",
+                        conf->page_size);
+        }
+
+        if (dict_get (options, "page-count"))
+                page_count_string = data_to_str (dict_get (options,
+                                                           "page-count"));
+        if (page_count_string)
+        {
+                if (gf_string2uint_base10 (page_count_string, &conf->page_count) != 0)
+                {
+                        gf_log ("read-ahead",
+                                GF_LOG_ERROR,
+                                "invalid number format \"%s\" of \"option page-count\"",
+                                page_count_string);
+                        goto err;
+                }
+                gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u",
+                        conf->page_count);
+        }
+
+        if (dict_get (options, "force-atime-update")) {
+                force_atime_update_str = data_to_str (dict_get (options,
+                                                                "force-atime-update"));
+                if (gf_string2boolean (force_atime_update_str, &conf->force_atime_update) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'force-atime-update' takes only boolean options");
+                        goto err;
+                }
+                if (conf->force_atime_update)
+                        gf_log (this->name, GF_LOG_DEBUG, "Forcing atime updates on cache hit");
+        }
+
+        /* empty circular list of open files */
+        conf->files.next = &conf->files;
+        conf->files.prev = &conf->files;
+
+        pthread_mutex_init (&conf->conf_lock, NULL);
+        this->private = conf;
+        return 0;
+
+err:
+        /* conf_lock has not been initialised yet, so freeing is enough */
+        FREE (conf);
+        return -1;
+}
+
+/*
+ * fini - translator teardown: destroys conf_lock, frees the configuration
+ * stored in this->private and clears that pointer.
+ */
+void
+fini (xlator_t *this)
+{
+ ra_conf_t *conf = this->private;
+
+ pthread_mutex_destroy (&conf->conf_lock);
+ FREE (conf);
+
+ this->private = NULL;
+ return;
+}
+
+/* File operations implemented by this translator. */
+struct xlator_fops fops = {
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .fchown = ra_fchown,
+};
+
+/* No management operations are implemented. */
+struct xlator_mops mops = {
+};
+
+/* Callbacks: ra_release() destroys the per-fd read-ahead context. */
+struct xlator_cbks cbks = {
+ .release = ra_release,
+};
+
+/*
+ * Options accepted by this translator, with their types and permitted
+ * ranges.  The table ends with an entry whose key is NULL.
+ */
+struct volume_options options[] = {
+ { .key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ /* page size: 64KB to 2MB */
+ { .key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 64 * GF_UNIT_KB,
+ .max = 2 * GF_UNIT_MB
+ },
+ /* page count: 1 to 16 */
+ { .key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
new file mode 100644
index 00000000000..d624ca8abc8
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -0,0 +1,194 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __READ_AHEAD_H
+#define __READ_AHEAD_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "common-utils.h"
+
+struct ra_conf;
+struct ra_local;
+struct ra_page;
+struct ra_file;
+struct ra_waitq;
+
+
+/* Singly linked wait-queue node; one per frame waiting on a page. */
+struct ra_waitq {
+ struct ra_waitq *next;
+ /* page.c stores the waiting call_frame_t here */
+ void *data;
+};
+
+
+/*
+ * One piece of reply data contributed by a page to a read request; fills are
+ * kept in a doubly linked list headed by ra_local.fill.
+ */
+struct ra_fill {
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ /* offset of the page the data came from */
+ off_t offset;
+ /* number of bytes contributed */
+ size_t size;
+ /* iovec array describing those bytes, and its entry count */
+ struct iovec *vector;
+ int32_t count;
+ /* reference on the dict taken from the source page's 'ref' */
+ dict_t *refs;
+};
+
+
+/*
+ * Per-frame state.  Read requests use fill/offset/size/op_ret/op_errno/
+ * wait_count; page-fault frames use pending_offset/pending_size and fd.
+ */
+struct ra_local {
+ mode_t mode;
+ /* list head of the fills collected for the reply */
+ struct ra_fill fill;
+ /* offset and size of the read request */
+ off_t offset;
+ size_t size;
+ /* result to unwind with; op_ret grows as fills are added */
+ int32_t op_ret;
+ int32_t op_errno;
+ /* offset and size of the page read issued by ra_page_fault() */
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ /* outstanding waits; the frame unwinds when this reaches 0 */
+ int32_t wait_count;
+ /* taken around wait_count updates */
+ pthread_mutex_t local_lock;
+};
+
+
+/* One cached page of a file; pages form a doubly linked list per file. */
+struct ra_page {
+ struct ra_page *next;
+ struct ra_page *prev;
+ /* owning file context */
+ struct ra_file *file;
+ /* set by read_ahead() on pages it creates */
+ char dirty;
+ /* set once the page's data has arrived */
+ char ready;
+ /* duplicated reply iovecs and their entry count */
+ struct iovec *vector;
+ int32_t count;
+ /* page-aligned file offset of this page */
+ off_t offset;
+ /* number of bytes actually held */
+ size_t size;
+ /* frames waiting for this page to become ready */
+ struct ra_waitq *waitq;
+ /* reference on the reply's rsp_refs dict */
+ dict_t *ref;
+};
+
+
+/* Per-fd read-ahead context, stored in the fd's context for this xlator. */
+struct ra_file {
+ /* links in the conf's list of files */
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ /* non-zero: bypass the cache and read straight from the child */
+ int disabled;
+ /* bytes read at the expected offset so far; sizes the window */
+ size_t expected;
+ /* list head of the cached pages */
+ struct ra_page pages;
+ /* offset at which the next sequential read is expected */
+ off_t offset;
+ /* NOTE(review): never assigned in the code visible here -- confirm */
+ size_t size;
+ int32_t refcount;
+ /* taken around accesses to the page list */
+ pthread_mutex_t file_lock;
+ /* stat data from the most recent successful page read */
+ struct stat stbuf;
+ /* page size, and current read-ahead window in pages */
+ uint64_t page_size;
+ uint32_t page_count;
+};
+
+
+/* Translator-wide configuration, stored in this->private. */
+struct ra_conf {
+ /* configured page size and maximum window in pages */
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ /* list head of all open file contexts */
+ struct ra_file files;
+ /* send a small read on full cache hits so atime still changes */
+ gf_boolean_t force_atime_update;
+ /* taken around updates of the 'files' list */
+ pthread_mutex_t conf_lock;
+};
+
+
+/* Short _t aliases for the structures above. */
+typedef struct ra_conf ra_conf_t;
+typedef struct ra_local ra_local_t;
+typedef struct ra_page ra_page_t;
+typedef struct ra_file ra_file_t;
+typedef struct ra_waitq ra_waitq_t;
+typedef struct ra_fill ra_fill_t;
+
+/* Page cache interface implemented in page.c. */
+ra_page_t *
+ra_page_get (ra_file_t *file,
+ off_t offset);
+ra_page_t *
+ra_page_create (ra_file_t *file,
+ off_t offset);
+void
+ra_page_fault (ra_file_t *file,
+ call_frame_t *frame,
+ off_t offset);
+void
+ra_wait_on_page (ra_page_t *page,
+ call_frame_t *frame);
+ra_waitq_t *
+ra_page_wakeup (ra_page_t *page);
+
+/* NOTE(review): no definition of ra_page_flush appears in page.c -- confirm */
+void
+ra_page_flush (ra_page_t *page);
+
+ra_waitq_t *
+ra_page_error (ra_page_t *page,
+ int32_t op_ret,
+ int32_t op_errno);
+void
+ra_page_purge (ra_page_t *page);
+
+void
+ra_frame_return (call_frame_t *frame);
+void
+ra_frame_fill (ra_page_t *page,
+ call_frame_t *frame);
+
+void
+ra_file_destroy (ra_file_t *file);
+
+/* Acquire the mutex of a file context. */
+static inline void
+ra_file_lock (ra_file_t *file)
+{
+ pthread_mutex_lock (&file->file_lock);
+}
+
+/* Release the mutex of a file context. */
+static inline void
+ra_file_unlock (ra_file_t *file)
+{
+ pthread_mutex_unlock (&file->file_lock);
+}
+
+/* Acquire the mutex of the translator configuration. */
+static inline void
+ra_conf_lock (ra_conf_t *conf)
+{
+ pthread_mutex_lock (&conf->conf_lock);
+}
+
+/* Release the mutex of the translator configuration. */
+static inline void
+ra_conf_unlock (ra_conf_t *conf)
+{
+ pthread_mutex_unlock (&conf->conf_lock);
+}
+/* Acquire the mutex of a per-frame local. */
+static inline void
+ra_local_lock (ra_local_t *local)
+{
+ pthread_mutex_lock (&local->local_lock);
+}
+
+/* Release the mutex of a per-frame local. */
+static inline void
+ra_local_unlock (ra_local_t *local)
+{
+ pthread_mutex_unlock (&local->local_lock);
+}
+
+#endif /* __READ_AHEAD_H */
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/performance/stat-prefetch/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/performance/stat-prefetch/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am
new file mode 100644
index 00000000000..e52f2df48fd
--- /dev/null
+++ b/xlators/performance/stat-prefetch/src/Makefile.am
@@ -0,0 +1,11 @@
+xlator_PROGRAMS = stat-prefetch.so
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+stat_prefetch_so_SOURCES = stat-prefetch.c
+noinst_HEADERS = stat-prefetch.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+
+CLEANFILES =
+
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c b/xlators/performance/stat-prefetch/src/stat-prefetch.c
new file mode 100644
index 00000000000..f2a78f676f9
--- /dev/null
+++ b/xlators/performance/stat-prefetch/src/stat-prefetch.c
@@ -0,0 +1,508 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "stat-prefetch.h"
+#include "dict.h"
+#include "xlator.h"
+#include <sys/time.h>
+
+struct sp_cache { /* one cached directory listing; the instance stored in this->private is the list head */
+ struct sp_cache *next; /* circular doubly-linked list: init() points the head at itself */
+ struct sp_cache *prev;
+ pid_t pid; /* pid of the caller whose readdir filled this node */
+ long long tv_time; /* head: cache lifetime in microseconds; node: expiry time in microseconds since the epoch */
+ char *dirname; /* directory path; owned by the node and freed on flush */
+ dir_entry_t entries; /* placeholder head; entries.next starts the chain of cached entries */
+ int32_t count; /* NOTE(review): not referenced by the code visible in this file */
+ pthread_mutex_t lock; /* only the head's lock is initialised and used */
+};
+
+/*
+ * Drop cached directory listings from the list headed by @cache.
+ *
+ * A node is removed when the current time is past its expiry stamp
+ * (trav->tv_time, microseconds since the epoch), or unconditionally when
+ * @force is non-zero.  Removing a node unlinks it and frees its chain of
+ * cached entries, its dirname and the node itself.  The whole walk runs
+ * with cache->lock held.
+ */
+static void
+stat_prefetch_cache_flush (struct sp_cache *cache, int32_t force)
+{
+        struct sp_cache *trav = NULL;
+        struct sp_cache *next = NULL;
+        dir_entry_t     *entry = NULL;
+        dir_entry_t     *next_entry = NULL;
+        struct timeval   tv;
+        long long        now = 0;
+
+        gettimeofday (&tv, NULL);
+        /* Widen before multiplying: where tv_sec is a 32-bit type the
+         * product tv_sec * 1000000 does not fit. */
+        now = ((long long) tv.tv_sec * 1000000LL) + tv.tv_usec;
+
+        pthread_mutex_lock (&cache->lock);
+
+        for (trav = cache->next; trav != cache; trav = next) {
+                /* remember the successor first, trav may be freed below */
+                next = trav->next;
+
+                if (!force && now <= trav->tv_time)
+                        continue;
+
+                gf_log ("stat-prefetch",
+                        GF_LOG_DEBUG,
+                        "flush on: %s",
+                        trav->dirname);
+
+                trav->prev->next = trav->next;
+                trav->next->prev = trav->prev;
+
+                for (entry = trav->entries.next; entry; entry = next_entry) {
+                        next_entry = entry->next;
+                        free (entry->name);
+                        free (entry);
+                }
+
+                free (trav->dirname);
+                free (trav);
+        }
+
+        pthread_mutex_unlock (&cache->lock);
+}
+
+/*
+ * Store a fresh directory listing in the cache.
+ *
+ * @cache   - list head kept in this->private
+ * @pid     - pid of the caller that issued the readdir
+ * @dirname - heap-allocated directory path; ownership passes to this
+ *            function (kept in a new node, or freed if a node for the
+ *            directory already exists)
+ * @entries - listing head; the chain hanging off entries->next is moved
+ *            into the cache and entries->next is reset to NULL
+ *
+ * Any listing previously cached for the directory is freed.  The node's
+ * expiry is set to "now + cache->tv_time".  Returns 0 on success, -1 if
+ * @dirname is NULL.
+ */
+static int32_t
+stat_prefetch_cache_fill (struct sp_cache *cache,
+                          pid_t pid,
+                          char *dirname,
+                          dir_entry_t *entries)
+{
+        struct sp_cache *trav = NULL;
+        dir_entry_t     *stale = NULL;
+        struct timeval   tv;
+
+        /* the caller's strdup() may have failed; strcmp() below would crash */
+        if (!dirname)
+                return -1;
+
+        /* This must take the lock: the original called unlock here, so the
+         * list was modified unprotected and the mutex was unlocked twice. */
+        pthread_mutex_lock (&cache->lock);
+
+        for (trav = cache->next; trav != cache; trav = trav->next) {
+                if (!strcmp (trav->dirname, dirname))
+                        break;
+        }
+
+        if (trav == cache) {
+                /* no node for this directory yet: append one at the tail */
+                trav = CALLOC (1, sizeof (*trav));
+                ERR_ABORT (trav);
+                trav->pid = pid;
+                trav->dirname = dirname;
+
+                trav->prev = cache->prev;
+                trav->next = cache;
+                trav->next->prev = trav;
+                trav->prev->next = trav;
+        } else {
+                free (dirname);
+        }
+
+        /* discard whatever listing the node held before */
+        while (trav->entries.next) {
+                stale = trav->entries.next;
+
+                trav->entries.next = stale->next;
+                free (stale->name);
+                free (stale);
+        }
+
+        /* steal the caller's chain */
+        trav->entries.next = entries->next;
+        entries->next = NULL;
+
+        gettimeofday (&tv, NULL);
+        /* widen before multiplying so a 32-bit tv_sec cannot overflow */
+        trav->tv_time = ((long long) tv.tv_sec * 1000000LL) + tv.tv_usec
+                        + cache->tv_time;
+
+        pthread_mutex_unlock (&cache->lock);
+        return 0;
+}
+
+/*
+ * Look up the cached stat of @path.
+ *
+ * @path is split at its last '/' into a directory and a file name; the
+ * directory selects a cache node and the file name an entry in that
+ * node's chain.  On a hit the stat is copied into @buf and the entry is
+ * removed from the cache, so each cached stat is served at most once.
+ *
+ * Returns 0 on a hit; -1 on a miss, when @path is NULL or contains no
+ * '/', or when the working copy of the path cannot be allocated.
+ */
+static int32_t
+stat_prefetch_cache_lookup (struct sp_cache *cache,
+                            pid_t pid,
+                            const char *path,
+                            struct stat *buf)
+{
+        struct sp_cache *trav = NULL;
+        char            *dirname = NULL;
+        char            *filename = NULL;
+        dir_entry_t     *entries = NULL;
+        dir_entry_t     *prev = NULL;
+        int32_t          ret = -1;
+
+        if (!path)
+                return -1;
+
+        dirname = strdup (path);
+        if (!dirname)
+                return -1;
+
+        filename = strrchr (dirname, '/');
+        if (!filename) {
+                /* no directory component: nothing to look up */
+                free (dirname);
+                return -1;
+        }
+
+        /* cut the copy in two: "dir" '\0' "file" */
+        *filename = '\0';
+        filename ++;
+
+        pthread_mutex_lock (&cache->lock);
+
+        for (trav = cache->next; trav != cache; trav = trav->next) {
+                if (!strcmp (dirname, trav->dirname))
+                        break;
+        }
+        if (trav == cache)
+                goto unlock;
+
+        prev = &trav->entries;
+        for (entries = trav->entries.next; entries; entries = entries->next) {
+                if (!strcmp (entries->name, filename))
+                        break;
+                prev = entries;
+        }
+        if (!entries)
+                goto unlock;
+
+        /* hit: hand out the stat and drop the entry */
+        *buf = entries->buf;
+        prev->next = entries->next;
+        free (entries->name);
+        free (entries);
+        ret = 0;
+
+unlock:
+        pthread_mutex_unlock (&cache->lock);
+        free (dirname);
+
+        return ret;
+}
+
+
+/*
+ * readdir callback.  The reply is unwound to the caller first; after
+ * that, a successful listing (op_ret == 0) is handed to the cache
+ * together with the directory path saved in frame->local, otherwise the
+ * saved path is freed.
+ */
+int32_t
+stat_prefetch_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno,
+                           dir_entry_t *entries, int32_t count)
+{
+        /* pick up everything needed from the frame before unwinding */
+        char  *saved_path = frame->local;
+        pid_t  caller_pid = frame->root->pid;
+
+        frame->local = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+
+        if (op_ret != 0) {
+                free (saved_path);
+                return 0;
+        }
+
+        stat_prefetch_cache_fill (this->private, caller_pid, saved_path,
+                                  entries);
+        return 0;
+}
+
+/*
+ * readdir fop: expires stale cache nodes, saves a copy of @path in
+ * frame->local for the callback and winds the call to the first child.
+ */
+int32_t
+stat_prefetch_readdir (call_frame_t *frame, xlator_t *this, const char *path)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 0);
+
+        frame->local = strdup (path);
+
+        STACK_WIND (frame,
+                    stat_prefetch_readdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readdir,
+                    path);
+
+        return 0;
+}
+
+
+/* getattr callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_getattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                           int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * getattr fop: expires stale cache nodes, then answers from the cache if
+ * the stat of @path is there; otherwise winds the call to the first child.
+ */
+int32_t
+stat_prefetch_getattr (call_frame_t *frame, struct xlator *this,
+                       const char *path)
+{
+        struct stat      cached;
+        struct sp_cache *cache = this->private;
+        pid_t            caller_pid = frame->root->pid;
+
+        stat_prefetch_cache_flush (cache, 0);
+
+        if (stat_prefetch_cache_lookup (cache, caller_pid, path,
+                                        &cached) != 0) {
+                /* miss: ask the child */
+                STACK_WIND (frame,
+                            stat_prefetch_getattr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->getattr,
+                            path);
+                return 0;
+        }
+
+        /* hit: reply straight from the cache */
+        STACK_UNWIND (frame, 0, 0, &cached);
+
+        return 0;
+}
+
+
+/* unlink callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * unlink fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_unlink (call_frame_t *frame, struct xlator *this,
+                      const char *path)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_unlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->unlink,
+                    path);
+
+        return 0;
+}
+
+
+/* chmod callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * chmod fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_chmod (call_frame_t *frame, struct xlator *this,
+                     const char *path, mode_t mode)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_chmod_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->chmod,
+                    path,
+                    mode);
+
+        return 0;
+}
+
+
+/* chown callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * chown fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_chown (call_frame_t *frame, struct xlator *this,
+                     const char *path, uid_t uid, gid_t gid)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_chown_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->chown,
+                    path,
+                    uid,
+                    gid);
+
+        return 0;
+}
+
+
+/* utimes callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_utimes_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * utimes fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_utimes (call_frame_t *frame, struct xlator *this,
+                      const char *path, struct timespec *tvp)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_utimes_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->utimes,
+                    path,
+                    tvp);
+
+        return 0;
+}
+
+
+/* truncate callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno,
+                            struct stat *buf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+/*
+ * truncate fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_truncate (call_frame_t *frame, struct xlator *this,
+                        const char *path, off_t offset)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate,
+                    path,
+                    offset);
+
+        return 0;
+}
+
+
+/* rename callback: passes the child's reply back unchanged. */
+int32_t
+stat_prefetch_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * rename fop: force-flushes the whole cache, then winds the call to the
+ * first child.
+ */
+int32_t
+stat_prefetch_rename (call_frame_t *frame, struct xlator *this,
+                      const char *oldpath, const char *newpath)
+{
+        struct sp_cache *cache = this->private;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        STACK_WIND (frame,
+                    stat_prefetch_rename_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->rename,
+                    oldpath,
+                    newpath);
+
+        return 0;
+}
+
+/*
+ * Translator constructor.  Requires exactly one child, allocates the
+ * empty cache list head and stores it in this->private.  The cache
+ * lifetime defaults to one second (held in microseconds) and can be
+ * overridden with the "cache-seconds" option.
+ *
+ * Returns 0 on success, -1 if the child count is wrong.
+ */
+int32_t
+init (struct xlator *this)
+{
+        struct sp_cache *head = NULL;
+        dict_t          *options = this->options;
+
+        if (!this->children || this->children->next) {
+                gf_log ("stat-prefetch",
+                        GF_LOG_ERROR,
+                        "FATAL: translator %s does not have exactly one child node",
+                        this->name);
+                return -1;
+        }
+
+        head = (void *) CALLOC (1, sizeof (*head));
+        ERR_ABORT (head);
+
+        /* an empty circular list points at itself in both directions */
+        head->next = head;
+        head->prev = head;
+
+        head->tv_time = 1 * 1000000;
+        if (dict_get (options, "cache-seconds")) {
+                head->tv_time =
+                        (data_to_int64 (dict_get (options, "cache-seconds")) *
+                         1000000);
+        }
+
+        pthread_mutex_init (&head->lock, NULL);
+
+        this->private = head;
+        return 0;
+}
+
+/*
+ * Translator destructor.  Releases what init() set up: every cached
+ * listing is force-flushed, then the list head's mutex is destroyed and
+ * the head itself is freed.  A NULL this->private (init() failed or
+ * fini() already ran) is a no-op.
+ */
+void
+fini (struct xlator *this)
+{
+        struct sp_cache *cache = this->private;
+
+        if (!cache)
+                return;
+
+        stat_prefetch_cache_flush (cache, 1);
+
+        this->private = NULL;
+        pthread_mutex_destroy (&cache->lock);
+        free (cache);
+
+        return;
+}
+
+
+struct xlator_fops fops = { /* file operations this translator intercepts */
+ .getattr = stat_prefetch_getattr, /* answered from the cache on a hit */
+ .readdir = stat_prefetch_readdir, /* its callback fills the cache */
+ .unlink = stat_prefetch_unlink, /* this and the fops below force-flush the cache before winding */
+ .chmod = stat_prefetch_chmod,
+ .chown = stat_prefetch_chown,
+ .rename = stat_prefetch_rename,
+ .utimes = stat_prefetch_utimes,
+ .truncate = stat_prefetch_truncate,
+};
+
+struct xlator_mops mops = { /* no management operations are overridden */
+};
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h
new file mode 100644
index 00000000000..7d9645a2a81
--- /dev/null
+++ b/xlators/performance/stat-prefetch/src/stat-prefetch.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _STAT_PREFETCH_H_
+#define _STAT_PREFETCH_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <sys/time.h>
+#include "xlator.h"
+
+#endif /* _STAT_PREFETCH_H_ */
diff --git a/xlators/performance/symlink-cache/Makefile.am b/xlators/performance/symlink-cache/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/performance/symlink-cache/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am
new file mode 100644
index 00000000000..b8b257c186c
--- /dev/null
+++ b/xlators/performance/symlink-cache/src/Makefile.am
@@ -0,0 +1,12 @@
+xlator_LTLIBRARIES = symlink-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+symlink_cache_la_LDFLAGS = -module -avoidversion
+
+symlink_cache_la_SOURCES = symlink-cache.c
+symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c
new file mode 100644
index 00000000000..fc207a6272e
--- /dev/null
+++ b/xlators/performance/symlink-cache/src/symlink-cache.c
@@ -0,0 +1,399 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "list.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "common-utils.h"
+
+struct symlink_cache { /* per-inode cache record, stored in the inode context */
+ time_t ctime; /* st_ctime seen when the record was last set or validated */
+ char *readlink; /* heap copy of the link target, or NULL when nothing is cached */
+};
+
+
+/*
+ * Fetch this translator's context value for @inode and store it in *ctx
+ * as a pointer.  When the lookup fails an error is logged and *ctx is
+ * left untouched.  Always returns 0.
+ */
+static int
+symlink_inode_ctx_get (inode_t *inode, xlator_t *this, void **ctx)
+{
+        uint64_t value = 0;
+
+        if (inode_ctx_get (inode, this, &value) == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "dict get failed");
+                return 0;
+        }
+
+        *ctx = (void *)(long) value;
+
+        return 0;
+}
+
+
+/*
+ * Store @ctx as this translator's context value for @inode.
+ *
+ * Returns the result of inode_ctx_put(): a failure (-1) is logged and
+ * reported to the caller, so that callers which test the result (such as
+ * sc_cache_set) can release the record they failed to attach instead of
+ * leaking it.
+ */
+static int
+symlink_inode_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
+{
+        int ret = 0;
+
+        ret = inode_ctx_put (inode, this, (uint64_t)(long) ctx);
+        if (-1 == ret)
+                gf_log (this->name, GF_LOG_ERROR, "dict set failed");
+
+        return ret;
+}
+
+
+/*
+ * Record @link as the cached target of @inode, but only when the inode
+ * already has a cache record and that record holds no target yet.  An
+ * existing target is never overwritten.  Always returns 0.
+ */
+int
+sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
+{
+        struct symlink_cache *record = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&record));
+        if (record == NULL)
+                return 0;
+
+        if (record->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "not updating existing cache: %s with %s",
+                        record->readlink, link);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "updating cache: %s", link);
+
+        record->readlink = strdup (link);
+
+        return 0;
+}
+
+
+/*
+ * Create or overwrite the cache record of @inode.
+ *
+ * @buf  - stat of the inode; its st_ctime is stored in the record
+ * @link - link target to cache, or NULL to create/keep a record without
+ *         a target
+ *
+ * If the inode has no record yet, one is allocated and attached to the
+ * inode context.  Any previously cached target is freed first.
+ *
+ * Returns 0 on success, -1 on allocation failure or when the record
+ * cannot be attached to the inode.
+ */
+int
+sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
+              const char *link)
+{
+        struct symlink_cache *sc = NULL;
+        int                   ret = -1;
+        int                   need_set = 0;
+        /* @link may be NULL; never hand NULL to a %s conversion */
+        const char           *link_str = link ? link : "(null)";
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+        if (!sc) {
+                need_set = 1;
+                sc = CALLOC (1, sizeof (*sc));
+                if (!sc) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto err;
+                }
+        }
+
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "replacing old cache: %s with new cache: %s",
+                        sc->readlink, link_str);
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        if (link) {
+                sc->readlink = strdup (link);
+                if (!sc->readlink) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto err;
+                }
+        }
+
+        sc->ctime = buf->st_ctime;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "setting symlink cache: %s", link_str);
+
+        if (need_set) {
+                ret = symlink_inode_ctx_set (inode, this, sc);
+
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not set inode context (%s)",
+                                strerror (-ret));
+                        goto err;
+                }
+        }
+
+        return 0;
+err:
+        /* Only a record allocated by this call may be freed here.  A
+         * record that was already attached is still referenced from the
+         * inode context; freeing it would leave a dangling pointer.  It
+         * stays attached with readlink == NULL, i.e. as a cache miss. */
+        if (sc && need_set) {
+                if (sc->readlink)
+                        FREE (sc->readlink);
+                sc->readlink = NULL;
+                FREE (sc);
+        }
+
+        return -1;
+}
+
+
+/*
+ * Destroy the cache record of @inode, if it has one: the cached target
+ * and the record itself are freed.  Always returns 0.
+ *
+ * The inode context is reset to 0 before the record is freed.  Without
+ * that the context keeps pointing at freed memory, and the next
+ * validate/get/flush on the same inode would use or free it again.
+ */
+int
+sc_cache_flush (xlator_t *this, inode_t *inode)
+{
+        struct symlink_cache *sc = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+        if (!sc)
+                return 0;
+
+        /* detach first so nobody can find the record once it is gone */
+        symlink_inode_ctx_set (inode, this, NULL);
+
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "flushing cache: %s", sc->readlink);
+
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        FREE (sc);
+
+        return 0;
+}
+
+
+/*
+ * Bring the cache record of @inode in line with a fresh stat @buf.
+ *
+ * - Not a symlink: any record is flushed.
+ * - Symlink without a record: an empty record carrying buf->st_ctime is
+ *   created.
+ * - Symlink whose ctime differs from the recorded one: the cached target
+ *   is dropped as stale and the new ctime is stored.
+ *
+ * Always returns 0.
+ */
+int
+sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
+{
+        struct symlink_cache *sc = NULL;
+        uint64_t              tmp_sc = 0;
+
+        if (!S_ISLNK (buf->st_mode)) {
+                sc_cache_flush (this, inode);
+                return 0;
+        }
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+
+        if (!sc) {
+                sc_cache_set (this, inode, buf, NULL);
+                inode_ctx_get (inode, this, &tmp_sc);
+
+                /* Pick up the record that was just created before testing
+                 * it; testing the old (still NULL) pointer made this
+                 * branch report out-of-memory every time. */
+                sc = (struct symlink_cache *)(long)tmp_sc;
+                if (!sc) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        return 0;
+                }
+        }
+
+        if (sc->ctime == buf->st_ctime)
+                return 0;
+
+        /* STALE */
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "flushing cache: %s", sc->readlink);
+
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        sc->ctime = buf->st_ctime;
+
+        return 0;
+}
+
+
+
+/*
+ * Hand out a heap copy of the cached target of @inode through *link.
+ * *link is left untouched when @link is NULL, the inode has no record,
+ * or the record holds no target.  The caller owns the copy.  Always
+ * returns 0.
+ */
+int
+sc_cache_get (xlator_t *this, inode_t *inode, char **link)
+{
+        struct symlink_cache *record = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&record));
+
+        if (record != NULL && link != NULL && record->readlink != NULL)
+                *link = strdup (record->readlink);
+
+        return 0;
+}
+
+
+/*
+ * readlink callback: on success (op_ret > 0) offers the target to the
+ * cache, then drops the inode reference taken in sc_readlink and unwinds
+ * the reply unchanged.
+ */
+int
+sc_readlink_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int op_ret, int op_errno,
+                 const char *link)
+{
+        inode_t *inode = frame->local;
+
+        if (op_ret > 0)
+                sc_cache_update (this, inode, link);
+
+        inode_unref (inode);
+        frame->local = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno, link);
+
+        return 0;
+}
+
+
+/*
+ * readlink fop: answers from the cache when a target is cached for
+ * loc->inode; otherwise takes a reference on the inode (released in the
+ * callback) and winds the call to the first child.
+ */
+int
+sc_readlink (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, size_t size)
+{
+        char *cached = NULL;
+
+        sc_cache_get (this, loc->inode, &cached);
+
+        if (cached == NULL) {
+                /* cache miss */
+                frame->local = inode_ref (loc->inode);
+
+                STACK_WIND (frame, sc_readlink_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->readlink,
+                            loc, size);
+
+                return 0;
+        }
+
+        /* cache hit: reply with our private copy, then release it */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "cache hit %s -> %s",
+                loc->path, cached);
+        STACK_UNWIND (frame, strlen (cached) + 1, 0, cached);
+        FREE (cached);
+
+        return 0;
+}
+
+
+/*
+ * symlink callback: on success, caches the target saved in frame->local
+ * for the new inode, then unwinds the reply unchanged.
+ */
+int
+sc_symlink_cbk (call_frame_t *frame, void *cookie,
+                xlator_t *this, int op_ret, int op_errno,
+                inode_t *inode, struct stat *buf)
+{
+        if (op_ret == 0 && frame->local)
+                sc_cache_set (this, inode, buf, frame->local);
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+
+        return 0;
+}
+
+
+/*
+ * symlink fop: keeps a copy of the target @dst in frame->local for the
+ * callback and winds the call to the first child.
+ */
+int
+sc_symlink (call_frame_t *frame, xlator_t *this,
+            const char *dst, loc_t *src)
+{
+        char *target_copy = strdup (dst);
+
+        frame->local = target_copy;
+
+        STACK_WIND (frame, sc_symlink_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->symlink,
+                    dst, src);
+
+        return 0;
+}
+
+
+/*
+ * lookup callback: a successful lookup revalidates the inode's cache
+ * record against the fresh stat, a failed one flushes it.  The reply is
+ * then unwound unchanged.
+ */
+int
+sc_lookup_cbk (call_frame_t *frame, void *cookie,
+               xlator_t *this, int op_ret, int op_errno,
+               inode_t *inode, struct stat *buf, dict_t *xattr)
+{
+        if (op_ret != 0)
+                sc_cache_flush (this, inode);
+        else
+                sc_cache_validate (this, inode, buf);
+
+        STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+
+        return 0;
+}
+
+
+/* lookup fop: winds the call to the first child; the work is in the callback. */
+int
+sc_lookup (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, dict_t *xattr_req)
+{
+        STACK_WIND (frame,
+                    sc_lookup_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup,
+                    loc,
+                    xattr_req);
+
+        return 0;
+}
+
+
+/* forget callback: releases the cache record of @inode.  Always returns 0. */
+int
+sc_forget (xlator_t *this, inode_t *inode)
+{
+        (void) sc_cache_flush (this, inode);
+
+        return 0;
+}
+
+
+/*
+ * Translator constructor.  Fails (-1) unless the volume has exactly one
+ * child; a volume without parents only triggers a warning.  Returns 0 on
+ * success.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int has_one_child = (this->children && !this->children->next);
+
+        if (!has_one_child) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: volume (%s) not configured with exactly one "
+                        "child", this->name);
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+
+/* Translator destructor: init() allocates nothing, so there is nothing to undo. */
+void
+fini (xlator_t *this)
+{
+        (void) this;
+}
+
+
+struct xlator_fops fops = { /* file operations this translator intercepts */
+ .lookup = sc_lookup, /* its callback validates or flushes the cache record */
+ .symlink = sc_symlink, /* its callback caches the new link's target */
+ .readlink = sc_readlink, /* answered from the cache on a hit */
+};
+
+struct xlator_mops mops = { /* no management operations are overridden */
+};
+
+struct xlator_cbks cbks = {
+ .forget = sc_forget, /* frees the cache record of the inode */
+};
+
+struct volume_options options[] = { /* no options: only the terminating entry */
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/write-behind/Makefile.am b/xlators/performance/write-behind/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/performance/write-behind/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am
new file mode 100644
index 00000000000..f800abad50d
--- /dev/null
+++ b/xlators/performance/write-behind/src/Makefile.am
@@ -0,0 +1,12 @@
+xlator_LTLIBRARIES = write-behind.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+write_behind_la_LDFLAGS = -module -avoidversion
+
+write_behind_la_SOURCES = write-behind.c
+write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
new file mode 100644
index 00000000000..04a447d49e9
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -0,0 +1,1444 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/*TODO: check for non null wb_file_data before getting wb_file */
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "list.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "common-utils.h"
+
+#define MAX_VECTOR_COUNT 8 /* iovec capacity of the buffer wb_sync packs one writev from */
+
+typedef struct list_head list_head_t;
+struct wb_conf; /* forward declarations of the translator's structs */
+struct wb_page;
+struct wb_file;
+
+
+struct wb_conf { /* translator-wide settings, kept in this->private */
+ uint64_t aggregate_size; /* NOTE(review): not used in the visible part of the file -- presumably the aggregation threshold passed to __wb_mark_winds */
+ uint64_t window_size; /* NOTE(review): use not visible here */
+ uint64_t disable_till; /* copied into every wb_file by wb_file_create */
+ gf_boolean_t enable_O_SYNC; /* when true, files opened with O_SYNC get write-behind disabled (wb_open_cbk) */
+ gf_boolean_t flush_behind; /* NOTE(review): use not visible here */
+};
+
+
+typedef struct wb_local { /* per-frame state */
+ list_head_t winds; /* requests carried by one writev wound from wb_sync */
+ struct wb_file *file; /* file the frame works on; may be NULL for stat-like calls */
+ list_head_t unwind_frames; /* NOTE(review): use not visible here */
+ int op_ret; /* set by wb_sync_cbk when a non-write-behind request fails */
+ int op_errno;
+ call_frame_t *frame; /* NOTE(review): use not visible here */
+} wb_local_t;
+
+
+typedef struct write_request { /* one queued write */
+ call_frame_t *frame; /* frame of the write call; its local receives errors in wb_sync_cbk */
+ off_t offset; /* file offset of the write */
+ /* int32_t op_ret;
+ int32_t op_errno; */
+ struct iovec *vector; /* data to write */
+ int32_t count; /* number of iovecs in vector */
+ dict_t *refs; /* copied into the batch's dict by wb_sync when non-NULL */
+ char write_behind; /* non-zero: failures are not reported to the request's own frame in wb_sync_cbk */
+ char stack_wound; /* NOTE(review): use not visible here */
+ char got_reply; /* set to 1 by wb_sync_cbk */
+ list_head_t list; /* NOTE(review): presumably links the request into wb_file.request -- confirm */
+ list_head_t winds; /* links the request into a winds list (see wb_sync) */
+ /* list_head_t unwinds;*/
+} wb_write_request_t;
+
+
+struct wb_file { /* per-fd state, stored in the fd context by wb_file_create */
+ int disabled; /* set to 1 when write-behind must not be used for this fd */
+ uint64_t disable_till; /* copied from wb_conf.disable_till */
+ off_t offset; /* NOTE(review): use not visible here */
+ size_t window_size; /* NOTE(review): use not visible here */
+ int32_t refcount; /* starts at 1; wb_file_destroy frees the struct when it drops to 0 */
+ int32_t op_ret; /* result of the last failed sync (set in wb_sync_cbk) */
+ int32_t op_errno;
+ list_head_t request; /* head of the queue of pending write requests */
+ fd_t *fd; /* not referenced here: the file never keeps the fd alive */
+ gf_lock_t lock; /* initialised by the open/create callbacks */
+ xlator_t *this;
+};
+
+
+typedef struct wb_conf wb_conf_t; /* short aliases for the struct tags */
+typedef struct wb_page wb_page_t;
+typedef struct wb_file wb_file_t;
+
+/* Forward declarations of functions used before their definition. */
+int32_t
+wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all);
+
+int32_t
+wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds);
+
+int32_t
+wb_sync_all (call_frame_t *frame, wb_file_t *file);
+
+int32_t
+__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size);
+
+
+/*
+ * Allocate the write-behind state for @fd, initialise it from the
+ * translator configuration and attach it to the fd context.  The lock is
+ * NOT initialised here; the open/create callbacks do that.
+ *
+ * Returns the new wb_file.
+ */
+wb_file_t *
+wb_file_create (xlator_t *this, fd_t *fd)
+{
+        wb_conf_t *conf = this->private;
+        wb_file_t *new_file = CALLOC (1, sizeof (*new_file));
+
+        INIT_LIST_HEAD (&new_file->request);
+
+        /* fd_ref() not required, file should never decide the existence
+         * of an fd */
+        new_file->fd = fd;
+        new_file->disable_till = conf->disable_till;
+        new_file->this = this;
+        new_file->refcount = 1;
+
+        fd_ctx_set (fd, this, (uint64_t)(long) new_file);
+
+        return new_file;
+}
+
+/*
+ * Drop one reference on @file.  When the count reaches zero the lock is
+ * destroyed and the structure is freed.
+ */
+void
+wb_file_destroy (wb_file_t *file)
+{
+        int32_t remaining = 0;
+
+        LOCK (&file->lock);
+        {
+                file->refcount = file->refcount - 1;
+                remaining = file->refcount;
+        }
+        UNLOCK (&file->lock);
+
+        if (remaining != 0)
+                return;
+
+        LOCK_DESTROY (&file->lock);
+        FREE (file);
+
+        return;
+}
+
+
+/*
+ * Callback of the writev wound by wb_sync.
+ *
+ * Under the file lock every request of the batch is marked as replied;
+ * on failure the error is also stored in the local of each request that
+ * is not a write-behind request.  A failure is recorded on the file as
+ * well.  The queue is then processed again, the fd reference taken in
+ * wb_sync is dropped and the sync frame's stack is destroyed.
+ */
+int32_t
+wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        wb_local_t         *local = frame->local;
+        list_head_t        *winds = &local->winds;
+        wb_file_t          *file = local->file;
+        wb_write_request_t *request = NULL;
+        wb_write_request_t *tmp = NULL;
+        wb_local_t         *request_local = NULL;
+
+        LOCK (&file->lock);
+        {
+                list_for_each_entry_safe (request, tmp, winds, winds) {
+                        request->got_reply = 1;
+
+                        if ((op_ret == -1) && !request->write_behind) {
+                                request_local = request->frame->local;
+                                request_local->op_ret = op_ret;
+                                request_local->op_errno = op_errno;
+                        }
+                }
+        }
+        UNLOCK (&file->lock);
+
+        if (op_ret == -1) {
+                file->op_ret = op_ret;
+                file->op_errno = op_errno;
+        }
+
+        wb_process_queue (frame, file, 0);
+
+        /* safe place to do fd_unref */
+        fd_unref (file->fd);
+
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+/*
+ * Sync everything queued on @file: under the file lock the pending
+ * requests are marked onto a private list (aggregate size 0), which is
+ * then handed to wb_sync.  Returns what __wb_mark_winds returned.
+ */
+int32_t
+wb_sync_all (call_frame_t *frame, wb_file_t *file)
+{
+        list_head_t to_wind;
+        int32_t     marked = 0;
+
+        INIT_LIST_HEAD (&to_wind);
+
+        LOCK (&file->lock);
+        {
+                marked = __wb_mark_winds (&file->request, &to_wind, 0);
+        }
+        UNLOCK (&file->lock);
+
+        wb_sync (frame, file, &to_wind);
+
+        return marked;
+}
+
+/*
+ * Write out the requests queued on @winds.
+ *
+ * The requests' iovecs are packed into batches and each batch is wound
+ * as one writev on a copy of @frame, with wb_sync_cbk as callback.  A
+ * batch is closed when the list ends or when adding the next request
+ * would exceed MAX_VECTOR_COUNT iovecs.  Each request is moved from
+ * @winds to the list of the batch it went out with.
+ *
+ * Returns the total number of bytes of all requests on @winds, or 0
+ * when they carry no iovecs at all.
+ */
+int32_t
+wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
+{
+ wb_write_request_t *dummy = NULL, *request = NULL, *first_request = NULL, *next = NULL;
+ size_t total_count = 0, count = 0;
+ size_t copied = 0;
+ call_frame_t *sync_frame = NULL;
+ dict_t *refs = NULL;
+ wb_local_t *local = NULL;
+ struct iovec *vector = NULL;
+ int32_t bytes = 0;
+ size_t bytecount = 0;
+ /* first pass: count the iovecs and bytes queued on winds */
+ list_for_each_entry (request, winds, winds)
+ {
+ total_count += request->count;
+ bytes += iov_length (request->vector, request->count);
+ }
+
+ if (!total_count) {
+ return 0;
+ }
+ /* second pass: build the batches and wind them */
+ list_for_each_entry_safe (request, dummy, winds, winds) {
+ if (!vector) { /* no batch open: start one */
+ vector = MALLOC (VECTORSIZE (MAX_VECTOR_COUNT));
+ refs = get_new_dict ();
+
+ local = CALLOC (1, sizeof (*local));
+ INIT_LIST_HEAD (&local->winds);
+
+ first_request = request; /* its offset becomes the offset of the writev */
+ }
+ /* append this request's iovecs; NOTE(review): nothing here checks that a single request fits in the MAX_VECTOR_COUNT-sized buffer -- confirm callers guarantee it */
+ count += request->count;
+ bytecount = VECTORSIZE (request->count);
+ memcpy (((char *)vector)+copied,
+ request->vector,
+ bytecount);
+ copied += bytecount;
+
+ if (request->refs) {
+ dict_copy (request->refs, refs);
+ }
+ /* look at the following request, NULL when this is the last one */
+ next = NULL;
+ if (request->winds.next != winds) {
+ next = list_entry (request->winds.next, struct write_request, winds);
+ }
+ /* move the request onto the batch's list, which wb_sync_cbk walks */
+ list_del_init (&request->winds);
+ list_add_tail (&request->winds, &local->winds);
+
+ if (!next || ((count + next->count) > MAX_VECTOR_COUNT)) {
+ sync_frame = copy_frame (frame);
+ sync_frame->local = local;
+ local->file = file;
+ sync_frame->root->req_refs = dict_ref (refs);
+ fd_ref (file->fd); /* dropped again in wb_sync_cbk */
+ STACK_WIND (sync_frame,
+ wb_sync_cbk,
+ FIRST_CHILD(sync_frame->this),
+ FIRST_CHILD(sync_frame->this)->fops->writev,
+ file->fd, vector,
+ count, first_request->offset);
+ /* batch is on its way: release our handles and reset the batch state */
+ dict_unref (refs);
+ FREE (vector);
+ first_request = NULL;
+ refs = NULL;
+ vector = NULL;
+ copied = count = 0;
+ }
+ }
+
+ return bytes;
+}
+
+
+/*
+ * Callback for stat and fstat: releases the fd reference held for the
+ * call (if a wb_file was involved) and unwinds the reply unchanged.
+ */
+int32_t
+wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+             int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        wb_local_t *local = frame->local;
+        wb_file_t  *file = local->file;
+
+        if (file != NULL)
+                fd_unref (file->fd);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+
+/*
+ * stat fop.  If the calling process has an fd open on the inode and
+ * that fd carries write-behind state, all pending writes are synced
+ * first.  The fd reference obtained from fd_lookup is kept until the
+ * callback in that case, and dropped at once otherwise.
+ */
+int32_t
+wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        wb_file_t  *file = NULL;
+        fd_t       *open_fd = NULL;
+        wb_local_t *local = NULL;
+        uint64_t    ctx_value = 0;
+
+        if (loc->inode) {
+                open_fd = fd_lookup (loc->inode, frame->root->pid);
+                if (open_fd) {
+                        if (fd_ctx_get (open_fd, this, &ctx_value) == 0)
+                                file = (wb_file_t *)(long) ctx_value;
+                        else
+                                fd_unref (open_fd);
+                }
+
+                if (file)
+                        wb_sync_all (frame, file);
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_stat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->stat,
+                    loc);
+
+        return 0;
+}
+
+
+/*
+ * fstat fop.  Fails with EBADFD when @fd has no write-behind context.
+ * Otherwise pending writes are synced (holding an fd reference that the
+ * callback releases) before the call is wound to the first child.
+ */
+int32_t
+wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        wb_file_t  *file = NULL;
+        wb_local_t *local = NULL;
+        uint64_t    ctx_value = 0;
+
+        if (fd_ctx_get (fd, this, &ctx_value) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (wb_file_t *)(long) ctx_value;
+        if (file != NULL) {
+                fd_ref (file->fd);
+                wb_sync_all (frame, file);
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_stat_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fstat,
+                    fd);
+
+        return 0;
+}
+
+
+/*
+ * Callback for truncate and ftruncate: releases the fd reference held
+ * for the call (if a wb_file was involved) and unwinds the reply
+ * unchanged.
+ */
+int32_t
+wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        wb_local_t *local = frame->local;
+        wb_file_t  *file = local->file;
+
+        if (file != NULL)
+                fd_unref (file->fd);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+
+/*
+ * truncate fop.  If the calling process has an fd open on the inode and
+ * that fd carries write-behind state, all pending writes are synced
+ * before the truncate is wound to the first child.
+ */
+int32_t
+wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+{
+        wb_file_t  *file = NULL;
+        fd_t       *open_fd = NULL;
+        wb_local_t *local = NULL;
+        uint64_t    ctx_value = 0;
+
+        if (loc->inode) {
+                open_fd = fd_lookup (loc->inode, frame->root->pid);
+                if (open_fd) {
+                        if (fd_ctx_get (open_fd, this, &ctx_value) == 0)
+                                file = (wb_file_t *)(long) ctx_value;
+                        else
+                                fd_unref (open_fd);
+                }
+
+                if (file)
+                        wb_sync_all (frame, file);
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->truncate,
+                    loc,
+                    offset);
+
+        return 0;
+}
+
+
+/*
+ * ftruncate fop.  Fails with EBADFD when @fd has no write-behind
+ * context.  Otherwise pending writes are synced, an fd reference is
+ * taken for the callback to release, and the call is wound to the first
+ * child.
+ */
+int32_t
+wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+{
+        wb_file_t  *file = NULL;
+        wb_local_t *local = NULL;
+        uint64_t    ctx_value = 0;
+
+        if (fd_ctx_get (fd, this, &ctx_value) != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        file = (wb_file_t *)(long) ctx_value;
+        if (file != NULL)
+                wb_sync_all (frame, file);
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        if (file != NULL)
+                fd_ref (file->fd);
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_truncate_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->ftruncate,
+                    fd,
+                    offset);
+
+        return 0;
+}
+
+
+/*
+ * utimens callback: releases the fd reference held for the call (if a
+ * wb_file was involved) and unwinds the reply unchanged.
+ */
+int32_t
+wb_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct stat *buf)
+{
+        wb_local_t *local = frame->local;
+        wb_file_t  *file = local->file;
+
+        if (file != NULL)
+                fd_unref (file->fd);
+
+        STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+        return 0;
+}
+
+
+/*
+ * utimens fop.  If the calling process has an fd open on the inode and
+ * that fd carries write-behind state, all pending writes are synced
+ * before the call is wound to the first child.
+ */
+int32_t
+wb_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct timespec tv[2])
+{
+        wb_file_t  *file = NULL;
+        fd_t       *open_fd = NULL;
+        wb_local_t *local = NULL;
+        uint64_t    ctx_value = 0;
+
+        if (loc->inode) {
+                open_fd = fd_lookup (loc->inode, frame->root->pid);
+                if (open_fd) {
+                        if (fd_ctx_get (open_fd, this, &ctx_value) == 0)
+                                file = (wb_file_t *)(long) ctx_value;
+                        else
+                                fd_unref (open_fd);
+                }
+
+                if (file)
+                        wb_sync_all (frame, file);
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_utimens_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->utimens,
+                    loc,
+                    tv);
+
+        return 0;
+}
+
+/*
+ * wb_open_cbk - completion callback for open.
+ *
+ * On success a write-behind file is created for the fd.  Write-behind
+ * is disabled for that file when:
+ *   - mandatory locking is enabled on the inode (setgid set, group
+ *     execute clear), or
+ *   - the open flags (saved in frame->local by wb_open) request
+ *     O_DIRECT, read-only access, or O_SYNC while the enable-O_SYNC
+ *     option is on.
+ *
+ * The reply is passed to the parent unchanged.
+ */
+int32_t
+wb_open_cbk (call_frame_t *frame,
+             void *cookie,
+             xlator_t *this,
+             int32_t op_ret,
+             int32_t op_errno,
+             fd_t *fd)
+{
+        int32_t    flags = 0;
+        wb_file_t *file  = NULL;
+        wb_conf_t *conf  = this->private;
+
+        if (op_ret != -1) {
+                file = wb_file_create (this, fd);
+
+                /* If mandatory locking has been enabled on this file,
+                   we disable caching on it */
+                if ((fd->inode->st_mode & S_ISGID)
+                    && !(fd->inode->st_mode & S_IXGRP)) {
+                        file->disabled = 1;
+                }
+
+                /* If O_DIRECT, read-only or (optionally) O_SYNC, we
+                   disable caching */
+                if (frame->local) {
+                        flags = *((int32_t *)frame->local);
+
+                        /*
+                         * O_RDONLY is typically 0 (e.g. on Linux), so
+                         * "(flags & O_RDONLY) == O_RDONLY" holds for every
+                         * open and disabled write-behind unconditionally.
+                         * The access mode has to be extracted with
+                         * O_ACCMODE before comparing.
+                         */
+                        if (((flags & O_DIRECT) == O_DIRECT) ||
+                            ((flags & O_ACCMODE) == O_RDONLY) ||
+                            (((flags & O_SYNC) == O_SYNC) &&
+                             (conf->enable_O_SYNC == _gf_true))) {
+                                file->disabled = 1;
+                        }
+                }
+
+                LOCK_INIT (&file->lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/*
+ * wb_open - open fop of the write-behind translator.
+ *
+ * Saves the open flags in frame->local so that wb_open_cbk can decide
+ * whether write-behind must be disabled for the new fd, then winds the
+ * open to the first child.
+ *
+ * Unwinds with ENOMEM if the flag storage cannot be allocated (the
+ * original dereferenced the failed allocation).
+ */
+int32_t
+wb_open (call_frame_t *frame,
+         xlator_t *this,
+         loc_t *loc,
+         int32_t flags,
+         fd_t *fd)
+{
+        int32_t *saved_flags = NULL;
+
+        saved_flags = CALLOC (1, sizeof (*saved_flags));
+        if (saved_flags == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM, fd);
+                return 0;
+        }
+
+        *saved_flags = flags;
+        frame->local = saved_flags;
+
+        STACK_WIND (frame,
+                    wb_open_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+        return 0;
+}
+
+
+/*
+ * wb_create_cbk - completion callback for create.
+ *
+ * A successful create gets a write-behind file attached to the new fd.
+ * Caching is switched off for it when the inode has mandatory locking
+ * enabled (setgid bit set while group-execute is clear).  The reply is
+ * forwarded to the parent unchanged.
+ */
+int32_t
+wb_create_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               fd_t *fd,
+               inode_t *inode,
+               struct stat *buf)
+{
+        wb_file_t *wb_file = NULL;
+
+        if (op_ret != -1) {
+                wb_file = wb_file_create (this, fd);
+
+                /* mandatory locking: setgid without group execute */
+                if (fd->inode->st_mode & S_ISGID) {
+                        if (!(fd->inode->st_mode & S_IXGRP)) {
+                                wb_file->disabled = 1;
+                        }
+                }
+
+                LOCK_INIT (&wb_file->lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+        return 0;
+}
+
+
+/*
+ * wb_create - create fop of the write-behind translator.
+ *
+ * Passes the request straight to the first child; the write-behind
+ * bookkeeping for the new fd happens in wb_create_cbk.
+ */
+int32_t
+wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+           int32_t flags, mode_t mode, fd_t *fd)
+{
+        STACK_WIND (frame, wb_create_cbk,
+                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+                    loc, flags, mode, fd);
+
+        return 0;
+}
+
+
+/*
+ * __wb_cleanup_queue - drop finished requests from a file's queue.
+ *
+ * A request is finished once it has been written behind and its reply
+ * has arrived.  Such requests are unlinked and their vector, refs and
+ * the request itself are released.
+ *
+ * Returns the total payload size of the removed requests.
+ * The double-underscore prefix suggests the caller holds file->lock
+ * (wb_process_queue does) - confirm for other callers.
+ */
+int32_t
+__wb_cleanup_queue (wb_file_t *file)
+{
+        wb_write_request_t *req   = NULL;
+        wb_write_request_t *next  = NULL;
+        int32_t             freed = 0;
+
+        list_for_each_entry_safe (req, next, &file->request, list) {
+                if (!(req->got_reply && req->write_behind)) {
+                        continue;
+                }
+
+                freed += iov_length (req->vector, req->count);
+                list_del_init (&req->list);
+
+                FREE (req->vector);
+                dict_unref (req->refs);
+
+                FREE (req);
+        }
+
+        return freed;
+}
+
+
+/*
+ * __wb_mark_wind_all - schedule every not-yet-wound request for winding.
+ *
+ * Each request in @list whose stack_wound flag is clear gets the flag
+ * set and is appended to @winds.
+ *
+ * Returns the combined payload size of the newly marked requests.
+ */
+int32_t
+__wb_mark_wind_all (list_head_t *list, list_head_t *winds)
+{
+        wb_write_request_t *req   = NULL;
+        size_t              total = 0;
+
+        list_for_each_entry (req, list, list) {
+                if (req->stack_wound) {
+                        continue;
+                }
+
+                total += iov_length (req->vector, req->count);
+                req->stack_wound = 1;
+                list_add_tail (&req->winds, winds);
+        }
+
+        return total;
+}
+
+
+/*
+ * __wb_get_aggregate_size - payload size of all requests in @list that
+ * have not been wound yet (stack_wound clear).
+ */
+size_t
+__wb_get_aggregate_size (list_head_t *list)
+{
+        wb_write_request_t *req     = NULL;
+        size_t              unwound = 0;
+
+        list_for_each_entry (req, list, list) {
+                if (req->stack_wound) {
+                        continue;
+                }
+
+                unwound += iov_length (req->vector, req->count);
+        }
+
+        return unwound;
+}
+
+/*
+ * __wb_get_incomplete_writes - number of requests in @list that have
+ * been wound but whose reply has not arrived yet.
+ */
+uint32_t
+__wb_get_incomplete_writes (list_head_t *list)
+{
+        wb_write_request_t *req       = NULL;
+        uint32_t            in_flight = 0;
+
+        list_for_each_entry (req, list, list) {
+                if (!req->stack_wound || req->got_reply) {
+                        continue;
+                }
+
+                in_flight++;
+        }
+
+        return in_flight;
+}
+
+/*
+ * __wb_mark_winds - decide whether the queued writes should be wound now.
+ *
+ * All un-wound requests are moved to @winds when either no write is
+ * currently in flight or the un-wound payload has reached
+ * @aggregate_conf bytes.
+ *
+ * Returns the un-wound payload size measured before marking.
+ */
+int32_t
+__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf)
+{
+        uint32_t in_flight = __wb_get_incomplete_writes (list);
+        size_t   queued    = __wb_get_aggregate_size (list);
+
+        if ((in_flight == 0) || (queued >= aggregate_conf)) {
+                __wb_mark_wind_all (list, winds);
+        }
+
+        return queued;
+}
+
+
+/*
+ * __wb_get_window_size - payload size of the requests in @list that
+ * were already acknowledged to the caller (write_behind set) but whose
+ * reply from the child is still outstanding.
+ */
+size_t
+__wb_get_window_size (list_head_t *list)
+{
+        wb_write_request_t *req    = NULL;
+        size_t              window = 0;
+
+        list_for_each_entry (req, list, list) {
+                if (!req->write_behind || req->got_reply) {
+                        continue;
+                }
+
+                window += iov_length (req->vector, req->count);
+        }
+
+        return window;
+}
+
+
+/*
+ * __wb_mark_unwind_till - mark requests for early (write-behind) unwind.
+ *
+ * Walks @list in order and, as long as the amount marked so far does
+ * not exceed @size, flags each not-yet-written-behind request and
+ * queues the local of its frame on @unwinds.
+ *
+ * Returns the payload size of the requests marked by this call.
+ */
+size_t
+__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size)
+{
+        wb_write_request_t *req    = NULL;
+        wb_local_t         *local  = NULL;
+        size_t              marked = 0;
+
+        list_for_each_entry (req, list, list) {
+                if (marked > size) {
+                        break;
+                }
+
+                if (req->write_behind) {
+                        continue;
+                }
+
+                local = req->frame->local;
+                marked += iov_length (req->vector, req->count);
+                req->write_behind = 1;
+                list_add_tail (&local->unwind_frames, unwinds);
+        }
+
+        return marked;
+}
+
+
+/*
+ * __wb_mark_unwinds - fill the write-behind window.
+ *
+ * If the currently outstanding window is within @window_conf, further
+ * requests are marked for unwind to use up the remaining room.
+ *
+ * Returns the window size after marking.
+ */
+int32_t
+__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds, size_t window_conf)
+{
+        size_t in_window = __wb_get_window_size (list);
+
+        if (in_window > window_conf) {
+                return in_window;
+        }
+
+        in_window += __wb_mark_unwind_till (list, unwinds,
+                                            window_conf - in_window);
+
+        return in_window;
+}
+
+
+/*
+ * wb_stack_unwind - reply to every frame queued on @unwinds.
+ *
+ * Each local is taken off the list and its frame is unwound with the
+ * op_ret/op_errno stored in the local and a zero-filled stat buffer.
+ */
+int32_t
+wb_stack_unwind (list_head_t *unwinds)
+{
+        struct stat  zero_stat = {0,};
+        wb_local_t  *cur       = NULL;
+        wb_local_t  *next      = NULL;
+
+        list_for_each_entry_safe (cur, next, unwinds, unwind_frames) {
+                list_del_init (&cur->unwind_frames);
+                STACK_UNWIND (cur->frame, cur->op_ret, cur->op_errno, &zero_stat);
+        }
+
+        return 0;
+}
+
+
+/*
+ * wb_do_ops - carry out the actions selected by wb_process_queue.
+ *
+ * First replies to the frames collected on @unwinds, then hands the
+ * requests on @winds to wb_sync() using @frame.
+ *
+ * NOTE: unwinding may complete the request that owns the caller's
+ * original frame, which is why callers pass a separate frame here.
+ */
+int32_t
+wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, list_head_t *unwinds)
+{
+        wb_stack_unwind (unwinds);
+
+        wb_sync (frame, file, winds);
+
+        return 0;
+}
+
+
+/*
+ * wb_process_queue - run one scheduling pass over a file's write queue.
+ *
+ * Under file->lock: finished requests are freed, requests to be wound
+ * are collected on a local "winds" list and requests that may be
+ * acknowledged early are collected on a local "unwinds" list.  The
+ * collected work is then performed outside the lock by wb_do_ops().
+ *
+ * @flush_all: non-zero forces winding of everything queued (the
+ *             aggregation threshold is treated as 0).
+ *
+ * Returns -1 if @file is NULL, 0 otherwise.
+ */
+int32_t
+wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all)
+{
+        list_head_t winds, unwinds;
+        size_t      size = 0;
+        wb_conf_t  *conf = NULL;
+
+        /* must be checked before file is dereferenced; the original read
+         * file->this->private in the initializer, ahead of this test */
+        if (!file) {
+                return -1;
+        }
+
+        conf = file->this->private;
+
+        INIT_LIST_HEAD (&winds);
+        INIT_LIST_HEAD (&unwinds);
+
+        size = flush_all ? 0 : conf->aggregate_size;
+        LOCK (&file->lock);
+        {
+                __wb_cleanup_queue (file);
+                __wb_mark_winds (&file->request, &winds, size);
+                __wb_mark_unwinds (&file->request, &unwinds, conf->window_size);
+        }
+        UNLOCK (&file->lock);
+
+        wb_do_ops (frame, file, &winds, &unwinds);
+        return 0;
+}
+
+
+/*
+ * wb_enqueue - queue a write on a write-behind file.
+ *
+ * Builds a request holding a duplicate of @vector and a reference on
+ * the call's req_refs, attaches a fresh local to @frame (carrying the
+ * reply that will later be unwound: op_ret = payload size, op_errno 0)
+ * and appends the request to file->request under file->lock.  The
+ * file's expected next offset is advanced past this write.
+ *
+ * NOTE(review): none of the allocations is checked for failure.
+ *
+ * Returns the new request.
+ */
+wb_write_request_t *
+wb_enqueue (wb_file_t *file,
+            call_frame_t *frame,
+            struct iovec *vector,
+            int32_t count,
+            off_t offset)
+{
+        wb_local_t         *wb_local = NULL;
+        wb_write_request_t *req      = NULL;
+
+        wb_local = CALLOC (1, sizeof (*wb_local));
+        req      = CALLOC (1, sizeof (*req));
+
+        /* the request itself */
+        INIT_LIST_HEAD (&req->list);
+        INIT_LIST_HEAD (&req->winds);
+
+        req->frame  = frame;
+        req->vector = iov_dup (vector, count);
+        req->count  = count;
+        req->offset = offset;
+        req->refs   = dict_ref (frame->root->req_refs);
+
+        /* the reply to be sent when this write is unwound */
+        frame->local = wb_local;
+        wb_local->frame    = frame;
+        wb_local->op_ret   = iov_length (vector, count);
+        wb_local->op_errno = 0;
+        INIT_LIST_HEAD (&wb_local->unwind_frames);
+
+        LOCK (&file->lock);
+        {
+                list_add_tail (&req->list, &file->request);
+                file->offset = offset + iov_length (vector, count);
+        }
+        UNLOCK (&file->lock);
+
+        return req;
+}
+
+
+/*
+ * wb_writev_cbk - completion callback for writes that bypass
+ * write-behind (wound directly from wb_writev); relays the child's
+ * reply to the parent untouched.
+ */
+int32_t
+wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+
+        return 0;
+}
+
+
+/*
+ * wb_writev - writev fop of the write-behind translator.
+ *
+ * Unwinds with EBADFD when the fd has no context or the context holds
+ * no write-behind file.  When write-behind is disabled for the file
+ * (file->disabled, or file->disable_till still non-zero) the write is
+ * wound straight to the first child.  Otherwise the write is queued
+ * with wb_enqueue() and the queue is processed on a copy of the frame;
+ * if the write does not start at the expected offset the queue is
+ * first processed with flush_all set.
+ */
+int32_t
+wb_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset)
+{
+ wb_file_t *file = NULL;
+ char offset_expected = 1, wb_disabled = 0;
+ call_frame_t *process_frame = NULL;
+ size_t size = 0;
+ uint64_t tmp_file = 0;
+
+ if (vector != NULL)
+ size = iov_length (vector, count);
+
+ if (fd_ctx_get (fd, this, &tmp_file)) {
+ gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+ STACK_UNWIND (frame, -1, EBADFD, NULL);
+ return 0;
+ }
+
+ file = (wb_file_t *)(long)tmp_file;
+ if (!file) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wb_file not found for fd %p", fd);
+ STACK_UNWIND (frame, -1, EBADFD, NULL);
+ return 0;
+ }
+
+ LOCK (&file->lock);
+ {
+ /* consume this write's size from the remaining disable_till budget, clamping at 0 */
+ if (file->disabled || file->disable_till) {
+ if (size > file->disable_till) {
+ file->disable_till = 0;
+ } else {
+ file->disable_till -= size;
+ }
+ wb_disabled = 1;
+ }
+
+ /* file->offset is where wb_enqueue left off after the previous queued write */
+ if (file->offset != offset)
+ offset_expected = 0;
+ }
+ UNLOCK (&file->lock);
+
+ if (wb_disabled) {
+ STACK_WIND (frame,
+ wb_writev_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->writev,
+ file->fd,
+ vector,
+ count,
+ offset);
+ return 0;
+ }
+
+ /* queue processing may unwind 'frame', so it runs on a private copy */
+ /* NOTE(review): the result of copy_frame() is not checked for NULL */
+ process_frame = copy_frame (frame);
+
+ if (!offset_expected)
+ wb_process_queue (process_frame, file, 1);
+
+ wb_enqueue (file, frame, vector, count, offset);
+ wb_process_queue (process_frame, file, 0);
+
+ STACK_DESTROY (process_frame->root);
+
+ return 0;
+}
+
+
+/*
+ * wb_readv_cbk - completion callback for readv; relays the child's
+ * reply to the parent unchanged.
+ *
+ * The original fetched frame->local into a variable that was never
+ * used; that dead local has been removed.
+ */
+int32_t
+wb_readv_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct iovec *vector,
+              int32_t count,
+              struct stat *stbuf)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf);
+        return 0;
+}
+
+
+/*
+ * wb_readv - readv fop of the write-behind translator.
+ *
+ * Pending writes of the file are synced first so that the read sees
+ * them, then the read is wound to the first child.
+ *
+ * Unwinds with EBADFD when the fd carries no context and with ENOMEM
+ * when the per-call local cannot be allocated.  Error unwinds pass the
+ * full (vector, count, stbuf) argument set that wb_readv_cbk forwards
+ * on success, instead of a single NULL.
+ */
+int32_t
+wb_readv (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd,
+          size_t size,
+          off_t offset)
+{
+        wb_file_t  *file     = NULL;
+        wb_local_t *local    = NULL;
+        uint64_t    tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
+                return 0;
+        }
+
+        file = (wb_file_t *)(long)tmp_file;
+        if (file)
+                wb_sync_all (frame, file);
+
+        local = CALLOC (1, sizeof (*local));
+        if (local == NULL) {
+                /* previously a failed allocation was dereferenced */
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL, 0, NULL);
+                return 0;
+        }
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_readv_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->readv,
+                    fd, size, offset);
+
+        return 0;
+}
+
+
+/*
+ * wb_ffr_bg_cbk - completion callback for a flush issued in the
+ * background (flush-behind) on a private copy of the caller's frame.
+ *
+ * The caller has already been answered, so the result is not
+ * propagated; a stored write error on the file is cleared, the fd
+ * reference taken in wb_flush is dropped and the copied stack is
+ * destroyed.
+ *
+ * Fixes over the original: file is no longer dereferenced when NULL,
+ * and it is not touched after fd_unref(), which may release it.
+ */
+int32_t
+wb_ffr_bg_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno)
+{
+        wb_local_t *local = NULL;
+        wb_file_t  *file  = NULL;
+
+        local = frame->local;
+        file = local->file;
+
+        if (file) {
+                if (file->op_ret == -1) {
+                        /* error was reported (or dropped) already; reset */
+                        file->op_ret = 0;
+                }
+
+                fd_unref (file->fd);
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * wb_ffr_cbk - completion callback for a synchronous flush.
+ *
+ * If an earlier written-behind write failed (file->op_ret == -1) that
+ * error replaces the flush result and is then cleared.  The fd
+ * reference held for this call is dropped before unwinding.
+ *
+ * Fixes over the original: file is no longer dereferenced when NULL,
+ * and it is not touched after fd_unref(), which may release it.
+ */
+int32_t
+wb_ffr_cbk (call_frame_t *frame,
+            void *cookie,
+            xlator_t *this,
+            int32_t op_ret,
+            int32_t op_errno)
+{
+        wb_local_t *local = NULL;
+        wb_file_t  *file  = NULL;
+
+        local = frame->local;
+        file = local->file;
+
+        if (file) {
+                if (file->op_ret == -1) {
+                        op_ret = file->op_ret;
+                        op_errno = file->op_errno;
+
+                        file->op_ret = 0;
+                }
+
+                /* drop the fd reference only after the last use of file */
+                fd_unref (file->fd);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * wb_flush - flush fop of the write-behind translator.
+ *
+ * Pending writes are synced and the flush is wound to the first child.
+ * With the flush-behind option on (and write-behind active for the
+ * file) the caller is answered immediately with the file's stored
+ * op_ret/op_errno and the real flush runs in the background on a copy
+ * of the frame; otherwise the caller waits for the child's reply.
+ *
+ * Error handling added over the original:
+ *   - a context without a write-behind file unwinds with EBADFD (as
+ *     wb_writev does) instead of dereferencing NULL,
+ *   - a failed local allocation unwinds with ENOMEM,
+ *   - if the frame cannot be copied the flush falls back to the
+ *     synchronous path.
+ */
+int32_t
+wb_flush (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd)
+{
+        wb_conf_t    *conf        = NULL;
+        wb_file_t    *file        = NULL;
+        call_frame_t *flush_frame = NULL;
+        wb_local_t   *local       = NULL;
+        uint64_t      tmp_file    = 0;
+
+        conf = this->private;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        file = (wb_file_t *)(long)tmp_file;
+        if (!file) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "wb_file not found for fd %p", fd);
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        if (local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+        local->file = file;
+
+        /* released by wb_ffr_cbk / wb_ffr_bg_cbk */
+        fd_ref (file->fd);
+
+        if (&file->request != file->request.next) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "request queue is not empty, it has to be synced");
+        }
+
+        if (conf->flush_behind &&
+            (!file->disabled) && (file->disable_till == 0)) {
+                /* NULL here simply selects the synchronous path below */
+                flush_frame = copy_frame (frame);
+        }
+
+        if (flush_frame) {
+                STACK_UNWIND (frame, file->op_ret,
+                              file->op_errno); // liar! liar! :O
+
+                flush_frame->local = local;
+                wb_sync_all (flush_frame, file);
+
+                STACK_WIND (flush_frame,
+                            wb_ffr_bg_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->flush,
+                            fd);
+        } else {
+                wb_sync_all (frame, file);
+
+                frame->local = local;
+                STACK_WIND (frame,
+                            wb_ffr_cbk,
+                            FIRST_CHILD(this),
+                            FIRST_CHILD(this)->fops->flush,
+                            fd);
+        }
+
+        return 0;
+}
+
+
+/*
+ * wb_fsync_cbk - completion callback for fsync.
+ *
+ * If an earlier written-behind write failed (file->op_ret == -1) that
+ * error replaces the fsync result and is then cleared.
+ *
+ * wb_fsync stores a possibly-NULL file in the local, so the file is
+ * only inspected when present (the original dereferenced it
+ * unconditionally).
+ */
+int32_t
+wb_fsync_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno)
+{
+        wb_local_t *local = NULL;
+        wb_file_t  *file  = NULL;
+
+        local = frame->local;
+        file = local->file;
+
+        if (file && (file->op_ret == -1)) {
+                op_ret = file->op_ret;
+                op_errno = file->op_errno;
+
+                file->op_ret = 0;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * wb_fsync - fsync fop of the write-behind translator.
+ *
+ * Syncs the file's pending writes and winds the fsync to the first
+ * child.  Unwinds with EBADFD when the fd carries no context and with
+ * ENOMEM when the per-call local cannot be allocated (the original
+ * dereferenced the failed allocation).
+ */
+int32_t
+wb_fsync (call_frame_t *frame,
+          xlator_t *this,
+          fd_t *fd,
+          int32_t datasync)
+{
+        wb_file_t  *file     = NULL;
+        wb_local_t *local    = NULL;
+        uint64_t    tmp_file = 0;
+
+        if (fd_ctx_get (fd, this, &tmp_file)) {
+                gf_log (this->name, GF_LOG_ERROR, "returning EBADFD");
+                STACK_UNWIND (frame, -1, EBADFD);
+                return 0;
+        }
+
+        file = (wb_file_t *)(long)tmp_file;
+        if (file)
+                wb_sync_all (frame, file);
+
+        local = CALLOC (1, sizeof (*local));
+        if (local == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                STACK_UNWIND (frame, -1, ENOMEM);
+                return 0;
+        }
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_fsync_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsync,
+                    fd, datasync);
+        return 0;
+}
+
+
+/*
+ * wb_release - release callback: destroys the write-behind file stored
+ * in the fd context.
+ *
+ * The file is destroyed only when the context lookup succeeded and
+ * actually holds one; the original ignored the lookup result and could
+ * hand a NULL pointer to wb_file_destroy().
+ */
+int32_t
+wb_release (xlator_t *this,
+            fd_t *fd)
+{
+        uint64_t   tmp_file = 0;
+        wb_file_t *file     = NULL;
+
+        if (!fd_ctx_get (fd, this, &tmp_file)) {
+                file = (wb_file_t *)(long)tmp_file;
+        }
+
+        if (file != NULL) {
+                wb_file_destroy (file);
+        }
+
+        return 0;
+}
+
+
+/*
+ * init - translator entry point: validate the graph position and parse
+ * the volume options into a wb_conf_t stored in this->private.
+ *
+ * Options read: enable-O_SYNC, block-size (aggregate size),
+ * disable-for-first-nbytes, cache-size (window size), flush-behind.
+ * A zero window size defaults to the aggregate size; the aggregate size
+ * may not exceed the window size.
+ *
+ * Returns 0 on success, -1 on any configuration or allocation error.
+ *
+ * Fixes over the original: the configuration is released on every
+ * error path (four paths leaked it), the allocation is checked, and the
+ * arguments of the "cannot be more than window-size" message are in
+ * the order the format string names them.
+ */
+int32_t
+init (xlator_t *this)
+{
+        dict_t    *options               = NULL;
+        wb_conf_t *conf                  = NULL;
+        char      *aggregate_size_string = NULL;
+        char      *window_size_string    = NULL;
+        char      *flush_behind_string   = NULL;
+        char      *disable_till_string   = NULL;
+        char      *enable_O_SYNC_string  = NULL;
+        int32_t    ret                   = -1;
+
+        if ((this->children == NULL)
+            || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: write-behind (%s) not configured with exactly one child",
+                        this->name);
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile");
+        }
+
+        options = this->options;
+
+        conf = CALLOC (1, sizeof (*conf));
+        if (conf == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                return -1;
+        }
+
+        conf->enable_O_SYNC = _gf_false;
+        ret = dict_get_str (options, "enable-O_SYNC",
+                            &enable_O_SYNC_string);
+        if (ret == 0) {
+                ret = gf_string2boolean (enable_O_SYNC_string,
+                                         &conf->enable_O_SYNC);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'enable-O_SYNC' takes only boolean arguments");
+                        goto err;
+                }
+        }
+
+        /* configure 'option block-size <size>' (the aggregate size) */
+        conf->aggregate_size = 0;
+        ret = dict_get_str (options, "block-size",
+                            &aggregate_size_string);
+        if (ret == 0) {
+                ret = gf_string2bytesize (aggregate_size_string,
+                                          &conf->aggregate_size);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\" of \"option aggregate-size\"",
+                                aggregate_size_string);
+                        goto err;
+                }
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "using aggregate-size = %"PRIu64"",
+                conf->aggregate_size);
+
+        conf->disable_till = 1;
+        ret = dict_get_str (options, "disable-for-first-nbytes",
+                            &disable_till_string);
+        if (ret == 0) {
+                ret = gf_string2bytesize (disable_till_string,
+                                          &conf->disable_till);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\" of \"option disable-for-first-nbytes\"",
+                                disable_till_string);
+                        goto err;
+                }
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "disabling write-behind for first %"PRIu64" bytes",
+                conf->disable_till);
+
+        /* configure 'option cache-size <size>' (the window size) */
+        conf->window_size = 0;
+        ret = dict_get_str (options, "cache-size",
+                            &window_size_string);
+        if (ret == 0) {
+                ret = gf_string2bytesize (window_size_string,
+                                          &conf->window_size);
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid number format \"%s\" of \"option window-size\"",
+                                window_size_string);
+                        goto err;
+                }
+        }
+
+        if (!conf->window_size && conf->aggregate_size) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "setting window-size to be equal to aggregate-size(%"PRIu64")",
+                        conf->aggregate_size);
+                conf->window_size = conf->aggregate_size;
+        }
+
+        if (conf->window_size < conf->aggregate_size) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "aggregate-size(%"PRIu64") cannot be more than window-size"
+                        "(%"PRIu64")", conf->aggregate_size, conf->window_size);
+                goto err;
+        }
+
+        /* configure 'option flush-behind <on/off>' */
+        conf->flush_behind = 0;
+        ret = dict_get_str (options, "flush-behind",
+                            &flush_behind_string);
+        if (ret == 0) {
+                ret = gf_string2boolean (flush_behind_string,
+                                         &conf->flush_behind);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'flush-behind' takes only boolean arguments");
+                        goto err;
+                }
+
+                if (conf->flush_behind) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "enabling flush-behind");
+                }
+        }
+
+        this->private = conf;
+        return 0;
+
+err:
+        /* single exit for every failure after conf was allocated */
+        FREE (conf);
+        return -1;
+}
+
+
+/*
+ * fini - translator teardown: releases the configuration that init()
+ * stored in this->private.
+ */
+void
+fini (xlator_t *this)
+{
+        wb_conf_t *conf = NULL;
+
+        conf = this->private;
+        FREE (conf);
+}
+
+
+/* File operations implemented by write-behind. */
+struct xlator_fops fops = {
+        /* open / create */
+        .open      = wb_open,
+        .create    = wb_create,
+        /* data path */
+        .writev    = wb_writev,
+        .readv     = wb_readv,
+        .flush     = wb_flush,
+        .fsync     = wb_fsync,
+        /* attribute operations */
+        .stat      = wb_stat,
+        .fstat     = wb_fstat,
+        .truncate  = wb_truncate,
+        .ftruncate = wb_ftruncate,
+        .utimens   = wb_utimens,
+};
+
+/* No management operations are provided. */
+struct xlator_mops mops = {
+};
+
+/* Callbacks: the per-fd write-behind state is torn down on release. */
+struct xlator_cbks cbks = {
+        .release = wb_release
+};
+
+/*
+ * Volume options accepted by write-behind.  init() looks each option up
+ * by the first key listed; the second key of an entry is presumably an
+ * accepted alias (confirm against the option validation code).
+ */
+struct volume_options options[] = {
+ { .key = {"flush-behind"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"block-size", "aggregate-size"}, /* read into conf->aggregate_size */
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 128 * GF_UNIT_KB,
+ .max = 4 * GF_UNIT_MB
+ },
+ { .key = {"cache-size", "window-size"}, /* read into conf->window_size */
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 512 * GF_UNIT_KB,
+ .max = 1 * GF_UNIT_GB
+ },
+ { .key = {"disable-for-first-nbytes"}, /* read into conf->disable_till */
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 1,
+ .max = 1 * GF_UNIT_MB,
+ },
+ { .key = {"enable-O_SYNC"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {NULL} }, /* terminator */
+};
diff --git a/xlators/protocol/Makefile.am b/xlators/protocol/Makefile.am
new file mode 100644
index 00000000000..745e277c2a6
--- /dev/null
+++ b/xlators/protocol/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = client server
+
+CLEANFILES =
diff --git a/xlators/protocol/client/Makefile.am b/xlators/protocol/client/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/protocol/client/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am
new file mode 100644
index 00000000000..fb720942cc6
--- /dev/null
+++ b/xlators/protocol/client/src/Makefile.am
@@ -0,0 +1,16 @@
+
+xlator_LTLIBRARIES = client.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
+
+client_la_LDFLAGS = -module -avoidversion
+
+client_la_SOURCES = client-protocol.c saved-frames.c
+client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = client-protocol.h saved-frames.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/protocol/client/src/client-protocol.c b/xlators/protocol/client/src/client-protocol.c
new file mode 100644
index 00000000000..5c93bd6f135
--- /dev/null
+++ b/xlators/protocol/client/src/client-protocol.c
@@ -0,0 +1,6671 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <inttypes.h>
+
+
+#include "glusterfs.h"
+#include "client-protocol.h"
+#include "compat.h"
+#include "dict.h"
+#include "protocol.h"
+#include "transport.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+#include <sys/resource.h>
+#include <inttypes.h>
+
+/* for default_*_cbk functions */
+#include "defaults.c"
+#include "saved-frames.h"
+
+
+/* Forward declarations for functions defined later in this file. */
+int protocol_client_cleanup (transport_t *trans);
+int protocol_client_interpret (xlator_t *this, transport_t *trans,
+ char *hdr_p, size_t hdrlen,
+ char *buf_p, size_t buflen);
+int
+protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
+ int type, int op,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ struct iovec *vector, int count,
+ dict_t *refs);
+
+/* Reply-handler tables indexed by op number; protocol_client_xfer calls
+ * into them to deliver a synthetic error reply when a request cannot be
+ * submitted. */
+static gf_op_t gf_fops[];
+static gf_op_t gf_mops[];
+static gf_op_t gf_cbks[];
+
+
+/*
+ * this_ino_get_from_inode - remote inode number recorded for @inode in
+ * this translator's inode context.
+ *
+ * Inode number 1 is returned as 1 without consulting the context.
+ * Returns 0 when an argument is NULL or nothing is stored; a failed
+ * lookup is logged when the inode has a non-zero local number.
+ */
+static ino_t
+this_ino_get_from_inode (inode_t *inode, xlator_t *this)
+{
+        ino_t   remote_ino = 0;
+        int32_t rc         = 0;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        if (inode->ino == 1) {
+                return 1;
+        }
+
+        rc = inode_ctx_get (inode, this, &remote_ino);
+        if ((rc < 0) && inode->ino) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "(%"PRId64"): failed to get remote inode number",
+                        inode->ino);
+        }
+
+out:
+        return remote_ino;
+}
+
+
+/*
+ * this_ino_get - remote inode number for one of the inodes of @loc.
+ *
+ * @which selects loc->inode (GF_CLIENT_INODE_SELF) or loc->parent
+ * (GF_CLIENT_INODE_PARENT).  Inode number 1 is returned as 1 without
+ * consulting the context.  Returns 0 when the selected inode is
+ * missing or has no number stored; a failed lookup is logged when the
+ * inode has a non-zero local number.
+ */
+static ino_t
+this_ino_get (loc_t *loc, xlator_t *this, int32_t which)
+{
+        ino_t    remote_ino = 0;
+        int32_t  rc         = 0;
+        inode_t *target     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+
+        switch (which) {
+        case GF_CLIENT_INODE_SELF:
+                target = loc->inode;
+                break;
+        case GF_CLIENT_INODE_PARENT:
+                target = loc->parent;
+                break;
+        default:
+                break;
+        }
+        GF_VALIDATE_OR_GOTO (this->name, target, out);
+
+        if (target->ino == 1) {
+                return 1;
+        }
+
+        rc = inode_ctx_get (target, this, &remote_ino);
+        if ((rc < 0) && target->ino) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s(%s - %"PRId64") failed to get remote inode number",
+                        loc->path,
+                        (which == GF_CLIENT_INODE_SELF? "self" : "parent"),
+                        target->ino);
+        }
+
+out:
+        return remote_ino;
+}
+
+
+/*
+ * this_ino_set - record @ino as the remote inode number of loc->inode.
+ *
+ * Nothing is written when the stored number already equals @ino.  A
+ * change from a previous non-zero number is logged at debug level; a
+ * failure to store the number is logged as an error.
+ */
+static void
+this_ino_set (loc_t *loc, xlator_t *this, ino_t ino)
+{
+        ino_t    stored = 0;
+        int32_t  rc     = -1;
+        inode_t *target = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+
+        target = loc->inode;
+        GF_VALIDATE_OR_GOTO (this->name, target, out);
+
+        rc = inode_ctx_get (target, this, &stored);
+
+        if (stored == ino) {
+                goto out;
+        }
+
+        if (stored) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%s: inode number changed from %"PRId64" "
+                        "to %"PRId64,
+                        loc->path, stored, ino);
+        }
+
+        rc = inode_ctx_put (target, this, ino);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s (%"PRId64"): failed to set remote "
+                        "inode number to inode ctx",
+                        loc->path, ino);
+        }
+out:
+        return;
+}
+
+
+/*
+ * this_fd_get - fetch the remote fd number stored in @file's context.
+ *
+ * Returns -1 when the context lookup fails and 0 otherwise (including
+ * when an argument fails validation).  Once validation has passed,
+ * *remote_fd is written in either case with whatever the lookup left
+ * in the temporary (0 if it stored nothing).
+ */
+static int
+this_fd_get (fd_t *file, xlator_t *this, int64_t *remote_fd)
+{
+        int      status = 0;
+        int      ctx_rc = -1;
+        uint64_t raw_fd = 0;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file, out);
+        GF_VALIDATE_OR_GOTO (this->name, remote_fd, out);
+
+        ctx_rc = fd_ctx_get (file, this, &raw_fd);
+        status = (ctx_rc < 0) ? -1 : 0;
+
+        *remote_fd = (int64_t)raw_fd;
+out:
+        return status;
+}
+
+
+/*
+ * this_fd_set - store the remote fd number @fd in @file's context.
+ *
+ * If a value is already present a warning is logged and the old value
+ * is overwritten.  A failure to store is logged as an error.  @loc is
+ * used for the log messages only.
+ */
+static void
+this_fd_set (fd_t *file, xlator_t *this, loc_t *loc, int64_t fd)
+{
+        uint64_t previous = 0;
+        int32_t  rc       = -1;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, file, out);
+
+        rc = fd_ctx_get (file, this, &previous);
+        if (!(rc < 0)) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "%s (%"PRId64"): trying duplicate remote fd set. "
+                        "%"PRId64" over-rides %"PRId64,
+                        loc->path, loc->inode->ino, fd, previous);
+        }
+
+        rc = fd_ctx_set (file, this, (uint64_t)fd);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s (%"PRId64"): failed to set remote fd",
+                        loc->path, loc->inode->ino);
+        }
+out:
+        return;
+}
+
+
+/*
+ * client_local_wipe - release a client_local_t: wipes the embedded
+ * loc, drops the fd reference if one is held and frees the structure.
+ * A NULL @local is accepted.  Always returns 0.
+ */
+static int
+client_local_wipe (client_local_t *local)
+{
+        if (local == NULL) {
+                return 0;
+        }
+
+        loc_wipe (&local->loc);
+
+        if (local->fd != NULL) {
+                fd_unref (local->fd);
+        }
+
+        free (local);
+
+        return 0;
+}
+
+/*
+ * lookup_frame - fetch the saved call frame matching a reply
+ * @trans: transport object the reply arrived on
+ * @op: operation number
+ * @type: operation type
+ * @callid: call id of the frame
+ *
+ * The connection's saved-frames store is queried under conn->lock.
+ *
+ * not for external reference
+ */
+
+static call_frame_t *
+lookup_frame (transport_t *trans, int32_t op, int8_t type, int64_t callid)
+{
+        client_connection_t *conn  = trans->xl_private;
+        call_frame_t        *found = NULL;
+
+        pthread_mutex_lock (&conn->lock);
+        found = saved_frames_get (conn->saved_frames, op, type, callid);
+        pthread_mutex_unlock (&conn->lock);
+
+        return found;
+}
+
+
+/*
+ * call_bail - periodic timer callback that detects a stalled transport.
+ * @data: the transport_t the timer was registered for
+ *
+ * Re-arms itself every 10 seconds.  When frames are pending and both
+ * the receive and send timeouts have elapsed, the transport is
+ * disconnected (after conn->lock has been released).
+ */
+static void
+call_bail (void *data)
+{
+ client_connection_t *conn = NULL;
+ struct timeval current;
+ int32_t bail_out = 0;
+ transport_t *trans = NULL;
+
+ GF_VALIDATE_OR_GOTO("client", data, out);
+ trans = data;
+
+ conn = trans->xl_private;
+
+ gettimeofday (&current, NULL);
+ pthread_mutex_lock (&conn->lock);
+ {
+ /* Chaining to get call-always functionality from
+ call-once timer */
+ if (conn->timer) {
+ struct timeval timeout = {0,};
+ gf_timer_cbk_t timer_cbk = conn->timer->cbk;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
+
+ gf_timer_call_cancel (trans->xl->ctx, conn->timer);
+ conn->timer = gf_timer_call_after (trans->xl->ctx,
+ timeout,
+ timer_cbk,
+ trans);
+ if (conn->timer == NULL) {
+ gf_log (trans->xl->name, GF_LOG_DEBUG,
+ "Cannot create bailout timer");
+ }
+ }
+
+ /* bail only if requests are outstanding and traffic has stalled in both directions */
+ if (((conn->saved_frames->count > 0) &&
+ (RECEIVE_TIMEOUT(conn, current)) &&
+ (SEND_TIMEOUT(conn, current)))) {
+
+ struct tm last_sent_tm, last_received_tm;
+ char last_sent[32] = {0,}, last_received[32] = {0,};
+
+ bail_out = 1;
+
+ localtime_r (&conn->last_sent.tv_sec,
+ &last_sent_tm);
+ localtime_r (&conn->last_received.tv_sec,
+ &last_received_tm);
+
+ strftime (last_sent, 32,
+ "%Y-%m-%d %H:%M:%S", &last_sent_tm);
+ strftime (last_received, 32,
+ "%Y-%m-%d %H:%M:%S", &last_received_tm);
+
+ gf_log (trans->xl->name, GF_LOG_ERROR,
+ "activating bail-out. pending frames = %d. "
+ "last sent = %s. last received = %s. "
+ "transport-timeout = %d",
+ (int32_t) conn->saved_frames->count,
+ last_sent, last_received,
+ conn->transport_timeout);
+ }
+ }
+
+ /* still under conn->lock: allow a new ping cycle to start after the bail-out */
+ if (bail_out) {
+ conn->ping_started = 0;
+ }
+
+ pthread_mutex_unlock (&conn->lock);
+
+ /* the disconnect itself is done without holding conn->lock */
+ if (bail_out) {
+ gf_log (trans->xl->name, GF_LOG_CRITICAL,
+ "bailing transport");
+ transport_disconnect (trans);
+ }
+out:
+ return;
+}
+
+
+/*
+ * save_frame - remember @frame until the reply for @callid arrives.
+ *
+ * The frame is added to the connection's saved-frames store.  If no
+ * bail-out timer is running yet, one is armed to fire call_bail after
+ * 10 seconds.  No locking is done here (protocol_client_xfer calls
+ * this with conn->lock held).
+ */
+void
+save_frame (transport_t *trans, call_frame_t *frame,
+            int32_t op, int8_t type, uint64_t callid)
+{
+        client_connection_t *conn     = trans->xl_private;
+        struct timeval       interval = {0, };
+
+        saved_frames_put (conn->saved_frames, frame, op, type, callid);
+
+        if (conn->timer != NULL) {
+                return;
+        }
+
+        interval.tv_sec  = 10;
+        interval.tv_usec = 0;
+        conn->timer = gf_timer_call_after (trans->xl->ctx, interval,
+                                           call_bail, (void *) trans);
+}
+
+
+/*
+ * client_get_forgets - package the queued inode "forget" numbers into a
+ * request.
+ * @this: client translator
+ * @forget: filled with the new frame, header and header length
+ *
+ * When conf->forget holds queued inode numbers, a header carrying all
+ * of them (in network byte order) and a frame to send it on are
+ * created, the queue is reset and frames_in_transit is incremented.
+ * No locking is done here (protocol_client_xfer calls this with
+ * conf->forget.lock held).
+ *
+ * Returns the number of inodes packed, or -1 when nothing is queued or
+ * an allocation failed.
+ *
+ * Fix over the original: the header is released when the frame cannot
+ * be created; it used to leak.  free() is used on the assumption that
+ * gf_hdr_new() allocates from the C heap - TODO confirm.
+ */
+int
+client_get_forgets (xlator_t *this, client_forget_t *forget)
+{
+        call_frame_t        *fr = NULL;
+        gf_hdr_common_t     *hdr = NULL;
+        size_t               hdrlen = 0;
+        gf_cbk_forget_req_t *req = NULL;
+        int                  ret = -1;
+        client_conf_t       *conf = NULL;
+        int                  count = 0;
+        int                  index = 0;
+
+        conf = this->private;
+
+        if (conf->forget.count > 0) {
+                count = conf->forget.count;
+
+                hdrlen = gf_hdr_len (req, (count * sizeof (int64_t)));
+                hdr    = gf_hdr_new (req, (count * sizeof (int64_t)));
+                GF_VALIDATE_OR_GOTO (this->name, hdr, out);
+
+                req = gf_param (hdr);
+
+                req->count = hton32 (count);
+                for (index = 0; index < count; index++) {
+                        req->ino_array[index] =
+                                hton64 (conf->forget.ino_array[index]);
+                }
+
+                fr = create_frame (this, this->ctx->pool);
+                if (fr == NULL) {
+                        /* nothing will send this header; do not leak it */
+                        free (hdr);
+                        hdr = NULL;
+                }
+                GF_VALIDATE_OR_GOTO (this->name, fr, out);
+
+                conf->forget.frames_in_transit++;
+
+                forget->frame = fr;
+                forget->hdr = hdr;
+                forget->hdrlen = hdrlen;
+
+                ret = count;
+
+                conf->forget.count = 0;
+        }
+ out:
+        return ret;
+}
+
+
+/*
+ * client_ping_timer_expired - timer callback fired when a ping was not
+ * answered within the ping timeout.
+ * @data: the transport_t the ping was sent on
+ *
+ * Cancels and clears the ping timer, resets the ping state under
+ * conn->lock and then disconnects the transport.
+ */
+void
+client_ping_timer_expired (void *data)
+{
+        transport_t         *trans = data;
+        xlator_t            *this  = trans->xl;
+        client_connection_t *conn  = trans->xl_private;
+
+        gf_log (this->name, GF_LOG_ERROR,
+                "ping timer expired! bailing transport");
+
+        pthread_mutex_lock (&conn->lock);
+        {
+                if (conn->ping_timer != NULL) {
+                        gf_timer_call_cancel (trans->xl->ctx,
+                                              conn->ping_timer);
+                }
+
+                conn->ping_started = 0;
+                conn->ping_timer   = NULL;
+        }
+        pthread_mutex_unlock (&conn->lock);
+
+        transport_disconnect (trans);
+}
+
+
+/*
+ * client_start_ping - start one ping cycle on a transport.
+ * @data: the transport_t to ping
+ *
+ * If no frames are pending or the connection is down, the ping state is
+ * reset and nothing is sent.  Otherwise a timer is armed to fire
+ * client_ping_timer_expired after conn->ping_timeout seconds and a
+ * GF_MOP_PING request is submitted on a fresh frame whose local points
+ * to the transport.
+ */
+void
+client_start_ping (void *data)
+{
+ xlator_t *this = NULL;
+ transport_t *trans = NULL;
+ client_conf_t *conf = NULL;
+ client_connection_t *conn = NULL;
+ int32_t ret = -1;
+ gf_hdr_common_t *hdr = NULL;
+ struct timeval timeout = {0, };
+ call_frame_t *dummy_frame = NULL;
+ size_t hdrlen = -1;
+ gf_mop_ping_req_t *req = NULL;
+
+
+ trans = data;
+ this = trans->xl;
+ conf = this->private;
+ conn = trans->xl_private;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if ((conn->saved_frames->count == 0) ||
+ !conn->connected) {
+ /* using goto looked ugly here,
+ * hence getting out this way */
+ if (conn->ping_timer)
+ gf_timer_call_cancel (trans->xl->ctx,
+ conn->ping_timer);
+ conn->ping_timer = NULL;
+ conn->ping_started = 0;
+ /* unlock */
+ pthread_mutex_unlock (&conn->lock);
+ return;
+ }
+
+ /* a negative count is logged and reset to 0; the ping still goes out */
+ if (conn->saved_frames->count < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "saved_frames->count is %"PRId64,
+ conn->saved_frames->count);
+ conn->saved_frames->count = 0;
+ }
+ timeout.tv_sec = conn->ping_timeout;
+ timeout.tv_usec = 0;
+
+ conn->ping_timer =
+ gf_timer_call_after (trans->xl->ctx, timeout,
+ client_ping_timer_expired,
+ (void *) trans);
+
+ if (conn->ping_timer == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to setup timer");
+ } else
+ conn->ping_started = 1;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ /* NOTE(review): neither hdr nor dummy_frame is checked for NULL below */
+ hdrlen = gf_hdr_len (req, 0);
+ hdr = gf_hdr_new (req, 0);
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ dummy_frame->local = trans; /* client_ping_cbk reads the transport back from here */
+
+ ret = protocol_client_xfer (dummy_frame, this, trans,
+ GF_OP_TYPE_MOP_REQUEST, GF_MOP_PING,
+ hdr, hdrlen, NULL, 0, NULL);
+}
+
+
+/*
+ * client_ping_cbk - reply handler for GF_MOP_PING.
+ *
+ * The transport is taken from frame->local.  A reply with op_ret -1 is
+ * only logged.  Otherwise the pending ping timer is cancelled and a
+ * new timer is armed that calls client_start_ping after
+ * conn->ping_timeout seconds.  The ping frame's stack is destroyed in
+ * both cases.  Always returns 0.
+ */
+int
+client_ping_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        transport_t         *trans    = NULL;
+        xlator_t            *this     = NULL;
+        client_connection_t *conn     = NULL;
+        struct timeval       interval = {0, };
+        int                  op_ret   = 0;
+
+        trans = frame->local;
+        frame->local = NULL;
+
+        this = trans->xl;
+        conn = trans->xl_private;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+
+        if (op_ret == -1) {
+                /* timer expired and transport bailed out */
+                gf_log (this->name, GF_LOG_ERROR, "timer must have expired");
+        } else {
+                pthread_mutex_lock (&conn->lock);
+                {
+                        interval.tv_sec  = conn->ping_timeout;
+                        interval.tv_usec = 0;
+
+                        gf_timer_call_cancel (trans->xl->ctx,
+                                              conn->ping_timer);
+
+                        conn->ping_timer =
+                                gf_timer_call_after (trans->xl->ctx, interval,
+                                                     client_start_ping,
+                                                     (void *)trans);
+                        if (conn->ping_timer == NULL) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "gf_timer_call_after() returned NULL");
+                        }
+                }
+                pthread_mutex_unlock (&conn->lock);
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+/*
+ * protocol_client_xfer - submit one request to the server.
+ * @frame: frame to save for the reply (may be NULL)
+ * @trans: transport to use; NULL selects the bulk channel
+ * @type, @op: request type and operation number
+ * @hdr, @hdrlen: request header, completed here with callid/op/type
+ *                and, when a frame is given, uid/gid/pid
+ * @vector, @count, @refs: passed through to transport_submit()
+ *
+ * Unless the request itself is a forget, queued inode forgets are sent
+ * first through a recursive call.  If the request cannot be submitted
+ * and a frame was given, the matching reply handler is invoked with a
+ * synthetic header carrying op_ret -1 / ENOTCONN.
+ *
+ * Returns the result of transport_submit(), or -1 when nothing was
+ * submitted.
+ */
+int
+protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
+ int type, int op,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ struct iovec *vector, int count,
+ dict_t *refs)
+{
+ client_conf_t *conf = NULL;
+ client_connection_t *conn = NULL;
+ uint64_t callid = 0;
+ int32_t ret = -1;
+ int start_ping = 0;
+ gf_hdr_common_t rsphdr = {0, };
+ client_forget_t forget = {0, };
+ uint8_t send_forget = 0;
+
+
+ conf = this->private;
+
+ if (!trans) {
+ /* default to bulk op since it is 'safer' */
+ trans = conf->transport[CHANNEL_BULK];
+ }
+ conn = trans->xl_private;
+
+ /* the forget request itself must not trigger another forget (would recurse) */
+ if (!((type == GF_OP_TYPE_CBK_REQUEST) &&
+ (op == GF_CBK_FORGET)))
+ {
+ LOCK (&conf->forget.lock);
+ {
+ ret = client_get_forgets (this, &forget);
+ if (ret <= 0)
+ send_forget = 0;
+ else
+ send_forget = 1;
+ }
+ UNLOCK (&conf->forget.lock);
+
+ if (send_forget) {
+ ret = protocol_client_xfer (forget.frame, this, NULL,
+ GF_OP_TYPE_CBK_REQUEST,
+ GF_CBK_FORGET,
+ forget.hdr, forget.hdrlen,
+ NULL, 0, NULL);
+ }
+ }
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ callid = ++conn->callid;
+
+ hdr->callid = hton64 (callid);
+ hdr->op = hton32 (op);
+ hdr->type = hton32 (type);
+
+ if (frame) {
+ hdr->req.uid = hton32 (frame->root->uid);
+ hdr->req.gid = hton32 (frame->root->gid);
+ hdr->req.pid = hton32 (frame->root->pid);
+ }
+
+ if (conn->connected == 0)
+ transport_connect (trans);
+
+ ret = -1;
+
+ /* SETVOLUME is let through even while conn->connected is still 0 */
+ if (conn->connected ||
+ ((type == GF_OP_TYPE_MOP_REQUEST) &&
+ (op == GF_MOP_SETVOLUME))) {
+ ret = transport_submit (trans, (char *)hdr, hdrlen,
+ vector, count, refs);
+ }
+
+ if ((ret >= 0) && frame) {
+ /* TODO: check this logic */
+ gettimeofday (&conn->last_sent, NULL);
+ save_frame (trans, frame, op, type, callid);
+ }
+
+ if (!conn->ping_started && (ret >= 0)) {
+ start_ping = 1;
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ /* client_start_ping takes conn->lock itself, so it is called after unlocking */
+ if (start_ping)
+ client_start_ping ((void *) trans);
+
+ /* submission failed: answer the caller locally with ENOTCONN */
+ if (frame && (ret < 0)) {
+ rsphdr.op = op;
+ rsphdr.rsp.op_ret = hton32 (-1);
+ rsphdr.rsp.op_errno = hton32 (ENOTCONN);
+
+ if (type == GF_OP_TYPE_FOP_REQUEST) {
+ rsphdr.type = GF_OP_TYPE_FOP_REPLY;
+ gf_fops[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0);
+ } else if (type == GF_OP_TYPE_MOP_REQUEST) {
+ rsphdr.type = GF_OP_TYPE_MOP_REPLY;
+ gf_mops[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0);
+ } else {
+ rsphdr.type = GF_OP_TYPE_CBK_REPLY;
+ gf_cbks[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0);
+ }
+ }
+
+ return ret;
+}
+
+
+
+/**
+ * client_create - create function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location of the file to create (path and name are sent)
+ * @flags: create flags
+ * @mode: create mode
+ * @fd: file descriptor structure; a reference is kept in the frame's local
+ *
+ * With conf->child set the call is wound to the child's create fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_PARENT inode number,
+ * flags, mode, path and basename is sent with protocol_client_xfer() on the
+ * CHANNEL_LOWLAT channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->create
+ */
+
+int
+client_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags,
+ mode_t mode, fd_t *fd)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_create_req_t *req = NULL;
+ size_t hdrlen = 0;
+ size_t pathlen = 0;
+ size_t baselen = 0;
+ int32_t ret = -1;
+ ino_t par = 0;
+ client_conf_t *conf = NULL;
+ client_local_t *local = NULL;
+
+
+ conf = this->private;
+
+ if (conf->child) { /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame, default_create_cbk,
+ conf->child,
+ conf->child->fops->create,
+ loc, flags, mode, fd);
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ local->fd = fd_ref (fd);
+ loc_copy (&local->loc, loc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (fd ref and loc copy) stays attached - confirm it is released */
+
+ pathlen = STRLEN_0(loc->path);
+ baselen = STRLEN_0(loc->name);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen);
+ hdr = gf_hdr_new (req, pathlen + baselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->flags = hton32 (flags);
+ req->mode = hton32 (mode);
+ req->par = hton64 (par);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_CREATE,
+ hdr, hdrlen, NULL, 0, NULL);
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, fd, NULL, NULL);
+ return 0;
+
+}
+
+/**
+ * client_open - open function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location of file
+ * @flags: open flags
+ * @fd: file descriptor structure; a reference is kept in the frame's local
+ *
+ * With conf->child set the call is wound to the child's open fop.  Otherwise
+ * a request carrying the GF_CLIENT_INODE_SELF inode number, flags and path is
+ * sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The
+ * `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->open
+ */
+int
+client_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, fd_t *fd)
+{
+ int ret = -1;
+ gf_hdr_common_t *hdr = NULL;
+ size_t hdrlen = 0;
+ gf_fop_open_req_t *req = NULL;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = NULL;
+ client_local_t *local = NULL;
+
+ conf = this->private;
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame, default_open_cbk,
+ conf->child,
+ conf->child->fops->open,
+ loc, flags, fd);
+
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ local->fd = fd_ref (fd);
+ loc_copy (&local->loc, loc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (fd ref and loc copy) stays attached - confirm it is released */
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ req->flags = hton32 (flags);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPEN,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, fd);
+ return 0;
+
+}
+
+
+/**
+ * client_stat - stat function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ *
+ * With conf->child set the call is wound to the child's stat fop.  Otherwise
+ * a request carrying the GF_CLIENT_INODE_SELF inode number and the path is
+ * sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The
+ * `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->stat
+ */
+int32_t
+client_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_stat_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int32_t ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_stat_cbk,
+ conf->child,
+ conf->child->fops->stat,
+ loc);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_STAT,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_readlink - readlink function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @size: size value sent in the request (32-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's readlink fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_SELF inode number, @size
+ * and the path is sent with protocol_client_xfer() on the CHANNEL_LOWLAT
+ * channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->readlink
+ */
+int32_t
+client_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_readlink_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_readlink_cbk,
+ conf->child,
+ conf->child->fops->readlink,
+ loc,
+ size);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ req->size = hton32 (size);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_READLINK,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_mknod - mknod function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location of the node to create (path and name are sent)
+ * @mode: mode sent in the request (32-bit on the wire)
+ * @dev: device number sent in the request (64-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's mknod fop.  Otherwise
+ * a copy of @loc is kept in the frame's local and a request carrying the
+ * GF_CLIENT_INODE_PARENT inode number, mode, dev, path and basename is sent
+ * with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The `unwind`
+ * label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->mknod
+ */
+int32_t
+client_mknod (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ mode_t mode,
+ dev_t dev)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_mknod_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ size_t baselen = 0;
+ ino_t par = 0;
+ client_conf_t *conf = this->private;
+ client_local_t *local = NULL;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_mknod_cbk,
+ conf->child,
+ conf->child->fops->mknod,
+ loc, mode, dev);
+
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ loc_copy (&local->loc, loc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (loc copy) stays attached - confirm it is released */
+
+ pathlen = STRLEN_0(loc->path);
+ baselen = STRLEN_0(loc->name);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen);
+ hdr = gf_hdr_new (req, pathlen + baselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->par = hton64 (par);
+ req->mode = hton32 (mode);
+ req->dev = hton64 (dev);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKNOD,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_mkdir - mkdir function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location of the directory to create (path and name are sent)
+ * @mode: mode sent in the request (32-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's mkdir fop.  Otherwise
+ * a copy of @loc is kept in the frame's local and a request carrying the
+ * GF_CLIENT_INODE_PARENT inode number, mode, path and basename is sent with
+ * protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The `unwind` label
+ * reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->mkdir
+ */
+int32_t
+client_mkdir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ mode_t mode)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_mkdir_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ size_t baselen = 0;
+ ino_t par = 0;
+ client_conf_t *conf = this->private;
+ client_local_t *local = NULL;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_mkdir_cbk,
+ conf->child,
+ conf->child->fops->mkdir,
+ loc, mode);
+
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ loc_copy (&local->loc, loc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (loc copy) stays attached - confirm it is released */
+
+ pathlen = STRLEN_0(loc->path);
+ baselen = STRLEN_0(loc->name);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen);
+ hdr = gf_hdr_new (req, pathlen + baselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->par = hton64 (par);
+ req->mode = hton32 (mode);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKDIR,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
+ return 0;
+
+}
+
+
+
+/**
+ * client_unlink - unlink function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location of file
+ *
+ * With conf->child set the call is wound to the child's unlink fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_PARENT inode number, path
+ * and basename is sent with protocol_client_xfer() on the CHANNEL_BULK
+ * channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->unlink
+ */
+int32_t
+client_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_unlink_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ size_t baselen = 0;
+ ino_t par = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_unlink_cbk,
+ conf->child,
+ conf->child->fops->unlink,
+ loc);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ baselen = STRLEN_0(loc->name);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen);
+ hdr = gf_hdr_new (req, pathlen + baselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->par = hton64 (par);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_UNLINK,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL);
+ return 0;
+
+}
+
+/**
+ * client_rmdir - rmdir function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ *
+ * With conf->child set the call is wound to the child's rmdir fop.  Otherwise
+ * a request carrying the GF_CLIENT_INODE_PARENT inode number, path and
+ * basename is sent with protocol_client_xfer() on the CHANNEL_BULK channel.
+ * The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->rmdir
+ */
+int32_t
+client_rmdir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_rmdir_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ size_t baselen = 0;
+ ino_t par = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_rmdir_cbk,
+ conf->child,
+ conf->child->fops->rmdir,
+ loc);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ baselen = STRLEN_0(loc->name);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen);
+ hdr = gf_hdr_new (req, pathlen + baselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->par = hton64 (par);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_RMDIR,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL);
+ return 0;
+
+}
+
+
+
+/**
+ * client_symlink - symlink function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @linkname: link name string, sent after the path and basename
+ * @loc: location whose path and name are sent; a copy is kept in the
+ *       frame's local
+ *
+ * With conf->child set the call is wound to the child's symlink fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_PARENT inode number and
+ * the three strings (path, basename, linkname) written one after another is
+ * sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The
+ * `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->symlink
+ */
+int32_t
+client_symlink (call_frame_t *frame,
+ xlator_t *this,
+ const char *linkname,
+ loc_t *loc)
+{
+ int ret = -1;
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_symlink_req_t *req = NULL;
+ size_t hdrlen = 0;
+ size_t pathlen = 0;
+ size_t newlen = 0;
+ size_t baselen = 0;
+ ino_t par = 0;
+ client_conf_t *conf = this->private;
+ client_local_t *local = NULL;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_symlink_cbk,
+ conf->child,
+ conf->child->fops->symlink,
+ linkname, loc);
+
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ loc_copy (&local->loc, loc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (loc copy) stays attached - confirm it is released */
+
+ pathlen = STRLEN_0 (loc->path);
+ baselen = STRLEN_0 (loc->name);
+ newlen = STRLEN_0 (linkname);
+ par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, pathlen + baselen + newlen);
+ hdr = gf_hdr_new (req, pathlen + baselen + newlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->par = hton64 (par);
+ strcpy (req->path, loc->path);
+ strcpy (req->bname + pathlen, loc->name); /* basename is written pathlen bytes after the path */
+ strcpy (req->linkname + pathlen + baselen, linkname); /* linkname follows the basename */
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_SYMLINK,
+ hdr, hdrlen, NULL, 0, NULL);
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_rename - rename function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @oldloc: location of old pathname
+ * @newloc: location of new pathname
+ *
+ * With conf->child set the call is wound to the child's rename fop.
+ * Otherwise a request carrying both GF_CLIENT_INODE_PARENT inode numbers and
+ * four strings (old path, old basename, new path, new basename) written one
+ * after another is sent with protocol_client_xfer() on the CHANNEL_LOWLAT
+ * channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->rename
+ */
+int32_t
+client_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc)
+{
+ int ret = -1;
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_rename_req_t *req = NULL;
+ size_t hdrlen = 0;
+ size_t oldpathlen = 0;
+ size_t oldbaselen = 0;
+ size_t newpathlen = 0;
+ size_t newbaselen = 0;
+ ino_t oldpar = 0;
+ ino_t newpar = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_rename_cbk,
+ conf->child,
+ conf->child->fops->rename,
+ oldloc, newloc);
+
+ return 0;
+ }
+
+ oldpathlen = STRLEN_0(oldloc->path);
+ oldbaselen = STRLEN_0(oldloc->name);
+ newpathlen = STRLEN_0(newloc->path);
+ newbaselen = STRLEN_0(newloc->name);
+ oldpar = this_ino_get (oldloc, this, GF_CLIENT_INODE_PARENT);
+ newpar = this_ino_get (newloc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, (oldpathlen + oldbaselen +
+ newpathlen + newbaselen));
+ hdr = gf_hdr_new (req, (oldpathlen + oldbaselen +
+ newpathlen + newbaselen));
+
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->oldpar = hton64 (oldpar);
+ req->newpar = hton64 (newpar);
+ /* each string below is written at the summed lengths of the ones before it */
+ strcpy (req->oldpath, oldloc->path);
+ strcpy (req->oldbname + oldpathlen, oldloc->name);
+ strcpy (req->newpath + oldpathlen + oldbaselen, newloc->path);
+ strcpy (req->newbname + oldpathlen + oldbaselen + newpathlen,
+ newloc->name);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_RENAME,
+ hdr, hdrlen, NULL, 0, NULL);
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+
+/**
+ * client_link - link function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @oldloc: location of old pathname; a copy is kept in the frame's local
+ * @newloc: location of new pathname (path and name are sent)
+ *
+ * With conf->child set the call is wound to the child's link fop.  Otherwise
+ * a request carrying the old GF_CLIENT_INODE_SELF inode number, the new
+ * GF_CLIENT_INODE_PARENT inode number and three strings (old path, new path,
+ * new basename) written one after another is sent with
+ * protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The `unwind` label
+ * reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->link
+ */
+
+int32_t
+client_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc)
+{
+ int ret = -1;
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_link_req_t *req = NULL;
+ size_t hdrlen = 0;
+ size_t oldpathlen = 0;
+ size_t newpathlen = 0;
+ size_t newbaselen = 0;
+ ino_t oldino = 0;
+ ino_t newpar = 0;
+ client_conf_t *conf = this->private;
+ client_local_t *local = NULL;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_link_cbk,
+ conf->child,
+ conf->child->fops->link,
+ oldloc, newloc);
+
+ return 0;
+ }
+
+ local = calloc (1, sizeof (*local));
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ loc_copy (&local->loc, oldloc);
+
+ frame->local = local; /* NOTE(review): if the hdr check below fails, local (loc copy) stays attached - confirm it is released */
+
+ oldpathlen = STRLEN_0(oldloc->path);
+ newpathlen = STRLEN_0(newloc->path);
+ newbaselen = STRLEN_0(newloc->name);
+ oldino = this_ino_get (oldloc, this, GF_CLIENT_INODE_SELF);
+ newpar = this_ino_get (newloc, this, GF_CLIENT_INODE_PARENT);
+
+ hdrlen = gf_hdr_len (req, oldpathlen + newpathlen + newbaselen);
+ hdr = gf_hdr_new (req, oldpathlen + newpathlen + newbaselen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+ /* each string below is written at the summed lengths of the ones before it */
+ strcpy (req->oldpath, oldloc->path);
+ strcpy (req->newpath + oldpathlen, newloc->path);
+ strcpy (req->newbname + oldpathlen + newpathlen, newloc->name);
+
+ req->oldino = hton64 (oldino);
+ req->newpar = hton64 (newpar);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_LINK,
+ hdr, hdrlen, NULL, 0, NULL);
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, oldloc->inode, NULL);
+ return 0;
+}
+
+
+
+/**
+ * client_chmod - chmod function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @mode: mode sent in the request (32-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's chmod fop.  Otherwise
+ * a request carrying the GF_CLIENT_INODE_SELF inode number, mode and path is
+ * sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The
+ * `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->chmod
+ */
+int32_t
+client_chmod (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ mode_t mode)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_chmod_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_chmod_cbk,
+ conf->child,
+ conf->child->fops->chmod,
+ loc,
+ mode);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ req->mode = hton32 (mode);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHMOD,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_chown - chown function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @uid: uid of new owner
+ * @gid: gid of new owner group
+ *
+ * With conf->child set the call is wound to the child's chown fop.  Otherwise
+ * a request carrying the GF_CLIENT_INODE_SELF inode number, uid, gid and path
+ * is sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.  The
+ * `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->chown
+ */
+int32_t
+client_chown (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ uid_t uid,
+ gid_t gid)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_chown_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_chown_cbk,
+ conf->child,
+ conf->child->fops->chown,
+ loc,
+ uid,
+ gid);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ req->uid = hton32 (uid);
+ req->gid = hton32 (gid);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHOWN,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+/**
+ * client_truncate - truncate function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @offset: offset sent in the request (64-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's truncate fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_SELF inode number, offset
+ * and path is sent with protocol_client_xfer() on the CHANNEL_BULK channel.
+ * The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->truncate
+ */
+int32_t
+client_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_truncate_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_truncate_cbk,
+ conf->child,
+ conf->child->fops->truncate,
+ loc,
+ offset);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ req->offset = hton64 (offset);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_TRUNCATE,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+
+/**
+ * client_utimens - utimens function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @tvp: timespec data copied into the request with
+ *       gf_timespec_from_timespec() (presumably access and modification
+ *       times - TODO confirm)
+ *
+ * With conf->child set the call is wound to the child's utimens fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_SELF inode number, the
+ * converted times and the path is sent with protocol_client_xfer() on the
+ * CHANNEL_LOWLAT channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->utimens
+ */
+int32_t
+client_utimens (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct timespec *tvp)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_utimens_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_utimens_cbk,
+ conf->child,
+ conf->child->fops->utimens,
+ loc,
+ tvp);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ gf_timespec_from_timespec (req->tv, tvp);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_UTIMENS,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+
+/**
+ * client_readv - readv function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @size: number of bytes requested (32-bit on the wire)
+ * @offset: file offset of the read (64-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's readv fop.  If
+ * this_fd_get() returns -1 the call is unwound with -1/EBADFD.  Otherwise a
+ * request carrying the remote fd, size and offset is sent with
+ * protocol_client_xfer() on the CHANNEL_BULK channel.  The `unwind` label
+ * reports -1/EINVAL.
+ *
+ * Always returns 0; the result of protocol_client_xfer() is not propagated.
+ *
+ * external reference through client_protocol_xlator->fops->readv
+ */
+int32_t
+client_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_read_req_t *req = NULL;
+ size_t hdrlen = 0;
+ int64_t remote_fd = -1;
+ int ret = -1;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_readv_cbk,
+ conf->child,
+ conf->child->fops->readv,
+ fd,
+ size,
+ offset);
+
+ return 0;
+ }
+
+ ret = this_fd_get (fd, this, &remote_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "(%"PRId64"): failed to get remote fd, returning EBADFD",
+ fd->inode->ino);
+ STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
+ return 0;
+ }
+
+ hdrlen = gf_hdr_len (req, 0);
+ hdr = gf_hdr_new (req, 0);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->fd = hton64 (remote_fd);
+ req->size = hton32 (size);
+ req->offset = hton64 (offset);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_READ,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return 0;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL, 0, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_writev - writev function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @vector: data to write; handed to protocol_client_xfer() as the payload
+ * @count: number of entries in @vector
+ * @offset: file offset of the write (64-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's writev fop.  If
+ * this_fd_get() returns -1 the call is unwound with -1/EBADFD.  Otherwise a
+ * request carrying the remote fd, the total vector length and the offset is
+ * sent, together with @vector and frame->root->req_refs, on the CHANNEL_BULK
+ * channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->writev
+ */
+int32_t
+client_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_write_req_t *req = NULL;
+ size_t hdrlen = 0;
+ int64_t remote_fd = -1;
+ int ret = -1;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_writev_cbk,
+ conf->child,
+ conf->child->fops->writev,
+ fd,
+ vector,
+ count,
+ offset);
+
+ return 0;
+ }
+
+ ret = this_fd_get (fd, this, &remote_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "(%"PRId64"): failed to get remote fd. returning EBADFD",
+ fd->inode->ino);
+ STACK_UNWIND (frame, -1, EBADFD, NULL);
+ return 0;
+ }
+
+ hdrlen = gf_hdr_len (req, 0);
+ hdr = gf_hdr_new (req, 0);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->fd = hton64 (remote_fd);
+ req->size = hton32 (iov_length (vector, count));
+ req->offset = hton64 (offset);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_WRITE,
+ hdr, hdrlen, vector, count,
+ frame->root->req_refs);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_statfs - statfs function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ *
+ * With conf->child set the call is wound to the child's statfs fop.
+ * Otherwise a request carrying the GF_CLIENT_INODE_SELF inode number and the
+ * path is sent with protocol_client_xfer() on the CHANNEL_LOWLAT channel.
+ * The `unwind` label reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->statfs
+ */
+int32_t
+client_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_statfs_req_t *req = NULL;
+ size_t hdrlen = -1; /* -1 wraps in a size_t; the value is overwritten before use */
+ int ret = -1;
+ size_t pathlen = 0;
+ ino_t ino = 0;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_statfs_cbk,
+ conf->child,
+ conf->child->fops->statfs,
+ loc);
+
+ return 0;
+ }
+
+ pathlen = STRLEN_0(loc->path);
+ ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+ hdrlen = gf_hdr_len (req, pathlen);
+ hdr = gf_hdr_new (req, pathlen);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->ino = hton64 (ino);
+ strcpy (req->path, loc->path);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_STATFS,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL, NULL);
+ return 0;
+
+}
+
+
+/**
+ * client_flush - flush function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ *
+ * With conf->child set the call is wound to the child's flush fop.  If
+ * this_fd_get() returns -1 the call is unwound with -1/EBADFD.  Otherwise a
+ * request carrying the remote fd is sent with protocol_client_xfer() on the
+ * CHANNEL_BULK channel.  The `unwind` label reports -1/EINVAL.
+ *
+ * Always returns 0; the result of protocol_client_xfer() is not propagated.
+ *
+ * external reference through client_protocol_xlator->fops->flush
+ */
+
+int32_t
+client_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_flush_req_t *req = NULL;
+ size_t hdrlen = 0;
+ int64_t remote_fd = -1;
+ int ret = -1;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_flush_cbk,
+ conf->child,
+ conf->child->fops->flush,
+ fd);
+
+ return 0;
+ }
+
+ ret = this_fd_get (fd, this, &remote_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "(%"PRId64"): failed to get remote fd. returning EBADFD",
+ fd->inode->ino);
+ STACK_UNWIND (frame, -1, EBADFD);
+ return 0;
+ }
+
+ hdrlen = gf_hdr_len (req, 0);
+ hdr = gf_hdr_new (req, 0);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->fd = hton64 (remote_fd);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_FLUSH,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return 0;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL);
+ return 0;
+
+}
+
+
+
+
+/**
+ * client_fsync - fsync function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @flags: value sent in the request's `data` field (32-bit on the wire)
+ *
+ * With conf->child set the call is wound to the child's fsync fop.  If
+ * this_fd_get() returns -1 the call is unwound with -1/EBADFD.  Otherwise a
+ * request carrying the remote fd and @flags is sent with
+ * protocol_client_xfer() on the CHANNEL_BULK channel.  The `unwind` label
+ * reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->fops->fsync
+ */
+
+int32_t
+client_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_fsync_req_t *req = NULL;
+ size_t hdrlen = 0;
+ int64_t remote_fd = -1;
+ int32_t ret = -1;
+ client_conf_t *conf = this->private;
+
+ if (conf->child) {
+ /* a child xlator is configured: wind to it instead of using the transport */
+ STACK_WIND (frame,
+ default_fsync_cbk,
+ conf->child,
+ conf->child->fops->fsync,
+ fd,
+ flags);
+
+ return 0;
+ }
+
+ ret = this_fd_get (fd, this, &remote_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "(%"PRId64"): failed to get remote fd. returning EBADFD",
+ fd->inode->ino);
+ STACK_UNWIND(frame, -1, EBADFD);
+ return 0;
+ }
+
+ hdrlen = gf_hdr_len (req, 0);
+ hdr = gf_hdr_new (req, 0);
+ GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+ req = gf_param (hdr);
+
+ req->fd = hton64 (remote_fd);
+ req->data = hton32 (flags);
+
+ ret = protocol_client_xfer (frame, this,
+ CLIENT_CHANNEL (this, CHANNEL_BULK),
+ GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNC,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return ret;
+unwind:
+ if (hdr)
+ free (hdr);
+ STACK_UNWIND(frame, -1, EINVAL);
+ return 0;
+
+}
+
+/**
+ * client_xattrop - xattrop function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @flags: xattrop flags (32-bit on the wire)
+ * @dict: dictionary serialized into the request; may be NULL
+ *
+ * With conf->child set the call is wound to the child's xattrop fop.
+ * Otherwise a request carrying the flags, the serialized dictionary, the
+ * GF_CLIENT_INODE_SELF inode number and the path (written after the
+ * dictionary) is sent with protocol_client_xfer() on the CHANNEL_BULK
+ * channel.  Any validation, length or serialization failure reaches the
+ * `unwind` label, which reports -1/EINVAL.
+ *
+ * Returns 0 after a wind or unwind, else the result of protocol_client_xfer().
+ */
+int32_t
+client_xattrop (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                gf_xattrop_flags_t flags,
+                dict_t *dict)
+{
+        gf_hdr_common_t      *hdr = NULL;
+        gf_fop_xattrop_req_t *req = NULL;
+        size_t                hdrlen = 0;
+        size_t                dict_len = 0;
+        int32_t               ret = -1;
+        size_t                pathlen = 0;
+        ino_t                 ino = 0;
+        client_conf_t        *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO("client", this, unwind);
+
+        conf = this->private;
+        if (conf->child) {
+                /* a child xlator is configured: wind to it instead of
+                   using the transport */
+                STACK_WIND (frame,
+                            default_xattrop_cbk,
+                            conf->child,
+                            conf->child->fops->xattrop,
+                            loc,
+                            flags,
+                            dict);
+
+                return 0;
+        }
+
+        GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+
+        if (dict) {
+                /* Check the length while it is still signed: dict_len is a
+                   size_t, so a negative error value stored there could never
+                   compare below zero and would be used as a huge length. */
+                ret = dict_serialized_length (dict);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to get serialized length of dict(%p)",
+                                dict);
+                        goto unwind;
+                }
+                dict_len = ret;
+        }
+
+        pathlen = STRLEN_0(loc->path);
+        ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        hdrlen = gf_hdr_len (req, dict_len + pathlen);
+        hdr    = gf_hdr_new (req, dict_len + pathlen);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->flags = hton32 (flags);
+        req->dict_len = hton32 (dict_len);
+        if (dict) {
+                ret = dict_serialize (dict, req->dict);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to serialize dictionary(%p)",
+                                dict);
+                        goto unwind;
+                }
+        }
+        req->ino = hton64 (ino);
+        /* the path is written dict_len bytes into the payload */
+        strcpy (req->path + dict_len, loc->path);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_XATTROP,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/**
+ * client_fxattrop - fxattrop function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: optional file descriptor; when given, its remote fd and inode
+ *      number are placed in the request
+ * @flags: xattrop flags, sent in network byte order
+ * @dict: optional dictionary; serialized into the request when non-NULL
+ *
+ * Winds to conf->child when one is configured.  Otherwise builds a
+ * GF_FOP_FXATTROP request and passes it to protocol_client_xfer().
+ * A failed remote-fd lookup unwinds with EBADFD; any other local failure
+ * unwinds with EINVAL.
+ */
+int32_t
+client_fxattrop (call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 gf_xattrop_flags_t flags,
+                 dict_t *dict)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_fxattrop_req_t *req = NULL;
+        size_t hdrlen = 0;
+        size_t dict_len = 0;
+        int32_t len = 0;
+        int64_t remote_fd = -1;
+        int32_t ret = -1;
+        int32_t op_errno = EINVAL;
+        ino_t ino = 0;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_fxattrop_cbk,
+                            conf->child,
+                            conf->child->fops->fxattrop,
+                            fd,
+                            flags,
+                            dict);
+
+                return 0;
+        }
+
+        if (dict) {
+                /* keep the result signed: dict_len is a size_t, so a
+                 * negative error value could never be detected on it */
+                len = dict_serialized_length (dict);
+                if (len < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to get serialized length of dict(%p)",
+                                dict);
+                        goto unwind;
+                }
+                dict_len = len;
+        }
+
+        if (fd) {
+                ret = this_fd_get (fd, this, &remote_fd);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                                fd->inode->ino);
+                        op_errno = EBADFD;
+                        goto unwind;
+                }
+                ino = fd->inode->ino;
+        }
+
+        hdrlen = gf_hdr_len (req, dict_len);
+        hdr = gf_hdr_new (req, dict_len);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->flags = hton32 (flags);
+        req->dict_len = hton32 (dict_len);
+        if (dict) {
+                ret = dict_serialize (dict, req->dict);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to serialize dictionary(%p)",
+                                dict);
+                        goto unwind;
+                }
+        }
+        req->fd = hton64 (remote_fd);
+        req->ino = hton64 (ino);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_FXATTROP,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+
+}
+
+
+/**
+ * client_setxattr - setxattr function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location
+ * @dict: dictionary which contains key:value to be set.
+ * @flags: setxattr flags, sent in network byte order
+ *
+ * Winds to conf->child when one is configured.  Otherwise builds a
+ * GF_FOP_SETXATTR request (serialized dict followed by the path) and
+ * passes it to protocol_client_xfer().  Any local failure unwinds the
+ * frame with -1 / EINVAL.
+ *
+ * external reference through client_protocol_xlator->fops->setxattr
+ */
+int32_t
+client_setxattr (call_frame_t *frame,
+                 xlator_t *this,
+                 loc_t *loc,
+                 dict_t *dict,
+                 int32_t flags)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_setxattr_req_t *req = NULL;
+        size_t hdrlen = 0;
+        size_t dict_len = 0;
+        int32_t len = 0;
+        int ret = -1;
+        size_t pathlen = 0;
+        ino_t ino = 0;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_setxattr_cbk,
+                            conf->child,
+                            conf->child->fops->setxattr,
+                            loc,
+                            dict,
+                            flags);
+
+                return 0;
+        }
+
+        /* keep the result signed: dict_len is a size_t, so a negative
+         * error value could never be detected on it */
+        len = dict_serialized_length (dict);
+        if (len < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get serialized length of dict(%p)",
+                        dict);
+                goto unwind;
+        }
+        dict_len = len;
+
+        pathlen = STRLEN_0(loc->path);
+        ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        hdrlen = gf_hdr_len (req, dict_len + pathlen);
+        hdr = gf_hdr_new (req, dict_len + pathlen);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->ino = hton64 (ino);
+        req->flags = hton32 (flags);
+        req->dict_len = hton32 (dict_len);
+
+        ret = dict_serialize (dict, req->dict);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to serialize dictionary(%p)",
+                        dict);
+                goto unwind;
+        }
+
+        /* the path is stored right after the serialized dictionary */
+        strcpy (req->path + dict_len, loc->path);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETXATTR,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+}
+
+/**
+ * client_getxattr - getxattr function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location structure
+ * @name: attribute name; may be NULL, in which case no name is copied
+ *        into the request and namelen is sent as 0
+ *
+ * external reference through client_protocol_xlator->fops->getxattr
+ */
+int32_t
+client_getxattr (call_frame_t *frame, xlator_t *this,
+                 loc_t *loc, const char *name)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_getxattr_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        size_t path_size = 0;
+        size_t name_size = 0;
+        size_t payload = 0;
+        ino_t inum = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_getxattr_cbk, conf->child,
+                            conf->child->fops->getxattr, loc, name);
+                return 0;
+        }
+
+        path_size = STRLEN_0(loc->path);
+        name_size = name ? STRLEN_0(name) : 0;
+        inum = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        payload = path_size + name_size;
+        hdr = gf_hdr_new (request, payload);
+        GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->ino = hton64 (inum);
+        request->namelen = hton32 (name_size);
+
+        /* path first, then the (optional) name immediately after it */
+        strcpy (request->path, loc->path);
+        if (name)
+                strcpy (request->name + path_size, name);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETXATTR,
+                                     hdr, gf_hdr_len (request, payload),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+/**
+ * client_removexattr - removexattr function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location structure
+ * @name: attribute name, copied into the request right after the path
+ *
+ * external reference through client_protocol_xlator->fops->removexattr
+ */
+int32_t
+client_removexattr (call_frame_t *frame, xlator_t *this,
+                    loc_t *loc, const char *name)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_removexattr_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        size_t path_size = 0;
+        size_t name_size = 0;
+        size_t payload = 0;
+        ino_t inum = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_removexattr_cbk, conf->child,
+                            conf->child->fops->removexattr, loc, name);
+                return 0;
+        }
+
+        path_size = STRLEN_0(loc->path);
+        name_size = STRLEN_0(name);
+        inum = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        payload = path_size + name_size;
+        hdr = gf_hdr_new (request, payload);
+        GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->ino = hton64 (inum);
+
+        /* path first, then the name immediately after it */
+        strcpy (request->path, loc->path);
+        strcpy (request->name + path_size, name);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST,
+                                     GF_FOP_REMOVEXATTR,
+                                     hdr, gf_hdr_len (request, payload),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+}
+
+
+/**
+ * client_opendir - opendir function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location structure
+ * @fd: fd for the directory; a reference is taken and kept in frame->local
+ *
+ * external reference through client_protocol_xlator->fops->opendir
+ */
+int32_t
+client_opendir (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, fd_t *fd)
+{
+        client_conf_t *conf = this->private;
+        client_local_t *local = NULL;
+        gf_fop_opendir_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        size_t path_size = 0;
+        ino_t inum = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_opendir_cbk, conf->child,
+                            conf->child->fops->opendir, loc, fd);
+                return 0;
+        }
+
+        local = calloc (1, sizeof (*local));
+        GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+        /* remember the location and fd of this call in the frame */
+        loc_copy (&local->loc, loc);
+        local->fd = fd_ref (fd);
+        frame->local = local;
+
+        inum = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+        path_size = STRLEN_0(loc->path);
+
+        hdr = gf_hdr_new (request, path_size);
+        GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->ino = hton64 (inum);
+        strcpy (request->path, loc->path);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPENDIR,
+                                     hdr, gf_hdr_len (request, path_size),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+        STACK_UNWIND(frame, -1, EINVAL, fd);
+        return 0;
+
+}
+
+
+/**
+ * client_getdents - getdents function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: directory fd; its remote fd is looked up with this_fd_get()
+ * @size: sent as the request's size field
+ * @offset: sent as the request's offset field
+ * @flag: sent as the request's flags field
+ *
+ * Winds to conf->child when one is configured.  Otherwise builds a
+ * GF_FOP_GETDENTS request and passes it to protocol_client_xfer().
+ * Both failure paths unwind with the same argument list (NULL, 0).
+ *
+ * external reference through client_protocol_xlator->fops->getdents
+ */
+
+int32_t
+client_getdents (call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 size_t size,
+                 off_t offset,
+                 int32_t flag)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_getdents_req_t *req = NULL;
+        size_t hdrlen = 0;
+        int64_t remote_fd = -1;
+        int ret = -1;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_getdents_cbk,
+                            conf->child,
+                            conf->child->fops->getdents,
+                            fd,
+                            size,
+                            offset,
+                            flag);
+
+                return 0;
+        }
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                /* pass the trailing count as well, matching the unwind
+                 * at the end of this function */
+                STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
+                return 0;
+        }
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+        GF_VALIDATE_OR_GOTO(frame->this->name, req, unwind);
+
+        req->fd = hton64 (remote_fd);
+        req->size = hton32 (size);
+        req->offset = hton64 (offset);
+        req->flags = hton32 (flag);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETDENTS,
+                                    hdr, hdrlen, NULL, 0, NULL);
+
+        /* the transfer result is deliberately not propagated here */
+        return 0;
+unwind:
+        if (hdr)
+                free (hdr);
+        STACK_UNWIND(frame, -1, EINVAL, NULL, 0);
+        return 0;
+}
+
+/**
+ * client_readdir - readdir function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: directory fd; its remote fd is looked up with this_fd_get()
+ * @size: sent as the request's size field
+ * @offset: sent as the request's offset field
+ *
+ * external reference through client_protocol_xlator->fops->readdir
+ */
+
+int32_t
+client_readdir (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, size_t size, off_t offset)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_readdir_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_readdir_cbk, conf->child,
+                            conf->child->fops->readdir,
+                            fd, size, offset);
+                return 0;
+        }
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                goto unwind;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+        request->size = hton32 (size);
+        request->offset = hton64 (offset);
+
+        /* the transfer result is not propagated to the caller */
+        protocol_client_xfer (frame, this,
+                              CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                              GF_OP_TYPE_FOP_REQUEST, GF_FOP_READDIR,
+                              hdr, gf_hdr_len (request, 0), NULL, 0, NULL);
+        return 0;
+
+unwind:
+        free (hdr);
+        STACK_UNWIND (frame, -1, EBADFD, NULL);
+        return 0;
+
+}
+
+
+
+/**
+ * client_fsyncdir - fsyncdir function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @flags: sent as the request's data field
+ *
+ * external reference through client_protocol_xlator->fops->fsyncdir
+ */
+
+int32_t
+client_fsyncdir (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, int32_t flags)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_fsyncdir_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_fsyncdir_cbk, conf->child,
+                            conf->child->fops->fsyncdir, fd, flags);
+                return 0;
+        }
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                goto unwind;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->data = hton32 (flags);
+        request->fd = hton64 (server_fd);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNCDIR,
+                                     hdr, gf_hdr_len (request, 0),
+                                     NULL, 0, NULL);
+unwind:
+        STACK_UNWIND (frame, -1, EBADFD);
+        return 0;
+}
+
+
+/**
+ * client_access - access function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location structure
+ * @mask: access mask, sent in network byte order
+ *
+ * external reference through client_protocol_xlator->fops->access
+ */
+int32_t
+client_access (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, int32_t mask)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_access_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        size_t path_size = 0;
+        ino_t inum = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_access_cbk, conf->child,
+                            conf->child->fops->access, loc, mask);
+                return 0;
+        }
+
+        inum = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+        path_size = STRLEN_0(loc->path);
+
+        hdr = gf_hdr_new (request, path_size);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->ino = hton64 (inum);
+        request->mask = hton32 (mask);
+        strcpy (request->path, loc->path);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_ACCESS,
+                                     hdr, gf_hdr_len (request, path_size),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+
+}
+
+
+/**
+ * client_ftruncate - ftruncate function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @offset: offset to truncate to
+ *
+ * external reference through client_protocol_xlator->fops->ftruncate
+ */
+
+int32_t
+client_ftruncate (call_frame_t *frame, xlator_t *this,
+                  fd_t *fd, off_t offset)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_ftruncate_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_ftruncate_cbk, conf->child,
+                            conf->child->fops->ftruncate, fd, offset);
+                return 0;
+        }
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+        request->offset = hton64 (offset);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_FTRUNCATE,
+                                     hdr, gf_hdr_len (request, 0),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+
+}
+
+
+/**
+ * client_fstat - fstat function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ *
+ * external reference through client_protocol_xlator->fops->fstat
+ */
+
+int32_t
+client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_fstat_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_fstat_cbk, conf->child,
+                            conf->child->fops->fstat, fd);
+                return 0;
+        }
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                STACK_UNWIND (frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSTAT,
+                                     hdr, gf_hdr_len (request, 0),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+
+}
+
+
+/**
+ * client_lk - lk function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ * @cmd: lock command (F_GETLK, F_SETLK, F_SETLKW or their 64-bit forms)
+ * @flock: lock description; l_type is translated to a GF_LK_F_* value
+ *
+ * Winds to conf->child when one is configured.  Otherwise translates the
+ * command and lock type to their GF_LK_* equivalents, builds a GF_FOP_LK
+ * request and passes it to protocol_client_xfer().  An unknown command
+ * unwinds with EINVAL; a failed remote-fd lookup unwinds with EBADFD.
+ *
+ * external reference through client_protocol_xlator->fops->lk
+ */
+int32_t
+client_lk (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           int32_t cmd,
+           struct flock *flock)
+{
+        int ret = -1;
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_lk_req_t *req = NULL;
+        size_t hdrlen = 0;
+        int64_t remote_fd = -1;
+        int32_t gf_cmd = 0;
+        int32_t gf_type = 0;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_lk_cbk,
+                            conf->child,
+                            conf->child->fops->lk,
+                            fd,
+                            cmd,
+                            flock);
+
+                return 0;
+        }
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                STACK_UNWIND(frame, -1, EBADFD, NULL);
+                return 0;
+        }
+
+        if (cmd == F_GETLK || cmd == F_GETLK64)
+                gf_cmd = GF_LK_GETLK;
+        else if (cmd == F_SETLK || cmd == F_SETLK64)
+                gf_cmd = GF_LK_SETLK;
+        else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+                gf_cmd = GF_LK_SETLKW;
+        else {
+                /* log the command we were given; gf_cmd is still 0 here */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unknown cmd (%d)!", cmd);
+                goto unwind;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        default:
+                /* any other l_type is sent as 0, as before */
+                break;
+        }
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->fd = hton64 (remote_fd);
+        req->cmd = hton32 (gf_cmd);
+        req->type = hton32 (gf_type);
+        gf_flock_from_flock (&req->flock, flock);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_LK,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/**
+ * client_inodelk - inodelk function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location whose path and inode number are placed in the request
+ * @cmd: lock command (F_GETLK, F_SETLK, F_SETLKW or their 64-bit forms)
+ * @flock: flock struct; l_type is translated to a GF_LK_F_* value
+ *
+ * Winds to conf->child when one is configured.  Otherwise translates the
+ * command and lock type, builds a GF_FOP_INODELK request and passes it to
+ * protocol_client_xfer().  Any local failure unwinds with -1 / EINVAL.
+ *
+ * external reference through client_protocol_xlator->fops->inodelk
+ */
+int32_t
+client_inodelk (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                int32_t cmd,
+                struct flock *flock)
+{
+        int ret = -1;
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_inodelk_req_t *req = NULL;
+        size_t hdrlen = 0;
+        int32_t gf_cmd = 0;
+        int32_t gf_type = 0;
+        ino_t ino = 0;
+        size_t pathlen = 0;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_inodelk_cbk,
+                            conf->child,
+                            conf->child->fops->inodelk,
+                            loc, cmd, flock);
+
+                return 0;
+        }
+
+        pathlen = STRLEN_0(loc->path);
+        ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        if (cmd == F_GETLK || cmd == F_GETLK64)
+                gf_cmd = GF_LK_GETLK;
+        else if (cmd == F_SETLK || cmd == F_SETLK64)
+                gf_cmd = GF_LK_SETLK;
+        else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+                gf_cmd = GF_LK_SETLKW;
+        else {
+                /* log the command we were given; gf_cmd is still 0 here */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unknown cmd (%d)!", cmd);
+                goto unwind;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        default:
+                /* any other l_type is sent as 0, as before */
+                break;
+        }
+
+        hdrlen = gf_hdr_len (req, pathlen);
+        hdr = gf_hdr_new (req, pathlen);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        strcpy (req->path, loc->path);
+
+        req->ino = hton64 (ino);
+
+        req->cmd = hton32 (gf_cmd);
+        req->type = hton32 (gf_type);
+        gf_flock_from_flock (&req->flock, flock);
+
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST,
+                                    GF_FOP_INODELK,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+
+}
+
+
+/**
+ * client_finodelk - finodelk function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor; its remote fd is looked up with this_fd_get()
+ * @cmd: lock command (F_GETLK, F_SETLK, F_SETLKW or their 64-bit forms)
+ * @flock: flock struct; l_type is translated to a GF_LK_F_* value
+ *
+ * Winds to conf->child when one is configured.  Otherwise translates the
+ * command and lock type, builds a GF_FOP_FINODELK request and passes it
+ * to protocol_client_xfer().  A failed remote-fd lookup unwinds with
+ * EBADFD; other local failures unwind with EINVAL.
+ *
+ * external reference through client_protocol_xlator->fops->finodelk
+ */
+int32_t
+client_finodelk (call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 int32_t cmd,
+                 struct flock *flock)
+{
+        int ret = -1;
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_finodelk_req_t *req = NULL;
+        size_t hdrlen = 0;
+        int32_t gf_cmd = 0;
+        int32_t gf_type = 0;
+        int64_t remote_fd = -1;
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_finodelk_cbk,
+                            conf->child,
+                            conf->child->fops->finodelk,
+                            fd, cmd, flock);
+
+                return 0;
+        }
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                STACK_UNWIND(frame, -1, EBADFD);
+                return 0;
+        }
+
+        if (cmd == F_GETLK || cmd == F_GETLK64)
+                gf_cmd = GF_LK_GETLK;
+        else if (cmd == F_SETLK || cmd == F_SETLK64)
+                gf_cmd = GF_LK_SETLK;
+        else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+                gf_cmd = GF_LK_SETLKW;
+        else {
+                /* log the command we were given; gf_cmd is still 0 here */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Unknown cmd (%d)!", cmd);
+                goto unwind;
+        }
+
+        switch (flock->l_type) {
+        case F_RDLCK:
+                gf_type = GF_LK_F_RDLCK;
+                break;
+        case F_WRLCK:
+                gf_type = GF_LK_F_WRLCK;
+                break;
+        case F_UNLCK:
+                gf_type = GF_LK_F_UNLCK;
+                break;
+        default:
+                /* any other l_type is sent as 0, as before */
+                break;
+        }
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->fd = hton64 (remote_fd);
+
+        req->cmd = hton32 (gf_cmd);
+        req->type = hton32 (gf_type);
+        gf_flock_from_flock (&req->flock, flock);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST,
+                                    GF_FOP_FINODELK,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+}
+
+
+/**
+ * client_entrylk - entrylk function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location whose path and inode number are placed in the request
+ * @name: optional entry name, copied right after the path when non-NULL
+ * @cmd: entry lock command, sent in network byte order
+ * @type: entry lock type, sent in network byte order
+ */
+int32_t
+client_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, entrylk_cmd cmd, entrylk_type type)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_entrylk_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        size_t path_size = 0;
+        size_t name_size = 0;
+        size_t payload = 0;
+        ino_t inum = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_entrylk_cbk,
+                            conf->child,
+                            conf->child->fops->entrylk,
+                            loc, name, cmd, type);
+                return 0;
+        }
+
+        path_size = STRLEN_0(loc->path);
+        name_size = name ? STRLEN_0(name) : 0;
+        inum = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+
+        payload = path_size + name_size;
+        hdr = gf_hdr_new (request, payload);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->ino = hton64 (inum);
+        request->namelen = hton64 (name_size);
+
+        strcpy (request->path, loc->path);
+        if (name)
+                strcpy (request->name + path_size, name);
+
+        request->cmd = hton32 (cmd);
+        request->type = hton32 (type);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_ENTRYLK,
+                                     hdr, gf_hdr_len (request, payload),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+
+}
+
+
+/**
+ * client_fentrylk - fentrylk function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor; its remote fd is looked up with this_fd_get()
+ * @name: optional entry name, copied into the request when non-NULL
+ * @cmd: entry lock command, sent in network byte order
+ * @type: entry lock type, sent in network byte order
+ */
+int32_t
+client_fentrylk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, entrylk_cmd cmd, entrylk_type type)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_fentrylk_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+        size_t name_size = 0;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_fentrylk_cbk,
+                            conf->child,
+                            conf->child->fops->fentrylk,
+                            fd, name, cmd, type);
+                return 0;
+        }
+
+        name_size = name ? STRLEN_0(name) : 0;
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                STACK_UNWIND(frame, -1, EBADFD);
+                return 0;
+        }
+
+        hdr = gf_hdr_new (request, name_size);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+        request->namelen = hton64 (name_size);
+
+        if (name)
+                strcpy (request->name, name);
+
+        request->cmd = hton32 (cmd);
+        request->type = hton32 (type);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                     GF_OP_TYPE_FOP_REQUEST, GF_FOP_FENTRYLK,
+                                     hdr, gf_hdr_len (request, name_size),
+                                     NULL, 0, NULL);
+unwind:
+        free (hdr);
+
+        STACK_UNWIND(frame, -1, EINVAL);
+        return 0;
+}
+
+
+/*
+ * client_lookup - lookup function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @loc: location; loc->path is always sent, and loc->name is appended
+ *       after it unless loc->ino is 1
+ * @xattr_req: optional dictionary, serialized after the path and name
+ *
+ * Winds to conf->child when one is configured.  Otherwise keeps a copy
+ * of @loc in frame->local, builds a GF_FOP_LOOKUP request and passes it
+ * to protocol_client_xfer().  Any local failure unwinds the frame with
+ * op_ret -1 and EINVAL.
+ *
+ * not for external reference
+ */
+int32_t
+client_lookup (call_frame_t *frame,
+               xlator_t *this,
+               loc_t *loc,
+               dict_t *xattr_req)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_lookup_req_t *req = NULL;
+        size_t hdrlen = -1;
+        int ret = -1;
+        int32_t len = 0;
+        ino_t ino = 0;
+        ino_t par = 0;
+        size_t dictlen = 0;
+        size_t pathlen = 0;
+        size_t baselen = 0;
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        client_conf_t *conf = this->private;
+        client_local_t *local = NULL;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_lookup_cbk,
+                            conf->child,
+                            conf->child->fops->lookup,
+                            loc,
+                            xattr_req);
+
+                return 0;
+        }
+
+        /* validate loc before it is copied or dereferenced */
+        GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
+
+        local = calloc (1, sizeof (*local));
+        GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+        loc_copy (&local->loc, loc);
+
+        frame->local = local;
+
+        if (loc->ino != 1) {
+                par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT);
+                GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
+                baselen = STRLEN_0(loc->name);
+        } else {
+                ino = 1;
+        }
+
+        pathlen = STRLEN_0(loc->path);
+
+        if (xattr_req) {
+                /* keep the result signed: dictlen is a size_t, so a
+                 * negative error value could never be detected on it */
+                len = dict_serialized_length (xattr_req);
+                if (len < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to get serialized length of dict(%p)",
+                                xattr_req);
+                        ret = len;
+                        goto unwind;
+                }
+                dictlen = len;
+        }
+
+        hdrlen = gf_hdr_len (req, pathlen + baselen + dictlen);
+        hdr = gf_hdr_new (req, pathlen + baselen + dictlen);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->ino = hton64 (ino);
+        req->par = hton64 (par);
+        /* payload layout: path, then basename, then serialized dict */
+        strcpy (req->path, loc->path);
+        if (baselen)
+                strcpy (req->path + pathlen, loc->name);
+
+        if (dictlen) {
+                ret = dict_serialize (xattr_req, req->dict + baselen + pathlen);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to serialize dictionary(%p)",
+                                xattr_req);
+                        goto unwind;
+                }
+        }
+
+        req->dictlen = hton32 (dictlen);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_LOOKUP,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+
+unwind:
+        /* the header is only handed off on the success path above */
+        if (hdr)
+                free (hdr);
+
+        /* NOTE(review): frame->local (and its loc copy) is left for
+         * whoever tears down the frame - confirm that it is released */
+        STACK_UNWIND (frame, op_ret, op_errno,
+                      (loc ? loc->inode : NULL), NULL, NULL);
+        return ret;
+}
+
+
+
+/*
+ * client_fchmod - fchmod function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor; its remote fd is looked up with this_fd_get()
+ * @mode: new mode, sent in network byte order
+ *
+ * Unwinds with EBADFD when the remote fd cannot be found and with EINVAL
+ * for any other local failure.  Always returns 0.
+ */
+int32_t
+client_fchmod (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, mode_t mode)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_fchmod_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = -1;
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_fchmod_cbk, conf->child,
+                            conf->child->fops->fchmod, fd, mode);
+                return 0;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                op_errno = EBADFD;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                goto unwind;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+        request->mode = hton32 (mode);
+
+        /* the transfer result is not propagated to the caller */
+        protocol_client_xfer (frame, this,
+                              CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                              GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHMOD,
+                              hdr, gf_hdr_len (request, 0), NULL, 0, NULL);
+        return 0;
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+
+/*
+ * client_fchown - fchown function for client protocol
+ *
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: file descriptor; its remote fd is looked up with this_fd_get()
+ * @uid: new owner, sent in network byte order
+ * @gid: new group, sent in network byte order
+ *
+ * Unwinds with EBADFD when the remote fd cannot be found and with EINVAL
+ * for any other local failure.  Always returns 0.
+ */
+int32_t
+client_fchown (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, uid_t uid, gid_t gid)
+{
+        client_conf_t *conf = this->private;
+        gf_fop_fchown_req_t *request = NULL;
+        gf_hdr_common_t *hdr = NULL;
+        int64_t server_fd = 0;
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame, default_fchown_cbk, conf->child,
+                            conf->child->fops->fchown, fd, uid, gid);
+                return 0;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+        if (this_fd_get (fd, this, &server_fd) == -1) {
+                op_errno = EBADFD;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                goto unwind;
+        }
+
+        hdr = gf_hdr_new (request, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
+
+        request = gf_param (hdr);
+        request->fd = hton64 (server_fd);
+        request->uid = hton32 (uid);
+        request->gid = hton32 (gid);
+
+        /* the transfer result is not propagated to the caller */
+        protocol_client_xfer (frame, this,
+                              CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
+                              GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHOWN,
+                              hdr, gf_hdr_len (request, 0), NULL, 0, NULL);
+        return 0;
+
+unwind:
+        STACK_UNWIND (frame, op_ret, op_errno, NULL);
+        return 0;
+
+}
+
+/**
+ * client_setdents - setdents function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @fd: directory fd; its remote fd is looked up with this_fd_get()
+ * @flags: sent as the request's flags field
+ * @entries: list head; the entries reachable from entries->next are sent
+ * @count: sent as the request's count field; must be non-zero
+ *
+ * Winds to conf->child when one is configured.  Otherwise every entry is
+ * rendered as one "name/<stat fields>link\n" line into a single buffer,
+ * which is sent as the payload of a GF_FOP_SETDENTS request.  The buffer
+ * is attached to a dictionary referenced from frame->root->rsp_refs.
+ * On a local failure the header and the buffer are released and the frame
+ * is unwound with op_ret -1.
+ */
+int32_t
+client_setdents (call_frame_t *frame,
+                 xlator_t *this,
+                 fd_t *fd,
+                 int32_t flags,
+                 dir_entry_t *entries,
+                 int32_t count)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_setdents_req_t *req = NULL;
+        int64_t remote_fd = 0;
+        char *buffer = NULL;
+        char *ptr = NULL;
+        data_t *buf_data = NULL;
+        dict_t *reply_dict = NULL;
+        dir_entry_t *trav = NULL;
+        uint32_t len = 0;
+        size_t remaining = 0;
+        int32_t buf_len = 0;
+        int32_t ret = -1;
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        int32_t vec_count = 0;
+        size_t hdrlen = -1;
+        struct iovec vector[1];
+        client_conf_t *conf = this->private;
+
+        if (conf->child) {
+                /* a child translator is configured: pass the call down */
+                STACK_WIND (frame,
+                            default_setdents_cbk,
+                            conf->child,
+                            conf->child->fops->setdents,
+                            fd,
+                            flags,
+                            entries,
+                            count);
+
+                return 0;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd. returning EBADFD",
+                        fd->inode->ino);
+                op_errno = EBADFD;
+                goto unwind;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, entries, unwind);
+        GF_VALIDATE_OR_GOTO (this->name, count, unwind);
+
+        /* first pass: upper bound for the rendered size of every entry */
+        for (trav = entries->next; trav; trav = trav->next) {
+                len += strlen (trav->name);
+                len += 1;
+                len += strlen (trav->link);
+                len += 1;
+                len += 256; // max possible for statbuf;
+        }
+
+        /* one extra byte so that an empty list still yields a valid,
+         * NUL-terminated (empty) string */
+        buffer = CALLOC (1, len + 1);
+        GF_VALIDATE_OR_GOTO (this->name, buffer, unwind);
+
+        ptr = buffer;
+        remaining = (size_t) len + 1;
+
+        /* second pass: render each entry into the buffer */
+        for (trav = entries->next; trav; trav = trav->next) {
+                int this_len = 0;
+                char *tmp_buf = NULL;
+                struct stat *stbuf = &trav->buf;
+
+                /* Convert the stat buf to string */
+                uint64_t dev = stbuf->st_dev;
+                uint64_t ino = stbuf->st_ino;
+                uint32_t mode = stbuf->st_mode;
+                uint32_t nlink = stbuf->st_nlink;
+                uint32_t uid = stbuf->st_uid;
+                uint32_t gid = stbuf->st_gid;
+                uint64_t rdev = stbuf->st_rdev;
+                uint64_t size = stbuf->st_size;
+                uint32_t blksize = stbuf->st_blksize;
+                uint64_t blocks = stbuf->st_blocks;
+
+                uint32_t atime = stbuf->st_atime;
+                uint32_t mtime = stbuf->st_mtime;
+                uint32_t ctime = stbuf->st_ctime;
+
+                uint32_t atime_nsec = ST_ATIM_NSEC(stbuf);
+                uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf);
+                uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf);
+
+                /* on failure asprintf leaves tmp_buf undefined, so it
+                 * must not be used or freed */
+                if (asprintf (&tmp_buf,
+                              GF_STAT_PRINT_FMT_STR,
+                              dev,
+                              ino,
+                              mode,
+                              nlink,
+                              uid,
+                              gid,
+                              rdev,
+                              size,
+                              blksize,
+                              blocks,
+                              atime,
+                              atime_nsec,
+                              mtime,
+                              mtime_nsec,
+                              ctime,
+                              ctime_nsec) == -1) {
+                        op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                this_len = snprintf (ptr, remaining, "%s/%s%s\n",
+                                     trav->name,
+                                     tmp_buf,
+                                     trav->link);
+
+                FREE (tmp_buf);
+
+                if (this_len < 0 || (size_t) this_len >= remaining) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "entry does not fit in the setdents buffer");
+                        goto unwind;
+                }
+
+                ptr += this_len;
+                remaining -= this_len;
+        }
+        buf_len = ptr - buffer;
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        req->fd = hton64 (remote_fd);
+        req->flags = hton32 (flags);
+        req->count = hton32 (count);
+
+        {
+                buf_data = get_new_data ();
+                GF_VALIDATE_OR_GOTO (this->name, buf_data, unwind);
+                reply_dict = get_new_dict();
+                GF_VALIDATE_OR_GOTO (this->name, reply_dict, unwind);
+
+                /* from here on the buffer belongs to buf_data / the
+                 * dictionary; no failure path follows this point */
+                buf_data->data = buffer;
+                buf_data->len = buf_len;
+                dict_set (reply_dict, NULL, buf_data);
+                frame->root->rsp_refs = dict_ref (reply_dict);
+                vector[0].iov_base = buffer;
+                vector[0].iov_len = buf_len;
+                vec_count = 1;
+        }
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETDENTS,
+                                    hdr, hdrlen, vector, vec_count,
+                                    frame->root->rsp_refs);
+
+        return ret;
+unwind:
+        /* every jump here happens before the buffer was attached to
+         * buf_data, so it is still owned by this function */
+        if (hdr)
+                free (hdr);
+        FREE (buffer);
+        /* NOTE(review): buf_data is still leaked when get_new_dict()
+         * fails; release it with the matching data_t destructor */
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * CBKs
+ */
+/*
+ * client_forget - queue an inode number for a forget request
+ * @this: client protocol translator
+ * @inode: inode whose number is to be forgotten
+ *
+ * Does nothing when conf->child is set.  Otherwise the inode number is
+ * appended to conf->forget.ino_array under conf->forget.lock.  When no
+ * forget frame is in transit, or the queue has reached
+ * CLIENT_PROTO_FORGET_LIMIT, client_get_forgets() is called; a positive
+ * result causes a GF_CBK_FORGET request to be sent on the bulk channel
+ * after the lock has been dropped.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_forget (xlator_t *this,
+               inode_t *inode)
+{
+        client_forget_t  pending = {0,};
+        client_conf_t   *conf    = NULL;
+        ino_t            ino     = 0;
+        int32_t          nready  = -1;
+        uint8_t          flush   = 0;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+
+        conf = this->private;
+        if (conf->child) {
+                /* nothing to do here */
+                return 0;
+        }
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        ino = this_ino_get_from_inode (inode, this);
+
+        LOCK (&conf->forget.lock);
+        {
+                conf->forget.ino_array[conf->forget.count++] = ino;
+
+                if (!conf->forget.frames_in_transit ||
+                    conf->forget.count >= CLIENT_PROTO_FORGET_LIMIT) {
+                        nready = client_get_forgets (this, &pending);
+                        flush  = (nready > 0) ? 1 : 0;
+                }
+        }
+        UNLOCK (&conf->forget.lock);
+
+        if (!flush)
+                goto out;
+
+        /* the transfer is issued outside the forget lock */
+        protocol_client_xfer (pending.frame, this,
+                              CLIENT_CHANNEL (this,CHANNEL_BULK),
+                              GF_OP_TYPE_CBK_REQUEST,
+                              GF_CBK_FORGET,
+                              pending.hdr, pending.hdrlen,
+                              NULL, 0, NULL);
+out:
+        return 0;
+}
+
+/**
+ * client_releasedir - releasedir function for client protocol
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ *
+ * Removes the fd's "%p" key from conf->saved_fds and sends a
+ * GF_CBK_RELEASEDIR request carrying the remote fd on the bulk channel.
+ *
+ * Returns 0 without doing anything when conf->child is set, -1 when
+ * argument validation, the remote-fd lookup or an allocation fails, and
+ * otherwise the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->cbks->releasedir
+ */
+
+int32_t
+client_releasedir (xlator_t *this, fd_t *fd)
+{
+        call_frame_t            *fr = NULL;
+        int32_t                  ret = -1;
+        int64_t                  remote_fd = 0;
+        char                     key[32] = {0,};
+        gf_hdr_common_t         *hdr = NULL;
+        size_t                   hdrlen = 0;
+        gf_cbk_releasedir_req_t *req = NULL;
+        client_conf_t           *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        conf = this->private;
+        if (conf->child) {
+                return 0;
+        }
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1){
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd.",
+                        fd->inode->ino);
+                goto out;
+        }
+
+        /* from here on a failure must not return this_fd_get()'s
+         * success value */
+        ret = -1;
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr    = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, out);
+
+        req = gf_param (hdr);
+
+        req->fd = hton64 (remote_fd);
+
+        {
+                snprintf (key, sizeof (key), "%p", fd);
+
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        dict_del (conf->saved_fds, key);
+                }
+                pthread_mutex_unlock (&conf->mutex);
+        }
+
+        fr = create_frame (this, this->ctx->pool);
+        if (!fr) {
+                /* the header was never handed to protocol_client_xfer(),
+                 * so it is still ours to release.
+                 * NOTE(review): assumes gf_hdr_new() memory can be
+                 * released with FREE() -- confirm against protocol.h */
+                FREE (hdr);
+        }
+        GF_VALIDATE_OR_GOTO (this->name, fr, out);
+
+        ret = protocol_client_xfer (fr, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASEDIR,
+                                    hdr, hdrlen, NULL, 0, NULL);
+out:
+        return ret;
+}
+
+
+/**
+ * client_release - release function for client protocol
+ * @this: this translator structure
+ * @fd: file descriptor structure
+ *
+ * Removes the fd's "%p" key from conf->saved_fds and sends a
+ * GF_CBK_RELEASE request carrying the remote fd on the bulk channel.
+ *
+ * Returns 0 without doing anything when conf->child is set, -1 when
+ * argument validation, the remote-fd lookup or an allocation fails, and
+ * otherwise the result of protocol_client_xfer().
+ *
+ * external reference through client_protocol_xlator->cbks->release
+ *
+ */
+int
+client_release (xlator_t *this, fd_t *fd)
+{
+        call_frame_t         *fr = NULL;
+        int32_t               ret = -1;
+        int64_t               remote_fd = 0;
+        char                  key[32] = {0,};
+        gf_hdr_common_t      *hdr = NULL;
+        size_t                hdrlen = 0;
+        gf_cbk_release_req_t *req = NULL;
+        client_conf_t        *conf = NULL;
+
+        GF_VALIDATE_OR_GOTO ("client", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        conf = this->private;
+
+        if (conf->child) {
+                return 0;
+        }
+
+        ret = this_fd_get (fd, this, &remote_fd);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "(%"PRId64"): failed to get remote fd.",
+                        fd->inode->ino);
+                goto out;
+        }
+
+        /* from here on a failure must not return this_fd_get()'s
+         * success value */
+        ret = -1;
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr    = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, out);
+        req    = gf_param (hdr);
+
+        req->fd = hton64 (remote_fd);
+
+        {
+                snprintf (key, sizeof (key), "%p", fd);
+
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        dict_del (conf->saved_fds, key);
+                }
+                pthread_mutex_unlock (&conf->mutex);
+        }
+
+        fr = create_frame (this, this->ctx->pool);
+        if (!fr) {
+                /* the header was never handed to protocol_client_xfer(),
+                 * so it is still ours to release.
+                 * NOTE(review): assumes gf_hdr_new() memory can be
+                 * released with FREE() -- confirm against protocol.h */
+                FREE (hdr);
+        }
+        GF_VALIDATE_OR_GOTO (this->name, fr, out);
+
+        ret = protocol_client_xfer (fr, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASE,
+                                    hdr, hdrlen, NULL, 0, NULL);
+out:
+        return ret;
+}
+
+/*
+ * MGMT_OPS
+ */
+
+/**
+ * client_stats - stats function for client protocol
+ * @frame: call frame
+ * @this: this translator structure
+ * @flags: value copied (in network byte order) into the request
+ *
+ * When conf->child is set the call is wound to the child's mops->stats.
+ * Otherwise a GF_MOP_STATS request is built and sent on the bulk
+ * channel, and the result of protocol_client_xfer() is returned.  If
+ * @this is NULL or the header cannot be allocated, the frame is unwound
+ * with (-1, EINVAL, NULL) and 0 is returned.
+ *
+ * external reference through client_protocol_xlator->mops->stats
+ */
+
+int32_t
+client_stats (call_frame_t *frame,
+              xlator_t *this,
+              int32_t flags)
+{
+        client_conf_t      *conf   = NULL;
+        gf_mop_stats_req_t *req    = NULL;
+        gf_hdr_common_t    *hdr    = NULL;
+        size_t              hdrlen = -1;
+
+        GF_VALIDATE_OR_GOTO ("client", this, unwind);
+
+        conf = this->private;
+        if (conf->child) {
+                STACK_WIND (frame,
+                            default_stats_cbk,
+                            conf->child,
+                            conf->child->mops->stats,
+                            flags);
+                return 0;
+        }
+
+        hdrlen = gf_hdr_len (req, 0);
+        hdr    = gf_hdr_new (req, 0);
+        GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
+
+        req        = gf_param (hdr);
+        req->flags = hton32 (flags);
+
+        return protocol_client_xfer (frame, this,
+                                     CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                     GF_OP_TYPE_MOP_REQUEST, GF_MOP_STATS,
+                                     hdr, hdrlen, NULL, 0, NULL);
+unwind:
+        STACK_UNWIND (frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/* Callbacks */
+
+/*
+ * client_fxattrop_cbk - fxattrop callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the xattrop reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * On a non-negative op_ret the serialized dictionary in the reply (if
+ * any) is copied, unserialized and handed up with the frame.  Any
+ * failure while doing so unwinds with op_ret -1 and a non-zero
+ * op_errno (EINVAL by default, -ret for an unserialize failure).
+ * Always returns 0.
+ */
+int32_t
+client_fxattrop_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_xattrop_rsp_t *rsp      = NULL;
+        /* start in the failed state so that every early "goto fail"
+         * reports an error instead of (0, 0) */
+        int32_t               op_ret   = -1;
+        int32_t               op_errno = EINVAL;
+        int32_t               gf_errno = 0;
+        int32_t               dict_len = 0;
+        dict_t               *dict     = NULL;
+        int32_t               ret      = -1;
+        char                 *dictbuf  = NULL;
+
+        rsp = gf_param (hdr);
+        GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+
+        if (op_ret >= 0) {
+                /* stays -1 until the dictionary has been decoded */
+                op_ret = -1;
+                dict_len = ntoh32 (rsp->dict_len);
+
+                if (dict_len > 0) {
+                        dictbuf = memdup (rsp->dict, dict_len);
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
+
+                        dict = dict_new();
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
+
+                        ret = dict_unserialize (dictbuf, dict_len, &dict);
+                        if (ret < 0) {
+                                gf_log (frame->this->name, GF_LOG_ERROR,
+                                        "failed to unserialize dictionary(%p)",
+                                        dict);
+                                op_errno = -ret;
+                                goto fail;
+                        }
+
+                        /* the dictionary now owns the copied buffer */
+                        dict->extra_free = dictbuf;
+                        dictbuf = NULL;
+                }
+                op_ret = 0;
+        }
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+fail:
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        if (dictbuf)
+                free (dictbuf);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * client_xattrop_cbk - xattrop callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the xattrop reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * On a non-negative op_ret the serialized dictionary in the reply (if
+ * any) is copied, unserialized and handed up with the frame.  Any
+ * failure while doing so unwinds with op_ret -1 and op_errno EINVAL.
+ * Always returns 0.
+ */
+int32_t
+client_xattrop_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        gf_fop_xattrop_rsp_t *rsp      = NULL;
+        int32_t               op_ret   = -1;
+        int32_t               gf_errno = EINVAL;
+        /* non-zero default so that "goto fail" never unwinds with
+         * op_ret == -1 and op_errno == 0 */
+        int32_t               op_errno = EINVAL;
+        int32_t               dict_len = 0;
+        dict_t               *dict     = NULL;
+        int32_t               ret      = -1;
+        char                 *dictbuf  = NULL;
+
+        rsp = gf_param (hdr);
+        GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        if (op_ret >= 0) {
+                /* stays -1 until the dictionary has been decoded */
+                op_ret = -1;
+                dict_len = ntoh32 (rsp->dict_len);
+
+                if (dict_len > 0) {
+                        dictbuf = memdup (rsp->dict, dict_len);
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
+
+                        dict = get_new_dict();
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
+                        dict_ref (dict);
+
+                        ret = dict_unserialize (dictbuf, dict_len, &dict);
+                        if (ret < 0) {
+                                gf_log (frame->this->name, GF_LOG_ERROR,
+                                        "failed to unserialize dictionary(%p)",
+                                        dict);
+                                goto fail;
+                        }
+
+                        /* the dictionary now owns the copied buffer */
+                        dict->extra_free = dictbuf;
+                        dictbuf = NULL;
+                }
+                op_ret = 0;
+        }
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+fail:
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        if (dictbuf)
+                free (dictbuf);
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * client_fchown_cbk - fchown callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the fchown reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_fchown_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_fchown_rsp_t *reply  = gf_param (hdr);
+        struct stat          attr   = {0, };
+        int32_t              status = ntoh32 (hdr->rsp.op_ret);
+        int32_t              err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+
+/*
+ * client_fchmod_cbk - fchmod callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the fchmod reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_fchmod_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_fchmod_rsp_t *reply  = gf_param (hdr);
+        struct stat          attr   = {0, };
+        int32_t              status = ntoh32 (hdr->rsp.op_ret);
+        int32_t              err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+
+/*
+ * client_create_cbk - create callback function for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the create reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * On success (op_ret >= 0) the remote fd and stat are decoded, the
+ * inode number and remote fd are recorded through this_ino_set() and
+ * this_fd_set(), and the fd's "%p" key is stored in conf->saved_fds
+ * under conf->mutex.  A failure to store the key is only logged.  The
+ * frame is then unwound and the local wiped.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int
+client_create_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_create_rsp_t *rsp = NULL;
+        int32_t              op_ret = 0;
+        int32_t              op_errno = 0;
+        fd_t                *fd = NULL;
+        inode_t             *inode = NULL;
+        struct stat          stbuf = {0, };
+        int64_t              remote_fd = 0;
+        char                 key[32] = {0, };
+        int32_t              ret = -1;
+        client_local_t      *local = NULL;
+        client_conf_t       *conf = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        conf = frame->this->private;
+        fd = local->fd;
+        inode = local->loc.inode;
+
+        rsp = gf_param (hdr);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        /* map the wire errno to a local errno, as the other callbacks
+         * in this file do */
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (op_ret >= 0) {
+                remote_fd = ntoh64 (rsp->fd);
+                gf_stat_to_stat (&rsp->stat, &stbuf);
+
+                this_ino_set (&local->loc, frame->this, stbuf.st_ino);
+                this_fd_set (fd, frame->this, &local->loc, remote_fd);
+
+                snprintf (key, sizeof (key), "%p", fd);
+
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        ret = dict_set_str (conf->saved_fds, key, "");
+                }
+                pthread_mutex_unlock (&conf->mutex);
+
+                if (ret < 0) {
+                        /* key is a stack array and must never be passed
+                         * to free() */
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to save remote fd",
+                                local->loc.path, stbuf.st_ino);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd, inode, &stbuf);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+
+/*
+ * client_open_cbk - open callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the open reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * On success (op_ret >= 0) the remote fd is decoded and recorded with
+ * this_fd_set(), and the fd's "%p" key is stored in conf->saved_fds
+ * under conf->mutex.  A failure to store the key is only logged.  The
+ * frame is then unwound and the local wiped.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_open_cbk (call_frame_t *frame,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        int32_t            op_ret = -1;
+        int32_t            op_errno = ENOTCONN;
+        fd_t              *fd = NULL;
+        int64_t            remote_fd = 0;
+        gf_fop_open_rsp_t *rsp = NULL;
+        char               key[32] = {0,};
+        int32_t            ret = -1;
+        client_local_t    *local = NULL;
+        client_conf_t     *conf = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        conf = frame->this->private;
+        fd = local->fd;
+
+        rsp = gf_param (hdr);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        /* map the wire errno to a local errno, as the other callbacks
+         * in this file do */
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (op_ret >= 0) {
+                remote_fd = ntoh64 (rsp->fd);
+
+                this_fd_set (fd, frame->this, &local->loc, remote_fd);
+
+                snprintf (key, sizeof (key), "%p", fd);
+
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        ret = dict_set_str (conf->saved_fds, key, "");
+                }
+                pthread_mutex_unlock (&conf->mutex);
+
+                if (ret < 0) {
+                        /* key is a stack array and must never be passed
+                         * to free() */
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to save remote fd",
+                                local->loc.path, local->loc.inode->ino);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+/*
+ * client_stat_cbk - stat callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the stat reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int
+client_stat_cbk (call_frame_t *frame,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        gf_fop_stat_rsp_t *reply  = gf_param (hdr);
+        struct stat        attr   = {0, };
+        int32_t            status = ntoh32 (hdr->rsp.op_ret);
+        int32_t            err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_utimens_cbk - utimens callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the utimens reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_utimens_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        gf_fop_utimens_rsp_t *reply  = gf_param (hdr);
+        struct stat           attr   = {0, };
+        int32_t               status = ntoh32 (hdr->rsp.op_ret);
+        int32_t               err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_chmod_cbk - chmod callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the chmod reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_chmod_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_chmod_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_chown_cbk - chown callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the chown reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_chown_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_chown_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_mknod_cbk - mknod callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the mknod reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is non-negative the reply's stat is converted and its
+ * inode number recorded with this_ino_set().  The frame is unwound with
+ * the inode taken from the local, which is wiped afterwards.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_mknod_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        client_local_t     *local  = frame->local;
+        inode_t            *inode  = local->loc.inode;
+        gf_fop_mknod_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        /* detach the local from the frame before unwinding */
+        frame->local = NULL;
+
+        if (status >= 0) {
+                gf_stat_to_stat (&reply->stat, &attr);
+                this_ino_set (&local->loc, frame->this, attr.st_ino);
+        }
+
+        STACK_UNWIND (frame, status, err, inode, &attr);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+/*
+ * client_symlink_cbk - symlink callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the symlink reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is non-negative the reply's stat is converted and its
+ * inode number recorded with this_ino_set().  The frame is unwound with
+ * the inode taken from the local, which is wiped afterwards.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_symlink_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        client_local_t       *local  = frame->local;
+        inode_t              *inode  = local->loc.inode;
+        gf_fop_symlink_rsp_t *reply  = gf_param (hdr);
+        struct stat           attr   = {0, };
+        int32_t               status = ntoh32 (hdr->rsp.op_ret);
+        int32_t               err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        /* detach the local from the frame before unwinding */
+        frame->local = NULL;
+
+        if (status >= 0) {
+                gf_stat_to_stat (&reply->stat, &attr);
+                this_ino_set (&local->loc, frame->this, attr.st_ino);
+        }
+
+        STACK_UNWIND (frame, status, err, inode, &attr);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+/*
+ * client_link_cbk - link callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the link reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is non-negative the reply's stat is converted.  The frame
+ * is unwound with the inode taken from the local, which is wiped
+ * afterwards.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_link_cbk (call_frame_t *frame,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        client_local_t    *local  = frame->local;
+        inode_t           *inode  = local->loc.inode;
+        gf_fop_link_rsp_t *reply  = gf_param (hdr);
+        struct stat        attr   = {0, };
+        int32_t            status = ntoh32 (hdr->rsp.op_ret);
+        int32_t            err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        /* detach the local from the frame before unwinding */
+        frame->local = NULL;
+
+        if (status >= 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, inode, &attr);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+/*
+ * client_truncate_cbk - truncate callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the truncate reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_truncate_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_truncate_rsp_t *reply  = gf_param (hdr);
+        struct stat            attr   = {0, };
+        int32_t                status = ntoh32 (hdr->rsp.op_ret);
+        int32_t                err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_fstat_cbk - fstat callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the fstat reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_fstat_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_fstat_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_ftruncate_cbk - ftruncate callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the ftruncate reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_ftruncate_cbk (call_frame_t *frame,
+                      gf_hdr_common_t *hdr, size_t hdrlen,
+                      char *buf, size_t buflen)
+{
+        gf_fop_ftruncate_rsp_t *reply  = gf_param (hdr);
+        struct stat             attr   = {0, };
+        int32_t                 status = ntoh32 (hdr->rsp.op_ret);
+        int32_t                 err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+/*
+ * client_readv_cbk - readv callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the read reply follows it
+ * @hdrlen: not used here
+ * @buf: payload received with the reply
+ * @buflen: length of @buf
+ *
+ * Unless op_ret is -1, the reply's stat is converted, @buf/@buflen are
+ * exposed as a single iovec, and @buf is wrapped with data_from_dynptr()
+ * in a new dictionary that is referenced from frame->root->rsp_refs.
+ * The dictionary is unreferenced after the frame has been unwound.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_readv_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_read_rsp_t *reply   = gf_param (hdr);
+        struct iovec       payload = {0, };
+        struct stat        attr    = {0, };
+        dict_t            *holder  = NULL;
+        int32_t            status  = ntoh32 (hdr->rsp.op_ret);
+        int32_t            err     = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status != -1) {
+                gf_stat_to_stat (&reply->stat, &attr);
+
+                payload.iov_base = buf;
+                payload.iov_len  = buflen;
+
+                holder = get_new_dict ();
+                dict_set (holder, NULL, data_from_dynptr (buf, 0));
+                frame->root->rsp_refs = dict_ref (holder);
+        }
+
+        STACK_UNWIND (frame, status, err, &payload, 1, &attr);
+
+        if (holder != NULL)
+                dict_unref (holder);
+
+        return 0;
+}
+
+/*
+ * client_write_cbk - write callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the write reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is non-negative (a zeroed struct stat is passed
+ * otherwise) and unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_write_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_write_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status >= 0) {
+                gf_stat_to_stat (&reply->stat, &attr);
+        }
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+
+/*
+ * client_readdir_cbk - readdir callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the readdir reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is positive, rsp->size bytes of rsp->buf are unserialized
+ * into a local entry list, which is handed up with the frame and freed
+ * once the unwind returns.  Always returns 0.
+ */
+int32_t
+client_readdir_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        gf_fop_readdir_rsp_t *rsp = NULL;
+        int32_t               op_ret = 0;
+        int32_t               op_errno = 0;
+        uint32_t              buf_size = 0;
+        gf_dirent_t           entries;
+
+        rsp = gf_param (hdr);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        /* map the wire errno to a local errno, as the other callbacks
+         * in this file do */
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        /* only the list head of entries is initialised and used */
+        INIT_LIST_HEAD (&entries.list);
+        if (op_ret > 0) {
+                buf_size = ntoh32 (rsp->size);
+                gf_dirent_unserialize (&entries, rsp->buf, buf_size);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, &entries);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/*
+ * client_fsync_cbk - fsync callback for client protocol
+ *
+ * @frame: call frame
+ * @hdr: response header
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame.
+ * The struct stat passed up is never filled from the reply; it is
+ * always zeroed.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_fsync_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_fsync_rsp_t *reply  = NULL;
+        struct stat         attr   = {0, };
+        int32_t             status = 0;
+        int32_t             err    = 0;
+
+        reply  = gf_param (hdr);
+        status = ntoh32 (hdr->rsp.op_ret);
+        err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+
+/*
+ * client_unlink_cbk - unlink callback for client protocol
+ * @frame: call frame
+ * @hdr: response header
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame
+ * with just those two values.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_unlink_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_unlink_rsp_t *reply  = NULL;
+        int32_t              status = 0;
+        int32_t              err    = 0;
+
+        reply  = gf_param (hdr);
+        status = ntoh32 (hdr->rsp.op_ret);
+        err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, status, err);
+
+        return 0;
+}
+
+/*
+ * client_rename_cbk - rename callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the rename reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header, converts the reply's
+ * stat when op_ret is 0 (a zeroed struct stat is passed otherwise) and
+ * unwinds the frame.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_rename_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_rename_rsp_t *reply  = gf_param (hdr);
+        struct stat          attr   = {0, };
+        int32_t              status = ntoh32 (hdr->rsp.op_ret);
+        int32_t              err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (status == 0)
+                gf_stat_to_stat (&reply->stat, &attr);
+
+        STACK_UNWIND (frame, status, err, &attr);
+
+        return 0;
+}
+
+
+/*
+ * client_readlink_cbk - readlink callback for client protocol
+ *
+ * @frame: call frame
+ * @hdr: response header; the readlink reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame
+ * with rsp->path when op_ret is positive, or NULL otherwise.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_readlink_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_readlink_rsp_t *reply  = gf_param (hdr);
+        int32_t                status = ntoh32 (hdr->rsp.op_ret);
+        int32_t                err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+        char                  *target = NULL;
+
+        if (status > 0)
+                target = reply->path;
+
+        STACK_UNWIND (frame, status, err, target);
+        return 0;
+}
+
+/*
+ * client_mkdir_cbk - mkdir callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the mkdir reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is non-negative the reply's stat is converted and its
+ * inode number recorded with this_ino_set().  The frame is unwound with
+ * the inode taken from the local, which is wiped afterwards.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_mkdir_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        client_local_t     *local  = frame->local;
+        inode_t            *inode  = local->loc.inode;
+        gf_fop_mkdir_rsp_t *reply  = gf_param (hdr);
+        struct stat         attr   = {0, };
+        int32_t             status = ntoh32 (hdr->rsp.op_ret);
+        int32_t             err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        /* detach the local from the frame before unwinding */
+        frame->local = NULL;
+
+        if (status >= 0) {
+                gf_stat_to_stat (&reply->stat, &attr);
+                this_ino_set (&local->loc, frame->this, attr.st_ino);
+        }
+
+        STACK_UNWIND (frame, status, err, inode, &attr);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+/*
+ * client_flush_cbk - flush callback for client protocol
+ *
+ * @frame: call frame
+ * @hdr: response header
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame
+ * with just those two values.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_flush_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        int32_t status = ntoh32 (hdr->rsp.op_ret);
+        int32_t err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, status, err);
+
+        return 0;
+}
+
+
+/*
+ * client_opendir_cbk - opendir callback for client protocol
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the opendir reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * On success (op_ret >= 0) the remote fd is decoded and recorded with
+ * this_fd_set(), and the fd's "%p" key is stored in conf->saved_fds
+ * under conf->mutex.  A failure to store the key is only logged.  The
+ * frame is then unwound and the local wiped.  Always returns 0.
+ *
+ * not for external reference
+ */
+int
+client_opendir_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t               op_ret = -1;
+        int32_t               op_errno = ENOTCONN;
+        fd_t                 *fd = NULL;
+        int64_t               remote_fd = 0;
+        gf_fop_opendir_rsp_t *rsp = NULL;
+        char                  key[32] = {0,};
+        int32_t               ret = -1;
+        client_local_t       *local = NULL;
+        client_conf_t        *conf = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+        conf = frame->this->private;
+        fd = local->fd;
+
+        rsp = gf_param (hdr);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        /* map the wire errno to a local errno, as the other callbacks
+         * in this file do */
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        if (op_ret >= 0) {
+                remote_fd = ntoh64 (rsp->fd);
+
+                this_fd_set (fd, frame->this, &local->loc, remote_fd);
+
+                snprintf (key, sizeof (key), "%p", fd);
+
+                pthread_mutex_lock (&conf->mutex);
+                {
+                        ret = dict_set_str (conf->saved_fds, key, "");
+                }
+                pthread_mutex_unlock (&conf->mutex);
+
+                if (ret < 0) {
+                        /* key is a stack array and must never be passed
+                         * to free() */
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to save remote fd",
+                                local->loc.path, local->loc.inode->ino);
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        client_local_wipe (local);
+
+        return 0;
+}
+
+
+/*
+ * client_rmdir_cbk - rmdir callback for client protocol
+ * @frame: call frame
+ * @hdr: response header
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame
+ * with just those two values.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int
+client_rmdir_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_fop_rmdir_rsp_t *reply  = NULL;
+        int32_t             status = 0;
+        int32_t             err    = 0;
+
+        reply  = gf_param (hdr);
+        status = ntoh32 (hdr->rsp.op_ret);
+        err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, status, err);
+
+        return 0;
+}
+
+/*
+ * client_access_cbk - access callback for client protocol
+ * @frame: call frame
+ * @hdr: response header
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * Decodes op_ret and op_errno from the header and unwinds the frame
+ * with just those two values.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_access_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_access_rsp_t *reply  = NULL;
+        int32_t              status = 0;
+        int32_t              err    = 0;
+
+        reply  = gf_param (hdr);
+        status = ntoh32 (hdr->rsp.op_ret);
+        err    = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, status, err);
+
+        return 0;
+}
+
+
+
+/*
+ * client_lookup_cbk - lookup callback for client protocol
+ *
+ * @frame: call frame; frame->local holds the client_local_t of the call
+ * @hdr: response header; the lookup reply follows it
+ * @hdrlen, @buf, @buflen: not used here
+ *
+ * When op_ret is 0 the reply's stat is converted, its inode number is
+ * recorded with this_ino_set(), and the serialized dictionary in the
+ * reply (if any) is copied and unserialized into the xattr dictionary
+ * handed up with the frame.  A failure while decoding the dictionary
+ * unwinds with op_ret -1 and op_errno EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_lookup_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        struct stat          stbuf = {0, };
+        inode_t             *inode = NULL;
+        dict_t              *xattr = NULL;
+        gf_fop_lookup_rsp_t *rsp = NULL;
+        int32_t              op_ret = 0;
+        /* non-zero default so that "goto fail" never unwinds with
+         * op_ret == -1 and op_errno == 0 */
+        int32_t              op_errno = EINVAL;
+        size_t               dict_len = 0;
+        char                *dictbuf = NULL;
+        int32_t              ret = -1;
+        int32_t              gf_errno = 0;
+        client_local_t      *local = NULL;
+
+        local = frame->local;
+        inode = local->loc.inode;
+        frame->local = NULL;
+
+        rsp = gf_param (hdr);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+
+        if (op_ret == 0) {
+                /* stays -1 until the dictionary has been decoded */
+                op_ret = -1;
+                gf_stat_to_stat (&rsp->stat, &stbuf);
+                this_ino_set (&local->loc, frame->this, stbuf.st_ino);
+
+                dict_len = ntoh32 (rsp->dict_len);
+
+                if (dict_len > 0) {
+                        dictbuf = memdup (rsp->dict, dict_len);
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
+
+                        xattr = dict_new();
+                        GF_VALIDATE_OR_GOTO(frame->this->name, xattr, fail);
+
+                        ret = dict_unserialize (dictbuf, dict_len, &xattr);
+                        if (ret < 0) {
+                                gf_log (frame->this->name, GF_LOG_ERROR,
+                                        "%s (%"PRId64"): failed to unserialize dictionary",
+                                        local->loc.path, inode->ino);
+                                goto fail;
+                        }
+
+                        /* the dictionary now owns the copied buffer */
+                        xattr->extra_free = dictbuf;
+                        dictbuf = NULL;
+                }
+                op_ret = 0;
+        }
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+fail:
+        STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, xattr);
+
+        client_local_wipe (local);
+
+        if (dictbuf)
+                free (dictbuf);
+
+        if (xattr)
+                dict_unref (xattr);
+
+        return 0;
+}
+
+/*
+ * Release a chain of dir_entry_t nodes built by gf_bin_to_direntry().
+ * Only symlink entries own their link string; the others point at a
+ * string literal.
+ */
+static void
+gf_bin_direntry_discard (dir_entry_t *node)
+{
+        dir_entry_t *next = NULL;
+
+        while (node) {
+                next = node->next;
+                if (node->name) {
+                        FREE (node->name);
+                }
+                if (S_ISLNK (node->buf.st_mode) && node->link) {
+                        FREE (node->link);
+                }
+                FREE (node);
+                node = next;
+        }
+}
+
+/*
+ * gf_bin_to_direntry - parse a getdents payload into a dir_entry_t list
+ * @buf: NUL-terminated payload made of "<name>/<stat>\n<link>\n" records;
+ *       it is modified in place (the newline after each link is
+ *       overwritten with NUL)
+ * @count: number of records expected
+ *
+ * Returns a list whose first node is an empty placeholder; the parsed
+ * entries hang off its ->next.  Parsing stops early at the first
+ * malformed record and the entries parsed so far are returned.  NULL is
+ * returned, with nothing left allocated, when an allocation fails or
+ * when @buf is NULL although @count is non-zero.
+ */
+static dir_entry_t *
+gf_bin_to_direntry (char *buf, size_t count)
+{
+        size_t       idx = 0;
+        size_t       rcount = 0;
+        char        *ender = NULL, *buffer = NULL;
+        char         tmp_buf[512] = {0,};
+        dir_entry_t *trav = NULL, *prev = NULL;
+        dir_entry_t *thead = NULL;
+
+        thead = CALLOC (1, sizeof (dir_entry_t));
+        GF_VALIDATE_OR_GOTO("client-protocol", thead, fail);
+
+        if (count > 0) {
+                /* strchr() below must not be handed a NULL payload */
+                GF_VALIDATE_OR_GOTO("client-protocol", buf, fail);
+        }
+
+        buffer = buf;
+        prev = thead;
+
+        for (idx = 0; idx < count; idx++) {
+                trav = CALLOC (1, sizeof (dir_entry_t));
+                GF_VALIDATE_OR_GOTO("client-protocol", trav, fail);
+
+                /* entry name, terminated by '/' */
+                ender = strchr (buffer, '/');
+                if (!ender)
+                        break;
+                rcount = ender - buffer;
+                trav->name = CALLOC (1, rcount + 2);
+                GF_VALIDATE_OR_GOTO("client-protocol", trav->name, fail);
+
+                memcpy (trav->name, buffer, rcount);
+                buffer += rcount + 1;
+
+                /* stat string, terminated by '\n' */
+                ender = strchr (buffer, '\n');
+                if (!ender)
+                        break;
+                rcount = ender - buffer;
+                if (rcount >= sizeof (tmp_buf))
+                        break;  /* would overflow tmp_buf: malformed */
+                memcpy (tmp_buf, buffer, rcount);
+                /* terminate explicitly so a shorter record never keeps
+                 * the tail of a previous, longer one */
+                tmp_buf[rcount] = '\0';
+                buffer += rcount + 1;
+
+                gf_string_to_stat (tmp_buf, &trav->buf);
+
+                /* link target, terminated by '\n' */
+                ender = strchr (buffer, '\n');
+                if (!ender)
+                        break;
+                rcount = ender - buffer;
+                *ender = '\0';
+                if (S_ISLNK (trav->buf.st_mode)) {
+                        trav->link = strdup (buffer);
+                        GF_VALIDATE_OR_GOTO("client-protocol", trav->link,
+                                            fail);
+                } else {
+                        trav->link = "";
+                }
+
+                buffer += rcount + 1;
+
+                prev->next = trav;
+                prev = trav;
+                trav = NULL;
+        }
+
+        /* a record abandoned half-way was never linked into the list */
+        gf_bin_direntry_discard (trav);
+
+        return thead;
+fail:
+        gf_bin_direntry_discard (trav);
+        gf_bin_direntry_discard (thead);
+        return NULL;
+}
+
+/*
+ * gf_free_direntry - release a dir_entry_t list
+ * @head: first node of the list (may be NULL, in which case nothing is
+ *        freed)
+ *
+ * Every node after @head has its name freed, its link freed when the
+ * entry's mode says it is a symlink, and is then freed itself; finally
+ * @head is freed.  Always returns 0.
+ */
+int32_t
+gf_free_direntry(dir_entry_t *head)
+{
+        dir_entry_t *prev = head;
+        dir_entry_t *node = NULL;
+
+        GF_VALIDATE_OR_GOTO("client-protocol", prev, fail);
+
+        for (node = head->next; node != NULL; node = prev->next) {
+                /* unlink the node before releasing it */
+                prev->next = node->next;
+
+                FREE (node->name);
+                if (S_ISLNK (node->buf.st_mode)) {
+                        FREE (node->link);
+                }
+                FREE (node);
+        }
+
+        FREE (head);
+fail:
+        return 0;
+}
+/*
+ * client_getdents_cbk - getdents callback for client protocol
+ * @frame: call frame
+ * @hdr: response header; the getdents reply follows it
+ * @hdrlen: not used here
+ * @buf: payload holding the serialized directory entries
+ * @buflen: not used here
+ *
+ * When op_ret is non-negative, rsp->count entries are parsed out of
+ * @buf with gf_bin_to_direntry(); if that fails the frame is unwound
+ * with (-1, EINVAL).  After the unwind the parsed list (if any) and
+ * @buf are released.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_getdents_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_getdents_rsp_t *rsp = NULL;
+        int32_t                op_ret = 0;
+        int32_t                op_errno = 0;
+        int32_t                gf_errno = 0;
+        int32_t                nr_count = 0;
+        int32_t                release_buf = 0;
+        dir_entry_t           *entry = NULL;
+
+        rsp = gf_param (hdr);
+
+        op_ret   = ntoh32 (hdr->rsp.op_ret);
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+        if (op_ret >= 0) {
+                /* remember that the payload must be released even if
+                 * parsing fails and op_ret is turned into -1 below */
+                release_buf = 1;
+
+                nr_count = ntoh32 (rsp->count);
+                entry = gf_bin_to_direntry(buf, nr_count);
+                if (entry == NULL) {
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                }
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, entry, nr_count);
+
+        if (release_buf) {
+                /* Free the buffer */
+                FREE (buf);
+                if (entry) {
+                        gf_free_direntry(entry);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * client_statfs_cbk - statfs callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header; carries op_ret/op_errno and the statfs response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds with (op_ret, op_errno, &statvfs). The statvfs is only filled
+ * in when op_ret is 0; otherwise it stays zeroed.
+ *
+ * not for external reference
+ */
+int32_t
+client_statfs_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        gf_fop_statfs_rsp_t *rsp      = gf_param (hdr);
+        int32_t              op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t              op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+        struct statvfs       stbuf    = {0, };
+
+        if (op_ret == 0) {
+                gf_statfs_to_statfs (&rsp->statfs, &stbuf);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+        return 0;
+}
+
+/*
+ * client_fsyncdir_cbk - fsyncdir callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_fsyncdir_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        int32_t op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * client_setxattr_cbk - setxattr callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ * Nothing from the response body is consumed, so it is not decoded.
+ *
+ * not for external reference
+ */
+int32_t
+client_setxattr_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * client_getxattr_cbk - getxattr callback for client protocol
+ * @frame: call frame to unwind; frame->local holds the client_local_t
+ * @hdr: reply header; carries op_ret/op_errno and the getxattr response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * On success the serialized dictionary in the response is copied and
+ * unserialized; the copy is handed to the dict through extra_free.
+ * Unwinds with (op_ret, op_errno, dict). Local failures unwind with
+ * op_ret -1 and ENOMEM (allocation) or EINVAL (bad response), never
+ * with an errno of 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_getxattr_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_getxattr_rsp_t *rsp = NULL;
+        int32_t op_ret = 0;
+        int32_t gf_errno = 0;
+        int32_t op_errno = 0;
+        int32_t dict_len = 0;
+        dict_t *dict = NULL;
+        int32_t ret = -1;
+        char *dictbuf = NULL;
+        client_local_t *local = NULL;
+
+        local = frame->local;
+        frame->local = NULL;
+
+        /* a missing response must not unwind as success */
+        op_ret = -1;
+        op_errno = EINVAL;
+
+        rsp = gf_param (hdr);
+        GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+        if (op_ret >= 0) {
+                op_ret = -1;
+                dict_len = ntoh32 (rsp->dict_len);
+
+                if (dict_len > 0) {
+                        /* the two validations below only fail on
+                         * allocation failure */
+                        op_errno = ENOMEM;
+
+                        dictbuf = memdup (rsp->dict, dict_len);
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
+
+                        dict = dict_new();
+                        GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
+
+                        ret = dict_unserialize (dictbuf, dict_len, &dict);
+                        if (ret < 0) {
+                                gf_log (frame->this->name, GF_LOG_ERROR,
+                                        "%s (%"PRId64"): failed to "
+                                        "unserialize xattr dictionary",
+                                        local->loc.path, local->loc.inode->ino);
+                                op_errno = EINVAL;
+                                goto fail;
+                        }
+
+                        /* dict now owns the copied buffer */
+                        dict->extra_free = dictbuf;
+                        dictbuf = NULL;
+
+                        /* restore the errno reported by the server */
+                        op_errno = gf_error_to_errno (gf_errno);
+                }
+                op_ret = 0;
+        }
+fail:
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        client_local_wipe (local);
+
+        /* non-NULL only when ownership was not passed to dict */
+        free (dictbuf);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}
+
+/*
+ * client_removexattr_cbk - removexattr callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_removexattr_cbk (call_frame_t *frame,
+                        gf_hdr_common_t *hdr, size_t hdrlen,
+                        char *buf, size_t buflen)
+{
+        int32_t op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * client_lk_common_cbk - lk callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header; carries op_ret/op_errno and the lk response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds with (op_ret, op_errno, &flock). The flock is converted from
+ * the response only when op_ret >= 0; otherwise it stays zeroed.
+ *
+ * not for external reference
+ */
+int32_t
+client_lk_common_cbk (call_frame_t *frame,
+                      gf_hdr_common_t *hdr, size_t hdrlen,
+                      char *buf, size_t buflen)
+{
+        gf_fop_lk_rsp_t *rsp      = gf_param (hdr);
+        int32_t          op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t          op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+        struct flock     lock     = {0,};
+
+        if (op_ret >= 0) {
+                gf_flock_to_flock (&rsp->flock, &lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, &lock);
+        return 0;
+}
+
+
+/*
+ * client_inodelk_cbk - inodelk callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ * Nothing from the response body is consumed, so it is not decoded.
+ *
+ * not for external reference
+ */
+int32_t
+client_inodelk_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * client_finodelk_cbk - finodelk callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ * Nothing from the response body is consumed, so it is not decoded.
+ */
+int32_t
+client_finodelk_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/*
+ * client_entrylk_cbk - entrylk callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ * Nothing from the response body is consumed, so it is not decoded.
+ *
+ * not for external reference
+ */
+int32_t
+client_entrylk_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * client_fentrylk_cbk - fentrylk callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ * Nothing from the response body is consumed, so it is not decoded.
+ */
+int32_t
+client_fentrylk_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/**
+ * client_setdents_cbk - setdents callback for client protocol
+ *
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+client_setdents_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        int32_t op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+
+/*
+ * client_stats_cbk - stats callback for client protocol
+ *
+ * @frame: call frame to unwind
+ * @hdr: reply header; carries op_ret/op_errno and the stats response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * When op_ret >= 0 the eight comma-separated hexadecimal fields in the
+ * response text are scanned into a struct xlator_stats. Unwinds with
+ * (op_ret, op_errno, &stats); the sscanf() result is not checked.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_stats_cbk (call_frame_t *frame,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        gf_mop_stats_rsp_t *rsp      = gf_param (hdr);
+        int32_t             op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t             op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+        struct xlator_stats stats    = {0,};
+        char               *text     = NULL;
+
+        if (op_ret >= 0) {
+                text = rsp->buf;
+
+                sscanf (text, "%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64
+                        ",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64"\n",
+                        &stats.nr_files,
+                        &stats.disk_usage,
+                        &stats.free_disk,
+                        &stats.total_disk_size,
+                        &stats.read_usage,
+                        &stats.write_usage,
+                        &stats.disk_speed,
+                        &stats.nr_clients);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, &stats);
+        return 0;
+}
+
+/*
+ * client_getspec - getspec function for client protocol
+ * @frame: call frame
+ * @this: client protocol xlator structure
+ * @key: optional key sent with the request; may be NULL
+ * @flag: flags value sent with the request
+ *
+ * Builds a GF_MOP_GETSPEC request and sends it on the bulk channel.
+ * Returns the protocol_client_xfer() result, or unwinds with
+ * (-1, EINVAL, NULL) and returns 0 when the header cannot be created.
+ *
+ * external reference through client_protocol_xlator->fops->getspec
+ */
+int32_t
+client_getspec (call_frame_t *frame,
+                xlator_t *this,
+                const char *key,
+                int32_t flag)
+{
+        gf_mop_getspec_req_t *req    = NULL;
+        gf_hdr_common_t      *hdr    = NULL;
+        size_t                hdrlen = -1;
+        int                   keylen = 0;
+        int                   ret    = -1;
+
+        if (key) {
+                keylen = STRLEN_0(key);
+        }
+
+        hdrlen = gf_hdr_len (req, keylen);
+        hdr    = gf_hdr_new (req, keylen);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+        req->flags  = hton32 (flag);
+        req->keylen = hton32 (keylen);
+        if (keylen) {
+                strcpy (req->key, key);
+        }
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_MOP_REQUEST, GF_MOP_GETSPEC,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+
+unwind:
+        if (hdr) {
+                free (hdr);
+        }
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/*
+ * client_getspec_cbk - getspec callback for client protocol
+ *
+ * @frame: call frame to unwind
+ * @hdr: reply header; carries op_ret/op_errno and the getspec response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds with (op_ret, op_errno, spec_data). spec_data points at the
+ * response's spec member when op_ret >= 0 and is NULL otherwise.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_getspec_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t               op_ret    = ntoh32 (hdr->rsp.op_ret);
+        int32_t               gf_errno  = ntoh32 (hdr->rsp.op_errno);
+        int32_t               op_errno  = gf_error_to_errno (gf_errno);
+        gf_mop_getspec_rsp_t *rsp       = gf_param (hdr);
+        char                 *spec_data = NULL;
+
+        if (op_ret >= 0) {
+                spec_data = rsp->spec;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, spec_data);
+        return 0;
+}
+
+/*
+ * client_checksum - checksum function for client protocol
+ * @frame: call frame
+ * @this: client protocol xlator structure
+ * @loc: location whose path and inode number are sent
+ * @flag: flag value sent with the request
+ *
+ * If conf->child is set the call is wound to that xlator instead of
+ * going over the wire. Otherwise a GF_FOP_CHECKSUM request is sent on
+ * the bulk channel and the protocol_client_xfer() result is returned.
+ * When the request header cannot be created the frame is unwound with
+ * (-1, EINVAL, NULL, NULL) and 0 is returned.
+ */
+int32_t
+client_checksum (call_frame_t *frame,
+                 xlator_t *this,
+                 loc_t *loc,
+                 int32_t flag)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_fop_checksum_req_t *req = NULL;
+        size_t hdrlen = -1;
+        size_t pathlen = 0;
+        int ret = -1;
+        client_conf_t *conf = this->private;
+        ino_t ino = 0;
+
+        if (conf->child) {
+                STACK_WIND (frame,
+                            default_checksum_cbk,
+                            conf->child,
+                            conf->child->fops->checksum,
+                            loc,
+                            flag);
+
+                return 0;
+        }
+
+        /* path plus its terminating NUL travels in the header */
+        pathlen = strlen (loc->path) + 1;
+
+        hdrlen = gf_hdr_len (req, pathlen);
+        hdr = gf_hdr_new (req, pathlen);
+        /* previously dereferenced unchecked */
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req = gf_param (hdr);
+
+        ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+        req->ino = hton64 (ino);
+        req->flag = hton32 (flag);
+        strcpy (req->path, loc->path);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHECKSUM,
+                                    hdr, hdrlen, NULL, 0, NULL);
+
+        return ret;
+unwind:
+        STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * client_checksum_cbk - checksum callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header; carries op_ret/op_errno and the checksum response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds with (op_ret, op_errno, fchecksum, dchecksum). Both pointers
+ * are NULL unless op_ret >= 0; dchecksum is taken ZR_FILENAME_MAX bytes
+ * past the response's dchecksum member.
+ */
+int32_t
+client_checksum_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_checksum_rsp_t *rsp       = gf_param (hdr);
+        int32_t                op_ret    = ntoh32 (hdr->rsp.op_ret);
+        int32_t                gf_errno  = ntoh32 (hdr->rsp.op_errno);
+        int32_t                op_errno  = gf_error_to_errno (gf_errno);
+        unsigned char         *fchecksum = NULL;
+        unsigned char         *dchecksum = NULL;
+
+        if (op_ret >= 0) {
+                fchecksum = rsp->fchecksum;
+                dchecksum = rsp->dchecksum + ZR_FILENAME_MAX;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+        return 0;
+}
+
+
+/*
+ * client_setspec_cbk - setspec callback for client protocol
+ * @frame: call frame to unwind
+ * @hdr: reply header carrying op_ret and op_errno
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unwinds @frame with the decoded (op_ret, op_errno) and returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+client_setspec_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t op_ret   = ntoh32 (hdr->rsp.op_ret);
+        int32_t op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/*
+ * client_setvolume_cbk - setvolume (handshake) reply callback for client protocol
+ * @frame: call frame; frame->local holds the transport the request was sent on
+ * @hdr: reply header carrying op_ret/op_errno and the setvolume response
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Unserializes the reply dictionary from rsp->buf and reads its "ERROR" and
+ * "process-uuid" keys. On success the connection is marked connected under
+ * conn->lock and every parent of trans->xl is sent GF_EVENT_CHILD_UP. If the
+ * reply's process-uuid equals this process's ctx->process_uuid, conf->child
+ * is set to the xlator named by the "remote-subvolume" option.
+ *
+ * The frame's stack is destroyed on every path; returns the decoded op_ret.
+ *
+ * not for external reference
+ */
+int
+client_setvolume_cbk (call_frame_t *frame,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ gf_mop_setvolume_rsp_t *rsp = NULL;
+ client_connection_t *conn = NULL;
+ client_conf_t *conf = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ xlator_list_t *parent = NULL;
+ transport_t *trans = NULL;
+ dict_t *reply = NULL;
+ char *remote_subvol = NULL;
+ char *remote_error = NULL;
+ char *process_uuid = NULL;
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int32_t dict_len = 0;
+
+
+ trans = frame->local; frame->local = NULL; /* take the transport out of the frame */
+ this = frame->this;
+ conf = this->private;
+ conn = trans->xl_private;
+
+ rsp = gf_param (hdr);
+
+ op_ret = ntoh32 (hdr->rsp.op_ret);
+ op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+ if (op_ret < 0 && op_errno == ENOTCONN) { /* bail out before touching the reply dictionary */
+ gf_log (this->name, GF_LOG_ERROR,
+ "setvolume failed (%s)",
+ strerror (op_errno));
+ goto out;
+ }
+
+ reply = dict_new ();
+ GF_VALIDATE_OR_GOTO(this->name, reply, out);
+
+ dict_len = ntoh32 (rsp->dict_len);
+ ret = dict_unserialize (rsp->buf, dict_len, &reply);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to unserialize buffer(%p) to dictionary",
+ rsp->buf);
+ goto out;
+ }
+
+ ret = dict_get_str (reply, "ERROR", &remote_error);
+ if (ret < 0) { /* NOTE(review): logged at ERROR level even when op_ret >= 0 - confirm intended */
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get ERROR string from reply dictionary");
+ }
+
+ ret = dict_get_str (reply, "process-uuid", &process_uuid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get 'process-uuid' from reply dictionary");
+ }
+
+ if (op_ret < 0) {
+ gf_log (trans->xl->name, GF_LOG_ERROR,
+ "SETVOLUME on remote-host failed: %s",
+ remote_error ? remote_error : strerror (op_errno)); /* prefer the remote text when the reply carried one */
+ errno = op_errno;
+ if (op_errno == ENOTCONN)
+ goto out;
+ } else {
+ ctx = get_global_ctx_ptr ();
+ if (process_uuid && !strcmp (ctx->process_uuid,process_uuid)) { /* reply came from this very process */
+ ret = dict_get_str (this->options, "remote-subvolume",
+ &remote_subvol);
+ if (!remote_subvol)
+ goto out;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "attaching to the local volume '%s'",
+ remote_subvol);
+
+ /* TODO: */
+ conf->child = xlator_search_by_name (this,
+ remote_subvol);
+ }
+ gf_log (trans->xl->name, GF_LOG_INFO,
+ "connection and handshake succeeded");
+
+ pthread_mutex_lock (&(conn->lock));
+ {
+ conn->connected = 1;
+ }
+ pthread_mutex_unlock (&(conn->lock));
+
+ parent = trans->xl->parents;
+ while (parent) { /* tell every parent the child is up */
+ parent->xlator->notify (parent->xlator,
+ GF_EVENT_CHILD_UP,
+ trans->xl);
+ parent = parent->next;
+ }
+ }
+
+out:
+ STACK_DESTROY (frame->root); /* this frame is never unwound, only destroyed */
+
+ if (reply)
+ dict_unref (reply);
+
+ return op_ret;
+}
+
+/*
+ * client_enosys_cbk - reply handler that discards the reply
+ * @frame: call frame whose stack is destroyed
+ * @hdr: reply header (not used here)
+ * @hdrlen: size of @hdr (not used here)
+ * @buf: reply payload (not used here)
+ * @buflen: size of @buf (not used here)
+ *
+ * Destroys the call stack owning @frame without unwinding; returns 0.
+ *
+ * not for external reference
+ */
+int
+client_enosys_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * client_protocol_reconnect - (re)connect a transport and keep retrying
+ * @trans_ptr: the transport_t to connect, passed as void * so this
+ *             function can be used as a timer callback
+ *
+ * Under conn->lock: cancels any pending reconnect timer, then, if the
+ * connection is not marked connected, calls transport_connect() and
+ * schedules itself again after 10 seconds. Once the connection is
+ * marked connected no new timer is armed.
+ */
+void
+client_protocol_reconnect (void *trans_ptr)
+{
+        transport_t         *trans = trans_ptr;
+        client_connection_t *conn  = trans->xl_private;
+        struct timeval       tv    = {0, 0};
+
+        pthread_mutex_lock (&conn->lock);
+        {
+                if (conn->reconnect) {
+                        gf_timer_call_cancel (trans->xl->ctx,
+                                              conn->reconnect);
+                }
+                conn->reconnect = 0;
+
+                if (conn->connected != 0) {
+                        gf_log (trans->xl->name, GF_LOG_DEBUG,
+                                "breaking reconnect chain");
+                } else {
+                        tv.tv_sec = 10;
+
+                        gf_log (trans->xl->name, GF_LOG_DEBUG,
+                                "attempting reconnect");
+                        transport_connect (trans);
+
+                        /* re-arm: this function runs again after tv */
+                        conn->reconnect =
+                                gf_timer_call_after (trans->xl->ctx, tv,
+                                                     client_protocol_reconnect,
+                                                     trans);
+                }
+        }
+        pthread_mutex_unlock (&conn->lock);
+}
+
+/*
+ * protocol_client_cleanup - reset per-connection state of a transport
+ * @trans: transport object
+ *
+ * Under conn->lock: swaps in a fresh saved_frames set, zeroes the
+ * last_sent/last_received timestamps and cancels conn->timer. The old
+ * saved frames are destroyed after the lock is released. Returns 0.
+ */
+int
+protocol_client_cleanup (transport_t *trans)
+{
+        client_connection_t *conn       = trans->xl_private;
+        struct saved_frames *old_frames = NULL;
+
+        gf_log (trans->xl->name, GF_LOG_DEBUG,
+                "cleaning up state in transport object %p", trans);
+
+        pthread_mutex_lock (&conn->lock);
+        {
+                old_frames = conn->saved_frames;
+                conn->saved_frames = saved_frames_new ();
+
+                /* A walk over conn->saved_fds that deleted each fd
+                 * context and rebuilt the dictionary used to live here;
+                 * it is disabled. */
+
+                /* bailout logic cleanup */
+                memset (&(conn->last_sent), 0,
+                        sizeof (conn->last_sent));
+                memset (&(conn->last_received), 0,
+                        sizeof (conn->last_received));
+
+                if (conn->timer) {
+                        gf_timer_call_cancel (trans->xl->ctx, conn->timer);
+                        conn->timer = NULL;
+                }
+
+                /* TODO: nothing is done for the conn->reconnect == NULL
+                 * case; the original author asked whether something is
+                 * missing here. */
+        }
+        pthread_mutex_unlock (&conn->lock);
+
+        saved_frames_destroy (trans->xl, old_frames,
+                              gf_fops, gf_mops, gf_cbks);
+
+        return 0;
+}
+
+
+/* cbk callbacks */
+
+/*
+ * client_releasedir_cbk - releasedir reply handler
+ * @frame: call frame whose stack is destroyed
+ *
+ * The reply carries nothing that is used; the call stack owning @frame
+ * is destroyed and 0 is returned.
+ */
+int
+client_releasedir_cbk (call_frame_t *frame,
+                       gf_hdr_common_t *hdr, size_t hdrlen,
+                       char *buf, size_t buflen)
+{
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * client_release_cbk - release reply handler
+ * @frame: call frame whose stack is destroyed
+ *
+ * The reply carries nothing that is used; the call stack owning @frame
+ * is destroyed and 0 is returned.
+ */
+int
+client_release_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * client_forget_cbk - forget reply handler
+ * @frame: call frame of the completed forget request
+ *
+ * Under conf->forget.lock: decrements the in-transit counter and asks
+ * client_get_forgets() for the next batch. When that returns a positive
+ * value the batch is sent as a new GF_CBK_FORGET request on the bulk
+ * channel. The stack of @frame is then destroyed; returns 0.
+ */
+int
+client_forget_cbk (call_frame_t *frame,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        client_conf_t   *conf        = frame->this->private;
+        client_forget_t  forget      = {0, };
+        uint8_t          send_forget = 0;
+        int32_t          ret         = -1;
+
+        LOCK (&conf->forget.lock);
+        {
+                conf->forget.frames_in_transit--;
+
+                ret = client_get_forgets (frame->this, &forget);
+                send_forget = (ret > 0) ? 1 : 0;
+        }
+        UNLOCK (&conf->forget.lock);
+
+        if (send_forget) {
+                /* the transfer happens outside the lock */
+                ret = protocol_client_xfer (forget.frame, frame->this,
+                                            CLIENT_CHANNEL (frame->this,
+                                                            CHANNEL_BULK),
+                                            GF_OP_TYPE_CBK_REQUEST,
+                                            GF_CBK_FORGET,
+                                            forget.hdr, forget.hdrlen,
+                                            NULL, 0, NULL);
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+
+static gf_op_t gf_fops[] = { /* reply handlers for GF_OP_TYPE_FOP_REPLY packets, indexed by GF_FOP_* opcode; opcodes not listed here are left zero-initialized */
+ [GF_FOP_STAT] = client_stat_cbk,
+ [GF_FOP_READLINK] = client_readlink_cbk,
+ [GF_FOP_MKNOD] = client_mknod_cbk,
+ [GF_FOP_MKDIR] = client_mkdir_cbk,
+ [GF_FOP_UNLINK] = client_unlink_cbk,
+ [GF_FOP_RMDIR] = client_rmdir_cbk,
+ [GF_FOP_SYMLINK] = client_symlink_cbk,
+ [GF_FOP_RENAME] = client_rename_cbk,
+ [GF_FOP_LINK] = client_link_cbk,
+ [GF_FOP_CHMOD] = client_chmod_cbk,
+ [GF_FOP_CHOWN] = client_chown_cbk,
+ [GF_FOP_TRUNCATE] = client_truncate_cbk,
+ [GF_FOP_OPEN] = client_open_cbk,
+ [GF_FOP_READ] = client_readv_cbk,
+ [GF_FOP_WRITE] = client_write_cbk,
+ [GF_FOP_STATFS] = client_statfs_cbk,
+ [GF_FOP_FLUSH] = client_flush_cbk,
+ [GF_FOP_FSYNC] = client_fsync_cbk,
+ [GF_FOP_SETXATTR] = client_setxattr_cbk,
+ [GF_FOP_GETXATTR] = client_getxattr_cbk,
+ [GF_FOP_REMOVEXATTR] = client_removexattr_cbk,
+ [GF_FOP_OPENDIR] = client_opendir_cbk,
+ [GF_FOP_GETDENTS] = client_getdents_cbk,
+ [GF_FOP_FSYNCDIR] = client_fsyncdir_cbk,
+ [GF_FOP_ACCESS] = client_access_cbk,
+ [GF_FOP_CREATE] = client_create_cbk,
+ [GF_FOP_FTRUNCATE] = client_ftruncate_cbk,
+ [GF_FOP_FSTAT] = client_fstat_cbk,
+ [GF_FOP_LK] = client_lk_common_cbk,
+ [GF_FOP_UTIMENS] = client_utimens_cbk,
+ [GF_FOP_FCHMOD] = client_fchmod_cbk,
+ [GF_FOP_FCHOWN] = client_fchown_cbk,
+ [GF_FOP_LOOKUP] = client_lookup_cbk,
+ [GF_FOP_SETDENTS] = client_setdents_cbk,
+ [GF_FOP_READDIR] = client_readdir_cbk,
+ [GF_FOP_INODELK] = client_inodelk_cbk,
+ [GF_FOP_FINODELK] = client_finodelk_cbk,
+ [GF_FOP_ENTRYLK] = client_entrylk_cbk,
+ [GF_FOP_FENTRYLK] = client_fentrylk_cbk,
+ [GF_FOP_CHECKSUM] = client_checksum_cbk,
+ [GF_FOP_XATTROP] = client_xattrop_cbk,
+ [GF_FOP_FXATTROP] = client_fxattrop_cbk,
+};
+
+static gf_op_t gf_mops[] = { /* reply handlers for GF_OP_TYPE_MOP_REPLY packets, indexed by GF_MOP_* opcode */
+ [GF_MOP_SETVOLUME] = client_setvolume_cbk,
+ [GF_MOP_GETVOLUME] = client_enosys_cbk, /* reply is discarded */
+ [GF_MOP_STATS] = client_stats_cbk,
+ [GF_MOP_SETSPEC] = client_setspec_cbk,
+ [GF_MOP_GETSPEC] = client_getspec_cbk,
+ [GF_MOP_PING] = client_ping_cbk,
+};
+
+static gf_op_t gf_cbks[] = { /* reply handlers for GF_OP_TYPE_CBK_REPLY packets, indexed by GF_CBK_* opcode */
+ [GF_CBK_FORGET] = client_forget_cbk,
+ [GF_CBK_RELEASE] = client_release_cbk,
+ [GF_CBK_RELEASEDIR] = client_releasedir_cbk
+};
+
+/*
+ * protocol_client_interpret - dispatch one received reply to its handler
+ * @this: client protocol xlator
+ * @trans: transport object the reply arrived on
+ * @hdr_p: raw reply header (a gf_hdr_common_t)
+ * @hdrlen: size of the header
+ * @buf_p: reply payload, passed through to the handler
+ * @buflen: size of the payload
+ *
+ * Looks up the saved frame for the reply's callid and calls the handler
+ * registered for (type, op) in gf_fops / gf_mops / gf_cbks.
+ *
+ * Returns 0 when no frame matches, the handler's return value when one
+ * is called, and -1 for an unknown packet type or an opcode that has no
+ * handler. The opcode is checked against the real size of the table and
+ * against empty slots, so a bad opcode can neither index past the table
+ * nor call a NULL pointer.
+ */
+int
+protocol_client_interpret (xlator_t *this, transport_t *trans,
+                           char *hdr_p, size_t hdrlen,
+                           char *buf_p, size_t buflen)
+{
+        int              ret       = -1;
+        call_frame_t    *frame     = NULL;
+        gf_hdr_common_t *hdr       = NULL;
+        gf_op_t         *table     = NULL;
+        size_t           table_len = 0;
+        const char      *kind      = NULL;
+        uint64_t         callid    = 0;
+        int              type      = -1;
+        int              op        = -1;
+
+        hdr = (gf_hdr_common_t *)hdr_p;
+
+        type   = ntoh32 (hdr->type);
+        op     = ntoh32 (hdr->op);
+        callid = ntoh64 (hdr->callid);
+
+        frame = lookup_frame (trans, op, type, callid);
+        if (frame == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "no frame for callid=%"PRIu64" type=%d op=%d",
+                        callid, type, op);
+                return 0;
+        }
+
+        switch (type) {
+        case GF_OP_TYPE_FOP_REPLY:
+                table     = gf_fops;
+                table_len = sizeof (gf_fops) / sizeof (gf_fops[0]);
+                kind      = "fop";
+                break;
+        case GF_OP_TYPE_MOP_REPLY:
+                table     = gf_mops;
+                table_len = sizeof (gf_mops) / sizeof (gf_mops[0]);
+                kind      = "mop";
+                break;
+        case GF_OP_TYPE_CBK_REPLY:
+                table     = gf_cbks;
+                table_len = sizeof (gf_cbks) / sizeof (gf_cbks[0]);
+                kind      = "cbk";
+                break;
+        default:
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "invalid packet type: %d", type);
+                return ret;
+        }
+
+        /* NOTE(review): on this path the frame returned by lookup_frame()
+         * is neither unwound nor destroyed, as before - confirm whether
+         * it should be released here. */
+        if ((op < 0) || ((size_t) op >= table_len) || (table[op] == NULL)) {
+                gf_log (trans->xl->name, GF_LOG_WARNING,
+                        "invalid %s '%d'", kind, op);
+                return ret;
+        }
+
+        ret = table[op] (frame, hdr, hdrlen, buf_p, buflen);
+
+        return ret;
+}
+
+/*
+ * init - initialization function, called during loading of client protocol
+ * @this: client protocol xlator
+ *
+ * Rejects a volume that has subvolumes or lacks "remote-subvolume",
+ * reads "transport-timeout" (default 42) and "ping-timeout" (default
+ * 10), allocates the client_conf_t and, for each of the CHANNEL_MAX
+ * channels, loads a transport and attaches a fresh client_connection_t.
+ * Outside Darwin it then tries to raise RLIMIT_NOFILE to 1M, falling
+ * back to 64k.
+ *
+ * Returns 0 on success and -1 on failure, including allocation failure
+ * of the conf or of a connection.
+ */
+int32_t
+init (xlator_t *this)
+{
+        transport_t *trans = NULL;
+        client_conf_t *conf = NULL;
+        client_connection_t *conn = NULL;
+        int32_t transport_timeout = 0;
+        int32_t ping_timeout = 0;
+        data_t *remote_subvolume = NULL;
+        int32_t ret = -1;
+        int i = 0;
+
+        if (this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: client protocol translator cannot have "
+                        "subvolumes");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        remote_subvolume = dict_get (this->options, "remote-subvolume");
+        if (remote_subvolume == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "missing 'option remote-subvolume'.");
+                goto out;
+        }
+
+        ret = dict_get_int32 (this->options, "transport-timeout",
+                              &transport_timeout);
+        if (ret >= 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting transport-timeout to %d", transport_timeout);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "defaulting transport-timeout to 42");
+                transport_timeout = 42;
+        }
+
+        ret = dict_get_int32 (this->options, "ping-timeout",
+                              &ping_timeout);
+        if (ret >= 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting ping-timeout to %d", ping_timeout);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "defaulting ping-timeout to 10");
+                ping_timeout = 10;
+        }
+
+        /* ret holds a dict_get_int32() result here; make sure every
+         * failure exit below reports -1 */
+        ret = -1;
+
+        conf = CALLOC (1, sizeof (client_conf_t));
+        GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+        LOCK_INIT (&conf->forget.lock);
+        pthread_mutex_init (&conf->mutex, NULL);
+        conf->saved_fds = get_new_dict_full (64);
+
+        this->private = conf;
+
+        for (i = 0; i < CHANNEL_MAX; i++) {
+                trans = transport_load (this->options, this);
+                if (trans == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to load transport");
+                        ret = -1;
+                        goto out;
+                }
+
+                conn = CALLOC (1, sizeof (*conn));
+                GF_VALIDATE_OR_GOTO(this->name, conn, out);
+
+                conn->saved_frames = saved_frames_new ();
+
+                conn->callid = 1;
+
+                memset (&(conn->last_sent), 0, sizeof (conn->last_sent));
+                memset (&(conn->last_received), 0,
+                        sizeof (conn->last_received));
+
+                conn->transport_timeout = transport_timeout;
+                conn->ping_timeout = ping_timeout;
+
+                pthread_mutex_init (&conn->lock, NULL);
+
+                trans->xl_private = conn;
+                conf->transport[i] = transport_ref (trans);
+        }
+
+#ifndef GF_DARWIN_HOST_OS
+        {
+                struct rlimit lim;
+
+                lim.rlim_cur = 1048576;
+                lim.rlim_max = 1048576;
+
+                ret = setrlimit (RLIMIT_NOFILE, &lim);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "WARNING: Failed to set 'ulimit -n 1M': %s",
+                                strerror(errno));
+                        lim.rlim_cur = 65536;
+                        lim.rlim_max = 65536;
+
+                        ret = setrlimit (RLIMIT_NOFILE, &lim);
+                        if (ret == -1) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set max open fd to 64k: %s",
+                                        strerror(errno));
+                        } else {
+                                /* NOTE(review): success branch logs at
+                                 * ERROR level - confirm intended */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "max open fd set to 64k");
+                        }
+
+                }
+        }
+#endif
+        /* a failed setrlimit() is logged only, it does not fail init */
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * fini - finish function called during unloading of client protocol
+ * @this: client protocol xlator
+ *
+ * Detaches the client_conf_t from @this, destroys its forget lock and
+ * frees it. Nothing else owned by the conf is released here.
+ */
+void
+fini (xlator_t *this)
+{
+        /* TODO: Check if its enough.. how to call transport's fini () */
+        client_conf_t *conf = this->private;
+
+        this->private = NULL;
+
+        if (conf == NULL) {
+                return;
+        }
+
+        LOCK_DESTROY (&conf->forget.lock);
+        FREE (conf);
+}
+
+
+/*
+ * protocol_client_handshake - send the SETVOLUME request on a transport
+ * @this: client protocol xlator
+ * @trans: transport to perform the handshake on
+ *
+ * Adds "version" and "process-uuid" to this->options, serializes the
+ * options dictionary into a GF_MOP_SETVOLUME request and sends it on
+ * @trans with a new frame whose local is @trans.
+ *
+ * Returns the protocol_client_xfer() result, or a negative value when
+ * the request could not be built (string, header or frame allocation
+ * failure, or dictionary serialization failure).
+ */
+int
+protocol_client_handshake (xlator_t *this, transport_t *trans)
+{
+        gf_hdr_common_t *hdr = NULL;
+        gf_mop_setvolume_req_t *req = NULL;
+        dict_t *options = NULL;
+        int32_t ret = -1;
+        int hdrlen = 0;
+        int dict_len = 0;
+        call_frame_t *fr = NULL;
+        char *process_uuid_xl = NULL;
+
+        options = this->options;
+        ret = dict_set_str (options, "version", PACKAGE_VERSION);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set version(%s) in options dictionary",
+                        PACKAGE_VERSION);
+        }
+
+        /* on failure asprintf() leaves the pointer undefined, so it must
+         * not be handed to the dictionary */
+        ret = asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid,
+                        this->name);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to build process-uuid string");
+                goto fail;
+        }
+
+        ret = dict_set_dynstr (options, "process-uuid",
+                               process_uuid_xl);
+        if (ret < 0) {
+                /* log the uuid parts, not the package version */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set process-uuid(%s-%s) in options dictionary",
+                        this->ctx->process_uuid, this->name);
+        }
+
+        dict_len = dict_serialized_length (options);
+        if (dict_len < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get serialized length of dict(%p)",
+                        options);
+                ret = dict_len;
+                goto fail;
+        }
+
+        hdrlen = gf_hdr_len (req, dict_len);
+        hdr = gf_hdr_new (req, dict_len);
+        /* ret may hold a success value from above; fail with -1 */
+        ret = -1;
+        GF_VALIDATE_OR_GOTO(this->name, hdr, fail);
+
+        req = gf_param (hdr);
+
+        ret = dict_serialize (options, req->buf);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to serialize dictionary(%p)",
+                        options);
+                goto fail;
+        }
+
+        req->dict_len = hton32 (dict_len);
+        fr = create_frame (this, this->ctx->pool);
+        /* same as above: do not return the serialize result on failure */
+        ret = -1;
+        GF_VALIDATE_OR_GOTO(this->name, fr, fail);
+
+        fr->local = trans;
+        ret = protocol_client_xfer (fr, this, trans,
+                                    GF_OP_TYPE_MOP_REQUEST, GF_MOP_SETVOLUME,
+                                    hdr, hdrlen, NULL, 0, NULL);
+        return ret;
+fail:
+        if (hdr)
+                free (hdr);
+        return ret;
+}
+
+
+/*
+ * protocol_client_pollout - record the time of the latest send
+ * @this: client protocol xlator (not used here)
+ * @trans: transport that became writable
+ *
+ * Stores the current time in conn->last_sent under conn->lock.
+ * Returns 0.
+ */
+int
+protocol_client_pollout (xlator_t *this, transport_t *trans)
+{
+        client_connection_t *conn = trans->xl_private;
+
+        pthread_mutex_lock (&conn->lock);
+        gettimeofday (&conn->last_sent, NULL);
+        pthread_mutex_unlock (&conn->lock);
+
+        return 0;
+}
+
+
+/*
+ * protocol_client_pollin - receive one reply and dispatch it
+ * @this: client protocol xlator
+ * @trans: transport that became readable
+ *
+ * Stores the current time in conn->last_received under conn->lock,
+ * receives one message from @trans and, when that succeeds, passes it
+ * to protocol_client_interpret(). The header is freed here; the
+ * payload buffer is not.
+ *
+ * Returns the transport_receive() result when it is non-zero,
+ * otherwise the protocol_client_interpret() result.
+ */
+int
+protocol_client_pollin (xlator_t *this, transport_t *trans)
+{
+        client_connection_t *conn = NULL;
+        int ret = -1;
+        char *buf = NULL;
+        size_t buflen = 0;
+        char *hdr = NULL;
+        size_t hdrlen = 0;
+
+        conn = trans->xl_private;
+
+        pthread_mutex_lock (&conn->lock);
+        {
+                gettimeofday (&conn->last_received, NULL);
+        }
+        pthread_mutex_unlock (&conn->lock);
+
+        ret = transport_receive (trans, &hdr, &hdrlen, &buf, &buflen);
+
+        if (ret == 0)
+        {
+                ret = protocol_client_interpret (this, trans, hdr, hdrlen,
+                                                 buf, buflen);
+        }
+
+        /* TODO: use mem-pool */
+        FREE (hdr);
+
+        return ret;
+}
+
+
+/*
+ * notify - event entry point for the client protocol translator
+ * @this: client protocol xlator
+ * @event: GF_EVENT_* code
+ * @data: treated as the transport_t the event refers to
+ *
+ * POLLOUT and POLLIN are forwarded to protocol_client_pollout() and
+ * protocol_client_pollin().
+ * POLLERR cleans up the transport and, if the connection was marked
+ * connected, sends GF_EVENT_CHILD_DOWN to every parent, clears the flag
+ * and starts the reconnect chain when no reconnect timer is pending.
+ * PARENT_UP starts a connect on each of the CHANNEL_MAX transports and
+ * sends GF_EVENT_CHILD_CONNECTING to every parent.
+ * CHILD_UP performs the handshake unless the "disable-handshake" option
+ * is "on"; a non-zero result disconnects the transport.
+ * Any other event is passed to default_notify().
+ *
+ * Returns the result of the handler that ran, or the initial -1 for the
+ * cases that never assign ret.
+ */
+
+int32_t
+notify (xlator_t *this,
+ int32_t event,
+ void *data,
+ ...)
+{
+ int ret = -1;
+ transport_t *trans = NULL;
+ client_connection_t *conn = NULL;
+
+ trans = data;
+
+ switch (event) {
+ case GF_EVENT_POLLOUT:
+ {
+ ret = protocol_client_pollout (this, trans);
+
+ break;
+ }
+ case GF_EVENT_POLLIN:
+ {
+ ret = protocol_client_pollin (this, trans);
+
+ break;
+ }
+ /* no break for ret check to happen below
+ * NOTE(review): the POLLIN case above breaks unconditionally, so
+ * nothing falls through into POLLERR from it - confirm intended */
+ case GF_EVENT_POLLERR:
+ {
+ ret = -1;
+ protocol_client_cleanup (trans);
+ }
+
+ conn = trans->xl_private; /* still part of the POLLERR case */
+ if (conn->connected) {
+ xlator_list_t *parent = NULL;
+
+ gf_log (this->name, GF_LOG_INFO, "disconnected");
+
+ parent = this->parents;
+ while (parent) {
+ parent->xlator->notify (parent->xlator,
+ GF_EVENT_CHILD_DOWN,
+ this);
+ parent = parent->next;
+ }
+
+ conn->connected = 0;
+ if (conn->reconnect == 0) /* no reconnect timer pending */
+ client_protocol_reconnect (trans);
+ }
+ break;
+
+ case GF_EVENT_PARENT_UP:
+ { /* NOTE(review): ret is never assigned in this case, so notify() returns -1 here - confirm callers ignore it */
+ xlator_list_t *parent = NULL;
+ client_conf_t *conf = NULL;
+ int i = 0;
+ transport_t *trans = NULL; /* shadows the outer trans; data is not used in this case */
+
+ conf = this->private;
+ for (i = 0; i < CHANNEL_MAX; i++) {
+ trans = conf->transport[i];
+ if (!trans) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "transport init failed");
+ return -1;
+ }
+
+ conn = trans->xl_private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_PARENT_UP, attempting connect "
+ "on transport");
+
+ client_protocol_reconnect (trans);
+ }
+
+ /* Let the connection/re-connection happen in
+ * background, for now, don't hang here,
+ * tell the parents that i am all ok..
+ */
+ parent = trans->xl->parents; /* trans is the last channel's transport here */
+ while (parent) {
+ parent->xlator->notify (parent->xlator,
+ GF_EVENT_CHILD_CONNECTING,
+ trans->xl);
+ parent = parent->next;
+ }
+ }
+ break;
+
+ case GF_EVENT_CHILD_UP:
+ {
+ char *handshake = NULL;
+
+ ret = dict_get_str (this->options, "disable-handshake",
+ &handshake);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_UP");
+ if ((ret < 0) ||
+ (strcasecmp (handshake, "on"))) { /* option absent, or set to anything but "on" */
+ ret = protocol_client_handshake (this, trans);
+ } else {
+ conn = trans->xl_private;
+ conn->connected = 1; /* handshake skipped: mark connected right away */
+ ret = default_notify (this, event, trans);
+ }
+
+ if (ret)
+ transport_disconnect (trans);
+
+ }
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got %d, calling default_notify ()", event);
+
+ default_notify (this, event, data); /* NOTE(review): result is dropped, notify() returns -1 here - confirm intended */
+ break;
+ }
+
+ return ret;
+}
+
+
+struct xlator_fops fops = { /* file operation entry points of this translator, each bound to its client_* implementation */
+ .stat = client_stat,
+ .readlink = client_readlink,
+ .mknod = client_mknod,
+ .mkdir = client_mkdir,
+ .unlink = client_unlink,
+ .rmdir = client_rmdir,
+ .symlink = client_symlink,
+ .rename = client_rename,
+ .link = client_link,
+ .chmod = client_chmod,
+ .chown = client_chown,
+ .truncate = client_truncate,
+ .utimens = client_utimens,
+ .open = client_open,
+ .readv = client_readv,
+ .writev = client_writev,
+ .statfs = client_statfs,
+ .flush = client_flush,
+ .fsync = client_fsync,
+ .setxattr = client_setxattr,
+ .getxattr = client_getxattr,
+ .removexattr = client_removexattr,
+ .opendir = client_opendir,
+ .readdir = client_readdir,
+ .fsyncdir = client_fsyncdir,
+ .access = client_access,
+ .ftruncate = client_ftruncate,
+ .fstat = client_fstat,
+ .create = client_create,
+ .lk = client_lk,
+ .inodelk = client_inodelk,
+ .finodelk = client_finodelk,
+ .entrylk = client_entrylk,
+ .fentrylk = client_fentrylk,
+ .lookup = client_lookup,
+ .fchmod = client_fchmod,
+ .fchown = client_fchown,
+ .setdents = client_setdents,
+ .getdents = client_getdents,
+ .checksum = client_checksum,
+ .xattrop = client_xattrop,
+ .fxattrop = client_fxattrop,
+};
+
+struct xlator_mops mops = { /* management operation entry points of this translator */
+ .stats = client_stats,
+ .getspec = client_getspec,
+};
+
+struct xlator_cbks cbks = { /* callback entry points of this translator (forget / release / releasedir) */
+ .forget = client_forget,
+ .release = client_release,
+ .releasedir = client_releasedir
+};
+
+
+struct volume_options options[] = { /* options declared by this translator; the list ends at the entry whose key is NULL */
+ { .key = {"username"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"password"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport-type"},
+ .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
+ "tcp/client", "ib-verbs/client"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"remote-host"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"remote-subvolume"}, /* init() fails when this option is missing */
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport-timeout"}, /* init() defaults this to 42 when unset */
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 5,
+ .max = 1013,
+ },
+ { .key = {"ping-timeout"}, /* init() defaults this to 10 when unset */
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 5,
+ .max = 1013,
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/protocol/client/src/client-protocol.h b/xlators/protocol/client/src/client-protocol.h
new file mode 100644
index 00000000000..c90cc980d83
--- /dev/null
+++ b/xlators/protocol/client/src/client-protocol.h
@@ -0,0 +1,173 @@
+/*
+ Copyright (c) 2006, 2007 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CLIENT_PROTOCOL_H
+#define _CLIENT_PROTOCOL_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <arpa/inet.h>
+#include "inode.h"
+#include "timer.h"
+#include "byte-order.h"
+
+#define CLIENT_PROTO_FORGET_LIMIT 128
+#define CLIENT_PORT_CIELING 1023
+
+#define GF_CLIENT_INODE_SELF 0
+#define GF_CLIENT_INODE_PARENT 1
+
+/* Private data of a client translator, typed. */
+#define CLIENT_CONF(this) ((client_conf_t *)((this)->private))
+
+/* True when nothing has been received within transport_timeout of
+ * _current. Arguments are parenthesized so that expressions such as
+ * `&conn` or `*now_ptr` expand correctly. */
+#define RECEIVE_TIMEOUT(_cprivate,_current)             \
+        (((_cprivate)->last_received.tv_sec +           \
+          (_cprivate)->transport_timeout) <             \
+         (_current).tv_sec)
+
+/* Same check as RECEIVE_TIMEOUT, against the last send time. */
+#define SEND_TIMEOUT(_cprivate,_current)                \
+        (((_cprivate)->last_sent.tv_sec +               \
+          (_cprivate)->transport_timeout) <             \
+         (_current).tv_sec)
+
+/* Index into client_conf_t.transport[]; CHANNEL_MAX is the array size. */
+enum {
+        CHANNEL_BULK = 0,
+        CHANNEL_LOWLAT = 1,
+        CHANNEL_MAX
+};
+
+/* Transport of translator @xl for channel @id. */
+#define CLIENT_CHANNEL(xl,id)                           \
+        (((client_conf_t *)((xl)->private))->transport[(id)])
+
+struct client_connection;
+typedef struct client_connection client_connection_t;
+
+#include "stack.h"
+#include "xlator.h"
+#include "transport.h"
+#include "protocol.h"
+
+struct _client_conf { /* translator-private data, reached through CLIENT_CONF() / CLIENT_CHANNEL() */
+ transport_t *transport[CHANNEL_MAX]; /* one transport per channel id (CHANNEL_BULK, CHANNEL_LOWLAT) */
+ xlator_t *child;
+
+ /* bookkeeping for batched 'forget' requests, needed where a lot
+ of stats are happening */
+ struct {
+ uint64_t ino_array[CLIENT_PROTO_FORGET_LIMIT + 4]; /* presumably inode numbers awaiting forget -- confirm in the .c file */
+ uint32_t count; /* presumably the number of used ino_array slots -- confirm */
+ uint32_t frames_in_transit;
+ gf_lock_t lock; /* NOTE(review): looks like it guards this sub-struct -- confirm */
+ } forget;
+ dict_t *saved_fds;
+ pthread_mutex_t mutex;
+};
+typedef struct _client_conf client_conf_t;
+
+/* This will be stored in transport_t->xl_private */
+struct client_connection { /* per-transport connection state */
+ pthread_mutex_t lock;
+ uint64_t callid;
+ struct saved_frames *saved_frames; /* container type declared in saved-frames.h */
+ int32_t transport_timeout; /* added to last_sent/last_received by SEND_TIMEOUT/RECEIVE_TIMEOUT */
+ int32_t ping_started;
+ int32_t ping_timeout;
+ gf_timer_t *reconnect;
+ char connected;
+ uint64_t max_block_size;
+ struct timeval last_sent; /* read by SEND_TIMEOUT */
+ struct timeval last_received; /* read by RECEIVE_TIMEOUT */
+ gf_timer_t *timer;
+ gf_timer_t *ping_timer;
+};
+
+typedef struct { /* per-call scratch data: up to two locations and an fd */
+ loc_t loc;
+ loc_t loc2; /* presumably the second path of two-path operations -- confirm against users */
+ fd_t *fd;
+} client_local_t;
+
+typedef struct { /* a prepared request: header, its length, and the frame it belongs to */
+ gf_hdr_common_t *hdr;
+ size_t hdrlen; /* length associated with hdr */
+ call_frame_t *frame;
+} client_forget_t;
+
+static inline void
+gf_string_to_stat(char *string, struct stat *stbuf) /* parse @string, laid out per GF_STAT_PRINT_FMT_STR, into @stbuf */
+{
+ uint64_t dev = 0; /* every field starts at 0, so an unparsed field ends up 0 in @stbuf */
+ uint64_t ino = 0;
+ uint32_t mode = 0;
+ uint32_t nlink = 0;
+ uint32_t uid = 0;
+ uint32_t gid = 0;
+ uint64_t rdev = 0;
+ uint64_t size = 0;
+ uint32_t blksize = 0;
+ uint64_t blocks = 0;
+ uint32_t atime = 0; /* timestamps travel as 32-bit values */
+ uint32_t atime_nsec = 0;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+ uint32_t ctime = 0;
+ uint32_t ctime_nsec = 0;
+
+ sscanf (string, GF_STAT_PRINT_FMT_STR, /* the conversion count returned by sscanf is not checked */
+ &dev,
+ &ino,
+ &mode,
+ &nlink,
+ &uid,
+ &gid,
+ &rdev,
+ &size,
+ &blksize,
+ &blocks,
+ &atime,
+ &atime_nsec,
+ &mtime,
+ &mtime_nsec,
+ &ctime,
+ &ctime_nsec); /* argument order must match the format string, which is defined elsewhere */
+
+ stbuf->st_dev = dev;
+ stbuf->st_ino = ino;
+ stbuf->st_mode = mode;
+ stbuf->st_nlink = nlink;
+ stbuf->st_uid = uid;
+ stbuf->st_gid = gid;
+ stbuf->st_rdev = rdev;
+ stbuf->st_size = size;
+ stbuf->st_blksize = blksize;
+ stbuf->st_blocks = blocks;
+
+ stbuf->st_atime = atime;
+ stbuf->st_mtime = mtime;
+ stbuf->st_ctime = ctime;
+
+ ST_ATIM_NSEC_SET(stbuf, atime_nsec); /* nanosecond parts are stored through the ST_*_NSEC_SET macros */
+ ST_MTIM_NSEC_SET(stbuf, mtime_nsec);
+ ST_CTIM_NSEC_SET(stbuf, ctime_nsec);
+
+}
+
+#endif
diff --git a/xlators/protocol/client/src/saved-frames.c b/xlators/protocol/client/src/saved-frames.c
new file mode 100644
index 00000000000..0d1366d8222
--- /dev/null
+++ b/xlators/protocol/client/src/saved-frames.c
@@ -0,0 +1,178 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#include "saved-frames.h"
+#include "common-utils.h"
+#include "protocol.h"
+#include "xlator.h"
+
+
+
+/* Allocate a zeroed saved_frames container with its three per-type
+ * lists (fops, mops, cbks) initialised to empty.
+ *
+ * Returns NULL when the allocation fails. */
+struct saved_frames *
+saved_frames_new (void)
+{
+        struct saved_frames *container = CALLOC (sizeof (*container), 1);
+
+        if (container != NULL) {
+                INIT_LIST_HEAD (&container->fops.list);
+                INIT_LIST_HEAD (&container->mops.list);
+                INIT_LIST_HEAD (&container->cbks.list);
+        }
+
+        return container;
+}
+
+
+/* Map an op type (request or reply flavour) to the list head inside
+ * @frames that tracks frames of that type.
+ *
+ * Returns NULL for a type that is none of FOP/MOP/CBK. */
+struct saved_frame *
+get_head_frame_for_type (struct saved_frames *frames, int8_t type)
+{
+        switch (type) {
+        case GF_OP_TYPE_FOP_REQUEST:
+        case GF_OP_TYPE_FOP_REPLY:
+                return &frames->fops;
+
+        case GF_OP_TYPE_MOP_REQUEST:
+        case GF_OP_TYPE_MOP_REPLY:
+                return &frames->mops;
+
+        case GF_OP_TYPE_CBK_REQUEST:
+        case GF_OP_TYPE_CBK_REPLY:
+                return &frames->cbks;
+
+        default:
+                return NULL;
+        }
+}
+
+
+/* saved_frames_put - remember @frame until the reply for @callid arrives.
+ *
+ * @frames - container created by saved_frames_new()
+ * @frame  - call frame to park
+ * @op     - operation number, stored with the entry
+ * @type   - GF_OP_TYPE_* value selecting the fops/mops/cbks list
+ * @callid - identifier later matched by saved_frames_get()
+ *
+ * return - 0 on success, -EINVAL when @type selects no list,
+ *          -ENOMEM when the entry cannot be allocated.
+ */
+int
+saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
+                  int32_t op, int8_t type, int64_t callid)
+{
+        struct saved_frame *saved_frame = NULL;
+        struct saved_frame *head_frame  = NULL;
+
+        head_frame = get_head_frame_for_type (frames, type);
+        if (!head_frame) {
+                /* unknown type: there is no list to queue the frame on */
+                return -EINVAL;
+        }
+
+        saved_frame = CALLOC (sizeof (*saved_frame), 1);
+        if (!saved_frame) {
+                return -ENOMEM;
+        }
+
+        INIT_LIST_HEAD (&saved_frame->list);
+        saved_frame->frame  = frame;
+        saved_frame->op     = op;
+        saved_frame->type   = type;
+        saved_frame->callid = callid;
+        /* saved_at is left zeroed; no timestamp is recorded here */
+
+        list_add (&saved_frame->list, &head_frame->list);
+        frames->count++;
+
+        return 0;
+}
+
+
+/* saved_frames_get - take back the frame that was parked for @callid.
+ *
+ * @frames - container the frame was put into
+ * @op     - not used for matching
+ * @type   - GF_OP_TYPE_* value selecting the fops/mops/cbks list
+ * @callid - identifier given to saved_frames_put()
+ *
+ * return - the parked call frame, or NULL when @type selects no list or
+ *          no entry with @callid is queued. The bookkeeping entry is
+ *          unlinked and freed; the frame itself is handed to the caller.
+ */
+call_frame_t *
+saved_frames_get (struct saved_frames *frames, int32_t op,
+                  int8_t type, int64_t callid)
+{
+        struct saved_frame *saved_frame = NULL;
+        struct saved_frame *tmp         = NULL;
+        struct saved_frame *head_frame  = NULL;
+        call_frame_t       *frame       = NULL;
+
+        head_frame = get_head_frame_for_type (frames, type);
+        if (!head_frame) {
+                /* unknown type: nothing can have been saved for it */
+                return NULL;
+        }
+
+        list_for_each_entry (tmp, &head_frame->list, list) {
+                if (tmp->callid == callid) {
+                        list_del_init (&tmp->list);
+                        frames->count--;
+                        saved_frame = tmp;
+                        break;
+                }
+        }
+
+        if (saved_frame) {
+                frame = saved_frame->frame;
+                FREE (saved_frame);
+        }
+
+        return frame;
+}
+
+
+void
+saved_frames_unwind (xlator_t *this, struct saved_frames *saved_frames,
+ struct saved_frame *head,
+ gf_op_t gf_ops[], char *gf_op_list[]) /* fail every frame queued on @head with a synthetic ENOTCONN reply */
+{
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+
+ gf_hdr_common_t hdr = {0, };
+ call_frame_t *frame = NULL;
+ dict_t *reply = NULL;
+
+ reply = get_new_dict(); /* NOTE(review): the result is not checked for NULL */
+ dict_ref (reply);
+
+ hdr.rsp.op_ret = hton32 (-1); /* the fake reply header says: failed, not connected */
+ hdr.rsp.op_errno = hton32 (ENOTCONN);
+
+ list_for_each_entry_safe (trav, tmp, &head->list, list) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "forced unwinding frame type(%d) op(%s)",
+ trav->type, gf_op_list[trav->op]); /* @gf_op_list supplies the printable op name */
+
+ hdr.type = hton32 (trav->type);
+ hdr.op = hton32 (trav->op);
+
+ frame = trav->frame;
+ frame->root->rsp_refs = reply; /* every unwound frame shares the one reply dict */
+
+ saved_frames->count--;
+
+ gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL, 0); /* invoke the reply handler for this op with the fake header */
+
+ list_del_init (&trav->list); /* the entry is unlinked and freed only after the handler returns */
+ FREE (trav);
+ }
+
+ dict_unref (reply);
+}
+
+
+/* Unwind every frame still parked in @frames, one list at a time
+ * (fops, then mops, then cbks), each with its own reply-handler table
+ * and op-name table, and then release the container itself. */
+void
+saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
+                      gf_op_t gf_fops[], gf_op_t gf_mops[], gf_op_t gf_cbks[])
+{
+        /* file operations */
+        saved_frames_unwind (this, frames, &frames->fops,
+                             gf_fops, gf_fop_list);
+        /* management operations */
+        saved_frames_unwind (this, frames, &frames->mops,
+                             gf_mops, gf_mop_list);
+        /* callbacks */
+        saved_frames_unwind (this, frames, &frames->cbks,
+                             gf_cbks, gf_cbk_list);
+
+        FREE (frames);
+}
diff --git a/xlators/protocol/client/src/saved-frames.h b/xlators/protocol/client/src/saved-frames.h
new file mode 100644
index 00000000000..e402feba33b
--- /dev/null
+++ b/xlators/protocol/client/src/saved-frames.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _SAVED_FRAMES_H
+#define _SAVED_FRAMES_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+#include <sys/time.h>
+#include "stack.h"
+#include "list.h"
+#include "protocol.h"
+
+/* UGLY: this typedef is shared between saved-frames.c and protocol-client.c.
+ * It is the signature of a reply handler: it receives the call frame, the
+ * common header with its length, and a payload buffer with its length. */
+typedef int32_t (*gf_op_t) (call_frame_t *frame,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen);
+
+
+struct saved_frame { /* one parked call frame; also used as the list head inside struct saved_frames */
+ union { /* two views of the same link storage */
+ struct list_head list;
+ struct {
+ struct saved_frame *frame_next;
+ struct saved_frame *frame_prev;
+ };
+ };
+
+ struct timeval saved_at; /* not filled in by saved_frames_put(), so it stays zero */
+ call_frame_t *frame; /* the frame waiting for its reply */
+ int32_t op;
+ int8_t type; /* GF_OP_TYPE_* value, selects the list */
+ uint64_t callid; /* key matched by saved_frames_get() */
+};
+
+
+struct saved_frames { /* all frames parked on one connection, split by op type */
+ int64_t count; /* incremented on put, decremented on get and on unwind */
+ struct saved_frame fops; /* list head for FOP request/reply types */
+ struct saved_frame mops; /* list head for MOP request/reply types */
+ struct saved_frame cbks; /* list head for CBK request/reply types */
+};
+
+
+/* Allocate an empty container; NULL on allocation failure. */
+struct saved_frames *saved_frames_new (void);
+
+/* Park @frame under @callid on the list selected by @type;
+ * returns 0 or a negative errno value. */
+int saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
+                      int32_t op, int8_t type, int64_t callid);
+
+/* Remove and return the frame parked under @callid, or NULL. */
+call_frame_t *saved_frames_get (struct saved_frames *frames, int32_t op,
+                                int8_t type, int64_t callid);
+
+/* Unwind every parked frame through the given handler tables and
+ * free @frames. */
+void saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
+                           gf_op_t gf_fops[], gf_op_t gf_mops[],
+                           gf_op_t gf_cbks[]);
+
+#endif /* _SAVED_FRAMES_H */
diff --git a/xlators/protocol/server/Makefile.am b/xlators/protocol/server/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/protocol/server/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am
new file mode 100644
index 00000000000..dcd92aeedd9
--- /dev/null
+++ b/xlators/protocol/server/src/Makefile.am
@@ -0,0 +1,18 @@
+
+xlator_LTLIBRARIES = server.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
+
+server_la_LDFLAGS = -module -avoid-version
+
+server_la_SOURCES = server-protocol.c server-dentry.c server-helpers.c
+server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = server-protocol.h server-helpers.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
+ -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/protocol/server/src/server-dentry.c b/xlators/protocol/server/src/server-dentry.c
new file mode 100644
index 00000000000..d3a69a393fc
--- /dev/null
+++ b/xlators/protocol/server/src/server-dentry.c
@@ -0,0 +1,413 @@
+#include "glusterfs.h"
+#include "xlator.h"
+#include "server-protocol.h"
+#include "server-helpers.h"
+#include <libgen.h>
+
+/* SERVER_DENTRY_STATE_PREPARE - prepare a fresh state for use
+ *
+ * @state    - an empty state
+ * @loc      - loc_t which needs to be resolved
+ * @parent   - most immediate parent of @loc available in dentry cache
+ * @resolved - component of @loc->path which has been resolved
+ *             through dentry cache
+ *
+ * After expansion @state->path holds a copy of the complete path,
+ * @state->resolved a copy of the resolved prefix (only when @resolved is
+ * non-NULL), and @state->loc.path the prefix plus the next component.
+ * Every use of the parameters goes through the parameter itself, so the
+ * macro no longer depends on the caller naming its variable 'loc'.
+ * NOTE(review): the CALLOC/strdup/memdup results are not checked.
+ */
+#define SERVER_DENTRY_STATE_PREPARE(_state,_loc,_parent,_resolved) do { \
+        size_t pathlen = 0;                                             \
+        size_t resolvedlen = 0;                                         \
+        char *path = NULL;                                              \
+        int pad = 0;                                                    \
+        pathlen = strlen ((_loc)->path) + 1;                            \
+        path = CALLOC (1, pathlen);                                     \
+        (_state)->loc.parent = inode_ref (_parent);                     \
+        (_state)->loc.inode = inode_new ((_state)->itable);             \
+        if (_resolved) {                                                \
+                resolvedlen = strlen (_resolved);                       \
+                strncpy (path, (_resolved), resolvedlen);               \
+                (_state)->resolved = memdup (path, pathlen);            \
+                if (resolvedlen == 1) /* only root resolved */          \
+                        pad = 0;                                        \
+                else {                                                  \
+                        pad = 1;                                        \
+                        path[resolvedlen] = '/';                        \
+                }                                                       \
+                strcpy_till (path + resolvedlen + pad,                  \
+                             (_loc)->path + resolvedlen + pad, '/');    \
+        } else {                                                        \
+                strncpy (path, (_loc)->path, pathlen);                  \
+        }                                                               \
+        (_state)->loc.path = path;                                      \
+        (_state)->loc.name = strrchr (path, '/');                       \
+        if ((_state)->loc.name)                                         \
+                (_state)->loc.name++;                                   \
+        (_state)->path = strdup ((_loc)->path);                         \
+        } while (0)
+
+/* SERVER_DENTRY_UPDATE_STATE - update a server_state_t, to prepare state
+ * for the lookup of the next path component
+ *
+ * @state - state to be updated.
+ *
+ * The current loc.path is recorded as the resolved prefix. If it already
+ * equals the complete path, @state->resolved is freed and control jumps to
+ * a label named 'resume', which the expansion site must provide. Otherwise
+ * the next component of @state->path is appended to loc.path in place,
+ * loc.name is re-derived, the looked-up inode becomes the new parent and a
+ * fresh inode is created for the next lookup.
+ */
+#define SERVER_DENTRY_UPDATE_STATE(_state) do { \
+ char *path = NULL; \
+ size_t pathlen = 0; \
+ strcpy (_state->resolved, _state->loc.path); \
+ pathlen = strlen (_state->loc.path); \
+ if (!strcmp (_state->resolved, _state->path)) { \
+ free (_state->resolved); \
+ _state->resolved = NULL; \
+ goto resume; \
+ } \
+ \
+ path = (char *)(_state->loc.path + pathlen); \
+ path[0] = '/'; \
+ strcpy_till (path + 1, \
+ _state->path + pathlen + 1, '/'); \
+ _state->loc.name = strrchr (_state->loc.path, '/'); \
+ if (_state->loc.name) \
+ _state->loc.name++; \
+ inode_unref (_state->loc.parent); \
+ _state->loc.parent = inode_ref (_state->loc.inode); \
+ inode_unref (_state->loc.inode); \
+ _state->loc.inode = inode_new (_state->itable); \
+ }while (0);
+
+/* SERVER_STATE_CLEANUP - release a state and everything it owns: the
+ * resolved prefix, the complete path, the loc and the state itself.
+ *
+ * NOTE: should be used only for a state which was created by
+ * __do_path_resolve; using it on any other state will result in double
+ * free corruption.
+ *
+ * Expands to a single statement; the caller supplies the semicolon.
+ */
+#define SERVER_STATE_CLEANUP(_state) do {        \
+        if ((_state)->resolved)                  \
+                free ((_state)->resolved);       \
+        if ((_state)->path)                      \
+                free ((_state)->path);           \
+        server_loc_wipe (&(_state)->loc);        \
+        free_state (_state);                     \
+        } while (0)
+
+/* strcpy_till - copy @dname to @dest until @delim or the end of @dname
+ *               is reached; @dest is always NUL-terminated.
+ * @dest  - destination string
+ * @dname - source string
+ * @delim - delimiter character
+ *
+ * return - NULL if the terminating '\0' of @dname was reached, otherwise
+ *          a pointer to the delimiter inside @dname, i.e. the beginning
+ *          of the part that was not copied.
+ */
+static char *
+strcpy_till (char *dest, const char *dname, char delim)
+{
+        const char *in  = dname;
+        char       *out = dest;
+
+        for (; *in != '\0' && *in != delim; in++, out++)
+                *out = *in;
+
+        *out = '\0';
+
+        return (*in == '\0') ? NULL : (char *) in;
+}
+
+/* __server_path_to_parenti - derive parent inode for @path. if immediate
+ *                            parent is not available in the dentry cache,
+ *                            return nearest available parent inode and set
+ *                            @reslv to the path of the returned directory.
+ *
+ * @itable - inode table
+ * @path   - path whose parent has to be looked up.
+ * @reslv  - if immediate parent is not available, reslv will be set to the
+ *           path of the resolved parent (heap-allocated, the caller frees
+ *           it). It is left untouched otherwise.
+ *
+ * return - a referenced inode, at least the root ('/') inode; NULL only
+ *          when memory could not be allocated.
+ */
+static inode_t *
+__server_path_to_parenti (inode_table_t *itable,
+                          const char *path,
+                          char **reslv)
+{
+        char    *resolved_till  = NULL;
+        char    *strtokptr      = NULL;
+        char    *component      = NULL;
+        char    *next_component = NULL;
+        char    *pathdup        = NULL;
+        inode_t *curr           = NULL;
+        inode_t *parent         = NULL;
+        size_t   pathlen        = 0;
+
+
+        pathlen = STRLEN_0 (path);
+        /* every component is appended as "/" + name, so a path without a
+           leading '/' needs one byte more than the path itself */
+        resolved_till = CALLOC (1, pathlen + 1);
+
+        GF_VALIDATE_OR_GOTO("server-dentry", resolved_till, out);
+        pathdup = strdup (path);
+        if (!pathdup) {
+                /* do not leak the prefix buffer on the failure path */
+                free (resolved_till);
+                resolved_till = NULL;
+        }
+        GF_VALIDATE_OR_GOTO("server-dentry", pathdup, out);
+
+        parent = inode_ref (itable->root);
+        curr = NULL;
+
+        component = strtok_r (pathdup, "/", &strtokptr);
+
+        while (component) {
+                curr = inode_search (itable, parent->ino, component);
+                if (!curr) {
+                        /* if current component was the last component
+                           set it to NULL
+                        */
+                        component = strtok_r (NULL, "/", &strtokptr);
+                        break;
+                }
+
+                /* It is OK to append the component even if it is the
+                   last component in the path, because, if 'next_component'
+                   returns NULL, @parent will remain the same and
+                   @resolved_till will not be sent back
+                */
+
+                strcat (resolved_till, "/");
+                strcat (resolved_till, component);
+
+                next_component = strtok_r (NULL, "/", &strtokptr);
+
+                if (next_component) {
+                        inode_unref (parent);
+                        parent = curr;
+                        curr = NULL;
+                } else {
+                        /* will break */
+                        inode_unref (curr);
+                }
+
+                component = next_component;
+        }
+
+        free (pathdup);
+
+        if (component) {
+                *reslv = resolved_till;
+        } else {
+                free (resolved_till);
+        }
+out:
+        return parent;
+}
+
+
+/* __do_path_resolve_cbk - lookup callback that resolves a path one
+ * component at a time and resumes the waiting stub when it is done.
+ *
+ * @frame - resolver frame; frame->local holds the stub to resume
+ * @cookie - not used here
+ * @this - not used here
+ * @op_ret - lookup result; -1 stops the resolution and resumes the stub with the error
+ * @op_errno - errno handed on to server_stub_resume
+ * @inode - inode returned by the lookup; linked into the table when its ino is still 0
+ * @stbuf - stat of @inode, passed to inode_link
+ * @dict - not used here
+ *
+ */
+static int32_t
+__do_path_resolve_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stbuf,
+ dict_t *dict)
+{
+ server_state_t *state = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *parent = NULL;
+
+ stub = frame->local;
+ state = CALL_STATE(frame);
+
+ parent = state->loc.parent;
+
+ if (op_ret == -1) {
+ if (strcmp (state->path, state->loc.path)) /* failed on an intermediate component: report no parent */
+ parent = NULL;
+
+ server_stub_resume (stub, op_ret, op_errno, NULL, parent);
+ goto cleanup; /* jumps into the else block below */
+ } else {
+ if (inode->ino == 0) {
+ gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+ "looked up for %s (%"PRId64"/%s)",
+ state->loc.path, state->loc.parent->ino, state->loc.name);
+ inode_link (inode, state->loc.parent, state->loc.name, stbuf);
+ inode_lookup (inode);
+ }
+
+ if (state->resolved) {
+ SERVER_DENTRY_UPDATE_STATE(state); /* may 'goto resume' when the whole path is resolved */
+
+ gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+ "looking up for %s (%"PRId64"/%s)",
+ state->loc.path, state->loc.parent->ino, state->loc.name);
+
+ STACK_WIND (frame,
+ __do_path_resolve_cbk,
+ BOUND_XL(frame),
+ BOUND_XL(frame)->fops->lookup,
+ &(state->loc),
+ 0); /* look up the next component, with this function as the callback again */
+
+ goto out;
+ }
+ resume:
+ /* we are done, call stub_resume() to do rest of the job */
+ server_stub_resume (stub, op_ret, op_errno, inode, parent);
+ cleanup:
+ SERVER_STATE_CLEANUP(state);
+ /* stub will be freed by stub_resume, leave no traces */
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+out:
+ return 0;
+}
+
+/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent.
+ *                     also update the dentry cache
+ *
+ * @stub - call stub to resume after resolving @loc->path
+ * @loc  - loc to resolve before resuming @stub.
+ *
+ * A private frame/state pair is created and a lookup is wound for the
+ * first unresolved component; __do_path_resolve_cbk takes over from there.
+ *
+ * return - always -1; the return value of __do_path_resolve doesn't
+ *          matter to the caller, if @stub is not NULL.
+ */
+static int32_t
+__do_path_resolve (call_stub_t *stub,
+                   const loc_t *loc)
+{
+        int32_t         ret       = -1;
+        char           *resolved  = NULL;
+        char           *to_free   = NULL;  /* heap block backing @resolved */
+        call_frame_t   *new_frame = NULL;
+        server_state_t *state     = NULL, *new_state = NULL;
+        inode_t        *parent    = NULL;
+
+        state = CALL_STATE(stub->frame);
+        parent = loc->parent;
+        if (parent) {
+                inode_ref (parent);
+                gf_log (BOUND_XL(stub->frame)->name, GF_LOG_DEBUG,
+                        "loc->parent(%"PRId64") already present. sending lookup "
+                        "for %"PRId64"/%s", parent->ino, parent->ino, loc->name);
+                /* dirname() may hand back a pointer to static storage,
+                   so remember the strdup()ed block separately and free
+                   that, never dirname()'s result */
+                to_free = strdup (loc->path);
+                resolved = dirname (to_free);
+        } else {
+                parent = __server_path_to_parenti (state->itable, loc->path, &resolved);
+                to_free = resolved;
+        }
+
+        if (parent == NULL) {
+                /* fire in the bush.. run! run!! run!!! */
+                gf_log ("server",
+                        GF_LOG_CRITICAL,
+                        "failed to get parent inode number");
+                goto panic;
+        }
+
+        if (resolved) {
+                gf_log (BOUND_XL(stub->frame)->name,
+                        GF_LOG_DEBUG,
+                        "resolved path(%s) till %"PRId64"(%s). "
+                        "sending lookup for remaining path",
+                        loc->path, parent->ino, resolved);
+        }
+
+        {
+                new_frame = server_copy_frame (stub->frame);
+                new_state = CALL_STATE(new_frame);
+
+                SERVER_DENTRY_STATE_PREPARE(new_state, loc, parent, resolved);
+
+                if (parent)
+                        inode_unref (parent); /* __server_path_to_parenti()'s inode_ref */
+                free (to_free);
+                /* now interpret state as:
+                 * state->path - complete pathname to resolve
+                 * state->resolved - pathname resolved from dentry cache
+                 */
+                new_frame->local = stub;
+                STACK_WIND (new_frame,
+                            __do_path_resolve_cbk,
+                            BOUND_XL(new_frame),
+                            BOUND_XL(new_frame)->fops->lookup,
+                            &(new_state->loc),
+                            0);
+                goto out;
+        }
+panic:
+        free (to_free);
+        server_stub_resume (stub, -1, ENOENT, NULL, NULL);
+out:
+        return ret;
+}
+
+
+/*
+ * do_path_lookup - transform a pathname into inode, with the complete
+ *                  dentry tree up to the inode built.
+ *
+ * @stub - call stub to resume after completing pathname to inode transform
+ * @loc  - location. valid fields that do_path_lookup() uses in @loc are
+ *         @loc->path - pathname
+ *         @loc->name - used for logging only
+ *
+ * When both the inode and its parent are already in the inode table the
+ * stub is resumed right away; otherwise resolution is handed over to
+ * __do_path_resolve().
+ *
+ * return - always 0.
+ */
+int32_t
+do_path_lookup (call_stub_t *stub,
+                const loc_t *loc)
+{
+        char           *pathname  = NULL;
+        char           *directory = NULL;
+        inode_t        *inode     = NULL;
+        inode_t        *parent    = NULL;
+        server_state_t *state     = NULL;
+
+        state = CALL_STATE(stub->frame);
+
+        inode = inode_from_path (state->itable, loc->path);
+        pathname  = strdup (loc->path);
+        directory = dirname (pathname);
+        parent = inode_from_path (state->itable, directory);
+
+        if (inode && parent) {
+                gf_log (BOUND_XL(stub->frame)->name,
+                        GF_LOG_DEBUG,
+                        "resolved path(%s) to %"PRId64"/%"PRId64"(%s)",
+                        loc->path, parent->ino, inode->ino, loc->name);
+                server_stub_resume (stub, 0, 0, inode, parent);
+                inode_unref (inode);
+                inode_unref (parent);
+        } else {
+                gf_log (BOUND_XL(stub->frame)->name,
+                        GF_LOG_DEBUG,
+                        "resolved path(%s) to %p(%"PRId64")/%p(%"PRId64")",
+                        loc->path, parent, (parent ? parent->ino : 0),
+                        inode, (inode ? inode->ino : 0));
+                if (parent) {
+                        inode_unref (parent);
+                } else if (inode) {
+                        /* log first: the inode must not be touched once
+                           our reference has been dropped */
+                        gf_log (BOUND_XL(stub->frame)->name,
+                                GF_LOG_ERROR,
+                                "undesired behaviour. inode(%"PRId64") for %s "
+                                "exists without parent (%s)",
+                                inode->ino, loc->path, directory);
+                        inode_unref (inode);
+                }
+                __do_path_resolve (stub, loc);
+        }
+
+        if (pathname)
+                free (pathname);
+
+        return 0;
+}
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
new file mode 100644
index 00000000000..b51c11aa994
--- /dev/null
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -0,0 +1,586 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "server-protocol.h"
+#include "server-helpers.h"
+
+
+/* server_loc_fill - derive a loc_t for a given inode number
+ *
+ * Fills loc->ino, loc->inode, loc->parent, loc->path and loc->name from
+ * the inode table of @state, using @ino, or @par together with @name, to
+ * find the inode. loc->path is the dentry path when one can be built and
+ * a copy of the client supplied @path otherwise.
+ *
+ * NOTE: make sure that @loc is empty, because any pointers it holds with reference will
+ * be leaked after returning from here.
+ */
+int
+server_loc_fill (loc_t *loc, server_state_t *state,
+ ino_t ino, ino_t par,
+ const char *name, const char *path)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ int32_t ret = -1;
+ char *dentry_path = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("server", loc, out);
+ GF_VALIDATE_OR_GOTO ("server", state, out);
+ GF_VALIDATE_OR_GOTO ("server", path, out);
+
+ /* anything beyond this point is success */
+ ret = 0;
+ loc->ino = ino;
+ inode = loc->inode;
+ if (inode == NULL) {
+ if (ino)
+ inode = inode_search (state->itable, ino, NULL); /* first try by inode number */
+
+ if ((inode == NULL) &&
+ (par && name))
+ inode = inode_search (state->itable, par, name); /* then by parent number + name */
+
+ loc->inode = inode;
+ if (inode)
+ loc->ino = inode->ino;
+ }
+
+ parent = loc->parent;
+ if (parent == NULL) {
+ if (inode)
+ parent = inode_parent (inode, par, name);
+ else
+ parent = inode_search (state->itable, par, NULL);
+ loc->parent = parent;
+ }
+
+ if (name && parent) {
+ ret = inode_path (parent, name, &dentry_path); /* NOTE(review): overwrites ret, so inode_path()'s result is what gets returned -- confirm callers expect that */
+ if (ret < 0) {
+ gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+ "failed to build path for %"PRId64"/%s: %s",
+ parent->ino, name, strerror (-ret));
+ }
+ } else if (inode) {
+ ret = inode_path (inode, NULL, &dentry_path);
+ if (ret < 0) {
+ gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+ "failed to build path for %"PRId64": %s",
+ inode->ino, strerror (-ret));
+
+ inode_unref (loc->inode); /* drop the inode whose path could not be built */
+ loc->inode = NULL;
+ }
+ }
+
+ if (dentry_path) {
+ if (strcmp (dentry_path, path)) { /* a mismatch is only logged; the dentry path is used */
+ gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+ "paths differ for inode(%"PRId64"): "
+ "client path = %s. dentry path = %s",
+ ino, path, dentry_path);
+ }
+
+ loc->path = dentry_path;
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ } else {
+ loc->path = strdup (path); /* fall back to the path sent by the client */
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * stat_to_str - convert struct stat to an ASCII string
+ * @stbuf: struct stat pointer
+ *
+ * The fields are printed with GF_STAT_PRINT_FMT_STR; timestamps are
+ * narrowed to 32 bits before printing.
+ *
+ * return - heap-allocated string which the caller frees, or NULL when
+ *          the string could not be allocated.
+ *
+ * not for external reference
+ */
+char *
+stat_to_str (struct stat *stbuf)
+{
+        char *tmp_buf = NULL;
+        int   ret     = -1;
+
+        uint64_t dev     = stbuf->st_dev;
+        uint64_t ino     = stbuf->st_ino;
+        uint32_t mode    = stbuf->st_mode;
+        uint32_t nlink   = stbuf->st_nlink;
+        uint32_t uid     = stbuf->st_uid;
+        uint32_t gid     = stbuf->st_gid;
+        uint64_t rdev    = stbuf->st_rdev;
+        uint64_t size    = stbuf->st_size;
+        uint32_t blksize = stbuf->st_blksize;
+        uint64_t blocks  = stbuf->st_blocks;
+        uint32_t atime   = stbuf->st_atime;
+        uint32_t mtime   = stbuf->st_mtime;
+        uint32_t ctime   = stbuf->st_ctime;
+
+        uint32_t atime_nsec = ST_ATIM_NSEC(stbuf);
+        uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf);
+        uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf);
+
+
+        ret = asprintf (&tmp_buf,
+                        GF_STAT_PRINT_FMT_STR,
+                        dev,
+                        ino,
+                        mode,
+                        nlink,
+                        uid,
+                        gid,
+                        rdev,
+                        size,
+                        blksize,
+                        blocks,
+                        atime,
+                        atime_nsec,
+                        mtime,
+                        mtime_nsec,
+                        ctime,
+                        ctime_nsec);
+        if (ret < 0) {
+                /* on failure asprintf leaves the pointer undefined */
+                return NULL;
+        }
+
+        return tmp_buf;
+}
+
+
+/* Drop the references held by @loc (parent first, then inode) and free
+ * its path string. The loc_t itself is not freed and its fields are not
+ * reset. */
+void
+server_loc_wipe (loc_t *loc)
+{
+        if (loc->parent != NULL) {
+                inode_unref (loc->parent);
+        }
+
+        if (loc->inode != NULL) {
+                inode_unref (loc->inode);
+        }
+
+        if (loc->path != NULL) {
+                free ((char *)loc->path);
+        }
+}
+
+/* Release a server_state_t: unref its fd (if any), unref its transport,
+ * unref the xattr request dict (if any) and free the state itself. */
+void
+free_state (server_state_t *state)
+{
+        transport_t *owner = state->trans;
+
+        if (state->fd != NULL) {
+                fd_unref (state->fd);
+        }
+
+        transport_unref (owner);
+
+        if (state->xattr_req != NULL) {
+                dict_unref (state->xattr_req);
+        }
+
+        FREE (state);
+}
+
+
+/* Duplicate @frame together with a fresh server_state_t.
+ *
+ * The copy carries the op, type and transport of the original; its new
+ * state shares bound_xl and itable with the original state and holds its
+ * own reference on the transport.
+ * NOTE(review): neither copy_frame() nor CALLOC() is checked for failure. */
+call_frame_t *
+server_copy_frame (call_frame_t *frame)
+{
+        server_state_t *orig_state = frame->root->state;
+        call_frame_t   *copy       = copy_frame (frame);
+        server_state_t *copy_state = CALLOC (1, sizeof (server_state_t));
+
+        copy->root->op    = frame->root->op;
+        copy->root->type  = frame->root->type;
+        copy->root->trans = orig_state->trans;
+        copy->root->state = copy_state;
+
+        copy_state->bound_xl = orig_state->bound_xl;
+        copy_state->trans    = transport_ref (orig_state->trans);
+        copy_state->itable   = orig_state->itable;
+
+        return copy;
+}
+
+/* gf_add_locker - record that @pid holds a lock on @fd, or on @loc when
+ *                 @fd is NULL, so that it can be released later.
+ *
+ * @table - lock table of the connection
+ * @loc   - location that was locked; copied, used only when @fd is NULL
+ * @fd    - fd that was locked; a reference is taken
+ * @pid   - owner of the lock
+ *
+ * Directory and file lockers are kept on separate lists.
+ *
+ * return - 0 on success, -1 when the entry cannot be allocated.
+ */
+int32_t
+gf_add_locker (struct _lock_table *table,
+               loc_t *loc,
+               fd_t *fd,
+               pid_t pid)
+{
+        int32_t         ret = -1;
+        struct _locker *new = NULL;
+        uint8_t         dir = 0;
+
+        new = CALLOC (1, sizeof (struct _locker));
+        if (new == NULL) {
+                gf_log ("server", GF_LOG_ERROR,
+                        "failed to allocate memory for \'struct _locker\'");
+                goto out;
+        }
+        INIT_LIST_HEAD (&new->lockers);
+
+        if (fd == NULL) {
+                loc_copy (&new->loc, loc);
+                dir = S_ISDIR (new->loc.inode->st_mode);
+        } else {
+                new->fd = fd_ref (fd);
+                dir = S_ISDIR (fd->inode->st_mode);
+        }
+
+        new->pid = pid;
+
+        LOCK (&table->lock);
+        {
+                if (dir)
+                        list_add_tail (&new->lockers, &table->dir_lockers);
+                else
+                        list_add_tail (&new->lockers, &table->file_lockers);
+        }
+        UNLOCK (&table->lock);
+
+        /* the entry is queued: report success */
+        ret = 0;
+out:
+        return ret;
+}
+
+/* gf_del_locker - forget the lockers of @pid on @fd, or on the inode of
+ *                 @loc.
+ *
+ * Matching entries are moved to a private list while the table lock is
+ * held and are released (fd unref or loc wipe, then free) after the lock
+ * has been dropped.
+ *
+ * return - always 0.
+ */
+int32_t
+gf_del_locker (struct _lock_table *table,
+               loc_t *loc,
+               fd_t *fd,
+               pid_t pid)
+{
+        struct _locker   *entry    = NULL;
+        struct _locker   *next     = NULL;
+        struct list_head *lockers  = NULL;
+        struct list_head  doomed;
+        uint8_t           is_dir   = 0;
+        int               by_fd    = 0;
+        int               by_inode = 0;
+
+        INIT_LIST_HEAD (&doomed);
+
+        /* the inode type decides which of the two lists is searched */
+        is_dir = fd ? S_ISDIR (fd->inode->st_mode)
+                    : S_ISDIR (loc->inode->st_mode);
+
+        LOCK (&table->lock);
+        {
+                lockers = is_dir ? &table->dir_lockers : &table->file_lockers;
+
+                list_for_each_entry_safe (entry, next, lockers, lockers) {
+                        by_fd = (entry->fd && fd &&
+                                 (entry->fd == fd) && (entry->pid == pid));
+                        by_inode = (entry->loc.inode && loc &&
+                                    (entry->loc.inode == loc->inode) &&
+                                    (entry->pid == pid));
+
+                        if (by_fd || by_inode)
+                                list_move_tail (&entry->lockers, &doomed);
+                }
+        }
+        UNLOCK (&table->lock);
+
+        next  = NULL;
+        entry = NULL;
+
+        list_for_each_entry_safe (entry, next, &doomed, lockers) {
+                list_del_init (&entry->lockers);
+
+                if (entry->fd)
+                        fd_unref (entry->fd);
+                else
+                        loc_wipe (&entry->loc);
+
+                free (entry);
+        }
+
+        return 0;
+}
+
+/* gf_direntry_to_bin - serialise a directory entry list into one string.
+ *
+ * @head    - list head; the entries start at head->next
+ * @bufferp - receives the heap-allocated, NUL-terminated buffer, which the
+ *            caller frees. May be NULL when only the length is wanted.
+ *
+ * Every entry is written as "<name>/<stat string><link>\n".
+ *
+ * return - length of the serialised string (0 for an empty list), or -1
+ *          when memory could not be allocated; *bufferp is left untouched
+ *          in that case.
+ */
+int32_t
+gf_direntry_to_bin (dir_entry_t *head,
+                    char **bufferp)
+{
+        dir_entry_t *trav     = NULL;
+        uint32_t     len      = 0;
+        uint32_t     this_len = 0;
+        char        *buffer   = NULL;
+        int32_t      buflen   = -1;
+        char        *ptr      = NULL;
+        char        *tmp_buf  = NULL;
+
+        trav = head->next;
+        while (trav) {
+                len += strlen (trav->name);
+                len += 1;
+                len += strlen (trav->link);
+                len += 1;   /* for '\n' */
+                len += 256; /* budget for the stat string */
+                trav = trav->next;
+        }
+
+        /* one extra byte, so that an empty list still yields a valid ""
+           and the terminating NUL never depends on the stat budget */
+        buffer = CALLOC (1, len + 1);
+        if (buffer == NULL) {
+                gf_log ("server", GF_LOG_ERROR,
+                        "failed to allocate memory for buffer");
+                goto out;
+        }
+
+        ptr = buffer;
+        trav = head->next;
+        while (trav) {
+                tmp_buf = stat_to_str (&trav->buf);
+                if (tmp_buf == NULL) {
+                        gf_log ("server", GF_LOG_ERROR,
+                                "failed to allocate memory for buffer");
+                        free (buffer);
+                        goto out;
+                }
+                /* NOTE(review): the stat string is expected to end in
+                   '\n'; that depends on GF_STAT_PRINT_FMT_STR */
+
+                this_len = sprintf (ptr, "%s/%s%s\n",
+                                    trav->name, tmp_buf,
+                                    trav->link);
+
+                FREE (tmp_buf);
+                trav = trav->next;
+                ptr += this_len;
+        }
+
+        buflen = (int32_t) (ptr - buffer);
+
+        if (bufferp)
+                *bufferp = buffer;
+        else
+                free (buffer); /* nobody takes ownership: do not leak */
+
+out:
+        return buflen;
+}
+
+
+/* Allocate a lock table with empty directory and file locker lists and an
+ * initialised lock. Logs and returns NULL when the allocation fails. */
+static struct _lock_table *
+gf_lock_table_new (void)
+{
+        struct _lock_table *table = CALLOC (1, sizeof (struct _lock_table));
+
+        if (table == NULL) {
+                gf_log ("server-protocol", GF_LOG_CRITICAL,
+                        "failed to allocate memory for new lock table");
+                return NULL;
+        }
+
+        INIT_LIST_HEAD (&table->dir_lockers);
+        INIT_LIST_HEAD (&table->file_lockers);
+        LOCK_INIT (&table->lock);
+
+        return table;
+}
+
+
+/* server_connection_destroy - release everything a connection still holds.
+ *
+ * For a connection that is bound to a translator, every lock that is still
+ * recorded in its lock table is released by winding an unlock to the bound
+ * translator (inodelk/finodelk for files, entrylk/fentrylk for
+ * directories), and the fd table is destroyed. Finally the connection id
+ * and the connection itself are freed.
+ *
+ * A connection whose lock table is missing is handled: there is simply
+ * nothing to unlock.
+ *
+ * return - always -1. NOTE(review): no success value is ever produced;
+ *          the caller visible here ignores the result.
+ */
+int
+server_connection_destroy (xlator_t *this, server_connection_t *conn)
+{
+
+        call_frame_t       *frame = NULL, *tmp_frame = NULL;
+        xlator_t           *bound_xl = NULL;
+        int32_t             ret = -1;
+        server_state_t     *state = NULL;
+        struct list_head    file_lockers;
+        struct list_head    dir_lockers;
+        struct _lock_table *ltable = NULL;
+        struct _locker     *locker = NULL, *tmp = NULL;
+        struct flock        flock = {0,};
+
+
+        bound_xl = (xlator_t *) (conn->bound_xl);
+
+        if (bound_xl) {
+                /* trans will have ref_count = 1 after this call, but its
+                   ok since this function is called in
+                   GF_EVENT_TRANSPORT_CLEANUP */
+                frame = create_frame (this, this->ctx->pool);
+
+                pthread_mutex_lock (&(conn->lock));
+                {
+                        if (conn->ltable) {
+                                ltable = conn->ltable;
+                                conn->ltable = NULL;
+                        }
+                }
+                pthread_mutex_unlock (&conn->lock);
+
+                INIT_LIST_HEAD (&file_lockers);
+                INIT_LIST_HEAD (&dir_lockers);
+
+                /* without a lock table both private lists stay empty and
+                   the unlock loops below do nothing */
+                if (ltable) {
+                        LOCK (&ltable->lock);
+                        {
+                                list_splice_init (&ltable->file_lockers,
+                                                  &file_lockers);
+
+                                list_splice_init (&ltable->dir_lockers,
+                                                  &dir_lockers);
+                        }
+                        UNLOCK (&ltable->lock);
+                        free (ltable);
+                }
+
+                flock.l_type  = F_UNLCK;
+                flock.l_start = 0;
+                flock.l_len   = 0;
+                list_for_each_entry_safe (locker,
+                                          tmp, &file_lockers, lockers) {
+                        tmp_frame = copy_frame (frame);
+                        /*
+                           pid = 0 is a special case that tells posix-locks
+                           to release all locks from this transport
+                        */
+                        tmp_frame->root->pid = 0;
+                        tmp_frame->root->trans = conn;
+
+                        if (locker->fd) {
+                                STACK_WIND (tmp_frame, server_nop_cbk,
+                                            bound_xl,
+                                            bound_xl->fops->finodelk,
+                                            locker->fd, F_SETLK, &flock);
+                                fd_unref (locker->fd);
+                        } else {
+                                STACK_WIND (tmp_frame, server_nop_cbk,
+                                            bound_xl,
+                                            bound_xl->fops->inodelk,
+                                            &(locker->loc), F_SETLK, &flock);
+                                loc_wipe (&locker->loc);
+                        }
+
+                        list_del_init (&locker->lockers);
+                        free (locker);
+                }
+
+                tmp = NULL;
+                locker = NULL;
+                list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
+                        tmp_frame = copy_frame (frame);
+
+                        tmp_frame->root->pid = 0;
+                        tmp_frame->root->trans = conn;
+
+                        if (locker->fd) {
+                                STACK_WIND (tmp_frame, server_nop_cbk,
+                                            bound_xl,
+                                            bound_xl->fops->fentrylk,
+                                            locker->fd, NULL,
+                                            ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+                                fd_unref (locker->fd);
+                        } else {
+                                STACK_WIND (tmp_frame, server_nop_cbk,
+                                            bound_xl,
+                                            bound_xl->fops->entrylk,
+                                            &(locker->loc), NULL,
+                                            ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+                                loc_wipe (&locker->loc);
+                        }
+
+                        list_del_init (&locker->lockers);
+                        free (locker);
+                }
+
+                state = CALL_STATE (frame);
+                if (state)
+                        free (state);
+                STACK_DESTROY (frame->root);
+
+                pthread_mutex_lock (&(conn->lock));
+                {
+                        if (conn->fdtable) {
+                                gf_fd_fdtable_destroy (conn->fdtable);
+                                conn->fdtable = NULL;
+                        }
+                }
+                pthread_mutex_unlock (&conn->lock);
+
+        }
+
+        gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s",
+                conn->id);
+
+        FREE (conn->id);
+        FREE (conn);
+
+        return ret;
+}
+
+
+/* server_connection_get - find the connection registered under @id, or
+ *                         create and register a new one.
+ *
+ * @this - server translator; its private data holds the connection list
+ * @id   - identifier of the connection
+ *
+ * The lookup and the creation both happen under conf->mutex, and the
+ * returned connection has its reference count incremented.
+ *
+ * return - the connection, or NULL when a new connection could not be
+ *          allocated.
+ */
+server_connection_t *
+server_connection_get (xlator_t *this, const char *id)
+{
+        server_connection_t *conn = NULL;
+        server_connection_t *trav = NULL;
+        server_conf_t       *conf = NULL;
+
+        conf = this->private;
+
+        pthread_mutex_lock (&conf->mutex);
+        {
+                list_for_each_entry (trav, &conf->conns, list) {
+                        if (!strcmp (id, trav->id)) {
+                                conn = trav;
+                                break;
+                        }
+                }
+
+                if (!conn) {
+                        conn = (void *) CALLOC (1, sizeof (*conn));
+                        if (!conn) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate memory for connection");
+                                goto unlock;
+                        }
+
+                        conn->id = strdup (id);
+                        if (!conn->id) {
+                                /* a connection without an id must not be
+                                   listed: later lookups strcmp() on it */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "failed to allocate memory for connection id");
+                                FREE (conn);
+                                conn = NULL;
+                                goto unlock;
+                        }
+
+                        conn->fdtable = gf_fd_fdtable_alloc ();
+                        conn->ltable  = gf_lock_table_new ();
+
+                        pthread_mutex_init (&conn->lock, NULL);
+
+                        list_add (&conn->list, &conf->conns);
+                }
+
+                conn->ref++;
+        }
+unlock:
+        pthread_mutex_unlock (&conf->mutex);
+
+        return conn;
+}
+
+
+/* Drop one reference on @conn. When the count reaches zero the connection
+ * is unlinked from the list under conf->mutex and destroyed after the
+ * mutex has been released. */
+void
+server_connection_put (xlator_t *this, server_connection_t *conn)
+{
+        server_conf_t *conf      = this->private;
+        int            last_user = 0;
+
+        pthread_mutex_lock (&conf->mutex);
+        {
+                conn->ref--;
+
+                if (conn->ref == 0) {
+                        list_del_init (&conn->list);
+                        last_user = 1;
+                }
+        }
+        pthread_mutex_unlock (&conf->mutex);
+
+        if (last_user)
+                server_connection_destroy (this, conn);
+}
diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h
new file mode 100644
index 00000000000..36c0ce98e40
--- /dev/null
+++ b/xlators/protocol/server/src/server-helpers.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __SERVER_HELPERS_H__
+#define __SERVER_HELPERS_H__
+
+/*
+ * Accessor macros. Every macro parameter is parenthesized so that an
+ * argument which is itself an expression expands with the intended
+ * precedence.
+ */
+
+/* server_state_t hanging off the root of a call frame */
+#define CALL_STATE(frame) ((server_state_t *)(frame)->root->state)
+
+/* xlator stored in the frame's server state */
+#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->bound_xl)
+
+/* transport stored in the frame's server state */
+#define TRANSPORT_FROM_FRAME(frame) ((transport_t *) CALL_STATE(frame)->trans)
+
+/* per-connection data kept in the transport's xl_private */
+#define SERVER_CONNECTION(frame) \
+        ((server_connection_t *) TRANSPORT_FROM_FRAME(frame)->xl_private)
+
+/* server_conf_t of the xlator that owns the frame's transport */
+#define SERVER_CONF(frame) \
+        ((server_conf_t *)TRANSPORT_FROM_FRAME(frame)->xl->private)
+
+#define TRANSPORT_FROM_XLATOR(this) \
+        ((((server_conf_t *)(this)->private))->trans)
+
+#define INODE_LRU_LIMIT(this) \
+        (((server_conf_t *)((this)->private))->inode_lru_limit)
+
+/* true when inode is the root of its own inode table */
+#define IS_ROOT_INODE(inode) ((inode) == (inode)->table->root)
+
+/* 1 when pathlen is greater than 2, otherwise 0 */
+#define IS_NOT_ROOT(pathlen) (((pathlen) > 2)? 1 : 0)
+
+int32_t
+server_loc_fill (loc_t *loc,
+                 server_state_t *state,
+                 ino_t ino,
+                 ino_t par,
+                 const char *name,
+                 const char *path);
+
+char *
+stat_to_str (struct stat *stbuf);
+
+call_frame_t *
+server_copy_frame (call_frame_t *frame);
+
+void free_state (server_state_t *state);
+
+void server_loc_wipe (loc_t *loc);
+
+/* lock-table bookkeeping: callers pass either a loc or an fd, plus a pid */
+int32_t
+gf_add_locker (struct _lock_table *table,
+               loc_t *loc,
+               fd_t *fd,
+               pid_t pid);
+
+int32_t
+gf_del_locker (struct _lock_table *table,
+               loc_t *loc,
+               fd_t *fd,
+               pid_t pid);
+
+/* the return value is tested against < 0 by callers, so it is signed */
+int32_t
+gf_direntry_to_bin (dir_entry_t *head,
+                    char **bufferp);
+#endif /* __SERVER_HELPERS_H__ */
diff --git a/xlators/protocol/server/src/server-protocol.c b/xlators/protocol/server/src/server-protocol.c
new file mode 100644
index 00000000000..a5198c1ed07
--- /dev/null
+++ b/xlators/protocol/server/src/server-protocol.c
@@ -0,0 +1,7984 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <time.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+
+#include <libgen.h>
+
+#include "transport.h"
+#include "fnmatch.h"
+#include "xlator.h"
+#include "protocol.h"
+#include "server-protocol.h"
+#include "server-helpers.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+
+/*
+ * protocol_server_reply - stamp the common header and submit a reply
+ * @frame:  call frame of the request being answered
+ * @type:   reply type, stored in hdr->type
+ * @op:     operation code, stored in hdr->op
+ * @hdr:    reply header; callid, type and op are filled in here
+ * @hdrlen: header length handed to transport_submit
+ * @vector: payload vector handed to transport_submit (may be NULL)
+ * @count:  number of entries in @vector
+ * @refs:   handed through to transport_submit
+ *
+ * The call stack of @frame is destroyed and its server state is freed
+ * before this function returns, so callers must not use @frame afterwards.
+ */
+static void
+protocol_server_reply (call_frame_t *frame,
+                       int type, int op,
+                       gf_hdr_common_t *hdr, size_t hdrlen,
+                       struct iovec *vector, int count,
+                       dict_t *refs)
+{
+        server_state_t *state = NULL;
+        transport_t    *trans = NULL;
+
+        state = CALL_STATE(frame);
+        trans = state->trans;
+
+        hdr->callid = hton64 (frame->root->unique);
+        hdr->type   = hton32 (type);
+        hdr->op     = hton32 (op);
+
+        transport_submit (trans, (char *)hdr, hdrlen, vector, count, refs);
+        /* TODO: If transport_submit fails, no reply is sent to the client;
+         * the request simply bails out for now. Logically, only this frame
+         * should fail.
+         */
+
+        STACK_DESTROY (frame->root);
+
+        if (state)
+                free_state (state);
+
+}
+
+
+/*
+ * server_fchmod_cbk - fchmod callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @stbuf: copied into the reply when op_ret is 0
+ *
+ * Failures are logged at debug level before the reply is sent.
+ */
+int32_t
+server_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        gf_fop_fchmod_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *reply_hdr  = NULL;
+        server_state_t      *state      = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FCHMOD %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHMOD,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fchmod - handle an fchmod request
+ * @frame: call frame
+ * @bound_xl: xlator whose name is used when validation fails
+ * @hdr: request header; the fchmod request follows it
+ * @hdrlen:
+ * @buf:
+ * @buflen:
+ *
+ * Decodes the fd number and mode into the frame's state and winds the
+ * call. When no fd could be resolved the callback is invoked directly
+ * with -1/EINVAL.
+ */
+int32_t
+server_fchmod (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        server_connection_t *conn  = SERVER_CONNECTION(frame);
+        gf_fop_fchmod_req_t *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        state->mode = ntoh32 (req->mode);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        STACK_WIND (frame, server_fchmod_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->fchmod,
+                    state->fd, state->mode);
+
+        return 0;
+fail:
+        server_fchmod_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/*
+ * server_fchown_cbk - fchown callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @stbuf: copied into the reply when op_ret is 0
+ *
+ * Failures are logged at debug level before the reply is sent.
+ */
+int32_t
+server_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        gf_fop_fchown_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *reply_hdr  = NULL;
+        server_state_t      *state      = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FCHOWN %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHOWN,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fchown - handle an fchown request
+ * @frame: call frame
+ * @bound_xl: xlator whose name is used when validation fails
+ * @hdr: request header; the fchown request follows it
+ * @hdrlen:
+ * @buf:
+ * @buflen:
+ *
+ * Decodes the fd number, uid and gid into the frame's state and winds
+ * the call. When no fd could be resolved the callback is invoked
+ * directly with -1/EINVAL.
+ */
+int32_t
+server_fchown (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        server_connection_t *conn  = SERVER_CONNECTION(frame);
+        gf_fop_fchown_req_t *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        state->uid = ntoh32 (req->uid);
+        state->gid = ntoh32 (req->gid);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        STACK_WIND (frame, server_fchown_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->fchown,
+                    state->fd, state->uid, state->gid);
+
+        return 0;
+fail:
+        server_fchown_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
+        return 0;
+}
+
+/*
+ * server_setdents_cbk - setdents callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * not for external reference
+ */
+int32_t
+server_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        gf_fop_setdents_rsp_t *rsp        = NULL;
+        gf_hdr_common_t       *reply_hdr  = NULL;
+        size_t                 reply_len  = 0;
+        int32_t                wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        /* the reply carries only the result pair */
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETDENTS,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_lk_cbk - lk callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @lock: copied into the reply when op_ret is 0
+ *
+ * Failures other than ENOSYS are logged at debug level.
+ */
+int32_t
+server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        gf_fop_lk_rsp_t *rsp        = NULL;
+        gf_hdr_common_t *reply_hdr  = NULL;
+        server_state_t  *state      = NULL;
+        size_t           reply_len  = 0;
+        int32_t          wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                if (op_errno != ENOSYS) {
+                        state = CALL_STATE(frame);
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                                frame->root->unique, state->fd_no,
+                                state->fd ? state->fd->inode->ino : 0, op_ret,
+                                strerror (op_errno));
+                }
+        } else {
+                gf_flock_from_flock (&rsp->flock, lock);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LK,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_inodelk_cbk - inodelk callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * On success the connection's lock table is updated for state->loc: the
+ * locker is deleted for an F_UNLCK request and added otherwise. Failures
+ * other than ENOSYS are logged at debug level.
+ */
+int32_t
+server_inodelk_cbk (call_frame_t *frame, void *cookie,
+                    xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        server_connection_t  *conn       = SERVER_CONNECTION(frame);
+        server_state_t       *state      = CALL_STATE(frame);
+        gf_fop_inodelk_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *reply_hdr  = NULL;
+        size_t                reply_len  = 0;
+        int32_t               wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret < 0) {
+                if (op_errno != ENOSYS) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
+                                frame->root->unique, state->loc.path,
+                                state->loc.inode ? state->loc.inode->ino : 0, op_ret,
+                                strerror (op_errno));
+                }
+        } else if (state->flock.l_type == F_UNLCK) {
+                gf_del_locker (conn->ltable,
+                               &state->loc, NULL, frame->root->pid);
+        } else {
+                gf_add_locker (conn->ltable,
+                               &state->loc, NULL, frame->root->pid);
+        }
+
+        server_loc_wipe (&state->loc);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_INODELK,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_finodelk_cbk - finodelk callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * On success the connection's lock table is updated for state->fd: the
+ * locker is deleted for an F_UNLCK request and added otherwise. Failures
+ * other than ENOSYS are logged at debug level.
+ */
+int32_t
+server_finodelk_cbk (call_frame_t *frame, void *cookie,
+                     xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        server_connection_t   *conn       = SERVER_CONNECTION(frame);
+        gf_fop_finodelk_rsp_t *rsp        = NULL;
+        gf_hdr_common_t       *reply_hdr  = NULL;
+        server_state_t        *state      = NULL;
+        size_t                 reply_len  = 0;
+        int32_t                wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        state = CALL_STATE(frame);
+
+        if (op_ret < 0) {
+                if (op_errno != ENOSYS) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                                frame->root->unique, state->fd_no,
+                                state->fd ? state->fd->inode->ino : 0, op_ret,
+                                strerror (op_errno));
+                }
+        } else if (state->flock.l_type == F_UNLCK) {
+                gf_del_locker (conn->ltable,
+                               NULL, state->fd, frame->root->pid);
+        } else {
+                gf_add_locker (conn->ltable,
+                               NULL, state->fd, frame->root->pid);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FINODELK,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_entrylk_cbk - entrylk callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * On success the connection's lock table is updated for state->loc: the
+ * locker is deleted for an ENTRYLK_UNLOCK command and added otherwise.
+ * Failures other than ENOSYS are logged at debug level.
+ *
+ * not for external reference
+ */
+int32_t
+server_entrylk_cbk (call_frame_t *frame, void *cookie,
+                    xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        server_connection_t  *conn = NULL;
+        gf_hdr_common_t      *hdr = NULL;
+        gf_fop_entrylk_rsp_t *rsp = NULL;
+        server_state_t       *state = NULL;
+        size_t                hdrlen = 0;
+        int32_t               gf_errno = 0;
+
+        conn = SERVER_CONNECTION(frame);
+
+        state = CALL_STATE(frame);
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret >= 0) {
+                if (state->cmd == ENTRYLK_UNLOCK)
+                        gf_del_locker (conn->ltable,
+                                       &state->loc, NULL, frame->root->pid);
+                else
+                        gf_add_locker (conn->ltable,
+                                       &state->loc, NULL, frame->root->pid);
+        } else if (op_errno != ENOSYS) {
+                /* name the right operation: this used to say INODELK */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": ENTRYLK %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        server_loc_wipe (&state->loc);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ENTRYLK,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_fentrylk_cbk - fentrylk callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * On success the connection's lock table is updated for state->fd: the
+ * locker is deleted for an ENTRYLK_UNLOCK command and added otherwise.
+ * Failures other than ENOSYS are logged at debug level.
+ */
+int32_t
+server_fentrylk_cbk (call_frame_t *frame, void *cookie,
+                     xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        server_connection_t   *conn = NULL;
+        gf_hdr_common_t       *hdr = NULL;
+        gf_fop_fentrylk_rsp_t *rsp = NULL;
+        server_state_t        *state = NULL;
+        size_t                 hdrlen = 0;
+        int32_t                gf_errno = 0;
+
+        conn = SERVER_CONNECTION(frame);
+
+        /* fetched up front: both the success and the failure branch below
+         * read it (the failure branch used to dereference a NULL state) */
+        state = CALL_STATE(frame);
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret >= 0) {
+                if (state->cmd == ENTRYLK_UNLOCK)
+                        gf_del_locker (conn->ltable,
+                                       NULL, state->fd, frame->root->pid);
+                else
+                        gf_add_locker (conn->ltable,
+                                       NULL, state->fd, frame->root->pid);
+        } else if (op_errno != ENOSYS) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FENTRYLK,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_access_cbk - access callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * not for external reference
+ */
+int32_t
+server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno)
+{
+        server_state_t      *state      = CALL_STATE(frame);
+        gf_fop_access_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *reply_hdr  = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        /* the loc is wiped before the reply, which frees the state */
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ACCESS,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_utimens_cbk - utimens callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @stbuf: copied into the reply when op_ret is 0
+ *
+ * not for external reference
+ */
+int32_t
+server_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        server_state_t       *state      = CALL_STATE(frame);
+        gf_fop_utimens_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *reply_hdr  = NULL;
+        size_t                reply_len  = 0;
+        int32_t               wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (!op_ret) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UTIMENS,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_chmod_cbk - chmod callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @stbuf: copied into the reply when op_ret is 0
+ *
+ * not for external reference
+ */
+int32_t
+server_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        server_state_t     *state      = CALL_STATE(frame);
+        gf_fop_chmod_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *reply_hdr  = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (!op_ret) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHMOD,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_chown_cbk - chown callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @stbuf: copied into the reply when op_ret is 0
+ *
+ * not for external reference
+ */
+int32_t
+server_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+{
+        server_state_t     *state      = CALL_STATE(frame);
+        gf_fop_chown_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *reply_hdr  = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (!op_ret) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHOWN,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_rmdir_cbk - rmdir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * When op_ret is 0 the inode in state->loc is unlinked from its parent
+ * under state->loc.name; otherwise the failure is logged at debug level.
+ */
+int32_t
+server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno)
+{
+        server_state_t     *state      = CALL_STATE(frame);
+        gf_fop_rmdir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *reply_hdr  = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        } else {
+                inode_unlink (state->loc.inode, state->loc.parent,
+                              state->loc.name);
+        }
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RMDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_mkdir_cbk - mkdir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @inode: linked under state->loc.parent / state->loc.name on success
+ * @stbuf: copied into the reply on success
+ *
+ * Failures are logged at debug level.
+ */
+int32_t
+server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *stbuf)
+{
+        server_state_t     *state      = CALL_STATE(frame);
+        gf_fop_mkdir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *reply_hdr  = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": MKDIR %s ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        op_ret, strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+                inode_link (inode, state->loc.parent, state->loc.name, stbuf);
+                inode_lookup (inode);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_mknod_cbk - mknod callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @inode: linked under state->loc.parent / state->loc.name on success
+ * @stbuf: copied into the reply on success
+ *
+ * Failures are logged at debug level.
+ */
+int32_t
+server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno,
+                  inode_t *inode, struct stat *stbuf)
+{
+        server_state_t     *state      = CALL_STATE(frame);
+        gf_fop_mknod_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *reply_hdr  = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": MKNOD %s ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        op_ret, strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+                inode_link (inode, state->loc.parent, state->loc.name, stbuf);
+                inode_lookup (inode);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKNOD,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fsyncdir_cbk - fsyncdir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * Failures (op_ret < 0) are logged at debug level.
+ */
+int32_t
+server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno)
+{
+        gf_fop_fsyncdir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t       *reply_hdr  = NULL;
+        server_state_t        *state      = NULL;
+        size_t                 reply_len  = 0;
+        int32_t                wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        if (op_ret < 0) {
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FSYNCDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNCDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_getdents_cbk - getdents callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @entries: directory entries, converted with gf_direntry_to_bin
+ * @count: entry count, sent back in the reply
+ *
+ * On success the converted buffer is attached to a new dict (stored in
+ * frame->root->rsp_refs) and sent as the single payload vector. If the
+ * conversion or the dict setup fails, the reply carries op_ret -1 and no
+ * payload.
+ *
+ * not for external reference
+ */
+int32_t
+server_getdents_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     dir_entry_t *entries,
+                     int32_t count)
+{
+        gf_hdr_common_t       *hdr = NULL;
+        gf_fop_getdents_rsp_t *rsp = NULL;
+        size_t                 hdrlen = 0;
+        int32_t                vec_count = 0;
+        int32_t                gf_errno = 0;
+        int32_t                ret = -1;
+        dict_t                *reply_dict = NULL;
+        char                  *buffer = NULL;
+        size_t                 buflen = 0;
+        struct iovec           vector[1];
+        server_state_t        *state = NULL;
+
+        state = CALL_STATE(frame);
+
+        /* defined on every path, including the early "goto out" ones */
+        vector[0].iov_base = NULL;
+        vector[0].iov_len  = 0;
+
+        if (op_ret >= 0) {
+                /* keep the result signed: storing it straight into the
+                 * size_t buflen made the "< 0" failure test always false */
+                ret = gf_direntry_to_bin (entries, &buffer);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to convert "
+                                "entries list to string buffer",
+                                state->fd_no,
+                                state->fd ? state->fd->inode->ino : 0);
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto out;
+                }
+                buflen = ret;
+
+                reply_dict = dict_new ();
+                if (reply_dict == NULL) {
+                        /* NOTE(review): buffer does not appear to be
+                         * released on this path - confirm who owns it */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to get new dict",
+                                state->fd_no,
+                                state->fd ? state->fd->inode->ino : 0);
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                ret = dict_set_dynptr (reply_dict, NULL,
+                                       buffer, buflen);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to set read buffer "
+                                "to reply dictionary",
+                                state->fd_no,
+                                state->fd ? state->fd->inode->ino : 0);
+                        op_ret = -1;
+                        op_errno = -ret;
+                        goto out;
+                }
+                frame->root->rsp_refs = reply_dict;
+                vector[0].iov_base = buffer;
+                vector[0].iov_len = buflen;
+                vec_count = 1;
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": GETDENTS %"PRId64" (%"PRId64"): %"PRId32" (%s)",
+                        frame->root->unique,
+                        state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        }
+
+out:
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        rsp->count = hton32 (count);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETDENTS,
+                               hdr, hdrlen, vector, vec_count,
+                               frame->root->rsp_refs);
+
+        if (reply_dict)
+                dict_unref (reply_dict);
+
+        return 0;
+}
+
+
+/*
+ * server_readdir_cbk - readdir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @entries: serialized into the reply when op_ret is greater than 0
+ *
+ * gf_dirent_serialize is first called with a NULL buffer to obtain the
+ * size used for the reply header, then again to fill rsp->buf. When
+ * op_ret is not greater than 0 a debug message is logged instead.
+ */
+int32_t
+server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+{
+        gf_fop_readdir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *reply_hdr  = NULL;
+        server_state_t       *state      = NULL;
+        size_t                reply_len  = 0;
+        size_t                dirent_len = 0;
+        int32_t               wire_errno = 0;
+
+        if (op_ret > 0) {
+                dirent_len = gf_dirent_serialize (entries, NULL, 0);
+        }
+
+        reply_len = gf_hdr_len (rsp, dirent_len);
+        reply_hdr = gf_hdr_new (rsp, dirent_len);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret <= 0) {
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                rsp->size = hton32 (dirent_len);
+                gf_dirent_serialize (entries, rsp->buf, dirent_len);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_releasedir_cbk - releasedir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * The reply is sent as GF_OP_TYPE_CBK_REPLY / GF_CBK_RELEASEDIR.
+ */
+int32_t
+server_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        gf_cbk_releasedir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t         *reply_hdr  = NULL;
+        size_t                   reply_len  = 0;
+        int32_t                  wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASEDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_opendir_cbk - opendir callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @fd: file descriptor structure of opened directory
+ *
+ * On success @fd is bound and given a slot in the connection's fd table;
+ * the slot number is stored in state->fd_no. On failure the error is
+ * logged and state->fd, if set, is unref'd. state->fd_no is sent back in
+ * the reply in both cases.
+ */
+int32_t
+server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        server_connection_t  *conn       = SERVER_CONNECTION(frame);
+        server_state_t       *state      = CALL_STATE(frame);
+        gf_fop_opendir_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *reply_hdr  = NULL;
+        size_t                reply_len  = 0;
+        int32_t               wire_errno = 0;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+
+                /* NOTE: corresponding to fd_create()'s ref */
+                if (state->fd)
+                        fd_unref (state->fd);
+        } else {
+                fd_bind (fd);
+
+                state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
+        }
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+        rsp->fd                 = hton64 (state->fd_no);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPENDIR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_statfs_cbk - statfs callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @buf: copied into the reply when op_ret is not negative
+ *
+ * not for external reference
+ */
+int32_t
+server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+{
+        server_state_t      *state      = CALL_STATE(frame);
+        gf_fop_statfs_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *reply_hdr  = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (!(op_ret < 0))
+                gf_statfs_from_statfs (&rsp->statfs, buf);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STATFS,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_removexattr_cbk - removexattr callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ *
+ * not for external reference
+ */
+int32_t
+server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int32_t op_ret, int32_t op_errno)
+{
+        server_state_t           *state      = CALL_STATE(frame);
+        gf_fop_removexattr_rsp_t *rsp        = NULL;
+        gf_hdr_common_t          *reply_hdr  = NULL;
+        size_t                    reply_len  = 0;
+        int32_t                   wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        reply_hdr = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (reply_hdr);
+
+        reply_hdr->rsp.op_ret   = hton32 (op_ret);
+        wire_errno              = gf_errno_to_error (op_errno);
+        reply_hdr->rsp.op_errno = hton32 (wire_errno);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_REMOVEXATTR,
+                               reply_hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_getxattr_cbk - getxattr callback for server protocol
+ * @frame: call frame
+ * @cookie:
+ * @this:
+ * @op_ret: return value
+ * @op_errno: errno
+ * @dict: serialized into the reply when op_ret is not negative
+ *
+ * The reply header is sized from dict_serialized_length (plus one byte).
+ * If either the length query or the serialization itself fails, the
+ * reply carries op_ret -1 instead.
+ *
+ * not for external reference
+ */
+int32_t
+server_getxattr_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     dict_t *dict)
+{
+        gf_hdr_common_t       *hdr = NULL;
+        gf_fop_getxattr_rsp_t *rsp = NULL;
+        server_state_t        *state = NULL;
+        size_t                 hdrlen = 0;
+        int32_t                len = 0;
+        int32_t                gf_errno = 0;
+        int32_t                ret = -1;
+
+        state = CALL_STATE(frame);
+
+        if (op_ret >= 0) {
+                len = dict_serialized_length (dict);
+                if (len < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to get serialized length of "
+                                "reply dict",
+                                state->loc.path, state->ino);
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        len = 0;
+                }
+        }
+
+        hdrlen = gf_hdr_len (rsp, len + 1);
+        hdr    = gf_hdr_new (rsp, len + 1);
+        rsp    = gf_param (hdr);
+
+        if (op_ret >= 0) {
+                ret = dict_serialize (dict, rsp->dict);
+                /* test the serializer's own result; len was already
+                 * validated above and can no longer be negative here */
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to serialize reply dict",
+                                state->loc.path, state->ino);
+                        op_ret = -1;
+                        op_errno = -ret;
+                }
+        }
+        rsp->dict_len = hton32 (len);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETXATTR,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_setxattr_cbk - reply to a setxattr request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: result of the setxattr fop
+ * @op_errno: errno of the setxattr fop
+ *
+ * Sends a GF_FOP_SETXATTR reply with no extra payload, carrying op_ret
+ * and the converted errno, and wipes the loc kept in the call state.
+ *
+ * not for external reference
+ */
+int32_t
+server_setxattr_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno)
+{
+        server_state_t        *state      = CALL_STATE(frame);
+        gf_fop_setxattr_rsp_t *rsp        = NULL;
+        gf_hdr_common_t       *hdr        = NULL;
+        size_t                 reply_len  = 0;
+        int32_t                wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETXATTR,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_rename_cbk - reply to a rename request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: result of the rename fop
+ * @op_errno: errno of the rename fop
+ * @stbuf: stat of the renamed entry; read and updated only when
+ *         op_ret == 0
+ *
+ * On success the inode number and mode cached in the old loc's inode are
+ * copied into @stbuf, the rename is applied to the inode table, and the
+ * stat is placed in the reply.  Both locs in the call state are wiped
+ * before the reply is sent.
+ *
+ * not for external reference
+ */
+int32_t
+server_rename_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   struct stat *stbuf)
+{
+        server_state_t      *state      = CALL_STATE(frame);
+        gf_fop_rename_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *hdr        = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret == 0) {
+                /* report the inode number and mode already held for
+                 * the source inode */
+                stbuf->st_ino  = state->loc.inode->ino;
+                stbuf->st_mode = state->loc.inode->st_mode;
+
+                gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+                        "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s "
+                        "==> %"PRId64"/%s",
+                        frame->root->unique, state->loc.inode->ino,
+                        state->loc.parent->ino, state->loc.name,
+                        state->loc2.parent->ino, state->loc2.name);
+
+                /* mirror the rename in the server's inode table */
+                inode_rename (state->itable,
+                              state->loc.parent, state->loc.name,
+                              state->loc2.parent, state->loc2.name,
+                              state->loc.inode, stbuf);
+
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        /* release both the source and destination locs */
+        server_loc_wipe (&(state->loc));
+        server_loc_wipe (&(state->loc2));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RENAME,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_unlink_cbk - reply to an unlink request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the unlink fop
+ * @op_errno: errno of the unlink fop
+ *
+ * On success the entry is also removed from the inode table via
+ * inode_unlink(); on failure the error is logged.  The loc in the call
+ * state is wiped and a reply with no extra payload is sent.
+ *
+ * not for external reference
+ */
+int32_t
+server_unlink_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno)
+{
+        server_state_t      *state      = CALL_STATE(frame);
+        gf_fop_unlink_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *hdr        = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        } else {
+                gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+                        "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")",
+                        frame->root->unique, state->loc.parent->ino,
+                        state->loc.name, state->loc.inode->ino);
+
+                /* drop the name from the inode table as well */
+                inode_unlink (state->loc.inode, state->loc.parent,
+                              state->loc.name);
+        }
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UNLINK,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_symlink_cbk - symlink callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: return value
+ * @op_errno: errno
+ * @inode: inode of the new symlink, linked into the inode table when
+ *         op_ret >= 0
+ * @stbuf: stat of the new symlink, copied into the reply when
+ *         op_ret >= 0
+ *
+ * not for external reference
+ */
+int32_t
+server_symlink_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    inode_t *inode,
+                    struct stat *stbuf)
+{
+        gf_hdr_common_t      *hdr      = NULL;
+        gf_fop_symlink_rsp_t *rsp      = NULL;
+        server_state_t       *state    = NULL;
+        size_t                hdrlen   = 0;
+        int32_t               gf_errno = 0;
+
+        state = CALL_STATE(frame);
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        /* convert once and reuse the result, as the sibling callbacks do */
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret >= 0) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+                /* make the new entry known to the inode table */
+                inode_link (inode, state->loc.parent, state->loc.name, stbuf);
+                inode_lookup (inode);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SYMLINK,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_link_cbk - link callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: unused
+ * @op_ret: return value of the link fop
+ * @op_errno: errno of the link fop
+ * @inode: inode that was linked; linked under the new name in the inode
+ *         table when op_ret == 0
+ * @stbuf: stat of the inode, copied into the reply when op_ret == 0
+ *
+ * not for external reference
+ */
+int32_t
+server_link_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 inode_t *inode,
+                 struct stat *stbuf)
+{
+        gf_hdr_common_t   *hdr      = NULL;
+        gf_fop_link_rsp_t *rsp      = NULL;
+        server_state_t    *state    = NULL;
+        int32_t            gf_errno = 0;
+        size_t             hdrlen   = 0;
+
+        state = CALL_STATE(frame);
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret == 0) {
+                /* report the inode number already held for the source */
+                stbuf->st_ino = state->loc.inode->ino;
+                gf_stat_from_stat (&rsp->stat, stbuf);
+                gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+                        "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s",
+                        frame->root->unique, inode->ino, state->loc2.parent->ino,
+                        state->loc2.name, state->loc.parent->ino, state->loc.name);
+
+                inode_link (inode, state->loc2.parent,
+                            state->loc2.name, stbuf);
+        } else {
+                /* guard the callback-supplied inode before logging, the
+                 * same way the other failure logs guard their inode */
+                gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+                        "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s "
+                        " ==> %"PRId32" (%s)",
+                        frame->root->unique, inode ? inode->ino : 0,
+                        state->loc2.parent->ino,
+                        state->loc2.name, state->loc.parent->ino, state->loc.name,
+                        op_ret, strerror (op_errno));
+        }
+
+        server_loc_wipe (&(state->loc));
+        server_loc_wipe (&(state->loc2));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LINK,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_truncate_cbk - reply to a truncate request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the truncate fop
+ * @op_errno: errno of the truncate fop
+ * @stbuf: stat after truncation, copied into the reply when op_ret == 0
+ *
+ * not for external reference
+ */
+int32_t
+server_truncate_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     struct stat *stbuf)
+{
+        server_state_t        *state      = CALL_STATE(frame);
+        gf_fop_truncate_rsp_t *rsp        = NULL;
+        gf_hdr_common_t       *hdr        = NULL;
+        size_t                 reply_len  = 0;
+        int32_t                wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_TRUNCATE,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fstat_cbk - reply to an fstat request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the fstat fop
+ * @op_errno: errno of the fstat fop
+ * @stbuf: stat of the open file, copied into the reply when op_ret == 0
+ *
+ * not for external reference
+ */
+int32_t
+server_fstat_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct stat *stbuf)
+{
+        server_state_t     *state      = NULL;
+        gf_fop_fstat_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *hdr        = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSTAT,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_ftruncate_cbk - reply to an ftruncate request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the ftruncate fop
+ * @op_errno: errno of the ftruncate fop
+ * @stbuf: stat after truncation, copied into the reply when op_ret == 0
+ *
+ * not for external reference
+ */
+int32_t
+server_ftruncate_cbk (call_frame_t *frame,
+                      void *cookie,
+                      xlator_t *this,
+                      int32_t op_ret,
+                      int32_t op_errno,
+                      struct stat *stbuf)
+{
+        server_state_t         *state      = NULL;
+        gf_fop_ftruncate_rsp_t *rsp        = NULL;
+        gf_hdr_common_t        *hdr        = NULL;
+        size_t                  reply_len  = 0;
+        int32_t                 wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret != 0) {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FTRUNCATE,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_flush_cbk - reply to a flush request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the flush fop
+ * @op_errno: errno of the flush fop
+ *
+ * Logs a failed flush and sends a reply that carries only op_ret and
+ * the converted errno.
+ *
+ * not for external reference
+ */
+int32_t
+server_flush_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno)
+{
+        server_state_t     *state      = NULL;
+        gf_fop_flush_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *hdr        = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        if (op_ret < 0) {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        /* rsp is only handed to the header macros; no response field
+         * is written */
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FLUSH,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fsync_cbk - reply to an fsync request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the fsync fop
+ * @op_errno: errno of the fsync fop
+ *
+ * Logs a failed fsync and sends a reply that carries only op_ret and
+ * the converted errno.
+ *
+ * not for external reference
+ */
+int32_t
+server_fsync_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno)
+{
+        server_state_t     *state      = NULL;
+        gf_fop_fsync_rsp_t *rsp        = NULL;
+        gf_hdr_common_t    *hdr        = NULL;
+        size_t              reply_len  = 0;
+        int32_t             wire_errno = 0;
+
+        if (op_ret < 0) {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        /* rsp is only handed to the header macros; no response field
+         * is written */
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNC,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_release_cbk - reply to a release request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: result of the release
+ * @op_errno: errno of the release
+ *
+ * Unlike the fop callbacks, the reply is sent as GF_OP_TYPE_CBK_REPLY
+ * with op GF_CBK_RELEASE.
+ *
+ * not for external reference
+ */
+int32_t
+server_release_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno)
+{
+        gf_cbk_release_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *hdr        = NULL;
+        size_t                reply_len  = 0;
+        int32_t               wire_errno = 0;
+
+        /* rsp is only handed to the header macros; no response field
+         * is written */
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASE,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_writev_cbk - writev callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: return value of the writev fop
+ * @op_errno: errno of the writev fop
+ * @stbuf: stat of the file, copied into the reply when op_ret >= 0
+ *
+ * not for external reference
+ */
+
+int32_t
+server_writev_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   struct stat *stbuf)
+{
+        gf_hdr_common_t    *hdr      = NULL;
+        gf_fop_write_rsp_t *rsp      = NULL;
+        size_t              hdrlen   = 0;
+        int32_t             gf_errno = 0;
+        server_state_t     *state    = NULL;
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        /* convert once and reuse the result, as the sibling callbacks do */
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret >= 0) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        } else {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        protocol_server_reply (frame,
+                               GF_OP_TYPE_FOP_REPLY, GF_FOP_WRITE,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_readv_cbk - reply to a readv request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the readv fop
+ * @op_errno: errno of the readv fop
+ * @vector: io vectors handed to the reply as-is
+ * @count: number of entries in @vector
+ * @stbuf: stat of the file, copied into the reply when op_ret >= 0
+ *
+ * @vector and @count are passed to protocol_server_reply() together
+ * with frame->root->rsp_refs.
+ *
+ * not for external reference
+ */
+int32_t
+server_readv_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno,
+                  struct iovec *vector,
+                  int32_t count,
+                  struct stat *stbuf)
+{
+        server_state_t    *state      = NULL;
+        gf_fop_read_rsp_t *rsp        = NULL;
+        gf_hdr_common_t   *hdr        = NULL;
+        size_t             reply_len  = 0;
+        int32_t            wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        if (op_ret < 0) {
+                /* the call state is only needed to describe the failure */
+                state = CALL_STATE(frame);
+
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        } else {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READ,
+                               hdr, reply_len, vector, count,
+                               frame->root->rsp_refs);
+
+        return 0;
+}
+
+
+/*
+ * server_open_cbk - open callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: return value of the open fop
+ * @op_errno: errno of the open fop
+ * @fd: opened file descriptor object, bound and registered in the
+ *      connection's fd table when op_ret >= 0
+ *
+ * On success the fd is bound and given a slot in the connection's fd
+ * table; the slot number is returned to the client in rsp->fd.  If no
+ * slot can be obtained the reply is turned into a failure, mirroring
+ * server_create_cbk.  On failure the fd stored in the call state is
+ * unref'd.
+ *
+ * not for external reference
+ */
+int32_t
+server_open_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 fd_t *fd)
+{
+        server_connection_t *conn     = NULL;
+        gf_hdr_common_t     *hdr      = NULL;
+        gf_fop_open_rsp_t   *rsp      = NULL;
+        server_state_t      *state    = NULL;
+        size_t               hdrlen   = 0;
+        int32_t              gf_errno = 0;
+
+        conn = SERVER_CONNECTION(frame);
+
+        state = CALL_STATE(frame);
+
+        if (op_ret >= 0) {
+                fd_bind (fd);
+
+                state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
+
+                /* do not report success with an unusable fd number */
+                if (state->fd_no < 0) {
+                        op_ret   = state->fd_no;
+                        op_errno = errno;
+                }
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+
+                /* NOTE: corresponding to fd_create()'s ref */
+                if (state->fd)
+                        fd_unref (state->fd);
+        }
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+        rsp->fd           = hton64 (state->fd_no);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPEN,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_create_cbk - reply to a create request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: result of the create fop
+ * @op_errno: errno of the create fop
+ * @fd: file descriptor object of the newly created file
+ * @inode: inode of the newly created file
+ * @stbuf: stat of the newly created file
+ *
+ * On success the inode is linked into the inode table, the fd is bound
+ * and registered in the connection's fd table, and the fd number plus
+ * stat are returned.  On failure the fd held in the call state is
+ * unref'd.
+ *
+ * not for external reference
+ */
+int32_t
+server_create_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno,
+                   fd_t *fd,
+                   inode_t *inode,
+                   struct stat *stbuf)
+{
+        server_connection_t *conn       = SERVER_CONNECTION(frame);
+        server_state_t      *state      = CALL_STATE(frame);
+        gf_fop_create_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *hdr        = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+
+                /* NOTE: corresponding to fd_create()'s ref */
+                if (state->fd)
+                        fd_unref (state->fd);
+        } else {
+                gf_log (state->bound_xl->name, GF_LOG_DEBUG,
+                        "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")",
+                        frame->root->unique, state->loc.parent->ino,
+                        state->loc.name, stbuf->st_ino);
+
+                /* make the new file known to the inode table */
+                inode_link (inode, state->loc.parent, state->loc.name, stbuf);
+                inode_lookup (inode);
+
+                fd_bind (fd);
+
+                state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
+
+                /* no usable fd number: report the failure to the client */
+                if ((fd == NULL) || (state->fd_no < 0)) {
+                        op_ret   = state->fd_no;
+                        op_errno = errno;
+                }
+        }
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+        rsp->fd           = hton64 (state->fd_no);
+
+        if (op_ret >= 0)
+                gf_stat_from_stat (&rsp->stat, stbuf);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CREATE,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_readlink_cbk - readlink callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: return value of the readlink fop
+ * @op_errno: errno of the readlink fop
+ * @buf: NUL-terminated link target, read only when op_ret >= 0
+ *
+ * On success the reply payload is @buf including its terminating NUL;
+ * on failure the reply has no payload.
+ *
+ * not for external reference
+ */
+int32_t
+server_readlink_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     const char *buf)
+{
+        gf_hdr_common_t       *hdr      = NULL;
+        gf_fop_readlink_rsp_t *rsp      = NULL;
+        server_state_t        *state    = NULL;
+        size_t                 hdrlen   = 0;
+        size_t                 linklen  = 0;
+        int32_t                gf_errno = 0;
+
+        state = CALL_STATE(frame);
+
+        if (op_ret >= 0) {
+                /* room for the target plus its terminating NUL */
+                linklen = strlen (buf) + 1;
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        }
+
+        hdrlen = gf_hdr_len (rsp, linklen);
+        hdr    = gf_hdr_new (rsp, linklen);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        /* convert once and reuse the result, as the sibling callbacks do */
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        /* the payload area was sized from strlen (buf) + 1 above */
+        if (op_ret >= 0)
+                strcpy (rsp->path, buf);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READLINK,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_stat_cbk - stat callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain on failure
+ * @op_ret: return value of the stat fop
+ * @op_errno: errno of the stat fop
+ * @stbuf: stat result, copied into the reply when op_ret == 0
+ *
+ * not for external reference
+ */
+int32_t
+server_stat_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct stat *stbuf)
+{
+        gf_hdr_common_t   *hdr      = NULL;
+        gf_fop_stat_rsp_t *rsp      = NULL;
+        server_state_t    *state    = NULL;
+        size_t             hdrlen   = 0;
+        int32_t            gf_errno = 0;
+
+        state = CALL_STATE(frame);
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        /* convert once and reuse the result, as the sibling callbacks do */
+        gf_errno          = gf_errno_to_error (op_errno);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        if (op_ret == 0) {
+                gf_stat_from_stat (&rsp->stat, stbuf);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        }
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STAT,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_forget_cbk - reply to a forget request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: not used
+ * @op_ret: result of the forget
+ * @op_errno: errno of the forget
+ *
+ * The reply is sent as GF_OP_TYPE_CBK_REPLY with op GF_CBK_FORGET and
+ * carries only op_ret and the converted errno.
+ *
+ * not for external reference
+ */
+int32_t
+server_forget_cbk (call_frame_t *frame,
+                   void *cookie,
+                   xlator_t *this,
+                   int32_t op_ret,
+                   int32_t op_errno)
+{
+        gf_cbk_forget_rsp_t *rsp        = NULL;
+        gf_hdr_common_t     *hdr        = NULL;
+        size_t               reply_len  = 0;
+        int32_t              wire_errno = 0;
+
+        reply_len = gf_hdr_len (rsp, 0);
+        hdr       = gf_hdr_new (rsp, 0);
+        rsp       = gf_param (hdr);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_FORGET,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * server_lookup_cbk - lookup callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: translator; its name is the log domain for errors
+ * @op_ret: return value of the lookup fop
+ * @op_errno: errno of the lookup fop
+ * @inode: inode that was looked up
+ * @stbuf: stat of the looked-up inode, copied into the reply on success
+ * @dict: optional dictionary serialized into the reply payload
+ *
+ * A lookup that fails with ESTALE is not answered: the lookup is wound
+ * again to the bound translator with this same function as callback.
+ * Otherwise the reply is built, the inode is linked into the inode table
+ * if it has no inode number yet, and the reply is sent.
+ *
+ * not for external reference
+ */
+int32_t
+server_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct stat *stbuf,
+ dict_t *dict)
+{
+ gf_hdr_common_t *hdr = NULL;
+ gf_fop_lookup_rsp_t *rsp = NULL;
+ server_state_t *state = NULL;
+ inode_t *root_inode = NULL;
+ int32_t dict_len = 0;
+ size_t hdrlen = 0;
+ int32_t gf_errno = 0;
+ int32_t ret = -1;
+
+ state = CALL_STATE(frame);
+ if ((op_errno == ESTALE) && (op_ret == -1)) {
+ /* Send lookup again with new ctx dictionary */
+ loc_t loc = {0,};
+
+ root_inode = BOUND_XL(frame)->itable->root;
+ /* every inode except root is replaced by a freshly created one */
+ if (state->loc.inode != root_inode) {
+ if (state->loc.inode)
+ inode_unref (state->loc.inode);
+ state->loc.inode = inode_new (BOUND_XL(frame)->itable);
+ }
+ loc.inode = state->loc.inode;
+ loc.path = state->path;
+ /* NOTE(review): the meaning of the value 2 is not visible here -
+ * confirm against the definition/users of is_revalidate */
+ state->is_revalidate = 2;
+ /* NOTE(review): nothing in this function bounds how often the
+ * lookup is re-wound if ESTALE keeps coming back - confirm */
+ STACK_WIND (frame, server_lookup_cbk,
+ BOUND_XL(frame),
+ BOUND_XL(frame)->fops->lookup,
+ &loc,
+ state->xattr_req);
+ return 0;
+ }
+
+ /* NOTE(review): the length is computed whenever dict is non-NULL, even
+ * if op_ret < 0, while serialization below requires op_ret >= 0; in
+ * that case dict_len is advertised without the dict being written -
+ * confirm the client ignores dict_len on failure */
+ if (dict) {
+ dict_len = dict_serialized_length (dict);
+ if (dict_len < 0) {
+ /* NOTE(review): state->loc.inode is dereferenced unguarded
+ * here, unlike the failure log further down */
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s (%"PRId64"): failed to get serialized "
+ "length of reply dict",
+ state->loc.path, state->loc.inode->ino);
+ op_ret = -1;
+ op_errno = EINVAL;
+ dict_len = 0;
+ }
+ }
+
+ /* the reply reserves dict_len payload bytes for the serialized dict */
+ hdrlen = gf_hdr_len (rsp, dict_len);
+ hdr = gf_hdr_new (rsp, dict_len);
+ rsp = gf_param (hdr);
+
+ if ((op_ret >= 0) && dict) {
+ ret = dict_serialize (dict, rsp->dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s (%"PRId64"): failed to serialize reply dict",
+ state->loc.path, state->loc.inode->ino);
+ op_ret = -1;
+ op_errno = -ret;
+ dict_len = 0;
+ }
+ }
+ rsp->dict_len = hton32 (dict_len);
+
+ hdr->rsp.op_ret = hton32 (op_ret);
+ gf_errno = gf_errno_to_error (op_errno);
+ hdr->rsp.op_errno = hton32 (gf_errno);
+
+ if (op_ret == 0) {
+ root_inode = BOUND_XL(frame)->itable->root;
+ if (inode == root_inode) {
+ /* we just looked up root ("/") */
+ stbuf->st_ino = 1;
+ /* record root's mode the first time it is seen */
+ if (inode->st_mode == 0)
+ inode->st_mode = stbuf->st_mode;
+ }
+
+ gf_stat_from_stat (&rsp->stat, stbuf);
+
+ /* an inode without an inode number is linked into the table */
+ if (inode->ino == 0) {
+ inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (inode);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)",
+ frame->root->unique, state->loc.path,
+ state->loc.inode ? state->loc.inode->ino : 0,
+ op_ret, strerror (op_errno));
+ }
+
+ server_loc_wipe (&state->loc);
+ protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LOOKUP,
+ hdr, hdrlen, NULL, 0, NULL);
+
+ return 0;
+}
+
+/*
+ * server_xattrop_cbk - reply to an xattrop request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain
+ * @op_ret: result of the xattrop fop
+ * @op_errno: errno of the xattrop fop
+ * @dict: resulting dictionary, serialized into the reply when
+ *        op_ret >= 0 and @dict is non-NULL
+ *
+ * A failure to size or serialize @dict turns the reply into an error
+ * with dict_len 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_xattrop_cbk (call_frame_t *frame,
+                    void *cookie,
+                    xlator_t *this,
+                    int32_t op_ret,
+                    int32_t op_errno,
+                    dict_t *dict)
+{
+        server_state_t       *state      = CALL_STATE(frame);
+        gf_fop_xattrop_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *hdr        = NULL;
+        size_t                reply_len  = 0;
+        int32_t               dict_bytes = 0;
+        int32_t               wire_errno = 0;
+        int32_t               rc         = -1;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->loc.path,
+                        state->loc.inode ? state->loc.inode->ino : 0,
+                        op_ret, strerror (op_errno));
+        }
+
+        /* size the payload first ... */
+        if (dict && (op_ret >= 0)) {
+                dict_bytes = dict_serialized_length (dict);
+                if (dict_bytes < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to get serialized length"
+                                " for reply dict",
+                                state->loc.path, state->loc.inode->ino);
+                        op_ret     = -1;
+                        op_errno   = EINVAL;
+                        dict_bytes = 0;
+                }
+        }
+
+        /* one extra byte is reserved beyond the serialized dict */
+        reply_len = gf_hdr_len (rsp, dict_bytes + 1);
+        hdr       = gf_hdr_new (rsp, dict_bytes + 1);
+        rsp       = gf_param (hdr);
+
+        /* ... then fill it, unless sizing already failed */
+        if (dict && (op_ret >= 0)) {
+                rc = dict_serialize (dict, rsp->dict);
+                if (rc < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to serialize reply dict",
+                                state->loc.path, state->loc.inode->ino);
+                        op_ret     = -1;
+                        op_errno   = -rc;
+                        dict_bytes = 0;
+                }
+        }
+        rsp->dict_len = hton32 (dict_bytes);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        server_loc_wipe (&(state->loc));
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_XATTROP,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_fxattrop_cbk - reply to an fxattrop request
+ * @frame: call frame of the request being answered
+ * @cookie: not used
+ * @this: translator; its name is the log domain
+ * @op_ret: result of the fxattrop fop
+ * @op_errno: errno of the fxattrop fop
+ * @dict: resulting dictionary, serialized into the reply when
+ *        op_ret >= 0 and @dict is non-NULL
+ *
+ * The reply uses the gf_fop_xattrop_rsp_t layout.  A failure to size or
+ * serialize @dict turns the reply into an error with dict_len 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_fxattrop_cbk (call_frame_t *frame,
+                     void *cookie,
+                     xlator_t *this,
+                     int32_t op_ret,
+                     int32_t op_errno,
+                     dict_t *dict)
+{
+        server_state_t       *state      = CALL_STATE(frame);
+        gf_fop_xattrop_rsp_t *rsp        = NULL;
+        gf_hdr_common_t      *hdr        = NULL;
+        size_t                reply_len  = 0;
+        int32_t               dict_bytes = 0;
+        int32_t               wire_errno = 0;
+        int32_t               rc         = -1;
+
+        if (op_ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
+                        frame->root->unique, state->fd_no,
+                        state->fd ? state->fd->inode->ino : 0, op_ret,
+                        strerror (op_errno));
+        }
+
+        /* size the payload first ... */
+        if (dict && (op_ret >= 0)) {
+                dict_bytes = dict_serialized_length (dict);
+                if (dict_bytes < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to get "
+                                "serialized length for reply dict",
+                                state->fd_no, state->fd->inode->ino);
+                        op_ret     = -1;
+                        op_errno   = EINVAL;
+                        dict_bytes = 0;
+                }
+        }
+
+        /* one extra byte is reserved beyond the serialized dict */
+        reply_len = gf_hdr_len (rsp, dict_bytes + 1);
+        hdr       = gf_hdr_new (rsp, dict_bytes + 1);
+        rsp       = gf_param (hdr);
+
+        /* ... then fill it, unless sizing already failed */
+        if (dict && (op_ret >= 0)) {
+                rc = dict_serialize (dict, rsp->dict);
+                if (rc < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to "
+                                "serialize reply dict",
+                                state->fd_no, state->fd->inode->ino);
+                        op_ret     = -1;
+                        op_errno   = -rc;
+                        dict_bytes = 0;
+                }
+        }
+        rsp->dict_len = hton32 (dict_bytes);
+
+        wire_errno        = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (op_ret);
+        hdr->rsp.op_errno = hton32 (wire_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FXATTROP,
+                               hdr, reply_len, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_stub_resume - this is callback function used whenever an fop does
+ * STACK_WIND to fops->lookup in order to lookup the inode
+ * for a pathname. this case of doing fops->lookup arises
+ * when fop searches in inode table for pathname and search
+ * fails.
+ *
+ * @stub: call stub
+ * @op_ret:
+ * @op_errno:
+ * @inode:
+ * @parent:
+ *
+ * not for external reference
+ */
+int32_t
+server_stub_resume (call_stub_t *stub,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ inode_t *parent)
+{
+ inode_t *server_inode = inode;
+
+ if (!stub) {
+ return 0;
+ }
+ switch (stub->fop)
+ {
+ case GF_FOP_RENAME:
+ if (stub->args.rename.old.inode == NULL) {
+ loc_t *newloc = NULL;
+ /* now we are called by lookup of oldpath. */
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": RENAME (%s -> %s) on %s "
+ "returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.rename.old.path,
+ stub->args.rename.new.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ /* lookup of oldpath failed, UNWIND to
+ * server_rename_cbk with ret=-1 and
+ * errno=ENOENT
+ */
+ server_rename_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.rename.old);
+ server_loc_wipe (&stub->args.rename.new);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.rename.old.parent == NULL)
+ stub->args.rename.old.parent =
+ inode_ref (parent);
+
+ /* store inode information of oldpath in our stub
+ * and search for newpath in inode table.
+ */
+ if (server_inode) {
+ stub->args.rename.old.inode =
+ inode_ref (server_inode);
+
+ stub->args.rename.old.ino =
+ server_inode->ino;
+ }
+
+ /* now lookup for newpath */
+ newloc = &stub->args.rename.new;
+
+ if (newloc->parent == NULL) {
+ /* lookup for newpath */
+ do_path_lookup (stub, newloc);
+ break;
+ } else {
+ /* found newpath in inode cache */
+ call_resume (stub);
+ break;
+ }
+ } else {
+ /* we are called by the lookup of newpath */
+ if (stub->args.rename.new.parent == NULL)
+ stub->args.rename.new.parent =
+ inode_ref (parent);
+ }
+
+ /* after looking up for oldpath as well as newpath,
+ * we are ready to resume */
+ {
+ call_resume (stub);
+ }
+ break;
+
+ case GF_FOP_OPEN:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": OPEN (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.open.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_open_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT,
+ NULL);
+ FREE (stub->args.open.loc.path);
+ FREE (stub);
+ return 0;
+ }
+ if (stub->args.open.loc.parent == NULL)
+ stub->args.open.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.open.loc.inode == NULL)) {
+ stub->args.open.loc.inode = inode_ref (server_inode);
+ stub->args.open.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_LOOKUP:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name,
+ GF_LOG_DEBUG,
+ "%"PRId64": LOOKUP (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.lookup.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_lookup_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL,
+ NULL);
+ server_loc_wipe (&stub->args.lookup.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.lookup.loc.parent == NULL)
+ stub->args.lookup.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.lookup.loc.inode == NULL)) {
+ stub->args.lookup.loc.inode = inode_ref (server_inode);
+ stub->args.lookup.loc.ino = server_inode->ino;
+ }
+
+ call_resume (stub);
+
+ break;
+ }
+
+ case GF_FOP_STAT:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": STAT (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.stat.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_stat_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.stat.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ /* TODO:reply from here only, we already have stat structure */
+ if (stub->args.stat.loc.parent == NULL)
+ stub->args.stat.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.stat.loc.inode == NULL)) {
+ stub->args.stat.loc.inode = inode_ref (server_inode);
+ stub->args.stat.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_XATTROP:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": XATTROP (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.xattrop.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_xattrop_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.xattrop.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.xattrop.loc.parent == NULL)
+ stub->args.xattrop.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.xattrop.loc.inode == NULL)) {
+ stub->args.xattrop.loc.inode =
+ inode_ref (server_inode);
+
+ stub->args.xattrop.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_UNLINK:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": UNLINK (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.unlink.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_unlink_cbk (stub->frame, NULL,
+ stub->frame->this,
+ -1, ENOENT);
+ server_loc_wipe (&stub->args.unlink.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.unlink.loc.parent == NULL)
+ stub->args.unlink.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.unlink.loc.inode == NULL)) {
+ stub->args.unlink.loc.inode = inode_ref (server_inode);
+ stub->args.unlink.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_SYMLINK:
+ {
+ if ((op_ret < 0) && (parent == NULL)) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": SYMLINK (%s -> %s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.symlink.loc.path,
+ stub->args.symlink.linkname,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_symlink_cbk (stub->frame, NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL);
+ server_loc_wipe (&stub->args.symlink.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.symlink.loc.parent == NULL)
+ stub->args.symlink.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.symlink.loc.inode == NULL)) {
+ stub->args.symlink.loc.inode =
+ inode_ref (server_inode);
+ stub->args.symlink.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_RMDIR:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": RMDIR (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.rmdir.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_rmdir_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT);
+ server_loc_wipe (&stub->args.rmdir.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.rmdir.loc.parent == NULL)
+ stub->args.rmdir.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.rmdir.loc.inode == NULL)) {
+ stub->args.rmdir.loc.inode = inode_ref (server_inode);
+ stub->args.rmdir.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_CHMOD:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": CHMOD (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.chmod.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_chmod_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.chmod.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.chmod.loc.parent == NULL)
+ stub->args.chmod.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.chmod.loc.inode == NULL)) {
+ stub->args.chmod.loc.inode = inode_ref (server_inode);
+ stub->args.chmod.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_CHOWN:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": CHOWN (%s) on %s returning ENOENT: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.chown.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+ server_chown_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.chown.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.chown.loc.parent == NULL)
+ stub->args.chown.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.chown.loc.inode == NULL)) {
+ stub->args.chown.loc.inode = inode_ref (server_inode);
+ stub->args.chown.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_LINK:
+ {
+ if (stub->args.link.oldloc.inode == NULL) {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": LINK (%s -> %s) on %s returning "
+ "error for oldloc: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.link.oldloc.path,
+ stub->args.link.newloc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_link_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL);
+ server_loc_wipe (&stub->args.link.oldloc);
+ server_loc_wipe (&stub->args.link.newloc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.link.oldloc.parent == NULL)
+ stub->args.link.oldloc.parent =
+ inode_ref (parent);
+
+ if (server_inode &&
+ (stub->args.link.oldloc.inode == NULL)) {
+ stub->args.link.oldloc.inode =
+ inode_ref (server_inode);
+ stub->args.link.oldloc.ino = server_inode->ino;
+ }
+
+ if (stub->args.link.newloc.parent == NULL) {
+ do_path_lookup (stub,
+ &(stub->args.link.newloc));
+ break;
+ }
+ } else {
+ /* we are called by the lookup of newpath */
+ if ((op_ret < 0) && (parent == NULL)) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": LINK (%s -> %s) on %s returning "
+ "error for newloc: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.link.oldloc.path,
+ stub->args.link.newloc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_link_cbk (stub->frame, NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL);
+
+ server_loc_wipe (&stub->args.link.oldloc);
+ server_loc_wipe (&stub->args.link.newloc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.link.newloc.parent == NULL) {
+ stub->args.link.newloc.parent =
+ inode_ref (parent);
+ }
+
+ if (server_inode &&
+ (stub->args.link.newloc.inode == NULL)) {
+ /* as new.inode doesn't get forget, it
+ * needs to be unref'd here */
+ stub->args.link.newloc.inode =
+ inode_ref (server_inode);
+ stub->args.link.newloc.ino = server_inode->ino;
+ }
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_TRUNCATE:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": TRUNCATE (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.truncate.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_truncate_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.truncate.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.truncate.loc.parent == NULL)
+ stub->args.truncate.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.truncate.loc.inode == NULL)) {
+ stub->args.truncate.loc.inode =
+ inode_ref (server_inode);
+ stub->args.truncate.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_STATFS:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": STATFS (%s) on %s returning ENOENT: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.statfs.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_statfs_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.statfs.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.statfs.loc.parent == NULL)
+ stub->args.statfs.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.statfs.loc.inode == NULL)) {
+ stub->args.statfs.loc.inode = inode_ref (server_inode);
+ stub->args.statfs.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_SETXATTR:
+ {
+ dict_t *dict = stub->args.setxattr.dict;
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": SETXATTR (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.setxattr.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_setxattr_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT);
+
+ server_loc_wipe (&stub->args.setxattr.loc);
+ dict_unref (dict);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.setxattr.loc.parent == NULL)
+ stub->args.setxattr.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.setxattr.loc.inode == NULL)) {
+ stub->args.setxattr.loc.inode =
+ inode_ref (server_inode);
+ stub->args.setxattr.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_GETXATTR:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": GETXATTR (%s) on %s for key %s "
+ "returning error: %"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.getxattr.loc.path,
+ BOUND_XL(stub->frame)->name,
+ stub->args.getxattr.name ?
+ stub->args.getxattr.name : "<nul>",
+ op_ret, op_errno);
+
+ server_getxattr_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.getxattr.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.getxattr.loc.parent == NULL)
+ stub->args.getxattr.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.getxattr.loc.inode == NULL)) {
+ stub->args.getxattr.loc.inode =
+ inode_ref (server_inode);
+ stub->args.getxattr.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_REMOVEXATTR:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": REMOVEXATTR (%s) on %s for key %s "
+ "returning error: %"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.removexattr.loc.path,
+ BOUND_XL(stub->frame)->name,
+ stub->args.removexattr.name,
+ op_ret, op_errno);
+
+ server_removexattr_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1,
+ ENOENT);
+ server_loc_wipe (&stub->args.removexattr.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.removexattr.loc.parent == NULL)
+ stub->args.removexattr.loc.parent = inode_ref (parent);
+
+ if (server_inode &&
+ (stub->args.removexattr.loc.inode == NULL)) {
+ stub->args.removexattr.loc.inode =
+ inode_ref (server_inode);
+ stub->args.removexattr.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_OPENDIR:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": OPENDIR (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.opendir.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_opendir_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.opendir.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.opendir.loc.parent == NULL)
+ stub->args.opendir.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.opendir.loc.inode == NULL)) {
+ stub->args.opendir.loc.inode =
+ inode_ref (server_inode);
+ stub->args.opendir.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_ACCESS:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": ACCESS (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.access.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_access_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT);
+ server_loc_wipe (&stub->args.access.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.access.loc.parent == NULL)
+ stub->args.access.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.access.loc.inode == NULL)) {
+ stub->args.access.loc.inode = inode_ref (server_inode);
+ stub->args.access.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+
+ case GF_FOP_UTIMENS:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": UTIMENS (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.utimens.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_utimens_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.utimens.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.utimens.loc.parent == NULL)
+ stub->args.utimens.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.utimens.loc.inode == NULL)) {
+ stub->args.utimens.loc.inode =
+ inode_ref (server_inode);
+ stub->args.utimens.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_READLINK:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": READLINK (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.readlink.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_readlink_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL);
+ server_loc_wipe (&stub->args.readlink.loc);
+ FREE (stub);
+ return 0;
+ }
+
+ if (stub->args.readlink.loc.parent == NULL)
+ stub->args.readlink.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.readlink.loc.inode == NULL)) {
+ stub->args.readlink.loc.inode =
+ inode_ref (server_inode);
+ stub->args.readlink.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+ case GF_FOP_MKDIR:
+ {
+ if ((op_ret < 0) && (parent == NULL)) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": MKDIR (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.mkdir.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_mkdir_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL);
+ server_loc_wipe (&stub->args.mkdir.loc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.mkdir.loc.parent == NULL)
+ stub->args.mkdir.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.mkdir.loc.inode == NULL)) {
+ stub->args.mkdir.loc.inode = inode_ref (server_inode);
+ stub->args.mkdir.loc.ino = server_inode->ino;
+ }
+
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_CREATE:
+ {
+ if ((op_ret < 0) && (parent == NULL)) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": CREATE (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.create.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_create_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL,
+ NULL);
+ if (stub->args.create.fd)
+ fd_unref (stub->args.create.fd);
+ server_loc_wipe (&stub->args.create.loc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.create.loc.parent == NULL)
+ stub->args.create.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.create.loc.inode == NULL)) {
+ stub->args.create.loc.inode = inode_ref (server_inode);
+ stub->args.create.loc.ino = server_inode->ino;
+ }
+
+ call_resume (stub);
+ break;
+ }
+
+ case GF_FOP_MKNOD:
+ {
+ if ((op_ret < 0) && (parent == NULL)) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": MKNOD (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.mknod.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_mknod_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT,
+ NULL, NULL);
+ server_loc_wipe (&stub->args.mknod.loc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.mknod.loc.parent == NULL)
+ stub->args.mknod.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.mknod.loc.inode == NULL)) {
+ stub->args.mknod.loc.inode = inode_ref (server_inode);
+ stub->args.mknod.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+ case GF_FOP_ENTRYLK:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": ENTRYLK (%s) on %s for key %s returning "
+ "error: %"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.entrylk.loc.path,
+ BOUND_XL(stub->frame)->name,
+ stub->args.entrylk.name ?
+ stub->args.entrylk.name : "<nul>",
+ op_ret, op_errno);
+
+ server_entrylk_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT);
+ server_loc_wipe (&stub->args.entrylk.loc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.entrylk.loc.parent == NULL)
+ stub->args.entrylk.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.entrylk.loc.inode == NULL)) {
+ stub->args.entrylk.loc.inode = inode_ref (server_inode);
+ stub->args.entrylk.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+ case GF_FOP_INODELK:
+ {
+ if (op_ret < 0) {
+ gf_log (stub->frame->this->name, GF_LOG_ERROR,
+ "%"PRId64": INODELK (%s) on %s returning error: "
+ "%"PRId32" (%"PRId32")",
+ stub->frame->root->unique,
+ stub->args.inodelk.loc.path,
+ BOUND_XL(stub->frame)->name,
+ op_ret, op_errno);
+
+ server_inodelk_cbk (stub->frame,
+ NULL,
+ stub->frame->this,
+ -1, ENOENT);
+ server_loc_wipe (&stub->args.inodelk.loc);
+ FREE (stub);
+ break;
+ }
+
+ if (stub->args.inodelk.loc.parent == NULL)
+ stub->args.inodelk.loc.parent = inode_ref (parent);
+
+ if (server_inode && (stub->args.inodelk.loc.inode == NULL)) {
+ stub->args.inodelk.loc.inode =
+ inode_ref (server_inode);
+ stub->args.inodelk.loc.ino = server_inode->ino;
+ }
+ call_resume (stub);
+ break;
+ }
+ default:
+ call_resume (stub);
+ }
+
+ return 0;
+}
+
+ /*
+  * server_lookup_resume - second half of server_lookup()
+  * @frame: call frame of the LOOKUP request
+  * @this: not referenced in this function
+  * @loc: location handed over by the resumed stub
+  * @xattr_req: dictionary passed through unchanged to the lookup fop
+  *
+  * Folds the parent/inode found in @loc into the per-call state and winds
+  * fops->lookup of BOUND_XL(frame) with state->loc.
+  */
+ static int
+ server_lookup_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, dict_t *xattr_req)
+ {
+         server_state_t *state = CALL_STATE(frame);
+
+         /* take over the parent only when the state has none of its own */
+         if (!state->loc.parent && loc->parent)
+                 state->loc.parent = inode_ref (loc->parent);
+
+         if (!state->loc.inode) {
+                 /* no inode in the state yet: reference the one in @loc,
+                  * or allocate a new one from the state's inode table.
+                  * FIXME (kept from the original): why another lookup? */
+                 state->loc.inode = loc->inode
+                         ? inode_ref (loc->inode)
+                         : inode_new (state->itable);
+         } else if (loc->inode && (loc->inode != state->loc.inode)) {
+                 /* @loc carries a different inode: replace our reference */
+                 inode_unref (state->loc.inode);
+                 state->loc.inode = inode_ref (loc->inode);
+         }
+
+         gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                 "%"PRId64": LOOKUP \'%"PRId64"/%s\'",
+                 frame->root->unique, state->par, state->bname);
+
+         STACK_WIND (frame, server_lookup_cbk,
+                     BOUND_XL(frame), BOUND_XL(frame)->fops->lookup,
+                     &(state->loc), xattr_req);
+         return 0;
+ }
+
+/*
+ * server_lookup - lookup function for server protocol
+ * @frame: call frame
+ * @bound_xl:
+ * @params: parameter dictionary
+ *
+ * not for external reference
+ */
+int
+server_lookup (call_frame_t *frame,
+ xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ gf_fop_lookup_req_t *req = NULL;
+ server_state_t *state = NULL;
+ call_stub_t *lookup_stub = NULL;
+ int32_t ret = -1;
+ size_t pathlen = 0, baselen = 0;
+ size_t dictlen = 0;
+ dict_t *xattr_req = NULL;
+ char *req_dictbuf = NULL;
+
+ req = gf_param (hdr);
+
+ state = CALL_STATE(frame);
+ {
+
+ pathlen = STRLEN_0 (req->path);
+ dictlen = ntoh32 (req->dictlen);
+
+ /* NOTE: lookup() uses req->ino only to identify if a lookup()
+ * is requested for 'root' or not
+ */
+ state->ino = ntoh64 (req->ino);
+ if (state->ino != 1)
+ state->ino = 0;
+
+ state->par = ntoh64 (req->par);
+ state->path = req->path;
+ if (IS_NOT_ROOT(pathlen)) {
+ state->bname = req->bname + pathlen;
+ baselen = STRLEN_0 (state->bname);
+ }
+
+ if (dictlen) {
+ /* Unserialize the dictionary */
+ req_dictbuf = memdup (req->dict + pathlen + baselen, dictlen);
+ GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
+
+ xattr_req = dict_new ();
+ GF_VALIDATE_OR_GOTO(bound_xl->name, xattr_req, fail);
+
+ ret = dict_unserialize (req_dictbuf, dictlen, &xattr_req);
+ if (ret < 0) {
+ gf_log (bound_xl->name, GF_LOG_ERROR,
+ "%"PRId64": %s (%"PRId64"): failed to "
+ "unserialize request buffer to dictionary",
+ frame->root->unique, state->loc.path,
+ state->ino);
+ free (req_dictbuf);
+ goto fail;
+ } else{
+ xattr_req->extra_free = req_dictbuf;
+ state->xattr_req = xattr_req;
+ xattr_req = NULL;
+ }
+ }
+ }
+
+ ret = server_loc_fill (&state->loc, state,
+ state->ino, state->par, state->bname,
+ state->path);
+
+ if (state->loc.inode) {
+ /* revalidate */
+ state->is_revalidate = 1;
+ } else {
+ /* fresh lookup or inode was previously pruned out */
+ state->is_revalidate = -1;
+ }
+
+ lookup_stub = fop_lookup_stub (frame, server_lookup_resume,
+ &(state->loc), state->xattr_req);
+ GF_VALIDATE_OR_GOTO(bound_xl->name, lookup_stub, fail);
+
+ if ((state->loc.parent == NULL) &&
+ IS_NOT_ROOT(pathlen))
+ do_path_lookup (lookup_stub, &(state->loc));
+ else
+ call_resume (lookup_stub);
+
+ return 0;
+fail:
+ server_lookup_cbk (frame, NULL, frame->this,
+ -1,EINVAL,
+ NULL, NULL, NULL);
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return 0;
+}
+
+
+/*
+ * server_forget - forget function for server protocol
+ * @frame: call frame
+ * @bound_xl:
+ * @params: parameter dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_forget (call_frame_t *frame, xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ int index = 0;
+ ino_t ino = 0;
+ int32_t count = 0;
+ inode_t *inode = NULL;
+ gf_cbk_forget_req_t *req = NULL;
+
+ req = gf_param (hdr);
+ count = ntoh32 (req->count);
+
+ for (index = 0; index < count; index++) {
+
+ ino = ntoh64 (req->ino_array[index]);
+
+ if (!ino)
+ continue;
+
+ inode = inode_search (bound_xl->itable, ino, NULL);
+
+ if (inode) {
+ inode_forget (inode, 0);
+ inode_unref (inode);
+ } else {
+ gf_log (bound_xl->name, GF_LOG_DEBUG,
+ "%"PRId64": FORGET %"PRId64" not found "
+ "in inode table",
+ frame->root->unique, ino);
+ }
+
+ gf_log (bound_xl->name, GF_LOG_DEBUG,
+ "%"PRId64": FORGET \'%"PRId64"\'",
+ frame->root->unique, ino);
+ }
+
+ server_forget_cbk (frame, NULL, bound_xl, 0, 0);
+
+ return 0;
+}
+
+
+
+ /*
+  * server_stat_resume - second half of server_stat()
+  * @frame: call frame of the STAT request
+  * @this: not referenced in this function
+  * @loc: location handed over by the resumed stub; passed to the stat fop
+  *
+  * Logs the request and winds fops->stat of BOUND_XL(frame).
+  */
+ int32_t
+ server_stat_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
+ {
+         server_state_t *state = CALL_STATE(frame);
+
+         gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                 "%"PRId64": STAT \'%s (%"PRId64")\'",
+                 frame->root->unique, state->loc.path, state->loc.ino);
+
+         STACK_WIND (frame, server_stat_cbk,
+                     BOUND_XL(frame), BOUND_XL(frame)->fops->stat,
+                     loc);
+         return 0;
+ }
+
+/*
+ * server_stat - stat function for server
+ * @frame: call frame
+ * @bound_xl: translator this server is bound to
+ * @params: parameters dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_stat (call_frame_t *frame,
+ xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ call_stub_t *stat_stub = NULL;
+ gf_fop_stat_req_t *req = NULL;
+ server_state_t *state = NULL;
+ int32_t ret = -1;
+ size_t pathlen = 0;
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+
+ state->ino = ntoh64 (req->ino);
+ state->path = req->path;
+ pathlen = STRLEN_0(state->path);
+
+ ret = server_loc_fill (&(state->loc), state,
+ state->ino, state->par, state->bname,
+ state->path);
+
+ stat_stub = fop_stat_stub (frame,
+ server_stat_resume,
+ &(state->loc));
+ GF_VALIDATE_OR_GOTO(bound_xl->name, stat_stub, fail);
+
+ if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+ (state->loc.inode == NULL)) {
+ do_path_lookup (stat_stub, &(state->loc));
+ } else {
+ call_resume (stat_stub);
+ }
+ return 0;
+fail:
+ server_stat_cbk (frame, NULL, frame->this,
+ -1, EINVAL,
+ NULL);
+ return 0;
+}
+
+
+ /*
+  * server_readlink_resume - second half of server_readlink()
+  * @frame: call frame of the READLINK request
+  * @this: not referenced in this function
+  * @loc: location handed over by the resumed stub
+  * @size: size argument forwarded to the readlink fop
+  *
+  * Logs the request and winds fops->readlink of BOUND_XL(frame).
+  */
+ int32_t
+ server_readlink_resume (call_frame_t *frame, xlator_t *this,
+                         loc_t *loc, size_t size)
+ {
+         server_state_t *state = CALL_STATE(frame);
+
+         gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                 "%"PRId64": READLINK \'%s (%"PRId64")\'",
+                 frame->root->unique, state->loc.path, state->loc.ino);
+
+         STACK_WIND (frame, server_readlink_cbk,
+                     BOUND_XL(frame), BOUND_XL(frame)->fops->readlink,
+                     loc, size);
+         return 0;
+ }
+
+/*
+ * server_readlink - readlink function for server
+ * @frame: call frame
+ * @bound_xl: translator this server is bound to
+ * @params: parameters dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_readlink (call_frame_t *frame,
+ xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ call_stub_t *readlink_stub = NULL;
+ gf_fop_readlink_req_t *req = NULL;
+ server_state_t *state = NULL;
+ int32_t ret = -1;
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+
+ state->size = ntoh32 (req->size);
+
+ state->ino = ntoh64 (req->ino);
+ state->path = req->path;
+
+ ret = server_loc_fill (&(state->loc), state,
+ state->ino, 0, NULL, state->path);
+
+ readlink_stub = fop_readlink_stub (frame,
+ server_readlink_resume,
+ &(state->loc),
+ state->size);
+ GF_VALIDATE_OR_GOTO(bound_xl->name, readlink_stub, fail);
+
+ if ((state->loc.parent == NULL) ||
+ (state->loc.inode == NULL)) {
+ do_path_lookup (readlink_stub, &(state->loc));
+ } else {
+ call_resume (readlink_stub);
+ }
+ return 0;
+fail:
+ server_readlink_cbk (frame, NULL,frame->this,
+ -1, EINVAL,
+ NULL);
+ return 0;
+}
+
+ /*
+  * server_create_resume - second half of server_create()
+  * @frame: call frame of the CREATE request
+  * @this: not referenced in this function
+  * @loc: location handed over by the resumed stub (its parent is used)
+  * @flags: stored on the new fd and forwarded to the create fop
+  * @mode: forwarded to the create fop
+  * @fd: not referenced in this function; a new fd is created instead
+  *
+  * Allocates a new inode and an fd for it, then winds fops->create of
+  * BOUND_XL(frame).  If either allocation fails the reply is
+  * server_create_cbk() with -1/EINVAL.
+  */
+ int32_t
+ server_create_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, int32_t flags,
+                       mode_t mode, fd_t *fd)
+ {
+         server_state_t *state = NULL;
+
+         state = CALL_STATE(frame);
+
+         if (!state->loc.parent)
+                 state->loc.parent = inode_ref (loc->parent);
+
+         /* the entry does not exist yet, so it gets a brand-new inode */
+         state->loc.inode = inode_new (state->itable);
+         GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->loc.inode, fail);
+
+         state->fd = fd_create (state->loc.inode, frame->root->pid);
+         GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->fd, fail);
+
+         state->fd->flags = flags;
+         state->fd = fd_ref (state->fd);
+
+         gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                 "%"PRId64": CREATE \'%"PRId64"/%s\'",
+                 frame->root->unique, state->par, state->bname);
+
+         STACK_WIND (frame, server_create_cbk,
+                     BOUND_XL(frame), BOUND_XL(frame)->fops->create,
+                     &(state->loc), flags, mode, state->fd);
+
+         return 0;
+ fail:
+         server_create_cbk (frame, NULL, frame->this,
+                            -1, EINVAL, NULL, NULL, NULL);
+         return 0;
+ }
+
+
+/*
+ * server_create - create function for server
+ * @frame: call frame
+ * @bound_xl: translator this server is bound to
+ * @params: parameters dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_create (call_frame_t *frame, xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ gf_fop_create_req_t *req = NULL;
+ server_state_t *state = NULL;
+ call_stub_t *create_stub = NULL;
+ int32_t ret = -1;
+ size_t pathlen = 0;
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+ {
+ pathlen = STRLEN_0(req->path);
+
+ state->par = ntoh64 (req->par);
+ state->path = req->path;
+ if (IS_NOT_ROOT(pathlen))
+ state->bname = req->bname + pathlen;
+
+ state->mode = ntoh32 (req->mode);
+ state->flags = ntoh32 (req->flags);
+ }
+
+ ret = server_loc_fill (&(state->loc), state,
+ 0, state->par, state->bname,
+ state->path);
+
+ create_stub = fop_create_stub (frame, server_create_resume,
+ &(state->loc), state->flags,
+ state->mode, state->fd);
+ GF_VALIDATE_OR_GOTO(bound_xl->name, create_stub, fail);
+
+ if (state->loc.parent == NULL) {
+ do_path_lookup (create_stub, &state->loc);
+ } else {
+ call_resume (create_stub);
+ }
+ return 0;
+fail:
+ server_create_cbk (frame, NULL, frame->this,
+ -1, EINVAL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+
+ /*
+  * server_open_resume - second half of server_open()
+  * @frame: call frame of the OPEN request
+  * @this: not referenced in this function
+  * @loc: location handed over by the resumed stub; its inode gets the fd
+  * @flags: stored on the new fd and forwarded to the open fop
+  * @fd: not referenced in this function; a new fd is created instead
+  *
+  * Creates an fd on loc->inode, keeps a reference in state->fd and winds
+  * fops->open of BOUND_XL(frame).  If the fd cannot be created the reply
+  * is server_open_cbk() with -1/EINVAL.
+  */
+ int32_t
+ server_open_resume (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, int32_t flags, fd_t *fd)
+ {
+         server_state_t *state = NULL;
+         fd_t *opened = NULL;
+
+         state = CALL_STATE(frame);
+
+         opened = fd_create (loc->inode, frame->root->pid);
+         GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, opened, fail);
+
+         opened->flags = flags;
+         state->fd = fd_ref (opened);
+
+         gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                 "%"PRId64": OPEN \'%s (%"PRId64")\'",
+                 frame->root->unique, state->path, state->ino);
+
+         STACK_WIND (frame, server_open_cbk,
+                     BOUND_XL(frame), BOUND_XL(frame)->fops->open,
+                     loc, flags, state->fd);
+
+         return 0;
+ fail:
+         server_open_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
+         return 0;
+ }
+
+/*
+ * server_open - open function for server protocol
+ * @frame: call frame
+ * @bound_xl: translator this server protocol is bound to
+ * @params: parameters dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_open (call_frame_t *frame, xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ call_stub_t *open_stub = NULL;
+ gf_fop_open_req_t *req = NULL;
+ server_state_t *state = NULL;
+ int32_t ret = -1;
+ size_t pathlen = 0;
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+ {
+ state->ino = ntoh64 (req->ino);
+ state->path = req->path;
+ pathlen = STRLEN_0(state->path);
+ state->flags = ntoh32 (req->flags);
+ }
+ ret = server_loc_fill (&(state->loc), state,
+ state->ino, 0, NULL, state->path);
+
+ open_stub = fop_open_stub (frame,
+ server_open_resume,
+ &(state->loc), state->flags, NULL);
+ GF_VALIDATE_OR_GOTO(bound_xl->name, open_stub, fail);
+
+ if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+ (state->loc.inode == NULL)) {
+ do_path_lookup (open_stub, &state->loc);
+ } else {
+ call_resume (open_stub);
+ }
+ return 0;
+fail:
+ server_open_cbk (frame, NULL, frame->this,
+ -1, EINVAL,
+ NULL);
+ return 0;
+}
+
+
+/*
+ * server_readv - readv function for server protocol
+ * @frame: call frame
+ * @bound_xl:
+ * @params: parameter dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_readv (call_frame_t *frame, xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ gf_fop_read_req_t *req = NULL;
+ server_state_t *state = NULL;
+ server_connection_t *conn = NULL;
+
+ conn = SERVER_CONNECTION(frame);
+
+ req = gf_param (hdr);
+
+ state = CALL_STATE(frame);
+ {
+ state->fd_no = ntoh64 (req->fd);
+ if (state->fd_no >= 0)
+ state->fd = gf_fd_fdptr_get (conn->fdtable,
+ state->fd_no);
+
+ state->size = ntoh32 (req->size);
+ state->offset = ntoh64 (req->offset);
+ }
+
+ GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+ gf_log (bound_xl->name, GF_LOG_DEBUG,
+ "%"PRId64": READV \'fd=%"PRId64" (%"PRId64"); "
+ "offset=%"PRId64"; size=%"PRId64,
+ frame->root->unique, state->fd_no, state->fd->inode->ino,
+ state->offset, (int64_t)state->size);
+
+ STACK_WIND (frame,
+ server_readv_cbk,
+ BOUND_XL(frame),
+ BOUND_XL(frame)->fops->readv,
+ state->fd, state->size, state->offset);
+ return 0;
+fail:
+ server_readv_cbk (frame, NULL, frame->this,
+ -1, EINVAL, NULL, 0, NULL);
+ return 0;
+}
+
+
+/*
+ * server_writev - writev function for server
+ * @frame: call frame
+ * @bound_xl:
+ * @params: parameter dictionary
+ *
+ * not for external reference
+ */
+int32_t
+server_writev (call_frame_t *frame, xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ server_connection_t *conn = NULL;
+ gf_fop_write_req_t *req = NULL;
+ struct iovec iov = {0, };
+ dict_t *refs = NULL;
+ server_state_t *state = NULL;
+ int32_t ret = -1;
+
+ conn = SERVER_CONNECTION(frame);
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+ {
+ state->fd_no = ntoh64 (req->fd);
+ if (state->fd_no >= 0)
+ state->fd = gf_fd_fdptr_get (conn->fdtable,
+ state->fd_no);
+
+ state->offset = ntoh64 (req->offset);
+ }
+
+ GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ refs = dict_new ();
+ GF_VALIDATE_OR_GOTO(bound_xl->name, refs, fail);
+
+ ret = dict_set_dynptr (refs, NULL, buf, buflen);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "fd - %"PRId64" (%"PRId64"): failed to set buffer entry "
+ "to req_refs",
+ state->fd_no, state->fd->inode->ino);
+ goto fail;
+ } else {
+ buf = NULL;
+ }
+
+ frame->root->req_refs = refs;
+
+ gf_log (bound_xl->name, GF_LOG_DEBUG,
+ "%"PRId64": WRITEV \'fd=%"PRId64" (%"PRId64"); "
+ "offset=%"PRId64"; size=%"PRId64,
+ frame->root->unique, state->fd_no, state->fd->inode->ino,
+ state->offset, (int64_t)buflen);
+
+ STACK_WIND (frame,
+ server_writev_cbk,
+ BOUND_XL(frame),
+ BOUND_XL(frame)->fops->writev,
+ state->fd, &iov, 1, state->offset);
+
+ if (refs)
+ dict_unref (refs);
+ return 0;
+fail:
+ server_writev_cbk (frame, NULL, frame->this,
+ -1, EINVAL, NULL);
+
+ if (buf)
+ free (buf);
+
+ if (refs)
+ dict_unref (refs);
+
+ return 0;
+}
+
+
+
+/*
+ * server_release - handle a release request for an open fd
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging and validation
+ * @hdr: request header; the release request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, removes its number from the connection's fd table with
+ * gf_fd_put() and winds the flush fop of BOUND_XL(frame) with
+ * server_release_cbk as the callback.  An unresolved fd is answered with
+ * -1 and EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_release (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        server_connection_t  *conn  = SERVER_CONNECTION(frame);
+        gf_cbk_release_req_t *req   = gf_param (hdr);
+        server_state_t       *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        state->fd    = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        gf_fd_put (conn->fdtable, state->fd_no);
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": RELEASE \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_release_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->flush,
+                    state->fd);
+        return 0;
+
+fail:
+        server_release_cbk (frame, NULL, frame->this, -1, EINVAL);
+        return 0;
+}
+
+
+/*
+ * server_fsync - handle an fsync request on an open fd
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging and validation
+ * @hdr: request header; the fsync request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, takes the flags from the request's data field and
+ * winds fsync to BOUND_XL(frame).  An unresolved fd is answered through
+ * server_fsync_cbk with -1 and EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_fsync (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        server_connection_t *conn  = SERVER_CONNECTION(frame);
+        gf_fop_fsync_req_t  *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        state->flags = ntoh32 (req->data);
+
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FSYNC \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_fsync_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->fsync,
+                    state->fd, state->flags);
+        return 0;
+
+fail:
+        server_fsync_cbk (frame, NULL, frame->this, -1, EINVAL);
+        return 0;
+}
+
+
+/*
+ * server_flush - handle a flush request on an open fd
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging and validation
+ * @hdr: request header; the flush request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd and winds flush to BOUND_XL(frame).  An unresolved fd
+ * is answered through server_flush_cbk with -1 and EINVAL.  Always
+ * returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_flush (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        server_connection_t *conn  = SERVER_CONNECTION(frame);
+        gf_fop_flush_req_t  *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FLUSH \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_flush_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->flush,
+                    state->fd);
+        return 0;
+
+fail:
+        server_flush_cbk (frame, NULL, frame->this, -1, EINVAL);
+        return 0;
+}
+
+
+/*
+ * server_ftruncate - handle an ftruncate request on an open fd
+ * @frame: call frame of the request
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the ftruncate request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, reads the target offset from the request and winds
+ * ftruncate to @bound_xl.  An unresolved fd is answered through
+ * server_ftruncate_cbk with -1 and EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_ftruncate (call_frame_t *frame, xlator_t *bound_xl,
+                  gf_hdr_common_t *hdr, size_t hdrlen,
+                  char *buf, size_t buflen)
+{
+        server_connection_t    *conn  = SERVER_CONNECTION(frame);
+        gf_fop_ftruncate_req_t *req   = gf_param (hdr);
+        server_state_t         *state = CALL_STATE(frame);
+
+        state->fd_no  = ntoh64 (req->fd);
+        state->offset = ntoh64 (req->offset);
+
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FTRUNCATE \'fd=%"PRId64" (%"PRId64"); "
+                "offset=%"PRId64"\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino,
+                state->offset);
+
+        STACK_WIND (frame, server_ftruncate_cbk,
+                    bound_xl, bound_xl->fops->ftruncate,
+                    state->fd, state->offset);
+        return 0;
+
+fail:
+        server_ftruncate_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/*
+ * server_fstat - handle an fstat request on an open fd
+ * @frame: call frame of the request
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the fstat request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd and winds fstat to @bound_xl.  When the fd cannot be
+ * resolved an error is logged and server_fstat_cbk is called directly
+ * with -1 and EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_fstat (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        server_connection_t *conn  = SERVER_CONNECTION(frame);
+        gf_fop_fstat_req_t  *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_fstat_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
+                return 0;
+        }
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FSTAT \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_fstat_cbk,
+                    bound_xl, bound_xl->fops->fstat,
+                    state->fd);
+        return 0;
+}
+
+
+/*
+ * server_truncate_resume - continuation of server_truncate
+ *
+ * Logs the request and winds truncate to BOUND_XL(frame) with the given
+ * @loc and @offset.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_truncate_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, off_t offset)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": TRUNCATE \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_truncate_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->truncate,
+                    loc, offset);
+        return 0;
+}
+
+
+/*
+ * server_truncate - handle a path-based truncate request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the truncate request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from the inode number and path in the request and
+ * builds a truncate stub around server_truncate_resume.  The stub is
+ * resumed immediately when both the parent and the inode of the loc are
+ * set; otherwise it is handed to do_path_lookup().  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_truncate (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        gf_fop_truncate_req_t *req           = gf_param (hdr);
+        server_state_t        *state         = CALL_STATE(frame);
+        call_stub_t           *truncate_stub = NULL;
+        int32_t                ret           = -1;
+        size_t                 pathlen       = 0;
+        int32_t                need_lookup   = 0;
+
+        pathlen       = STRLEN_0(req->path);
+        state->offset = ntoh64 (req->offset);
+        state->path   = req->path;
+        state->ino    = ntoh64 (req->ino);
+
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, 0, NULL, state->path);
+
+        truncate_stub = fop_truncate_stub (frame, server_truncate_resume,
+                                           &(state->loc), state->offset);
+
+        need_lookup = ((state->loc.parent == NULL) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (truncate_stub, &(state->loc));
+        } else {
+                call_resume (truncate_stub);
+        }
+
+        return 0;
+}
+
+
+
+
+
+/*
+ * server_unlink_resume - continuation of server_unlink
+ *
+ * Fills in the parent and inode of state->loc from @loc (taking a
+ * reference) when they are still unset, logs the request and winds unlink
+ * to BOUND_XL(frame).  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        if (!state->loc.parent)
+                state->loc.parent = inode_ref (loc->parent);
+
+        if (!state->loc.inode)
+                state->loc.inode = inode_ref (loc->inode);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": UNLINK \'%"PRId64"/%s (%"PRId64")\'",
+                frame->root->unique, state->par, state->path,
+                state->loc.inode->ino);
+
+        STACK_WIND (frame, server_unlink_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->unlink,
+                    loc);
+        return 0;
+}
+
+/*
+ * server_unlink - handle an unlink request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the unlink request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from the parent inode number, basename and path in
+ * the request (the basename is only taken when IS_NOT_ROOT(pathlen)
+ * holds) and builds an unlink stub around server_unlink_resume.  The stub
+ * is resumed immediately when both parent and inode of the loc are set;
+ * otherwise it is handed to do_path_lookup().  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_unlink (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        gf_fop_unlink_req_t *req         = gf_param (hdr);
+        server_state_t      *state       = CALL_STATE(frame);
+        call_stub_t         *unlink_stub = NULL;
+        int32_t              ret         = -1;
+        size_t               pathlen     = 0;
+        int32_t              need_lookup = 0;
+
+        pathlen = STRLEN_0(req->path);
+
+        state->par  = ntoh64 (req->par);
+        state->path = req->path;
+        if (IS_NOT_ROOT(pathlen))
+                state->bname = req->bname + pathlen;
+
+        ret = server_loc_fill (&(state->loc), state,
+                               0, state->par, state->bname, state->path);
+
+        unlink_stub = fop_unlink_stub (frame, server_unlink_resume,
+                                       &(state->loc));
+
+        need_lookup = ((state->loc.parent == NULL) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (unlink_stub, &state->loc);
+        } else {
+                call_resume (unlink_stub);
+        }
+
+        return 0;
+}
+
+
+
+
+
+/*
+ * server_setxattr_resume - continuation of server_setxattr
+ *
+ * Logs the request and winds setxattr to BOUND_XL(frame) with the given
+ * @loc, @dict and @flags.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_setxattr_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, dict_t *dict, int32_t flags)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": SETXATTR \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_setxattr_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->setxattr,
+                    loc, dict, flags);
+        return 0;
+}
+
+/*
+ * server_setxattr - handle a path-based setxattr request
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging and validation
+ * @hdr: request header; the setxattr request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * The request carries a serialized dictionary of dict_len bytes followed
+ * by the path.  The dictionary bytes are duplicated and unserialized into
+ * a new dict, state->loc is filled from inode number and path, and a
+ * setxattr stub around server_setxattr_resume is either resumed or handed
+ * to do_path_lookup().  On failure server_setxattr_cbk is called directly
+ * with -1 and ENOENT.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+server_setxattr (call_frame_t *frame,
+                 xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        call_stub_t           *setxattr_stub = NULL;
+        gf_fop_setxattr_req_t *req           = NULL;
+        dict_t                *dict          = NULL;
+        server_state_t        *state         = NULL;
+        int32_t                ret           = -1;
+        size_t                 pathlen       = 0;
+        size_t                 dict_len      = 0;
+        char                  *req_dictbuf   = NULL;
+
+        req   = gf_param (hdr);
+        state = CALL_STATE(frame);
+        {
+                dict_len = ntoh32 (req->dict_len);
+
+                /* the path is placed after the serialized dictionary */
+                state->path = req->path + dict_len;
+
+                pathlen = STRLEN_0(state->path);
+                state->ino = ntoh64 (req->ino);
+
+                state->flags = ntoh32 (req->flags);
+        }
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, 0, NULL, state->path);
+
+        {
+                /* Unserialize the dictionary */
+                req_dictbuf = memdup (req->dict, dict_len);
+                GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
+
+                dict = dict_new ();
+                GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
+
+                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
+                if (ret < 0) {
+                        gf_log (bound_xl->name, GF_LOG_ERROR,
+                                "%"PRId64": %s (%"PRId64"): failed to "
+                                "unserialize request buffer to dictionary",
+                                frame->root->unique, state->loc.path,
+                                state->ino);
+                        goto fail;
+                }
+
+                /*
+                 * The buffer is attached to the dict through extra_free
+                 * (presumably released together with the dict), so this
+                 * function must no longer free it itself.
+                 */
+                dict->extra_free = req_dictbuf;
+                req_dictbuf = NULL;
+        }
+
+        setxattr_stub = fop_setxattr_stub (frame,
+                                           server_setxattr_resume,
+                                           &(state->loc),
+                                           dict,
+                                           state->flags);
+        GF_VALIDATE_OR_GOTO(bound_xl->name, setxattr_stub, fail);
+
+        if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+            (state->loc.inode == NULL)) {
+                do_path_lookup (setxattr_stub, &(state->loc));
+        } else {
+                call_resume (setxattr_stub);
+        }
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+fail:
+        if (dict)
+                dict_unref (dict);
+
+        /*
+         * Non-NULL only while the copy has not been attached to a dict
+         * (dict_new() or dict_unserialize() failed); previously the
+         * dict_new() failure leaked it.
+         */
+        free (req_dictbuf);
+
+        server_setxattr_cbk (frame, NULL, frame->this,
+                             -1, ENOENT);
+        return 0;
+
+}
+
+
+
+/*
+ * server_fxattrop - handle an fd-based xattrop request
+ * @frame: call frame of the request
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the fxattrop request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, unserializes the optional request dictionary (only
+ * when dict_len is non-zero) and winds fxattrop to @bound_xl.  An
+ * unresolved fd or a dictionary failure is answered through
+ * server_fxattrop_cbk with -1 and EINVAL.  Always returns 0.
+ */
+int32_t
+server_fxattrop (call_frame_t *frame,
+                 xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        server_connection_t   *conn        = NULL;
+        gf_fop_fxattrop_req_t *req         = NULL;
+        dict_t                *dict        = NULL;
+        server_state_t        *state       = NULL;
+        size_t                 dict_len    = 0;
+        char                  *req_dictbuf = NULL;
+        int32_t                ret         = -1;
+
+        conn = SERVER_CONNECTION(frame);
+
+        req   = gf_param (hdr);
+        state = CALL_STATE(frame);
+        {
+                state->fd_no = ntoh64 (req->fd);
+                if (state->fd_no >= 0)
+                        state->fd = gf_fd_fdptr_get (conn->fdtable,
+                                                     state->fd_no);
+
+                dict_len = ntoh32 (req->dict_len);
+                state->ino = ntoh64 (req->ino);
+                state->flags = ntoh32 (req->flags);
+        }
+
+        /*
+         * state->fd is dereferenced by both log statements below, so an
+         * unresolved fd has to be rejected first, as the other fd-based
+         * handlers do.
+         */
+        GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
+
+        if (dict_len) {
+                /* Unserialize the dictionary */
+                req_dictbuf = memdup (req->dict, dict_len);
+                GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
+
+                dict = dict_new ();
+                GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
+
+                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
+                if (ret < 0) {
+                        gf_log (bound_xl->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): failed to unserialize "
+                                "request buffer to dictionary",
+                                state->fd_no, state->fd->inode->ino);
+                        goto fail;
+                }
+
+                /*
+                 * The buffer is attached to the dict through extra_free
+                 * (presumably released together with the dict), so this
+                 * function must no longer free it itself.
+                 */
+                dict->extra_free = req_dictbuf;
+                req_dictbuf = NULL;
+        }
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FXATTROP \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame,
+                    server_fxattrop_cbk,
+                    bound_xl,
+                    bound_xl->fops->fxattrop,
+                    state->fd,
+                    state->flags,
+                    dict);
+        if (dict)
+                dict_unref (dict);
+        return 0;
+fail:
+        if (dict)
+                dict_unref (dict);
+
+        /*
+         * Non-NULL only while the copy has not been attached to a dict
+         * (dict_new() or dict_unserialize() failed).
+         */
+        free (req_dictbuf);
+
+        server_fxattrop_cbk (frame, NULL, frame->this,
+                             -1, EINVAL, NULL);
+        return 0;
+}
+
+/*
+ * server_xattrop_resume - continuation of server_xattrop
+ *
+ * Logs the request and winds xattrop to BOUND_XL(frame) with the given
+ * @loc, @flags and @dict.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_xattrop_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": XATTROP \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_xattrop_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->xattrop,
+                    loc, flags, dict);
+        return 0;
+}
+
+/*
+ * server_xattrop - handle a path-based xattrop request
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging and validation
+ * @hdr: request header; the xattrop request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * The request carries an optional serialized dictionary of dict_len
+ * bytes followed by the path.  state->loc is filled from inode number and
+ * path, the dictionary is unserialized when dict_len is non-zero, and an
+ * xattrop stub around server_xattrop_resume is either resumed or handed
+ * to do_path_lookup().  On failure server_xattrop_cbk is called directly
+ * with -1 and EINVAL.  Always returns 0.
+ */
+int32_t
+server_xattrop (call_frame_t *frame,
+                xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_xattrop_req_t *req          = NULL;
+        dict_t               *dict         = NULL;
+        server_state_t       *state        = NULL;
+        call_stub_t          *xattrop_stub = NULL;
+        int32_t               ret          = -1;
+        size_t                pathlen      = 0;
+        size_t                dict_len     = 0;
+        char                 *req_dictbuf  = NULL;
+
+        req   = gf_param (hdr);
+        state = CALL_STATE(frame);
+        {
+                dict_len = ntoh32 (req->dict_len);
+                state->ino = ntoh64 (req->ino);
+                /* the path is placed after the serialized dictionary */
+                state->path = req->path + dict_len;
+                pathlen = STRLEN_0(state->path);
+                state->flags = ntoh32 (req->flags);
+        }
+
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, 0, NULL, state->path);
+
+        if (dict_len) {
+                /* Unserialize the dictionary */
+                req_dictbuf = memdup (req->dict, dict_len);
+                GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
+
+                dict = dict_new ();
+                GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
+
+                ret = dict_unserialize (req_dictbuf, dict_len, &dict);
+                if (ret < 0) {
+                        gf_log (bound_xl->name, GF_LOG_ERROR,
+                                "%s (%"PRId64"): failed to unserialize "
+                                "request buffer to dictionary",
+                                state->loc.path, state->ino);
+                        goto fail;
+                }
+
+                /*
+                 * The buffer is attached to the dict through extra_free
+                 * (presumably released together with the dict), so this
+                 * function must no longer free it itself.
+                 */
+                dict->extra_free = req_dictbuf;
+                req_dictbuf = NULL;
+        }
+        xattrop_stub = fop_xattrop_stub (frame,
+                                         server_xattrop_resume,
+                                         &(state->loc),
+                                         state->flags,
+                                         dict);
+        GF_VALIDATE_OR_GOTO(bound_xl->name, xattrop_stub, fail);
+
+        if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+            (state->loc.inode == NULL)) {
+                do_path_lookup (xattrop_stub, &(state->loc));
+        } else {
+                call_resume (xattrop_stub);
+        }
+
+        if (dict)
+                dict_unref (dict);
+        return 0;
+fail:
+        if (dict)
+                dict_unref (dict);
+
+        /*
+         * Non-NULL only while the copy has not been attached to a dict;
+         * previously it leaked when dict_new() or dict_unserialize()
+         * failed.
+         */
+        free (req_dictbuf);
+
+        server_xattrop_cbk (frame, NULL, frame->this,
+                            -1, EINVAL,
+                            NULL);
+        return 0;
+}
+
+
+/*
+ * server_getxattr_resume - continuation of server_getxattr
+ *
+ * Logs the request and winds getxattr to BOUND_XL(frame) with the given
+ * @loc and @name.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_getxattr_resume (call_frame_t *frame, xlator_t *this,
+                        loc_t *loc, const char *name)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": GETXATTR \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_getxattr_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->getxattr,
+                    loc, name);
+        return 0;
+}
+
+/*
+ * server_getxattr - handle a path-based getxattr request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the getxattr request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from inode number and path.  The attribute name is
+ * only taken from the request (it follows the path) when namelen is
+ * non-zero.  A getxattr stub around server_getxattr_resume is then either
+ * resumed or handed to do_path_lookup().  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_getxattr (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        gf_fop_getxattr_req_t *req           = gf_param (hdr);
+        server_state_t        *state         = CALL_STATE(frame);
+        call_stub_t           *getxattr_stub = NULL;
+        int32_t                ret           = -1;
+        size_t                 namelen       = 0;
+        size_t                 pathlen       = 0;
+        int32_t                need_lookup   = 0;
+
+        pathlen     = STRLEN_0(req->path);
+        state->path = req->path;
+        state->ino  = ntoh64 (req->ino);
+
+        namelen = ntoh32 (req->namelen);
+        if (namelen)
+                state->name = (req->name + pathlen);
+
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, 0, NULL, state->path);
+
+        getxattr_stub = fop_getxattr_stub (frame, server_getxattr_resume,
+                                           &(state->loc), state->name);
+
+        need_lookup = (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (getxattr_stub, &(state->loc));
+        } else {
+                call_resume (getxattr_stub);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * server_removexattr_resume - continuation of server_removexattr
+ *
+ * Logs the request and winds removexattr to BOUND_XL(frame) with the
+ * given @loc and @name.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_removexattr_resume (call_frame_t *frame, xlator_t *this,
+                           loc_t *loc, const char *name)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": REMOVEXATTR \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_removexattr_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->removexattr,
+                    loc, name);
+        return 0;
+}
+
+/*
+ * server_removexattr - handle a path-based removexattr request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the removexattr request is extracted with
+ *       gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from inode number and path, takes the attribute name
+ * that follows the path in the request, and either resumes a removexattr
+ * stub around server_removexattr_resume or hands it to do_path_lookup().
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_removexattr (call_frame_t *frame, xlator_t *bound_xl,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        gf_fop_removexattr_req_t *req              = gf_param (hdr);
+        server_state_t           *state            = CALL_STATE(frame);
+        call_stub_t              *removexattr_stub = NULL;
+        int32_t                   ret              = -1;
+        size_t                    pathlen          = 0;
+        int32_t                   need_lookup      = 0;
+
+        pathlen     = STRLEN_0(req->path);
+        state->path = req->path;
+        state->ino  = ntoh64 (req->ino);
+        state->name = (req->name + pathlen);
+
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, 0, NULL, state->path);
+
+        removexattr_stub = fop_removexattr_stub (frame,
+                                                 server_removexattr_resume,
+                                                 &(state->loc),
+                                                 state->name);
+
+        need_lookup = (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (removexattr_stub, &(state->loc));
+        } else {
+                call_resume (removexattr_stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_statfs - handle a statfs request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the statfs request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from the inode number and path in the request and
+ * winds statfs to BOUND_XL(frame) right away; no stub or path lookup is
+ * involved.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_statfs (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        gf_fop_statfs_req_t *req   = gf_param (hdr);
+        server_state_t      *state = CALL_STATE(frame);
+        int32_t              ret   = -1;
+
+        state->ino  = ntoh64 (req->ino);
+        state->path = req->path;
+
+        ret = server_loc_fill (&state->loc, state,
+                               state->ino, 0, NULL, state->path);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": STATFS \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_statfs_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->statfs,
+                    &(state->loc));
+
+        return 0;
+}
+
+
+
+/*
+ * server_opendir_resume - continuation of server_opendir
+ *
+ * Creates a new fd for loc->inode, stores a reference to it in
+ * state->fd, logs the request and winds opendir to BOUND_XL(frame).
+ * The @this and @fd arguments are not referenced.  Always returns 0.
+ */
+int32_t
+server_opendir_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, fd_t *fd)
+{
+        server_state_t *state  = NULL;
+        fd_t           *dir_fd = NULL;
+
+        state = CALL_STATE(frame);
+
+        dir_fd    = fd_create (loc->inode, frame->root->pid);
+        state->fd = fd_ref (dir_fd);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": OPENDIR \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame, server_opendir_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->opendir,
+                    loc, state->fd);
+        return 0;
+}
+
+
+/*
+ * server_opendir - handle an opendir request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the opendir request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from inode number and path and builds an opendir stub
+ * around server_opendir_resume (with a NULL fd).  The stub is either
+ * resumed or handed to do_path_lookup().  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_opendir (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_opendir_req_t *req          = gf_param (hdr);
+        server_state_t       *state        = CALL_STATE(frame);
+        call_stub_t          *opendir_stub = NULL;
+        int32_t               ret          = -1;
+        size_t                pathlen      = 0;
+        int32_t               need_lookup  = 0;
+
+        state->path = req->path;
+        pathlen     = STRLEN_0(state->path);
+        state->ino  = ntoh64 (req->ino);
+
+        ret = server_loc_fill (&state->loc, state,
+                               state->ino, 0, NULL, state->path);
+
+        opendir_stub = fop_opendir_stub (frame, server_opendir_resume,
+                                         &(state->loc), NULL);
+
+        need_lookup = (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (opendir_stub, &(state->loc));
+        } else {
+                call_resume (opendir_stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_releasedir - handle a releasedir request
+ * @frame: call frame of the request
+ * @bound_xl: translator whose name is used for logging
+ * @hdr: request header; the releasedir request is extracted with
+ *       gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, removes its number from the connection's fd table
+ * with gf_fd_put() and replies through server_releasedir_cbk with 0, 0.
+ * Nothing is wound to another translator.  An unresolved fd is logged
+ * and answered with -1 and EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_releasedir (call_frame_t *frame, xlator_t *bound_xl,
+                   gf_hdr_common_t *hdr, size_t hdrlen,
+                   char *buf, size_t buflen)
+{
+        server_connection_t     *conn  = SERVER_CONNECTION(frame);
+        gf_cbk_releasedir_req_t *req   = gf_param (hdr);
+        server_state_t          *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        state->fd    = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_releasedir_cbk (frame, NULL, frame->this, -1, EINVAL);
+                return 0;
+        }
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": RELEASEDIR \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        gf_fd_put (conn->fdtable, state->fd_no);
+
+        server_releasedir_cbk (frame, NULL, frame->this, 0, 0);
+        return 0;
+}
+
+
+/*
+ * server_getdents - getdents function for server protocol
+ * @frame: call frame
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the getdents request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd named in the request, reads size, offset and flags and
+ * winds getdents to @bound_xl.  When the fd cannot be resolved an error
+ * is logged and server_getdents_cbk is called directly with -1 and
+ * EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_getdents (call_frame_t *frame,
+ xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ gf_fop_getdents_req_t *req = NULL;
+ server_state_t *state = NULL;
+ server_connection_t *conn = NULL;
+
+ conn = SERVER_CONNECTION(frame);
+
+ req = gf_param (hdr);
+ state = CALL_STATE(frame);
+ {
+ state->fd_no = ntoh64 (req->fd);
+ if (state->fd_no >= 0)
+ state->fd = gf_fd_fdptr_get (conn->fdtable,
+ state->fd_no);
+
+ state->size = ntoh32 (req->size);
+ state->offset = ntoh64 (req->offset);
+ state->flags = ntoh32 (req->flags);
+ }
+
+
+ if (state->fd == NULL) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "fd - %"PRId64": unresolved fd",
+ state->fd_no);
+
+ server_getdents_cbk (frame, NULL, frame->this,
+ -1, EINVAL, NULL, 0);
+
+ goto out;
+ }
+
+ gf_log (bound_xl->name, GF_LOG_DEBUG,
+ "%"PRId64": GETDENTS \'fd=%"PRId64" (%"PRId64"); "
+ "offset=%"PRId64"; size=%"PRId64,
+ frame->root->unique, state->fd_no, state->fd->inode->ino,
+ state->offset, (int64_t)state->size);
+
+ STACK_WIND (frame,
+ server_getdents_cbk,
+ bound_xl,
+ bound_xl->fops->getdents,
+ state->fd,
+ state->size,
+ state->offset,
+ state->flags);
+out:
+ return 0;
+}
+
+
+/*
+ * server_readdir - handle a readdir request on an open directory fd
+ * @frame: call frame of the request
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the readdir request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, reads size and offset from the request and winds
+ * readdir to @bound_xl.  When the fd cannot be resolved an error is
+ * logged and server_readdir_cbk is called directly with -1 and EINVAL.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_readdir (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        server_connection_t  *conn  = SERVER_CONNECTION(frame);
+        gf_fop_readdir_req_t *req   = gf_param (hdr);
+        server_state_t       *state = CALL_STATE(frame);
+
+        state->fd_no  = ntoh64 (req->fd);
+        state->size   = ntoh32 (req->size);
+        state->offset = ntoh64 (req->offset);
+
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_readdir_cbk (frame, NULL, frame->this,
+                                    -1, EINVAL, NULL);
+                return 0;
+        }
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": READDIR \'fd=%"PRId64" (%"PRId64"); "
+                "offset=%"PRId64"; size=%"PRId64,
+                frame->root->unique, state->fd_no, state->fd->inode->ino,
+                state->offset, (int64_t)state->size);
+
+        STACK_WIND (frame, server_readdir_cbk,
+                    bound_xl, bound_xl->fops->readdir,
+                    state->fd, state->size, state->offset);
+        return 0;
+}
+
+
+
+/*
+ * server_fsyncdir - handle an fsyncdir request on an open directory fd
+ * @frame: call frame of the request
+ * @bound_xl: translator the request is wound to
+ * @hdr: request header; the fsyncdir request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd, takes the flags from the request's data field and
+ * winds fsyncdir to @bound_xl.  When the fd cannot be resolved an error
+ * is logged and server_fsyncdir_cbk is called directly with -1 and
+ * EINVAL.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_fsyncdir (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        server_connection_t   *conn  = SERVER_CONNECTION(frame);
+        gf_fop_fsyncdir_req_t *req   = gf_param (hdr);
+        server_state_t        *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        state->flags = ntoh32 (req->data);
+
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_fsyncdir_cbk (frame, NULL, frame->this, -1, EINVAL);
+                return 0;
+        }
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": FSYNCDIR \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_fsyncdir_cbk,
+                    bound_xl, bound_xl->fops->fsyncdir,
+                    state->fd, state->flags);
+        return 0;
+}
+
+
+/*
+ * server_mknod_resume - continuation of server_mknod
+ *
+ * Takes the parent inode from @loc (with a reference) when state->loc has
+ * none, assigns a new inode from state->itable to state->loc.inode, logs
+ * the request and winds mknod to BOUND_XL(frame) using state->loc and
+ * the @mode and @dev arguments.  @this is not referenced.  Always
+ * returns 0.
+ */
+int32_t
+server_mknod_resume (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, mode_t mode, dev_t dev)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        if (!state->loc.parent)
+                state->loc.parent = inode_ref (loc->parent);
+
+        state->loc.inode = inode_new (state->itable);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": MKNOD \'%"PRId64"/%s\'",
+                frame->root->unique, state->par, state->bname);
+
+        STACK_WIND (frame, server_mknod_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->mknod,
+                    &(state->loc), mode, dev);
+
+        return 0;
+}
+/*
+ * server_mknod - handle a mknod request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the mknod request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from the parent inode number, basename and path (the
+ * basename is only taken when IS_NOT_ROOT(pathlen) holds) and builds a
+ * mknod stub around server_mknod_resume.  The stub is resumed when the
+ * parent of the loc is set; otherwise it is handed to do_path_lookup().
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_mknod (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        gf_fop_mknod_req_t *req        = gf_param (hdr);
+        server_state_t     *state      = CALL_STATE(frame);
+        call_stub_t        *mknod_stub = NULL;
+        int32_t             ret        = -1;
+        size_t              pathlen    = 0;
+
+        pathlen = STRLEN_0(req->path);
+
+        state->par  = ntoh64 (req->par);
+        state->path = req->path;
+        if (IS_NOT_ROOT(pathlen))
+                state->bname = req->bname + pathlen;
+
+        state->mode = ntoh32 (req->mode);
+        state->dev  = ntoh64 (req->dev);
+
+        ret = server_loc_fill (&(state->loc), state,
+                               0, state->par, state->bname, state->path);
+
+        mknod_stub = fop_mknod_stub (frame, server_mknod_resume,
+                                     &(state->loc), state->mode, state->dev);
+
+        if (state->loc.parent != NULL) {
+                call_resume (mknod_stub);
+        } else {
+                do_path_lookup (mknod_stub, &(state->loc));
+        }
+
+        return 0;
+}
+
+/*
+ * server_mkdir_resume - continuation of server_mkdir
+ *
+ * Takes the parent inode from @loc (with a reference) when state->loc has
+ * none, assigns a new inode from state->itable to state->loc.inode, logs
+ * the request and winds mkdir to BOUND_XL(frame).  The mode that is wound
+ * comes from state->mode; the @mode and @this arguments are not
+ * referenced.  Always returns 0.
+ */
+int32_t
+server_mkdir_resume (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, mode_t mode)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        if (!state->loc.parent)
+                state->loc.parent = inode_ref (loc->parent);
+
+        state->loc.inode = inode_new (state->itable);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": MKDIR \'%"PRId64"/%s\'",
+                frame->root->unique, state->par, state->bname);
+
+        STACK_WIND (frame, server_mkdir_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->mkdir,
+                    &(state->loc), state->mode);
+
+        return 0;
+}
+
+/*
+ * server_mkdir - handle a mkdir request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the mkdir request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from the parent inode number, basename and path and
+ * builds a mkdir stub around server_mkdir_resume.  The stub is resumed
+ * when the parent of the loc is set; otherwise it is handed to
+ * do_path_lookup().  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_mkdir (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        gf_fop_mkdir_req_t *req        = gf_param (hdr);
+        server_state_t     *state      = CALL_STATE(frame);
+        call_stub_t        *mkdir_stub = NULL;
+        int32_t             ret        = -1;
+        size_t              pathlen    = 0;
+
+        pathlen      = STRLEN_0(req->path);
+        state->mode  = ntoh32 (req->mode);
+        state->path  = req->path;
+        state->bname = req->bname + pathlen;
+        state->par   = ntoh64 (req->par);
+
+        ret = server_loc_fill (&(state->loc), state,
+                               0, state->par, state->bname, state->path);
+
+        mkdir_stub = fop_mkdir_stub (frame, server_mkdir_resume,
+                                     &(state->loc), state->mode);
+
+        if (state->loc.parent != NULL) {
+                call_resume (mkdir_stub);
+        } else {
+                do_path_lookup (mkdir_stub, &(state->loc));
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_rmdir_resume - continuation of server_rmdir
+ *
+ * Fills in the parent and inode of state->loc from @loc (taking a
+ * reference) when they are still unset, logs the request and winds rmdir
+ * to BOUND_XL(frame).  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        if (!state->loc.parent)
+                state->loc.parent = inode_ref (loc->parent);
+
+        if (!state->loc.inode)
+                state->loc.inode = inode_ref (loc->inode);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": RMDIR \'%"PRId64"/%s\'",
+                frame->root->unique, state->par, state->bname);
+
+        STACK_WIND (frame, server_rmdir_cbk,
+                    BOUND_XL(frame), BOUND_XL(frame)->fops->rmdir,
+                    loc);
+        return 0;
+}
+
+/*
+ * server_rmdir - handle an rmdir request
+ * @frame: call frame of the request
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the rmdir request is extracted with gf_param()
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Fills state->loc from state->ino (not set from the request here), the
+ * parent inode number, basename and path, and builds an rmdir stub around
+ * server_rmdir_resume.  The stub is resumed when both parent and inode of
+ * the loc are set; otherwise it is handed to do_path_lookup().  Always
+ * returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_rmdir (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        gf_fop_rmdir_req_t *req         = gf_param (hdr);
+        server_state_t     *state       = CALL_STATE(frame);
+        call_stub_t        *rmdir_stub  = NULL;
+        int32_t             ret         = -1;
+        size_t              pathlen     = 0;
+        int32_t             need_lookup = 0;
+
+        pathlen      = STRLEN_0(req->path);
+        state->path  = req->path;
+        state->par   = ntoh64 (req->par);
+        state->bname = req->bname + pathlen;
+
+        ret = server_loc_fill (&(state->loc), state,
+                               state->ino, state->par, state->bname,
+                               state->path);
+
+        rmdir_stub = fop_rmdir_stub (frame, server_rmdir_resume,
+                                     &(state->loc));
+
+        need_lookup = ((state->loc.parent == NULL) ||
+                       (state->loc.inode == NULL));
+        if (need_lookup) {
+                do_path_lookup (rmdir_stub, &(state->loc));
+        } else {
+                call_resume (rmdir_stub);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * server_chown_resume - continuation of server_chown
+ *
+ * Logs the request and winds chown to BOUND_XL(frame) with the given
+ * @loc, @uid and @gid.  @this is not referenced.  Always returns 0.
+ */
+int32_t
+server_chown_resume (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, uid_t uid, gid_t gid)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": CHOWN \'%s (%"PRId64")\'",
+                frame->root->unique, state->path, state->ino);
+
+        STACK_WIND (frame,
+                    server_chown_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->chown,
+                    loc,
+                    uid,
+                    gid);
+        return 0;
+}
+
+
+/*
+ * server_chown - CHOWN entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the chown request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Copies ino, path, uid and gid from the request into the call state,
+ * fills state->loc and creates a chown stub.  A path lookup is done
+ * first when the inode is missing, or when the parent is missing and
+ * IS_NOT_ROOT(pathlen) holds; otherwise the stub is resumed directly.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_chown (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        gf_fop_chown_req_t *req         = gf_param (hdr);
+        server_state_t     *state       = CALL_STATE(frame);
+        call_stub_t        *stub        = NULL;
+        size_t              pathlen     = 0;
+        int32_t             fill_ret    = -1;
+        int                 need_lookup = 0;
+
+        state->path = req->path;
+        pathlen     = STRLEN_0(state->path);
+        state->ino  = ntoh64 (req->ino);
+        state->uid  = ntoh32 (req->uid);
+        state->gid  = ntoh32 (req->gid);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    state->ino, 0, NULL, state->path);
+
+        stub = fop_chown_stub (frame, server_chown_resume,
+                               &(state->loc), state->uid, state->gid);
+
+        need_lookup = ((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                      (state->loc.inode == NULL);
+
+        if (need_lookup) {
+                do_path_lookup (stub, &(state->loc));
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_chmod_resume - wind a CHMOD request to the bound translator
+ * @frame: call frame; its server state supplies path/ino for the log line
+ * @this: not referenced in this function
+ * @loc: location to operate on
+ * @mode: mode passed down unchanged
+ *
+ * Always returns 0.
+ */
+int32_t
+server_chmod_resume (call_frame_t *frame, xlator_t *this,
+                     loc_t *loc, mode_t mode)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": CHMOD \'%s (%"PRId64")\'",
+                frame->root->unique, st->path, st->ino);
+
+        STACK_WIND (frame, server_chmod_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->chmod,
+                    loc, mode);
+
+        return 0;
+}
+
+/*
+ * server_chmod - CHMOD entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the chmod request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Copies ino, path and mode from the request into the call state, fills
+ * state->loc and creates a chmod stub.  A path lookup is done first
+ * when the inode is missing, or when the parent is missing and
+ * IS_NOT_ROOT(pathlen) holds; otherwise the stub is resumed directly.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_chmod (call_frame_t *frame, xlator_t *bound_xl,
+              gf_hdr_common_t *hdr, size_t hdrlen,
+              char *buf, size_t buflen)
+{
+        gf_fop_chmod_req_t *req         = gf_param (hdr);
+        server_state_t     *state       = CALL_STATE(frame);
+        call_stub_t        *stub        = NULL;
+        size_t              pathlen     = 0;
+        int32_t             fill_ret    = -1;
+        int                 need_lookup = 0;
+
+        state->path = req->path;
+        pathlen     = STRLEN_0(state->path);
+        state->ino  = ntoh64 (req->ino);
+        state->mode = ntoh32 (req->mode);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    state->ino, 0, NULL, state->path);
+
+        stub = fop_chmod_stub (frame, server_chmod_resume,
+                               &(state->loc), state->mode);
+
+        need_lookup = ((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                      (state->loc.inode == NULL);
+
+        if (need_lookup) {
+                do_path_lookup (stub, &(state->loc));
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_utimens_resume - wind a UTIMENS request to the bound translator
+ * @frame: call frame; its server state supplies path/ino for the log line
+ * @this: not referenced in this function
+ * @loc: location to operate on
+ * @tv: timestamps passed down unchanged
+ *
+ * Always returns 0.
+ */
+int32_t
+server_utimens_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, struct timespec *tv)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": UTIMENS \'%s (%"PRId64")\'",
+                frame->root->unique, st->path, st->ino);
+
+        STACK_WIND (frame, server_utimens_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->utimens,
+                    loc, tv);
+
+        return 0;
+}
+
+/*
+ * server_utimens - UTIMENS entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the utimens request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Copies ino and path into the call state, converts the request's
+ * timestamps into state->tv, fills state->loc and creates a utimens
+ * stub.  A path lookup is done first when the inode is missing, or when
+ * the parent is missing and IS_NOT_ROOT(pathlen) holds; otherwise the
+ * stub is resumed directly.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_utimens (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_utimens_req_t *req         = gf_param (hdr);
+        server_state_t       *state       = CALL_STATE(frame);
+        call_stub_t          *stub        = NULL;
+        size_t                pathlen     = 0;
+        int32_t               fill_ret    = -1;
+        int                   need_lookup = 0;
+
+        state->path = req->path;
+        pathlen     = STRLEN_0(state->path);
+        state->ino  = ntoh64 (req->ino);
+
+        gf_timespec_to_timespec (req->tv, state->tv);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    state->ino, 0, NULL, state->path);
+
+        stub = fop_utimens_stub (frame, server_utimens_resume,
+                                 &(state->loc), state->tv);
+
+        need_lookup = ((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                      (state->loc.inode == NULL);
+
+        if (need_lookup) {
+                do_path_lookup (stub, &(state->loc));
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+
+/*
+ * server_inodelk_resume - continue an INODELK request through its stub
+ * @frame: call frame carrying the per-request server state
+ * @this: not referenced in this function
+ * @loc: location handed over by the stub
+ * @cmd: lock command passed down unchanged
+ * @flock: lock description passed down unchanged
+ *
+ * Takes a reference on the parent/inode from @loc for whichever of the
+ * two the saved state is still missing, logs the request and winds it
+ * to the inodelk fop of the bound translator.  Always returns 0.
+ */
+int32_t
+server_inodelk_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, int32_t cmd, struct flock *flock)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        if (!st->loc.parent)
+                st->loc.parent = inode_ref (loc->parent);
+
+        if (!st->loc.inode)
+                st->loc.inode = inode_ref (loc->inode);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": INODELK \'%s (%"PRId64")\'",
+                frame->root->unique, st->path, st->ino);
+
+        STACK_WIND (frame, server_inodelk_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->inodelk,
+                    loc, cmd, flock);
+
+        return 0;
+}
+
+
+/*
+ * server_inodelk - INODELK entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the inodelk request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Translates the wire lock command (GF_LK_*) and lock type (GF_LK_F_*)
+ * into their fcntl counterparts, stores path/ino/flock in the call
+ * state and creates an inodelk stub.  Values not covered by the
+ * translations are left as received.  The stub is resumed directly when
+ * both parent and inode are set in state->loc, otherwise after a path
+ * lookup.  Always returns 0.
+ */
+int32_t
+server_inodelk (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_inodelk_req_t *req     = gf_param (hdr);
+        server_state_t       *state   = CALL_STATE(frame);
+        call_stub_t          *stub    = NULL;
+        size_t                pathlen = 0;
+
+        state->cmd = ntoh32 (req->cmd);
+        if (state->cmd == GF_LK_GETLK) {
+                state->cmd = F_GETLK;
+        } else if (state->cmd == GF_LK_SETLK) {
+                state->cmd = F_SETLK;
+        } else if (state->cmd == GF_LK_SETLKW) {
+                state->cmd = F_SETLKW;
+        }
+
+        state->type = ntoh32 (req->type);
+
+        pathlen = STRLEN_0(req->path);
+
+        state->path = req->path;
+        state->ino  = ntoh64 (req->ino);
+
+        /* convert first; l_type is then replaced by the translated type */
+        gf_flock_to_flock (&req->flock, &state->flock);
+
+        if (state->type == GF_LK_F_RDLCK) {
+                state->flock.l_type = F_RDLCK;
+        } else if (state->type == GF_LK_F_WRLCK) {
+                state->flock.l_type = F_WRLCK;
+        } else if (state->type == GF_LK_F_UNLCK) {
+                state->flock.l_type = F_UNLCK;
+        }
+
+        server_loc_fill (&(state->loc), state,
+                         state->ino, 0, NULL, state->path);
+
+        stub = fop_inodelk_stub (frame, server_inodelk_resume,
+                                 &state->loc, state->cmd, &state->flock);
+
+        if ((state->loc.parent != NULL) && (state->loc.inode != NULL)) {
+                call_resume (stub);
+        } else {
+                do_path_lookup (stub, &(state->loc));
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_finodelk - FINODELK entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the finodelk request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd number through the connection's fd table, translates
+ * the wire lock command and lock type into fcntl values and winds the
+ * call to the finodelk fop of the bound translator.
+ *
+ * Returns 0 after winding.  When the fd cannot be resolved the request
+ * is answered through server_finodelk_cbk with -1/EINVAL and -1 is
+ * returned.
+ */
+int32_t
+server_finodelk (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        server_connection_t   *conn  = SERVER_CONNECTION(frame);
+        gf_fop_finodelk_req_t *req   = gf_param (hdr);
+        server_state_t        *state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0) {
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+        }
+
+        state->cmd = ntoh32 (req->cmd);
+        if (state->cmd == GF_LK_GETLK) {
+                state->cmd = F_GETLK;
+        } else if (state->cmd == GF_LK_SETLK) {
+                state->cmd = F_SETLK;
+        } else if (state->cmd == GF_LK_SETLKW) {
+                state->cmd = F_SETLKW;
+        }
+
+        state->type = ntoh32 (req->type);
+
+        /* convert first; l_type is then replaced by the translated type */
+        gf_flock_to_flock (&req->flock, &state->flock);
+
+        if (state->type == GF_LK_F_RDLCK) {
+                state->flock.l_type = F_RDLCK;
+        } else if (state->type == GF_LK_F_WRLCK) {
+                state->flock.l_type = F_WRLCK;
+        } else if (state->type == GF_LK_F_UNLCK) {
+                state->flock.l_type = F_UNLCK;
+        }
+
+        if (!state->fd) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_finodelk_cbk (frame, NULL, frame->this, -1, EINVAL);
+                return -1;
+        }
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": FINODELK \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame,
+                    server_finodelk_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->finodelk,
+                    state->fd,
+                    state->cmd,
+                    &state->flock);
+
+        return 0;
+}
+
+
+/*
+ * server_entrylk_resume - continue an ENTRYLK request through its stub
+ * @frame: call frame carrying the per-request server state
+ * @this: not referenced in this function
+ * @loc: location handed over by the stub
+ * @name: entry name passed down unchanged
+ * @cmd: entry lock command passed down unchanged
+ * @type: entry lock type passed down unchanged
+ *
+ * Takes a reference on the inode when the saved state lacks one, and on
+ * the parent when the saved state lacks one and @loc provides it.  Then
+ * logs the request and winds it to the entrylk fop of the bound
+ * translator.  Always returns 0.
+ */
+int32_t
+server_entrylk_resume (call_frame_t *frame, xlator_t *this,
+                       loc_t *loc, const char *name,
+                       entrylk_cmd cmd, entrylk_type type)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        if (!st->loc.inode) {
+                st->loc.inode = inode_ref (loc->inode);
+        }
+
+        /* @loc may carry no parent; only take a reference if there is one */
+        if (!st->loc.parent && loc->parent) {
+                st->loc.parent = inode_ref (loc->parent);
+        }
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": ENTRYLK \'%s (%"PRId64") \'",
+                frame->root->unique, st->path, st->ino);
+
+        STACK_WIND (frame, server_entrylk_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->entrylk,
+                    loc, name, cmd, type);
+
+        return 0;
+}
+
+/*
+ * server_entrylk - ENTRYLK entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the entrylk request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Stores path, ino, cmd and type from the request in the call state.
+ * The entry name is stored only when the request's namelen is non-zero;
+ * it sits right behind the path in the request.  Fills state->loc and
+ * creates an entrylk stub.  A path lookup is done first when the inode
+ * is missing, or when the parent is missing and IS_NOT_ROOT(pathlen)
+ * holds.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_entrylk (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_entrylk_req_t *req         = gf_param (hdr);
+        server_state_t       *state       = CALL_STATE(frame);
+        call_stub_t          *stub        = NULL;
+        size_t                pathlen     = 0;
+        size_t                namelen     = 0;
+        int                   need_lookup = 0;
+
+        pathlen = STRLEN_0(req->path);
+        namelen = ntoh64 (req->namelen);
+
+        state->path = req->path;
+        state->ino  = ntoh64 (req->ino);
+        state->cmd  = ntoh32 (req->cmd);
+        state->type = ntoh32 (req->type);
+
+        if (namelen != 0) {
+                state->name = req->name + pathlen;
+        }
+
+        server_loc_fill (&(state->loc), state,
+                         state->ino, 0, NULL, state->path);
+
+        stub = fop_entrylk_stub (frame, server_entrylk_resume,
+                                 &state->loc, state->name,
+                                 state->cmd, state->type);
+
+        need_lookup = ((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                      (state->loc.inode == NULL);
+
+        if (need_lookup) {
+                do_path_lookup (stub, &(state->loc));
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_fentrylk - FENTRYLK entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the fentrylk request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd number through the connection's fd table, stores cmd,
+ * type and (when namelen is non-zero) the entry name in the call state
+ * and winds the call to the fentrylk fop of the bound translator.
+ *
+ * Returns 0 after winding.  When the fd cannot be resolved the request
+ * is answered through server_fentrylk_cbk with -1/EINVAL and -1 is
+ * returned.
+ */
+int32_t
+server_fentrylk (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        server_connection_t   *conn    = SERVER_CONNECTION(frame);
+        gf_fop_fentrylk_req_t *req     = gf_param (hdr);
+        server_state_t        *state   = CALL_STATE(frame);
+        size_t                 namelen = 0;
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0) {
+                state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
+        }
+
+        state->cmd  = ntoh32 (req->cmd);
+        state->type = ntoh32 (req->type);
+
+        namelen = ntoh64 (req->namelen);
+        if (namelen != 0) {
+                state->name = req->name;
+        }
+
+        if (!state->fd) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_fentrylk_cbk (frame, NULL, frame->this, -1, EINVAL);
+                return -1;
+        }
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": FENTRYLK \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame,
+                    server_fentrylk_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->fentrylk,
+                    state->fd,
+                    state->name,
+                    state->cmd,
+                    state->type);
+
+        return 0;
+}
+
+
+/*
+ * server_access_resume - wind an ACCESS request to the bound translator
+ * @frame: call frame; its server state supplies path/ino for the log line
+ * @this: not referenced in this function
+ * @loc: location to operate on
+ * @mask: access mask passed down unchanged
+ *
+ * Always returns 0.
+ */
+int32_t
+server_access_resume (call_frame_t *frame, xlator_t *this,
+                      loc_t *loc, int32_t mask)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": ACCESS \'%s (%"PRId64")\'",
+                frame->root->unique, st->path, st->ino);
+
+        STACK_WIND (frame, server_access_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->access,
+                    loc, mask);
+
+        return 0;
+}
+
+/*
+ * server_access - ACCESS entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the access request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Copies mask, ino and path from the request into the call state, fills
+ * state->loc and creates an access stub.  A path lookup is done first
+ * when the inode is missing, or when the parent is missing and
+ * IS_NOT_ROOT(pathlen) holds; otherwise the stub is resumed directly.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_access (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        gf_fop_access_req_t *req         = gf_param (hdr);
+        server_state_t      *state       = CALL_STATE(frame);
+        call_stub_t         *stub        = NULL;
+        size_t               pathlen     = 0;
+        int32_t              fill_ret    = -1;
+        int                  need_lookup = 0;
+
+        state->path = req->path;
+        pathlen     = STRLEN_0(state->path);
+        state->ino  = ntoh64 (req->ino);
+        state->mask = ntoh32 (req->mask);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    state->ino, 0, NULL, state->path);
+
+        stub = fop_access_stub (frame, server_access_resume,
+                                &(state->loc), state->mask);
+
+        need_lookup = ((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
+                      (state->loc.inode == NULL);
+
+        if (need_lookup) {
+                do_path_lookup (stub, &(state->loc));
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_symlink_resume - continue a SYMLINK request through its stub
+ * @frame: call frame carrying the per-request server state
+ * @this: not referenced in this function
+ * @linkname: link target passed down unchanged
+ * @loc: location handed over by the stub
+ *
+ * Takes a reference on @loc's parent when the saved state lacks one,
+ * puts a new inode from the bound translator's inode table into
+ * state->loc, logs the request and winds it to the symlink fop.  The
+ * fop receives the saved state's loc, not @loc.  Always returns 0.
+ */
+int32_t
+server_symlink_resume (call_frame_t *frame, xlator_t *this,
+                       const char *linkname, loc_t *loc)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        if (!st->loc.parent) {
+                st->loc.parent = inode_ref (loc->parent);
+        }
+
+        st->loc.inode = inode_new (BOUND_XL(frame)->itable);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": SYMLINK \'%"PRId64"/%s \'",
+                frame->root->unique, st->par, st->bname);
+
+        STACK_WIND (frame, server_symlink_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->symlink,
+                    linkname, &(st->loc));
+
+        return 0;
+}
+
+/*
+ * server_symlink - SYMLINK entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the symlink request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * The request carries path, basename and link target back to back; the
+ * basename starts pathlen bytes in and the link target a further
+ * baselen bytes in.  Stores them with the parent number in the call
+ * state, fills state->loc and creates a symlink stub.  The stub is
+ * resumed directly when the parent is already set in state->loc,
+ * otherwise after a path lookup.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+server_symlink (call_frame_t *frame, xlator_t *bound_xl,
+                gf_hdr_common_t *hdr, size_t hdrlen,
+                char *buf, size_t buflen)
+{
+        gf_fop_symlink_req_t *req      = gf_param (hdr);
+        server_state_t       *state    = CALL_STATE(frame);
+        call_stub_t          *stub     = NULL;
+        size_t                pathlen  = 0;
+        size_t                baselen  = 0;
+        int32_t               fill_ret = -1;
+
+        pathlen = STRLEN_0(req->path);
+        baselen = STRLEN_0(req->bname + pathlen);
+
+        state->path  = req->path;
+        state->bname = req->bname + pathlen;
+        state->name  = (req->linkname + pathlen + baselen);
+        state->par   = ntoh64 (req->par);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    0, state->par, state->bname,
+                                    state->path);
+
+        stub = fop_symlink_stub (frame, server_symlink_resume,
+                                 state->name, &(state->loc));
+
+        if (state->loc.parent != NULL) {
+                call_resume (stub);
+        } else {
+                do_path_lookup (stub, &(state->loc));
+        }
+
+        return 0;
+}
+
+/*
+ * server_link_resume - continue a LINK request through its stub
+ * @frame: call frame carrying the per-request server state
+ * @this: not referenced in this function
+ * @oldloc: location of the existing file, handed over by the stub
+ * @newloc: location of the new name, handed over by the stub
+ *
+ * Completes state->loc from @oldloc and state->loc2 from @newloc:
+ * missing parents are referenced, and state->loc.inode is made to
+ * reference @oldloc's inode, dropping a different inode held before.
+ * state->loc2.inode always receives a new reference on
+ * state->loc.inode.  The request is then logged and wound to the link
+ * fop with the saved state's locs.  Always returns 0.
+ */
+int32_t
+server_link_resume (call_frame_t *frame, xlator_t *this,
+                    loc_t *oldloc, loc_t *newloc)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        if (!st->loc.parent) {
+                st->loc.parent = inode_ref (oldloc->parent);
+        }
+
+        /* re-point loc.inode unless it already is oldloc's inode */
+        if ((st->loc.inode == NULL) || (st->loc.inode != oldloc->inode)) {
+                if (st->loc.inode != NULL) {
+                        inode_unref (st->loc.inode);
+                }
+                st->loc.inode = inode_ref (oldloc->inode);
+        }
+
+        if (!st->loc2.parent) {
+                st->loc2.parent = inode_ref (newloc->parent);
+        }
+
+        st->loc2.inode = inode_ref (st->loc.inode);
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": LINK \'%"PRId64"/%s ==> %s (%"PRId64")\'",
+                frame->root->unique, st->par2, st->bname2,
+                st->path, st->ino);
+
+        STACK_WIND (frame, server_link_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->link,
+                    &(st->loc), &(st->loc2));
+
+        return 0;
+}
+
+/*
+ * server_link - LINK entry point of the server protocol
+ * @frame: call frame
+ * @this: not referenced in this function
+ * @hdr: request header; the link request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * The request carries old path, new path and new basename back to back.
+ * Stores them with the old inode number and new parent number in the
+ * call state, fills state->loc (old) and state->loc2 (new) and creates
+ * a link stub.  A path lookup is started on the first loc found
+ * incomplete (old: parent or inode missing; new: parent missing); when
+ * neither is, the stub is resumed directly.  Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_link (call_frame_t *frame, xlator_t *this,
+             gf_hdr_common_t *hdr, size_t hdrlen,
+             char *buf, size_t buflen)
+{
+        gf_fop_link_req_t *req        = gf_param (hdr);
+        server_state_t    *state      = CALL_STATE(frame);
+        call_stub_t       *stub       = NULL;
+        loc_t             *unresolved = NULL;
+        int32_t            fill_ret   = -1;
+        size_t             oldpathlen = 0;
+        size_t             newpathlen = 0;
+        size_t             newbaselen = 0;
+
+        oldpathlen = STRLEN_0(req->oldpath);
+        newpathlen = STRLEN_0(req->newpath + oldpathlen);
+        newbaselen = STRLEN_0(req->newbname + oldpathlen + newpathlen);
+
+        state->ino    = ntoh64 (req->oldino);
+        state->par2   = ntoh64 (req->newpar);
+        state->path   = req->oldpath;
+        state->path2  = req->newpath + oldpathlen;
+        state->bname2 = req->newbname + oldpathlen + newpathlen;
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    state->ino, 0, NULL,
+                                    state->path);
+        fill_ret = server_loc_fill (&(state->loc2), state,
+                                    0, state->par2, state->bname2,
+                                    state->path2);
+
+        stub = fop_link_stub (frame, server_link_resume,
+                              &(state->loc), &(state->loc2));
+
+        /* the old location is checked before the new one */
+        if ((state->loc.parent == NULL) || (state->loc.inode == NULL)) {
+                unresolved = &(state->loc);
+        } else if (state->loc2.parent == NULL) {
+                unresolved = &(state->loc2);
+        }
+
+        if (unresolved != NULL) {
+                do_path_lookup (stub, unresolved);
+        } else {
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_rename_resume - continue a RENAME request through its stub
+ * @frame: call frame carrying the per-request server state
+ * @this: not referenced in this function
+ * @oldloc: source location handed over by the stub
+ * @newloc: destination location handed over by the stub
+ *
+ * References @oldloc's parent and inode and @newloc's parent for
+ * whichever of them the saved state is still missing, logs the request
+ * and winds it to the rename fop with the saved state's locs.  Always
+ * returns 0.
+ */
+int32_t
+server_rename_resume (call_frame_t *frame, xlator_t *this,
+                      loc_t *oldloc, loc_t *newloc)
+{
+        server_state_t *st = CALL_STATE(frame);
+
+        if (!st->loc.parent) {
+                st->loc.parent = inode_ref (oldloc->parent);
+        }
+
+        if (!st->loc.inode) {
+                st->loc.inode = inode_ref (oldloc->inode);
+        }
+
+        if (!st->loc2.parent) {
+                st->loc2.parent = inode_ref (newloc->parent);
+        }
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": RENAME %s (%"PRId64"/%s) ==> %s (%"PRId64"/%s)",
+                frame->root->unique, st->path, st->par, st->bname,
+                st->path2, st->par2, st->bname2);
+
+        STACK_WIND (frame, server_rename_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->rename,
+                    &(st->loc), &(st->loc2));
+
+        return 0;
+}
+
+/*
+ * server_rename - RENAME entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the rename request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * The request carries old path, old basename, new path and new basename
+ * back to back.  Stores them with both parent numbers in the call
+ * state, fills state->loc (source) and state->loc2 (destination) and
+ * creates a rename stub.  A path lookup is started on the first loc
+ * found incomplete (source: parent or inode missing; destination:
+ * parent missing); when neither is, the stub is resumed directly.
+ * Always returns 0.
+ *
+ * not for external reference
+ */
+int32_t
+server_rename (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *hdr, size_t hdrlen,
+               char *buf, size_t buflen)
+{
+        gf_fop_rename_req_t *req        = gf_param (hdr);
+        server_state_t      *state      = CALL_STATE(frame);
+        call_stub_t         *stub       = NULL;
+        loc_t               *unresolved = NULL;
+        int32_t              fill_ret   = -1;
+        size_t               oldpathlen = 0;
+        size_t               oldbaselen = 0;
+        size_t               newpathlen = 0;
+        size_t               newbaselen = 0;
+
+        oldpathlen = STRLEN_0(req->oldpath);
+        oldbaselen = STRLEN_0(req->oldbname + oldpathlen);
+        newpathlen = STRLEN_0(req->newpath + oldpathlen + oldbaselen);
+        newbaselen = STRLEN_0(req->newbname + oldpathlen +
+                              oldbaselen + newpathlen);
+
+        state->par    = ntoh64 (req->oldpar);
+        state->par2   = ntoh64 (req->newpar);
+        state->path   = req->oldpath;
+        state->bname  = req->oldbname + oldpathlen;
+        state->path2  = req->newpath + oldpathlen + oldbaselen;
+        state->bname2 = (req->newbname + oldpathlen + oldbaselen +
+                         newpathlen);
+
+        fill_ret = server_loc_fill (&(state->loc), state,
+                                    0, state->par, state->bname,
+                                    state->path);
+        fill_ret = server_loc_fill (&(state->loc2), state,
+                                    0, state->par2, state->bname2,
+                                    state->path2);
+
+        stub = fop_rename_stub (frame, server_rename_resume,
+                                &(state->loc), &(state->loc2));
+
+        /* the source location is checked before the destination */
+        if ((state->loc.parent == NULL) || (state->loc.inode == NULL)) {
+                unresolved = &(state->loc);
+        } else if (state->loc2.parent == NULL) {
+                unresolved = &(state->loc2);
+        }
+
+        if (unresolved != NULL) {
+                do_path_lookup (stub, unresolved);
+        } else {
+                /* both locations are already complete: go on with
+                 * fops->rename() */
+                call_resume (stub);
+        }
+
+        return 0;
+}
+
+
+/*
+ * server_lk - lk function for server protocol
+ * @frame: call frame
+ * @bound_xl: used only to name the log line for an unknown lock type
+ * @hdr: request header; the lk request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Resolves the fd number through the connection's fd table, translates
+ * the wire lock command (GF_LK_*) and lock type (GF_LK_F_*) into their
+ * fcntl counterparts and winds the call to the lk fop of the bound
+ * translator.  An unresolved fd is answered through server_lk_cbk with
+ * -1/EINVAL.  An unknown lock type is only logged; the call is still
+ * wound.  Always returns 0.
+ *
+ * not for external reference
+ */
+
+int32_t
+server_lk (call_frame_t *frame,
+           xlator_t *bound_xl,
+           gf_hdr_common_t *hdr, size_t hdrlen,
+           char *buf, size_t buflen)
+{
+        struct flock         lock = {0, };
+        gf_fop_lk_req_t     *req = NULL;
+        server_state_t      *state = NULL;
+        server_connection_t *conn = NULL;
+
+        conn = SERVER_CONNECTION(frame);
+
+        req   = gf_param (hdr);
+        state = CALL_STATE(frame);
+        {
+                state->fd_no = ntoh64 (req->fd);
+                if (state->fd_no >= 0)
+                        state->fd = gf_fd_fdptr_get (conn->fdtable,
+                                                     state->fd_no);
+
+                state->cmd =  ntoh32 (req->cmd);
+                state->type = ntoh32 (req->type);
+        }
+
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_lk_cbk (frame, NULL, frame->this,
+                               -1, EINVAL, NULL);
+
+                goto out;
+        }
+
+        switch (state->cmd) {
+        case GF_LK_GETLK:
+                state->cmd = F_GETLK;
+                break;
+        case GF_LK_SETLK:
+                state->cmd = F_SETLK;
+                break;
+        case GF_LK_SETLKW:
+                state->cmd = F_SETLKW;
+                break;
+        }
+
+        /*
+         * Convert the wire flock BEFORE translating the lock type, the
+         * same order server_inodelk and server_finodelk use.  Doing it
+         * afterwards lets the conversion overwrite the translated
+         * l_type with the untranslated wire value.
+         * NOTE(review): presumes gf_flock_to_flock fills l_type; if it
+         * does not, this order is still harmless - confirm.
+         */
+        gf_flock_to_flock (&req->flock, &lock);
+
+        switch (state->type) {
+        case GF_LK_F_RDLCK:
+                lock.l_type = F_RDLCK;
+                break;
+        case GF_LK_F_WRLCK:
+                lock.l_type = F_WRLCK;
+                break;
+        case GF_LK_F_UNLCK:
+                lock.l_type = F_UNLCK;
+                break;
+        default:
+                gf_log (bound_xl->name, GF_LOG_ERROR,
+                        "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!",
+                        state->fd_no, state->fd->inode->ino, state->type);
+                break;
+        }
+
+        gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
+                "%"PRId64": LK \'fd=%"PRId64" (%"PRId64")\'",
+                frame->root->unique, state->fd_no, state->fd->inode->ino);
+
+        STACK_WIND (frame, server_lk_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->lk,
+                    state->fd, state->cmd, &lock);
+
+out:
+        return 0;
+}
+
+
+/*
+ * server_setdents - SETDENTS entry point of the server protocol
+ *
+ * @frame: call frame
+ * @bound_xl: used only to name the debug log line
+ * @hdr: request header; the setdents request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: text payload describing the directory entries (may be NULL)
+ * @buflen: not referenced in this function
+ *
+ * Each entry in @buf is parsed as "<name>/<stat fields>\n<link>\n".
+ * Up to req->count entries are turned into a dir_entry_t list hanging
+ * off a dummy head, which is wound to the setdents fop and freed once
+ * STACK_WIND returns.  Parsing stops at the first malformed entry and
+ * the entries parsed so far are still wound.
+ *
+ * An unresolved fd or a NULL @buf is answered through
+ * server_setdents_cbk with -1/EINVAL.  Always returns 0.
+ *
+ * NOTE(review): the strchr() scans assume @buf is NUL-terminated, and
+ * the sscanf() result is not checked - confirm both against the caller
+ * and GF_STAT_PRINT_FMT_STR.
+ */
+int32_t
+server_setdents (call_frame_t *frame,
+                 xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        server_connection_t   *conn = NULL;
+        gf_fop_setdents_req_t *req   = NULL;
+        server_state_t        *state = NULL;
+        dir_entry_t           *entry = NULL;
+        dir_entry_t           *trav = NULL;
+        dir_entry_t           *prev = NULL;
+        int32_t                count = 0;
+        int32_t                i = 0;
+        int32_t                bread = 0;
+        char                  *ender = NULL;
+        char                  *buffer_ptr = NULL;
+        char                   tmp_buf[512] = {0,};
+
+        conn = SERVER_CONNECTION(frame);
+
+        req   = gf_param (hdr);
+        state = CALL_STATE(frame);
+
+        state->fd_no = ntoh64 (req->fd);
+        if (state->fd_no >= 0)
+                state->fd = gf_fd_fdptr_get (conn->fdtable,
+                                             state->fd_no);
+
+        state->nr_count = ntoh32 (req->count);
+
+        if (state->fd == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64": unresolved fd",
+                        state->fd_no);
+
+                server_setdents_cbk (frame, NULL, frame->this,
+                                     -1, EINVAL);
+
+                goto out;
+        }
+
+        if (buf == NULL) {
+                gf_log (frame->this->name, GF_LOG_ERROR,
+                        "fd - %"PRId64" (%"PRId64"): received a null buffer, "
+                        "returning EINVAL",
+                        state->fd_no, state->fd->inode->ino);
+
+                server_setdents_cbk (frame, NULL, frame->this,
+                                     -1, EINVAL);
+
+                goto out;
+        }
+
+        /* dummy list head; real entries start at entry->next */
+        entry = CALLOC (1, sizeof (dir_entry_t));
+        ERR_ABORT (entry);
+        prev = entry;
+        buffer_ptr = buf;
+
+        for (i = 0; i < state->nr_count ; i++) {
+                bread = 0;
+                trav = CALLOC (1, sizeof (dir_entry_t));
+                ERR_ABORT (trav);
+
+                /* entry name, terminated by '/' */
+                ender = strchr (buffer_ptr, '/');
+                if (!ender)
+                        break;
+                count = ender - buffer_ptr;
+                trav->name = CALLOC (1, count + 2);
+                ERR_ABORT (trav->name);
+
+                strncpy (trav->name, buffer_ptr, count);
+                bread = count + 1;
+                buffer_ptr += bread;
+
+                /* stat fields, terminated by '\n' */
+                ender = strchr (buffer_ptr, '\n');
+                if (!ender)
+                        break;
+                count = ender - buffer_ptr;
+                if ((size_t) count >= sizeof (tmp_buf)) {
+                        /* would overflow tmp_buf: treat as malformed */
+                        gf_log (frame->this->name, GF_LOG_ERROR,
+                                "fd - %"PRId64" (%"PRId64"): stat string of "
+                                "entry %"PRId32" is too long, stopping parse",
+                                state->fd_no, state->fd->inode->ino, i);
+                        break;
+                }
+                strncpy (tmp_buf, buffer_ptr, count);
+                /* terminate so bytes of a longer previous entry are
+                 * never parsed again */
+                tmp_buf[count] = '\0';
+                bread = count + 1;
+                buffer_ptr += bread;
+
+                /* TODO: use str_to_stat instead */
+                {
+                        uint64_t dev;
+                        uint64_t ino;
+                        uint32_t mode;
+                        uint32_t nlink;
+                        uint32_t uid;
+                        uint32_t gid;
+                        uint64_t rdev;
+                        uint64_t size;
+                        uint32_t blksize;
+                        uint64_t blocks;
+                        uint32_t atime;
+                        uint32_t atime_nsec;
+                        uint32_t mtime;
+                        uint32_t mtime_nsec;
+                        uint32_t ctime;
+                        uint32_t ctime_nsec;
+
+                        sscanf (tmp_buf, GF_STAT_PRINT_FMT_STR,
+                                &dev,
+                                &ino,
+                                &mode,
+                                &nlink,
+                                &uid,
+                                &gid,
+                                &rdev,
+                                &size,
+                                &blksize,
+                                &blocks,
+                                &atime,
+                                &atime_nsec,
+                                &mtime,
+                                &mtime_nsec,
+                                &ctime,
+                                &ctime_nsec);
+
+                        trav->buf.st_dev = dev;
+                        trav->buf.st_ino = ino;
+                        trav->buf.st_mode = mode;
+                        trav->buf.st_nlink = nlink;
+                        trav->buf.st_uid = uid;
+                        trav->buf.st_gid = gid;
+                        trav->buf.st_rdev = rdev;
+                        trav->buf.st_size = size;
+                        trav->buf.st_blksize = blksize;
+                        trav->buf.st_blocks = blocks;
+
+                        trav->buf.st_atime = atime;
+                        trav->buf.st_mtime = mtime;
+                        trav->buf.st_ctime = ctime;
+
+                        ST_ATIM_NSEC_SET(&trav->buf, atime_nsec);
+                        ST_MTIM_NSEC_SET(&trav->buf, mtime_nsec);
+                        ST_CTIM_NSEC_SET(&trav->buf, ctime_nsec);
+
+                }
+
+                /* link target, terminated by '\n' */
+                ender = strchr (buffer_ptr, '\n');
+                if (!ender)
+                        break;
+                count = ender - buffer_ptr;
+                *ender = '\0';
+                if (S_ISLNK (trav->buf.st_mode)) {
+                        trav->link = strdup (buffer_ptr);
+                } else
+                        trav->link = "";
+                bread = count + 1;
+                buffer_ptr += bread;
+
+                prev->next = trav;
+                prev = trav;
+                /* now owned by the list */
+                trav = NULL;
+        }
+
+        /* a non-NULL trav was abandoned by one of the breaks above,
+         * before it was linked and before any link was duplicated */
+        if (trav != NULL) {
+                if (trav->name != NULL) {
+                        FREE (trav->name);
+                }
+                FREE (trav);
+        }
+
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": SETDENTS \'fd=%"PRId64" (%"PRId64"); count=%"PRId64,
+                frame->root->unique, state->fd_no, state->fd->inode->ino,
+                (int64_t)state->nr_count);
+
+        STACK_WIND (frame,
+                    server_setdents_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->setdents,
+                    state->fd,
+                    state->flags,
+                    entry,
+                    state->nr_count);
+
+
+        /* Free the variables allocated in this fop here */
+        trav = entry->next;
+        prev = entry;
+        while (trav) {
+                prev->next = trav->next;
+                FREE (trav->name);
+                /* only symlink entries own a strdup'ed link */
+                if (S_ISLNK (trav->buf.st_mode))
+                        FREE (trav->link);
+                FREE (trav);
+                trav = prev->next;
+        }
+        FREE (entry);
+
+out:
+        return 0;
+}
+
+
+
+/* xxx_MOPS */
+
+/* Management Calls */
+/*
+ * mop_getspec - getspec function for server protocol
+ * @frame: call frame
+ * @bound_xl: not referenced in this function
+ * @hdr: request header; the getspec request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Picks the volume file to send back:
+ *   1. option "client-volume-filename", if set (a warning is logged),
+ *   2. else option "volume-filename.<key>" when the request has a key,
+ *   3. else GLUSTERFSD_SPEC_PATH.
+ * "<file>.<peer identifier>" is tried first and the plain file is the
+ * fallback.  The file content is placed in the reply; op_ret is 0 on
+ * success and -1 with the errno of the failing call otherwise.
+ * Always returns 0.
+ */
+int32_t
+mop_getspec (call_frame_t *frame,
+             xlator_t *bound_xl,
+             gf_hdr_common_t *hdr, size_t hdrlen,
+             char *buf, size_t buflen)
+{
+        gf_hdr_common_t      *_hdr = NULL;
+        gf_mop_getspec_rsp_t *rsp = NULL;
+        int32_t               ret = -1;
+        int32_t               op_errno = ENOENT;
+        int32_t               gf_errno = 0;
+        int32_t               spec_fd = -1;
+        size_t                file_len = 0;
+        size_t                _hdrlen = 0;
+        char                  tmp_filename[ZR_FILENAME_MAX] = {0,};
+        char                  data_key[256] = {0,};
+        char                 *filename = NULL;
+        struct stat           stbuf = {0,};
+        peer_info_t          *peerinfo = NULL;
+        transport_t          *trans = NULL;
+
+        gf_mop_getspec_req_t *req = NULL;
+        uint32_t              keylen = 0;
+        char                 *key = NULL;
+        int                   len = 0;
+        ssize_t               nread = 0;
+
+        req   = gf_param (hdr);
+        keylen = ntoh32 (req->keylen);
+        if (keylen) {
+                key = req->key;
+        }
+
+        trans = TRANSPORT_FROM_FRAME(frame);
+
+        peerinfo = &(trans->peerinfo);
+        /* Inform users that this option is changed now */
+        ret = dict_get_str (frame->this->options, "client-volume-filename",
+                            &filename);
+        if (ret == 0) {
+                gf_log (trans->xl->name, GF_LOG_WARNING,
+                        "option 'client-volume-specfile' is changed to "
+                        "'volume-filename.<key>' which now takes 'key' as an "
+                        "option to choose/fetch different files from server. "
+                        "Refer documentation or contact developers for more "
+                        "info. Currently defaulting to given file '%s'",
+                        filename);
+        }
+
+        if (key && !filename) {
+                /* the key comes from the client: never let it overrun
+                 * data_key; an oversized key is treated as unknown */
+                len = snprintf (data_key, sizeof (data_key),
+                                "volume-filename.%s", key);
+                if ((len < 0) || ((size_t) len >= sizeof (data_key))) {
+                        ret = -1;
+                } else {
+                        ret = dict_get_str (frame->this->options, data_key,
+                                            &filename);
+                }
+                if (ret < 0) {
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to get corresponding volume file "
+                                "for the key '%s'. using default file %s",
+                                key, GLUSTERFSD_SPEC_PATH);
+                }
+        }
+        if (!filename) {
+                filename = GLUSTERFSD_SPEC_PATH;
+                if (!key)
+                        gf_log (trans->xl->name, GF_LOG_WARNING,
+                                "using default volume file %s",
+                                GLUSTERFSD_SPEC_PATH);
+        }
+
+        /* Try for ip specific client volfile.
+         * If not found, then go for, regular client file.
+         */
+        len = snprintf (tmp_filename, sizeof (tmp_filename), "%s.%s",
+                        filename, peerinfo->identifier);
+        if ((len < 0) || ((size_t) len >= sizeof (tmp_filename))) {
+                gf_log (trans->xl->name, GF_LOG_WARNING,
+                        "peer specific name for volume file %s is too long, "
+                        "skipping it", filename);
+        } else {
+                spec_fd = open (tmp_filename, O_RDONLY);
+                if (spec_fd < 0) {
+                        gf_log (trans->xl->name, GF_LOG_DEBUG,
+                                "Unable to open %s (%s)",
+                                tmp_filename, strerror (errno));
+                } else {
+                        /* Successful */
+                        filename = tmp_filename;
+                }
+        }
+
+        if (spec_fd < 0) {
+                /* fall back */
+                spec_fd = open (filename, O_RDONLY);
+                if (spec_fd < 0) {
+                        /* save errno before logging can change it */
+                        op_errno = errno;
+                        ret = -1;
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "Unable to open %s (%s)",
+                                filename, strerror (op_errno));
+                        goto out;
+                }
+        }
+
+        /* to allocate the proper buffer to hold the file data; fstat on
+         * the open descriptor sizes the very file that will be read */
+        ret = fstat (spec_fd, &stbuf);
+        if (ret < 0) {
+                op_errno = errno;
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "Unable to stat %s (%s)",
+                        filename, strerror (op_errno));
+                goto out;
+        }
+
+        file_len = stbuf.st_size;
+        op_errno = 0;
+
+out:
+        _hdrlen = gf_hdr_len (rsp, file_len + 1);
+        _hdr    = gf_hdr_new (rsp, file_len + 1);
+        rsp     = gf_param (_hdr);
+
+        if (file_len) {
+                nread = read (spec_fd, rsp->spec, file_len);
+                if (nread < 0) {
+                        op_errno = errno;
+                        ret = -1;
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "Unable to read %s (%s)",
+                                filename, strerror (op_errno));
+                }
+        }
+
+        /* close on every path, including stat failure and empty file */
+        if (spec_fd >= 0) {
+                close (spec_fd);
+        }
+
+        _hdr->rsp.op_ret   = hton32 (ret);
+        gf_errno           = gf_errno_to_error (op_errno);
+        _hdr->rsp.op_errno = hton32 (gf_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_GETSPEC,
+                               _hdr, _hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_checksum_cbk - send the CHECKSUM reply back to the client
+ * @frame: call frame the reply is sent on
+ * @cookie: not referenced in this function
+ * @this: not referenced in this function
+ * @op_ret: result of the fop; checksums are copied only when it is >= 0
+ * @op_errno: errno of the fop, converted with gf_errno_to_error
+ * @fchecksum: ZR_FILENAME_MAX bytes are copied from here
+ * @dchecksum: ZR_FILENAME_MAX bytes are copied from here
+ *
+ * The reply reserves two (ZR_FILENAME_MAX + 1)-byte areas.  The copies
+ * and terminators below are issued in the same order and at the same
+ * offsets as before.  Always returns 0.
+ */
+int32_t
+server_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno,
+                     uint8_t *fchecksum, uint8_t *dchecksum)
+{
+        gf_fop_checksum_rsp_t *rsp      = NULL;
+        gf_hdr_common_t       *reply    = NULL;
+        size_t                 replylen = 0;
+        int32_t                gf_errno = 0;
+
+        replylen = gf_hdr_len (rsp, ZR_FILENAME_MAX + 1 + ZR_FILENAME_MAX + 1);
+        reply    = gf_hdr_new (rsp, ZR_FILENAME_MAX + 1 + ZR_FILENAME_MAX + 1);
+        rsp      = gf_param (reply);
+
+        gf_errno            = gf_errno_to_error (op_errno);
+        reply->rsp.op_ret   = hton32 (op_ret);
+        reply->rsp.op_errno = hton32 (gf_errno);
+
+        if (!(op_ret < 0)) {
+                memcpy (rsp->fchecksum, fchecksum, ZR_FILENAME_MAX);
+                rsp->fchecksum[ZR_FILENAME_MAX] = '\0';
+
+                memcpy (rsp->dchecksum + ZR_FILENAME_MAX,
+                        dchecksum, ZR_FILENAME_MAX);
+                rsp->dchecksum[ZR_FILENAME_MAX + ZR_FILENAME_MAX] = '\0';
+        }
+
+        protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHECKSUM,
+                               reply, replylen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * server_checksum - CHECKSUM entry point of the server protocol
+ * @frame: call frame
+ * @bound_xl: used only to name the debug log line
+ * @hdr: request header; the checksum request is read from it via gf_param
+ * @hdrlen: not referenced in this function
+ * @buf: not referenced in this function
+ * @buflen: not referenced in this function
+ *
+ * Builds a loc on the stack from the request's path and inode number
+ * (with no inode attached) and winds it, together with the request's
+ * flag, to the checksum fop of the bound translator.  Always returns 0.
+ */
+int32_t
+server_checksum (call_frame_t *frame, xlator_t *bound_xl,
+                 gf_hdr_common_t *hdr, size_t hdrlen,
+                 char *buf, size_t buflen)
+{
+        gf_fop_checksum_req_t *req           = gf_param (hdr);
+        loc_t                  target        = {0,};
+        int32_t                checksum_flag = 0;
+
+        checksum_flag = ntoh32 (req->flag);
+
+        target.inode = NULL;
+        target.ino   = ntoh64 (req->ino);
+        target.path  = req->path;
+
+        gf_log (bound_xl->name, GF_LOG_DEBUG,
+                "%"PRId64": CHECKSUM \'%s (%"PRId64")\'",
+                frame->root->unique, target.path, target.ino);
+
+        STACK_WIND (frame, server_checksum_cbk,
+                    BOUND_XL(frame),
+                    BOUND_XL(frame)->fops->checksum,
+                    &target, checksum_flag);
+
+        return 0;
+}
+
+
+/*
+ * mop_getvolume - getvolume management function for server protocol
+ * @frame: call frame (not referenced)
+ * @bound_xl: not referenced
+ * @hdr: request header (not referenced)
+ * @hdrlen: not referenced
+ * @buf: not referenced
+ * @buflen: not referenced
+ *
+ * Placeholder: does nothing, sends no reply and returns 0.
+ */
+int32_t
+mop_getvolume (call_frame_t *frame,
+ xlator_t *bound_xl,
+ gf_hdr_common_t *hdr, size_t hdrlen,
+ char *buf, size_t buflen)
+{
+ return 0;
+}
+
+/*
+ * Search context passed through xlator_foreach by get_xlator_by_name
+ * and filled in by __check_and_set.
+ */
+struct __get_xl_struct {
+ /* translator name being searched for */
+ const char *name;
+ /* translator whose name matched; stays NULL when none did */
+ xlator_t *reply;
+};
+
+/*
+ * __check_and_set - per-translator callback used by get_xlator_by_name
+ * @each: translator currently being visited
+ * @data: pointer to a struct __get_xl_struct
+ *
+ * Stores @each in the search context when its name equals the name
+ * being searched for; otherwise leaves the context untouched.
+ */
+void __check_and_set (xlator_t *each,
+                      void *data)
+{
+        struct __get_xl_struct *query = data;
+
+        if (strcmp (each->name, query->name) == 0) {
+                query->reply = each;
+        }
+}
+
+/*
+ * get_xlator_by_name - find a translator by name
+ * @some_xl: translator handed to xlator_foreach as the starting point
+ * @name: name to look for
+ *
+ * Runs __check_and_set over the translators visited by xlator_foreach.
+ * Returns the match recorded by the callback, or NULL when no visited
+ * translator has that name.
+ */
+static xlator_t *
+get_xlator_by_name (xlator_t *some_xl, const char *name)
+{
+        struct __get_xl_struct query;
+
+        query.name  = name;
+        query.reply = NULL;
+
+        xlator_foreach (some_xl, __check_and_set, &query);
+
+        return query.reply;
+}
+
+
+/*
+ * mop_setvolume - setvolume (handshake) management function for server
+ *                 protocol
+ * @frame: call frame
+ * @bound_xl: xlator the connection is bound to; the dispatcher does not
+ *            require a bound volume for management operations, so this may
+ *            be NULL and is never dereferenced here
+ * @req_hdr: request header; its payload is a serialized dictionary
+ * @req_hdrlen: length of @req_hdr (unused)
+ * @req_buf: request buffer (unused)
+ * @req_buflen: length of @req_buf (unused)
+ *
+ * Reads "process-uuid", "version" and "remote-subvolume" from the request
+ * dictionary, authenticates the client and, on success, binds the
+ * connection to the requested xlator, creating its inode table when it has
+ * none. A status string is stored under the "ERROR" key of the reply
+ * dictionary.
+ *
+ * Always returns 0; the outcome is reported through op_ret/op_errno of the
+ * reply header.
+ */
+int
+mop_setvolume (call_frame_t *frame, xlator_t *bound_xl,
+               gf_hdr_common_t *req_hdr, size_t req_hdrlen,
+               char *req_buf, size_t req_buflen)
+{
+        server_connection_t         *conn = NULL;
+        server_conf_t               *conf = NULL;
+        gf_hdr_common_t             *rsp_hdr = NULL;
+        gf_mop_setvolume_req_t      *req = NULL;
+        gf_mop_setvolume_rsp_t      *rsp = NULL;
+        peer_info_t                 *peerinfo = NULL;
+        int32_t                      ret = -1;
+        int32_t                      op_ret = -1;
+        int32_t                      op_errno = EINVAL;
+        int32_t                      gf_errno = 0;
+        dict_t                      *reply = NULL;
+        dict_t                      *config_params = NULL;
+        dict_t                      *params = NULL;
+        char                        *name = NULL;
+        char                        *version = NULL;
+        char                        *process_uuid = NULL;
+        char                        *msg = NULL;
+        xlator_t                    *xl = NULL;
+        transport_t                 *trans = NULL;
+        size_t                       rsp_hdrlen = 0;
+        size_t                       dict_len = 0;
+        size_t                       req_dictlen = 0;
+
+        params = dict_new ();
+        reply  = dict_new ();
+
+        req = gf_param (req_hdr);
+        req_dictlen = ntoh32 (req->dict_len);
+        ret = dict_unserialize (req->buf, req_dictlen, &params);
+
+        config_params = dict_copy_with_ref (frame->this->options, NULL);
+        trans = TRANSPORT_FROM_FRAME(frame);
+        conf = SERVER_CONF(frame);
+
+        /* every log below uses trans->xl->name: bound_xl and xl can both
+           still be NULL while the handshake is in progress */
+        if (ret < 0) {
+                ret = dict_set_str (reply, "ERROR",
+                                    "Internal error: failed to unserialize "
+                                    "request dictionary");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg \"%s\"",
+                                "Internal error: failed to unserialize "
+                                "request dictionary");
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        ret = dict_get_str (params, "process-uuid", &process_uuid);
+        if (ret < 0) {
+                ret = dict_set_str (reply, "ERROR",
+                                    "UUID not specified");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        conn = server_connection_get (frame->this, process_uuid);
+        if (conn == NULL) {
+                /* conn is dereferenced further down; refuse the handshake
+                   instead of crashing if no connection object is available */
+                ret = dict_set_str (reply, "ERROR",
+                                    "Internal error: failed to get "
+                                    "connection object");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto fail;
+        }
+        if (trans->xl_private != conn)
+                trans->xl_private = conn;
+
+        ret = dict_get_str (params, "version", &version);
+        if (ret < 0) {
+                ret = dict_set_str (reply, "ERROR",
+                                    "No version number specified");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        ret = strcmp (version, PACKAGE_VERSION);
+        if (ret != 0) {
+                /* asprintf leaves msg undefined on failure */
+                if (asprintf (&msg,
+                              "Version mismatch: client(%s) Vs server (%s)",
+                              version, PACKAGE_VERSION) == -1)
+                        msg = NULL;
+
+                ret = (msg != NULL) ?
+                        dict_set_dynstr (reply, "ERROR", msg) : -1;
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        ret = dict_get_str (params,
+                            "remote-subvolume", &name);
+        if (ret < 0) {
+                ret = dict_set_str (reply, "ERROR",
+                                    "No remote-subvolume option specified");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto fail;
+        }
+
+        xl = get_xlator_by_name (frame->this, name);
+        if (xl == NULL) {
+                if (asprintf (&msg, "remote-subvolume \"%s\" is not found",
+                              name) == -1)
+                        msg = NULL;
+
+                ret = (msg != NULL) ?
+                        dict_set_dynstr (reply, "ERROR", msg) : -1;
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = ENOENT;
+                goto fail;
+        }
+
+        peerinfo = &trans->peerinfo;
+        ret = dict_set_static_ptr (params, "peer-info", peerinfo);
+        if (ret < 0)
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "failed to set peer-info");
+
+        if (conf->auth_modules == NULL) {
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "Authentication module not initialized");
+        }
+
+        ret = gf_authenticate (params, config_params,
+                               conf->auth_modules);
+        if (ret == AUTH_ACCEPT) {
+                gf_log (trans->xl->name, GF_LOG_INFO,
+                        "accepted client from %s",
+                        peerinfo->identifier);
+                op_ret = 0;
+                conn->bound_xl = xl;
+                ret = dict_set_str (reply, "ERROR", "Success");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+        } else {
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "Cannot authenticate client from %s",
+                        peerinfo->identifier);
+                op_ret = -1;
+                op_errno = EACCES;
+                ret = dict_set_str (reply, "ERROR", "Authentication failed");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                goto fail;
+        }
+
+        if (conn->bound_xl == NULL) {
+                ret = dict_set_str (reply, "ERROR",
+                                    "Check volfile and handshake "
+                                    "options in protocol/client");
+                if (ret < 0)
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to set error msg");
+
+                op_ret = -1;
+                op_errno = EACCES;
+                goto fail;
+        }
+
+        /* ret still holds the result of storing the "Success" message */
+        if ((ret >= 0) && (conn->bound_xl->itable == NULL)) {
+                /* create inode table for this bound_xl, if one doesn't
+                   already exist */
+                int32_t lru_limit = INODE_LRU_LIMIT (frame->this);
+
+                gf_log (trans->xl->name, GF_LOG_DEBUG,
+                        "creating inode table with lru_limit=%"PRId32", "
+                        "xlator=%s", lru_limit, conn->bound_xl->name);
+
+                conn->bound_xl->itable =
+                        inode_table_new (lru_limit,
+                                         conn->bound_xl);
+        }
+
+        ret = dict_set_str (reply, "process-uuid",
+                            xl->ctx->process_uuid);
+        if (ret < 0)
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "failed to set process-uuid in reply");
+
+fail:
+        /* The length is taken in a signed variable first: dict_len is a
+           size_t, so a negative error return could never be detected on it.
+           (Assumes dict_serialized_length() signals failure with a negative
+           value, as the original "< 0" test implies.) */
+        ret = dict_serialized_length (reply);
+        if (ret < 0) {
+                gf_log (trans->xl->name, GF_LOG_ERROR,
+                        "failed to get serialized length of reply dict");
+                op_ret = -1;
+                op_errno = EINVAL;
+                dict_len = 0;
+        } else {
+                dict_len = ret;
+        }
+
+        rsp_hdr    = gf_hdr_new (rsp, dict_len);
+        rsp_hdrlen = gf_hdr_len (rsp, dict_len);
+        rsp = gf_param (rsp_hdr);
+
+        if (dict_len) {
+                ret = dict_serialize (reply, rsp->buf);
+                if (ret < 0) {
+                        gf_log (trans->xl->name, GF_LOG_ERROR,
+                                "failed to serialize reply dict");
+                        op_ret = -1;
+                        op_errno = -ret;
+                }
+        }
+        rsp->dict_len = hton32 (dict_len);
+
+        rsp_hdr->rsp.op_ret = hton32 (op_ret);
+        gf_errno = gf_errno_to_error (op_errno);
+        rsp_hdr->rsp.op_errno = hton32 (gf_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_SETVOLUME,
+                               rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
+
+        dict_unref (params);
+        dict_unref (reply);
+        dict_unref (config_params);
+
+        return 0;
+}
+
+/*
+ * server_mop_stats_cbk - stats callback for server management operation
+ * @frame: call frame
+ * @cookie: unused
+ * @xl: unused
+ * @ret: return value of the stats call
+ * @op_errno: errno of the stats call
+ * @stats: statistics to encode; only read when @ret is non-negative
+ *
+ * Encodes the statistics as one line of comma-separated hexadecimal numbers
+ * and sends it to the client as the GF_MOP_STATS reply.
+ *
+ * not for external reference
+ */
+
+int32_t
+server_mop_stats_cbk (call_frame_t *frame,
+                      void *cookie,
+                      xlator_t *xl,
+                      int32_t ret,
+                      int32_t op_errno,
+                      struct xlator_stats *stats)
+{
+        /* TODO: get this information from somewhere else, not extern */
+        int64_t             glusterfsd_stats_nr_clients = 0;
+        char                buffer[256] = {0,};
+        size_t              buf_len = 0;
+        size_t              hdrlen = 0;
+        int32_t             gf_errno = 0;
+        gf_hdr_common_t    *hdr = NULL;
+        gf_mop_stats_rsp_t *rsp = NULL;
+
+        if (ret >= 0) {
+                sprintf (buffer,
+                         "%"PRIx64",%"PRIx64",%"PRIx64
+                         ",%"PRIx64",%"PRIx64",%"PRIx64
+                         ",%"PRIx64",%"PRIx64"\n",
+                         stats->nr_files,
+                         stats->disk_usage,
+                         stats->free_disk,
+                         stats->total_disk_size,
+                         stats->read_usage,
+                         stats->write_usage,
+                         stats->disk_speed,
+                         glusterfsd_stats_nr_clients);
+
+                buf_len = strlen (buffer);
+        }
+
+        /* one extra byte for the terminating NUL copied by strcpy below */
+        hdrlen = gf_hdr_len (rsp, buf_len + 1);
+        hdr    = gf_hdr_new (rsp, buf_len + 1);
+        rsp    = gf_param (hdr);
+
+        strcpy (rsp->buf, buffer);
+
+        gf_errno = gf_errno_to_error (op_errno);
+        hdr->rsp.op_ret   = hton32 (ret);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_STATS,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+
+/*
+ * mop_stats - stats management function for server protocol
+ * @frame: call frame
+ * @bound_xl: xlator whose stats operation is invoked
+ * @hdr: request header carrying the flags
+ * @hdrlen: length of @hdr (unused)
+ * @buf: request buffer (unused)
+ * @buflen: length of @buf (unused)
+ *
+ * Winds the stats call to @bound_xl; the reply is sent from
+ * server_mop_stats_cbk().
+ */
+static int32_t
+mop_stats (call_frame_t *frame,
+           xlator_t *bound_xl,
+           gf_hdr_common_t *hdr, size_t hdrlen,
+           char *buf, size_t buflen)
+{
+        gf_mop_stats_req_t *req = gf_param (hdr);
+        int32_t             flag = ntoh32 (req->flags);
+
+        STACK_WIND (frame,
+                    server_mop_stats_cbk,
+                    bound_xl,
+                    bound_xl->mops->stats,
+                    flag);
+
+        return 0;
+}
+
+/*
+ * mop_ping - ping management function for server protocol
+ * @frame: call frame
+ * @bound_xl: unused
+ * @hdr: request header (not modified)
+ * @hdrlen: length of @hdr (unused)
+ * @buf: request buffer (unused)
+ * @buflen: length of @buf (unused)
+ *
+ * Answers with an empty GF_MOP_PING reply whose op_ret is 0.
+ */
+int32_t
+mop_ping (call_frame_t *frame,
+          xlator_t *bound_xl,
+          gf_hdr_common_t *hdr, size_t hdrlen,
+          char *buf, size_t buflen)
+{
+        gf_hdr_common_t   *rsp_hdr = NULL;
+        gf_mop_ping_rsp_t *rsp = NULL;
+        size_t             rsp_hdrlen = 0;
+
+        rsp_hdrlen = gf_hdr_len (rsp, 0);
+        rsp_hdr    = gf_hdr_new (rsp, 0);
+
+        /* the status belongs in the reply header; the original stored it in
+           the request header, leaving the reply untouched */
+        if (rsp_hdr != NULL)
+                rsp_hdr->rsp.op_ret = hton32 (0);
+
+        protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_PING,
+                               rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+/*
+ * unknown_op_cbk - reply to a request whose opcode is not known.
+ *                  Helps to keep the backward/forward compatibility.
+ * @frame: call frame
+ * @type: operation type placed in the reply
+ * @opcode: opcode placed in the reply
+ *
+ * Sends an empty reply with op_ret -1 and op_errno ENOSYS.
+ */
+
+int32_t
+unknown_op_cbk (call_frame_t *frame,
+                int32_t type,
+                int32_t opcode)
+{
+        gf_fop_flush_rsp_t *rsp = NULL;
+        gf_hdr_common_t    *hdr = NULL;
+        size_t              hdrlen = 0;
+        int32_t             gf_errno = 0;
+
+        hdrlen = gf_hdr_len (rsp, 0);
+        hdr    = gf_hdr_new (rsp, 0);
+        rsp    = gf_param (hdr);
+
+        gf_errno = gf_errno_to_error (ENOSYS);
+        hdr->rsp.op_ret   = hton32 (-1);
+        hdr->rsp.op_errno = hton32 (gf_errno);
+
+        protocol_server_reply (frame, type, opcode,
+                               hdr, hdrlen, NULL, 0, NULL);
+
+        return 0;
+}
+
+/*
+ * get_frame_for_transport - get call frame for specified transport object
+ *
+ * @trans: transport object
+ *
+ * Creates a frame on the call pool of @trans's xlator and attaches a newly
+ * allocated server_state_t to it. The state takes a reference on @trans
+ * and, when the transport already has a connection, copies that
+ * connection's bound xlator and its inode table.
+ *
+ * Returns NULL when @trans, the call pool or the frame is unavailable.
+ * NOTE(review): when the state allocation fails the frame is returned
+ * without any state set by this function - confirm callers cope with that.
+ */
+static call_frame_t *
+get_frame_for_transport (transport_t *trans)
+{
+ call_frame_t *frame = NULL;
+ call_pool_t *pool = NULL;
+ server_connection_t *conn = NULL;
+ server_state_t *state = NULL;;
+
+ GF_VALIDATE_OR_GOTO("server", trans, out);
+
+ if (trans->xl && trans->xl->ctx)
+ pool = trans->xl->ctx->pool;
+ GF_VALIDATE_OR_GOTO("server", pool, out);
+
+ frame = create_frame (trans->xl, pool);
+ GF_VALIDATE_OR_GOTO("server", frame, out);
+
+ state = CALLOC (1, sizeof (*state));
+ GF_VALIDATE_OR_GOTO("server", state, out);
+
+ /* conn is NULL until a connection has been attached to the transport */
+ conn = trans->xl_private;
+ if (conn) {
+ if (conn->bound_xl)
+ state->itable = conn->bound_xl->itable;
+ state->bound_xl = conn->bound_xl;
+ }
+
+ state->trans = transport_ref (trans);
+
+ frame->root->trans = conn;
+ frame->root->state = state; /* which socket */
+ frame->root->unique = 0; /* which call */
+
+out:
+ return frame;
+}
+
+/*
+ * get_frame_for_call - create a frame capable of generating and sending
+ *                      the reply packet by itself.
+ *                      By making a call with this frame, the last UNWIND
+ *                      function will have all needed state from its
+ *                      frame_t->root to send reply.
+ * @trans: transport the request arrived on
+ * @hdr: request header; op, type, callid, uid, gid and pid are copied from
+ *       it into frame->root after conversion to host byte order
+ *
+ * Returns NULL when no frame could be obtained for @trans.
+ *
+ * not for external reference
+ */
+static call_frame_t *
+get_frame_for_call (transport_t *trans, gf_hdr_common_t *hdr)
+{
+        call_frame_t *frame = NULL;
+
+        frame = get_frame_for_transport (trans);
+        /* get_frame_for_transport() returns NULL on failure */
+        if (frame == NULL)
+                return NULL;
+
+        frame->root->op   = ntoh32 (hdr->op);
+        frame->root->type = ntoh32 (hdr->type);
+
+        frame->root->uid    = ntoh32 (hdr->req.uid);
+        frame->root->unique = ntoh64 (hdr->callid);  /* which call */
+        frame->root->gid    = ntoh32 (hdr->req.gid);
+        frame->root->pid    = ntoh32 (hdr->req.pid);
+
+        return frame;
+}
+
+/*
+ * prototype of operations function for each of mop, fop and cbk
+ * at server protocol level
+ *
+ * @frame: call frame pointer
+ * @bound_xl: the xlator that this frame is bound to
+ * @hdr, @hdrlen: request header and its length
+ * @buf, @buflen: request buffer and its length
+ *
+ * to be used by protocol interpret, _not_ for external reference
+ */
+typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bound_xl,
+                            gf_hdr_common_t *hdr, size_t hdrlen,
+                            char *buf, size_t buflen);
+
+
+/*
+ * File-operation handlers, indexed by the GF_FOP_* opcode taken from the
+ * request header. Opcodes without an initializer are left as NULL.
+ */
+static gf_op_t gf_fops[] = {
+ [GF_FOP_STAT] = server_stat,
+ [GF_FOP_READLINK] = server_readlink,
+ [GF_FOP_MKNOD] = server_mknod,
+ [GF_FOP_MKDIR] = server_mkdir,
+ [GF_FOP_UNLINK] = server_unlink,
+ [GF_FOP_RMDIR] = server_rmdir,
+ [GF_FOP_SYMLINK] = server_symlink,
+ [GF_FOP_RENAME] = server_rename,
+ [GF_FOP_LINK] = server_link,
+ [GF_FOP_CHMOD] = server_chmod,
+ [GF_FOP_CHOWN] = server_chown,
+ [GF_FOP_TRUNCATE] = server_truncate,
+ [GF_FOP_OPEN] = server_open,
+ [GF_FOP_READ] = server_readv,
+ [GF_FOP_WRITE] = server_writev,
+ [GF_FOP_STATFS] = server_statfs,
+ [GF_FOP_FLUSH] = server_flush,
+ [GF_FOP_FSYNC] = server_fsync,
+ [GF_FOP_SETXATTR] = server_setxattr,
+ [GF_FOP_GETXATTR] = server_getxattr,
+ [GF_FOP_REMOVEXATTR] = server_removexattr,
+ [GF_FOP_OPENDIR] = server_opendir,
+ [GF_FOP_GETDENTS] = server_getdents,
+ [GF_FOP_FSYNCDIR] = server_fsyncdir,
+ [GF_FOP_ACCESS] = server_access,
+ [GF_FOP_CREATE] = server_create,
+ [GF_FOP_FTRUNCATE] = server_ftruncate,
+ [GF_FOP_FSTAT] = server_fstat,
+ [GF_FOP_LK] = server_lk,
+ [GF_FOP_UTIMENS] = server_utimens,
+ [GF_FOP_FCHMOD] = server_fchmod,
+ [GF_FOP_FCHOWN] = server_fchown,
+ [GF_FOP_LOOKUP] = server_lookup,
+ [GF_FOP_SETDENTS] = server_setdents,
+ [GF_FOP_READDIR] = server_readdir,
+ [GF_FOP_INODELK] = server_inodelk,
+ [GF_FOP_FINODELK] = server_finodelk,
+ [GF_FOP_ENTRYLK] = server_entrylk,
+ [GF_FOP_FENTRYLK] = server_fentrylk,
+ [GF_FOP_CHECKSUM] = server_checksum,
+ [GF_FOP_XATTROP] = server_xattrop,
+ [GF_FOP_FXATTROP] = server_fxattrop,
+};
+
+
+
+/*
+ * Management-operation handlers, indexed by the GF_MOP_* opcode taken from
+ * the request header. Opcodes without an initializer are left as NULL.
+ */
+static gf_op_t gf_mops[] = {
+ [GF_MOP_SETVOLUME] = mop_setvolume,
+ [GF_MOP_GETVOLUME] = mop_getvolume,
+ [GF_MOP_STATS] = mop_stats,
+ [GF_MOP_GETSPEC] = mop_getspec,
+ [GF_MOP_PING] = mop_ping,
+};
+
+/*
+ * Callback-request handlers, indexed by the GF_CBK_* opcode taken from the
+ * request header. Opcodes without an initializer are left as NULL.
+ */
+static gf_op_t gf_cbks[] = {
+ [GF_CBK_FORGET] = server_forget,
+ [GF_CBK_RELEASE] = server_release,
+ [GF_CBK_RELEASEDIR] = server_releasedir
+};
+
+/*
+ * server_op_lookup - bounds-checked lookup in an operation table
+ *
+ * Returns NULL when @op lies outside @table or its slot is empty.
+ */
+static gf_op_t
+server_op_lookup (gf_op_t *table, size_t count, int32_t op)
+{
+        if ((op < 0) || ((size_t) op >= count))
+                return NULL;
+
+        return table[op];
+}
+
+/*
+ * protocol_server_interpret - dispatch one received request
+ * @this: server protocol xlator
+ * @trans: transport the request arrived on
+ * @hdr_p: request header
+ * @hdrlen: length of @hdr_p
+ * @buf: request buffer
+ * @buflen: length of @buf
+ *
+ * Picks the handler table from the request type and calls the handler for
+ * the opcode on a newly created frame. File operations and callbacks are
+ * refused until the connection is bound to a volume; management operations
+ * are dispatched even when it is not.
+ *
+ * Returns the handler's return value, or -1 when the request was rejected
+ * (unknown type, invalid opcode, no handler, unbound connection, no frame).
+ */
+int
+protocol_server_interpret (xlator_t *this, transport_t *trans,
+                           char *hdr_p, size_t hdrlen, char *buf,
+                           size_t buflen)
+{
+        server_connection_t *conn = NULL;
+        gf_hdr_common_t     *hdr = NULL;
+        xlator_t            *bound_xl = NULL;
+        call_frame_t        *frame = NULL;
+        peer_info_t         *peerinfo = NULL;
+        gf_op_t              handler = NULL;
+        int32_t              type = -1;
+        int32_t              op = -1;
+        int32_t              ret = -1;
+
+        hdr  = (gf_hdr_common_t *)hdr_p;
+        type = ntoh32 (hdr->type);
+        op   = ntoh32 (hdr->op);
+
+        conn = trans->xl_private;
+        if (conn)
+                bound_xl = conn->bound_xl;
+
+        peerinfo = &trans->peerinfo;
+        switch (type) {
+        case GF_OP_TYPE_FOP_REQUEST:
+                if ((op < 0) ||
+                    (op > GF_FOP_MAXVALUE)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid fop %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                if (bound_xl == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Received fop %"PRId32" before "
+                                "authentication.", op);
+                        break;
+                }
+                /* the opcode comes off the wire: never index past the table
+                   and never call through an empty slot */
+                handler = server_op_lookup (gf_fops,
+                                            sizeof (gf_fops) /
+                                            sizeof (gf_fops[0]), op);
+                if (handler == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no handler for fop %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                frame = get_frame_for_call (trans, hdr);
+                if (frame == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to create frame for fop %"PRId32,
+                                op);
+                        break;
+                }
+                ret = handler (frame, bound_xl, hdr, hdrlen, buf, buflen);
+                break;
+
+        case GF_OP_TYPE_MOP_REQUEST:
+                if (op < 0 || op > GF_MOP_MAXVALUE) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid mop %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                handler = server_op_lookup (gf_mops,
+                                            sizeof (gf_mops) /
+                                            sizeof (gf_mops[0]), op);
+                if (handler == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no handler for mop %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                frame = get_frame_for_call (trans, hdr);
+                if (frame == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to create frame for mop %"PRId32,
+                                op);
+                        break;
+                }
+                ret = handler (frame, bound_xl, hdr, hdrlen, buf, buflen);
+                break;
+
+        case GF_OP_TYPE_CBK_REQUEST:
+                if (op < 0 || op > GF_CBK_MAXVALUE) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid cbk %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                if (bound_xl == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Received cbk %d before authentication.", op);
+                        break;
+                }
+                handler = server_op_lookup (gf_cbks,
+                                            sizeof (gf_cbks) /
+                                            sizeof (gf_cbks[0]), op);
+                if (handler == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "no handler for cbk %"PRId32" from client %s",
+                                op, peerinfo->identifier);
+                        break;
+                }
+                frame = get_frame_for_call (trans, hdr);
+                if (frame == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to create frame for cbk %"PRId32,
+                                op);
+                        break;
+                }
+                ret = handler (frame, bound_xl, hdr, hdrlen, buf, buflen);
+                break;
+
+        default:
+                break;
+        }
+
+        return ret;
+}
+
+
+/*
+ * server_nop_cbk - nop callback for server protocol
+ * @frame: call frame
+ * @cookie: unused
+ * @this: unused
+ * @op_ret: return value (ignored)
+ * @op_errno: errno (ignored)
+ *
+ * Sends no reply: frees the frame's state, if any, and destroys the stack.
+ *
+ * not for external reference
+ */
+int
+server_nop_cbk (call_frame_t *frame, void *cookie,
+                xlator_t *this, int32_t op_ret, int32_t op_errno)
+{
+        server_state_t *state = CALL_STATE(frame);
+
+        if (state != NULL)
+                free_state (state);
+
+        STACK_DESTROY (frame->root);
+
+        return 0;
+}
+
+
+/*
+ * get_auth_types - dict_foreach() visitor collecting authentication module
+ *                  names
+ * @this: dictionary being walked (unused)
+ * @key: option name; keys of the form "auth.<module>..." are considered
+ * @value: option value (unused)
+ * @data: dictionary that receives one entry per module name
+ *
+ * For every "auth.<module>" key, <module> is added to @data with a NULL
+ * value; "ip" is translated to "addr" for backward compatibility. Keys
+ * that are not auth options, or that lack a module part, are ignored.
+ */
+static void
+get_auth_types (dict_t *this,
+                char *key,
+                data_t *value,
+                void *data)
+{
+        dict_t   *auth_dict = data;
+        char     *saveptr = NULL, *tmp = NULL;
+        char     *key_cpy = NULL;
+        int32_t   ret = -1;
+
+        key_cpy = strdup (key);
+        GF_VALIDATE_OR_GOTO("server", key_cpy, out);
+
+        /* strtok_r returns NULL for an empty key or one made only of dots */
+        tmp = strtok_r (key_cpy, ".", &saveptr);
+        if ((tmp == NULL) || (strcmp (tmp, "auth") != 0))
+                goto free_out;
+
+        tmp = strtok_r (NULL, ".", &saveptr);
+        if (tmp == NULL) {
+                /* a bare "auth" key names no module */
+                gf_log ("server", GF_LOG_WARNING,
+                        "ignoring auth option '%s' without module name",
+                        key);
+                goto free_out;
+        }
+
+        if (strcmp (tmp, "ip") == 0) {
+                /* TODO: backward compatibility, remove when
+                   newer versions are available */
+                tmp = "addr";
+                gf_log ("server", GF_LOG_WARNING,
+                        "assuming 'auth.ip' to be 'auth.addr'");
+        }
+
+        ret = dict_set_dynptr (auth_dict, tmp, NULL, 0);
+        if (ret < 0) {
+                gf_log ("server", GF_LOG_ERROR,
+                        "failed to dict_set_dynptr");
+        }
+
+free_out:
+        FREE (key_cpy);
+out:
+        return;
+}
+
+
+/*
+ * validate_auth_options - check that every subvolume has an auth option
+ * @this: server protocol xlator whose children are checked
+ * @dict: option dictionary
+ *
+ * A child counts as covered when some option key names it: for
+ * "auth.<module>.<volume>..." keys the third dot-separated token is
+ * compared with the child's name, for any other key the first token is.
+ *
+ * Returns 0 when every child is covered (or there are no children), -1
+ * otherwise or when memory runs out.
+ */
+static int
+validate_auth_options (xlator_t *this, dict_t *dict)
+{
+        int            error = 0;
+        int            matched = 0;
+        xlator_list_t *trav = NULL;
+        data_pair_t   *pair = NULL;
+        char          *saveptr = NULL, *tmp = NULL;
+        char          *key_cpy = NULL;
+
+        trav = this->children;
+        while (trav) {
+                error = -1;
+                for (pair = dict->members_list; pair; pair = pair->next) {
+                        key_cpy = strdup (pair->key);
+                        if (key_cpy == NULL) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory");
+                                return -1;
+                        }
+
+                        tmp = strtok_r (key_cpy, ".", &saveptr);
+                        if ((tmp != NULL) && (strcmp (tmp, "auth") == 0)) {
+                                /* for module type */
+                                tmp = strtok_r (NULL, ".", &saveptr);
+                                /* for volume name */
+                                if (tmp != NULL)
+                                        tmp = strtok_r (NULL, ".", &saveptr);
+                        }
+
+                        /* tmp is NULL when the key has too few components,
+                           e.g. "auth.addr"; such a key names no volume */
+                        matched = ((tmp != NULL) &&
+                                   (strcmp (tmp, trav->xlator->name) == 0));
+                        free (key_cpy);
+
+                        if (matched) {
+                                error = 0;
+                                break;
+                        }
+                }
+                if (-1 == error) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "volume '%s' defined as subvolume, but no "
+                                "authentication defined for the same",
+                                trav->xlator->name);
+                        break;
+                }
+                trav = trav->next;
+        }
+
+        return error;
+}
+
+
+/*
+ * init - called during server protocol initialization
+ *
+ * @this: server protocol xlator
+ *
+ * Loads the transport and starts listening, collects and validates the
+ * authentication options, initializes the auth modules, reads
+ * "inode-lru-limit" and "limits.transaction-size" (with defaults) and
+ * tries to raise the open file descriptor limit.
+ *
+ * Returns 0 on success and a non-zero value on failure. On failure the
+ * configuration allocated here is released again.
+ * NOTE(review): the loaded transport is not torn down on failure - confirm
+ * who owns it.
+ */
+int
+init (xlator_t *this)
+{
+        int32_t        ret = -1;
+        transport_t   *trans = NULL;
+        server_conf_t *conf = NULL;
+
+        if (this->children == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "protocol/server should have subvolume");
+                goto out;
+        }
+
+        trans = transport_load (this->options, this);
+        if (trans == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to load transport");
+                goto out;
+        }
+
+        ret = transport_listen (trans);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to bind/listen on socket");
+                goto out;
+        }
+
+        /* ret holds the success value of transport_listen() here; reset it
+           so that the allocation failures below are reported as failures */
+        ret = -1;
+
+        conf = CALLOC (1, sizeof (server_conf_t));
+        GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+        INIT_LIST_HEAD (&conf->conns);
+        pthread_mutex_init (&conf->mutex, NULL);
+
+        conf->trans = trans;
+
+        conf->auth_modules = dict_new ();
+        GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out);
+
+        dict_foreach (this->options, get_auth_types,
+                      conf->auth_modules);
+        ret = validate_auth_options (this, this->options);
+        if (ret == -1) {
+                /* logging already done in validate_auth_options function. */
+                goto out;
+        }
+
+        ret = gf_auth_init (this, conf->auth_modules);
+        if (ret) {
+                goto out;
+        }
+
+        /* no failure path follows once conf is published */
+        this->private = conf;
+
+        ret = dict_get_int32 (this->options, "inode-lru-limit",
+                              &conf->inode_lru_limit);
+        if (ret < 0) {
+                conf->inode_lru_limit = 1024;
+        }
+
+        ret = dict_get_int32 (this->options, "limits.transaction-size",
+                              &conf->max_block_size);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "defaulting limits.transaction-size to %d",
+                        DEFAULT_BLOCK_SIZE);
+                conf->max_block_size = DEFAULT_BLOCK_SIZE;
+        }
+
+#ifndef GF_DARWIN_HOST_OS
+        {
+                struct rlimit lim;
+
+                lim.rlim_cur = 1048576;
+                lim.rlim_max = 1048576;
+
+                if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "WARNING: Failed to set 'ulimit -n 1M': %s",
+                                strerror(errno));
+                        lim.rlim_cur = 65536;
+                        lim.rlim_max = 65536;
+
+                        if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set max open fd to 64k: %s",
+                                        strerror(errno));
+                        } else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "max open fd set to 64k");
+                        }
+                }
+        }
+#endif
+        this->ctx->top = this;
+
+        ret = 0;
+out:
+        if ((ret != 0) && (conf != NULL)) {
+                /* reached only before conf was stored in this->private */
+                if (conf->auth_modules)
+                        dict_unref (conf->auth_modules);
+                pthread_mutex_destroy (&conf->mutex);
+                FREE (conf);
+        }
+
+        return ret;
+}
+
+
+
+/*
+ * protocol_server_pollin - receive one request from @trans and dispatch it
+ * @this: server protocol xlator
+ * @trans: transport with pending input
+ *
+ * Returns the result of transport_receive() when it is non-zero, otherwise
+ * the result of protocol_server_interpret(). The received header is freed
+ * here in both cases.
+ */
+int
+protocol_server_pollin (xlator_t *this, transport_t *trans)
+{
+        char   *hdr = NULL;
+        char   *buf = NULL;
+        size_t  hdrlen = 0;
+        size_t  buflen = 0;
+        int     ret = -1;
+
+        ret = transport_receive (trans, &hdr, &hdrlen, &buf, &buflen);
+        if (ret == 0) {
+                ret = protocol_server_interpret (this, trans, hdr,
+                                                 hdrlen, buf, buflen);
+        }
+
+        /* TODO: use mem-pool */
+        FREE (hdr);
+
+        return ret;
+}
+
+
+/*
+ * fini - finish function for server protocol, called before
+ *        unloading server protocol.
+ *
+ * @this: server protocol xlator
+ *
+ * Drops the reference on the auth module dictionary, frees the private
+ * configuration and clears this->private.
+ */
+void
+fini (xlator_t *this)
+{
+        server_conf_t *conf = this->private;
+
+        GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+        if (conf->auth_modules != NULL)
+                dict_unref (conf->auth_modules);
+
+        FREE (conf);
+        this->private = NULL;
+out:
+        return;
+}
+
+/*
+ * notify - notify function for server protocol
+ * @this: server protocol xlator
+ * @event: event being delivered
+ * @data: transport the event refers to
+ *
+ * POLLIN reads and dispatches a request, POLLERR disconnects the transport
+ * and returns -1, TRANSPORT_CLEANUP releases the connection attached to
+ * the transport. Every other event goes to default_notify().
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+        transport_t *trans = data;
+        int          ret = 0;
+
+        switch (event) {
+        case GF_EVENT_POLLIN:
+                ret = protocol_server_pollin (this, trans);
+                break;
+
+        case GF_EVENT_POLLERR:
+                gf_log (trans->xl->name, GF_LOG_INFO, "%s disconnected",
+                        trans->peerinfo.identifier);
+
+                ret = -1;
+                transport_disconnect (trans);
+                break;
+
+        case GF_EVENT_TRANSPORT_CLEANUP:
+                if (trans->xl_private)
+                        server_connection_put (this, trans->xl_private);
+                break;
+
+        default:
+                default_notify (this, event, data);
+                break;
+        }
+
+        return ret;
+}
+
+
+/* No management operations are defined by this xlator. */
+struct xlator_mops mops = {
+};
+
+/* No file operations are defined by this xlator. */
+struct xlator_fops fops = {
+};
+
+/* No callbacks are defined by this xlator. */
+struct xlator_cbks cbks = {
+};
+
+/*
+ * Options declared by protocol/server: key, value type and, where given,
+ * the accepted values or numeric range. The list ends with a NULL key.
+ */
+struct volume_options options[] = {
+ { .key = {"transport-type"},
+ .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
+ "tcp/server", "ib-verbs/server"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"volume-filename.*"},
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = {"inode-lru-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = (1 * GF_UNIT_MB)
+ },
+ { .key = {"client-volume-filename"},
+ .type = GF_OPTION_TYPE_PATH
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/protocol/server/src/server-protocol.h b/xlators/protocol/server/src/server-protocol.h
new file mode 100644
index 00000000000..cc5f6f9512c
--- /dev/null
+++ b/xlators/protocol/server/src/server-protocol.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _SERVER_PROTOCOL_H_
+#define _SERVER_PROTOCOL_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "authenticate.h"
+#include "fd.h"
+#include "byte-order.h"
+
+#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */
+#define GLUSTERFSD_SPEC_PATH CONFDIR "/glusterfs-client.vol"
+
+typedef struct _server_state server_state_t;
+
+/*
+ * One lock-holder record, linked through 'lockers'.
+ * NOTE(review): presumably chained on one of the lists in struct
+ * _lock_table - confirm against the code that fills it.
+ */
+struct _locker {
+ struct list_head lockers;
+ loc_t loc;
+ fd_t *fd;
+ pid_t pid;
+};
+
+/*
+ * Lock table with separate lists for file and directory lockers.
+ * NOTE(review): 'lock' presumably guards both lists and 'count' - confirm.
+ */
+struct _lock_table {
+ struct list_head file_lockers;
+ struct list_head dir_lockers;
+ gf_lock_t lock;
+ size_t count;
+};
+
+
+/* private structure per connection (transport object)
+ * used as transport_t->xl_private
+ *
+ * Obtained with server_connection_get() and released with
+ * server_connection_put(). bound_xl is the xlator the connection is bound
+ * to and stays NULL until a volume has been bound.
+ */
+struct _server_connection {
+ struct list_head list;
+ char *id;
+ int ref;
+ pthread_mutex_t lock;
+ char disconnected;
+ fdtable_t *fdtable;
+ struct _lock_table *ltable;
+ xlator_t *bound_xl;
+};
+
+typedef struct _server_connection server_connection_t;
+
+
+server_connection_t *
+server_connection_get (xlator_t *this, const char *id);
+
+void
+server_connection_put (xlator_t *this, server_connection_t *conn);
+
+int
+server_connection_destroy (xlator_t *this, server_connection_t *conn);
+
+int
+server_nop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno);
+
+
+/*
+ * Configuration of one protocol/server xlator instance.
+ *
+ * auth_modules    - dictionary of authentication modules
+ * trans           - transport handle
+ * max_block_size  - integer limit; DEFAULT_BLOCK_SIZE is the matching
+ *                   default declared above
+ * inode_lru_limit - integer limit for inode tables
+ * mutex, conns    - a mutex and a list head
+ *                   NOTE(review): presumably the connection list and its
+ *                   guard - confirm
+ */
+typedef struct {
+ dict_t *auth_modules;
+ transport_t *trans;
+ int32_t max_block_size;
+ int32_t inode_lru_limit;
+ pthread_mutex_t mutex;
+ struct list_head conns;
+} server_conf_t;
+
+
+/*
+ * Per-request state (typedef'd as server_state_t above).
+ *
+ * trans, bound_xl and itable identify the transport, the bound xlator and
+ * its inode table. The remaining members hold per-request arguments.
+ * NOTE(review): which operation uses which member is not visible from this
+ * header - check the individual handlers.
+ */
+struct _server_state {
+ transport_t *trans;
+ xlator_t *bound_xl;
+ loc_t loc;
+ loc_t loc2;
+ int flags;
+ fd_t *fd;
+ size_t size;
+ off_t offset;
+ mode_t mode;
+ dev_t dev;
+ uid_t uid;
+ gid_t gid;
+ size_t nr_count;
+ int cmd;
+ int type;
+ char *name;
+ int name_len;
+ inode_table_t *itable;
+ int64_t fd_no;
+ ino_t ino;
+ ino_t par;
+ ino_t ino2;
+ ino_t par2;
+ char *path;
+ char *path2;
+ char *bname;
+ char *bname2;
+ int mask;
+ char is_revalidate;
+ dict_t *xattr_req;
+ struct flock flock;
+ struct timespec tv[2];
+ char *resolved;
+};
+
+
+int
+server_stub_resume (call_stub_t *stub, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, inode_t *parent);
+
+int
+do_path_lookup (call_stub_t *stub, const loc_t *loc);
+
+#endif
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am
new file mode 100644
index 00000000000..59b9689699e
--- /dev/null
+++ b/xlators/storage/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = posix $(BDB_SUBDIR)
+
+CLEANFILES =
diff --git a/xlators/storage/bdb/Makefile.am b/xlators/storage/bdb/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/storage/bdb/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/storage/bdb/src/Makefile.am b/xlators/storage/bdb/src/Makefile.am
new file mode 100644
index 00000000000..c0ab394bc58
--- /dev/null
+++ b/xlators/storage/bdb/src/Makefile.am
@@ -0,0 +1,18 @@
+
+xlator_LTLIBRARIES = bdb.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
+
+bdb_la_LDFLAGS = -module -avoidversion
+
+bdb_la_SOURCES = bctx.c bdb-ll.c bdb.c
+bdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = bdb.h
+
+AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+
+AM_LDFLAGS = -ldb
+
+CLEANFILES =
+
diff --git a/xlators/storage/bdb/src/bctx.c b/xlators/storage/bdb/src/bctx.c
new file mode 100644
index 00000000000..2bfa3ea8762
--- /dev/null
+++ b/xlators/storage/bdb/src/bctx.c
@@ -0,0 +1,394 @@
+/*
+ Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <list.h>
+#include <bdb.h>
+#include <libgen.h> /* for dirname */
+
+/* __destroy_bctx - free @bctx together with the path strings it owns */
+static void
+__destroy_bctx (bctx_t *bctx)
+{
+        if (bctx->directory != NULL)
+                FREE (bctx->directory);
+
+        if (bctx->db_path != NULL)
+                FREE (bctx->db_path);
+
+        FREE (bctx);
+}
+
+/* __unhash_bctx - unlink @bctx from its hash chain and leave its b_hash
+ * node re-initialized */
+static void
+__unhash_bctx (bctx_t *bctx)
+{
+ list_del_init (&bctx->b_hash);
+}
+
+/*
+ * bctx_table_prune - evict surplus LRU entries and destroy everything that
+ *                    is queued on the table's purge list
+ * @table: bctx table; NULL is accepted and yields 0
+ *
+ * Under table->lock, entries are moved from the head of the LRU list to
+ * table->purge until lru_size is back within lru_limit, and the whole purge
+ * list is then handed over to a local list head. After the lock is dropped
+ * each entry's db handle is closed (if open) and the entry is destroyed.
+ *
+ * NOTE(review): 'ret' first counts the evicted entries but is overwritten
+ * by the result of every dbp->close() call, so the returned value is the
+ * eviction count only when no closed entry had an open db.
+ */
+static int32_t
+bctx_table_prune (bctx_table_t *table)
+{
+ int32_t ret = 0;
+ struct list_head purge = {0,};
+ struct list_head *next = NULL;
+ bctx_t *entry = NULL;
+ bctx_t *del = NULL, *tmp = NULL;
+
+ if (!table)
+ return 0;
+
+ INIT_LIST_HEAD (&purge);
+
+ LOCK (&table->lock);
+ {
+ if ((table->lru_limit) &&
+ (table->lru_size > table->lru_limit)) {
+ while (table->lru_size > table->lru_limit) {
+ next = table->b_lru.next;
+ entry = list_entry (next, bctx_t, list);
+
+ list_move_tail (next, &table->purge);
+ __unhash_bctx (entry);
+
+ table->lru_size--;
+ ret++;
+ }
+ }
+ /* splice: link the local head into the purge ring, then unlink the
+ table's own head; the ring is now headed by 'purge' and
+ table->purge is left empty */
+ list_move_tail (&purge, &table->purge);
+ list_del_init (&table->purge);
+ }
+ UNLOCK (&table->lock);
+
+ {
+ /* closing and freeing happens outside table->lock */
+ list_for_each_entry_safe (del, tmp, &purge, list) {
+ list_del_init (&del->list);
+ if (del->dbp) {
+ ret = del->dbp->close (del->dbp, 0);
+ if (ret != 0) {
+ gf_log (table->this->name, GF_LOG_ERROR,
+ "failed to close db on path (%s): %s",
+ del->directory, db_strerror (ret));
+ } else {
+ gf_log (table->this->name, GF_LOG_WARNING,
+ "close db for path %s; table->lru_count = %d",
+ del->directory, table->lru_size);
+ }
+ }
+ __destroy_bctx (del);
+ }
+ }
+
+ return ret;
+}
+
+
+/* struct bdb_ctx related */
+/*
+ * bdb_key_hash - hash a NUL-terminated key into [0, hash_size)
+ * @key: key string
+ * @hash_size: number of buckets
+ *
+ * Starts from the first character and folds every following character in
+ * with hash = hash * 31 + c, written as a shift and a subtraction.
+ */
+static inline uint32_t
+bdb_key_hash (char *key, uint32_t hash_size)
+{
+        uint32_t hash = *key;
+
+        if (hash != 0) {
+                while (*++key != '\0')
+                        hash = (hash << 5) - hash + *key;
+        }
+
+        /* key points at the terminating NUL here, so *key adds nothing */
+        return (hash + *key) % hash_size;
+}
+
+/*
+ * __hash_bctx - (re)insert @bctx into its table's hash chain
+ *
+ * The bucket is derived from the key made out of bctx->directory; any
+ * previous hash linkage is dropped first.
+ */
+static void
+__hash_bctx (bctx_t *bctx)
+{
+        bctx_table_t *table = bctx->table;
+        char         *key = NULL;
+
+        MAKE_KEY_FROM_PATH (key, bctx->directory);
+        bctx->key_hash = bdb_key_hash (key, table->hash_size);
+
+        list_del_init (&bctx->b_hash);
+        list_add (&bctx->b_hash, &table->b_hash[bctx->key_hash]);
+}
+
+/*
+ * __bctx_passivate - park a bctx_t that is no longer referenced
+ *
+ * With an open db handle the context goes to the tail of the LRU list;
+ * without one it is unhashed and queued on the purge list.
+ */
+static inline bctx_t *
+__bctx_passivate (bctx_t *bctx)
+{
+        bctx_table_t *table = bctx->table;
+
+        if (bctx->dbp == NULL) {
+                list_move_tail (&bctx->list, &table->purge);
+                __unhash_bctx (bctx);
+                return bctx;
+        }
+
+        list_move_tail (&bctx->list, &table->b_lru);
+        table->lru_size++;
+
+        return bctx;
+}
+
+/* __bctx_activate - move @bctx to the table's active list and take it out
+ * of the LRU count */
+static inline bctx_t *
+__bctx_activate (bctx_t *bctx)
+{
+        bctx_table_t *table = bctx->table;
+
+        list_move (&bctx->list, &table->active);
+        table->lru_size--;
+
+        return bctx;
+}
+
+/* __bdb_ctx_unref - drop one reference; the context is passivated when the
+ * last reference goes away */
+static bctx_t *
+__bdb_ctx_unref (bctx_t *bctx)
+{
+        assert (bctx->ref);
+
+        bctx->ref--;
+        if (bctx->ref == 0)
+                return __bctx_passivate (bctx);
+
+        return bctx;
+}
+
+
+/*
+ * bctx_unref - drop a reference on @bctx and prune its table afterwards
+ * @bctx: context to release; NULL, or a context without a table, is
+ *        rejected
+ *
+ * Returns @bctx, or NULL when the argument was rejected.
+ */
+bctx_t *
+bctx_unref (bctx_t *bctx)
+{
+        bctx_table_t *table = NULL;
+
+        /* '||', not '&&': with '&&' a NULL bctx was dereferenced by the
+           second operand */
+        if (!bctx || !bctx->table)
+                return NULL;
+
+        table = bctx->table;
+
+        LOCK (&table->lock);
+        {
+                bctx = __bdb_ctx_unref (bctx);
+        }
+        UNLOCK (&table->lock);
+
+        bctx_table_prune (table);
+
+        return bctx;
+}
+
+/*
+ * __bctx_ref - take a reference on @bctx, activating it on the first one
+ *
+ * NOTE: __bctx_ref() is called only after holding table->lock and
+ * bctx->lock, in that order
+ */
+static inline bctx_t *
+__bctx_ref (bctx_t *bctx)
+{
+        if (bctx->ref == 0)
+                __bctx_activate (bctx);
+
+        bctx->ref++;
+
+        return bctx;
+}
+
+/* bctx_ref - take a reference on @bctx while holding its table's lock */
+bctx_t *
+bctx_ref (bctx_t *bctx)
+{
+        bctx_table_t *table = bctx->table;
+
+        LOCK (&table->lock);
+        {
+                __bctx_ref (bctx);
+        }
+        UNLOCK (&table->lock);
+
+        return bctx;
+}
+
+
+/* xlator owning @table; the argument is parenthesized so that any pointer
+ * expression can be passed */
+#define BDB_THIS(table) ((table)->this)
+
+/*
+ * __create_bctx - allocate a bctx_t for @path, hash it and put it on the
+ *                 LRU list of @table
+ * @table: bctx table the context belongs to
+ * @path: directory path the context stands for
+ *
+ * Returns the new context, or NULL when memory runs out; nothing is linked
+ * into @table in that case.
+ */
+static inline bctx_t *
+__create_bctx (bctx_table_t *table,
+               const char *path)
+{
+        bctx_t *bctx = NULL;
+        char   *db_path = NULL;
+
+        bctx = CALLOC (1, sizeof (*bctx));
+        GF_VALIDATE_OR_GOTO ("bctx", bctx, out);
+
+        bctx->table = table;
+        bctx->directory = strdup (path);
+        GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, err);
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, BDB_THIS (table), path);
+
+        bctx->db_path = strdup (db_path);
+        /* validate db_path itself; the original re-checked directory here */
+        GF_VALIDATE_OR_GOTO ("bctx", bctx->db_path, err);
+
+        INIT_LIST_HEAD (&bctx->c_list);
+        INIT_LIST_HEAD (&bctx->list);
+        INIT_LIST_HEAD (&bctx->b_hash);
+
+        LOCK_INIT (&bctx->lock);
+
+        __hash_bctx (bctx);
+
+        list_add (&bctx->list, &table->b_lru);
+        table->lru_size++;
+
+        return bctx;
+
+err:
+        /* never hand a half-initialized context back to the caller */
+        if (bctx->directory)
+                FREE (bctx->directory);
+        FREE (bctx);
+        bctx = NULL;
+out:
+        return bctx;
+}
+
+/* bctx_lookup - lookup bctx_t for the directory @directory. (see description of bctx_t in bdb.h)
+ *
+ * @table: bctx_table_t for this instance of bdb.
+ * @directory: directory for which bctx_t is being looked up.
+ *
+ * A context found in the hash table is referenced and returned; otherwise
+ * a new one is created, referenced and returned. Returns NULL when an
+ * argument is NULL or a new context cannot be allocated.
+ */
+bctx_t *
+bctx_lookup (bctx_table_t *table,
+             const char *directory)
+{
+        char     *key = NULL;
+        uint32_t  key_hash = 0;
+        bctx_t   *trav = NULL, *bctx = NULL, *tmp = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bctx", table, out);
+        GF_VALIDATE_OR_GOTO ("bctx", directory, out);
+
+        MAKE_KEY_FROM_PATH (key, directory);
+        key_hash = bdb_key_hash (key, table->hash_size);
+
+        LOCK (&table->lock);
+        {
+                list_for_each_entry_safe (trav, tmp,
+                                          &table->b_hash[key_hash], b_hash) {
+                        LOCK(&trav->lock);
+                        if (!strcmp(trav->directory, directory))
+                                bctx = __bctx_ref (trav);
+                        UNLOCK(&trav->lock);
+
+                        if (bctx)
+                                break;
+                }
+
+                if (!bctx) {
+                        bctx = __create_bctx (table, directory);
+                        /* creation fails when memory runs out */
+                        if (bctx)
+                                bctx = __bctx_ref (bctx);
+                }
+        }
+        UNLOCK (&table->lock);
+out:
+        return bctx;
+}
+
+
+/*
+ * bctx_parent - lookup the bctx_t of the directory containing @path
+ * @table: bctx table
+ * @path: path whose parent directory is looked up
+ *
+ * Returns the referenced context from bctx_lookup(), or NULL on failure.
+ */
+bctx_t *
+bctx_parent (bctx_table_t *table,
+             const char *path)
+{
+        char   *pathname = NULL;
+        bctx_t *bctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bctx", table, out);
+        GF_VALIDATE_OR_GOTO ("bctx", path, out);
+
+        /* dirname() may modify its argument, so work on a copy */
+        pathname = strdup (path);
+        GF_VALIDATE_OR_GOTO ("bctx", pathname, out);
+
+        bctx = bctx_lookup (table, dirname (pathname));
+        GF_VALIDATE_OR_GOTO ("bctx", bctx, out);
+
+out:
+        if (pathname != NULL)
+                free (pathname);
+
+        return bctx;
+}
+
+/*
+ * bdb_db_rename - rename a database file through the table's DB_ENV
+ * @table: bctx table providing the environment and the lock
+ * @oldpath: current database path
+ * @newpath: new database path
+ *
+ * Returns the result of DB_ENV->dbrename(), or -1 when an argument or the
+ * environment is missing.
+ */
+inline int32_t
+bdb_db_rename (bctx_table_t *table,
+               const char *oldpath,
+               const char *newpath)
+{
+        DB_ENV  *dbenv = NULL;
+        int32_t  ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("bctx", table, out);
+        GF_VALIDATE_OR_GOTO ("bctx", oldpath, out);
+        GF_VALIDATE_OR_GOTO ("bctx", newpath, out);
+
+        dbenv = table->dbenv;
+        GF_VALIDATE_OR_GOTO ("bctx", dbenv, out);
+
+        LOCK (&table->lock);
+        {
+                ret = dbenv->dbrename (dbenv, NULL, oldpath, NULL, newpath, 0);
+
+                if (ret == 0) {
+                        gf_log ("bctx",
+                                GF_LOG_DEBUG,
+                                "successfully renamed %s to %s: %s",
+                                oldpath, newpath, db_strerror (ret));
+                } else {
+                        gf_log ("bctx",
+                                GF_LOG_ERROR,
+                                "failed to rename %s to %s: %s",
+                                oldpath, newpath, db_strerror (ret));
+                }
+        }
+        UNLOCK (&table->lock);
+
+out:
+        return ret;
+}
+
+/*
+ * bctx_rename - detach @bctx from its table, close its db and rename the
+ *               database file to @db_newpath
+ * @bctx: context whose database is renamed
+ * @db_newpath: new database path
+ *
+ * Returns @bctx on success and NULL when the rename fails.
+ */
+bctx_t *
+bctx_rename (bctx_t *bctx,
+             const char *db_newpath)
+{
+        bctx_table_t *table = bctx->table;
+        int32_t       ret = -1;
+
+        LOCK (&table->lock);
+        {
+                __unhash_bctx (bctx);
+                list_del_init (&bctx->list);
+
+                if (bctx->dbp) {
+                        ret = bctx->dbp->close (bctx->dbp, 0);
+                        if (ret != 0) {
+                                gf_log ("bdb-ll",
+                                        GF_LOG_ERROR,
+                                        "failed to close db for directory %s (%s)",
+                                        bctx->directory, db_strerror (ret));
+                        }
+                        bctx->dbp = NULL;
+                }
+        }
+        UNLOCK (&table->lock);
+
+        ret = bdb_db_rename (table, bctx->db_path, db_newpath);
+        if (ret == 0)
+                return bctx;
+
+        gf_log ("bctx",
+                GF_LOG_ERROR,
+                "bdb_db_rename failed for directory %s",
+                bctx->directory);
+
+        return NULL;
+}
diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c
new file mode 100644
index 00000000000..40e7d187759
--- /dev/null
+++ b/xlators/storage/bdb/src/bdb-ll.c
@@ -0,0 +1,1455 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <libgen.h>
+#include "bdb.h"
+#include <list.h>
+/*
+ * implement the procedures to interact with bdb */
+
+/****************************************************************
+ *
+ * General wrappers and utility procedures for bdb xlator
+ *
+ ****************************************************************/
+#define BDB_LL_PAGE_SIZE_DEFAULT 4096
+#define BDB_LL_PAGE_SIZE_MIN 4096
+#define BDB_LL_PAGE_SIZE_MAX 65536
+
+/*
+ * bdb_inode_transform - hand out the next inode number of this bdb instance.
+ *
+ * @parent: not used by the current implementation.
+ * @bctx:   only used to reach the xlator's private data
+ *          (bctx->table->this->private).
+ *
+ * return: private->next_ino after incrementing it under private->ino_lock,
+ *         or (ino_t)-1 when @bctx is NULL.
+ */
+ino_t
+bdb_inode_transform (ino_t parent,
+                     bctx_t *bctx)
+{
+        struct bdb_private *priv   = NULL;
+        ino_t               result = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+
+        priv = bctx->table->this->private;
+
+        LOCK (&priv->ino_lock);
+        {
+                priv->next_ino += 1;
+                result = priv->next_ino;
+        }
+        UNLOCK (&priv->ino_lock);
+out:
+        return result;
+}
+
+
+/***********************************************************
+ *
+ * bdb storage database utilities
+ *
+ **********************************************************/
+
+/*
+ * bdb_db_open - create a DB handle for @bctx and open its storage database.
+ *
+ * @bctx: context of the directory whose database (bctx->db_path) is opened
+ *        inside the environment bctx->table->dbenv, using the table's
+ *        access_mode and dbflags.
+ *
+ * When table->page_size is non-zero it is applied with DB->set_pagesize()
+ * before the open; a failure to set it is only logged.
+ *
+ * NOTE: every caller in this file holds bctx->lock while calling this
+ *       procedure (the original note asked for the lock to be taken before
+ *       coming here).
+ *
+ * return: an opened DB handle, or NULL on failure.
+ */
+static DB *
+bdb_db_open (bctx_t *bctx)
+{
+        DB           *storage_dbp = NULL;
+        int32_t       op_ret      = -1;
+        bctx_table_t *table       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+
+        table = bctx->table;
+        GF_VALIDATE_OR_GOTO ("bdb-ll", table, out);
+
+        /* we have to do the following, we can't deny someone of db_open ;) */
+        op_ret = db_create (&storage_dbp, table->dbenv, 0);
+        if (op_ret != 0) {
+                gf_log ("bdb-ll", GF_LOG_ERROR,
+                        "failed to do db_create for directory %s (%s)",
+                        bctx->directory, db_strerror (op_ret));
+                storage_dbp = NULL;
+                goto out;
+        }
+
+        if (table->page_size) {
+                op_ret = storage_dbp->set_pagesize (storage_dbp,
+                                                    table->page_size);
+                if (op_ret != 0) {
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "failed to set the page_size (%"PRIu64") for directory %s (%s)",
+                                table->page_size, bctx->directory, db_strerror (op_ret));
+                } else {
+                        gf_log ("bdb-ll", GF_LOG_DEBUG,
+                                "page-size (%"PRIu64") set on DB",
+                                table->page_size);
+                }
+        }
+
+        op_ret = storage_dbp->open (storage_dbp,
+                                    NULL,
+                                    bctx->db_path,
+                                    NULL,
+                                    table->access_mode,
+                                    table->dbflags,
+                                    0);
+        if (op_ret != 0) {
+                gf_log ("bdb-ll",
+                        GF_LOG_ERROR,
+                        "failed to open storage-db for directory %s (%s)",
+                        bctx->db_path, db_strerror (op_ret));
+                /* a handle whose open failed still has to be discarded with
+                 * DB->close(), otherwise it is leaked */
+                storage_dbp->close (storage_dbp, 0);
+                storage_dbp = NULL;
+        }
+
+out:
+        return storage_dbp;
+}
+
+
+
+/*
+ * bdb_cursor_close - close a cursor previously opened on @bctx's database.
+ *
+ * The close is done while holding bctx->lock. The method used depends on
+ * HAVE_BDB_CURSOR_GET (DBC->close when defined, DBC->c_close otherwise).
+ *
+ * return: -1 when an argument is NULL, otherwise the value returned by the
+ *         cursor close method (non-zero values are logged as errors).
+ */
+int32_t
+bdb_cursor_close (bctx_t *bctx,
+                  DBC *cursorp)
+{
+        int32_t op_ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out);
+
+        LOCK (&bctx->lock);
+        {
+#ifdef HAVE_BDB_CURSOR_GET
+                op_ret = cursorp->close (cursorp);
+#else
+                op_ret = cursorp->c_close (cursorp);
+#endif
+                if (op_ret) {
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "failed to close db cursor for directory %s (%s)",
+                                bctx->directory, db_strerror (op_ret));
+                }
+        }
+        UNLOCK (&bctx->lock);
+
+out:
+        return op_ret;
+}
+
+
+/*
+ * bdb_cursor_open - open a cursor on the database of @bctx.
+ *
+ * @bctx:     directory context; its database is opened first (through
+ *            bdb_db_open) when bctx->dbp is still NULL.
+ * @cursorpp: receives the new cursor from DB->cursor().
+ *
+ * Everything happens under bctx->lock.
+ *
+ * return: 0 on success, -1 when an argument is NULL or the database could
+ *         not be opened, otherwise the error returned by DB->cursor().
+ */
+int32_t
+bdb_cursor_open (bctx_t *bctx,
+                 DBC **cursorpp)
+{
+        int32_t op_ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorpp, out);
+
+        LOCK (&bctx->lock);
+        {
+                /* lazily open the storage db */
+                if (bctx->dbp == NULL)
+                        bctx->dbp = bdb_db_open (bctx);
+
+                if (bctx->dbp == NULL) {
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "failed to open storage db for %s",
+                                bctx->directory);
+                        op_ret = -1;
+                } else {
+                        /* all set, lets open cursor */
+                        op_ret = bctx->dbp->cursor (bctx->dbp, NULL,
+                                                    cursorpp, 0);
+                        if (op_ret != 0) {
+                                gf_log ("bdb-ll", GF_LOG_ERROR,
+                                        "failed to create a cursor for %s (%s)",
+                                        bctx->directory,
+                                        db_strerror (op_ret));
+                        }
+                }
+        }
+        UNLOCK (&bctx->lock);
+
+out:
+        return op_ret;
+}
+
+
+/* cache related */
+/*
+ * bdb_cache_lookup - find the cache entry of @bctx that matches @path.
+ *
+ * @path is turned into a key with MAKE_KEY_FROM_PATH and compared against
+ * the key of every entry on bctx->c_list while holding bctx->lock.
+ *
+ * return: the matching entry, or NULL when nothing matches or an argument
+ *         is NULL. The entry is returned after the lock has been dropped.
+ */
+static bdb_cache_t *
+bdb_cache_lookup (bctx_t *bctx,
+                  char *path)
+{
+        bdb_cache_t *found      = NULL;
+        bdb_cache_t *entry      = NULL;
+        char        *lookup_key = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", path, out);
+
+        MAKE_KEY_FROM_PATH (lookup_key, path);
+
+        LOCK (&bctx->lock);
+        {
+                list_for_each_entry (entry, &bctx->c_list, c_list) {
+                        if (strcmp (entry->key, lookup_key) != 0)
+                                continue;
+
+                        found = entry;
+                        break;
+                }
+        }
+        UNLOCK (&bctx->lock);
+
+out:
+        return found;
+}
+
+/*
+ * bdb_cache_insert - remember a key/value pair in the per-directory cache.
+ *
+ * @bctx: directory context owning the cache (bctx->c_list / bctx->c_count).
+ * @key:  DBT whose data is a NUL-terminated key string (it is copied with
+ *        strdup).
+ * @data: DBT holding the value; data->size bytes are copied.
+ *
+ * Once more than 5 entries are cached the entry at the tail of the list is
+ * recycled, otherwise a new entry is allocated. Either way the entry ends
+ * up at the head of bctx->c_list.
+ *
+ * The copies are made before the cache is touched, so an allocation failure
+ * leaves the cache exactly as it was.
+ *
+ * return: 0 on success, -1 on invalid arguments or allocation failure.
+ */
+static int32_t
+bdb_cache_insert (bctx_t *bctx,
+                  DBT *key,
+                  DBT *data)
+{
+        bdb_cache_t *bcache   = NULL;
+        char        *new_key  = NULL;
+        void        *new_data = NULL;
+        int32_t      ret      = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", key, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", data, out);
+
+        new_key = strdup ((char *)key->data);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", new_key, out);
+
+        new_data = memdup (data->data, data->size);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", new_data, out);
+
+        LOCK (&bctx->lock);
+        {
+                if ((bctx->c_count > 5) && !list_empty (&bctx->c_list)) {
+                        /* most of the times, we enter here: recycle the
+                         * entry at the tail of the list */
+                        /* FIXME: ugly, not supposed to disect any of the
+                         * 'struct list_head' directly */
+                        bcache = list_entry (bctx->c_list.prev,
+                                             bdb_cache_t, c_list);
+                        list_del_init (&bcache->c_list);
+                        free (bcache->key);
+                        free (bcache->data);
+                } else {
+                        /* we will be entering here very rarely */
+                        bcache = CALLOC (1, sizeof (*bcache));
+                        GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock);
+                        bctx->c_count++;
+                }
+
+                /* ownership of the copies moves to the cache entry */
+                bcache->key  = new_key;
+                bcache->data = new_data;
+                bcache->size = data->size;
+                list_add (&bcache->c_list, &bctx->c_list);
+                ret = 0;
+        }
+unlock:
+        UNLOCK (&bctx->lock);
+out:
+        if (ret != 0) {
+                /* nothing took ownership of the copies */
+                free (new_key);
+                free (new_data);
+        }
+        return ret;
+}
+
+/*
+ * bdb_cache_delete - drop the cache entry of @bctx whose key equals @key.
+ *
+ * The first matching entry on bctx->c_list is unlinked and freed (key, data
+ * and the entry itself) and bctx->c_count is decremented, all under
+ * bctx->lock. A missing entry is not an error.
+ *
+ * return: always 0.
+ */
+static int32_t
+bdb_cache_delete (bctx_t *bctx,
+                  char *key)
+{
+        bdb_cache_t *victim = NULL;
+        bdb_cache_t *entry  = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", key, out);
+
+        LOCK (&bctx->lock);
+        {
+                list_for_each_entry (entry, &bctx->c_list, c_list) {
+                        if (strcmp (entry->key, key) == 0) {
+                                victim = entry;
+                                break;
+                        }
+                }
+
+                if (victim != NULL) {
+                        bctx->c_count--;
+                        list_del_init (&victim->c_list);
+                        free (victim->key);
+                        free (victim->data);
+                        free (victim);
+                }
+        }
+        UNLOCK (&bctx->lock);
+
+out:
+        return 0;
+}
+
+/*
+ * bdb_db_stat - call DB->stat() on the database of @bctx.
+ *
+ * @bctx:  directory context; its database is opened under bctx->lock when
+ *         bctx->dbp is still NULL.
+ * @txnid: passed through to DB->stat().
+ * @flags: passed through to DB->stat().
+ *
+ * return: the pointer DB->stat() stored (NULL when the database could not
+ *         be opened or DB->stat() stored nothing).
+ */
+void *
+bdb_db_stat (bctx_t *bctx,
+             DB_TXN *txnid,
+             uint32_t flags)
+{
+        DB      *storage = NULL;
+        void    *result  = NULL;
+        int32_t  op_ret  = -1;
+
+        LOCK (&bctx->lock);
+        {
+                if (bctx->dbp == NULL)
+                        bctx->dbp = bdb_db_open (bctx);
+
+                storage = bctx->dbp;
+        }
+        UNLOCK (&bctx->lock);
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
+
+        op_ret = storage->stat (storage, txnid, &result, flags);
+
+        if (op_ret == 0) {
+                gf_log ("bdb-ll", GF_LOG_DEBUG,
+                        "successfully called DB->stat() on db file %s",
+                        bctx->db_path);
+        } else {
+                gf_log ("bdb-ll", GF_LOG_ERROR,
+                        "failed to do DB->stat() on db file %s: %s",
+                        bctx->db_path, db_strerror (op_ret));
+        }
+out:
+        return result;
+
+}
+
+/* bdb_db_get - retrieve the value stored for @path from the db of @bctx.
+ *
+ * @bctx:   bctx_t * of the parent directory of @path (must be valid).
+ * @txnid:  NULL, or a valid DB_TXN * when embedded in an explicit transaction.
+ * @path:   path of the file to read (translated to a database key using
+ *          MAKE_KEY_FROM_PATH).
+ * @buf:    when non-NULL, receives a newly allocated buffer holding the data
+ *          that was read; the caller owns that buffer.
+ * @size:   number of bytes wanted. Only used when bctx->cache is off, where
+ *          a non-zero @size requests a partial read (DB_DBT_PARTIAL).
+ * @offset: offset to read from. When bctx->cache is off it is handed to db
+ *          as the partial-read offset; on a cache hit the cached data is
+ *          copied starting at @offset.
+ *
+ * NOTE: the db is opened here when bctx->dbp == NULL.
+ *
+ * NOTE: when bctx->cache is set the cache is consulted first (see
+ *       bdb_cache_lookup); only on a miss DB->get() is called, and the pair
+ *       that was read is then added to the cache (see bdb_cache_insert).
+ *
+ * NOTE(review): the entry returned by bdb_cache_lookup is used after
+ *       bctx->lock was released - confirm nothing can recycle it meanwhile.
+ *
+ * return: size of the data found (cached size on a cache hit, value.size
+ *         from DB->get() otherwise) or -1 on error / key not found.
+ */
+int32_t
+bdb_db_get (bctx_t *bctx,
+            DB_TXN *txnid,
+            const char *path,
+            char **buf,
+            size_t size,
+            off_t offset)
+{
+        DB          *storage    = NULL;
+        DBT          key        = {0,};
+        DBT          value      = {0,};
+        int32_t      ret        = -1;
+        char        *key_string = NULL;
+        bdb_cache_t *bcache     = NULL;
+        int32_t      db_flags   = 0;
+        uint8_t      need_break = 0;
+        int32_t      retries    = 1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", path, out);
+
+        MAKE_KEY_FROM_PATH (key_string, path);
+
+        if (bctx->cache &&
+            ((bcache = bdb_cache_lookup (bctx, key_string)) != NULL)) {
+                if (buf) {
+                        *buf = CALLOC (1, bcache->size);
+                        /* check the allocation, not the out-parameter */
+                        GF_VALIDATE_OR_GOTO ("bdb-ll", *buf, out);
+                        /* never read past the end of the cached data; the
+                         * rest of the zero-filled buffer stays untouched */
+                        if ((offset >= 0) &&
+                            (offset < (off_t) bcache->size)) {
+                                memcpy (*buf, (bcache->data + offset),
+                                        (size_t) (bcache->size - offset));
+                        }
+                }
+                ret = bcache->size;
+        } else {
+                LOCK (&bctx->lock);
+                {
+                        if (bctx->dbp == NULL) {
+                                bctx->dbp = bdb_db_open (bctx);
+                                storage = bctx->dbp;
+                        } else {
+                                /* we are just fine, lets continue */
+                                storage = bctx->dbp;
+                        } /* if(bctx->dbp==NULL)...else */
+                }
+                UNLOCK (&bctx->lock);
+
+                GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
+
+                key.data = (char *)key_string;
+                key.size = strlen (key_string);
+                key.flags = DB_DBT_USERMEM;
+
+                if (bctx->cache){
+                        /* read the whole record so it can be cached */
+                        value.flags = DB_DBT_MALLOC;
+                } else {
+                        if (size) {
+                                value.flags = DB_DBT_MALLOC | DB_DBT_PARTIAL;
+                        } else {
+                                value.flags = DB_DBT_MALLOC;
+                        }
+                        value.dlen = size;
+                        value.doff = offset;
+                }
+
+                do {
+                        /* TODO: we prefer to give our own buffer to value.data
+                         * and ask bdb to fill in it */
+                        ret = storage->get (storage, txnid, &key, &value, db_flags);
+
+                        if (ret == DB_NOTFOUND) {
+                                gf_log ("bdb-ll",
+                                        GF_LOG_DEBUG,
+                                        "failed to do DB->get() for key: %s."
+                                        " key not found in storage DB", key_string);
+                                ret = -1;
+                                need_break = 1;
+                        } else if (ret == DB_LOCK_DEADLOCK) {
+                                /* need_break stays 0: try again */
+                                retries++;
+                                gf_log ("bdb-ll",
+                                        GF_LOG_ERROR,
+                                        "deadlock detected in DB->get. retrying DB->get (%d)",
+                                        retries);
+                        } else if (ret == 0) {
+                                /* successfully read data, lets set everything in place
+                                 * and return */
+                                if (buf) {
+                                        *buf = CALLOC (1, value.size);
+                                        ERR_ABORT (*buf);
+                                        memcpy (*buf, value.data, value.size);
+                                }
+                                ret = value.size;
+                                if (bctx->cache)
+                                        bdb_cache_insert (bctx, &key, &value);
+                                /* value.data was allocated by db (DB_DBT_MALLOC) */
+                                free (value.data);
+                                need_break = 1;
+                        } else {
+                                gf_log ("bdb-ll",
+                                        GF_LOG_ERROR,
+                                        "failed to do DB->get() for key %s: %s",
+                                        key_string, db_strerror (ret));
+                                ret = -1;
+                                need_break = 1;
+                        }
+                } while (!need_break);
+        }
+out:
+        return ret;
+}/* bdb_db_get */
+
+/* bdb_db_put - write data for @key_string into the db of @bctx.
+ *
+ * @bctx:       bctx_t * of the parent directory (must be valid; it is
+ *              dereferenced without a check).
+ * @txnid:      NULL, or a valid DB_TXN * when embedded in an explicit
+ *              transaction.
+ * @key_string: key of the database entry.
+ * @buf:        data to be written for @key_string.
+ * @size:       size of @buf.
+ * @offset:     offset in the key's data to be modified with provided data.
+ * @flags:      with BDB_TRUNCATE_RECORD the partial write starts at 0 and
+ *              replaces @offset bytes; otherwise it starts at @offset and
+ *              replaces @size bytes.
+ *
+ * A call with @buf == NULL and @size == 0 (truncate) is done as a plain,
+ * non-partial put.
+ *
+ * NOTE: the db is opened here when bctx->dbp == NULL.
+ *
+ * NOTE: when bctx->cache is set, the cached pair for @key_string is removed
+ *       first (see bdb_cache_delete).
+ *
+ * NOTE: DB->put() is repeated for as long as it reports DB_LOCK_DEADLOCK.
+ *
+ * return: 0 on success, -1 when the db could not be opened, otherwise the
+ *         non-zero error returned by DB->put().
+ */
+int32_t
+bdb_db_put (bctx_t *bctx,
+            DB_TXN *txnid,
+            const char *key_string,
+            const char *buf,
+            size_t size,
+            off_t offset,
+            int32_t flags)
+{
+        DB      *storage  = NULL;
+        DBT      key      = {0,};
+        DBT      value    = {0,};
+        int32_t  ret      = -1;
+        int32_t  db_flags = DB_AUTO_COMMIT;
+        int32_t  attempt  = 1;
+
+        LOCK (&bctx->lock);
+        {
+                if (bctx->dbp == NULL)
+                        bctx->dbp = bdb_db_open (bctx);
+
+                storage = bctx->dbp;
+        }
+        UNLOCK (&bctx->lock);
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
+
+        if (bctx->cache) {
+                ret = bdb_cache_delete (bctx, (char *)key_string);
+                GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out);
+        }
+
+        key.data = (void *)key_string;
+        key.size = strlen (key_string);
+
+        /* NOTE: bdb lets us expand the file: dlen bytes starting at doff
+         *       are replaced by the size bytes given in data, and whatever
+         *       followed doff + dlen moves to doff + size.
+         */
+        value.data = (void *)buf;
+        value.size = size;
+
+        if (flags & BDB_TRUNCATE_RECORD) {
+                value.doff = 0;
+                value.dlen = offset;
+        } else {
+                value.doff = offset;
+                value.dlen = size;
+        }
+
+        /* truncate calls us with no buffer and no size */
+        value.flags = ((buf == NULL) && (size == 0)) ? 0 : DB_DBT_PARTIAL;
+
+        for (;;) {
+                ret = storage->put (storage, txnid, &key, &value, db_flags);
+
+                if (ret == DB_LOCK_DEADLOCK) {
+                        attempt++;
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "deadlock detected in DB->put. retrying DB->put (%d)",
+                                attempt);
+                        continue;
+                }
+
+                if (ret != 0) {
+                        /* write failed */
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "failed to do DB->put() for key %s: %s",
+                                key_string, db_strerror (ret));
+                }
+                break;
+        }
+out:
+        return ret;
+}/* bdb_db_put */
+
+
+/* bdb_db_del - delete the key/value pair of @path from the db of @bctx.
+ *
+ * @bctx:  bctx_t * of the parent directory of @path.
+ * @txnid: NULL, or a valid DB_TXN * when embedded in an explicit transaction.
+ * @path:  path to the file whose key/value pair has to be deleted
+ *         (translated to a key using MAKE_KEY_FROM_PATH).
+ *
+ * NOTE: the db is opened here when bctx->dbp == NULL.
+ *
+ * NOTE: the pair is also removed from the cache (see bdb_cache_delete).
+ *
+ * NOTE: DB->del() is repeated for as long as it reports DB_LOCK_DEADLOCK.
+ *
+ * return: 0 on success, DB_NOTFOUND when the key does not exist, -1 on any
+ *         other error (including invalid arguments).
+ */
+int32_t
+bdb_db_del (bctx_t *bctx,
+            DB_TXN *txnid,
+            const char *path)
+{
+        DB      *storage    = NULL;
+        DBT      key        = {0,};
+        char    *key_string = NULL;
+        int32_t  ret        = -1;
+        int32_t  db_flags   = 0;
+        uint8_t  need_break = 0;
+        int32_t  retries    = 1;
+
+        /* both are dereferenced below, same checks as bdb_db_get */
+        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", path, out);
+
+        MAKE_KEY_FROM_PATH (key_string, path);
+
+        LOCK (&bctx->lock);
+        {
+                if (bctx->dbp == NULL) {
+                        bctx->dbp = bdb_db_open (bctx);
+                        storage = bctx->dbp;
+                } else {
+                        /* we are just fine, lets continue */
+                        storage = bctx->dbp;
+                }
+        }
+        UNLOCK (&bctx->lock);
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out);
+
+        ret = bdb_cache_delete (bctx, key_string);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out);
+
+        key.data = key_string;
+        key.size = strlen (key_string);
+        key.flags = DB_DBT_USERMEM;
+
+        do {
+                ret = storage->del (storage, txnid, &key, db_flags);
+
+                if (ret == DB_NOTFOUND) {
+                        gf_log ("bdb-ll",
+                                GF_LOG_DEBUG,
+                                "failed to delete %s from storage db, doesn't exist in storage DB",
+                                path);
+                        need_break = 1;
+                } else if (ret == DB_LOCK_DEADLOCK) {
+                        /* need_break stays 0: try again */
+                        retries++;
+                        gf_log ("bdb-ll",
+                                GF_LOG_ERROR,
+                                "deadlock detected in DB->del. retrying DB->del (%d)",
+                                retries);
+                } else if (ret == 0) {
+                        /* successfully deleted the entry */
+                        gf_log ("bdb-ll",
+                                GF_LOG_DEBUG,
+                                "deleted %s from storage db", path);
+                        ret = 0;
+                        need_break = 1;
+                } else {
+                        gf_log ("bdb-ll",
+                                GF_LOG_ERROR,
+                                "failed to delete %s from storage db: %s",
+                                path, db_strerror (ret));
+                        ret = -1;
+                        need_break = 1;
+                }
+        } while (!need_break);
+out:
+        return ret;
+}
+
+/* bdb_cursor_get - bdb version compatibility wrapper around the cursor get
+ * method (DBC->get when HAVE_BDB_CURSOR_GET is defined, DBC->c_get
+ * otherwise).
+ *
+ * return: -1 when @cursorp is NULL, otherwise the value returned by the
+ *         cursor get method. Anything other than 0 and DB_NOTFOUND is
+ *         logged as an error.
+ */
+int32_t
+bdb_cursor_get (DBC *cursorp,
+                DBT *key,
+                DBT *value,
+                int32_t flags)
+{
+        int32_t op_ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out);
+
+#ifdef HAVE_BDB_CURSOR_GET
+        op_ret = cursorp->get (cursorp, key, value, flags);
+#else
+        op_ret = cursorp->c_get (cursorp, key, value, flags);
+#endif
+        if ((op_ret == 0) || (op_ret == DB_NOTFOUND))
+                goto out;
+
+        gf_log ("bdb-ll", GF_LOG_ERROR,
+                "failed to CURSOR->get() for key %s (%s)",
+                (char *)key->data, db_strerror (op_ret));
+
+out:
+        return op_ret;
+}/* bdb_cursor_get */
+
+
+/*
+ * bdb_dirent_size - size to account for a directory entry named by @key.
+ *
+ * return: ALIGN() applied to a fixed 24 bytes plus key->size.
+ */
+int32_t
+bdb_dirent_size (DBT *key)
+{
+        /* FIX MEEEE!!! - hard-coded fixed part of an entry */
+        const int fixed_part = 24;
+
+        return ALIGN (fixed_part + key->size);
+}
+
+
+/* bdb_extract_bfd - translate a fd_t to a bfd (either a 'struct bdb_bfd' or
+ *                   'struct bdb_dir')
+ *
+ * The bdb specific file handle is kept in the fd's context for @this; it is
+ * fetched with fd_ctx_get() as a 64-bit value and converted back into a
+ * pointer.
+ *
+ * return: 'struct bdb_bfd *' or 'struct bdb_dir *' on success, or NULL when
+ *         an argument is NULL or no value is stored for @this.
+ */
+inline void *
+bdb_extract_bfd (fd_t *fd,
+                 xlator_t *this)
+{
+        uint64_t  tmp_bfd = 0;
+        void     *bfd     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb-ll", fd, out);
+        GF_VALIDATE_OR_GOTO ("bdb-ll", this, out);
+
+        /* tmp_bfd stays 0 when nothing is stored, which yields NULL below */
+        fd_ctx_get (fd, this, &tmp_bfd);
+        bfd = (void *)(long)tmp_bfd;
+
+out:
+        return bfd;
+}
+
+/* bdb_dbenv_init - create, open and configure the DB_ENV of this xlator.
+ *
+ * @this:      bdb xlator; this->private supplies every setting used here.
+ * @directory: home directory of the db environment.
+ *
+ * steps, in order:
+ *   1. db_env_create(), DB_ENV->set_errpfx (this->name) and
+ *      DB_ENV->set_lk_detect (DB_LOCK_DEFAULT).
+ *   2. DB_ENV->open() with private->envflags. When that reports
+ *      DB_RUNRECOVERY the open is repeated with DB_RECOVER replaced by
+ *      DB_RECOVER_FATAL.
+ *   3. automatic removal of log files is switched on or off according to
+ *      private->log_auto_remove.
+ *   4. only when private->transaction is set: DB_AUTO_COMMIT, the
+ *      transaction and lock timeouts (when non-zero) and the log directory
+ *      (private->logdir).
+ *   5. when private->errfile is set, it is opened for appending and given to
+ *      db for detailed error reports (used only for debugging purpose).
+ *
+ * Failures in steps 3-5 are logged and otherwise ignored.
+ *
+ * NOTE(review): Berkeley DB documents DB_ENV->set_lg_dir as a call to make
+ *      before DB_ENV->open - confirm whether step 4 takes effect.
+ * NOTE(review): the DB_RUNRECOVERY retry reuses a handle whose open just
+ *      failed - confirm against the Berkeley DB version in use.
+ *
+ * return: a DB_ENV * on success, NULL when an argument is invalid or the
+ *         environment could not be opened.
+ */
+static DB_ENV *
+bdb_dbenv_init (xlator_t *this,
+                char *directory)
+{
+        /* Create a DB environment */
+        DB_ENV        *dbenv       = NULL;
+        int32_t        ret         = 0;
+        bdb_private_t *private     = NULL;
+        int32_t        fatal_flags = 0;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (directory, out);
+
+        private = this->private;
+        VALIDATE_OR_GOTO (private, out);
+
+        ret = db_env_create (&dbenv, 0);
+        VALIDATE_OR_GOTO ((ret == 0), out);
+
+        /* NOTE: set_errpfx returns 'void' */
+        dbenv->set_errpfx(dbenv, this->name);
+
+        ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT);
+        VALIDATE_OR_GOTO ((ret == 0), out);
+
+        ret = dbenv->open(dbenv, directory,
+                          private->envflags,
+                          S_IRUSR | S_IWUSR);
+        if ((ret != 0) && (ret != DB_RUNRECOVERY)) {
+                gf_log (this->name,
+                        GF_LOG_CRITICAL,
+                        "failed to open DB environment (%s)",
+                        db_strerror (ret));
+                /* a handle whose open failed still has to be discarded */
+                dbenv->close (dbenv, 0);
+                dbenv = NULL;
+                goto out;
+        } else if (ret == DB_RUNRECOVERY) {
+                fatal_flags = ((private->envflags & (~DB_RECOVER)) | DB_RECOVER_FATAL);
+                ret = dbenv->open(dbenv, directory,
+                                  fatal_flags,
+                                  S_IRUSR | S_IWUSR);
+                if (ret != 0) {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "failed to open DB environment (%s) with DB_REOVER_FATAL",
+                                db_strerror (ret));
+                        /* discard the handle, see above */
+                        dbenv->close (dbenv, 0);
+                        dbenv = NULL;
+                        goto out;
+                } else {
+                        gf_log (this->name,
+                                GF_LOG_WARNING,
+                                "opened DB environment after DB_RECOVER_FATAL: %s",
+                                db_strerror (ret));
+                }
+        } else {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "DB environment successfull opened: %s",
+                        db_strerror (ret));
+        }
+
+
+
+#if (DB_VERSION_MAJOR == 4 && \
+     DB_VERSION_MINOR == 7)
+        if (private->log_auto_remove) {
+                ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 1);
+        } else {
+                ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 0);
+        }
+#else
+        if (private->log_auto_remove) {
+                ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 1);
+        } else {
+                ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0);
+        }
+#endif
+        if (ret != 0) {
+                gf_log ("bctx",
+                        GF_LOG_ERROR,
+                        "failed to set DB_LOG_AUTOREMOVE on dbenv: %s", db_strerror (ret));
+        } else {
+                gf_log ("bctx",
+                        GF_LOG_DEBUG,
+                        "DB_LOG_AUTOREMOVE set on dbenv");
+        }
+
+        if (private->transaction) {
+                ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1);
+
+                if (ret != 0) {
+                        gf_log ("bctx",
+                                GF_LOG_ERROR,
+                                "failed to set DB_AUTO_COMMIT on dbenv: %s",
+                                db_strerror (ret));
+                } else {
+                        gf_log ("bctx",
+                                GF_LOG_DEBUG,
+                                "DB_AUTO_COMMIT set on dbenv");
+                }
+
+                if (private->txn_timeout) {
+                        ret = dbenv->set_timeout (dbenv,
+                                                  private->txn_timeout,
+                                                  DB_SET_TXN_TIMEOUT);
+                        if (ret != 0) {
+                                gf_log ("bctx",
+                                        GF_LOG_ERROR,
+                                        "failed to set TXN_TIMEOUT to %d milliseconds "
+                                        "on dbenv: %s",
+                                        private->txn_timeout, db_strerror (ret));
+                        } else {
+                                gf_log ("bctx",
+                                        GF_LOG_DEBUG,
+                                        "TXN_TIMEOUT set to %d milliseconds",
+                                        private->txn_timeout);
+                        }
+                }
+
+                if (private->lock_timeout) {
+                        /* the lock timeout takes its own option value; the
+                         * transaction timeout used to be passed here */
+                        ret = dbenv->set_timeout(dbenv,
+                                                 private->lock_timeout,
+                                                 DB_SET_LOCK_TIMEOUT);
+
+                        if (ret != 0) {
+                                gf_log ("bctx",
+                                        GF_LOG_ERROR,
+                                        "failed to set LOCK_TIMEOUT to %d milliseconds "
+                                        "on dbenv: %s",
+                                        private->lock_timeout, db_strerror (ret));
+                        } else {
+                                gf_log ("bctx",
+                                        GF_LOG_DEBUG,
+                                        "LOCK_TIMEOUT set to %d milliseconds",
+                                        private->lock_timeout);
+                        }
+                }
+
+                ret = dbenv->set_lg_dir (dbenv, private->logdir);
+
+                if (ret != 0) {
+                        gf_log ("bctx",
+                                GF_LOG_ERROR,
+                                "failed to set log directory for dbenv: %s", db_strerror (ret));
+                } else {
+                        gf_log ("bctx",
+                                GF_LOG_DEBUG,
+                                "set dbenv log dir to %s", private->logdir);
+                }
+
+        }
+
+        if (private->errfile) {
+                private->errfp = fopen (private->errfile, "a+");
+                if (private->errfp) {
+                        dbenv->set_errfile (dbenv, private->errfp);
+                } else {
+                        gf_log ("bctx",
+                                GF_LOG_ERROR,
+                                "failed to open errfile: %s", strerror (errno));
+                }
+        }
+
+out:
+        return dbenv;
+}
+
+#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv)
+
+/* bdb_checkpoint - body of the checkpointing thread.
+ *
+ * during transactional usage, db does not directly write the data to db
+ * files, instead db writes a 'log' (similar to a journal entry) into a log
+ * file. since a filesystem server is expected to run for a long time, these
+ * logs are flushed regularly through 'checkpointing': a checkpoint makes db
+ * write the logged changes to the db files. NOTE: removing unwanted log
+ * files is not part of the dbenv->txn_checkpoint() call.
+ *
+ * @data: xlator_t of the current instance of bdb xlator.
+ *
+ * The thread shares the DB_ENV handle (BDB_ENV (this)) with the filesystem
+ * thread. Every round it reads private->active under private->active_lock
+ * and runs a checkpoint; while active it then sleeps for
+ * private->checkpoint_timeout seconds ('option checkpoint-timeout
+ * <time-in-seconds>' in volfile), otherwise that checkpoint was the final
+ * one and the thread ends.
+ *
+ * NOTE: this thread is started only for transactional environments
+ *       (private->transaction set).
+ *
+ * return: always NULL.
+ */
+static void *
+bdb_checkpoint (void *data)
+{
+        xlator_t           *this    = (xlator_t *) data;
+        DB_ENV             *dbenv   = BDB_ENV(this);
+        struct bdb_private *private = this->private;
+        int32_t             ret     = 0;
+        uint32_t            running = 0;
+
+        for (;;) {
+                LOCK (&private->active_lock);
+                running = private->active;
+                UNLOCK (&private->active_lock);
+
+                /* the same checkpoint call is made in both states */
+                ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0);
+
+                if (!running) {
+                        if (ret) {
+                                gf_log ("bctx", GF_LOG_ERROR,
+                                        "failed to do final checkpoint environment: %s",
+                                        db_strerror (ret));
+                        } else {
+                                gf_log ("bctx", GF_LOG_DEBUG,
+                                        "final checkpointing successful");
+                        }
+                        break;
+                }
+
+                if (ret) {
+                        gf_log ("bctx", GF_LOG_ERROR,
+                                "failed to checkpoint environment: %s", db_strerror (ret));
+                } else {
+                        gf_log ("bctx", GF_LOG_DEBUG,
+                                "checkpointing successful");
+                }
+
+                sleep (private->checkpoint_timeout);
+        }
+
+        return NULL;
+}
+
+/*
+ * BDB_CACHE_INIT - switch the internal cache of the bdb xlator on.
+ *
+ * @this and @options are not consulted: the cache is unconditionally
+ * enabled in @private.
+ */
+static inline void
+BDB_CACHE_INIT (xlator_t *this,
+                dict_t *options,
+                struct bdb_private *private)
+{
+        private->cache = ON;    /* cache is always on */
+}
+
+/*
+ * BDB_LOG_REMOVE_INIT - request automatic removal of db log files.
+ *
+ * @options is not consulted: private->log_auto_remove is always set to 1
+ * and the decision is logged at debug level.
+ */
+static inline void
+BDB_LOG_REMOVE_INIT(xlator_t *this,
+                    dict_t *options,
+                    struct bdb_private *private)
+{
+        gf_log (this->name, GF_LOG_DEBUG,
+                "DB_ENV will use DB_LOG_AUTO_REMOVE");
+
+        private->log_auto_remove = 1;
+}
+
+/*
+ * BDB_ERRFILE_INIT - pick up 'option errfile' from @options.
+ *
+ * When the option is present a copy of its value is stored in
+ * private->errfile; otherwise @private is left untouched.
+ */
+static inline void
+BDB_ERRFILE_INIT (xlator_t *this,
+                  dict_t *options,
+                  struct bdb_private *private)
+{
+        data_t *errfile = dict_get (options, "errfile");
+
+        if (errfile == NULL)
+                return;
+
+        private->errfile = strdup (errfile->data);
+        gf_log (this->name, GF_LOG_DEBUG,
+                "using errfile: %s", private->errfile);
+}
+
+/*
+ * BDB_TABLE_INIT - allocate and set up the bctx table of this xlator.
+ *
+ * The table gets its lists, locks and hash buckets initialised, inherits
+ * transaction, access_mode and dbflags from @private and is configured
+ * from @options:
+ *   "lru-limit": parsed with strtol, BDB_DEFAULT_LRU_LIMIT when absent.
+ *   "page-size": parsed with gf_string2bytesize and accepted only inside
+ *                [BDB_LL_PAGE_SIZE_MIN, BDB_LL_PAGE_SIZE_MAX];
+ *                BDB_LL_PAGE_SIZE_DEFAULT is used when the option is absent,
+ *                unparsable or out of range.
+ *
+ * On success the table is stored in private->b_table. When either
+ * allocation fails the failure is logged and private->b_table is not set.
+ */
+static inline void
+BDB_TABLE_INIT (xlator_t *this,
+                dict_t *options,
+                struct bdb_private *private)
+{
+        bctx_table_t *table     = NULL;
+        int32_t       idx       = 0;
+        data_t       *lru_limit = NULL;
+        data_t       *page_size = NULL;
+
+        table = CALLOC (1, sizeof (*table));
+        if (table == NULL) {
+                gf_log ("bdb-ll",
+                        GF_LOG_CRITICAL,
+                        "failed to allocate bctx table: out of memory");
+                return;
+        }
+
+        /* allocate the buckets before anything else so that a failure can
+         * simply drop the table again */
+        table->hash_size = BDB_DEFAULT_HASH_SIZE;
+        table->b_hash = CALLOC (BDB_DEFAULT_HASH_SIZE, sizeof (struct list_head));
+        if (table->b_hash == NULL) {
+                gf_log ("bdb-ll",
+                        GF_LOG_CRITICAL,
+                        "failed to allocate bctx hash table: out of memory");
+                FREE (table);
+                return;
+        }
+
+        for (idx = 0; idx < table->hash_size; idx++)
+                INIT_LIST_HEAD(&(table->b_hash[idx]));
+
+        INIT_LIST_HEAD(&(table->b_lru));
+        INIT_LIST_HEAD(&(table->active));
+        INIT_LIST_HEAD(&(table->purge));
+
+        LOCK_INIT (&table->lock);
+        LOCK_INIT (&table->checkpoint_lock);
+
+        table->transaction = private->transaction;
+        table->access_mode = private->access_mode;
+        table->dbflags = private->dbflags;
+        table->this = this;
+
+        lru_limit = dict_get (options, "lru-limit");
+
+        /* TODO: set max lockers and max txns to accomodate
+         * for more than lru_limit */
+        if (lru_limit) {
+                table->lru_limit = strtol (lru_limit->data, NULL, 0);
+                gf_log ("bdb-ll",
+                        GF_LOG_DEBUG,
+                        "setting bctx lru limit to %d", table->lru_limit);
+        } else {
+                table->lru_limit = BDB_DEFAULT_LRU_LIMIT;
+        }
+
+        page_size = dict_get (options, "page-size");
+
+        if (page_size) {
+                if (gf_string2bytesize (page_size->data,
+                                        &table->page_size) != 0) {
+                        gf_log ("bdb-ll",
+                                GF_LOG_ERROR,
+                                "invalid number format \"%s\""
+                                " of \"option page-size\"",
+                                page_size->data);
+                        /* do not keep whatever the failed parse left behind */
+                        table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
+                } else if (!(table->page_size >= BDB_LL_PAGE_SIZE_MIN &&
+                             table->page_size <= BDB_LL_PAGE_SIZE_MAX)) {
+                        gf_log ("bdb-ll",
+                                GF_LOG_ERROR,
+                                "pagesize %s is out of range."
+                                "Allowed pagesize is between %d and %d",
+                                page_size->data,
+                                BDB_LL_PAGE_SIZE_MIN,
+                                BDB_LL_PAGE_SIZE_MAX);
+                        /* a rejected value must not reach DB->set_pagesize */
+                        table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
+                }
+        } else {
+                table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
+        }
+        gf_log ("bdb-ll",
+                GF_LOG_DEBUG, "using page-size %"PRIu64,
+                table->page_size);
+
+        private->b_table = table;
+}
+
+/*
+ * BDB_DIRECTORY_INIT - set up the log directory and the db environment.
+ *
+ * Nothing is done unless 'option directory' is present in @options.
+ *
+ * private->logdir becomes a copy of 'option logdir' when that names (or can
+ * be created as) a directory, and a copy of 'option directory' otherwise.
+ * The db environment is then initialised in 'option directory' and stored
+ * in private->b_table->dbenv. With private->transaction set, the
+ * checkpointing thread is started as well.
+ *
+ * NOTE: umask (000) is applied to the whole process before the log
+ *       directory is created and is not restored.
+ * NOTE(review): @private is freed when the environment cannot be
+ *       initialised although the caller still holds the pointer - confirm
+ *       the caller does not touch it afterwards.
+ */
+static inline void
+BDB_DIRECTORY_INIT (xlator_t *this,
+                    dict_t *options,
+                    struct bdb_private *private)
+{
+        data_t      *directory = NULL;
+        data_t      *logdir    = NULL;
+        int32_t      op_ret    = -1;
+        struct stat  stbuf     = {0};
+
+        directory = dict_get (options, "directory");
+        if (directory == NULL)
+                return;
+
+        /* the bctx table is missing when its allocation failed */
+        if (private->b_table == NULL) {
+                gf_log ("bdb-ll", GF_LOG_ERROR,
+                        "bctx table not initialized, "
+                        "cannot initialize db environment");
+                return;
+        }
+
+        logdir = dict_get (options, "logdir");
+
+        if (logdir == NULL) {
+                gf_log ("bdb-ll",
+                        GF_LOG_DEBUG,
+                        "using default logdir as database home");
+                private->logdir = strdup (directory->data);
+
+        } else {
+                private->logdir = strdup (logdir->data);
+                gf_log ("bdb-ll",
+                        GF_LOG_DEBUG,
+                        "using logdir: %s", private->logdir);
+                umask (000);
+                if (mkdir (private->logdir, 0777) == 0) {
+                        gf_log ("bdb-ll", GF_LOG_WARNING,
+                                "logdir specified (%s) not exists, created",
+                                private->logdir);
+                }
+
+                op_ret = stat (private->logdir, &stbuf);
+                if ((op_ret != 0) || !S_ISDIR (stbuf.st_mode)) {
+                        gf_log ("bdb-ll",
+                                GF_LOG_ERROR,
+                                "specified logdir doesn't exist, "
+                                "using default (environment home directory: %s)",
+                                directory->data);
+                        /* release the rejected name before replacing it */
+                        free (private->logdir);
+                        private->logdir = strdup (directory->data);
+                }
+        }
+
+        private->b_table->dbenv = bdb_dbenv_init (this, directory->data);
+
+        if (!private->b_table->dbenv) {
+                gf_log ("bdb-ll", GF_LOG_ERROR,
+                        "failed to initialize db environment");
+                FREE (private);
+                return;
+        }
+
+        if (private->transaction) {
+                /* all well, start the checkpointing thread */
+                LOCK_INIT (&private->active_lock);
+
+                LOCK (&private->active_lock);
+                private->active = 1;
+                UNLOCK (&private->active_lock);
+
+                op_ret = pthread_create (&private->checkpoint_thread, NULL,
+                                         bdb_checkpoint, this);
+                if (op_ret != 0) {
+                        /* pthread_create reports the error as its result */
+                        gf_log ("bdb-ll", GF_LOG_ERROR,
+                                "failed to create checkpoint thread: %s",
+                                strerror (op_ret));
+                }
+        }
+}
+
+/*
+ * BDB_DIR_MODE_INIT - set private->dir_mode from 'option dir-mode'.
+ *
+ * The option is parsed as an octal number. DEFAULT_DIR_MODE is used when
+ * the option is absent, has trailing characters or fails
+ * IS_VALID_FILE_MODE. S_IFDIR is always or-ed into the result.
+ */
+static inline void
+BDB_DIR_MODE_INIT (xlator_t *this,
+                   dict_t *options,
+                   struct bdb_private *private)
+{
+        char   *tail     = NULL;
+        data_t *dir_mode = dict_get (options, "dir-mode");
+
+        if (dir_mode == NULL) {
+                private->dir_mode = DEFAULT_DIR_MODE;
+        } else {
+                private->dir_mode = strtol (dir_mode->data, &tail, 8);
+
+                if (!(*tail) && IS_VALID_FILE_MODE(private->dir_mode)) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "setting dir-mode to %o", private->dir_mode);
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "invalid dir-mode %o. setting to default %o",
+                                private->dir_mode,
+                                DEFAULT_DIR_MODE);
+                        private->dir_mode = DEFAULT_DIR_MODE;
+                }
+        }
+
+        private->dir_mode |= S_IFDIR;
+}
+
+/*
+ * BDB_FILE_MODE_INIT - set the file and symlink modes from 'option file-mode'.
+ *
+ * The option is parsed as an octal number. DEFAULT_FILE_MODE is used when
+ * the option is absent, has trailing characters or fails
+ * IS_VALID_FILE_MODE. private->symlink_mode is that mode with S_IFLNK,
+ * private->file_mode the same mode with S_IFREG.
+ */
+static inline void
+BDB_FILE_MODE_INIT (xlator_t *this,
+                    dict_t *options,
+                    struct bdb_private *private)
+{
+        char   *tail      = NULL;
+        data_t *file_mode = dict_get (options, "file-mode");
+
+        if (file_mode == NULL) {
+                private->file_mode = DEFAULT_FILE_MODE;
+        } else {
+                private->file_mode = strtol (file_mode->data, &tail, 8);
+
+                if (!(*tail) && IS_VALID_FILE_MODE(private->file_mode)) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "setting file-mode to %o", private->file_mode);
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "invalid file-mode %o. setting to default %o",
+                                private->file_mode,
+                                DEFAULT_FILE_MODE);
+                        private->file_mode = DEFAULT_FILE_MODE;
+                }
+        }
+
+        /* the symlink mode is derived before S_IFREG is added */
+        private->symlink_mode = private->file_mode | S_IFLNK;
+        private->file_mode |= S_IFREG;
+}
+
+/*
+ * BDB_CHECKPOINT_TIMEOUT_INIT - set private->checkpoint_timeout.
+ *
+ * BDB_DEFAULT_CHECKPOINT_TIMEOUT is used when 'option checkpoint-timeout'
+ * is absent. A configured value is parsed with strtol and always kept; a
+ * value outside 5..60 seconds only produces a warning.
+ */
+static inline void
+BDB_CHECKPOINT_TIMEOUT_INIT (xlator_t *this,
+                             dict_t *options,
+                             struct bdb_private *private)
+{
+        data_t *checkpoint_timeout = dict_get (options, "checkpoint-timeout");
+
+        private->checkpoint_timeout = BDB_DEFAULT_CHECKPOINT_TIMEOUT;
+
+        if (checkpoint_timeout == NULL) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting checkpoint-timeout to default: %d seconds",
+                        private->checkpoint_timeout);
+                return;
+        }
+
+        private->checkpoint_timeout = strtol (checkpoint_timeout->data, NULL, 0);
+
+        if ((private->checkpoint_timeout >= 5) &&
+            (private->checkpoint_timeout <= 60)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting checkpoint-timeout to %d seconds",
+                        private->checkpoint_timeout);
+                return;
+        }
+
+        gf_log (this->name, GF_LOG_WARNING,
+                "checkpoint-timeout %d seconds too %s",
+                private->checkpoint_timeout,
+                (private->checkpoint_timeout < 5)?"low":"high");
+}
+
+/* BDB_LOCK_TIMEOUT_INIT - read the "lock-timeout" option (milliseconds).
+ *
+ * @this: xlator instance, used only as the log domain.
+ * @options: dictionary that may carry "lock-timeout".
+ * @private: bdb private structure; lock_timeout is written only when the
+ *           option is present.
+ *
+ * A value above 4260000 is logged as out of range and replaced with 0.
+ */
+static inline void
+BDB_LOCK_TIMEOUT_INIT (xlator_t *this,
+                       dict_t *options,
+                       struct bdb_private *private)
+{
+        data_t *opt = dict_get (options, "lock-timeout");
+
+        if (opt == NULL)
+                return;
+
+        private->lock_timeout = strtol (opt->data, NULL, 0);
+
+        if (private->lock_timeout <= 4260000) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting lock-timeout to %d milliseconds",
+                        private->lock_timeout);
+                return;
+        }
+
+        /* db allows DB_SET_LOCK_TIMEOUT to be set to a maximum of
+         * 71 mins (4260000 milliseconds) */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "lock-timeout %d, out of range",
+                private->lock_timeout);
+        private->lock_timeout = 0;
+}
+
+/* BDB_TRANSACTION_TIMEOUT_INIT - read the "transaction-timeout" option
+ * (milliseconds).
+ *
+ * @this: xlator instance, used only as the log domain.
+ * @options: dictionary that may carry "transaction-timeout".
+ * @private: bdb private structure; txn_timeout is written only when the
+ *           option is present.
+ *
+ * A value above 4260000 is logged as out of range and replaced with 0.
+ */
+static inline void
+BDB_TRANSACTION_TIMEOUT_INIT (xlator_t *this,
+                              dict_t *options,
+                              struct bdb_private *private)
+{
+        data_t *opt = dict_get (options, "transaction-timeout");
+
+        if (opt == NULL)
+                return;
+
+        private->txn_timeout = strtol (opt->data, NULL, 0);
+
+        if (private->txn_timeout <= 4260000) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "setting transaction-timeout to %d milliseconds",
+                        private->txn_timeout);
+                return;
+        }
+
+        /* db allows DB_SET_TXN_TIMEOUT to be set to a maximum of
+         * 71 mins (4260000 milliseconds) */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "transaction-timeout %d, out of range",
+                private->txn_timeout);
+        private->txn_timeout = 0;
+}
+
+/* BDB_TRANSACTION_INIT - choose between cache mode and persistent mode.
+ *
+ * @this: xlator instance, used only as the log domain.
+ * @options: dictionary that may carry the "mode" option.
+ * @private: bdb private structure; envflags, dbflags and transaction are
+ *           written.
+ *
+ * "mode" set to "off" selects cache mode (transaction OFF, no locking,
+ * transaction or recovery subsystems in envflags). Any other value, or no
+ * value at all, selects persistent mode (transaction ON).
+ */
+static inline void
+BDB_TRANSACTION_INIT (xlator_t *this,
+                      dict_t *options,
+                      struct bdb_private *private)
+{
+        data_t *opt = dict_get (options, "mode");
+
+        /* the database open flags are the same in both modes */
+        private->dbflags = DB_CREATE | DB_THREAD;
+
+        if ((opt != NULL) && (strcmp (opt->data, "off") == 0)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "cache mode selected");
+                private->transaction = OFF;
+                private->envflags = DB_CREATE | DB_INIT_LOG |
+                        DB_INIT_MPOOL | DB_THREAD;
+                return;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "persistant mode selected");
+        private->transaction = ON;
+        private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG |
+                DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD;
+}
+
+/* BDB_ACCESS_MODE_INIT - choose the database access method.
+ *
+ * @this: xlator instance, used only as the log domain.
+ * @options: dictionary that may carry the "access-mode" option.
+ * @private: bdb private structure; access_mode is written.
+ *
+ * "access-mode" set to "btree" selects DB_BTREE. Any other value, or no
+ * value at all, selects DB_HASH.
+ */
+static inline void
+BDB_ACCESS_MODE_INIT (xlator_t *this,
+                      dict_t *options,
+                      struct bdb_private *private)
+{
+        data_t *opt = dict_get (options, "access-mode");
+        int     use_btree = (opt != NULL) &&
+                            (strcmp (opt->data, "btree") == 0);
+
+        if (!use_btree) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "using access mode HASH");
+                private->access_mode = DB_HASH;
+                return;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "using access mode BTREE");
+        private->access_mode = DB_BTREE;
+}
+
+
+/* bdb_db_init - initialize bdb xlator
+ *
+ * reads the options from @options dictionary and sets appropriate values in @this->private
+ * by running every BDB_*_INIT helper once, in the fixed order below. the inode-number lock
+ * and counter are set up between the lock-timeout and checkpoint-timeout steps.
+ *
+ * @this: xlator instance; @this->private is used as the bdb_private_t to fill in.
+ * @options: option dictionary handed unchanged to every helper.
+ *
+ * return: op_ret, which is never modified after being initialized to 0 in this function.
+ * NOTE(review): earlier documentation promised -1 on error and DB_ENV initialization;
+ * neither is visible in this function - presumably handled inside the helpers, confirm.
+ */
+int
+bdb_db_init (xlator_t *this,
+ dict_t *options)
+{
+ /* value handed back to the caller; stays 0 throughout this function */
+ int32_t op_ret = 0;
+ bdb_private_t *private = NULL;
+
+ private = this->private;
+
+ BDB_CACHE_INIT (this, options, private);
+
+ BDB_ACCESS_MODE_INIT (this, options, private);
+
+ BDB_TRANSACTION_INIT (this, options, private);
+
+ BDB_TRANSACTION_TIMEOUT_INIT (this, options, private);
+
+ BDB_LOCK_TIMEOUT_INIT (this, options, private);
+
+ {
+ LOCK_INIT (&private->ino_lock);
+ private->next_ino = 2;
+ }
+
+ BDB_CHECKPOINT_TIMEOUT_INIT (this, options, private);
+
+ BDB_FILE_MODE_INIT (this, options, private);
+
+ BDB_DIR_MODE_INIT (this, options, private);
+
+ BDB_TABLE_INIT (this, options, private);
+
+ BDB_ERRFILE_INIT (this, options, private);
+
+ BDB_LOG_REMOVE_INIT (this, options, private);
+
+ BDB_DIRECTORY_INIT (this, options, private);
+
+ return op_ret;
+}
diff --git a/xlators/storage/bdb/src/bdb.c b/xlators/storage/bdb/src/bdb.c
new file mode 100644
index 00000000000..e820e867a94
--- /dev/null
+++ b/xlators/storage/bdb/src/bdb.c
@@ -0,0 +1,3371 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+/* bdb based storage translator - named as 'bdb' translator
+ *
+ *
+ * There can be only two modes for files existing on bdb translator:
+ * 1. DIRECTORY - directories are stored by bdb as regular directories on background
+ * file-system. directories also have an entry in the ns_db.db of their parent directory.
+ * 2. REGULAR FILE - regular files are stored as records in the storage_db.db present in
+ * the directory. regular files also have an entry in ns_db.db
+ *
+ * Internally bdb has a maximum of three different types of logical files associated with
+ * each directory:
+ * 1. storage_db.db - storage database, used to store the data corresponding to regular
+ * files in the form of key/value pair. file-name is the 'key' and data
+ * is 'value'.
+ * 2. directory (all subdirectories) - any subdirectory will have a regular directory entry.
+ */
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define __XOPEN_SOURCE 500
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <ftw.h>
+#include <libgen.h>
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "logging.h"
+#include "bdb.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+
+/* to be used only by fops, nobody else */
+#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv)
+#define B_TABLE(this) (((struct bdb_private *)this->private)->b_table)
+
+
+/* bdb_mknod - create an empty regular file as a record in the database of the
+ * parent directory.
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno, loc->inode, &stbuf).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t naming the file to create.
+ * @mode: requested mode; anything but a regular file is refused with EPERM.
+ * @dev: not used.
+ *
+ * the stat of the parent's storage database file is used as the template for
+ * the reply; st_ino, st_mode, st_size and st_blocks are overwritten.
+ *
+ * bdb_db_put() is treated as returning 0 on success, which is how
+ * bdb_create(), bdb_writev() and bdb_rename() interpret it as well.
+ */
+int32_t
+bdb_mknod (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           mode_t mode,
+           dev_t dev)
+{
+        int32_t      op_ret     = -1;
+        int32_t      op_errno   = EINVAL;
+        char        *key_string = NULL; /* after translating loc->path to DB key */
+        char        *db_path    = NULL;
+        bctx_t      *bctx       = NULL;
+        struct stat  stbuf      = {0,};
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        if (!S_ISREG(mode)) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "mknod for non-regular file");
+                op_ret = -1;
+                op_errno = EPERM;
+                goto out;
+        }
+
+        bctx = bctx_parent (B_TABLE(this), loc->path);
+        if (bctx == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to get bctx for path: %s", loc->path);
+                op_ret = -1;
+                op_errno = ENOENT;
+                goto out;
+        }
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
+
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        MAKE_KEY_FROM_PATH (key_string, loc->path);
+        op_ret = bdb_db_put (bctx, NULL, key_string, NULL, 0, 0, 0);
+        if (op_ret == 0) {
+                /* create successful */
+                stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+                stbuf.st_mode = mode;
+                stbuf.st_size = 0;
+                stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+                /* do not hand a stale errno back on success */
+                op_errno = 0;
+        } else {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "bdb_db_put() failed for path: %s", loc->path);
+                op_ret = -1;
+                op_errno = ENOENT;
+        }
+
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
+        return 0;
+}
+
+/* is_dir_empty - tell whether the directory named by @loc holds no entries.
+ *
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t naming the directory.
+ *
+ * two sources are consulted: the key count reported by bdb_db_stat() for the
+ * directory's database, and a readdir() scan of the real directory for any
+ * entry that is neither a bdb private file nor "." / "..".
+ *
+ * return: 1 when considered empty, 0 otherwise. a missing bctx or missing db
+ * statistics count as empty; a failing opendir() counts as not empty.
+ *
+ * NOTE(review): dbstat is not released here - confirm who owns the buffer
+ * returned by bdb_db_stat().
+ */
+static inline int32_t
+is_dir_empty (xlator_t *this,
+              loc_t *loc)
+{
+        int32_t        ret       = 1;
+        bctx_t        *bctx      = NULL;
+        DIR           *dirp      = NULL;
+        char          *real_path = NULL;
+        void          *dbstat    = NULL;
+        struct dirent *dent      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        bctx = bctx_lookup (B_TABLE(this), loc->path);
+        if (bctx == NULL) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "failed to get bctx from inode for dir: %s,"
+                        "assuming empty directory",
+                        loc->path);
+                ret = 1;
+                goto out;
+        }
+
+        dbstat = bdb_db_stat (bctx, NULL, 0);
+        if (dbstat == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to get db stat for db at path: %s", loc->path);
+                ret = 1;
+                goto out;
+        }
+
+        /* first verdict: does the database hold any keys? */
+        switch (bctx->table->access_mode)
+        {
+        case DB_HASH:
+                ret = (((DB_HASH_STAT *)dbstat)->hash_nkeys == 0);
+                break;
+        case DB_BTREE:
+        case DB_RECNO:
+                ret = (((DB_BTREE_STAT *)dbstat)->bt_nkeys == 0);
+                break;
+        case DB_QUEUE:
+                ret = (((DB_QUEUE_STAT *)dbstat)->qs_nkeys == 0);
+                break;
+        case DB_UNKNOWN:
+                gf_log (this->name,
+                        GF_LOG_CRITICAL,
+                        "unknown access-mode set for db");
+                ret = 0;
+                break;
+        default:
+                break;
+        }
+
+        /* second verdict: anything on disk besides bdb's own files? */
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        dirp = opendir (real_path);
+        if (dirp == NULL) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "failed to opendir(%s)", loc->path);
+                ret = 0;
+                goto out;
+        }
+
+        while ((dent = readdir (dirp)) != NULL) {
+                if (IS_BDB_PRIVATE_FILE(dent->d_name))
+                        continue;
+                if (IS_DOT_DOTDOT(dent->d_name))
+                        continue;
+
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "directory (%s) not empty, has a non-db entry",
+                        loc->path);
+                ret = 0;
+                break;
+        }
+        closedir (dirp);
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        return ret;
+}
+/* bdb_rename - rename @oldloc to @newloc; the strategy depends on the type of the source inode.
+ *
+ * - regular file: the record is read from the old parent's database, deleted there and
+ *   stored under the new key in the new parent's database inside one transaction. a
+ *   non-directory entry already present at the destination's real path is unlink()ed first.
+ * - symlink: rename() is performed on the real paths; when lstat() on the destination
+ *   fails, a record with the new key is first deleted from the new parent's database.
+ * - directory: see the NOTE(review) on the S_ISDIR branch below.
+ * - anything else: fails with EPERM.
+ *
+ * the frame is unwound with op_ret, op_errno and &stbuf; stbuf is only ever written by
+ * the lstat() calls on the destination path.
+ *
+ * NOTE(review): several 'goto out' paths leave with oldbctx/newbctx still referenced and
+ * 'out' never calls bctx_unref(); 'buf' and the tempnam() results are not free()d in this
+ * function either - confirm whether these are leaks.
+ */
+int32_t
+bdb_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc)
+{
+ struct bdb_private *private = NULL;
+ bctx_table_t *table = NULL;
+ bctx_t *oldbctx = NULL;
+ bctx_t *newbctx = NULL;
+ bctx_t *tmpbctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = ENOENT;
+ int32_t read_size = 0;
+ struct stat stbuf = {0,};
+ struct stat old_stbuf = {0,}; /* NOTE(review): never written after this initialization */
+ DB_TXN *txnid = NULL;
+ char *real_newpath = NULL;
+ char *real_oldpath = NULL;
+ char *oldkey = NULL;
+ char *newkey = NULL;
+ char *buf = NULL; /* pointer to temporary buffer, where
+ * the contents of a file are read, if
+ * file being renamed is a regular file */
+ char *real_db_newpath = NULL;
+ char *tmp_db_newpath = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+ GF_VALIDATE_OR_GOTO ("bdb", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, newloc, out);
+ GF_VALIDATE_OR_GOTO (this->name, oldloc, out);
+
+ private = this->private;
+ table = private->b_table;
+
+ MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
+
+ if (S_ISREG (oldloc->inode->st_mode)) {
+ oldbctx = bctx_parent (B_TABLE(this), oldloc->path); /* NOTE(review): not NULL-checked before use */
+ MAKE_REAL_PATH (real_newpath, this, newloc->path);
+
+ op_ret = lstat (real_newpath, &stbuf);
+
+ if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))) {
+ op_ret = -1;
+ op_errno = EISDIR;
+ goto out;
+ }
+ if (op_ret == 0) {
+ /* destination is a symlink (it exists at the real path and is not a directory) */
+ MAKE_KEY_FROM_PATH (oldkey, oldloc->path);
+ MAKE_KEY_FROM_PATH (newkey, newloc->path);
+
+ op_ret = unlink (real_newpath);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "failed to unlink %s (%s)",
+ newloc->path, strerror (op_errno));
+ goto out;
+ }
+ newbctx = bctx_parent (B_TABLE (this), newloc->path);
+ GF_VALIDATE_OR_GOTO (this->name, newbctx, out);
+
+ op_ret = bdb_txn_begin (BDB_ENV(this), &txnid); /* NOTE(review): result is not checked */
+
+ if ((read_size =
+ bdb_db_get (oldbctx, txnid, oldkey, &buf, 0, 0)) < 0) {
+ bdb_txn_abort (txnid);
+ } else if ((op_ret =
+ bdb_db_del (oldbctx, txnid, oldkey)) != 0) {
+ bdb_txn_abort (txnid);
+ } else if ((op_ret = bdb_db_put (newbctx, txnid,
+ newkey, buf,
+ read_size, 0, 0)) != 0) {
+ bdb_txn_abort (txnid);
+ } else {
+ bdb_txn_commit (txnid);
+ }
+
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (newbctx);
+ } else {
+ /* destination doesn't exist or a regular file */
+ MAKE_KEY_FROM_PATH (oldkey, oldloc->path);
+ MAKE_KEY_FROM_PATH (newkey, newloc->path);
+
+ newbctx = bctx_parent (B_TABLE (this), newloc->path);
+ GF_VALIDATE_OR_GOTO (this->name, newbctx, out);
+
+ op_ret = bdb_txn_begin (BDB_ENV(this), &txnid); /* NOTE(review): result is not checked */
+
+ if ((read_size = bdb_db_get (oldbctx, txnid,
+ oldkey, &buf,
+ 0, 0)) < 0) {
+ bdb_txn_abort (txnid);
+ } else if ((op_ret = bdb_db_del (oldbctx,
+ txnid, oldkey)) != 0) {
+ bdb_txn_abort (txnid);
+ } else if ((op_ret = bdb_db_put (newbctx, txnid,
+ newkey, buf,
+ read_size, 0, 0)) != 0) {
+ bdb_txn_abort (txnid);
+ } else {
+ bdb_txn_commit (txnid);
+ }
+
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (newbctx);
+ }
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (oldbctx);
+ } else if (S_ISLNK (oldloc->inode->st_mode)) {
+ MAKE_REAL_PATH (real_newpath, this, newloc->path);
+ op_ret = lstat (real_newpath, &stbuf);
+ if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))) {
+ op_ret = -1;
+ op_errno = EISDIR;
+ goto out;
+ }
+
+ if (op_ret == 0){
+ /* destination exists and is also a symlink */
+ MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
+ op_ret = rename (real_oldpath, real_newpath);
+ op_errno = errno;
+
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "failed to rename symlink %s (%s)",
+ oldloc->path, strerror (op_errno));
+ }
+ goto out;
+ }
+
+ /* destination doesn't exist */
+ MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
+ MAKE_KEY_FROM_PATH (newkey, newloc->path);
+ newbctx = bctx_parent (B_TABLE (this), newloc->path);
+ GF_VALIDATE_OR_GOTO (this->name, newbctx, out);
+
+ op_ret = bdb_db_del (newbctx, txnid, newkey); /* txnid is still NULL on this path */
+ if (op_ret != 0) {
+ /* no problem */
+ }
+ op_ret = rename (real_oldpath, real_newpath);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "failed to rename %s to %s (%s)",
+ oldloc->path, newloc->path, strerror (op_errno));
+ goto out;
+ }
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (newbctx);
+ } else if (S_ISDIR (oldloc->inode->st_mode) &&
+ (old_stbuf.st_nlink == 2)) { /* NOTE(review): old_stbuf is still all zeroes here, so this
+ * test cannot succeed and directories fall through to the
+ * final EPERM branch - confirm whether an lstat() of
+ * real_oldpath was intended */
+ tmp_db_newpath = tempnam (private->export_path, "rename_temp");
+ GF_VALIDATE_OR_GOTO (this->name, tmp_db_newpath, out);
+
+ MAKE_REAL_PATH (real_newpath, this, newloc->path);
+
+ MAKE_REAL_PATH_TO_STORAGE_DB (real_db_newpath, this, newloc->path);
+
+ oldbctx = bctx_lookup (B_TABLE(this), oldloc->path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ GF_VALIDATE_OR_GOTO (this->name, oldbctx, out);
+
+ op_ret = lstat (real_newpath, &stbuf);
+ if ((op_ret == 0) &&
+ S_ISDIR (stbuf.st_mode) &&
+ is_dir_empty (this, newloc)) {
+ /* destination is an existing, empty directory: park the db under a temporary name */
+ tmpbctx = bctx_rename (oldbctx, tmp_db_newpath);
+ op_ret = -1;
+ op_errno = ENOENT;
+ GF_VALIDATE_OR_GOTO (this->name, tmpbctx, out);
+
+ op_ret = rename (real_oldpath, real_newpath);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "rename directory %s to %s failed: %s",
+ oldloc->path, newloc->path,
+ strerror (errno));
+ op_ret = bdb_db_rename (table,
+ tmp_db_newpath,
+ oldbctx->db_path);
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "renaming temp database back to old db failed"
+ " for directory %s", oldloc->path);
+ goto out;
+ } else {
+ /* this is a error case, set op_errno & op_ret */
+ op_ret = -1;
+ op_errno = ENOENT; /* TODO: errno */
+ }
+ }
+ op_ret = bdb_db_rename (table, tmp_db_newpath, real_db_newpath); /* NOTE(review): also runs
+ * after the rollback above and overwrites the -1 just stored in op_ret - confirm */
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "renaming temp database to new db failed"
+ " for directory %s", oldloc->path);
+ goto out;
+ }
+ } else if ((op_ret != 0) && (errno == ENOENT)) {
+ tmp_db_newpath = tempnam (private->export_path, "rename_temp"); /* replaces the earlier tempnam() result */
+ GF_VALIDATE_OR_GOTO (this->name, tmp_db_newpath, out);
+
+ tmpbctx = bctx_rename (oldbctx, tmp_db_newpath);
+ op_ret = -1;
+ op_errno = ENOENT;
+ GF_VALIDATE_OR_GOTO (this->name, tmpbctx, out);
+
+ op_ret = rename (real_oldpath, real_newpath);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "rename directory %s to %s failed: %s",
+ oldloc->path, newloc->path,
+ strerror (errno));
+ op_ret = bdb_db_rename (table,
+ tmp_db_newpath,
+ oldbctx->db_path);
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "renaming temp database back to old db failed"
+ " for directory %s", oldloc->path);
+ goto out;
+ } else {
+ /* this is a error case, set op_errno & op_ret */
+ op_ret = -1;
+ op_errno = ENOENT; /* TODO: errno */
+ }
+ } else {
+ op_ret = bdb_db_rename (table,
+ tmp_db_newpath,
+ real_db_newpath);
+ if (op_ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "renaming temp database to new db failed"
+ " for directory %s", oldloc->path);
+ goto out;
+ } else {
+ /* this is a error case, set op_errno & op_ret
+ * NOTE(review): this branch is reached when both rename() and
+ * bdb_db_rename() returned 0, yet failure is reported - confirm */
+ op_ret = -1;
+ op_errno = ENOENT; /* TODO: errno */
+ }
+ }
+ }
+ } else {
+ gf_log (this->name,
+ GF_LOG_CRITICAL,
+ "rename called on non-existent file type");
+ op_ret = -1;
+ op_errno = EPERM;
+ }
+
+out:
+ frame->root->rsp_refs = NULL;
+ STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+ return 0;
+}
+/* bdb_link - always refuses the request: the frame is unwound with op_ret -1,
+ * op_errno EPERM and NULL for both remaining reply arguments. @this, @oldloc and
+ * @newloc are not examined.
+ */
+int32_t
+bdb_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc)
+{
+ frame->root->rsp_refs = NULL;
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
+ return 0;
+}
+
+/* is_space_left - check whether the export can take @size more bytes while
+ * keeping BDB_ENOSPC_THRESHOLD blocks in reserve.
+ *
+ * @this: xlator_t of this instance of bdb xlator; private->export_path is the
+ *        path handed to statvfs().
+ * @size: number of bytes about to be written.
+ *
+ * return: 1 when enough blocks are free, 0 otherwise (including when
+ * statvfs() fails).
+ */
+int32_t
+is_space_left (xlator_t *this,
+               size_t size)
+{
+        struct bdb_private *private = this->private;
+        struct statvfs stbuf = {0,};
+        int32_t ret = -1;
+        fsblkcnt_t req_blocks = 0;
+        fsblkcnt_t usable_blocks = 0;
+
+        ret = statvfs (private->export_path, &stbuf);
+        if (ret != 0) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to do statvfs on %s", private->export_path);
+                return 0;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "requested size: %"GF_PRI_SIZET"\nfree blocks: %"PRIu64"\nblock size: %lu\nfrag size: %lu",
+                size, stbuf.f_bfree, stbuf.f_bsize, stbuf.f_frsize);
+
+        /* fsblkcnt_t is unsigned: subtracting the reserve from a smaller
+         * free-block count would wrap around to a huge value and report
+         * plenty of space. a zero fragment size cannot be divided by. */
+        if ((stbuf.f_frsize == 0) ||
+            (stbuf.f_bfree <= BDB_ENOSPC_THRESHOLD))
+                return 0;
+
+        req_blocks = (size / stbuf.f_frsize) + 1;
+        usable_blocks = (stbuf.f_bfree - BDB_ENOSPC_THRESHOLD);
+
+        return (req_blocks < usable_blocks) ? 1 : 0;
+}
+
+/* bdb_create - create an empty regular file as a record in the parent
+ * directory's database and attach a bdb_fd to @fd.
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno, fd, loc->inode, &stbuf).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t naming the file to create.
+ * @flags: not used.
+ * @mode: not used; the reply carries private->file_mode.
+ * @fd: fd_t that receives the new bdb_fd through BDB_SET_BFD().
+ *
+ * on success the bctx reference obtained from bctx_parent() is kept by the
+ * bdb_fd and dropped in bdb_release(). on failure the reference and any
+ * partially built bdb_fd are released here.
+ */
+int32_t
+bdb_create (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            int32_t flags,
+            mode_t mode,
+            fd_t *fd)
+{
+        int32_t             op_ret     = -1;
+        int32_t             op_errno   = EPERM;
+        char               *db_path    = NULL;
+        struct stat         stbuf      = {0,};
+        bctx_t             *bctx       = NULL;
+        struct bdb_private *private    = NULL;
+        char               *key_string = NULL;
+        struct bdb_fd      *bfd        = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        private = this->private;
+
+        bctx = bctx_parent (B_TABLE(this), loc->path);
+        op_errno = ENOENT;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        MAKE_KEY_FROM_PATH (key_string, loc->path);
+        op_ret = bdb_db_put (bctx, NULL, key_string, NULL, 0, 0, 0);
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "bdb_db_put() failed for path: %s", loc->path);
+                /* never unwind with the raw database return value */
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        /* create successful */
+        bfd = CALLOC (1, sizeof (*bfd));
+        op_ret = -1;
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        bfd->key = strdup (key_string);
+        op_ret = -1;
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd->key, out);
+
+        /* NOTE: bctx_parent () returned bctx with a ref, now owned by bfd */
+        bfd->ctx = bctx;
+
+        BDB_SET_BFD (this, fd, bfd);
+
+        stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+        stbuf.st_mode = private->file_mode;
+        stbuf.st_size = 0;
+        stbuf.st_nlink = 1;
+        stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+        op_ret = 0;
+        op_errno = 0;
+out:
+        if (op_ret != 0) {
+                /* nothing took ownership: drop what was acquired.
+                 * bfd can only be non-NULL here when strdup() failed,
+                 * so it has no key to release. */
+                if (bfd)
+                        free (bfd);
+                if (bctx)
+                        bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf);
+
+        return 0;
+}
+
+
+/* bdb_open
+ *
+ * as input parameters bdb_open gets the file name, i.e key. bdb_open builds a
+ * bdb_fd holding the parent directory's bctx and a copy of the key, and
+ * attaches it to @fd through BDB_SET_BFD().
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno, fd).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t naming the file to open.
+ * @flags: not used.
+ * @fd: fd_t that receives the new bdb_fd.
+ *
+ * on success the bctx reference obtained from bctx_parent() is kept by the
+ * bdb_fd and dropped in bdb_release(). on failure the reference and any
+ * partially built bdb_fd are released here.
+ */
+int32_t
+bdb_open (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc,
+          int32_t flags,
+          fd_t *fd)
+{
+        int32_t        op_ret     = -1;
+        int32_t        op_errno   = EINVAL;
+        bctx_t        *bctx       = NULL;
+        char          *key_string = NULL;
+        struct bdb_fd *bfd        = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        bctx = bctx_parent (B_TABLE(this), loc->path);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+        bfd = CALLOC (1, sizeof (*bfd));
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        MAKE_KEY_FROM_PATH (key_string, loc->path);
+        bfd->key = strdup (key_string);
+        op_ret = -1;
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd->key, out);
+
+        /* NOTE: bctx_parent () returns bctx with a ref, now owned by bfd */
+        bfd->ctx = bctx;
+
+        BDB_SET_BFD (this, fd, bfd);
+        op_ret = 0;
+        op_errno = 0;
+out:
+        if (op_ret != 0) {
+                /* nothing took ownership: drop what was acquired.
+                 * bfd can only be non-NULL here when strdup() failed,
+                 * so it has no key to release. */
+                if (bfd)
+                        free (bfd);
+                if (bctx)
+                        bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}
+
+/* bdb_readv - read up to @size bytes at @offset from the record backing @fd.
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno, &vec, 1, &stbuf).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @fd: fd_t carrying the bdb_fd set up by bdb_open()/bdb_create().
+ * @size: maximum number of bytes wanted.
+ * @offset: offset inside the record to start reading from.
+ *
+ * the buffer filled by bdb_db_get() is wrapped in a data_t inside a reply
+ * dictionary, which is referenced from frame->root->rsp_refs for the duration
+ * of the unwind. op_ret is the number of bytes described by the iovec, so a
+ * short read is reported as such.
+ *
+ * NOTE(review): when get_new_dict() fails, buf_data itself is not released
+ * here - confirm the proper release call for a bare data_t.
+ */
+int32_t
+bdb_readv (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           size_t size,
+           off_t offset)
+{
+        int32_t        op_ret     = -1;
+        int32_t        op_errno   = EINVAL;
+        struct iovec   vec        = {0,};
+        struct stat    stbuf      = {0,};
+        struct bdb_fd *bfd        = NULL;
+        dict_t        *reply_dict = NULL;
+        char          *buf        = NULL;
+        data_t        *buf_data   = NULL;
+        char          *db_path    = NULL;
+        int32_t        read_size  = 0;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory);
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* we are ready to go */
+        op_ret = bdb_db_get (bfd->ctx, NULL,
+                             bfd->key, &buf,
+                             size, offset);
+        read_size = op_ret;
+        if (op_ret == -1) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to do db_storage_get()");
+                op_ret = -1;
+                op_errno = ENOENT;
+                goto out;
+        } else if (op_ret == 0) {
+                goto out;
+        }
+
+        buf_data = get_new_data ();
+        op_ret = -1;
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, buf_data, out);
+
+        reply_dict = get_new_dict ();
+        op_ret = -1;
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, reply_dict, out);
+
+        /* never hand back more than was asked for */
+        if (size < read_size)
+                read_size = size;
+
+        /* from here on buf belongs to buf_data, and buf_data to reply_dict */
+        buf_data->data = buf;
+        buf_data->len = read_size;
+
+        dict_set (reply_dict, NULL, buf_data);
+
+        frame->root->rsp_refs = dict_ref (reply_dict);
+
+        vec.iov_base = buf;
+        vec.iov_len = read_size;
+
+        stbuf.st_ino = fd->inode->ino;
+        stbuf.st_size = read_size;
+        stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+
+        /* report what the iovec really holds, not what was requested */
+        op_ret = read_size;
+        op_errno = 0;
+out:
+        STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf);
+
+        if (reply_dict)
+                dict_unref (reply_dict);
+        else if (read_size > 0)
+                /* data was read but a reply container could not be
+                 * allocated, so buf was never attached to anything */
+                free (buf);
+
+        return 0;
+}
+
+
+/* bdb_writev - store the @count buffers of @vector, back to back starting at
+ * @offset, into the record backing @fd.
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno, &stbuf).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @fd: fd_t carrying the bdb_fd set up by bdb_open()/bdb_create().
+ * @vector: buffers to write.
+ * @count: number of entries in @vector.
+ * @offset: offset inside the record at which the first buffer is stored.
+ *
+ * the write is refused with ENOSPC when is_space_left() says the export cannot
+ * take the total size. on success op_ret is the number of bytes written.
+ *
+ * NOTE(review): with @count == 0 the loop never runs, c_ret keeps its initial
+ * -1 and the call fails with EBADFD - confirm whether that is intended.
+ */
+int32_t
+bdb_writev (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            struct iovec *vector,
+            int32_t count,
+            off_t offset)
+{
+        int32_t        op_ret     = -1;
+        int32_t        op_errno   = EINVAL;
+        struct stat    stbuf      = {0,};
+        struct bdb_fd *bfd        = NULL;
+        int32_t        idx        = 0;
+        off_t          c_off      = offset;
+        int32_t        c_ret      = -1;
+        char          *db_path    = NULL;
+        size_t         total_size = 0;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, vector, out);
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory);
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        for (idx = 0; idx < count; idx++)
+                total_size += vector[idx].iov_len;
+
+        if (!is_space_left (this, total_size)) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "requested storage for %"GF_PRI_SIZET", ENOSPC", total_size);
+                op_ret = -1;
+                op_errno = ENOSPC;
+                goto out;
+        }
+
+        /* we are ready to go; op_ret now counts the bytes written
+         * (it already is 0 after the successful lstat() above) */
+        op_ret = 0;
+        for (idx = 0; idx < count; idx++) {
+                c_ret = bdb_db_put (bfd->ctx, NULL,
+                                    bfd->key, vector[idx].iov_base,
+                                    vector[idx].iov_len, c_off, 0);
+                if (c_ret != 0) {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "failed to do bdb_db_put at offset: %"PRIu64" for file: %s",
+                                c_off, bfd->key);
+                        break;
+                } else {
+                        c_off += vector[idx].iov_len;
+                }
+                op_ret += vector[idx].iov_len;
+        }
+
+        if (c_ret) {
+                /* write failed; c_ret, not the byte counter, carries the
+                 * database error code */
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to do bdb_db_put(): %s",
+                        db_strerror (c_ret));
+                op_ret = -1;
+                op_errno = EBADFD; /* TODO: search for a more meaningful errno */
+                goto out;
+        }
+        /* NOTE: we want to increment stbuf->st_size, as stored in db */
+        stbuf.st_size = op_ret;
+        stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+        op_errno = 0;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+        return 0;
+}
+
+/* bdb_flush - nothing is flushed; the call only verifies that @fd carries a
+ * bdb_fd.
+ *
+ * @frame: call frame, unwound with (op_ret, op_errno).
+ * @this: xlator_t of this instance of bdb xlator.
+ * @fd: fd_t expected to carry a bdb_fd.
+ *
+ * unwinds with 0/0 when the bdb_fd is found, -1/EBADFD when it is not.
+ */
+int32_t
+bdb_flush (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd)
+{
+        struct bdb_fd *bfd      = NULL;
+        int32_t        op_errno = EPERM;
+        int32_t        op_ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        op_errno = EBADFD;
+        bfd = bdb_extract_bfd (fd, this);
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        /* the fd is valid and there is no work to do */
+        op_errno = 0;
+        op_ret = 0;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+/* bdb_release - tear down the bdb_fd attached to @fd.
+ *
+ * @this: xlator_t of this instance of bdb xlator.
+ * @fd: fd_t being released.
+ *
+ * drops the bctx reference held by the bdb_fd, then frees its key and the
+ * bdb_fd itself. a missing bdb_fd is only logged.
+ *
+ * return: always 0.
+ */
+int32_t
+bdb_release (xlator_t *this,
+             fd_t *fd)
+{
+        struct bdb_fd *bfd = bdb_extract_bfd (fd, this);
+
+        if (bfd == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to extract %s specific information from fd:%p", this->name, fd);
+                return 0;
+        }
+
+        bctx_unref (bfd->ctx);
+        bfd->ctx = NULL;
+
+        /* we did strdup() in bdb_open(); free (NULL) is a no-op */
+        free (bfd->key);
+        free (bfd);
+
+        return 0;
+}/* bdb_release */
+
+/* bdb_fsync - reports success without doing any work: the frame is unwound with
+ * op_ret 0 and op_errno 0. @this, @fd and @datasync are not examined.
+ */
+int32_t
+bdb_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync)
+{
+ frame->root->rsp_refs = NULL;
+ STACK_UNWIND (frame, 0, 0);
+ return 0;
+}/* bdb_fsync */
+
+/* number of bdb_lk() calls seen so far; only used to thin out the log message */
+static int gf_bdb_lk_log;
+
+/* bdb_lk - locking is not implemented here: every call is answered with
+ * -1/ENOSYS and an all-zero struct flock.
+ *
+ * @frame: call frame, unwound with (-1, ENOSYS, &nullock).
+ * @this: xlator instance, used only as the log domain.
+ * @fd, @cmd, @lock: not examined.
+ *
+ * an error pointing at the "features/posix-locks" translator is logged on
+ * every call whose running count is a multiple of GF_UNIVERSAL_ANSWER.
+ */
+int32_t
+bdb_lk (call_frame_t *frame,
+        xlator_t *this,
+        fd_t *fd,
+        int32_t cmd,
+        struct flock *lock)
+{
+        struct flock nullock = {0, };
+
+        if ((++gf_bdb_lk_log % GF_UNIVERSAL_ANSWER) == 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "\"features/posix-locks\" translator is not loaded, you need to use it");
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS, &nullock);
+        return 0;
+}/* bdb_lk */
+
+/* bdb_lookup
+ *
+ * there are four possibilities for a file being looked up:
+ * 1. file exists and is a directory.
+ * 2. file exists and is a symlink.
+ * 3. file exists and is a regular file.
+ * 4. file does not exist.
+ * case 1 and 2 are handled by doing lstat() on the @loc. if the file is a directory or symlink,
+ * lstat() succeeds. lookup continues to check if the @loc belongs to case-3 only if lstat() fails.
+ * to check for case 3, bdb_lookup does a bdb_db_get() for the given @loc. (see description of
+ * bdb_db_get() for more details on how @loc is transformed into db handle and key). if check
+ * for case 1, 2 and 3 fail, we proceed to conclude that file doesn't exist (case 4).
+ *
+ * @frame: call frame.
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t specifying the file to operate upon.
+ * @need_xattr: if need_xattr != 0, we are asked to return all the extended attributes of @loc,
+ * if any exist, in a dictionary. if @loc is a regular file and need_xattr is set, then
+ * we look at the value of need_xattr. if need_xattr >= size-of-the-file @loc, then the file
+ * content of @loc is returned in the xattr dictionary with 'glusterfs.content' as
+ * dictionary key.
+ *
+ * NOTE: bdb currently supports only directories, symlinks and regular files.
+ *
+ * NOTE: bdb_lookup returns the 'struct stat' of the underlying file itself, in case of directory
+ * and symlink (st_ino is modified as bdb allocates its own set of inodes for all files). for
+ * regular files, bdb uses the 'struct stat' of the database file in which the @loc is stored
+ * as a template and modifies st_ino (see bdb_inode_transform for more details), st_mode (can
+ * be set in volfile 'option file-mode <mode>'), st_size (exact size of the @loc
+ * contents), st_blocks (block count on the underlying filesystem to accommodate st_size,
+ * see BDB_COUNT_BLOCKS in bdb.h for more details).
+ */
+int32_t
+bdb_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req)
+{
+ struct stat stbuf = {0, };
+ int32_t op_ret = -1;
+ int32_t op_errno = ENOENT;
+ dict_t *xattr = NULL;
+ char *pathname = NULL;
+ char *directory = NULL;
+ char *real_path = NULL;
+ bctx_t *bctx = NULL;
+ char *db_path = NULL;
+ struct bdb_private *private = NULL;
+ char *key_string = NULL;
+ int32_t entry_size = 0;
+ char *file_content = NULL;
+ data_t *file_content_data = NULL;
+ uint64_t need_xattr = 0;
+
+ GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+ GF_VALIDATE_OR_GOTO ("bdb", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ private = this->private;
+
+ MAKE_REAL_PATH (real_path, this, loc->path);
+
+ pathname = strdup (loc->path);
+ GF_VALIDATE_OR_GOTO (this->name, pathname, out);
+
+ directory = dirname (pathname);
+ GF_VALIDATE_OR_GOTO (this->name, directory, out);
+
+ if (!strcmp (directory, loc->path)) {
+ /* SPECIAL CASE: looking up root */
+ op_ret = lstat (real_path, &stbuf);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to lstat on %s (%s)",
+ real_path, strerror (op_errno));
+ goto out;
+ }
+
+ /* bctx_lookup() returns NULL only when its time to wind up,
+ * we should shutdown functioning */
+ bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+ stbuf.st_ino = 1;
+ stbuf.st_mode = private->dir_mode;
+ } else {
+ MAKE_KEY_FROM_PATH (key_string, loc->path);
+ op_ret = lstat (real_path, &stbuf);
+ if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))){
+ bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+ if (loc->ino) {
+ /* revalidating directory inode */
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "revalidating directory %s", (char *)loc->path);
+ stbuf.st_ino = loc->ino;
+ } else {
+ stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+ }
+ stbuf.st_mode = private->dir_mode;
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ } else if (op_ret == 0) {
+ /* a symlink */
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "lookup called for symlink: %s", loc->path);
+ bctx = bctx_parent (B_TABLE(this), loc->path);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+ if (loc->ino) {
+ stbuf.st_ino = loc->ino;
+ } else {
+ stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+ }
+ stbuf.st_mode = private->symlink_mode;
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ }
+
+ /* for regular files */
+ bctx = bctx_parent (B_TABLE(this), loc->path);
+ op_ret = -1;
+ op_errno = ENOENT;
+ GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+ if (GF_FILE_CONTENT_REQUESTED(xattr_req, &need_xattr)) {
+ entry_size = bdb_db_get (bctx,
+ NULL,
+ loc->path,
+ &file_content,
+ 0, 0);
+ } else {
+ entry_size = bdb_db_get (bctx,
+ NULL,
+ loc->path,
+ NULL,
+ 0, 0);
+ }
+
+ op_ret = entry_size;
+ op_errno = ENOENT;
+ if (op_ret == -1) {
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "returning ENOENT for %s", loc->path);
+ goto out;
+ }
+
+ MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
+ op_ret = lstat (db_path, &stbuf);
+ op_errno = errno;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to lstat on %s (%s)",
+ db_path, strerror (op_errno));
+ goto out;
+ }
+
+ if ((need_xattr >= entry_size)
+ && (entry_size) && (file_content)) {
+ file_content_data = data_from_dynptr (file_content,
+ entry_size);
+ xattr = get_new_dict ();
+ dict_set (xattr, "glusterfs.content",
+ file_content_data);
+ } else {
+ if (file_content)
+ free (file_content);
+ }
+
+ if (loc->ino) {
+ /* revalidate */
+ stbuf.st_ino = loc->ino;
+ stbuf.st_size = entry_size;
+ stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+ } else {
+ /* fresh lookup, create an inode number */
+ stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+ stbuf.st_size = entry_size;
+ stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+ }/* if(inode->ino)...else */
+ stbuf.st_nlink = 1;
+ stbuf.st_mode = private->file_mode;
+ }
+ op_ret = 0;
+out:
+ if (bctx) {
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (bctx);
+ }
+
+ if (pathname)
+ free (pathname);
+
+ if (xattr)
+ dict_ref (xattr);
+
+ frame->root->rsp_refs = NULL;
+ STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf, xattr);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+
+}/* bdb_lookup */
+
+/*
+ * bdb_stat - return the attributes of the entry named by @loc.
+ *
+ * Entries that lstat() finds on the backend (directories and symlinks) are
+ * answered from that lstat(), with st_mode replaced by the configured
+ * dir_mode/symlink_mode.  Everything else is looked up as a record in the
+ * database of the parent directory: the attributes of the database file are
+ * used as a template and st_size/st_blocks come from the record.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the entry to stat
+ *
+ * return value - immaterial, async call.  The result is sent through
+ * STACK_UNWIND; a missing record is reported as ENOENT.
+ */
+int32_t
+bdb_stat (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc)
+{
+        struct stat         stbuf      = {0,};
+        char               *real_path  = NULL;
+        int32_t             op_ret     = -1;
+        int32_t             op_errno   = EINVAL;
+        int32_t             entry_size = 0;
+        struct bdb_private *private    = NULL;
+        char               *db_path    = NULL;
+        bctx_t             *bctx       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        private = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, private, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret == 0) {
+                /* directory or symlink */
+                stbuf.st_ino = loc->inode->ino;
+                if (S_ISDIR(stbuf.st_mode))
+                        stbuf.st_mode = private->dir_mode;
+                else
+                        stbuf.st_mode = private->symlink_mode;
+                /* we are done, lets unwind the stack */
+                goto out;
+        }
+
+        /* not on the backend filesystem: look for a record in the db of
+         * the parent directory */
+        bctx = bctx_parent (B_TABLE(this), loc->path);
+        op_ret = -1;
+        op_errno = ENOENT;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory);
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* bdb_db_get() returns -1 when the record cannot be read; report
+         * that as ENOENT (as bdb_lookup does) instead of unwinding success
+         * with a size of -1 */
+        entry_size = bdb_db_get (bctx, NULL, loc->path, NULL, 0, 0);
+        if (entry_size == -1) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "returning ENOENT for %s", loc->path);
+                op_ret = -1;
+                op_errno = ENOENT;
+                goto out;
+        }
+
+        stbuf.st_size = entry_size;
+        stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize);
+        stbuf.st_ino = loc->inode->ino;
+
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}/* bdb_stat */
+
+
+
+/* bdb_opendir - in the world of bdb, open/opendir is all about opening the
+ *               corresponding databases.  opendir looks up the database
+ *               context (bctx) of the directory being opened, opens the
+ *               backend directory with opendir(), and packs the bctx, the
+ *               DIR handle and a copy of the real path into a struct bdb_dir
+ *               which is stored in the fd through BDB_SET_BFD.
+ *
+ * @frame: call frame
+ * @this:  our information, as we filled during init()
+ * @loc:   location information
+ * @fd:    file descriptor structure (glusterfs internal)
+ *
+ * On failure everything acquired so far (bctx reference, struct bdb_dir,
+ * DIR handle) is released before unwinding; on success they are owned by
+ * the fd and released in bdb_releasedir.
+ *
+ * return value - immaterial, async call.
+ *
+ */
+int32_t
+bdb_opendir (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             fd_t *fd)
+{
+        char           *real_path = NULL;
+        int32_t         op_ret    = -1;
+        int32_t         op_errno  = EINVAL;
+        bctx_t         *bctx      = NULL;
+        struct bdb_dir *bfd       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+        bfd = CALLOC (1, sizeof (*bfd));
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        bfd->dir = opendir (real_path);
+        op_errno = errno;
+        GF_VALIDATE_OR_GOTO (this->name, bfd->dir, out);
+
+        /* NOTE: bctx_lookup() return bctx with ref */
+        bfd->ctx = bctx;
+
+        bfd->path = strdup (real_path);
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bfd->path, out);
+
+        BDB_SET_BFD (this, fd, bfd);
+        op_ret = 0;
+out:
+        if (op_ret != 0) {
+                /* nothing was handed over to the fd: undo every step that
+                 * succeeded so that a failed opendir leaks nothing */
+                if (bfd) {
+                        if (bfd->dir)
+                                closedir (bfd->dir);
+                        free (bfd->path);
+                        free (bfd);
+                }
+                if (bctx)
+                        bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}/* bdb_opendir */
+
+
+/*
+ * bdb_getdents - list the entries of an opened directory together with
+ *                their attributes.
+ *
+ * The listing is built in two phases:
+ *   1. entries present on the backend filesystem are read with readdir()
+ *      and lstat()ed (bdb private files are skipped);
+ *   2. unless only directories were asked for (GF_GET_DIR_ONLY), the keys
+ *      of the directory's database are walked with a cursor; each key is
+ *      reported with the attributes of the database file and the size of
+ *      its record.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @fd:    directory fd, previously opened through bdb_opendir
+ * @size:  maximum number of entries to return; 0 means no limit
+ * @off:   unused
+ * @flag:  GF_GET_DIR_ONLY restricts the listing to directories
+ *
+ * return value - immaterial, async call.  The entry list is owned by this
+ * function and is freed after STACK_UNWIND returns.
+ */
+int32_t
+bdb_getdents (call_frame_t *frame,
+              xlator_t *this,
+              fd_t *fd,
+              size_t size,
+              off_t off,
+              int32_t flag)
+{
+        int32_t         op_ret         = -1;
+        int32_t         op_errno       = EINVAL;
+        int32_t         ret            = -1;
+        int32_t         count          = 0;
+        size_t          real_path_len  = 0;
+        size_t          entry_path_len = 0;
+        size_t          tmp_name_len   = 0;
+        char           *real_path      = NULL;
+        char           *entry_path     = NULL;
+        char           *new_path       = NULL;
+        char           *db_path        = NULL;
+        /* shared marker for "no link target"; compared by address when the
+         * list is freed so that only strdup()ed targets are released */
+        char           *no_link        = "";
+        dir_entry_t     entries        = {0, };
+        dir_entry_t    *tmp            = NULL;
+        DIR            *dir            = NULL;
+        struct dirent  *dirent         = NULL;
+        struct bdb_dir *bfd            = NULL;
+        struct stat     db_stbuf       = {0,};
+        struct stat     buf            = {0,};
+        DBC            *cursorp        = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+        GF_VALIDATE_OR_GOTO (this->name, bfd->path, out);
+
+        /* bfd->path is the path bdb_opendir handed to opendir(), so it is
+         * already a backend path and is used as the prefix as it is */
+        real_path     = bfd->path;
+        real_path_len = strlen (real_path);
+        dir           = bfd->dir;
+
+        while ((dirent = readdir (dir))) {
+                if (IS_BDB_PRIVATE_FILE(dirent->d_name)) {
+                        continue;
+                }
+
+                tmp_name_len = strlen (dirent->d_name);
+                if (entry_path_len < (real_path_len + 1 + tmp_name_len + 1)) {
+                        entry_path_len = real_path_len + tmp_name_len + 1024;
+                        new_path = realloc (entry_path, entry_path_len);
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        GF_VALIDATE_OR_GOTO (this->name, new_path, out);
+                        entry_path = new_path;
+
+                        /* "<real_path>/" prefix; the entry name is appended
+                         * right after it for every entry */
+                        memcpy (entry_path, real_path, real_path_len);
+                        entry_path[real_path_len] = '/';
+                }
+
+                /* tmp_name_len + 1 copies the terminating NUL as well */
+                memcpy (&entry_path[real_path_len + 1], dirent->d_name,
+                        tmp_name_len + 1);
+
+                /* lstat, not stat: symlinks must be reported as symlinks */
+                op_ret = lstat (entry_path, &buf);
+                op_errno = errno;
+                if (op_ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to lstat on %s (%s)",
+                                entry_path, strerror (op_errno));
+                        goto out;
+                }
+
+                if ((flag == GF_GET_DIR_ONLY) && !S_ISDIR(buf.st_mode)) {
+                        continue;
+                }
+
+                tmp = CALLOC (1, sizeof (*tmp));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                GF_VALIDATE_OR_GOTO (this->name, tmp, out);
+
+                tmp->name = strdup (dirent->d_name);
+                if (tmp->name == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory");
+                        /* not linked into the list yet, free it here */
+                        free (tmp);
+                        goto out;
+                }
+
+                memcpy (&tmp->buf, &buf, sizeof (buf));
+
+                tmp->buf.st_ino = -1;
+                tmp->link = no_link;
+                if (S_ISLNK(tmp->buf.st_mode)) {
+                        char linkpath[ZR_PATH_MAX] = {0,};
+
+                        /* leave room for the terminating NUL */
+                        ret = readlink (entry_path, linkpath,
+                                        ZR_PATH_MAX - 1);
+                        if (ret != -1) {
+                                linkpath[ret] = '\0';
+                                tmp->link = strdup (linkpath);
+                                if (tmp->link == NULL)
+                                        tmp->link = no_link;
+                        }
+                }
+
+                count++;
+
+                tmp->next = entries.next;
+                entries.next = tmp;
+
+                /* if size is 0, count can never be = size, so entire dir is read */
+                if ((size_t) count == size)
+                        break;
+        }
+
+        if ((flag != GF_GET_DIR_ONLY) &&
+            ((size == 0) || ((size_t) count < size))) {
+                /* read from db */
+                MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory);
+                op_ret = lstat (db_path, &db_stbuf);
+                op_errno = errno;
+                if (op_ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to lstat on %s (%s)",
+                                db_path, strerror (op_errno));
+                        goto out;
+                }
+
+                /* the cursor is opened last so that no error path above
+                 * has to close it */
+                ret = bdb_cursor_open (bfd->ctx, &cursorp);
+                op_ret = -1;
+                op_errno = EINVAL;
+                GF_VALIDATE_OR_GOTO (this->name, (ret == 0), out);
+
+                /* read all the entries in database, one after the other */
+                while (1) {
+                        DBT key = {0,}, value = {0,};
+
+                        key.flags = DB_DBT_MALLOC;
+                        value.flags = DB_DBT_MALLOC;
+                        op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT);
+
+                        if (op_ret == DB_NOTFOUND) {
+                                gf_log (this->name,
+                                        GF_LOG_DEBUG,
+                                        "end of list of key/value pair in db for directory: %s",
+                                        bfd->ctx->directory);
+                                op_ret = 0;
+                                op_errno = 0;
+                                break;
+                        } else if (op_ret != 0){
+                                gf_log (this->name,
+                                        GF_LOG_ERROR,
+                                        "failed to do cursor get for directory %s: %s",
+                                        bfd->ctx->directory, db_strerror (op_ret));
+                                op_ret = -1;
+                                op_errno = ENOENT;
+                                break;
+                        }
+
+                        /* successfully read */
+                        tmp = CALLOC (1, sizeof (*tmp));
+                        if (tmp != NULL)
+                                tmp->name = CALLOC (1, key.size + 1);
+
+                        if ((tmp == NULL) || (tmp->name == NULL)) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory");
+                                free (tmp);
+                                free (key.data);
+                                free (value.data);
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                break;
+                        }
+
+                        memcpy (tmp->name, key.data, key.size);
+
+                        /* DB_DBT_MALLOC: both buffers belong to us */
+                        free (key.data);
+                        free (value.data);
+
+                        tmp->buf = db_stbuf;
+                        tmp->buf.st_size = bdb_db_get (bfd->ctx, NULL,
+                                                       tmp->name, NULL,
+                                                       0, 0);
+                        tmp->buf.st_blocks = BDB_COUNT_BLOCKS (tmp->buf.st_size,
+                                                               tmp->buf.st_blksize);
+                        /* FIXME: wat will be the effect of this? */
+                        tmp->buf.st_ino = -1;
+                        tmp->link = no_link;
+                        count++;
+
+                        tmp->next = entries.next;
+                        entries.next = tmp;
+
+                        /* if size is 0, count can never be = size, so entire dir is read */
+                        if ((size_t) count == size)
+                                break;
+                } /* while(1){ } */
+
+                bdb_cursor_close (bfd->ctx, cursorp);
+
+                /* do not let a cursor or allocation failure be reported
+                 * as success */
+                if (op_ret != 0)
+                        goto out;
+        }
+
+        op_ret = 0;
+
+out:
+        free (entry_path);
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &entries, count);
+
+        while (entries.next) {
+                tmp = entries.next;
+                entries.next = entries.next->next;
+                if (tmp->link != no_link)
+                        free (tmp->link);
+                FREE (tmp->name);
+                FREE (tmp);
+        }
+        return 0;
+}/* bdb_getdents */
+
+
+/*
+ * bdb_releasedir - release everything bdb_opendir attached to @fd: the copy
+ *                  of the path, the DIR handle, the reference on the
+ *                  directory's bctx and the struct bdb_dir itself.  Members
+ *                  found to be NULL are only logged.
+ *
+ * @this: xlator_t of this instance of bdb xlator
+ * @fd:   directory fd being released
+ *
+ * Always returns 0.
+ */
+int32_t
+bdb_releasedir (xlator_t *this,
+                fd_t *fd)
+{
+        struct bdb_dir *dirfd = bdb_extract_bfd (fd, this);
+
+        if (dirfd == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to extract fd data from fd=%p", fd);
+                return 0;
+        }
+
+        if (dirfd->path == NULL) {
+                gf_log (this->name, GF_LOG_ERROR, "bfd->path was NULL. fd=%p bfd=%p",
+                        fd, dirfd);
+        } else {
+                free (dirfd->path);
+        }
+
+        if (dirfd->dir == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "bfd->dir is NULL.");
+        } else {
+                closedir (dirfd->dir);
+        }
+
+        if (dirfd->ctx == NULL) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "bfd->ctx is NULL");
+        } else {
+                bctx_unref (dirfd->ctx);
+        }
+
+        free (dirfd);
+
+        return 0;
+}/* bdb_releasedir */
+
+
+/*
+ * bdb_readlink - read the target of the symlink named by @loc.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the symlink
+ * @size:  maximum number of bytes of the target to return
+ *
+ * return value - immaterial, async call.  On success op_ret is the length
+ * of the target and the NUL-terminated target is unwound; on failure
+ * op_ret is -1 and op_errno is the errno of readlink().
+ */
+int32_t
+bdb_readlink (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              size_t size)
+{
+        char    *dest      = NULL;
+        int32_t  op_ret    = -1;
+        int32_t  op_errno  = EPERM;
+        char    *real_path = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        /* one extra byte for the terminating NUL readlink() does not add */
+        dest = alloca (size + 1);
+        GF_VALIDATE_OR_GOTO (this->name, dest, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = readlink (real_path, dest, size);
+        if (op_ret == -1) {
+                /* save errno before anything else can overwrite it */
+                op_errno = errno;
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "readlink failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        /* terminate for every successful length, including 0 */
+        dest[op_ret] = '\0';
+        op_errno = 0;
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, dest);
+
+        return 0;
+}/* bdb_readlink */
+
+
+/*
+ * bdb_mkdir - create a directory on the backend, hand it over to the
+ *             caller's uid/gid and look up its database context.
+ *
+ * @frame: call frame (frame->root->uid/gid become the owner)
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the directory to create
+ * @mode:  mode passed to mkdir()
+ *
+ * If any step after mkdir() fails the freshly created directory is removed
+ * again and the failure is unwound with op_ret -1.
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_mkdir (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           mode_t mode)
+{
+        int32_t      op_ret    = -1;
+        int32_t      ret       = -1;
+        int32_t      op_errno  = EINVAL;
+        char        *real_path = NULL;
+        struct stat  stbuf     = {0, };
+        bctx_t      *bctx      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = mkdir (real_path, mode);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to mkdir %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = chown (real_path, frame->root->uid, frame->root->gid);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to chown on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto err;
+        }
+
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto err;
+        }
+
+        bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, err);
+
+        stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+
+        goto out;
+
+err:
+        /* op_ret may still be 0 from the last successful call (a failed
+         * bctx_lookup leaves it untouched); never unwind success after
+         * the directory has been rolled back */
+        op_ret = -1;
+
+        ret = rmdir (real_path);
+        if (ret != 0) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to rmdir the directory created (%s)",
+                        strerror (errno));
+        }
+
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
+
+        return 0;
+}/* bdb_mkdir */
+
+
+/*
+ * bdb_unlink - remove the entry named by @loc.
+ *
+ * The record is first deleted from the database of the parent directory.
+ * Only when the database reports DB_NOTFOUND is unlink() tried on the
+ * backend path.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the entry to remove
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_unlink (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+{
+        char    *real_path = NULL;
+        bctx_t  *parent    = NULL;
+        int32_t  op_errno  = EINVAL;
+        int32_t  op_ret    = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        parent = bctx_parent (B_TABLE(this), loc->path);
+        op_errno = ENOENT;
+        GF_VALIDATE_OR_GOTO (this->name, parent, out);
+
+        op_ret = bdb_db_del (parent, NULL, loc->path);
+
+        if (op_ret == DB_NOTFOUND) {
+                /* no record in the db, try the backend filesystem */
+                MAKE_REAL_PATH (real_path, this, loc->path);
+                op_ret = unlink (real_path);
+                op_errno = errno;
+                if (op_ret != 0)
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to unlink on %s (%s)",
+                                real_path, strerror (op_errno));
+        } else if (op_ret == 0) {
+                op_errno = 0;
+        }
+
+out:
+        if (parent != NULL) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (parent);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}/* bdb_unlink */
+
+
+/*
+ * bdb_do_rmdir - remove the database of a directory and then the directory
+ *                itself.
+ *
+ * Under bctx->lock the open database handle is closed and the database is
+ * removed through DB_ENV->dbremove(); afterwards rmdir() is called on the
+ * backend path.
+ *
+ * @this: xlator_t of this instance of bdb xlator
+ * @loc:  location of the directory to remove
+ *
+ * Returns the result of rmdir() when the database was removed, -1
+ * otherwise (validation failure, close failure or dbremove failure).
+ */
+int32_t
+bdb_do_rmdir (xlator_t *this,
+ loc_t *loc)
+{
+ char *real_path = NULL;
+ int32_t ret = -1;
+ bctx_t *bctx = NULL;
+ DB_ENV *dbenv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bdb", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ dbenv = BDB_ENV(this);
+ GF_VALIDATE_OR_GOTO (this->name, dbenv, out);
+
+ MAKE_REAL_PATH (real_path, this, loc->path);
+
+ bctx = bctx_lookup (B_TABLE(this), loc->path);
+ GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+ LOCK(&bctx->lock);
+ {
+ if (bctx->dbp == NULL) { /* NOTE(review): ret is still -1 here, so a directory without an open db handle is reported as a failure - confirm this is intended */
+ goto unlock;
+ }
+
+ ret = bctx->dbp->close (bctx->dbp, 0);
+ GF_VALIDATE_OR_GOTO (this->name, (ret == 0), unlock);
+
+ bctx->dbp = NULL; /* handle is closed, do not let anybody reuse it */
+
+ ret = dbenv->dbremove (dbenv, NULL, bctx->db_path, NULL, 0);
+ if (ret != 0) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "failed to DB_ENV->dbremove() on path %s: %s",
+ loc->path, db_strerror (ret));
+ }
+ }
+unlock:
+ UNLOCK(&bctx->lock);
+
+ if (ret) { /* any failure above: log once more and normalize to -1 */
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "failed to remove db %s: %s", bctx->db_path, db_strerror (ret));
+ ret = -1;
+ goto out;
+ }
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "removed db %s", bctx->db_path);
+ ret = rmdir (real_path); /* the db is gone, now remove the directory itself */
+
+out:
+ if (bctx) {
+ /* NOTE: bctx_unref always returns success,
+ * see description of bctx_unref for more details */
+ bctx_unref (bctx);
+ }
+
+ return ret;
+}
+
+/*
+ * bdb_rmdir - remove the directory named by @loc, but only when
+ *             is_dir_empty() reports it as empty.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the directory to remove
+ *
+ * return value - immaterial, async call.  op_ret is the result of
+ * bdb_do_rmdir(), or -1 with ENOTEMPTY for a non-empty directory.
+ */
+int32_t
+bdb_rmdir (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc)
+{
+        int32_t op_errno = ENOTEMPTY;
+        int32_t op_ret   = -1;
+
+        if (is_dir_empty (this, loc)) {
+                op_ret = bdb_do_rmdir (this, loc);
+                if (op_ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to bdb_do_rmdir on %s",
+                                loc->path);
+                }
+        } else {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "rmdir: directory %s not empty", loc->path);
+                op_errno = ENOTEMPTY;
+                op_ret = -1;
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+} /* bdb_rmdir */
+
+/*
+ * bdb_symlink - create the symlink @loc pointing to @linkname on the
+ *               backend filesystem.
+ *
+ * @frame:    call frame
+ * @this:     xlator_t of this instance of bdb xlator
+ * @linkname: target the new symlink points to
+ * @loc:      location of the symlink to create
+ *
+ * The symlink is removed again only when it was created by this call and a
+ * later step failed.  A failing symlink() (for example EEXIST) leaves the
+ * backend untouched and its errno is unwound unchanged.
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_symlink (call_frame_t *frame,
+             xlator_t *this,
+             const char *linkname,
+             loc_t *loc)
+{
+        int32_t             op_ret    = -1;
+        int32_t             op_errno  = EINVAL;
+        char               *real_path = NULL;
+        struct stat         stbuf     = {0,};
+        struct bdb_private *private   = NULL;
+        bctx_t             *bctx      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, linkname, out);
+
+        private = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, private, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        op_ret = symlink (linkname, real_path);
+        op_errno = errno;
+        if (op_ret != 0) {
+                /* nothing was created, so there is nothing to roll back:
+                 * unlinking here would delete a pre-existing entry */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to symlink %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto err;
+        }
+
+        bctx = bctx_parent (B_TABLE(this), loc->path);
+        op_errno = ENOENT;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, err);
+
+        stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx);
+        stbuf.st_mode = private->symlink_mode;
+
+        goto out;
+
+err:
+        /* roll back the symlink created above; op_errno keeps the reason
+         * of the step that failed */
+        if (unlink (real_path) != 0) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to unlink the previously created symlink (%s)",
+                        strerror (errno));
+        }
+        op_ret = -1;
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
+
+        return 0;
+} /* bdb_symlink */
+
+/*
+ * bdb_chmod - change the mode of an entry present on the backend
+ *             filesystem.  Entries lstat() cannot find there fail with the
+ *             errno of lstat().
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the entry
+ * @mode:  new mode
+ *
+ * return value - immaterial, async call.  On success the attributes read
+ * after the change are unwound.
+ */
+int32_t
+bdb_chmod (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           mode_t mode)
+{
+        int32_t      op_ret    = -1;
+        int32_t      op_errno  = EINVAL;
+        char        *real_path = NULL;
+        struct stat  stbuf     = {0,};
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* directory or symlink */
+        op_ret = chmod (real_path, mode);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to chmod on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* stbuf still holds the attributes from before the change;
+         * read them again so the caller gets the new mode */
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+        }
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}/* bdb_chmod */
+
+
+/*
+ * bdb_chown - change the owner of an entry present on the backend
+ *             filesystem (lchown, so a symlink itself is changed).  Entries
+ *             lstat() cannot find there fail with the errno of lstat().
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the entry
+ * @uid:   new owner
+ * @gid:   new group
+ *
+ * return value - immaterial, async call.  On success the attributes read
+ * after the change are unwound.
+ */
+int32_t
+bdb_chown (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           uid_t uid,
+           gid_t gid)
+{
+        int32_t      op_ret    = -1;
+        int32_t      op_errno  = EINVAL;
+        char        *real_path = NULL;
+        struct stat  stbuf     = {0,};
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* directory or symlink */
+        op_ret = lchown (real_path, uid, gid);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lchown on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* stbuf still holds the attributes from before the change;
+         * read them again so the caller gets the new owner */
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+        }
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}/* bdb_chown */
+
+
+/*
+ * bdb_truncate - truncate the record of the file named by @loc in the
+ *                database of its parent directory.
+ *
+ * The attributes unwound are those of the database file, with st_ino
+ * replaced by the inode number already known for @loc or, when there is
+ * none, by a transformed one.
+ *
+ * @frame:  call frame
+ * @this:   xlator_t of this instance of bdb xlator
+ * @loc:    location of the file
+ * @offset: NOTE(review): not used by this implementation - confirm that
+ *          only truncation of the whole record is intended
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_truncate (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              off_t offset)
+{
+        struct stat  stbuf      = {0,};
+        bctx_t      *parent     = NULL;
+        char        *key_string = NULL;
+        char        *db_path    = NULL;
+        char        *real_path  = NULL;
+        int32_t      op_errno   = EINVAL;
+        int32_t      op_ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        parent = bctx_parent (B_TABLE(this), loc->path);
+        op_errno = ENOENT;
+        GF_VALIDATE_OR_GOTO (this->name, parent, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        MAKE_KEY_FROM_PATH (key_string, loc->path);
+
+        /* attributes are taken from the db file of the parent directory */
+        MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, parent->directory);
+        op_ret = lstat (db_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        db_path, strerror (op_errno));
+                goto out;
+        }
+
+        if (!loc->inode->ino) {
+                stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, parent);
+        } else {
+                stbuf.st_ino = loc->inode->ino;
+        }
+
+        /* now truncate */
+        op_ret = bdb_db_put (parent, NULL, key_string, NULL, 0, 1, 0);
+        if (op_ret == -1) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "failed to do bdb_db_put: %s",
+                        db_strerror (op_ret));
+                op_ret = -1;
+                op_errno = EINVAL; /* TODO: better errno */
+        }
+
+out:
+        if (parent != NULL) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (parent);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}/* bdb_truncate */
+
+
+/*
+ * bdb_utimens - set access and modification times of an entry present on
+ *               the backend filesystem.  Entries lstat() cannot find there
+ *               fail with EPERM.
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location of the entry
+ * @ts:    ts[0] is the access time, ts[1] the modification time
+ *
+ * lutimes() is tried first; utimes() is the fallback when lutimes() is not
+ * implemented (ENOSYS).
+ *
+ * return value - immaterial, async call.  On success the attributes read
+ * after the change are unwound, with st_ino taken from loc->inode.
+ */
+int32_t
+bdb_utimens (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             struct timespec ts[2])
+{
+        struct timeval  tv[2]     = {{0,},};
+        struct stat     stbuf     = {0,};
+        char           *real_path = NULL;
+        int32_t         op_errno  = EPERM;
+        int32_t         op_ret    = -1;
+        int             idx       = 0;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                op_errno = EPERM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* directory or symlink: convert nanoseconds to microseconds */
+        for (idx = 0; idx < 2; idx++) {
+                tv[idx].tv_sec  = ts[idx].tv_sec;
+                tv[idx].tv_usec = ts[idx].tv_nsec / 1000;
+        }
+
+        op_ret = lutimes (real_path, tv);
+        if ((op_ret == -1) && (errno == ENOSYS))
+                op_ret = utimes (real_path, tv);
+        op_errno = errno;
+
+        if (op_ret == -1) {
+                gf_log (this->name,
+                        GF_LOG_WARNING,
+                        "utimes on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = lstat (real_path, &stbuf);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        stbuf.st_ino = loc->inode->ino;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}/* bdb_utimens */
+
+/*
+ * bdb_statfs - report statistics of the filesystem holding the backend
+ *              path of @loc, as returned by statvfs().
+ *
+ * @frame: call frame
+ * @this:  xlator_t of this instance of bdb xlator
+ * @loc:   location whose filesystem is queried
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_statfs (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+
+{
+        struct statvfs  buf       = {0, };
+        char           *real_path = NULL;
+        int32_t         op_errno  = EINVAL;
+        int32_t         op_ret    = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret   = statvfs (real_path, &buf);
+        op_errno = errno;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &buf);
+        return 0;
+}/* bdb_statfs */
+
+static int gf_bdb_xattr_log; /* number of xattr calls that failed with ENOTSUP;
+                              * the "not present" warning is logged only when
+                              * this count is a multiple of GF_UNIVERSAL_ANSWER */
+
+/* bdb_setxattr - set extended attributes.
+ *
+ * bdb allows the setxattr operation only on directories.
+ * bdb reserves 'glusterfs.file.<attribute-name>' to operate on the content
+ * of the files under the specified directory: such a key is stored as the
+ * content of the file named '<attribute-name>' in the database of the
+ * directory.  Every other key is set on the backend directory with
+ * lsetxattr().
+ *
+ * @frame: call frame.
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t specifying the file to operate upon.
+ * @dict: list of extended attributes to set on @loc.
+ * @flags: can be XATTR_REPLACE (replace an existing extended attribute only
+ *         if it exists) or XATTR_CREATE (create an extended attribute only
+ *         if it doesn't already exist).
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_setxattr (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              dict_t *dict,
+              int flags)
+{
+        int32_t      op_ret    = -1;
+        int32_t      op_errno  = EINVAL;
+        data_pair_t *trav      = NULL;
+        bctx_t      *bctx      = NULL;
+        char        *real_path = NULL;
+        char        *key       = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        /* dict may be touched only after it has been validated */
+        trav = dict->members_list;
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        if (!S_ISDIR (loc->inode->st_mode)) {
+                op_ret = -1;
+                op_errno = EPERM;
+                goto out;
+        }
+
+        while (trav) {
+                if (ZR_FILE_CONTENT_REQUEST(trav->key) ) {
+                        bctx = bctx_lookup (B_TABLE(this), loc->path);
+                        /* a previous key may have left op_ret at 0 */
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+                        /* skip the 'glusterfs.file.' prefix */
+                        key = &(trav->key[15]);
+
+                        if (flags & XATTR_REPLACE) {
+                                /* replace only if previously exists, otherwise error out */
+                                op_ret = bdb_db_get (bctx, NULL, key,
+                                                     NULL, 0, 0);
+                                if (op_ret == -1) {
+                                        /* key doesn't exist in database */
+                                        gf_log (this->name,
+                                                GF_LOG_DEBUG,
+                                                "cannot XATTR_REPLACE, xattr %s doesn't exist "
+                                                "on path %s", key, loc->path);
+                                        op_ret = -1;
+                                        op_errno = ENOENT;
+                                        break;
+                                }
+                                op_ret = bdb_db_put (bctx, NULL,
+                                                     key, trav->value->data,
+                                                     trav->value->len,
+                                                     op_ret, BDB_TRUNCATE_RECORD);
+                                if (op_ret != 0) {
+                                        op_ret = -1;
+                                        op_errno = EINVAL;
+                                        break;
+                                }
+                        } else {
+                                /* fresh create */
+                                op_ret = bdb_db_put (bctx, NULL, key,
+                                                     trav->value->data,
+                                                     trav->value->len,
+                                                     0, 0);
+                                if (op_ret != 0) {
+                                        op_ret = -1;
+                                        op_errno = EINVAL;
+                                        break;
+                                } else {
+                                        op_ret = 0;
+                                        op_errno = 0;
+                                } /* if(op_ret!=0)...else */
+                        } /* if(flags&XATTR_REPLACE)...else */
+
+                        /* NOTE: bctx_unref always returns success,
+                         * see description of bctx_unref for more details.
+                         * the pointer is reset so that the reference is
+                         * dropped exactly once, here or at 'out' */
+                        bctx_unref (bctx);
+                        bctx = NULL;
+                } else {
+                        /* do plain setxattr */
+                        op_ret = lsetxattr (real_path,
+                                            trav->key,
+                                            trav->value->data,
+                                            trav->value->len,
+                                            flags);
+                        op_errno = errno;
+                        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                                if (op_errno == ENOTSUP) {
+                                        gf_bdb_xattr_log++;
+                                        if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER)) {
+                                                gf_log (this->name, GF_LOG_WARNING,
+                                                        "Extended Attributes support not present."\
+                                                        "Please check");
+                                        }
+                                } else {
+                                        gf_log (this->name, GF_LOG_DEBUG,
+                                                "setxattr failed on %s (%s)",
+                                                loc->path, strerror (op_errno));
+                                }
+                                break;
+                        }
+                } /* if(ZR_FILE_CONTENT_REQUEST())...else */
+                trav = trav->next;
+        }/* while(trav) */
+out:
+        if (bctx) {
+                /* reached through one of the 'break's above */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}/* bdb_setxattr */
+
+
+/* bdb_getxattr - get extended attributes.
+ *
+ * bdb allows the getxattr operation only on directories.
+ * When 'glusterfs.file.<attribute-name>' is specified, bdb_getxattr
+ * retrieves the whole content of the file '<attribute-name>' from the
+ * database of the directory.  For any other name all extended attributes
+ * of the backend directory are listed and returned.
+ *
+ * @frame: call frame.
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t specifying the file to operate upon.
+ * @name: name of extended attributes to get for @loc.
+ *
+ * return value - immaterial, async call.  On success op_ret is the size of
+ * the file content (content request) or the size of the attribute name
+ * list; on failure op_ret is -1.
+ *
+ * NOTE: see description of bdb_setxattr for details on how
+ * 'glusterfs.file.<attribute-name>' is handled by bdb.
+ */
+int32_t
+bdb_getxattr (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              const char *name)
+{
+        int32_t  op_ret         = -1;
+        int32_t  op_errno       = EINVAL;
+        dict_t  *dict           = NULL;
+        bctx_t  *bctx           = NULL;
+        char    *buf            = NULL;
+        char    *key_string     = NULL;
+        int32_t  list_offset    = 0;
+        /* signed: llistxattr()/lgetxattr() report failure as -1 */
+        ssize_t  size           = 0;
+        ssize_t  remaining_size = 0;
+        ssize_t  value_len      = 0;
+        size_t   key_len        = 0;
+        char    *real_path      = NULL;
+        char     key[1024]      = {0,};
+        char    *value          = NULL;
+        char    *list           = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, name, out);
+
+        dict = get_new_dict ();
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        if (!S_ISDIR (loc->inode->st_mode)) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "operation not permitted on a non-directory file: %s", loc->path);
+                op_ret = -1;
+                op_errno = ENODATA;
+                goto out;
+        }
+
+        if (ZR_FILE_CONTENT_REQUEST(name)) {
+                bctx = bctx_lookup (B_TABLE(this), loc->path);
+                op_errno = EINVAL;
+                GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+                /* skip the 'glusterfs.file.' prefix */
+                key_string = (char *)&(name[15]);
+
+                op_ret = bdb_db_get (bctx, NULL, key_string, &buf, 0, 0);
+                if (op_ret == -1) {
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "failed to db get on directory: %s for key: %s",
+                                bctx->directory, name);
+                        op_ret = -1;
+                        op_errno = ENODATA;
+                        goto out;
+                }
+
+                dict_set (dict, (char *)name, data_from_dynptr (buf, op_ret));
+                goto out;
+        }
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+        size = llistxattr (real_path, NULL, 0);
+        op_errno = errno;
+        if (size <= 0) {
+                /* There are no extended attributes, send an empty dictionary */
+                if ((size == -1) && (op_errno != ENODATA)) {
+                        if (op_errno == ENOTSUP) {
+                                gf_bdb_xattr_log++;
+                                if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER))
+                                        gf_log (this->name,
+                                                GF_LOG_WARNING,
+                                                "Extended Attributes support not present."\
+                                                "Please check");
+                        } else {
+                                gf_log (this->name,
+                                        GF_LOG_WARNING,
+                                        "llistxattr failed on %s (%s)",
+                                        loc->path, strerror (op_errno));
+                        }
+                }
+                op_ret = -1;
+                op_errno = ENODATA;
+                goto out;
+        }
+
+        list = alloca (size + 1);
+        op_errno = ENOMEM;
+        GF_VALIDATE_OR_GOTO (this->name, list, out);
+
+        size = llistxattr (real_path, list, size);
+        if (size == -1) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "llistxattr failed on %s (%s)",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+        /* guarantee that walking the name list always finds a NUL */
+        list[size] = '\0';
+
+        remaining_size = size;
+        list_offset = 0;
+        while (remaining_size > 0) {
+                key_len = strlen (list + list_offset);
+                if (key_len == 0)
+                        break;
+
+                if (key_len >= sizeof (key)) {
+                        /* would not fit into key[] */
+                        op_ret = -1;
+                        op_errno = ENAMETOOLONG;
+                        goto out;
+                }
+                memcpy (key, list + list_offset, key_len + 1);
+
+                /* first call only asks for the size of the value */
+                value_len = lgetxattr (real_path, key, NULL, 0);
+                if (value_len == -1) {
+                        op_ret = -1;
+                        op_errno = errno;
+                        goto out;
+                }
+
+                value = CALLOC (value_len + 1, sizeof(char));
+                op_ret = -1;
+                op_errno = ENOMEM;
+                GF_VALIDATE_OR_GOTO (this->name, value, out);
+
+                value_len = lgetxattr (real_path, key, value, value_len);
+                if (value_len == -1) {
+                        op_errno = errno;
+                        /* not handed over to the dict yet */
+                        free (value);
+                        goto out;
+                }
+                value [value_len] = '\0';
+                dict_set (dict, key, data_from_dynptr (value, value_len));
+
+                remaining_size -= (ssize_t) (key_len + 1);
+                list_offset += (int32_t) (key_len + 1);
+        } /* while(remaining_size>0) */
+
+        op_ret = size;
+
+out:
+        if(bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        if (dict)
+                dict_ref (dict);
+
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        if (dict)
+                dict_unref (dict);
+
+        return 0;
+}/* bdb_getxattr */
+
+
+/*
+ * bdb_removexattr - remove an extended attribute.
+ *
+ * bdb allows the removexattr operation only on directories.  A name of the
+ * form 'glusterfs.file.<attribute-name>' deletes the record
+ * '<attribute-name>' from the database of the directory (the same key
+ * bdb_setxattr and bdb_getxattr use); any other name is removed from the
+ * backend directory with lremovexattr().
+ *
+ * @frame: call frame.
+ * @this: xlator_t of this instance of bdb xlator.
+ * @loc: loc_t specifying the directory to operate upon.
+ * @name: name of the extended attribute to remove.
+ *
+ * return value - immaterial, async call.
+ */
+int32_t
+bdb_removexattr (call_frame_t *frame,
+                 xlator_t *this,
+                 loc_t *loc,
+                 const char *name)
+{
+        int32_t     op_ret     = -1;
+        int32_t     op_errno   = EINVAL;
+        bctx_t     *bctx       = NULL;
+        char       *real_path  = NULL;
+        const char *key_string = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+        GF_VALIDATE_OR_GOTO (this->name, name, out);
+
+        if (!S_ISDIR(loc->inode->st_mode)) {
+                gf_log (this->name,
+                        GF_LOG_WARNING,
+                        "operation not permitted on non-directory files");
+                op_ret = -1;
+                op_errno = EPERM;
+                goto out;
+        }
+
+        if (ZR_FILE_CONTENT_REQUEST(name)) {
+                bctx = bctx_lookup (B_TABLE(this), loc->path);
+                op_errno = EINVAL;
+                GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+                /* records are keyed without the 'glusterfs.file.' prefix */
+                key_string = &(name[15]);
+
+                op_ret = bdb_db_del (bctx, NULL, key_string);
+                if (op_ret == -1) {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "failed to delete %s from db of %s directory",
+                                name, loc->path);
+                        op_errno = EINVAL; /* TODO: errno */
+                        goto out;
+                }
+        } else {
+                MAKE_REAL_PATH(real_path, this, loc->path);
+                op_ret = lremovexattr (real_path, name);
+                op_errno = errno;
+                if (op_ret == -1) {
+                        if (op_errno == ENOTSUP) {
+                                gf_bdb_xattr_log++;
+                                if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER))
+                                        gf_log (this->name, GF_LOG_WARNING,
+                                                "Extended Attributes support not present."
+                                                "Please check");
+                        } else {
+                                gf_log (this->name,
+                                        GF_LOG_WARNING,
+                                        "%s: %s",
+                                        loc->path, strerror (op_errno));
+                        }
+                } /* if(op_ret == -1) */
+        } /* if (ZR_FILE_CONTENT_REQUEST(name))...else */
+
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}/* bdb_removexattr */
+
+
+/**
+ * bdb_fsyncdir - fsyncdir fop for the bdb xlator
+ *
+ * @frame:    call frame that is unwound with (op_ret, op_errno)
+ * @this:     bdb xlator
+ * @fd:       open directory fd
+ * @datasync: unused
+ *
+ * No flushing is performed here; the only requirement is that @fd carries
+ * a bdb context.  A valid fd unwinds with 0/0, a fd without context with
+ * -1/EBADFD.  Always returns 0.
+ */
+int32_t
+bdb_fsyncdir (call_frame_t *frame,
+              xlator_t *this,
+              fd_t *fd,
+              int datasync)
+{
+        int32_t        op_ret   = -1;
+        int32_t        op_errno = EINVAL;
+        struct bdb_fd *bfd      = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        frame->root->rsp_refs = NULL;
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        /* the fd is valid: report success instead of falling through with
+         * the initial -1/EBADFD */
+        op_ret   = 0;
+        op_errno = 0;
+
+out:
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}/* bdb_fsyncdir */
+
+
+/**
+ * bdb_access - check access permissions of the backing path via access(2)
+ *
+ * @frame: call frame that is unwound with (op_ret, op_errno)
+ * @this:  bdb xlator
+ * @loc:   location to check
+ * @mask:  mode mask passed straight to access(2)
+ *
+ * Only the path on the underlying file-system is checked; entries that
+ * live in a database are not handled yet.  Always returns 0.
+ */
+int32_t
+bdb_access (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc,
+            int32_t mask)
+{
+        char    *backend_path = NULL;
+        int32_t  op_errno     = EINVAL;
+        int32_t  op_ret       = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (backend_path, this, loc->path);
+
+        /* TODO: implement for db entries */
+        op_ret   = access (backend_path, mask);
+        op_errno = errno;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}/* bdb_access */
+
+
+/**
+ * bdb_ftruncate - ftruncate fop (not implemented)
+ *
+ * Validates its arguments and unwinds with -1/EPERM and a zero-filled
+ * 'struct stat'.  Always returns 0.
+ */
+int32_t
+bdb_ftruncate (call_frame_t *frame,
+               xlator_t *this,
+               fd_t *fd,
+               off_t offset)
+{
+        struct stat  empty_stat = {0,};
+        int32_t      op_errno   = EPERM;
+        int32_t      op_ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        /* TODO: implement */
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &empty_stat);
+
+        return 0;
+}
+
+/**
+ * bdb_fchown - fchown fop (not implemented)
+ *
+ * Validates its arguments and unwinds with -1/EPERM and a zero-filled
+ * 'struct stat'.  Always returns 0.
+ */
+int32_t
+bdb_fchown (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            uid_t uid,
+            gid_t gid)
+{
+        struct stat  empty_stat = {0,};
+        int32_t      op_errno   = EPERM;
+        int32_t      op_ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        /* TODO: implement */
+
+out:
+        STACK_UNWIND (frame, op_ret, op_errno, &empty_stat);
+
+        return 0;
+}
+
+
+/**
+ * bdb_fchmod - fchmod fop (not implemented)
+ *
+ * Validates its arguments and unwinds with -1/EPERM and a zero-filled
+ * 'struct stat'.  Always returns 0.
+ */
+int32_t
+bdb_fchmod (call_frame_t *frame,
+            xlator_t *this,
+            fd_t *fd,
+            mode_t mode)
+{
+        struct stat  empty_stat = {0,};
+        int32_t      op_errno   = EPERM;
+        int32_t      op_ret     = -1;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        /* TODO: implement */
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &empty_stat);
+
+        return 0;
+}
+
+/**
+ * bdb_setdents - recreate a list of directory entries under an open directory
+ *
+ * @frame:   call frame that is unwound with (op_ret, op_errno)
+ * @this:    bdb xlator
+ * @fd:      open directory fd; its bdb context supplies the backing path
+ *           and the database context
+ * @flags:   GF_SET_* flags; non-directory entries are skipped when this is
+ *           GF_SET_DIR_ONLY
+ * @entries: list head; the entries to create start at entries->next
+ * @count:   unused
+ *
+ * Directories are created with mkdir() and then chmod()/chown()ed, regular
+ * files are created as empty records in the directory's database.  Every
+ * entry is attempted even if an earlier one failed; the fop unwinds with
+ * 0 when no entry failed and with -1 plus the errno of the last failure
+ * otherwise.  Always returns 0.
+ */
+int32_t
+bdb_setdents (call_frame_t *frame,
+              xlator_t *this,
+              fd_t *fd,
+              int32_t flags,
+              dir_entry_t *entries,
+              int32_t count)
+{
+        int32_t         op_ret   = -1;
+        int32_t         op_errno = EINVAL;
+        int32_t         ret      = 0;
+        int             path_len = 0;
+        struct bdb_dir *bfd      = NULL;
+        dir_entry_t    *trav     = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+        GF_VALIDATE_OR_GOTO (this->name, entries, out);
+
+        frame->root->rsp_refs = NULL;
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        /* arguments are fine: start from success, failures below are sticky */
+        op_ret   = 0;
+        op_errno = 0;
+
+        for (trav = entries->next; trav; trav = trav->next) {
+                char pathname[ZR_PATH_MAX] = {0,};
+
+                /* build "<directory>/<name>" without overrunning pathname */
+                path_len = snprintf (pathname, sizeof (pathname), "%s/%s",
+                                     bfd->path, trav->name);
+                if ((path_len < 0) ||
+                    ((size_t)path_len >= sizeof (pathname))) {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "path of entry %s under %s is too long, "
+                                "skipping",
+                                trav->name, bfd->path);
+                        op_ret   = -1;
+                        op_errno = ENAMETOOLONG;
+                        continue;
+                }
+
+                if (S_ISDIR(trav->buf.st_mode)) {
+                        /* If the entry is directory, create it by calling
+                         * 'mkdir'. An already existing directory is fine. */
+                        ret = mkdir (pathname, trav->buf.st_mode);
+                        if ((ret == -1) && (errno != EEXIST)) {
+                                op_ret   = -1;
+                                op_errno = errno;
+                                gf_log (this->name,
+                                        GF_LOG_ERROR,
+                                        "failed to created directory %s: %s",
+                                        pathname, strerror (op_errno));
+                                continue;
+                        }
+
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "Creating directory %s with mode (0%o)",
+                                pathname,
+                                trav->buf.st_mode);
+
+                        /* restore the mode; on failure move on to the next
+                         * entry but remember the error */
+                        ret = chmod (pathname, trav->buf.st_mode);
+                        if (ret != 0) {
+                                op_ret   = -1;
+                                op_errno = errno;
+                                gf_log (this->name,
+                                        GF_LOG_ERROR,
+                                        "chmod failed on %s (%s)",
+                                        pathname, strerror (op_errno));
+                                continue;
+                        }
+
+                        /* restore the ownership */
+                        ret = chown (pathname,
+                                     trav->buf.st_uid, trav->buf.st_gid);
+                        if (ret != 0) {
+                                op_ret   = -1;
+                                op_errno = errno;
+                                gf_log (this->name,
+                                        GF_LOG_ERROR,
+                                        "chown failed on %s (%s)",
+                                        pathname, strerror (op_errno));
+                                continue;
+                        }
+                } else if ((flags == GF_SET_IF_NOT_PRESENT) ||
+                           (flags != GF_SET_DIR_ONLY)) {
+                        if (S_ISREG (trav->buf.st_mode)) {
+                                /* create a 0 byte record for the file */
+                                ret = bdb_db_put (bfd->ctx, NULL,
+                                                  trav->name, NULL, 0, 0, 0);
+                                if (ret != 0) {
+                                        gf_log (this->name,
+                                                GF_LOG_ERROR,
+                                                "failed to create file %s",
+                                                pathname);
+                                        op_ret   = -1;
+                                        op_errno = EINVAL; /* TODO: errno */
+                                }
+                        } else if (S_ISLNK (trav->buf.st_mode)) {
+                                /* TODO: implement */;
+                        } else {
+                                gf_log (this->name,
+                                        GF_LOG_ERROR,
+                                        "storage/bdb allows to create regular files only. "
+                                        "file %s (mode = %d) cannot be created",
+                                        pathname, trav->buf.st_mode);
+                        }
+                }
+        }
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * bdb_fstat - stat an open file that is stored as a database record
+ *
+ * @frame: call frame that is unwound with (op_ret, op_errno, &stbuf)
+ * @this:  bdb xlator
+ * @fd:    open file fd carrying a 'struct bdb_fd' context
+ *
+ * The attributes are taken from an lstat() of the storage database file of
+ * the parent directory; inode number, size and block count are then
+ * replaced with the values of the record itself.  Always returns 0.
+ */
+int32_t
+bdb_fstat (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd)
+{
+        int32_t        op_ret     = -1;
+        int32_t        op_errno   = EINVAL;
+        struct stat    stbuf      = {0,};
+        struct bdb_fd *bfd        = NULL;
+        bctx_t        *bctx       = NULL;
+        char          *storage_db = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        bctx = bfd->ctx;
+
+        MAKE_REAL_PATH_TO_STORAGE_DB (storage_db, this, bctx->directory);
+        op_ret   = lstat (storage_db, &stbuf);
+        op_errno = errno;
+
+        if (op_ret == 0) {
+                /* overlay the per-record values on the db file's stat */
+                stbuf.st_ino    = fd->inode->ino;
+                stbuf.st_size   = bdb_db_get (bctx, NULL, bfd->key,
+                                              NULL, 0, 0);
+                stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size,
+                                                    stbuf.st_blksize);
+        } else {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to lstat on %s (%s)",
+                        storage_db, strerror (op_errno));
+        }
+
+out:
+        frame->root->rsp_refs = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+        return 0;
+}
+
+
+/**
+ * bdb_readdir - read entries of an open directory
+ *
+ * @frame: call frame that is unwound with (count, op_errno, &entries)
+ * @this:  bdb xlator
+ * @fd:    open directory fd carrying a 'struct bdb_dir' context
+ * @size:  upper bound for the accumulated size of the returned entries
+ * @off:   unused; the position is kept in the fd context instead
+ *
+ * Entries come from two sources: first the sub-directories found on the
+ * backing file-system (bdb's private files are hidden), then, if there is
+ * room left, the keys of the directory's database.  The last key handed
+ * out is remembered in bfd->offset so that the next call can continue
+ * after it.
+ *
+ * The frame is unwound with the number of entries as op_ret.  Always
+ * returns 0.
+ */
+int32_t
+bdb_readdir (call_frame_t *frame,
+             xlator_t *this,
+             fd_t *fd,
+             size_t size,
+             off_t off)
+{
+        struct bdb_dir *bfd        = NULL;
+        int32_t         op_ret     = -1;
+        int32_t         op_errno   = EINVAL;
+        size_t          filled     = 0;
+        gf_dirent_t    *this_entry = NULL;
+        gf_dirent_t     entries;
+        struct dirent  *entry      = NULL;
+        off_t           in_case    = 0;
+        int32_t         this_size  = 0;
+        DBC            *cursorp    = NULL;
+        int32_t         count      = 0;
+
+        /* 'out' unwinds with and frees 'entries', so the list head has to
+         * be valid before the first possible jump to it */
+        INIT_LIST_HEAD (&entries.list);
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+        bfd = bdb_extract_bfd (fd, this);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, bfd, out);
+
+        /* phase 1: entries present on the backing file-system */
+        while (filled <= size) {
+                /* remember the position so an entry that does not fit (or
+                 * cannot be allocated) can be pushed back */
+                in_case = telldir (bfd->dir);
+                entry = readdir (bfd->dir);
+                if (!entry)
+                        break;
+
+                if (IS_BDB_PRIVATE_FILE(entry->d_name))
+                        continue;
+
+                this_size = dirent_size (entry);
+
+                if (this_size + filled > size) {
+                        seekdir (bfd->dir, in_case);
+                        break;
+                }
+
+                this_entry = gf_dirent_for_name (entry->d_name);
+                if (!this_entry) {
+                        seekdir (bfd->dir, in_case);
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                count++;
+
+                this_entry->d_ino  = entry->d_ino;
+                this_entry->d_off  = -1;
+                this_entry->d_type = entry->d_type;
+                this_entry->d_len  = entry->d_reclen;
+
+                list_add (&this_entry->list, &entries.list);
+
+                filled += this_size;
+        }
+        op_ret = filled;
+        op_errno = 0;
+        if (filled >= size) {
+                goto out;
+        }
+
+        /* phase 2: still room left, continue with the keys of the db */
+        op_ret = bdb_cursor_open (bfd->ctx, &cursorp);
+        op_errno = EBADFD;
+        GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out);
+
+        /* TODO: fix d_off, don't use bfd->offset. wrong method */
+        if (strlen (bfd->offset)) {
+                /* position the cursor on the last key handed out */
+                DBT key = {0,}, value = {0,};
+
+                key.data    = bfd->offset;
+                key.size    = strlen (bfd->offset);
+                key.flags   = DB_DBT_USERMEM;
+                value.dlen  = 0;
+                value.doff  = 0;
+                value.flags = DB_DBT_PARTIAL;
+
+                op_ret = bdb_cursor_get (cursorp, &key, &value, DB_SET);
+                if (op_ret != 0) {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "failed to position cursor at key %s",
+                                bfd->offset);
+                        /* do not leak the cursor on the error path */
+                        bdb_cursor_close (bfd->ctx, cursorp);
+                        op_errno = EBADFD;
+                        goto out;
+                }
+        } else {
+                /* first time or last time, do nothing */
+        }
+
+        while (filled <= size) {
+                DBT  key = {0,}, value = {0,};
+                char key_name[sizeof (bfd->offset)];
+
+                this_entry = NULL;
+
+                key.flags   = DB_DBT_MALLOC;
+                value.dlen  = 0;
+                value.doff  = 0;
+                value.flags = DB_DBT_PARTIAL;
+                op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT);
+
+                if (op_ret == DB_NOTFOUND) {
+                        /* we reached end of the directory */
+                        op_ret = 0;
+                        op_errno = 0;
+                        break;
+                } else if (op_ret != 0) {
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "database error during readdir");
+                        op_ret = -1;
+                        op_errno = ENOENT;
+                        break;
+                }
+
+                if (key.data == NULL) {
+                        /* NOTE: currently ignore when we get key.data == NULL.
+                         * TODO: we should not get key.data = NULL */
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "null key read from db");
+                        continue;
+                }
+
+                /* a key that does not fit bfd->offset can neither be
+                 * remembered as resume position nor be a valid name */
+                if (key.size >= sizeof (key_name)) {
+                        gf_log (this->name,
+                                GF_LOG_WARNING,
+                                "skipping db key of %u bytes, too long for "
+                                "a directory entry name",
+                                (unsigned int) key.size);
+                        free (key.data);
+                        continue;
+                }
+
+                this_size = bdb_dirent_size (&key);
+                if (this_size + filled > size) {
+                        /* entry is not returned: release the key copy and
+                         * do not count it */
+                        free (key.data);
+                        break;
+                }
+
+                /* the key is key.size bytes long and not guaranteed to be
+                 * NUL-terminated; make a terminated copy */
+                memcpy (key_name, key.data, key.size);
+                key_name[key.size] = '\0';
+                free (key.data);
+                key.data = NULL;
+
+                /* TODO - consider endianness here */
+                this_entry = gf_dirent_for_name (key_name);
+                if (!this_entry) {
+                        /* bfd->offset is untouched, so the next call
+                         * starts again at this key */
+                        break;
+                }
+
+                count++;
+
+                /* FIXME: bug, if someone is going to use ->d_ino */
+                this_entry->d_ino  = -1;
+                this_entry->d_off  = 0;
+                this_entry->d_type = 0;
+                this_entry->d_len  = key.size;
+
+                /* remember where to resume (terminator included) */
+                memcpy (bfd->offset, key_name, key.size + 1);
+
+                list_add (&this_entry->list, &entries.list);
+
+                filled += this_size;
+        }/* while */
+        bdb_cursor_close (bfd->ctx, cursorp);
+        op_ret = filled;
+        op_errno = 0;
+out:
+        frame->root->rsp_refs = NULL;
+        gf_log (this->name,
+                GF_LOG_DEBUG,
+                "read %"GF_PRI_SIZET" bytes for %d entries", filled, count);
+        STACK_UNWIND (frame, count, op_errno, &entries);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+
+/**
+ * bdb_stats - management op reporting disk and throughput statistics
+ *
+ * @frame: call frame that is unwound with (op_ret, op_errno, stats)
+ * @this:  bdb xlator
+ * @flags: unused
+ *
+ * Disk figures come from statvfs() on the export path.  Read/write usage
+ * is the average rate since init divided by the highest per-interval rate
+ * seen so far; the interval counters are reset on every call.  Always
+ * returns 0.
+ */
+int32_t
+bdb_stats (call_frame_t *frame,
+           xlator_t *this,
+           int32_t flags)
+
+{
+        int32_t              op_ret     = 0;
+        int32_t              op_errno   = 0;
+        struct xlator_stats  xlstats    = {0, };
+        struct xlator_stats *stats      = NULL;
+        struct statvfs       vfs;
+        struct timeval       now;
+        struct bdb_private  *priv       = NULL;
+        int64_t              avg_read   = 0;
+        int64_t              avg_write  = 0;
+        int64_t              elapsed_ms = 0;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+
+        priv  = (struct bdb_private *)(this->private);
+        stats = &xlstats;
+
+        op_ret   = statvfs (priv->export_path, &vfs);
+        op_errno = errno;
+        if (op_ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to statvfs on %s (%s)",
+                        priv->export_path, strerror (op_errno));
+                goto out;
+        }
+
+        stats->nr_files        = priv->stats.nr_files;
+        /* client info is maintained at FSd */
+        stats->nr_clients      = priv->stats.nr_clients;
+        /* number of free blocks in the filesystem, in bytes */
+        stats->free_disk       = vfs.f_bfree * vfs.f_bsize;
+        stats->total_disk_size = vfs.f_blocks * vfs.f_bsize;
+        stats->disk_usage      = (vfs.f_blocks - vfs.f_bavail) * vfs.f_bsize;
+
+        gettimeofday (&now, NULL);
+
+        /* average rates since init, per millisecond (KBps) */
+        elapsed_ms = (now.tv_sec - priv->init_time.tv_sec) * 1000 +
+                ((now.tv_usec - priv->init_time.tv_usec) / 1000);
+        if (elapsed_ms) {
+                avg_read  = priv->read_value / elapsed_ms;
+                avg_write = priv->write_value / elapsed_ms;
+        }
+
+        /* rates of the interval since the previous fetch raise the maxima */
+        elapsed_ms = (now.tv_sec - priv->prev_fetch_time.tv_sec) * 1000 +
+                ((now.tv_usec - priv->prev_fetch_time.tv_usec) / 1000);
+        if (elapsed_ms) {
+                if ((priv->interval_read / elapsed_ms) > priv->max_read)
+                        priv->max_read = (priv->interval_read / elapsed_ms);
+                if ((priv->interval_write / elapsed_ms) > priv->max_write)
+                        priv->max_write = priv->interval_write / elapsed_ms;
+        }
+
+        stats->read_usage  = avg_read / priv->max_read;
+        stats->write_usage = avg_write / priv->max_write;
+
+        /* start a new measuring interval */
+        gettimeofday (&(priv->prev_fetch_time), NULL);
+        priv->interval_read  = 0;
+        priv->interval_write = 0;
+
+out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, stats);
+        return 0;
+}
+
+
+/**
+ * bdb_inodelk - inodelk fop placeholder
+ *
+ * Locking is not provided by this translator: logs a critical message and
+ * unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+bdb_inodelk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, int32_t cmd, struct flock *lock)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+
+/**
+ * bdb_finodelk - finodelk fop placeholder
+ *
+ * Locking is not provided by this translator: logs a critical message and
+ * unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+bdb_finodelk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+
+/**
+ * bdb_entrylk - entrylk fop placeholder
+ *
+ * Locking is not provided by this translator: logs a critical message and
+ * unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+bdb_entrylk (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *basename, entrylk_cmd cmd,
+             entrylk_type type)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+
+/**
+ * bdb_fentrylk - fentrylk fop placeholder
+ *
+ * Locking is not provided by this translator: logs a critical message and
+ * unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+bdb_fentrylk (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *basename, entrylk_cmd cmd,
+              entrylk_type type)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+
+/**
+ * bdb_checksum - compute name checksums of a directory
+ *
+ * @frame: call frame that is unwound with
+ *         (op_ret, op_errno, file_checksum, dir_checksum)
+ * @this:  bdb xlator
+ * @loc:   location of the directory
+ * @flag:  unused
+ *
+ * dir_checksum is the byte-wise XOR of the names found on the backing
+ * file-system (bdb's private files excluded); file_checksum is the
+ * byte-wise XOR of all keys stored in the directory's database.  Always
+ * returns 0.
+ */
+int32_t
+bdb_checksum (call_frame_t *frame,
+              xlator_t *this,
+              loc_t *loc,
+              int32_t flag)
+{
+        char          *backend_path = NULL;
+        DIR           *dir = NULL;
+        struct dirent *dirent = NULL;
+        uint8_t        file_checksum[ZR_FILENAME_MAX] = {0,};
+        uint8_t        dir_checksum[ZR_FILENAME_MAX] = {0,};
+        int32_t        op_ret = -1;
+        int32_t        op_errno = EINVAL;
+        int32_t        pos = 0;
+        int32_t        length = 0;
+        bctx_t        *bctx = NULL;
+        DBC           *cursorp = NULL;
+        char          *data = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", frame, out);
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+        MAKE_REAL_PATH (backend_path, this, loc->path);
+
+        /* part 1: names present on the backing file-system */
+        dir = opendir (backend_path);
+        op_errno = errno;
+        GF_VALIDATE_OR_GOTO (this->name, dir, out);
+
+        for (dirent = readdir (dir); dirent; dirent = readdir (dir)) {
+                if (IS_BDB_PRIVATE_FILE(dirent->d_name))
+                        continue;
+
+                length = strlen (dirent->d_name);
+                for (pos = 0; pos < length; pos++)
+                        dir_checksum[pos] ^= dirent->d_name[pos];
+        }
+        closedir (dir);
+
+        /* part 2: keys stored in the directory's database */
+        bctx = bctx_lookup (B_TABLE(this), (char *)loc->path);
+        op_errno = EINVAL;
+        GF_VALIDATE_OR_GOTO (this->name, bctx, out);
+
+        op_ret = bdb_cursor_open (bctx, &cursorp);
+        op_errno = EINVAL;
+        GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out);
+
+        for (;;) {
+                DBT key = {0,}, value = {0,};
+
+                key.flags  = DB_DBT_MALLOC;
+                value.doff = 0;
+                value.dlen = 0;
+                op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT);
+
+                if (op_ret == 0) {
+                        /* successfully read: fold the key in and go on */
+                        data   = key.data;
+                        length = key.size;
+                        for (pos = 0; pos < length; pos++)
+                                file_checksum[pos] ^= data[pos];
+
+                        free (key.data);
+                        continue;
+                }
+
+                if (op_ret == DB_NOTFOUND) {
+                        gf_log (this->name,
+                                GF_LOG_DEBUG,
+                                "end of list of key/value pair in db for "
+                                "directory: %s", bctx->directory);
+                        op_ret = 0;
+                        op_errno = 0;
+                } else {
+                        gf_log (this->name,
+                                GF_LOG_ERROR,
+                                "failed to do cursor get for directory %s: %s",
+                                bctx->directory, db_strerror (op_ret));
+                        op_ret = -1;
+                        op_errno = ENOENT;
+                }
+                break;
+        }
+        bdb_cursor_close (bctx, cursorp);
+
+out:
+        if (bctx) {
+                /* NOTE: bctx_unref always returns success,
+                 * see description of bctx_unref for more details */
+                bctx_unref (bctx);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
+
+        return 0;
+}
+
+/**
+ * notify - answer a PARENT_UP event from the parent with CHILD_UP
+ *
+ * @this:  bdb xlator
+ * @event: GF_EVENT_* code; everything except GF_EVENT_PARENT_UP is ignored
+ * @data:  passed through to default_notify()
+ *
+ * Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        if (event == GF_EVENT_PARENT_UP) {
+                /* Tell the parent that bdb xlator is up */
+                assert ((this->private != NULL) &&
+                        (BDB_ENV(this) != NULL));
+                default_notify (this, GF_EVENT_CHILD_UP, data);
+        }
+
+        return 0;
+}
+
+
+
+/**
+ * init - set up the bdb xlator
+ *
+ * @this: bdb xlator being initialised
+ *
+ * Checks the volume layout (no subvolumes allowed), requires the
+ * "directory" option to name an existing directory (it is not created
+ * here), allocates the private structure, initialises the database
+ * environment through bdb_db_init() and looks up the root ("/") bctx.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int32_t             ret = -1;
+        struct stat         buf = {0,};
+        struct bdb_private *_private = NULL;
+        data_t             *directory = NULL;
+        bctx_t             *bctx = NULL;
+
+        GF_VALIDATE_OR_GOTO ("bdb", this, out);
+
+        _private = CALLOC (1, sizeof (*_private));
+        GF_VALIDATE_OR_GOTO (this->name, _private, out);
+
+        if (this->children) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "FATAL: storage/bdb cannot have subvolumes");
+                FREE (_private);
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        directory = dict_get (this->options, "directory");
+        if (!directory) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "export directory not specified in volfile");
+                FREE (_private);
+                goto out;
+        }
+        umask (000); // umask `masking' is done at the client side
+
+        /* The export directory must already exist; creating it is left to
+         * the administrator. */
+        ret = stat (directory->data, &buf);
+        if ((ret != 0) || !S_ISDIR (buf.st_mode)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "specified directory '%s' doesn't exists, Exiting", directory->data);
+                FREE (_private);
+                /* stat() succeeds on a non-directory, so force the error */
+                ret = -1;
+                goto out;
+        }
+
+        _private->export_path = strdup (directory->data);
+        if (!_private->export_path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to copy export directory path: out of memory");
+                FREE (_private);
+                ret = -1;
+                goto out;
+        }
+        _private->export_path_length = strlen (_private->export_path);
+
+        {
+                /* Stats related variables */
+                gettimeofday (&_private->init_time, NULL);
+                gettimeofday (&_private->prev_fetch_time, NULL);
+                _private->max_read = 1;
+                _private->max_write = 1;
+        }
+
+        this->private = (void *)_private;
+
+        ret = bdb_db_init (this, this->options);
+        if (ret == -1) {
+                gf_log (this->name,
+                        GF_LOG_DEBUG,
+                        "failed to initialize database");
+                goto out;
+        }
+
+        bctx = bctx_lookup (_private->b_table, "/");
+        /* NOTE: we are not doing bctx_unref() for root bctx,
+         * let it remain in active list forever */
+        if (!bctx) {
+                gf_log (this->name,
+                        GF_LOG_ERROR,
+                        "failed to allocate memory for root (/) bctx: out of memory");
+                /* bdb_db_init() left ret at success; report the failure */
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+/**
+ * bctx_cleanup - close the databases of every bctx on a list
+ *
+ * @head: list of 'struct bdb_ctx' linked through their 'list' member
+ *
+ * Each context is unlinked from @head and its db handle detached while
+ * holding the context lock; the handle is closed after the lock has been
+ * dropped.
+ */
+void
+bctx_cleanup (struct list_head *head)
+{
+        bctx_t *ctx  = NULL;
+        bctx_t *next = NULL;
+        DB     *db   = NULL;
+
+        list_for_each_entry_safe (ctx, next, head, list) {
+                LOCK (&ctx->lock);
+                db = ctx->dbp;
+                ctx->dbp = NULL;
+                list_del_init (&ctx->list);
+                UNLOCK (&ctx->lock);
+
+                if (!db)
+                        continue;
+
+                db->close (db, 0);
+                db = NULL;
+        }
+
+        return;
+}
+
+/**
+ * fini - tear down the bdb xlator
+ *
+ * @this: bdb xlator
+ *
+ * Closes the databases on the lru list and in every hash bucket, stops
+ * and joins the checkpoint thread, closes the DB environment and frees
+ * the bctx table and the private structure.
+ */
+void
+fini (xlator_t *this)
+{
+        struct bdb_private *private  = this->private;
+        int32_t             bucket   = 0;
+        int32_t             join_ret = 0;
+
+        if (!B_TABLE(this))
+                goto free_private;
+
+        /* close all the dbs from lru list */
+        bctx_cleanup (&(B_TABLE(this)->b_lru));
+        for (bucket = 0; bucket < B_TABLE(this)->hash_size; bucket++)
+                bctx_cleanup (&(B_TABLE(this)->b_hash[bucket]));
+
+        if (BDB_ENV(this)) {
+                /* clear 'active' under its lock, then wait for the
+                 * checkpoint thread to finish */
+                LOCK (&private->active_lock);
+                private->active = 0;
+                UNLOCK (&private->active_lock);
+
+                join_ret = pthread_join (private->checkpoint_thread, NULL);
+                if (join_ret != 0) {
+                        gf_log (this->name,
+                                GF_LOG_CRITICAL,
+                                "failed to join checkpoint thread");
+                }
+
+                /* TODO: pick each of the 'struct bctx' from private->b_hash
+                 * and close all the databases that are open */
+                BDB_ENV(this)->close (BDB_ENV(this), 0);
+        }
+
+        FREE (B_TABLE(this));
+
+free_private:
+        FREE (private);
+        return;
+}
+
+/* management operations implemented by storage/bdb */
+struct xlator_mops mops = {
+        .stats = bdb_stats
+};
+
+/* file operations implemented by storage/bdb, grouped by purpose */
+struct xlator_fops fops = {
+        /* namespace */
+        .lookup      = bdb_lookup,
+        .mknod       = bdb_mknod,
+        .mkdir       = bdb_mkdir,
+        .create      = bdb_create,
+        .symlink     = bdb_symlink,
+        .link        = bdb_link,
+        .rename      = bdb_rename,
+        .unlink      = bdb_unlink,
+        .rmdir       = bdb_rmdir,
+        .readlink    = bdb_readlink,
+
+        /* attributes */
+        .stat        = bdb_stat,
+        .fstat       = bdb_fstat,
+        .access      = bdb_access,
+        .chmod       = bdb_chmod,
+        .fchmod      = bdb_fchmod,
+        .chown       = bdb_chown,
+        .fchown      = bdb_fchown,
+        .utimens     = bdb_utimens,
+        .truncate    = bdb_truncate,
+        .ftruncate   = bdb_ftruncate,
+        .statfs      = bdb_statfs,
+
+        /* extended attributes */
+        .setxattr    = bdb_setxattr,
+        .getxattr    = bdb_getxattr,
+        .removexattr = bdb_removexattr,
+
+        /* file i/o */
+        .open        = bdb_open,
+        .readv       = bdb_readv,
+        .writev      = bdb_writev,
+        .flush       = bdb_flush,
+        .fsync       = bdb_fsync,
+
+        /* directories */
+        .opendir     = bdb_opendir,
+        .readdir     = bdb_readdir,
+        .fsyncdir    = bdb_fsyncdir,
+        .setdents    = bdb_setdents,
+        .getdents    = bdb_getdents,
+
+        /* locking */
+        .lk          = bdb_lk,
+        .inodelk     = bdb_inodelk,
+        .finodelk    = bdb_finodelk,
+        .entrylk     = bdb_entrylk,
+        .fentrylk    = bdb_fentrylk,
+
+        /* misc */
+        .checksum    = bdb_checksum,
+};
+
+/* callbacks invoked when a file fd / directory fd is released */
+struct xlator_cbks cbks = {
+        .releasedir = bdb_releasedir,
+        .release    = bdb_release,
+};
+
+#if 0 /* option table, compiled out; kept as a list of the known options */
+struct volume_options options[] = {
+        { "directory", GF_OPTION_TYPE_PATH, 0, },
+        { "logdir", GF_OPTION_TYPE_PATH, 0, },
+        { "errfile", GF_OPTION_TYPE_PATH, 0, },
+        { "dir-mode", GF_OPTION_TYPE_ANY, 0, }, // permission bits, base 8 number
+        { "file-mode", GF_OPTION_TYPE_ANY, 0, }, // permission bits, base 8 number
+        { "page-size", GF_OPTION_TYPE_SIZET, -1, },
+        { "lru-limit", GF_OPTION_TYPE_INT, -1, },
+        { "lock-timeout", GF_OPTION_TYPE_TIME, 0, },
+        { "checkpoint-timeout", GF_OPTION_TYPE_TIME, 0, },
+        { "transaction-timeout", GF_OPTION_TYPE_TIME, 0, },
+        { "mode", GF_OPTION_TYPE_BOOL, 0, }, // Should be 'cache' ??
+        { "access-mode", GF_OPTION_TYPE_STR, 0, 0, 0, "btree"},
+        { NULL, 0, } // terminating entry
+};
+
+#endif /* #if 0 */
diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h
new file mode 100644
index 00000000000..f2d962680dd
--- /dev/null
+++ b/xlators/storage/bdb/src/bdb.h
@@ -0,0 +1,439 @@
+/*
+ Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _BDB_H
+#define _BDB_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <db.h>
+
+#ifdef linux
+#ifdef __GLIBC__
+#include <sys/fsuid.h>
+#else
+#include <unistd.h>
+#endif
+#endif
+
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif
+
+#ifdef HAVE_SYS_EXTATTR_H
+#include <sys/extattr.h>
+#endif
+
+#include <pthread.h>
+#include "xlator.h"
+#include "inode.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+#define GLFS_BDB_STORAGE "/glusterfs_storage.db"
+
+/* numbers are not so reader-friendly, so lets have ON and OFF macros */
+#define ON 1
+#define OFF 0
+
+#define BDB_DEFAULT_LRU_LIMIT 100
+#define BDB_DEFAULT_HASH_SIZE 100
+
+#define BDB_ENOSPC_THRESHOLD 25600
+
+#define BDB_DEFAULT_CHECKPOINT_TIMEOUT 30
+
+/* DB_ENV of the table that @bctx belongs to */
+#define BCTX_ENV(bctx) ((bctx)->table->dbenv)
+/* MAKE_REAL_PATH(var,this,path)
+ * make the real path on the underlying file-system
+ *
+ * @var: destination to hold the real path
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path, as seen from mount-point
+ */
+#define MAKE_REAL_PATH(var, this, path) do {                                    \
+        int base_len = ((struct bdb_private *)(this)->private)->export_path_length; \
+        /* export path + path + NUL, with one spare byte */                     \
+        var = alloca (strlen (path) + base_len + 2);                            \
+        strcpy (var, ((struct bdb_private *)(this)->private)->export_path);     \
+        strcpy (&var[base_len], (path));                                        \
+        } while (0)
+
+/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path)
+ * make the real path to the storage-database file on file-system
+ *
+ * @var: destination to hold the real path
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path of the directory, as seen from mount-point
+ */
+#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do {                      \
+        int base_len = ((struct bdb_private *)(this)->private)->export_path_length; \
+        /* export path + path + db file name + terminating NUL */               \
+        var = alloca (strlen (path) + base_len + strlen (GLFS_BDB_STORAGE) + 1); \
+        strcpy (var, ((struct bdb_private *)(this)->private)->export_path);     \
+        strcpy (&var[base_len], (path));                                        \
+        strcat (var, GLFS_BDB_STORAGE);                                         \
+        } while (0)
+
+/* MAKE_KEY_FROM_PATH(key,path)
+ * make a 'key', which we use as key in the underlying database by using the path
+ *
+ * @key: destination to hold the key
+ * @path: path to file as seen from mount-point
+ */
+#define MAKE_KEY_FROM_PATH(key, path) do {                                      \
+        /* basename() gets a scratch copy; +1 for the terminating NUL */        \
+        char *tmp = alloca (strlen (path) + 1);                                 \
+        strcpy (tmp, (path));                                                   \
+        key = basename (tmp);                                                   \
+        }while (0);
+
+/* BDB_DO_LSTAT(path,stbuf,dirent)
+ * construct real-path to a dirent and do lstat on the real-path
+ *
+ * @path: path to the directory whose readdir is currently in progress
+ * @stbuf: a 'struct stat *'
+ * @dirent: a 'struct dirent *'
+ */
+#define BDB_DO_LSTAT(path, stbuf, dirent) do {                                  \
+        char tmp_real_path[GF_PATH_MAX];                                        \
+        /* bounded "<path>/<d_name>"; the result is stored in the caller's      \
+         * 'ret', a path that does not fit fails with ENAMETOOLONG */           \
+        int  tmp_len = snprintf (tmp_real_path, sizeof (tmp_real_path),         \
+                                 "%s/%s", (path), (dirent)->d_name);            \
+        if ((tmp_len < 0) ||                                                    \
+            ((size_t)tmp_len >= sizeof (tmp_real_path))) {                      \
+                errno = ENAMETOOLONG;                                           \
+                ret = -1;                                                       \
+        } else {                                                                \
+                ret = lstat (tmp_real_path, (stbuf));                           \
+        }                                                                       \
+        } while(0);
+
+/* IS_BDB_PRIVATE_FILE(name)
+ * check if a given 'name' is bdb xlator's internal file name
+ *
+ * @name: basename of a file.
+ *
+ * bdb xlator reserves the file names 'glusterfs_storage.db' and
+ * 'glusterfs_ns.db' (used by bdb xlator itself), and every name starting
+ * with 'log.0000' or '__db.' (used by libdb)
+ */
+#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
+ (!strcmp(name, "glusterfs_storage.db")) || \
+ (!strcmp(name, "glusterfs_ns.db")) || \
+ (!strncmp(name, "log.0000", 8)))
+
+/* check if 'name' is '.' or '..' entry */
+/* exact match only: a prefix compare would also hit names like ".hidden" */
+#define IS_DOT_DOTDOT(name) ((!strcmp((name), ".")) || (!strcmp((name), "..")))
+
+/* BDB_SET_BCTX(this,inode,bctx)
+ * put a stamp on inode. d00d, you are using bdb.. huhaha.
+ * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
+ * this will happen either in lookup() or mkdir().
+ *
+ * @this: pointer xlator_t of bdb xlator.
+ * @inode: inode where 'struct bdb_ctx *' has to be stored.
+ * @bctx: a 'struct bdb_ctx *'
+ */
+#define BDB_SET_BCTX(this,inode,bctx) do{ \
+ inode_ctx_put(inode, this, (uint64_t)(long)bctx); \
+ }while (0);
+
+/* MAKE_BCTX_FROM_INODE(this,bctx,inode)
+ * extract bdb xlator's 'struct bdb_ctx *' from an inode's ctx.
+ * valid only if done for directory inodes, otherwise bctx = NULL.
+ *
+ * @this: pointer xlator_t of bdb xlator.
+ * @bctx: a 'struct bdb_ctx *'
+ * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
+ */
+#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{                               \
+        uint64_t tmp_bctx = 0;                                                  \
+        /* act on the lookup's own result (0 is taken to mean success);         \
+         * bctx is left untouched when the inode has no ctx */                  \
+        if (inode_ctx_get ((inode), (this), &tmp_bctx) == 0)                    \
+                bctx = (void *)(long)tmp_bctx;                                  \
+        }while (0);
+
+#define BDB_SET_BFD(this,fd,bfd) do{ \
+ fd_ctx_set (fd, this, (uint64_t)(long)bfd); \
+ }while (0);
+
+/* maximum number of open dbs that bdb xlator will ever have */
+#define BDB_MAX_OPEN_DBS 100
+
+/* convert file size to block-count */
+/* NOTE(review): the trailing "- 1" yields -1 for size 0 - confirm intent */
+#define BDB_COUNT_BLOCKS(size,blksize) ((((size) + (blksize) - 1)/(blksize)) - 1)
+
+/* file permissions, again macros are more readable */
+#define RWXRWXRWX 0777
+#define DEFAULT_FILE_MODE 0644
+#define DEFAULT_DIR_MODE 0755
+
+/* see, if have a valid file permissions specification in @mode */
+#define IS_VALID_FILE_MODE(mode) (!(mode & (~RWXRWXRWX)))
+/* true when @mode has no bits set outside RWXRWXRWX */
+#define IS_VALID_DIR_MODE(mode) (!((mode) & (~(RWXRWXRWX))))
+
+/* maximum retries for a failed transactional operation */
+#define BDB_MAX_RETRIES 10
+
+typedef struct bctx_table bctx_table_t;
+typedef struct bdb_ctx bctx_t;
+typedef struct bdb_cache bdb_cache_t;
+typedef struct bdb_private bdb_private_t;
+
+struct bctx_table { /* bookkeeping for all 'struct bdb_ctx' of one xlator */
+        uint64_t dbflags; /* flags to be used for opening each database */
+        uint64_t cache; /* cache: can be either ON or OFF */
+        gf_lock_t lock; /* used to lock the 'struct bctx_table *' */
+        gf_lock_t checkpoint_lock; /* lock for checkpointing */
+        struct list_head *b_hash; /* hash table of 'struct bdb_ctx',
+                                   * an array of 'hash_size' list heads */
+        struct list_head active; /* list of active 'struct bdb_ctx' */
+        struct list_head b_lru; /* lru list of inactive 'struct bdb_ctx' */
+        struct list_head purge; /* NOTE(review): purpose not visible here - confirm */
+        uint32_t lru_limit; /* presumably the maximum length of b_lru - confirm */
+        uint32_t lru_size; /* presumably the current length of b_lru - confirm */
+        uint32_t hash_size; /* number of buckets in b_hash */
+        DBTYPE access_mode; /* access mode for accessing the databases,
+                             * can be DB_HASH, DB_BTREE */
+        DB_ENV *dbenv; /* DB_ENV under which every db operation
+                        * is carried over */
+        int32_t transaction; /* presumably ON or OFF, like bdb_private.transaction - confirm */
+        xlator_t *this; /* presumably the xlator owning this table - confirm */
+
+        uint64_t page_size; /* page-size of DB,
+                             * DB->set_pagesize(), should be set before DB->open */
+};
+
+struct bdb_ctx { /* per-directory context: the directory's database and cache */
+        /* controller members */
+        struct list_head list; /* lru list of 'struct bdb_ctx's,
+                                * a bdb_ctx can exist in one of b_hash or lru lists */
+        struct list_head b_hash; /* directory 'name' hashed list of 'struct bdb_ctx's */
+
+        struct bctx_table *table; /* table this context belongs to */
+        int32_t ref; /* reference count */
+        gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */
+
+        char *directory; /* directory path */
+        DB *dbp; /* pointer to open database, that resides inside this directory;
+                  * NULL once bctx_cleanup() has detached it */
+        uint32_t cache; /* cache ON or OFF */
+
+        /* per directory cache, bdb xlator's internal cache */
+        struct list_head c_list; /* linked list of cached records */
+        int32_t c_count; /* number of cached records */
+
+        int32_t key_hash; /* index to hash table list, to which this ctx belongs */
+        char *db_path; /* absolute path to db file */
+};
+
+struct bdb_fd { /* fd context of an open file (a record in the parent's db) */
+        struct bdb_ctx *ctx; /* pointer to bdb_ctx of the parent directory */
+        char *key; /* name of the file. NOTE: basename, not the complete path */
+        int32_t flags; /* open flags */
+};
+
+struct bdb_dir { /* fd context of an open directory */
+        struct bdb_ctx *ctx; /* pointer to bdb_ctx of this directory */
+        DIR *dir; /* open directory pointer, as returned by opendir() */
+        char offset[NAME_MAX]; /* FIXME: readdir offset, too crude. must go.
+                                * holds the last db key returned by readdir,
+                                * empty string when none was returned yet */
+        char *path; /* path to this directory */
+};
+
+/* cache */
+struct bdb_cache { /* one cached record (file) of a directory */
+        struct list_head c_list; /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */
+        char *key; /* name of the file this cache holds. NOTE: basename of file */
+        char *data; /* file content */
+        size_t size; /* size of the file content that this cache holds */
+};
+
+
+struct bdb_private { /* per-xlator private data, stored in xlator_t->private */
+        inode_table_t *itable; /* pointer to inode table that we use */
+        int32_t temp; /* NOTE(review): purpose not visible here - confirm */
+        char is_stateless; /* NOTE(review): purpose not visible here - confirm */
+        char *export_path; /* path to the export directory
+                            * (option directory <export-path>) */
+        int32_t export_path_length; /* length of 'export_path' string */
+
+        /* statistics */
+        struct xlator_stats stats; /* Statistics, provides activity of the server */
+
+        struct timeval prev_fetch_time; /* time of the previous bdb_stats() call */
+        struct timeval init_time; /* time at which init() ran */
+        int32_t max_read; /* highest per-interval read rate seen by bdb_stats(),
+                           * starts at 1 */
+        int32_t max_write; /* highest per-interval write rate seen by bdb_stats(),
+                            * starts at 1 */
+        int64_t interval_read; /* Used to calculate the max_read value */
+        int64_t interval_write; /* Used to calculate the max_write value */
+        int64_t read_value; /* Total read, from init */
+        int64_t write_value; /* Total write, from init */
+
+        /* bdb xlator specific private data */
+        uint64_t envflags; /* flags used for opening DB_ENV for this xlator */
+        uint64_t dbflags; /* flags to be used for opening each database */
+        uint64_t cache; /* cache: can be either ON or OFF */
+        uint32_t transaction; /* transaction: can be either ON or OFF */
+        uint32_t active; /* cleared by fini() before it joins the checkpoint thread */
+        gf_lock_t active_lock; /* protects 'active' */
+        struct bctx_table *b_table; /* table of all 'struct bdb_ctx' of this xlator */
+        DBTYPE access_mode; /* access mode for accessing the databases,
+                             * can be DB_HASH, DB_BTREE
+                             * (option access-mode <mode>) */
+        mode_t file_mode; /* mode for each and every file stored on bdb
+                           * (option file-mode <mode>) */
+        mode_t dir_mode; /* mode for each and every directory stored on bdb
+                          * (option dir-mode <mode>) */
+        mode_t symlink_mode; /* mode for each and every symlink stored on bdb */
+        pthread_t checkpoint_thread; /* pthread_t object used for creating checkpoint
+                                      * thread */
+        int32_t checkpoint_timeout; /* time duration between two consecutive checkpoint
+                                     * operations.
+                                     * (option checkpoint-timeout <time-in-seconds>) */
+        ino_t next_ino; /* inode number allocation counter */
+        gf_lock_t ino_lock; /* lock to protect 'next_ino' */
+        char *logdir; /* environment log directory
+                       * (option logdir <directory>) */
+        char *errfile; /* errfile path, used by environment to
+                        * print detailed error log.
+                        * (option errfile <errfile-path>) */
+        FILE *errfp; /* DB_ENV->set_errfile() expects us to fopen
+                      * the errfile before doing DB_ENV->set_errfile() */
+        uint32_t txn_timeout; /* used by DB_ENV->set_timeout to set the timeout for
+                               * a transactionally encapsulated DB->operation() to
+                               * timeout before waiting for locks to be released.
+                               * (option transaction-timeout <time-in-milliseconds>)
+                               */
+        uint32_t lock_timeout; /* presumably set from option lock-timeout - confirm */
+        uint32_t log_auto_remove; /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/
+        uint32_t log_region_max; /* NOTE(review): presumably the log region size limit - confirm */
+};
+
+
+/* bdb_txn_begin - start a transaction without parent in @dbenv; the handle
+ * is stored in *@ptxnid. Returns the result of DB_ENV->txn_begin(). */
+static inline int32_t
+bdb_txn_begin (DB_ENV *dbenv,
+               DB_TXN **ptxnid)
+{
+        int32_t ret = 0;
+
+        ret = dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
+
+        return ret;
+}
+
+/* bdb_txn_abort - abort @txnid. Returns the result of DB_TXN->abort(). */
+static inline int32_t
+bdb_txn_abort (DB_TXN *txnid)
+{
+        int32_t ret = 0;
+
+        ret = txnid->abort (txnid);
+
+        return ret;
+}
+
+/* bdb_txn_commit - commit @txnid with default flags. Returns the result of
+ * DB_TXN->commit(). */
+static inline int32_t
+bdb_txn_commit (DB_TXN *txnid)
+{
+        int32_t ret = 0;
+
+        ret = txnid->commit (txnid, 0);
+
+        return ret;
+}
+
+inline void *
+bdb_extract_bfd (fd_t *fd, xlator_t *this);
+
+
+void *
+bdb_db_stat (bctx_t *bctx,
+ DB_TXN *txnid,
+ uint32_t flags);
+
+int32_t
+bdb_db_get(struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *key_string,
+ char **buf,
+ size_t size,
+ off_t offset);
+
+#define BDB_TRUNCATE_RECORD 0xcafebabe
+
+int32_t
+bdb_db_put (struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *key_string,
+ const char *buf,
+ size_t size,
+ off_t offset,
+ int32_t flags);
+
+int32_t
+bdb_db_del (struct bdb_ctx *bctx,
+ DB_TXN *txnid,
+ const char *path);
+
+ino_t
+bdb_inode_transform (ino_t parent,
+ struct bdb_ctx *bctx);
+
+
+int32_t
+bdb_cursor_open (struct bdb_ctx *bctx,
+ DBC **cursorp);
+
+int32_t
+bdb_cursor_get (DBC *cursorp,
+ DBT *key,
+ DBT *value,
+ int32_t flags);
+
+
+int32_t
+bdb_cursor_close (struct bdb_ctx *ctx,
+ DBC *cursorp);
+
+
+int32_t
+bdb_dirent_size (DBT *key);
+
+int32_t
+dirent_size (struct dirent *entry);
+
+int
+bdb_db_init (xlator_t *this,
+ dict_t *options);
+
+void
+bdb_dbs_from_dict_close (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data);
+
+bctx_t *
+bctx_lookup (struct bctx_table *table,
+ const char *path);
+
+bctx_t *
+bctx_parent
+(struct bctx_table *table,
+ const char *path);
+
+bctx_t *
+bctx_unref (bctx_t *ctx);
+
+bctx_t *
+bctx_ref (bctx_t *ctx);
+
+bctx_t *
+bctx_rename (bctx_t *bctx,
+ const char *db_newpath);
+
+int32_t
+bdb_db_rename (bctx_table_t *table,
+ const char *tmp_db_newpath,
+ const char *real_db_newpath);
+#endif /* _BDB_H */
diff --git a/xlators/storage/posix/Makefile.am b/xlators/storage/posix/Makefile.am
new file mode 100644
index 00000000000..d471a3f9243
--- /dev/null
+++ b/xlators/storage/posix/Makefile.am
@@ -0,0 +1,3 @@
+# Automake recurses into src/ for this translator.
+SUBDIRS = src
+
+# No extra files are removed by "make clean" at this level.
+CLEANFILES =
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
new file mode 100644
index 00000000000..2859e09aa49
--- /dev/null
+++ b/xlators/storage/posix/src/Makefile.am
@@ -0,0 +1,17 @@
+
+# Build the posix storage translator as a libtool module installed under
+# the versioned xlator/storage directory.
+xlator_LTLIBRARIES = posix.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
+
+# "-avoid-version" is the libtool spelling; "-avoidversion" is not a libtool
+# option, so the module would still get versioned names.
+posix_la_LDFLAGS = -module -avoid-version
+
+posix_la_SOURCES = posix.c xattr-cache.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = posix.h xattr-cache.h
+
+AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
+ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
+ $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
new file mode 100644
index 00000000000..159f02ddeb4
--- /dev/null
+++ b/xlators/storage/posix/src/posix.c
@@ -0,0 +1,3715 @@
+/*
+ Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define __XOPEN_SOURCE 500
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <ftw.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "logging.h"
+#include "posix.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+/*
+ * Per-request filesystem identity switching.
+ *
+ * HAVE_SET_FSID is unconditionally undefined on the next line, so in this
+ * file the #else branch is always selected and all three macros expand to
+ * nothing. The fops compensate with explicit chown()/lchown() calls inside
+ * their "#ifndef HAVE_SET_FSID" sections.
+ *
+ * When the first branch is enabled:
+ *   DECLARE_OLD_FS_ID_VAR  declares old_fsuid/old_fsgid in the caller,
+ *   SET_FS_ID(uid, gid)    switches fsuid/fsgid and saves the old values,
+ *   SET_TO_OLD_FS_ID()     restores the saved values.
+ */
+#undef HAVE_SET_FSID
+#ifdef HAVE_SET_FSID
+
+#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid;
+
+#define SET_FS_ID(uid, gid) do { \
+ old_fsuid = setfsuid (uid); \
+ old_fsgid = setfsgid (gid); \
+ } while (0)
+
+#define SET_TO_OLD_FS_ID() do { \
+ setfsuid (old_fsuid); \
+ setfsgid (old_fsgid); \
+ } while (0)
+
+#else
+
+/* No-op variants: identity is not switched. */
+#define DECLARE_OLD_FS_ID_VAR
+#define SET_FS_ID(uid, gid)
+#define SET_TO_OLD_FS_ID()
+
+#endif
+
+/*
+ * Context handed to _posix_xattr_get_set() through dict_foreach() while a
+ * lookup reply dictionary is being filled.
+ */
+typedef struct {
+ xlator_t *this; /* translator instance; its name is used for logging */
+ const char *real_path; /* backend path the xattrs/content are read from */
+ dict_t *xattr; /* reply dictionary being populated */
+ struct stat *stbuf; /* stat of real_path; st_size bounds content reads */
+ loc_t *loc; /* lookup location; its inode's fd_list is inspected */
+} posix_xattr_filler_t;
+
+/*
+ * forget handler: remove this translator's context value from @inode and,
+ * when one was present, destroy it as a dict. Always returns 0.
+ */
+int
+posix_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_value = 0;
+        int      ret       = 0;
+
+        ret = inode_ctx_del (inode, this, &ctx_value);
+        if (ret == 0) {
+                dict_destroy ((dict_t *)(long) ctx_value);
+        }
+
+        return 0;
+}
+
+/*
+ * dict_foreach() callback used while building a lookup reply.
+ *
+ * For each @key of the request dictionary, one of three things is stored
+ * into filler->xattr:
+ *   - "glusterfs.content": the whole file content, but only when the
+ *     requested size (taken from @data) is at least the file's st_size;
+ *   - GLUSTERFS_OPEN_FD_COUNT: 1 if the inode's fd_list is non-empty,
+ *     0 otherwise;
+ *   - anything else: the value of that extended attribute, read with
+ *     lgetxattr() from filler->real_path.
+ *
+ * Failures are logged and the key is simply left out of the reply.
+ *
+ * @xattr_req: request dictionary being iterated (unused here)
+ * @key:       current key
+ * @data:      value stored under @key in the request
+ * @xattrargs: posix_xattr_filler_t describing the lookup in progress
+ */
+static void
+_posix_xattr_get_set (dict_t *xattr_req,
+                      char *key,
+                      data_t *data,
+                      void *xattrargs)
+{
+        posix_xattr_filler_t *filler     = xattrargs;
+        char                 *value      = NULL;
+        ssize_t               xattr_size = -1;
+        int                   ret        = -1;
+        char                 *databuf    = NULL;
+        int                   _fd        = -1;
+        loc_t                *loc        = NULL;
+        ssize_t               req_size   = 0;
+
+
+        /* should size be put into the data_t ? */
+        if (!strcmp (key, "glusterfs.content")) {
+                /* file content request */
+                req_size = data_to_uint64 (data);
+                if (req_size >= filler->stbuf->st_size) {
+                        _fd = open (filler->real_path, O_RDONLY);
+
+                        if (_fd == -1) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "opening file %s failed: %s",
+                                        filler->real_path, strerror (errno));
+                                goto err;
+                        }
+
+                        databuf = calloc (1, filler->stbuf->st_size);
+
+                        if (!databuf) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                goto err;
+                        }
+
+                        ret = read (_fd, databuf, filler->stbuf->st_size);
+                        if (ret == -1) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "read on file %s failed: %s",
+                                        filler->real_path, strerror (errno));
+                                goto err;
+                        }
+
+                        ret = close (_fd);
+                        _fd = -1;
+                        if (ret == -1) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "close on file %s failed: %s",
+                                        filler->real_path, strerror (errno));
+                                goto err;
+                        }
+
+                        ret = dict_set_bin (filler->xattr, key,
+                                            databuf, filler->stbuf->st_size);
+                        if (ret < 0) {
+                                goto err;
+                        }
+
+                        /* To avoid double free in cleanup below */
+                        databuf = NULL;
+                err:
+                        if (_fd != -1)
+                                close (_fd);
+                        if (databuf)
+                                FREE (databuf);
+                }
+        } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
+                loc = filler->loc;
+                if (!list_empty (&loc->inode->fd_list)) {
+                        ret = dict_set_uint32 (filler->xattr, key, 1);
+                } else {
+                        ret = dict_set_uint32 (filler->xattr, key, 0);
+                }
+        } else {
+                /* First call only asks for the size of the value. */
+                xattr_size = lgetxattr (filler->real_path, key, NULL, 0);
+
+                if (xattr_size > 0) {
+                        value = calloc (1, xattr_size + 1);
+                        if (!value) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                return;
+                        }
+
+                        /* The attribute may have changed or vanished since
+                         * the size query, so use what was actually read. */
+                        xattr_size = lgetxattr (filler->real_path, key,
+                                                value, xattr_size);
+                        if (xattr_size == -1) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "lgetxattr failed. path: %s, "
+                                        "key: %s: %s",
+                                        filler->real_path, key,
+                                        strerror (errno));
+                                FREE (value);
+                                return;
+                        }
+
+                        value[xattr_size] = '\0';
+                        ret = dict_set_bin (filler->xattr, key,
+                                            value, xattr_size);
+                        if (ret < 0) {
+                                gf_log (filler->this->name, GF_LOG_ERROR,
+                                        "dict set failed. path: %s, key: %s",
+                                        filler->real_path, key);
+                                /* Same ownership rule as the content branch
+                                 * above: on failure the buffer is ours. */
+                                FREE (value);
+                        }
+                }
+        }
+}
+
+
+/*
+ * Build the reply dictionary for a lookup.
+ *
+ * A fresh dictionary is created and _posix_xattr_get_set() is run over
+ * every key of @xattr_req to populate it.
+ *
+ * Returns the new dictionary, or NULL when it could not be allocated.
+ */
+dict_t *
+posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
+                         dict_t *xattr_req, struct stat *buf)
+{
+        posix_xattr_filler_t  ctx   = {0, };
+        dict_t               *reply = get_new_dict();
+
+        if (reply == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return NULL;
+        }
+
+        ctx.loc       = loc;
+        ctx.stbuf     = buf;
+        ctx.xattr     = reply;
+        ctx.real_path = real_path;
+        ctx.this      = this;
+
+        dict_foreach (xattr_req, _posix_xattr_get_set, &ctx);
+
+        return reply;
+}
+
+
+/*
+ * lookup fop: lstat() the backend path of @loc and, when @xattr_req is
+ * given, collect the requested attributes into a reply dictionary.
+ *
+ * Unwinds with (op_ret, op_errno, loc->inode, stat buffer, xattr dict).
+ * A path that lives on a different device than the export base is reported
+ * as a failure with ENOENT.
+ */
+int32_t
+posix_lookup (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xattr_req)
+{
+        struct stat buf       = {0, };
+        char *      real_path = NULL;
+        int32_t     op_ret    = -1;
+        int32_t     op_errno  = 0;
+        dict_t *    xattr     = NULL;
+
+        struct posix_private *priv = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        priv = this->private;
+
+        op_ret = lstat (real_path, &buf);
+        if (op_ret == -1) {
+                /* Only a failed call leaves a meaningful errno. */
+                op_errno = errno;
+                if (op_errno != ENOENT) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "lstat on %s failed: %s",
+                                loc->path, strerror (op_errno));
+                }
+                goto out;
+        }
+
+        /* Make sure we don't access another mountpoint inside export dir.
+         * It may cause inode number to repeat from single export point,
+         * which leads to severe problems..
+         */
+        if (priv->base_stdev != buf.st_dev) {
+                /* lstat() succeeded, so op_ret is 0 here; it must be reset
+                 * or the caller would see success together with ENOENT. */
+                op_ret   = -1;
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "%s: different mountpoint/device, returning "
+                        "ENOENT", loc->path);
+                goto out;
+        }
+
+        if (xattr_req) {
+                xattr = posix_lookup_xattr_fill (this, real_path, loc,
+                                                 xattr_req, &buf);
+        }
+
+        op_ret = 0;
+out:
+        frame->root->rsp_refs = NULL;
+
+        /* Hold a reference across the unwind, release it afterwards. */
+        if (xattr)
+                dict_ref (xattr);
+
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &buf, xattr);
+
+        if (xattr)
+                dict_unref (xattr);
+
+        return 0;
+}
+
+
+/*
+ * stat fop: lstat() the backend path of @loc and unwind with
+ * (op_ret, op_errno, stat buffer). On failure op_ret is -1 and op_errno
+ * carries the lstat() errno.
+ */
+int32_t
+posix_stat (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+{
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+        char        *real_path = NULL;
+        struct stat  stbuf     = {0,};
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (lstat (real_path, &stbuf) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}
+
+/*
+ * opendir fop: open the backend directory for @loc and attach a
+ * struct posix_fd (DIR handle, its descriptor and a copy of the real path)
+ * to @fd as this translator's fd context.
+ *
+ * Unwinds with (op_ret, op_errno, fd); op_ret is 0 on success and -1 on
+ * any failure, in which case the directory is closed and the context
+ * released.
+ */
+int32_t
+posix_opendir (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, fd_t *fd)
+{
+        char *            real_path = NULL;
+        int32_t           op_ret    = -1;
+        int32_t           op_errno  = 0;
+        DIR *             dir       = NULL;
+        int               dir_fd    = -1;
+        struct posix_fd * pfd       = NULL;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        dir = opendir (real_path);
+
+        if (dir == NULL) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "opendir failed on %s (%s)",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        /* Keep the descriptor in its own variable: op_ret must stay -1
+         * until everything has succeeded, otherwise the cleanup below is
+         * skipped and a descriptor number is unwound as the result. */
+        dir_fd = dirfd (dir);
+        if (dir_fd < 0) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dirfd() failed on %s (%s)",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        /* The context is a struct posix_fd, not an fd_t. */
+        pfd = CALLOC (1, sizeof (*pfd));
+        if (!pfd) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        pfd->dir  = dir;
+        pfd->fd   = dir_fd;
+        pfd->path = strdup (real_path);
+        if (!pfd->path) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+
+        frame->root->rsp_refs = NULL;
+
+        op_ret = 0;
+
+ out:
+        if (op_ret == -1) {
+                if (dir) {
+                        closedir (dir);
+                        dir = NULL;
+                }
+                if (pfd) {
+                        if (pfd->path)
+                                FREE (pfd->path);
+                        FREE (pfd);
+                        pfd = NULL;
+                }
+        }
+
+        SET_TO_OLD_FS_ID ();
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/*
+ * getdents fop: read entries from the directory stream stored in @fd's
+ * context and unwind with a linked list of dir_entry_t (name, lstat()
+ * result and, for symlinks, the link target).
+ *
+ * @size: maximum number of entries to return; 0 reads the whole directory
+ * @off:  not used by this implementation
+ * @flag: with GF_GET_DIR_ONLY, entries that are known not to be
+ *        directories are skipped
+ *
+ * Unwinds with (op_ret, op_errno, &entries, count). The list is owned by
+ * this function and is released after the unwind on every path.
+ */
+int32_t
+posix_getdents (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, size_t size, off_t off, int32_t flag)
+{
+        int32_t           op_ret         = -1;
+        int32_t           op_errno       = 0;
+        char *            real_path      = NULL;
+        dir_entry_t       entries        = {0, };
+        dir_entry_t *     tmp            = NULL;
+        DIR *             dir            = NULL;
+        struct dirent *   dirent         = NULL;
+        int               real_path_len  = -1;
+        int               entry_path_len = -1;
+        char *            entry_path     = NULL;
+        char *            grown_path     = NULL;
+        int               count          = 0;
+        struct posix_fd * pfd            = NULL;
+        uint64_t          tmp_pfd        = 0;
+        struct stat       buf            = {0,};
+        int               ret            = -1;
+        char              tmp_real_path[ZR_PATH_MAX];
+        char              linkpath[ZR_PATH_MAX];
+
+        DECLARE_OLD_FS_ID_VAR ;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        ret = fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd %p does not have context in %s",
+                        fd, this->name);
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)tmp_pfd;
+        if (!pfd->path) {
+                op_errno = EBADFD;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd does not have path set (possibly file "
+                        "fd, fd=%p)", fd);
+                goto out;
+        }
+
+        real_path     = pfd->path;
+        real_path_len = strlen (real_path);
+
+        entry_path_len = real_path_len + NAME_MAX;
+        entry_path     = CALLOC (1, entry_path_len);
+
+        if (!entry_path) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        strncpy (entry_path, real_path, entry_path_len);
+        entry_path[real_path_len] = '/';
+
+        dir = pfd->dir;
+
+        if (!dir) {
+                op_errno = EBADFD;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd does not have dir set (possibly file fd, "
+                        "fd=%p, path=`%s'",
+                        fd, real_path);
+                goto out;
+        }
+
+        /* TODO: check for all the type of flag, and behave appropriately */
+
+        while ((dirent = readdir (dir))) {
+                /* This helps in self-heal, when only directories
+                   needs to be replicated */
+
+                /* This is to reduce the network traffic, in case only
+                   directory is needed from posix */
+
+                /* strncpy() may leave the buffer unterminated and
+                 * strncat() writes a NUL after its n bytes, so one byte is
+                 * reserved for the terminator at every step. */
+                strncpy (tmp_real_path, real_path, ZR_PATH_MAX - 1);
+                tmp_real_path[ZR_PATH_MAX - 1] = '\0';
+                strncat (tmp_real_path, "/",
+                         ZR_PATH_MAX - strlen (tmp_real_path) - 1);
+
+                strncat (tmp_real_path, dirent->d_name,
+                         ZR_PATH_MAX - strlen (tmp_real_path) - 1);
+                ret = lstat (tmp_real_path, &buf);
+
+                if ((flag == GF_GET_DIR_ONLY)
+                    && (ret != -1 && !S_ISDIR(buf.st_mode))) {
+                        continue;
+                }
+
+                tmp = CALLOC (1, sizeof (*tmp));
+
+                if (!tmp) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto out;
+                }
+
+                tmp->name = strdup (dirent->d_name);
+                if (!tmp->name) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        /* not linked into the list yet */
+                        FREE (tmp);
+                        goto out;
+                }
+
+                if (entry_path_len <
+                    (real_path_len + 1 + strlen (tmp->name) + 1)) {
+                        entry_path_len = (real_path_len +
+                                          strlen (tmp->name) + 1024);
+
+                        /* Keep the old pointer until realloc() succeeds so
+                         * it can still be released on failure. */
+                        grown_path = realloc (entry_path, entry_path_len);
+                        if (!grown_path) {
+                                op_errno = ENOMEM;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "out of memory :(");
+                                FREE (tmp->name);
+                                FREE (tmp);
+                                goto out;
+                        }
+                        entry_path = grown_path;
+                }
+
+                strcpy (&entry_path[real_path_len+1], tmp->name);
+
+                ret = lstat (entry_path, &tmp->buf);
+
+                if (ret == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lstat on %s failed: %s",
+                                entry_path, strerror (op_errno));
+                        FREE (tmp->name);
+                        FREE (tmp);
+                        goto out;
+                }
+
+                if (S_ISLNK(tmp->buf.st_mode)) {
+                        /* leave room for the terminator written below */
+                        ret = readlink (entry_path, linkpath,
+                                        ZR_PATH_MAX - 1);
+                        if (ret != -1) {
+                                linkpath[ret] = '\0';
+                                tmp->link = strdup (linkpath);
+                        }
+                } else {
+                        tmp->link = "";
+                }
+
+                count++;
+
+                tmp->next = entries.next;
+                entries.next = tmp;
+
+                /* if size is 0, count can never be = size, so entire
+                   dir is read */
+                if (count == size)
+                        break;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        if (entry_path) {
+                FREE (entry_path);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &entries, count);
+
+        /* Release whatever was collected, on success and on failure. */
+        while (entries.next) {
+                tmp = entries.next;
+                entries.next = entries.next->next;
+                /* Only symlink entries carry a heap-allocated link; every
+                 * other entry points at a string literal. */
+                if (S_ISLNK (tmp->buf.st_mode) && tmp->link) {
+                        FREE (tmp->link);
+                }
+                FREE (tmp->name);
+                FREE (tmp);
+        }
+
+        return 0;
+}
+
+
+/*
+ * releasedir handler: detach this translator's context from @fd, close the
+ * directory stream it holds and free the context together with its stored
+ * path. Problems are logged; the function always returns 0.
+ */
+int32_t
+posix_releasedir (xlator_t *this,
+                  fd_t *fd)
+{
+        struct posix_fd *dirctx   = NULL;
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              rc       = 0;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_del (fd, this, &ctx_val);
+        if (rc < 0) {
+                op_errno = -rc;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd from fd=%p is NULL", fd);
+                goto out;
+        }
+
+        dirctx = (struct posix_fd *)(long) ctx_val;
+
+        if (dirctx->dir == NULL) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd->dir is NULL for fd=%p path=%s",
+                        fd, dirctx->path ? dirctx->path : "<NULL>");
+                goto out;
+        }
+
+        rc = closedir (dirctx->dir);
+        if (rc == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "closedir on %p failed", dirctx->dir);
+                goto out;
+        }
+        dirctx->dir = NULL;
+
+        if (dirctx->path == NULL) {
+                op_errno = EBADFD;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd->path was NULL. fd=%p pfd=%p",
+                        fd, dirctx);
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        /* The context is released on every path that obtained one. */
+        if (dirctx) {
+                if (dirctx->path)
+                        FREE (dirctx->path);
+                FREE (dirctx);
+        }
+
+        return 0;
+}
+
+
+/*
+ * readlink fop: read at most @size bytes of the symlink target for @loc
+ * into a stack buffer and unwind with (op_ret, op_errno, target string).
+ * On success op_ret is the number of bytes read and the buffer is
+ * NUL-terminated; on failure op_ret is -1.
+ */
+int32_t
+posix_readlink (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, size_t size)
+{
+        char *  dest      = NULL;
+        int32_t op_ret    = -1;
+        int32_t op_errno  = 0;
+        char *  real_path = NULL;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        /* Same argument checks as the sibling fops: `this` and `loc` are
+         * dereferenced by MAKE_REAL_PATH below. */
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        /* one extra byte for the terminator added after readlink() */
+        dest = alloca (size + 1);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = readlink (real_path, dest, size);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "readlink on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        dest[op_ret] = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno, dest);
+
+        return 0;
+}
+
+/*
+ * mknod fop: create the node for @loc with @mode/@dev, give it the
+ * caller's uid/gid (when fs-id switching is compiled out) and unwind with
+ * (op_ret, op_errno, loc->inode, stat of the new node).
+ *
+ * When mknod() rejects a regular file with EINVAL, creat() is used
+ * instead.
+ */
+int32_t
+posix_mknod (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode, dev_t dev)
+{
+        int         tmp_fd    = 0;
+        int32_t     op_ret    = -1;
+        int32_t     op_errno  = 0;
+        char *      real_path = 0;
+        struct stat stbuf     = { 0, };
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        op_ret = mknod (real_path, mode, dev);
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                if ((op_errno == EINVAL) && S_ISREG (mode)) {
+                        /* Over Darwin, mknod with (S_IFREG|mode)
+                           doesn't work */
+                        tmp_fd = creat (real_path, mode);
+                        if (tmp_fd == -1) {
+                                /* report why creat() failed rather than
+                                 * the EINVAL left over from mknod() */
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "creat on %s: %s", loc->path,
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                        close (tmp_fd);
+                } else {
+
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "mknod on %s: %s", loc->path,
+                                strerror (op_errno));
+                        goto out;
+                }
+        }
+
+#ifndef HAVE_SET_FSID
+        op_ret = lchown (real_path, frame->root->uid, frame->root->gid);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "lchown on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+#endif
+
+        op_ret = lstat (real_path, &stbuf);
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                /* name the call that actually failed */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "lstat on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf);
+
+        return 0;
+}
+
+/*
+ * mkdir fop: create the backend directory for @loc with @mode, chown it to
+ * the caller (when fs-id switching is compiled out), then unwind with
+ * (op_ret, op_errno, loc->inode, stat of the new directory).
+ */
+int32_t
+posix_mkdir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode)
+{
+        struct stat  newdir_stat = {0, };
+        char        *real_path   = NULL;
+        int32_t      op_errno    = 0;
+        int32_t      op_ret      = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (mkdir (real_path, mode) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "mkdir of %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+#ifndef HAVE_SET_FSID
+        if (chown (real_path, frame->root->uid, frame->root->gid) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "chown on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+#endif
+
+        if (lstat (real_path, &newdir_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "lstat on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        /* every step succeeded */
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &newdir_stat);
+
+        return 0;
+}
+
+
+/*
+ * unlink fop: call posix_xattr_cache_flush() for a private copy of @loc,
+ * then unlink() the backend path and unwind with (op_ret, op_errno).
+ */
+int32_t
+posix_unlink (call_frame_t *frame, xlator_t *this,
+              loc_t *loc)
+{
+        xattr_cache_handle_t  cache_handle = {{0,}, 0};
+        char                 *real_path    = NULL;
+        int32_t               op_errno     = 0;
+        int32_t               op_ret       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        /* The handle's loc is a copy that lives only across the flush. */
+        loc_copy (&cache_handle.loc, loc);
+        posix_xattr_cache_flush (this, &cache_handle);
+        loc_wipe (&cache_handle.loc);
+
+        if (unlink (real_path) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "unlink of %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * rmdir fop: call posix_xattr_cache_flush() for a private copy of @loc,
+ * then rmdir() the backend path and unwind with (op_ret, op_errno).
+ *
+ * EEXIST from rmdir() is reported as ENOTEMPTY. A non-empty directory is
+ * not logged.
+ */
+int32_t
+posix_rmdir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc)
+{
+        int32_t op_ret    = -1;
+        int32_t op_errno  = 0;
+        char *  real_path = 0;
+
+        xattr_cache_handle_t handle = {{0,}, 0};
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        loc_copy (&handle.loc, loc);
+        {
+                posix_xattr_cache_flush (this, &handle);
+        }
+        loc_wipe (&handle.loc);
+
+        op_ret = rmdir (real_path);
+        if (op_ret == -1) {
+                /* errno is only meaningful after a failed call; reading it
+                 * on success would hand a stale value to the caller. */
+                op_errno = errno;
+
+                if (op_errno == EEXIST)
+                        /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
+                        op_errno = ENOTEMPTY;
+
+                if (op_errno != ENOTEMPTY) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "rmdir of %s: %s", loc->path,
+                                strerror (op_errno));
+                }
+        }
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * symlink fop: create a symlink at the backend path of @loc pointing to
+ * @linkname, lchown it to the caller (when fs-id switching is compiled
+ * out) and unwind with (op_ret, op_errno, loc->inode, stat of the link).
+ */
+int32_t
+posix_symlink (call_frame_t *frame, xlator_t *this,
+               const char *linkname, loc_t *loc)
+{
+        struct stat  link_stat = { 0, };
+        char        *real_path = 0;
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (linkname, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (symlink (linkname, real_path) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "symlink of %s --> %s: %s",
+                        loc->path, linkname, strerror (op_errno));
+                goto out;
+        }
+
+#ifndef HAVE_SET_FSID
+        if (lchown (real_path, frame->root->uid, frame->root->gid) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lchown failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+#endif
+        if (lstat (real_path, &link_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &link_stat);
+
+        return 0;
+}
+
+
+/*
+ * rename fop: call posix_xattr_cache_flush() for a private copy of
+ * @oldloc, rename() the backend path of @oldloc to that of @newloc and
+ * unwind with (op_ret, op_errno, stat of the new path).
+ *
+ * A rename failing with ENOTEMPTY is logged at debug level, any other
+ * failure at error level.
+ */
+int
+posix_rename (call_frame_t *frame, xlator_t *this,
+              loc_t *oldloc, loc_t *newloc)
+{
+        xattr_cache_handle_t  cache_handle = {{0,}, 0};
+        struct stat           new_stat     = {0, };
+        char                 *src_path     = NULL;
+        char                 *dst_path     = NULL;
+        int32_t               op_errno     = 0;
+        int32_t               op_ret       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (oldloc, out);
+        VALIDATE_OR_GOTO (newloc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (src_path, this, oldloc->path);
+        MAKE_REAL_PATH (dst_path, this, newloc->path);
+
+        /* The handle's loc is a copy that lives only across the flush. */
+        loc_copy (&cache_handle.loc, oldloc);
+        posix_xattr_cache_flush (this, &cache_handle);
+        loc_wipe (&cache_handle.loc);
+
+        if (rename (src_path, dst_path) == -1) {
+                op_errno = errno;
+                gf_log (this->name,
+                        (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
+                        "rename of %s to %s failed: %s",
+                        oldloc->path, newloc->path, strerror (op_errno));
+                goto out;
+        }
+
+        if (lstat (dst_path, &new_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat on %s failed: %s",
+                        dst_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &new_stat);
+
+        return 0;
+}
+
+
+/*
+ * link fop: create a hard link at the backend path of @newloc for the
+ * file at @oldloc and unwind with
+ * (op_ret, op_errno, oldloc->inode, stat of the new path).
+ */
+int
+posix_link (call_frame_t *frame, xlator_t *this,
+            loc_t *oldloc, loc_t *newloc)
+{
+        struct stat  new_stat = {0, };
+        char        *src_path = 0;
+        char        *dst_path = 0;
+        int32_t      op_errno = 0;
+        int32_t      op_ret   = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (oldloc, out);
+        VALIDATE_OR_GOTO (newloc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (src_path, this, oldloc->path);
+        MAKE_REAL_PATH (dst_path, this, newloc->path);
+
+        if (link (src_path, dst_path) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "link %s to %s failed: %s",
+                        oldloc->path, newloc->path, strerror (op_errno));
+                goto out;
+        }
+
+        if (lstat (dst_path, &new_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat on %s failed: %s",
+                        dst_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, oldloc->inode, &new_stat);
+
+        return 0;
+}
+
+
+/*
+ * chmod fop: change the mode of the backend path of @loc and unwind with
+ * (op_ret, op_errno, stat after the change).
+ *
+ * For an inode recorded as a symlink nothing is changed; only the current
+ * stat is returned. Otherwise lchmod() is tried first and chmod() is used
+ * when lchmod() reports ENOSYS.
+ */
+int
+posix_chmod (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode)
+{
+        struct stat  cur_stat  = {0,};
+        char        *real_path = 0;
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (S_ISLNK (loc->inode->st_mode)) {
+                /* chmod on a link should always succeed */
+                if (lstat (real_path, &cur_stat) == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lstat on %s failed: %s",
+                                real_path, strerror (op_errno));
+                } else {
+                        op_ret = 0;
+                }
+                goto out;
+        }
+
+        op_ret = lchmod (real_path, mode);
+        if ((op_ret == -1) && (errno == ENOSYS)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "lchmod not implemented, falling back to chmod");
+                op_ret = chmod (real_path, mode);
+        }
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "chmod on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = lstat (real_path, &cur_stat);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &cur_stat);
+
+        return 0;
+}
+
+
+/*
+ * chown fop: lchown() the backend path of @loc to @uid/@gid and unwind
+ * with (op_ret, op_errno, stat after the change).
+ */
+int
+posix_chown (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, uid_t uid, gid_t gid)
+{
+        struct stat  cur_stat  = {0,};
+        char        *real_path = 0;
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (lchown (real_path, uid, gid) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lchown on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        if (lstat (real_path, &cur_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat on %s failed: %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &cur_stat);
+
+        return 0;
+}
+
+
+/*
+ * truncate fop: truncate() the backend path of @loc to @offset bytes and
+ * unwind with (op_ret, op_errno, stat after the change).
+ */
+int32_t
+posix_truncate (call_frame_t *frame,
+                xlator_t *this,
+                loc_t *loc,
+                off_t offset)
+{
+        struct stat  cur_stat  = {0,};
+        char        *real_path = 0;
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (truncate (real_path, offset) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "truncate on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        if (lstat (real_path, &cur_stat) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "lstat on %s failed: %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &cur_stat);
+
+        return 0;
+}
+
+
+/*
+ * utimens fop: set access and modification times of the backend path of
+ * @loc from @ts (nanoseconds are reduced to microseconds) and unwind with
+ * (op_ret, op_errno, stat after the change).
+ *
+ * lutimes() is tried first; utimes() is used when lutimes() reports
+ * ENOSYS.
+ */
+int
+posix_utimens (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, struct timespec ts[2])
+{
+        struct timeval  tv[2]     = {{0,},{0,}};
+        struct stat     cur_stat  = {0,};
+        char           *real_path = 0;
+        int32_t         op_errno  = 0;
+        int32_t         op_ret    = -1;
+        int             i         = 0;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        /* timespec -> timeval for both the atime and mtime slots */
+        for (i = 0; i < 2; i++) {
+                tv[i].tv_sec  = ts[i].tv_sec;
+                tv[i].tv_usec = ts[i].tv_nsec / 1000;
+        }
+
+        op_ret = lutimes (real_path, tv);
+        if ((op_ret == -1) && (errno == ENOSYS)) {
+                op_ret = utimes (real_path, tv);
+        }
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "utimes on %s: %s", real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = lstat (real_path, &cur_stat);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "lstat on %s: %s", real_path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &cur_stat);
+
+        return 0;
+}
+
+/*
+ * create fop: create and open the backend file for @loc, chown it to the
+ * caller (when fs-id switching is compiled out) and attach a
+ * struct posix_fd holding the descriptor to @fd.
+ *
+ * @flags == 0 means O_CREAT | O_RDWR | O_EXCL; otherwise O_CREAT is added
+ * to the given flags. O_DIRECT is added when the translator is configured
+ * with o_direct.
+ *
+ * Unwinds with (op_ret, op_errno, fd, loc->inode, stat of the new file).
+ * On any failure after open() the descriptor is closed again.
+ */
+int32_t
+posix_create (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t flags, mode_t mode,
+              fd_t *fd)
+{
+        int32_t                op_ret    = -1;
+        int32_t                op_errno  = 0;
+        int32_t                _fd       = -1;
+        int                    _flags    = 0;
+        char *                 real_path = NULL;
+        struct stat            stbuf     = {0, };
+        struct posix_fd *      pfd       = NULL;
+        struct posix_private * priv      = NULL;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (!flags) {
+                _flags = O_CREAT | O_RDWR | O_EXCL;
+        }
+        else {
+                _flags = flags | O_CREAT;
+        }
+
+        if (priv->o_direct) {
+                /* _flags is what open() receives, so O_DIRECT has to be
+                 * set there; flags is what gets recorded in pfd->flags. */
+                _flags |= O_DIRECT;
+                flags  |= O_DIRECT;
+        }
+
+        _fd = open (real_path, _flags, mode);
+
+        if (_fd == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "open on %s: %s", loc->path, strerror (op_errno));
+                goto out;
+        }
+
+#ifndef HAVE_SET_FSID
+        op_ret = chown (real_path, frame->root->uid, frame->root->gid);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "chown on %s failed: %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+#endif
+
+        op_ret = fstat (_fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "fstat on %d failed: %s", _fd, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = -1;
+        pfd = CALLOC (1, sizeof (*pfd));
+
+        if (!pfd) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        pfd->flags = flags;
+        pfd->fd    = _fd;
+
+        fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+
+        ((struct posix_private *)this->private)->stats.nr_files++;
+
+        op_ret = 0;
+
+ out:
+        /* Nothing owns the descriptor unless the context was attached. */
+        if ((op_ret == -1) && (_fd != -1)) {
+                close (_fd);
+                _fd = -1;
+        }
+
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf);
+
+        return 0;
+}
+
+/*
+ * open fop: open the backend file for @loc with @flags (plus O_DIRECT
+ * when the translator is configured with o_direct) and attach a
+ * struct posix_fd holding the descriptor to @fd.
+ *
+ * Unwinds with (op_ret, op_errno, fd). On failure the descriptor is
+ * closed and no context is left attached to @fd.
+ */
+int32_t
+posix_open (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, int32_t flags, fd_t *fd)
+{
+        int32_t                op_ret    = -1;
+        int32_t                op_errno  = 0;
+        char *                 real_path = NULL;
+        int32_t                _fd       = -1;
+        struct posix_fd *      pfd       = NULL;
+        struct posix_private * priv      = NULL;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (priv->o_direct)
+                flags |= O_DIRECT;
+
+        _fd = open (real_path, flags, 0);
+        if (_fd == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "open on %s: %s", real_path, strerror (op_errno));
+                goto out;
+        }
+
+#ifndef HAVE_SET_FSID
+        /* Done before the context is attached: a failure here closes the
+         * descriptor, and a context registered earlier would be left
+         * pointing at the closed descriptor. */
+        if (flags & O_CREAT) {
+                op_ret = chown (real_path, frame->root->uid, frame->root->gid);
+                if (op_ret == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "chown on %s failed: %s",
+                                real_path, strerror (op_errno));
+                        goto out;
+                }
+                /* stay in the failure state until everything is done */
+                op_ret = -1;
+        }
+#endif
+
+        pfd = CALLOC (1, sizeof (*pfd));
+
+        if (!pfd) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        pfd->flags = flags;
+        pfd->fd    = _fd;
+
+        fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+
+        ((struct posix_private *)this->private)->stats.nr_files++;
+
+        op_ret = 0;
+
+ out:
+        if (op_ret == -1) {
+                if (_fd != -1) {
+                        close (_fd);
+                        _fd = -1;
+                }
+        }
+
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+
+        return 0;
+}
+
+/*
+ * Round the address @ptr up to the next multiple of @bound, which must be
+ * a power of two. Both arguments are parenthesised and the arithmetic is
+ * done on the integer value, so expression arguments and void pointers
+ * are handled correctly.
+ */
+#define ALIGN_BUF(ptr,bound) ((void *)(((unsigned long)(ptr) +            \
+                                        (unsigned long)(bound) - 1) &     \
+                                       ~((unsigned long)(bound) - 1)))
+
+/**
+ * posix_readv - read up to @size bytes at @offset from the file behind @fd
+ *
+ * Fetches the posix_fd stored in the fd context, seeks to @offset and reads
+ * into a freshly allocated buffer (aligned to 4096 bytes when the file was
+ * opened with O_DIRECT). The buffer is attached to a reply dictionary which
+ * is handed to the response through frame->root->rsp_refs.
+ *
+ * Unwinds with (op_ret, op_errno, &vec, 1, &stbuf); op_ret is the number of
+ * bytes read on success and -1 on failure. Always returns 0.
+ */
+int
+posix_readv (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t offset)
+{
+        uint64_t               tmp_pfd     = 0;
+        int32_t                op_ret      = -1;
+        int32_t                op_errno    = 0;
+        char                  *buf         = NULL;
+        char                  *alloc_buf   = NULL;
+        int                    _fd         = -1;
+        struct posix_private  *priv        = NULL;
+        dict_t                *reply_dict  = NULL;
+        struct iovec           vec         = {0,};
+        struct posix_fd       *pfd         = NULL;
+        struct stat            stbuf       = {0,};
+        int                    align       = 1;
+        int                    ret         = -1;
+        int                    dict_ret    = -1;
+        int                    buf_in_dict = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        ret = fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL from fd=%p", fd);
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)tmp_pfd;
+
+        if (!size) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR, "size == 0");
+                goto out;
+        }
+
+        if (pfd->flags & O_DIRECT) {
+                align = 4096;    /* align to page boundary */
+        }
+
+        /* over-allocate so that an aligned window of `size` bytes fits */
+        alloc_buf = MALLOC (1 * (size + align));
+        if (!alloc_buf) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        /* page aligned buffer */
+        buf = ALIGN_BUF (alloc_buf, align);
+
+        _fd = pfd->fd;
+
+        op_ret = lseek (_fd, offset, SEEK_SET);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "lseek(%"PRId64") failed: %s",
+                        offset, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = read (_fd, buf, size);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "read failed: %s", strerror (op_errno));
+                goto out;
+        }
+
+        /* account for the bytes actually transferred, not the request size */
+        priv->read_value    += op_ret;
+        priv->interval_read += op_ret;
+
+        vec.iov_base = buf;
+        vec.iov_len  = op_ret;
+
+        op_ret = -1;
+        reply_dict = get_new_dict ();
+        if (!reply_dict) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+        dict_ref (reply_dict);
+
+        dict_ret = dict_set_ptr (reply_dict, NULL, alloc_buf);
+        if (dict_ret < 0) {
+                op_errno = -dict_ret;
+                gf_log (this->name, GF_LOG_ERROR, "could not dict_set: (%s)",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /*
+         * The success path never frees alloc_buf itself, so the dictionary
+         * is taken to own the buffer from here on.
+         * NOTE(review): confirm dict_set_ptr() transfers ownership.
+         */
+        buf_in_dict = 1;
+
+        /*
+         *  readv successful, and we need to get the stat of the file
+         *  we read from
+         */
+
+        op_ret = fstat (_fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "fstat failed: %s", strerror (op_errno));
+                goto out;
+        }
+
+        /* report the number of bytes read, as posix_writev does for writes */
+        op_ret = vec.iov_len;
+ out:
+        if (op_ret == -1) {
+                frame->root->rsp_refs = NULL;
+
+                if (reply_dict) {
+                        dict_unref (reply_dict);
+                        reply_dict = NULL;
+                }
+
+                /* free the buffer only if the dictionary never took it */
+                if ((alloc_buf != NULL) && !buf_in_dict) {
+                        FREE (alloc_buf);
+                }
+
+                /* do not hand a released buffer to the caller */
+                vec.iov_base = NULL;
+                vec.iov_len  = 0;
+        }
+
+        if (reply_dict)
+                frame->root->rsp_refs = reply_dict;
+
+        STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf);
+
+        if (reply_dict)
+                dict_unref (reply_dict);
+
+        return 0;
+}
+
+
+/**
+ * posix_writev - write @count buffers from @vector to @fd starting at @offset
+ *
+ * For descriptors opened with O_DIRECT each buffer is copied into an aligned
+ * bounce buffer and written with write(); all other descriptors go through
+ * a single writev(). Unwinds with (op_ret, op_errno, &stbuf), where op_ret
+ * is the number of bytes written or -1. Always returns 0.
+ */
+int32_t
+posix_writev (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, struct iovec *vector, int32_t count, off_t offset)
+{
+        struct posix_private *priv       = NULL;
+        struct posix_fd      *pfd        = NULL;
+        struct stat           stbuf      = {0,};
+        uint64_t              ctx_val    = 0;
+        char                 *bounce_raw = NULL;  /* as returned by MALLOC */
+        char                 *bounce     = NULL;  /* aligned view of it    */
+        int32_t               op_ret     = -1;
+        int32_t               op_errno   = 0;
+        int                   ret        = -1;
+        int                   i          = 0;
+        int                   largest    = 0;
+        int                   written    = 0;
+        int                   align      = 4096;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (vector, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        priv = this->private;
+
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = fd_ctx_get (fd, this, &ctx_val);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL from fd=%p", fd);
+                op_errno = -ret;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        op_ret = lseek (pfd->fd, offset, SEEK_SET);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "lseek(%"PRId64") failed: %s",
+                        offset, strerror (op_errno));
+                goto out;
+        }
+
+        if (!(pfd->flags & O_DIRECT)) {
+                /* plain descriptor: hand the whole vector to the kernel */
+                op_ret = writev (pfd->fd, vector, count);
+                if (op_ret == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "writev failed: %s",
+                                strerror (op_errno));
+                        goto out;
+                }
+        } else {
+                /* O_DIRECT descriptor: go through an aligned bounce buffer */
+                op_ret = -1;
+
+                /* size the bounce buffer for the biggest element */
+                for (i = 0; i < count; i++) {
+                        if (vector[i].iov_len > largest)
+                                largest = vector[i].iov_len;
+                }
+
+                bounce_raw = MALLOC (1 * (largest + align));
+                if (!bounce_raw) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto out;
+                }
+
+                /* the aligned address is the same for every element */
+                bounce = ALIGN_BUF (bounce_raw, align);
+
+                for (i = 0; i < count; i++) {
+                        memcpy (bounce, vector[i].iov_base,
+                                vector[i].iov_len);
+
+                        /* not sure whether writev works on O_DIRECT'd fd */
+                        written = write (pfd->fd, bounce, vector[i].iov_len);
+
+                        if (written == -1) {
+                                /* earlier elements already went out:
+                                 * report the partial count instead */
+                                if (op_ret != -1)
+                                        break;
+
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "O_DIRECT enabled: %s",
+                                        strerror (op_errno));
+                                goto out;
+                        }
+
+                        op_ret = ((op_ret == -1) ? 0 : op_ret) + written;
+                }
+        }
+
+        priv->write_value    += op_ret;
+        priv->interval_write += op_ret;
+
+        if (op_ret >= 0) {
+                /* the reply also carries the stat of the file written to */
+                ret = fstat (pfd->fd, &stbuf);
+                if (ret == -1) {
+                        op_ret = -1;
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR, "fstat failed: %s",
+                                strerror (op_errno));
+                        goto out;
+                }
+        }
+
+ out:
+        if (bounce_raw) {
+                FREE (bounce_raw);
+        }
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}
+
+
+/**
+ * posix_statfs - report filesystem statistics for the backend path of @loc
+ *
+ * Runs statvfs() on the real path. When priv->export_statfs is zero the
+ * block and inode counters are cleared before replying. Unwinds with
+ * (op_ret, op_errno, &vfs). Always returns 0.
+ */
+int32_t
+posix_statfs (call_frame_t *frame, xlator_t *this,
+              loc_t *loc)
+{
+        struct posix_private *priv      = NULL;
+        struct statvfs        vfs       = {0, };
+        char                 *real_path = NULL;
+        int32_t               op_errno  = 0;
+        int32_t               op_ret    = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (this->private, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        priv = this->private;
+
+        op_ret = statvfs (real_path, &vfs);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        if (!priv->export_statfs) {
+                /* hide capacity figures from the caller */
+                vfs.f_blocks = 0;
+                vfs.f_bfree  = 0;
+                vfs.f_bavail = 0;
+                vfs.f_files  = 0;
+                vfs.f_ffree  = 0;
+                vfs.f_favail = 0;
+        }
+
+        op_ret = 0;
+
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &vfs);
+        return 0;
+}
+
+
+/**
+ * posix_flush - flush handler for an open file descriptor
+ *
+ * Checks that @fd carries a posix_fd in its context and then flushes the
+ * whole xattr cache of this translator; no system call is issued on the
+ * descriptor itself. Unwinds with (op_ret, op_errno). Always returns 0.
+ */
+int32_t
+posix_flush (call_frame_t *frame, xlator_t *this,
+             fd_t *fd)
+{
+        struct posix_fd *pfd      = NULL;
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              _fd      = -1;
+        int              ret      = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        ret = fd_ctx_get (fd, this, &ctx_val);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL on fd=%p", fd);
+                goto out;
+        }
+
+        pfd = (struct posix_fd *)(long)ctx_val;
+        _fd = pfd->fd;
+
+        /* only the cached xattrs are pushed out here */
+        posix_xattr_cache_flush_all (this);
+
+        op_ret = 0;
+
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/**
+ * posix_release - drop the backend state attached to @fd
+ *
+ * Decrements the open-file counter, flushes the xattr cache entries keyed
+ * by @fd, closes the underlying descriptor and frees the posix_fd that was
+ * stored in the fd context. A posix_fd that still carries a directory
+ * handle is logged as an error. Always returns 0.
+ */
+int32_t
+posix_release (xlator_t *this,
+               fd_t *fd)
+{
+        xattr_cache_handle_t   handle  = {{0,},0};
+        struct posix_private  *priv    = NULL;
+        struct posix_fd       *pfd     = NULL;
+        uint64_t               ctx_val = 0;
+        int                    rc      = -1;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+
+        priv->stats.nr_files--;
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL from fd=%p", fd);
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        /* write back cached xattrs that belong to this descriptor */
+        handle.fd = fd;
+        posix_xattr_cache_flush (this, &handle);
+
+        rc = close (pfd->fd);
+        if (rc == -1) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "close(): %s", strerror (errno));
+                goto out;
+        }
+
+        if (pfd->dir) {
+                /* a file descriptor should never own a directory handle */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd->dir is %p (not NULL) for file fd=%p",
+                        pfd->dir, fd);
+        }
+
+ out:
+        if (pfd)
+                FREE (pfd);
+
+        return 0;
+}
+
+
+/**
+ * posix_fsync - flush the data (and optionally metadata) of @fd to disk
+ *
+ * @datasync: non-zero requests fdatasync() semantics; when fdatasync() is
+ *            not available the call falls back to fsync(), which is a
+ *            superset.
+ *
+ * A failing sync is reported to the caller (op_ret == -1 with the errno
+ * of the failing call) instead of being logged and then reported as
+ * success. On GF_DARWIN_HOST_OS the function always reports success
+ * without syncing. Unwinds with (op_ret, op_errno). Always returns 0.
+ */
+int32_t
+posix_fsync (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, int32_t datasync)
+{
+        int32_t           op_ret   = -1;
+        int32_t           op_errno = 0;
+        int               _fd      = -1;
+        struct posix_fd  *pfd      = NULL;
+        int               ret      = -1;
+        uint64_t          tmp_pfd  = 0;
+        const char       *sync_op  = "fsync";
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+#ifdef GF_DARWIN_HOST_OS
+        /* Always return success in case of fsync in MAC OS X */
+        op_ret = 0;
+        goto out;
+#endif
+
+        ret = fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_ERROR, "pfd not found in fd's ctx");
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)tmp_pfd;
+
+        _fd = pfd->fd;
+
+        if (datasync) {
+#ifdef HAVE_FDATASYNC
+                sync_op = "fdatasync";
+                op_ret = fdatasync (_fd);
+#else
+                /* no fdatasync() here: a full fsync() covers the request */
+                op_ret = fsync (_fd);
+#endif
+        } else {
+                op_ret = fsync (_fd);
+        }
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "%s: %s",
+                        sync_op, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/* Counter passed to GF_LOG_OCCASIONALLY wherever this file reports that
+ * extended attributes are not supported; presumably used by that macro to
+ * throttle the message. */
+static int gf_posix_xattr_enotsup_log;
+
+/**
+ * set_file_contents - store the value of a file-content xattr as a file
+ *
+ * @real_path: backend directory the file lives in
+ * @trav:      dictionary pair; the file name is the key with its first 15
+ *             characters skipped, the file body is the pair's value
+ * @flags:     setxattr flags
+ *
+ * The file is truncated if it exists and created (mode 0644) otherwise,
+ * then the value is written and the descriptor closed. The descriptor is
+ * released on every path, and the path is built with a bounds check.
+ *
+ * NOTE(review): as in the original, nothing is done unless XATTR_REPLACE is
+ * set in @flags — confirm that is intended.
+ * NOTE(review): the key is used as a path component unchecked; confirm the
+ * callers reject keys containing '/'.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int
+set_file_contents (xlator_t *this, char *real_path,
+                   data_pair_t *trav, int flags)
+{
+        char     *key                          = NULL;
+        char      real_filepath[ZR_PATH_MAX]   = {0,};
+        int32_t   file_fd                      = -1;
+        int       op_ret                       = 0;
+        int       ret                          = -1;
+
+        key = &(trav->key[15]);
+
+        ret = snprintf (real_filepath, sizeof (real_filepath), "%s/%s",
+                        real_path, key);
+        if (ret < 0 || ret >= (int) sizeof (real_filepath)) {
+                op_ret = -ENAMETOOLONG;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "path too long for key %s on path %s",
+                        key, real_path);
+                goto out;
+        }
+
+        if (!(flags & XATTR_REPLACE))
+                goto out;
+
+        /* replace the file if it exists ... */
+        file_fd = open (real_filepath, O_TRUNC|O_WRONLY);
+        if (file_fd == -1) {
+                /* ... otherwise create it */
+                file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644);
+        }
+
+        if (file_fd == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to open file %s with O_CREAT: %s",
+                        key, strerror (-op_ret));
+                goto out;
+        }
+
+        ret = write (file_fd, trav->value->data, trav->value->len);
+        if (ret == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "write failed on %s while setxattr with "
+                        "key %s: %s",
+                        real_filepath, key, strerror (-op_ret));
+                goto out;
+        }
+
+        ret = close (file_fd);
+        file_fd = -1;   /* never close twice, whatever close() returned */
+        if (ret == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "close failed on %s while setxattr with "
+                        "key %s: %s",
+                        real_filepath, key, strerror (-op_ret));
+                goto out;
+        }
+
+ out:
+        if (file_fd != -1)
+                close (file_fd);
+
+        return op_ret;
+}
+
+/**
+ * handle_pair - apply one key/value pair of a setxattr request
+ *
+ * Keys matching ZR_FILE_CONTENT_REQUEST are stored as files through
+ * set_file_contents(); every other key is set with lsetxattr() on
+ * @real_path. The errno of a failed lsetxattr() is captured before any
+ * logging so that the logging calls cannot alter the returned code.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int
+handle_pair (xlator_t *this, char *real_path,
+             data_pair_t *trav, int flags)
+{
+        int sys_ret = -1;
+        int ret     = 0;
+        int err     = 0;
+
+        if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
+                ret = set_file_contents (this, real_path, trav, flags);
+        } else {
+                sys_ret = lsetxattr (real_path, trav->key, trav->value->data,
+                                     trav->value->len, flags);
+
+                if (sys_ret < 0) {
+                        err = errno;
+
+                        if (err == ENOTSUP) {
+                                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
+                                                    this->name,GF_LOG_WARNING,
+                                                    "Extended attributes not "
+                                                    "supported");
+                        } else if (err == ENOENT) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "setxattr on %s failed: %s", real_path,
+                                        strerror (err));
+                        } else {
+
+#ifdef GF_DARWIN_HOST_OS
+                                gf_log (this->name,
+                                        ((err == EINVAL) ?
+                                         GF_LOG_DEBUG : GF_LOG_WARNING),
+                                        "%s: key:%s error:%s",
+                                        real_path, trav->key,
+                                        strerror (err));
+#else /* ! DARWIN */
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "%s: key:%s error:%s",
+                                        real_path, trav->key,
+                                        strerror (err));
+#endif /* DARWIN */
+                        }
+
+                        ret = -err;
+                        goto out;
+                }
+        }
+ out:
+        return ret;
+}
+
+/**
+ * posix_setxattr - apply every pair of @dict to the backend path of @loc
+ *
+ * Pairs are handed to handle_pair() in list order; the first failure stops
+ * the walk and its errno is reported. Unwinds with (op_ret, op_errno).
+ * Always returns 0.
+ */
+int32_t
+posix_setxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, dict_t *dict, int flags)
+{
+        data_pair_t *pair      = NULL;
+        char        *real_path = NULL;
+        int32_t      op_errno  = 0;
+        int32_t      op_ret    = -1;
+        int          rc        = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (dict, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        for (pair = dict->members_list; pair; pair = pair->next) {
+                rc = handle_pair (this, real_path, pair, flags);
+                if (rc < 0) {
+                        op_errno = -rc;
+                        goto out;
+                }
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * get_file_contents - read a whole file into a freshly allocated buffer
+ *
+ * @real_path: backend directory the file lives in
+ * @name:      request key; the file name is @name with its first 15
+ *             characters skipped
+ * @contents:  out parameter; on success points to a NUL-terminated copy of
+ *             the file which the caller must free. On failure any buffer
+ *             it points to is freed and the pointer is reset to NULL.
+ *
+ * A read that returns no data is treated as a failure, as before.
+ *
+ * Returns 0 on success or a negative value (a negative errno where one is
+ * available).
+ */
+int
+get_file_contents (xlator_t *this, char *real_path,
+                   const char *name, char **contents)
+{
+        char         real_filepath[ZR_PATH_MAX] = {0,};
+        char        *key                        = NULL;
+        int32_t      file_fd                    = -1;
+        struct stat  stbuf                      = {0,};
+        int          op_ret                     = 0;
+        int          ret                        = -1;
+
+        key = (char *) &(name[15]);
+
+        ret = snprintf (real_filepath, sizeof (real_filepath), "%s/%s",
+                        real_path, key);
+        if (ret < 0 || ret >= (int) sizeof (real_filepath)) {
+                op_ret = -ENAMETOOLONG;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "path too long for key %s on path %s",
+                        key, real_path);
+                goto out;
+        }
+
+        op_ret = lstat (real_filepath, &stbuf);
+        if (op_ret == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
+                        real_filepath, strerror (-op_ret));
+                goto out;
+        }
+
+        file_fd = open (real_filepath, O_RDONLY);
+
+        if (file_fd == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
+                        real_filepath, strerror (-op_ret));
+                goto out;
+        }
+
+        *contents = CALLOC (stbuf.st_size + 1, sizeof(char));
+
+        if (! *contents) {
+                /* errno is not guaranteed to be set here */
+                op_ret = -ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                goto out;
+        }
+
+        ret = read (file_fd, *contents, stbuf.st_size);
+        if (ret <= 0) {
+                op_ret = (ret == -1) ? -errno : -1;
+                gf_log (this->name, GF_LOG_ERROR, "read on %s failed",
+                        real_filepath);
+                goto out;
+        }
+
+        /* index the buffer, not the caller's pointer variable */
+        (*contents)[stbuf.st_size] = '\0';
+
+        op_ret = close (file_fd);
+        file_fd = -1;
+        if (op_ret == -1) {
+                op_ret = -errno;
+                gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
+                        real_filepath, strerror (-op_ret));
+                goto out;
+        }
+
+ out:
+        if (op_ret < 0) {
+                if (*contents) {
+                        FREE (*contents);
+                        *contents = NULL;
+                }
+                if (file_fd != -1)
+                        close (file_fd);
+        }
+
+        return op_ret;
+}
+
+/**
+ * posix_getxattr - this function returns a dictionary with all the
+ *                  key:value pair present as xattr. used for
+ *                  both 'listxattr' and 'getxattr'.
+ *
+ * The attribute names are listed with llistxattr() and each value is
+ * fetched with lgetxattr() and stored in the reply dictionary. Unwinds with
+ * (op_ret, op_errno, dict); op_ret is the size of the name list on success
+ * and -1 on failure. Always returns 0.
+ *
+ * The dictionary is referenced as soon as it exists so that the unref at
+ * the end is balanced on every path.
+ *
+ * NOTE(review): for a file-content request on a directory the file is read
+ * through get_file_contents() but its contents are never placed in the
+ * reply; the buffer is only released here. Confirm what was intended.
+ */
+int32_t
+posix_getxattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name)
+{
+        int32_t  op_ret         = -1;
+        int32_t  op_errno       = ENOENT;
+        int32_t  list_offset    = 0;
+        ssize_t  size           = 0;
+        size_t   remaining_size = 0;
+        size_t   key_len        = 0;
+        char    *key            = NULL;
+        char    *value          = NULL;
+        char    *list           = NULL;
+        char    *real_path      = NULL;
+        dict_t  *dict           = NULL;
+        char    *file_contents  = NULL;
+        int      ret            = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (loc->inode && S_ISDIR(loc->inode->st_mode) && name &&
+            ZR_FILE_CONTENT_REQUEST(name)) {
+                ret = get_file_contents (this, real_path, name,
+                                         &file_contents);
+                if (ret < 0) {
+                        op_errno = -ret;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "getting file contents failed: %s",
+                                strerror (op_errno));
+                        goto out;
+                }
+        }
+
+        dict = get_new_dict ();
+        if (!dict) {
+                op_errno = ENOMEM;
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                goto out;
+        }
+        dict_ref (dict);
+
+        /* Get the total size */
+        size = llistxattr (real_path, NULL, 0);
+        if (size == -1) {
+                op_errno = errno;
+                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+                                             this->name, GF_LOG_WARNING,
+                                             "Extended attributes not "
+                                             "supported.");
+                }
+                else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "listxattr failed on %s: %s",
+                                real_path, strerror (op_errno));
+                }
+                goto out;
+        }
+
+        if (size == 0)
+                goto done;
+
+        list = alloca (size + 1);
+        if (!list) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                goto out;
+        }
+
+        size = llistxattr (real_path, list, size);
+        if (size == -1) {
+                /* e.g. the list grew between the two calls */
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "listxattr failed on %s: %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        /* guarantee termination of the last name */
+        list[size] = '\0';
+
+        remaining_size = size;
+        list_offset = 0;
+        while (remaining_size > 0) {
+                /* names are used in place; dict_set is handed a reused
+                 * stack buffer in the original, so it is taken to copy
+                 * the key */
+                key = list + list_offset;
+                if (*key == '\0')
+                        break;
+
+                op_ret = lgetxattr (real_path, key, NULL, 0);
+                if (op_ret == -1)
+                        break;
+
+                value = CALLOC (op_ret + 1, sizeof(char));
+                if (!value) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                        goto out;
+                }
+
+                op_ret = lgetxattr (real_path, key, value, op_ret);
+                if (op_ret == -1) {
+                        FREE (value);
+                        break;
+                }
+
+                value [op_ret] = '\0';
+                dict_set (dict, key, data_from_dynptr (value, op_ret));
+
+                key_len = strlen (key) + 1;
+                if (key_len > remaining_size)
+                        break;
+                remaining_size -= key_len;
+                list_offset += key_len;
+
+        } /* while (remaining_size > 0) */
+
+ done:
+        op_ret = size;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+
+        if (dict)
+                dict_unref (dict);
+
+        if (file_contents) {
+                FREE (file_contents);
+        }
+
+        return 0;
+}
+
+/**
+ * posix_removexattr - remove the extended attribute @name from @loc
+ *
+ * Failures other than ENOATTR and EPERM are logged as warnings. Unwinds
+ * with (op_ret, op_errno). Always returns 0.
+ */
+int32_t
+posix_removexattr (call_frame_t *frame, xlator_t *this,
+                   loc_t *loc, const char *name)
+{
+        char    *real_path = NULL;
+        int32_t  op_errno  = 0;
+        int32_t  op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        op_ret = lremovexattr (real_path, name);
+
+        if (op_ret != -1) {
+                op_ret = 0;
+        } else {
+                op_errno = errno;
+                /* a missing attribute or a permission refusal is routine */
+                if (!(op_errno == ENOATTR || op_errno == EPERM))
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "removexattr on %s: %s", loc->path,
+                                strerror (op_errno));
+        }
+
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/**
+ * posix_fsyncdir - fsync handler for directory descriptors
+ *
+ * Only verifies that @fd carries a posix_fd in its context; nothing is
+ * synced and @datasync is not consulted. Unwinds with (op_ret, op_errno).
+ * Always returns 0.
+ */
+int32_t
+posix_fsyncdir (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, int datasync)
+{
+        struct posix_fd *pfd      = NULL;
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              _fd      = -1;
+        int              rc       = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                op_errno = -rc;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL, fd=%p", fd);
+                goto out;
+        }
+
+        pfd = (struct posix_fd *)(long)ctx_val;
+        _fd = pfd->fd;
+
+        op_ret = 0;
+
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+
+/**
+ * posix_print_xattr - trace-log one dictionary pair
+ *
+ * Logs @key together with @value converted through data_to_int32(). The
+ * @this and @data arguments are not used.
+ */
+void
+posix_print_xattr (dict_t *this,
+                   char *key,
+                   data_t *value,
+                   void *data)
+{
+        gf_log ("posix", GF_LOG_TRACE, "(key/val) = (%s/%d)",
+                key,
+                data_to_int32 (value));
+}
+
+
+/**
+ * __add_array - element-wise sum of two arrays of 32-bit numbers
+ *               (stored in network byte order): dest = dest + src
+ * @dest:  array that receives the sums
+ * @src:   array of addends
+ * @count: number of 32-bit numbers in each array
+ *
+ * FIXME: handle overflow
+ */
+
+static void
+__add_array (int32_t *dest, int32_t *src, int count)
+{
+        /* walk both arrays front to back, one element per step */
+        for (; count > 0; count--, dest++, src++) {
+                *dest = hton32 (ntoh32 (*dest) + ntoh32 (*src));
+        }
+}
+
+
+/**
+ * xattrop - xattr operations - for internal use by GlusterFS
+ * @optype: ADD_ARRAY:
+ *            dict should contain:
+ *               "key" ==> array of 32-bit numbers
+ *
+ * For every pair in @xattr the current value is read through the xattr
+ * cache into a scratch array, combined with the pair's value according to
+ * @optype, written back through the cache and stored in @xattr in place of
+ * the request value. The scratch allocation is checked before it is used.
+ *
+ * Unwinds with (op_ret, op_errno, xattr). Always returns 0.
+ */
+
+
+int
+posix_xattrop_common (call_frame_t *frame, xlator_t *this,
+                      xattr_cache_handle_t *handle,
+                      gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        int32_t     *array    = NULL;
+
+        int          ret      = 0;
+        int          count    = 0;
+
+        int          op_ret   = 0;
+        int          op_errno = 0;
+
+        data_pair_t *trav     = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (xattr, out);
+        VALIDATE_OR_GOTO (this, out);
+
+        trav = xattr->members_list;
+
+        while (trav) {
+                count = trav->value->len / sizeof (int32_t);
+                array = CALLOC (count, sizeof (int32_t));
+                if (!array) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        op_ret   = -1;
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                ret = posix_xattr_cache_read (this, handle, trav->key,
+                                              array, trav->value->len);
+
+                switch (optype) {
+
+                case GF_XATTROP_ADD_ARRAY:
+                        __add_array (array, (int32_t *) trav->value->data,
+                                     trav->value->len / 4);
+                        break;
+
+                default:
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "unknown xattrop type %d",
+                                optype);
+
+                        op_ret   = -1;
+                        op_errno = EINVAL;
+                        goto out;
+                }
+
+                ret = posix_xattr_cache_write (this, handle, trav->key,
+                                               array, trav->value->len);
+
+                ret = dict_set_bin (xattr, trav->key, array,
+                                    trav->value->len);
+
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "key=%s (%s)",
+                                trav->key, strerror (-ret));
+                        op_ret   = -1;
+                        op_errno = EINVAL;
+                        goto out;
+                }
+
+                trav  = trav->next;
+                /* the array now lives in the dictionary */
+                array = NULL;
+        }
+
+out:
+        if (array)
+                FREE (array);
+
+        STACK_UNWIND (frame, op_ret, op_errno, xattr);
+        return 0;
+}
+
+
+/**
+ * posix_xattrop - path-based entry point for xattr operations
+ *
+ * Wraps a copy of @loc in a cache handle, delegates to
+ * posix_xattrop_common() and wipes the copy afterwards. Returns whatever
+ * the common routine returns.
+ */
+int
+posix_xattrop (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        xattr_cache_handle_t cache_handle = {{0,}, 0};
+        int                  rc           = -1;
+
+        loc_copy (&cache_handle.loc, loc);
+
+        rc = posix_xattrop_common (frame, this, &cache_handle, optype, xattr);
+
+        loc_wipe (&cache_handle.loc);
+
+        return rc;
+}
+
+
+/**
+ * posix_fxattrop - descriptor-based entry point for xattr operations
+ *
+ * Stores @fd in a cache handle and delegates to posix_xattrop_common().
+ * Returns whatever the common routine returns.
+ */
+int
+posix_fxattrop (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+{
+        xattr_cache_handle_t cache_handle = {{0,}, 0};
+
+        cache_handle.fd = fd;
+
+        return posix_xattrop_common (frame, this, &cache_handle,
+                                     optype, xattr);
+}
+
+
+/**
+ * posix_access - check accessibility of the backend path of @loc
+ *
+ * Only the low three bits of @mask are passed to access(). Unwinds with
+ * (op_ret, op_errno). Always returns 0.
+ */
+int
+posix_access (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, int32_t mask)
+{
+        char    *real_path = NULL;
+        int32_t  op_errno  = 0;
+        int32_t  op_ret    = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        if (access (real_path, mask & 07) == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "access failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/**
+ * posix_ftruncate - truncate the file behind @fd to @offset bytes
+ *
+ * After a successful truncate the file is stat'ed and the result sent back.
+ * Unwinds with (op_ret, op_errno, &stbuf). Always returns 0.
+ */
+int32_t
+posix_ftruncate (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, off_t offset)
+{
+        struct posix_fd *pfd      = NULL;
+        struct stat      stbuf    = {0,};
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              rc       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL, fd=%p", fd);
+                op_errno = -rc;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        op_ret = ftruncate (pfd->fd, offset);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "ftruncate failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* the reply carries the post-truncate attributes */
+        op_ret = fstat (pfd->fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}
+
+/**
+ * posix_fchown - change owner and group of the file behind @fd
+ *
+ * After a successful fchown() the file is stat'ed and the result sent
+ * back. Unwinds with (op_ret, op_errno, &stbuf). Always returns 0.
+ */
+int32_t
+posix_fchown (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, uid_t uid, gid_t gid)
+{
+        struct posix_fd *pfd      = NULL;
+        struct stat      stbuf    = {0,};
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              rc       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL, fd=%p", fd);
+                op_errno = -rc;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        op_ret = fchown (pfd->fd, uid, gid);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "fchown failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* the reply carries the attributes after the ownership change */
+        op_ret = fstat (pfd->fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}
+
+
+/**
+ * posix_fchmod - change the permission bits of the file behind @fd
+ *
+ * After a successful fchmod() the file is stat'ed and the result sent
+ * back. Unwinds with (op_ret, op_errno, &stbuf). Always returns 0.
+ */
+int32_t
+posix_fchmod (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, mode_t mode)
+{
+        struct posix_fd *pfd      = NULL;
+        struct stat      stbuf    = {0,};
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              rc       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL fd=%p", fd);
+                op_errno = -rc;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        op_ret = fchmod (pfd->fd, mode);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "fchmod failed: %s", strerror (op_errno));
+                goto out;
+        }
+
+        /* the reply carries the attributes after the mode change */
+        op_ret = fstat (pfd->fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING,
+                        "fstat failed: %s", strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+
+        return 0;
+}
+
+
+/* Return 1 when modes m1 and m2 have identical file-type bits (S_IFMT),
+ * 0 otherwise; permission bits are ignored. */
+static int
+same_file_type (mode_t m1, mode_t m2)
+{
+        return ((m1 & S_IFMT) == (m2 & S_IFMT));
+}
+
+
+/**
+ * ensure_file_type - verify that @pathname has the file type given in @mode
+ *
+ * Returns 0 when lstat() succeeds and the file-type bits agree, -errno
+ * when lstat() fails and -EEXIST when the existing entry is of a
+ * different type.
+ */
+static int
+ensure_file_type (xlator_t *this, char *pathname, mode_t mode)
+{
+        struct stat  found = {0,};
+        int          rc    = -1;
+
+        rc = lstat (pathname, &found);
+        if (rc == -1) {
+                rc = -errno;
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "stat failed while trying to make sure entry %s "
+                        "is a directory: %s", pathname, strerror (errno));
+                return rc;
+        }
+
+        if (same_file_type (mode, found.st_mode))
+                return 0;
+
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "entry %s is a different type of file "
+                "than expected", pathname);
+        return -EEXIST;
+}
+
+/**
+ * create_entry - create one directory entry described by @entry at @pathname
+ *
+ * Directories are always created. Regular files (as empty files), symlinks
+ * and device/fifo/socket nodes are created only when @flags has
+ * GF_SET_IF_NOT_PRESENT set or GF_SET_DIR_ONLY clear. An entry that already
+ * exists is accepted if ensure_file_type() finds it to be of the expected
+ * type. For everything but symlinks atime and mtime are then set from
+ * @entry->buf.
+ *
+ * The descriptor from open() is closed only when open() succeeded, and the
+ * timestamps are left alone when the existing entry failed the type check.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int
+create_entry (xlator_t *this, int32_t flags,
+              dir_entry_t *entry, char *pathname)
+{
+        int            op_ret = 0;
+        int            ret    = -1;
+        struct timeval tv[2]  = {{0,0},{0,0}};
+
+        if (S_ISDIR (entry->buf.st_mode)) {
+                /*
+                 * If the entry is directory, create it by
+                 * calling 'mkdir'. If the entry is already
+                 * present, check if it is a directory,
+                 * and issue a warning if otherwise.
+                 */
+
+                ret = mkdir (pathname, entry->buf.st_mode);
+                if (ret == -1) {
+                        if (errno == EEXIST) {
+                                op_ret = ensure_file_type (this, pathname,
+                                                           entry->buf.st_mode);
+                        }
+                        else {
+                                op_ret = -errno;
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "mkdir %s with mode (0%o) failed: %s",
+                                        pathname, entry->buf.st_mode,
+                                        strerror (errno));
+                                goto out;
+                        }
+                }
+
+        } else if ((flags & GF_SET_IF_NOT_PRESENT)
+                   || !(flags & GF_SET_DIR_ONLY)) {
+
+                /* create a 0-byte file here */
+
+                if (S_ISREG (entry->buf.st_mode)) {
+                        ret = open (pathname, O_CREAT|O_EXCL,
+                                    entry->buf.st_mode);
+
+                        if (ret == -1) {
+                                if (errno == EEXIST) {
+                                        op_ret = ensure_file_type (this,
+                                                                   pathname,
+                                                                   entry->buf.st_mode);
+                                }
+                                else {
+                                        op_ret = -errno;
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Error creating file %s with "
+                                                "mode (0%o): %s",
+                                                pathname, entry->buf.st_mode,
+                                                strerror (errno));
+                                        goto out;
+                                }
+                        } else {
+                                /* only a descriptor we own is closed */
+                                close (ret);
+                        }
+
+                } else if (S_ISLNK (entry->buf.st_mode)) {
+                        ret = symlink (entry->link, pathname);
+
+                        if (ret == -1) {
+                                if (errno == EEXIST) {
+                                        op_ret = ensure_file_type (this,
+                                                                   pathname,
+                                                                   entry->buf.st_mode);
+                                }
+                                else {
+                                        op_ret = -errno;
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "error creating symlink %s: %s"
+                                                , pathname, strerror (errno));
+                                        goto out;
+                                }
+                        }
+
+                } else if (S_ISBLK (entry->buf.st_mode) ||
+                           S_ISCHR (entry->buf.st_mode) ||
+                           S_ISFIFO (entry->buf.st_mode) ||
+                           S_ISSOCK (entry->buf.st_mode)) {
+
+                        ret = mknod (pathname, entry->buf.st_mode,
+                                     entry->buf.st_dev);
+
+                        if (ret == -1) {
+                                if (errno == EEXIST) {
+                                        op_ret = ensure_file_type (this,
+                                                                   pathname,
+                                                                   entry->buf.st_mode);
+                                } else {
+                                        op_ret = -errno;
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "error creating device file "
+                                                "%s: %s",
+                                                pathname, strerror (errno));
+                                        goto out;
+                                }
+                        }
+                } else {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid mode 0%o for %s", entry->buf.st_mode,
+                                pathname);
+                        op_ret = -EINVAL;
+                        goto out;
+                }
+        }
+
+        /* an existing entry of the wrong type is reported, not modified */
+        if (op_ret < 0)
+                goto out;
+
+        /*
+         * Preserve atime and mtime
+         */
+
+        if (!S_ISLNK (entry->buf.st_mode)) {
+                tv[0].tv_sec = entry->buf.st_atime;
+                tv[1].tv_sec = entry->buf.st_mtime;
+                ret = utimes (pathname, tv);
+                if (ret == -1) {
+                        op_ret = -errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "utimes %s failed: %s",
+                                pathname, strerror (errno));
+                        goto out;
+                }
+        }
+
+out:
+        return op_ret;
+
+}
+
+
+/**
+ * posix_setdents - create the entries listed in @entries under the
+ *                  directory behind @fd
+ *
+ * The directory path is taken from the posix_fd stored in the fd context.
+ * For each entry after the list head:
+ *   - create_entry() creates it according to @flags,
+ *   - its mode is set with chmod() (symlinks excepted),
+ *   - its ownership is set with lchown(),
+ *   - with GF_SET_EPOCH_TIME its timestamps are reset to zero.
+ * The first failure aborts the walk. Entry paths are built with a bounds
+ * check against the size of the path buffer.
+ *
+ * Unwinds with (op_ret, op_errno). Always returns 0.
+ */
+int
+posix_setdents (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, int32_t flags, dir_entry_t *entries,
+                int32_t count)
+{
+        char             *real_path             = NULL;
+        int32_t           ret                   = 0;
+        int32_t           op_ret                = -1;
+        int32_t           op_errno              = 0;
+        struct posix_fd  *pfd                   = NULL;
+        struct timeval    tv[2]                 = {{0, }, {0, }};
+        uint64_t          tmp_pfd               = 0;
+        char              pathname[ZR_PATH_MAX] = {0,};
+        dir_entry_t      *trav                  = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+        VALIDATE_OR_GOTO (entries, out);
+
+        ret = fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                op_errno = -ret;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "fd's ctx not found on fd=%p for %s",
+                        fd, this->name);
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)tmp_pfd;
+
+        real_path = pfd->path;
+
+        if (!real_path) {
+                op_errno = EINVAL;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "path is NULL on pfd=%p fd=%p", pfd, fd);
+                goto out;
+        }
+
+        posix_xattr_cache_flush_all (this);
+
+        /* fd exists, and everything looks fine */
+        /**
+         * create an entry for each one present in '@entries'
+         *  - if flag is set (ie, if its namespace), create both directories
+         *    and files
+         *  - if not set, create only directories.
+         *
+         *  after the entry is created, change the mode and ownership of the
+         *  entry according to the stat present in entries->buf.
+         */
+
+        trav = entries->next;
+        while (trav) {
+                ret = snprintf (pathname, sizeof (pathname), "%s/%s",
+                                real_path, trav->name);
+                if (ret < 0 || ret >= (int32_t) sizeof (pathname)) {
+                        op_errno = ENAMETOOLONG;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "path too long for entry %s under %s",
+                                trav->name, real_path);
+                        goto out;
+                }
+
+                ret = create_entry (this, flags, trav, pathname);
+                if (ret < 0) {
+                        op_errno = -ret;
+                        goto out;
+                }
+
+                /* TODO: handle another flag, GF_SET_OVERWRITE */
+
+                /* Change the mode */
+                if (!S_ISLNK (trav->buf.st_mode)) {
+                        ret = chmod (pathname, trav->buf.st_mode);
+                        if (ret == -1) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "chmod on %s failed: %s", pathname,
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                }
+
+                /* change the ownership */
+                ret = lchown (pathname, trav->buf.st_uid, trav->buf.st_gid);
+                if (ret == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lchown on %s failed: %s", pathname,
+                                strerror (op_errno));
+                        goto out;
+                }
+
+                if (flags & GF_SET_EPOCH_TIME) {
+                        /* tv is all zeroes: reset atime/mtime to the epoch */
+                        ret = utimes (pathname, tv);
+                        if (ret == -1) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "utimes on %s failed: %s", pathname,
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                }
+
+                /* consider the next entry */
+                trav = trav->next;
+        }
+
+        op_ret = 0;
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/**
+ * posix_fstat - stat the file behind @fd
+ *
+ * Unwinds with (op_ret, op_errno, &stbuf). Always returns 0.
+ */
+int32_t
+posix_fstat (call_frame_t *frame, xlator_t *this,
+             fd_t *fd)
+{
+        struct posix_fd *pfd      = NULL;
+        struct stat      stbuf    = {0,};
+        uint64_t         ctx_val  = 0;
+        int32_t          op_errno = 0;
+        int32_t          op_ret   = -1;
+        int              rc       = -1;
+
+        DECLARE_OLD_FS_ID_VAR;
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        rc = fd_ctx_get (fd, this, &ctx_val);
+        if (rc < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL, fd=%p", fd);
+                op_errno = -rc;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        op_ret = fstat (pfd->fd, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
+        return 0;
+}
+
+/* Number of posix_lk() calls so far; handed to GF_LOG_OCCASIONALLY below. */
+static int gf_posix_lk_log;
+
+/**
+ * posix_lk - stub for the lk operation
+ *
+ * Locking is not implemented by this translator.  Every call is unwound
+ * with -1/ENOSYS and a zero-filled struct flock, and a hint to load
+ * "features/posix-locks" is emitted through GF_LOG_OCCASIONALLY.
+ */
+int32_t
+posix_lk (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        struct flock empty_lock = {0, };
+
+        gf_posix_lk_log++;
+        GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR,
+                             "\"features/posix-locks\" translator is "
+                             "not loaded, you need to use it");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS, &empty_lock);
+
+        return 0;
+}
+
+/**
+ * posix_inodelk - stub for the inodelk operation
+ *
+ * Not implemented here: logs at CRITICAL level that "features/posix-locks"
+ * is missing and unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+posix_inodelk (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, int32_t cmd, struct flock *lock)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+/**
+ * posix_finodelk - stub for the finodelk operation
+ *
+ * Not implemented here: logs at CRITICAL level that "features/posix-locks"
+ * is missing and unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+posix_finodelk (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, int32_t cmd, struct flock *lock)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+
+/**
+ * posix_entrylk - stub for the entrylk operation
+ *
+ * Not implemented here: logs at CRITICAL level that "features/posix-locks"
+ * is missing and unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+posix_entrylk (call_frame_t *frame, xlator_t *this,
+               loc_t *loc, const char *basename, entrylk_cmd cmd,
+               entrylk_type type)
+{
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, -1, ENOSYS);
+
+        return 0;
+}
+
+/**
+ * posix_fentrylk - stub for the fentrylk operation
+ *
+ * Not implemented here: logs at CRITICAL level that "features/posix-locks"
+ * is missing and unwinds with -1/ENOSYS.  Always returns 0.
+ */
+int32_t
+posix_fentrylk (call_frame_t *frame, xlator_t *this,
+                fd_t *fd, const char *basename, entrylk_cmd cmd,
+                entrylk_type type)
+{
+        frame->root->rsp_refs = NULL;
+
+        /* message kept identical to the other lock stubs (the second
+         * literal used to start with a stray space) */
+        gf_log (this->name, GF_LOG_CRITICAL,
+                "\"features/posix-locks\" translator is not loaded. "
+                "You need to use it for proper functioning of GlusterFS");
+
+        STACK_UNWIND (frame, -1, ENOSYS);
+        return 0;
+}
+
+
+/**
+ * posix_readdir - read entries from an open directory
+ * @frame: call frame that is unwound with the result
+ * @this:  translator instance whose fd context is looked up
+ * @fd:    open directory; its context holds the posix_fd with the DIR handle
+ * @size:  budget for the returned entries, measured with dirent_size()
+ * @off:   directory position to resume from; 0 rewinds to the beginning
+ *
+ * Unwinds with (op_ret, op_errno, entry list): op_ret is the number of
+ * entries collected, or -1 on failure.  The list is released after the
+ * unwind.  The function itself always returns 0.
+ */
+int32_t
+posix_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t off)
+{
+        uint64_t          tmp_pfd    = 0;
+        struct posix_fd * pfd        = NULL;
+        DIR *             dir        = NULL;
+        int               ret        = -1;
+        size_t            filled     = 0;
+        int               count      = 0;
+
+        int32_t           op_ret     = -1;
+        int32_t           op_errno   = 0;
+
+        gf_dirent_t *     this_entry = NULL;
+        gf_dirent_t       entries;
+        struct dirent *   entry      = NULL;
+        off_t             in_case    = -1;
+        int32_t           this_size  = -1;
+
+        /* Must come before the first "goto out": the exit path hands the
+         * list to STACK_UNWIND and gf_dirent_free, so it has to be a valid
+         * (empty) list even when validation fails. */
+        INIT_LIST_HEAD (&entries.list);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        ret = fd_ctx_get (fd, this, &tmp_pfd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "pfd is NULL, fd=%p", fd);
+                op_errno = -ret;
+                goto out;
+        }
+        pfd = (struct posix_fd *)(long)tmp_pfd;
+
+        dir = pfd->dir;
+
+        if (!dir) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dir is NULL for fd=%p", fd);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (!off) {
+                rewinddir (dir);
+        } else {
+                seekdir (dir, off);
+        }
+
+        while (filled <= size) {
+                /* remember where this entry starts so it can be "un-read"
+                 * if it does not fit into the budget */
+                in_case = telldir (dir);
+
+                if (in_case == -1) {
+                        op_errno = errno;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "telldir failed: %s",
+                                strerror (op_errno));
+                        goto out;
+                }
+
+                /* readdir returns NULL both at end-of-directory and on
+                 * error; errno tells the two apart */
+                errno = 0;
+                entry = readdir (dir);
+
+                if (!entry) {
+                        if (errno == EBADF) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "readdir failed: %s",
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                        break;
+                }
+
+                this_size = dirent_size (entry);
+
+                if (this_size + filled > size) {
+                        seekdir (dir, in_case);
+                        break;
+                }
+
+                this_entry = gf_dirent_for_name (entry->d_name);
+
+                if (!this_entry) {
+                        /* report a real error code instead of unwinding
+                         * with op_ret == -1 and op_errno == 0 */
+                        op_errno = (errno != 0) ? errno : ENOMEM;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create gf_dirent for entry %s (%s)",
+                                entry->d_name, strerror (op_errno));
+                        goto out;
+                }
+                this_entry->d_off = telldir (dir);
+                this_entry->d_ino = entry->d_ino;
+
+                list_add_tail (&this_entry->list, &entries.list);
+
+                filled += this_size;
+                count ++;
+        }
+
+        op_ret = count;
+
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, &entries);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+
+/**
+ * posix_stats - report disk and throughput statistics for the export
+ * @frame: call frame that is unwound with the result
+ * @this:  translator instance; its private data holds the counters
+ * @flags: not used by this implementation
+ *
+ * Fills a struct xlator_stats from statvfs() on the export directory and
+ * from the read/write counters kept in the private structure, then resets
+ * the per-interval counters.  Unwinds with (op_ret, op_errno, stats);
+ * op_ret is 0 on success and -1 on failure.  Always returns 0.
+ */
+int32_t
+posix_stats (call_frame_t *frame, xlator_t *this,
+             int32_t flags)
+
+{
+        int32_t op_ret   = -1;
+        int32_t op_errno = 0;
+
+        struct xlator_stats    xlstats = {0, };
+        struct xlator_stats *  stats   = NULL;
+        struct statvfs         buf     = {0,};
+        struct timeval         tv      = {0,};
+        struct posix_private * priv    = NULL;
+
+        int64_t avg_read  = 0;
+        int64_t avg_write = 0;
+        int64_t _time_ms  = 0;
+
+        DECLARE_OLD_FS_ID_VAR;
+
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+
+        /* only touch this->private once "this" has been validated */
+        priv = (struct posix_private *)this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        stats = &xlstats;
+
+        op_ret = statvfs (priv->base_path, &buf);
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        /* client info is maintained at FSd */
+        stats->nr_clients = priv->stats.nr_clients;
+        stats->nr_files   = priv->stats.nr_files;
+
+        /* number of free block in the filesystem. */
+        stats->free_disk = buf.f_bfree * buf.f_bsize;
+
+        stats->total_disk_size = buf.f_blocks * buf.f_bsize;
+        stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize;
+
+        /* Calculate read and write usage */
+        op_ret = gettimeofday (&tv, NULL);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR,
+                        "gettimeofday failed: %s", strerror (op_errno));
+                goto out;
+        }
+
+        /* milliseconds elapsed since init(): averages over the lifetime */
+        _time_ms = (tv.tv_sec - priv->init_time.tv_sec) * 1000 +
+                ((tv.tv_usec - priv->init_time.tv_usec) / 1000);
+
+        avg_read  = (_time_ms) ? (priv->read_value / _time_ms) : 0;  /* KBps */
+        avg_write = (_time_ms) ? (priv->write_value / _time_ms) : 0; /* KBps */
+
+        /* milliseconds elapsed since the previous call: interval rates,
+         * used only to raise the recorded maxima */
+        _time_ms = (tv.tv_sec - priv->prev_fetch_time.tv_sec) * 1000 +
+                ((tv.tv_usec - priv->prev_fetch_time.tv_usec) / 1000);
+
+        if (_time_ms && ((priv->interval_read / _time_ms) > priv->max_read)) {
+                priv->max_read = (priv->interval_read / _time_ms);
+        }
+
+        if (_time_ms &&
+            ((priv->interval_write / _time_ms) > priv->max_write)) {
+                priv->max_write = priv->interval_write / _time_ms;
+        }
+
+        /* max_read/max_write start at 1 in init() and are only ever raised
+         * above, so these divisions cannot divide by zero */
+        stats->read_usage  = avg_read / priv->max_read;
+        stats->write_usage = avg_write / priv->max_write;
+
+        op_ret = gettimeofday (&(priv->prev_fetch_time), NULL);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "gettimeofday failed: %s",
+                        strerror (op_errno));
+                goto out;
+        }
+
+        priv->interval_read  = 0;
+        priv->interval_write = 0;
+
+        op_ret = 0;
+
+ out:
+        SET_TO_OLD_FS_ID ();
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, stats);
+        return 0;
+}
+
+/**
+ * posix_checksum - XOR-fold the entry names of a directory
+ * @frame: call frame that is unwound with the result
+ * @this:  translator instance (supplies the export base path)
+ * @loc:   directory to scan; loc->path is relative to the export
+ * @flag:  not used by this implementation
+ *
+ * Every entry name is XOR-ed byte by byte into one of two arrays of
+ * ZR_FILENAME_MAX bytes: dir_checksum for directories, file_checksum for
+ * everything else (as reported by lstat).  Entries that cannot be
+ * lstat-ed are skipped.  Unwinds with (op_ret, op_errno, file_checksum,
+ * dir_checksum); op_ret is 0 on success, -1 on failure.  Always returns 0.
+ */
+int32_t
+posix_checksum (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int32_t flag)
+{
+        char *          real_path = NULL;
+        DIR *           dir       = NULL;
+        struct dirent * dirent    = NULL;
+        uint8_t         file_checksum[ZR_FILENAME_MAX] = {0,};
+        uint8_t         dir_checksum[ZR_FILENAME_MAX]  = {0,};
+        int32_t         op_ret    = -1;
+        int32_t         op_errno  = 0;
+        size_t          i         = 0;
+        size_t          length    = 0;
+
+        struct stat     buf       = {0,};
+        char            tmp_real_path[ZR_PATH_MAX] = {0,};
+        int             ret       = -1;
+
+        MAKE_REAL_PATH (real_path, this, loc->path);
+
+        dir = opendir (real_path);
+
+        if (!dir){
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "opendir() failed on `%s': %s",
+                        real_path, strerror (op_errno));
+                goto out;
+        }
+
+        for (;;) {
+                /* readdir returns NULL both at end-of-directory and on
+                 * error, so errno must be cleared BEFORE the call */
+                errno = 0;
+                dirent = readdir (dir);
+
+                if (!dirent) {
+                        if (errno != 0) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "readdir() failed: %s",
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                        break;
+                }
+
+                /* bounded path construction; an entry whose full path does
+                 * not fit is skipped, like one that cannot be lstat-ed */
+                ret = snprintf (tmp_real_path, sizeof (tmp_real_path),
+                                "%s/%s", real_path, dirent->d_name);
+                if ((ret < 0) || ((size_t) ret >= sizeof (tmp_real_path)))
+                        continue;
+
+                ret = lstat (tmp_real_path, &buf);
+
+                if (ret == -1)
+                        continue;
+
+                /* never index past the checksum arrays */
+                length = strlen (dirent->d_name);
+                if (length > sizeof (file_checksum))
+                        length = sizeof (file_checksum);
+
+                if (S_ISDIR (buf.st_mode)) {
+                        for (i = 0; i < length; i++)
+                                dir_checksum[i] ^= dirent->d_name[i];
+                } else {
+                        for (i = 0; i < length; i++)
+                                file_checksum[i] ^= dirent->d_name[i];
+                }
+        }
+
+        op_ret = 0;
+
+ out:
+        /* single exit point so the DIR handle is released on errors too */
+        if (dir)
+                closedir (dir);
+
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
+
+        return 0;
+}
+
+/**
+ * notify - answer GF_EVENT_PARENT_UP with GF_EVENT_CHILD_UP
+ *
+ * Any other event is ignored.  Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        if (event == GF_EVENT_PARENT_UP) {
+                /* Tell the parent that posix xlator is up */
+                default_notify (this, GF_EVENT_CHILD_UP, data);
+        }
+
+        return 0;
+}
+
+/**
+ * init - set up the storage/posix translator
+ * @this: translator instance; reads this->options, sets this->private
+ *
+ * Checks that the volume has no subvolumes and that the "directory"
+ * option names an existing directory, probes extended-attribute support
+ * (failure is tolerated only when "mandate-attribute" is set to false),
+ * allocates the private structure, parses "export-statfs-size" and
+ * "o-direct", creates the xattr cache and tries to raise RLIMIT_NOFILE.
+ *
+ * Returns 0 on success and -1 on failure; on failure nothing allocated
+ * here is left behind and this->private is not touched.
+ */
+int
+init (xlator_t *this)
+{
+        int                    ret      = -1;
+        int                    op_ret   = -1;
+        gf_boolean_t           tmp_bool = 0;
+        struct stat            buf      = {0,};
+        struct posix_private * _private = NULL;
+        data_t *               dir_data = NULL;
+        data_t *               tmp_data = NULL;
+
+        dir_data = dict_get (this->options, "directory");
+
+        if (this->children) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: storage/posix cannot have subvolumes");
+                goto out;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        if (!dir_data) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "export directory not specified in volfile");
+                goto out;
+        }
+
+        umask (000); // umask `masking' is done at the client side
+
+        /* The export directory must already exist; it is not created
+         * here.  Test the lstat() result itself (the old code stored it
+         * in op_ret but tested ret). */
+        op_ret = lstat (dir_data->data, &buf);
+        if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "directory '%s' doesn't exists, Exiting",
+                        dir_data->data);
+                goto out;
+        }
+
+        /* Check for Extended attribute support, if not present, log it.
+         * The value length 8 covers "working" plus its terminating NUL. */
+        op_ret = lsetxattr (dir_data->data,
+                            "trusted.glusterfs.test", "working", 8, 0);
+        if (op_ret < 0) {
+                tmp_data = dict_get (this->options,
+                                     "mandate-attribute");
+                if (!tmp_data) {
+                        gf_log (this->name, GF_LOG_CRITICAL,
+                                "Extended attribute not supported, exiting");
+                        goto out;
+                }
+
+                if (gf_string2boolean (tmp_data->data,
+                                       &tmp_bool) == -1) {
+                        /* name the option exactly as the volfile spells it */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "wrong option provided for key "
+                                "\"mandate-attribute\"");
+                        goto out;
+                }
+
+                if (tmp_bool) {
+                        gf_log (this->name, GF_LOG_CRITICAL,
+                                "Extended attribute not supported, "
+                                "exiting");
+                        goto out;
+                }
+
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Extended attribute not supported, "
+                        "starting as per option");
+        }
+
+        _private = CALLOC (1, sizeof (*_private));
+        if (!_private) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+        _private->base_path = strdup (dir_data->data);
+        if (!_private->base_path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+        _private->base_path_length = strlen (_private->base_path);
+        _private->base_stdev = buf.st_dev;
+
+        {
+                /* Stats related variables */
+                gettimeofday (&_private->init_time, NULL);
+                gettimeofday (&_private->prev_fetch_time, NULL);
+                _private->max_read = 1;
+                _private->max_write = 1;
+        }
+
+        _private->export_statfs = 1;
+        tmp_data = dict_get (this->options, "export-statfs-size");
+        if (tmp_data) {
+                if (gf_string2boolean (tmp_data->data,
+                                       &_private->export_statfs) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'export-statfs-size' takes only boolean "
+                                "options");
+                        goto out;
+                }
+                if (!_private->export_statfs)
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "'statfs()' returns dummy size");
+        }
+
+        tmp_data = dict_get (this->options, "o-direct");
+        if (tmp_data) {
+                if (gf_string2boolean (tmp_data->data,
+                                       &_private->o_direct) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "wrong option provided for 'o-direct'");
+                        goto out;
+                }
+                if (_private->o_direct)
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "o-direct mode is enabled (O_DIRECT "
+                                "for every open)");
+        }
+
+        /* Created last among the fallible steps: no destructor for the
+         * cache is available here, so nothing may fail once it exists. */
+        _private->xattr_cache = posix_xattr_cache_init (16);
+        if (!_private->xattr_cache) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                goto out;
+        }
+
+#ifndef GF_DARWIN_HOST_OS
+        {
+                struct rlimit lim;
+                lim.rlim_cur = 1048576;
+                lim.rlim_max = 1048576;
+
+                if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "WARNING: Failed to set 'ulimit -n "
+                                " 1048576': %s", strerror(errno));
+                        lim.rlim_cur = 65536;
+                        lim.rlim_max = 65536;
+
+                        if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Failed to set max open fd to "
+                                        "64k: %s", strerror(errno));
+                        }
+                        else {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "max open fd set to 64k");
+                        }
+                }
+        }
+#endif
+
+        this->private = (void *)_private;
+        ret = 0;
+
+ out:
+        if ((ret != 0) && _private) {
+                /* undo the partial set-up so a failed init leaks nothing */
+                if (_private->base_path) {
+                        FREE (_private->base_path);
+                }
+                FREE (_private);
+        }
+
+        return ret;
+}
+
+/**
+ * fini - tear down the storage/posix translator
+ * @this: translator instance whose private data is released
+ *
+ * Removes the "trusted.glusterfs.test" probe attribute that init() set on
+ * the export directory and frees the private structure together with its
+ * base path.  Safe to call when init() never installed private data.
+ *
+ * NOTE(review): the xattr cache is not released here; no destructor for it
+ * is visible from this file.
+ */
+void
+fini (xlator_t *this)
+{
+        struct posix_private *priv = this->private;
+
+        if (!priv)
+                return;
+
+        if (priv->base_path) {
+                lremovexattr (priv->base_path, "trusted.glusterfs.test");
+                FREE (priv->base_path);
+        }
+
+        FREE (priv);
+        this->private = NULL;
+
+        return;
+}
+
+/* Management operations table: only stats is provided, by posix_stats. */
+struct xlator_mops mops = {
+ .stats = posix_stats,
+};
+
+/*
+ * File operations table: maps each fop slot to its posix_* handler.
+ * The lk, inodelk, finodelk, entrylk and fentrylk handlers are stubs that
+ * unwind with ENOSYS and ask for "features/posix-locks" to be loaded.
+ */
+struct xlator_fops fops = {
+ .lookup = posix_lookup,
+ .stat = posix_stat,
+ .opendir = posix_opendir,
+ .readdir = posix_readdir,
+ .readlink = posix_readlink,
+ .mknod = posix_mknod,
+ .mkdir = posix_mkdir,
+ .unlink = posix_unlink,
+ .rmdir = posix_rmdir,
+ .symlink = posix_symlink,
+ .rename = posix_rename,
+ .link = posix_link,
+ .chmod = posix_chmod,
+ .chown = posix_chown,
+ .truncate = posix_truncate,
+ .utimens = posix_utimens,
+ .create = posix_create,
+ .open = posix_open,
+ .readv = posix_readv,
+ .writev = posix_writev,
+ .statfs = posix_statfs,
+ .flush = posix_flush,
+ .fsync = posix_fsync,
+ .setxattr = posix_setxattr,
+ .getxattr = posix_getxattr,
+ .removexattr = posix_removexattr,
+ .fsyncdir = posix_fsyncdir,
+ .access = posix_access,
+ .ftruncate = posix_ftruncate,
+ .fstat = posix_fstat,
+ .lk = posix_lk,
+ .inodelk = posix_inodelk,
+ .finodelk = posix_finodelk,
+ .entrylk = posix_entrylk,
+ .fentrylk = posix_fentrylk,
+ .fchown = posix_fchown,
+ .fchmod = posix_fchmod,
+ .setdents = posix_setdents,
+ .getdents = posix_getdents,
+ .checksum = posix_checksum,
+ .xattrop = posix_xattrop,
+ .fxattrop = posix_fxattrop,
+};
+
+/* Callback table: handlers for the release, releasedir and forget slots. */
+struct xlator_cbks cbks = {
+ .release = posix_release,
+ .releasedir = posix_releasedir,
+ .forget = posix_forget
+};
+
+/*
+ * Volfile options read by init():
+ *   o-direct            - boolean, stored in posix_private.o_direct
+ *   directory           - path of the export directory (mandatory)
+ *   export-statfs-size  - boolean, stored in posix_private.export_statfs
+ *                         (defaults to 1 when the option is absent)
+ *   mandate-attribute   - boolean; when false, init() continues even if
+ *                         the extended-attribute probe fails
+ * The entry with a NULL key terminates the table.
+ */
+struct volume_options options[] = {
+ { .key = {"o-direct"},
+ .type = GF_OPTION_TYPE_BOOL },
+ { .key = {"directory"},
+ .type = GF_OPTION_TYPE_PATH },
+ { .key = {"export-statfs-size"},
+ .type = GF_OPTION_TYPE_BOOL },
+ { .key = {"mandate-attribute"},
+ .type = GF_OPTION_TYPE_BOOL },
+ { .key = {NULL} }
+};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
new file mode 100644
index 00000000000..b162139c955
--- /dev/null
+++ b/xlators/storage/posix/src/posix.h
@@ -0,0 +1,110 @@
+/*
+ Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _POSIX_H
+#define _POSIX_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#ifdef linux
+#ifdef __GLIBC__
+#include <sys/fsuid.h>
+#else
+#include <unistd.h>
+#endif
+#endif
+
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif
+
+#ifdef HAVE_SYS_EXTATTR_H
+#include <sys/extattr.h>
+#endif
+
+#include "xlator.h"
+#include "inode.h"
+#include "compat.h"
+
+#include "xattr-cache.h"
+
+/**
+ * posix_fd - internal structure common to file and directory fd's
+ *
+ * A pointer to one of these is stored (as an integer) in the fd context
+ * and fetched back with fd_ctx_get(); file operations use ->fd, directory
+ * operations use ->dir.
+ */
+
+struct posix_fd {
+ int fd; /* fd returned by the kernel */
+ int32_t flags; /* flags for open/creat */
+ char * path; /* used by setdents/getdents */
+ DIR * dir; /* handle returned by the kernel */
+};
+
+/*
+ * posix_private - per-translator state, allocated by init() and kept in
+ * xlator_t.private.
+ */
+struct posix_private {
+ /* copy of the "directory" option: the export directory */
+ char *base_path;
+ /* strlen (base_path), cached for MAKE_REAL_PATH */
+ int32_t base_path_length;
+ /* st_dev of the export directory as reported by lstat() in init() */
+ dev_t base_stdev;
+
+ /* cache created by posix_xattr_cache_init() in init() */
+ xattr_cache_t *xattr_cache;
+
+ /* Statistics, provides activity of the server */
+ struct xlator_stats stats;
+
+ /* time of the previous posix_stats() call (set to init time at start) */
+ struct timeval prev_fetch_time;
+ /* time at which init() ran */
+ struct timeval init_time;
+
+ int32_t max_read; /* highest per-interval read rate seen; starts at 1 */
+ int32_t max_write; /* highest per-interval write rate seen; starts at 1 */
+ int64_t interval_read; /* Used to calculate the max_read value */
+ int64_t interval_write; /* Used to calculate the max_write value */
+ int64_t read_value; /* Total read, from init */
+ int64_t write_value; /* Total write, from init */
+
+/*
+ In some cases, two exported volumes may reside on the same
+ partition on the server. Sending statvfs info for both
+ the volumes will lead to erroneous df output at the client,
+ since free space on the partition will be counted twice.
+
+ In such cases, user can disable exporting statvfs info
+ on one of the volumes by setting this option.
+*/
+ gf_boolean_t export_statfs;
+
+ gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
+};
+
+/* Export directory of translator `this` (a char *). */
+#define POSIX_BASE_PATH(this) (((struct posix_private *)(this)->private)->base_path)
+
+/* Cached strlen() of POSIX_BASE_PATH(this). */
+#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)(this)->private)->base_path_length)
+
+/*
+ * MAKE_REAL_PATH - build the on-disk path for a translator-relative path.
+ *
+ * Assigns to `var` a buffer obtained from alloca() holding the base path
+ * immediately followed by `path`.  The buffer lives on the caller's stack
+ * and must not be freed or used after the calling function returns.
+ * `var` must be an lvalue; `this` and `path` are evaluated more than
+ * once, so pass expressions without side effects.
+ */
+#define MAKE_REAL_PATH(var, this, path) do {                              \
+        (var) = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2);   \
+        strcpy ((var), POSIX_BASE_PATH(this));                            \
+        strcpy (&(var)[POSIX_BASE_PATH_LEN(this)], (path));               \
+        } while (0)
+
+#endif /* _POSIX_H */
diff --git a/xlators/storage/posix/src/xattr-cache.c b/xlators/storage/posix/src/xattr-cache.c
new file mode 100644
index 00000000000..a39c35ae234
--- /dev/null
+++ b/xlators/storage/posix/src/xattr-cache.c
@@ -0,0 +1,521 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include "byte-order.h"
+
+#include "xattr-cache.h"
+#include "posix.h"
+#include "compat-errno.h"
+
+/*
+ * Read extended attribute `key` of the object behind `handle` into
+ * `value` (at most `len` bytes).
+ *
+ * A handle with a path is read with lgetxattr() on the real path;
+ * otherwise the descriptor stored in the fd context is used with
+ * fgetxattr().  Returns the value of the underlying call on success,
+ * -errno when it fails, or -EBADFD when the fd has no context.
+ */
+static int
+__hgetxattr (xattr_cache_handle_t *handle, xlator_t *this,
+             const char *key, void *value, size_t len)
+{
+        char *            real_path = NULL;
+        struct posix_fd * pfd       = NULL;
+        uint64_t          ctx_val   = 0;
+        int               result    = -1;
+
+        if (handle->loc.path) {
+                MAKE_REAL_PATH (real_path, this, handle->loc.path);
+
+                result = lgetxattr (real_path, key, value, len);
+                return (result == -1) ? -errno : result;
+        }
+
+        if (fd_ctx_get (handle->fd, this, &ctx_val) < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get pfd from fd=%p",
+                        handle->fd);
+                return -EBADFD;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        result = fgetxattr (pfd->fd, key, value, len);
+        return (result == -1) ? -errno : result;
+}
+
+
+/*
+ * Write extended attribute `key` (`len` bytes from `value`) on the object
+ * behind `handle`, passing `flags` through to the system call.
+ *
+ * A handle with a path is written with lsetxattr() on the real path;
+ * otherwise the descriptor stored in the fd context is used with
+ * fsetxattr().  Returns the value of the underlying call on success,
+ * -errno when it fails, or -EBADFD when the fd has no context.
+ */
+static int
+__hsetxattr (xattr_cache_handle_t *handle, xlator_t *this,
+             const char *key, void *value, size_t len, int flags)
+{
+        char *            real_path = NULL;
+        struct posix_fd * pfd       = NULL;
+        uint64_t          ctx_val   = 0;
+        int               result    = -1;
+
+        if (handle->loc.path) {
+                MAKE_REAL_PATH (real_path, this, handle->loc.path);
+
+                result = lsetxattr (real_path, key, value, len, flags);
+                return (result == -1) ? -errno : result;
+        }
+
+        if (fd_ctx_get (handle->fd, this, &ctx_val) < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get pfd from fd=%p",
+                        handle->fd);
+                return -EBADFD;
+        }
+        pfd = (struct posix_fd *)(long)ctx_val;
+
+        result = fsetxattr (pfd->fd, key, value, len, flags);
+        return (result == -1) ? -errno : result;
+}
+
+
+/*
+ * Find the cache entry for (`inode`, `key`).
+ *
+ * On a hit the entry's access counter is incremented and the entry is
+ * returned; NULL is returned on a miss.  Entries without a key (empty
+ * slots, or slots whose key could not be duplicated) never match.
+ * The caller is expected to hold cache->lock.
+ */
+static xattr_cache_entry_t *
+__cache_lookup (xattr_cache_t *cache, inode_t *inode, char *key)
+{
+        xattr_cache_entry_t *entry = NULL;
+        size_t               i     = 0;
+
+        for (i = 0; i < cache->size; i++) {
+                entry = cache->entries[i];
+
+                if (entry->inode != inode)
+                        continue;
+
+                /* guard strcmp() against a slot that has no key */
+                if (!entry->key || strcmp (entry->key, key))
+                        continue;
+
+                entry->nraccess++;
+                return entry;
+        }
+
+        return NULL;
+}
+
+
+/*
+ * Pick the entry with the smallest access counter (the first one wins on
+ * ties), bump its counter and return it.
+ */
+static xattr_cache_entry_t *
+__cache_least_used_entry (xattr_cache_t *cache)
+{
+        xattr_cache_entry_t *victim = cache->entries[0];
+        int                  idx;
+
+        /* slot 0 is the starting candidate, so the scan begins at 1 */
+        for (idx = 1; idx < cache->size; idx++) {
+                xattr_cache_entry_t *candidate = cache->entries[idx];
+
+                if (candidate->nraccess < victim->nraccess)
+                        victim = candidate;
+        }
+
+        victim->nraccess++;
+
+        return victim;
+}
+
+
+/*
+ * Return the inode a handle refers to: loc.inode when the handle carries
+ * a path, fd->inode when it carries an fd, NULL when it has neither.
+ */
+static inode_t *
+__inode_for_handle (xattr_cache_handle_t *handle)
+{
+        if (handle->loc.path)
+                return handle->loc.inode;
+
+        if (handle->fd)
+                return handle->fd->inode;
+
+        return NULL;
+}
+
+
+/*
+ * Release a handle: wipe its loc when it carries a path, then free the
+ * handle itself.
+ */
+static void
+__free_handle (xattr_cache_handle_t *handle)
+{
+        if (handle->loc.path) {
+                loc_wipe (&handle->loc);
+        }
+
+        FREE (handle);
+}
+
+
+/*
+ * Duplicate a handle.
+ *
+ * A path-based handle has its loc copied with loc_copy(); an fd-based
+ * handle just shares the fd pointer.  Returns the new handle (to be
+ * released with __free_handle), or NULL when memory is exhausted.
+ */
+static xattr_cache_handle_t *
+__copy_handle (xattr_cache_handle_t *handle)
+{
+        xattr_cache_handle_t *hnew = calloc (1, sizeof (xattr_cache_handle_t));
+
+        /* do not write through a failed allocation */
+        if (!hnew)
+                return NULL;
+
+        if (handle->loc.path)
+                loc_copy (&hnew->loc, &handle->loc);
+        else
+                hnew->fd = handle->fd;
+
+        return hnew;
+}
+
+
+/*
+ * Fill an (empty) cache entry with the on-disk value of `key`.
+ *
+ * Allocates a zeroed buffer of `len` bytes, duplicates the key and the
+ * handle, and reads the attribute into the buffer.  A failed read is not
+ * treated as fatal: the entry then holds the zero-filled buffer and the
+ * negative result of the read is returned.
+ *
+ * Returns the result of __hgetxattr(), or -ENOMEM when an allocation
+ * fails; in the latter case the entry is left empty (array, key and
+ * handle all NULL).
+ */
+static int
+__cache_populate_entry (xattr_cache_entry_t *entry, xlator_t *this,
+                        xattr_cache_handle_t *handle, char *key, size_t len)
+{
+        int op_ret = -1;
+
+        entry->array = calloc (1, len);
+        if (!entry->array) {
+                op_ret = -ENOMEM;
+                goto out;
+        }
+
+        entry->key    = strdup (key);
+        entry->handle = __copy_handle (handle);
+        if (!entry->key || !entry->handle) {
+                op_ret = -ENOMEM;
+                goto undo;
+        }
+
+        op_ret = __hgetxattr (handle, this, key, entry->array, len);
+
+        entry->inode    = __inode_for_handle (handle);
+        entry->len      = len;
+        entry->nraccess = 1;
+
+        goto out;
+
+undo:
+        /* never leave a half-built entry behind: lookups and flushes
+         * assume key and handle are present whenever inode is set */
+        if (entry->handle) {
+                __free_handle (entry->handle);
+                entry->handle = NULL;
+        }
+        if (entry->key) {
+                FREE (entry->key);
+                entry->key = NULL;
+        }
+        FREE (entry->array);
+        entry->array = NULL;
+
+out:
+        return op_ret;
+}
+
+
+/*
+ * Write a dirty entry back to disk and reset the slot to empty.
+ *
+ * The slot is always emptied (key, array and handle released, counters
+ * cleared), even when the write-back fails.  Returns 0 when nothing had
+ * to be written or the write succeeded, otherwise the negative result of
+ * __hsetxattr().
+ */
+static int
+__cache_flush_entry (xattr_cache_entry_t *entry, xlator_t *this)
+{
+        int ret = 0;
+
+        /* a dirty entry without a handle cannot be written anywhere */
+        if (entry->dirty && entry->handle) {
+                ret = __hsetxattr (entry->handle, this,
+                                   entry->key, entry->array, entry->len, 0);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to write back cached xattr %s: %s",
+                                entry->key ? entry->key : "(null)",
+                                strerror (-ret));
+                }
+        }
+
+        entry->len      = 0;
+        entry->nraccess = 0;
+        entry->dirty    = 0;
+        entry->inode    = NULL;
+
+        if (entry->key) {
+                FREE (entry->key);
+                entry->key = NULL;
+        }
+
+        if (entry->array) {
+                FREE (entry->array);
+                entry->array = NULL;
+        }
+
+        if (entry->handle) {
+                __free_handle (entry->handle);
+                entry->handle = NULL;
+        }
+
+        return (ret < 0) ? ret : 0;
+}
+
+
+/*
+ * Log `array` (`len` bytes, interpreted as 32-bit values converted with
+ * ntoh32) at DEBUG level as "<str>[ v0 v1 ... ]".
+ *
+ * Nothing is logged when the temporary buffer cannot be allocated.
+ */
+static void
+__print_array (char *str, xlator_t *this, int32_t *array, size_t len)
+{
+        char   *buf   = NULL;
+        size_t  count = len / sizeof (int32_t);
+        size_t  cap   = 0;
+        size_t  used  = 0;
+        size_t  i     = 0;
+        int     n     = 0;
+
+        /* per entry: optional sign + 10 digits + 1 space = 12 characters;
+         * the extra 8 cover "[ ", "]" and the terminating NUL */
+        cap = count * 12 + 8;
+
+        buf = malloc (cap);
+        if (!buf)
+                return;
+
+        n = snprintf (buf, cap, "[ ");
+        if (n > 0)
+                used = (size_t) n;
+
+        for (i = 0; i < count; i++) {
+                n = snprintf (buf + used, cap - used, "%d ",
+                              (int32_t) ntoh32 (array[i]));
+                if ((n < 0) || ((size_t) n >= cap - used))
+                        break;
+                used += (size_t) n;
+        }
+
+        snprintf (buf + used, cap - used, "]");
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "%s%s", str, buf);
+
+        FREE (buf);
+}
+
+
+/**
+ * posix_xattr_cache_read - fetch an xattr value through the cache
+ * @this:   translator instance; its private data holds the cache
+ * @handle: path- or fd-based reference to the object
+ * @key:    name of the extended attribute
+ * @array:  destination buffer
+ * @len:    number of bytes to copy into @array
+ *
+ * On a miss the least-used slot is flushed and refilled from disk.  A
+ * cached value shorter than @len is flushed and re-read so that the copy
+ * never reads past the cached buffer.
+ *
+ * Returns 0 on success, -1 when the handle has no inode and -ENOMEM when
+ * the slot could not be populated.
+ */
+int
+posix_xattr_cache_read (xlator_t *this, xattr_cache_handle_t *handle,
+                        char *key, int32_t *array, size_t len)
+{
+        xattr_cache_entry_t *entry  = NULL;
+        xattr_cache_entry_t *purgee = NULL;
+
+        xattr_cache_t *cache = NULL;
+        inode_t       *inode = NULL;
+
+        int op_ret = -1;
+
+        inode = __inode_for_handle (handle);
+
+        if (!inode) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "handle has no inode!");
+                goto out;
+        }
+
+        cache = ((struct posix_private *) (this->private))->xattr_cache;
+
+        pthread_mutex_lock (&cache->lock);
+        {
+                entry = __cache_lookup (cache, inode, key);
+
+                if (entry && (entry->len < len)) {
+                        /* cached buffer is too small for this request:
+                         * write it back and treat the request as a miss */
+                        __cache_flush_entry (entry, this);
+                        entry = NULL;
+                }
+
+                if (entry) {
+                        if (handle->loc.path)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "cache hit for %s", handle->loc.path);
+                        else if (handle->fd)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "cache hit for fd=%p", handle->fd);
+                }
+
+                if (!entry) {
+                        purgee = __cache_least_used_entry (cache);
+
+                        if (purgee->handle && purgee->handle->loc.path)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "flushing and purging entry for %s",
+                                        purgee->handle->loc.path);
+                        else if (purgee->handle && purgee->handle->fd)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "flushing and purging entry for fd=%p",
+                                        purgee->handle->fd);
+                        __cache_flush_entry (purgee, this);
+
+                        if (handle->loc.path)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "populating entry for %s",
+                                        handle->loc.path);
+                        else if (handle->fd)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "populating entry for fd=%p",
+                                        handle->fd);
+                        __cache_populate_entry (purgee, this, handle, key, len);
+
+                        /* A failed attribute read still leaves a zeroed
+                         * buffer to copy from; only a missing buffer
+                         * (allocation failure) is fatal. */
+                        if (!purgee->array) {
+                                op_ret = -ENOMEM;
+                                goto unlock;
+                        }
+
+                        entry = purgee;
+                }
+
+                memcpy (array, entry->array, len);
+
+                __print_array ("read array: ", this, array, len);
+
+                op_ret = 0;
+        }
+unlock:
+        pthread_mutex_unlock (&cache->lock);
+
+out:
+        return op_ret;
+}
+
+
+/**
+ * posix_xattr_cache_write - store an xattr value through the cache
+ * @this:   translator instance; its private data holds the cache
+ * @handle: path- or fd-based reference to the object
+ * @key:    name of the extended attribute
+ * @array:  new value
+ * @len:    size of @array in bytes
+ *
+ * A cached entry is updated in memory and marked dirty.  Without a cached
+ * entry, or when @len does not fit the cached buffer, the value is
+ * written straight to disk.
+ *
+ * Returns 0 on success, -1 when the handle has no inode, or the negative
+ * result of the write-through when that fails.
+ */
+int posix_xattr_cache_write (xlator_t *this, xattr_cache_handle_t *handle,
+                             char *key, int32_t *array, size_t len)
+{
+        xattr_cache_t       * cache = NULL;
+        xattr_cache_entry_t * entry = NULL;
+
+        inode_t *inode = NULL;
+
+        int op_ret = -1;
+
+        inode = __inode_for_handle (handle);
+
+        if (!inode) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "handle has no inode!");
+                goto out;
+        }
+
+        cache = ((struct posix_private *) (this->private))->xattr_cache;
+
+        pthread_mutex_lock (&cache->lock);
+        {
+                entry = __cache_lookup (cache, inode, key);
+
+                if (entry && (len > entry->len)) {
+                        /* the cached buffer cannot hold the new value:
+                         * evict the entry and write through instead of
+                         * overrunning the buffer */
+                        __cache_flush_entry (entry, this);
+                        entry = NULL;
+                }
+
+                if (entry) {
+                        entry->dirty = 1;
+                        memcpy (entry->array, array, len);
+                        op_ret = 0;
+                } else {
+                        /*
+                         * This case shouldn't usually happen, since the
+                         * entry should have been brought into the cache
+                         * by the previous read (xattrop always does a read &
+                         * write).
+                         *
+                         * If we've reached here, it means things are happening
+                         * very quickly and the entry was flushed after read
+                         * but before this write. In that case, let's just
+                         * write this to disk
+                         */
+
+                        op_ret = __hsetxattr (handle, this, key, array,
+                                              len, 0);
+                }
+
+                __print_array ("wrote array: ", this, array, len);
+        }
+        pthread_mutex_unlock (&cache->lock);
+
+out:
+        return op_ret;
+}
+
+
+/**
+ * posix_xattr_cache_flush - write back and drop all entries of one inode
+ * @this:   translator instance; its private data holds the cache
+ * @handle: path- or fd-based reference identifying the inode
+ *
+ * Returns 0 on success and -EINVAL when the handle has no inode.
+ */
+int posix_xattr_cache_flush (xlator_t *this, xattr_cache_handle_t *handle)
+{
+        xattr_cache_t       *cache = NULL;
+        xattr_cache_entry_t *entry = NULL;
+
+        int      i;
+        inode_t *inode = NULL;
+
+        int op_ret = -1;
+
+        inode = __inode_for_handle (handle);
+        if (!inode) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "handle has no inode!");
+                op_ret = -EINVAL;
+                goto out;
+        }
+
+        cache = ((struct posix_private *) (this->private))->xattr_cache;
+
+        pthread_mutex_lock (&cache->lock);
+        {
+                for (i = 0; i < cache->size; i++) {
+                        entry = cache->entries[i];
+
+                        if (!entry || (entry->inode != inode))
+                                continue;
+
+                        /* same handle guard as posix_xattr_cache_flush_all */
+                        if (entry->handle && entry->handle->loc.path)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "force flushing entry for %s",
+                                        entry->handle->loc.path);
+
+                        else if (entry->handle && entry->handle->fd)
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        "force flushing entry for fd=%p",
+                                        entry->handle->fd);
+
+                        __cache_flush_entry (entry, this);
+                }
+        }
+        pthread_mutex_unlock (&cache->lock);
+
+        op_ret = 0;
+out:
+        return op_ret;
+}
+
+
+/**
+ * posix_xattr_cache_flush_all - write back and drop every populated entry
+ * @this: translator instance; its private data holds the cache
+ *
+ * Slots that are missing or have no handle are skipped.  Always returns 0.
+ */
+int
+posix_xattr_cache_flush_all (xlator_t *this)
+{
+        xattr_cache_t       *cache = NULL;
+        xattr_cache_entry_t *slot  = NULL;
+        int                  idx;
+
+        cache = ((struct posix_private *) (this->private))->xattr_cache;
+
+        pthread_mutex_lock (&cache->lock);
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "flushing entire xattr cache: ");
+
+        for (idx = 0; idx < cache->size; idx++) {
+                slot = cache->entries[idx];
+
+                if (slot && slot->handle) {
+                        if (slot->handle->loc.path) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        " force flushing entry for %s",
+                                        slot->handle->loc.path);
+                        } else if (slot->handle->fd) {
+                                gf_log (this->name, GF_LOG_DEBUG,
+                                        " force flushing entry for fd=%p",
+                                        slot->handle->fd);
+                        }
+
+                        __cache_flush_entry (slot, this);
+                }
+        }
+
+        pthread_mutex_unlock (&cache->lock);
+
+        return 0;
+}
+
+
+/**
+ * posix_xattr_cache_init - create an xattr cache with @size empty slots
+ * @size: number of entries the cache can hold
+ *
+ * Returns the new cache, or NULL when an allocation or the mutex
+ * initialisation fails (everything allocated so far is released).
+ */
+xattr_cache_t *
+posix_xattr_cache_init (size_t size)
+{
+        size_t          i      = 0;
+        xattr_cache_t * cache  = NULL;
+        int             op_ret = -1;
+
+        cache = CALLOC (1, sizeof (xattr_cache_t));
+        if (!cache) {
+                goto out;
+        }
+
+        cache->entries = CALLOC (size, sizeof (xattr_cache_entry_t *));
+        if (!cache->entries)
+                goto out;
+
+        cache->size = size;
+
+        for (i = 0; i < size; i++) {
+                cache->entries[i] = calloc (1, sizeof (xattr_cache_entry_t));
+                if (!cache->entries[i])
+                        goto out;
+        }
+
+        if (pthread_mutex_init (&cache->lock, NULL) != 0)
+                goto out;
+
+        op_ret = 0;
+out:
+        if ((op_ret == -1) && cache) {
+                if (cache->entries) {
+                        for (i = 0; i < size; i++) {
+                                if (cache->entries[i]) {
+                                        FREE (cache->entries[i]);
+                                }
+                        }
+
+                        FREE (cache->entries);
+                }
+
+                FREE (cache);
+
+                /* callers test the result against NULL; never hand back
+                 * a pointer to memory that was just released */
+                cache = NULL;
+        }
+
+        return cache;
+}
diff --git a/xlators/storage/posix/src/xattr-cache.h b/xlators/storage/posix/src/xattr-cache.h
new file mode 100644
index 00000000000..3e12742a90f
--- /dev/null
+++ b/xlators/storage/posix/src/xattr-cache.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __XATTR_CACHE_H__
+#define __XATTR_CACHE_H__
+
+
+#include "glusterfs.h"
+#include "inode.h"
+
+/*
+ * Reference to the object whose xattrs are cached.  When loc.path is set
+ * the handle is path-based and loc is used; otherwise fd is used.
+ */
+typedef struct __xattr_cache_handle {
+ loc_t loc;
+ fd_t *fd;
+} xattr_cache_handle_t;
+
+
+/*
+ * One cache slot.  An empty slot has every field zero/NULL; a populated
+ * slot is identified by the (inode, key) pair.
+ */
+typedef struct __xattr_cache_entry {
+ char *key; /* name of the xattr */
+ int32_t *array; /* value */
+ size_t len; /* length of array in bytes */
+ inode_t *inode; /* inode for which the entry is for */
+
+ /* private copy of the handle, used to write the value back on flush */
+ xattr_cache_handle_t *handle;
+ /* non-zero when array holds a value not yet written to disk */
+ unsigned char dirty;
+ unsigned long nraccess; /* number of times accessed */
+} xattr_cache_entry_t;
+
+
+/*
+ * The cache: a fixed array of `size` entry pointers; the read, write and
+ * flush functions take `lock` before touching the entries.
+ */
+typedef struct __xattr_cache {
+ size_t size;
+ pthread_mutex_t lock;
+ xattr_cache_entry_t **entries;
+} xattr_cache_t;
+
+
+/*
+ * Create a cache with `size` empty entries.  Callers treat a NULL return
+ * as allocation failure.
+ */
+xattr_cache_t * posix_xattr_cache_init (size_t size);
+
+/*
+ * Copy `len` bytes of the value of xattr `key` into `array`, loading it
+ * from disk into the least-used slot on a cache miss.  Returns 0 on
+ * success and a negative value on failure.
+ */
+int posix_xattr_cache_read (xlator_t *this, xattr_cache_handle_t *handle,
+ char *key, int32_t *array, size_t len);
+
+/*
+ * Store `len` bytes from `array` as the value of xattr `key`: a cached
+ * entry is updated and marked dirty, otherwise the value is written to
+ * disk directly.  Returns 0 on success and a negative value on failure.
+ */
+int posix_xattr_cache_write (xlator_t *this, xattr_cache_handle_t *handle,
+ char *key, int32_t *array, size_t len);
+
+/*
+ * Write back and drop every cached entry belonging to the inode behind
+ * `handle`.  Returns 0 on success, -EINVAL when the handle has no inode.
+ */
+int posix_xattr_cache_flush (xlator_t *this, xattr_cache_handle_t *handle);
+
+/* Write back and drop every populated entry.  Returns 0. */
+int posix_xattr_cache_flush_all (xlator_t *this);
+
+
+#endif /* __XATTR_CACHE_H__ */