diff options
Diffstat (limited to 'xlators')
166 files changed, 88606 insertions, 0 deletions
# Automake rules for the Python bindings translator: builds python.so and
# installs the helper modules next to it.

xlator_PROGRAMS = python.so

xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings

python_PYTHON = gluster.py glustertypes.py glusterstack.py

pythondir = $(xlatordir)/python

python_so_SOURCES = python.c

# PYTHON_CPPFLAGS (not "PYTHON_CPPLAGS") carries the include path for
# Python.h; the misspelt variable expanded to nothing.
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
	$(PYTHON_CPPFLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"

AM_LDFLAGS = $(PYTHON_LDFLAGS)

CLEANFILES =
def gf_log(module, level, fmt, *params):
    """Forward a log message to libglusterfs' ``_gf_log``.

    The message is only forwarded when ``level`` is at or below the
    current ``gf_log_loglevel``.  The file name, function name and line
    number reported to the C logger are those of the caller of this
    function.

    module -- log domain string
    level  -- one of the ``GF_LOG_*`` constants
    fmt    -- printf-style format string
    params -- values for the format string, passed through as varargs
    """
    # gf_log_loglevel is a ctypes c_int bound to the C global; comparing
    # the wrapper object itself never yields a numeric comparison, so the
    # current value has to be read explicitly.
    if level > gf_log_loglevel.value:
        return

    # Frame 1 is whoever called gf_log().
    caller = sys._getframe(1)
    code = caller.f_code
    _gf_log(module, code.co_filename, code.co_name,
            caller.f_lineno, level, fmt, *params)


class ComplexTranslator(object):
    """Base class for Python translators wrapping a C ``xlator_t``.

    Attributes that are not found on the Python object are looked up on
    the wrapped ``xlator_t`` structure.
    """

    def __init__(self, xlator):
        # ``xlator`` is the address of the C structure.
        self.xlator = xlator_t.from_address(xlator)

    def __getattr__(self, item):
        # __getattr__ only runs when normal lookup failed.  If ``xlator``
        # itself is missing (object built without __init__), delegating
        # would re-enter this method forever.
        if item == "xlator":
            raise AttributeError(item)
        return getattr(self.xlator, item)
# Callback thunks handed to C, keyed by the address of the frame that
# stores them.  ctypes does not keep CFUNCTYPE objects alive on behalf of
# C code, so without this the thunk could be collected before the child
# translator calls back into it.  An entry is replaced when the allocator
# hands the same frame address out again.
_wind_callbacks = {}


# TODO: Can these be done in C somehow?
def stack_wind(frame, rfn, obj, fn, *params):
    """Allocate a child call frame and call ``fn`` with it.

    frame  -- the current frame object (a ``call_frame_t``)
    rfn    -- ``ret_fn_t`` callback stored as the new frame's ``ret``
    obj    -- pointer to the translator the call is wound to
    fn     -- the fop to invoke; called as ``fn(new_frame, obj, *params)``
    params -- fop-specific arguments

    Raises MemoryError when the new frame cannot be allocated.
    """
    raw = calloc(1, sizeof(call_frame_t))
    if not raw:
        raise MemoryError("calloc failed while allocating a call_frame_t")
    _new = cast(raw, POINTER(call_frame_t))

    # Link the new frame right after the root's embedded ``frames`` entry.
    _new[0].root = frame.root
    _new[0].next = frame.root[0].frames.next
    _new[0].prev = pointer(frame.root[0].frames)
    if frame.root[0].frames.next:
        frame.root[0].frames.next[0].prev = _new
    frame.root[0].frames.next = _new

    _new[0].this = obj
    # TODO: Type checking like tmp_cbk?
    _new[0].ret = rfn
    _wind_callbacks[raw] = rfn
    _new[0].parent = pointer(frame)
    _new[0].cookie = cast(_new, c_void_p)
    # TODO: Initialize lock
    #_new.lock.init()
    frame.ref_count += 1
    fn(_new, obj, *params)


def stack_unwind(frame, *params):
    """Return from ``frame`` to its parent by calling the stored ``ret``.

    frame  -- pointer to the frame being unwound
    params -- ``op_ret``, ``op_errno`` and any fop-specific values

    Raises IndexError when fewer than two values are supplied.
    """
    # Unpack first so a malformed call fails before any state is touched.
    op_ret = params[0]
    op_err = params[1]
    params = params[2:]

    fn = frame[0].ret
    parent = frame[0].parent[0]
    parent.ref_count -= 1

    fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
       op_ret, op_err, *params)
#
# Forward declarations of the gluster structures.  Their ``_fields_`` are
# assigned later in the module, once every referenced type exists.
#
class call_frame_t(Structure):
    """ctypes stand-in for the C ``call_frame_t``."""


class call_ctx_t(Structure):
    """ctypes stand-in for the C ``call_ctx_t``."""


class call_pool_t(Structure):
    """ctypes stand-in for the C ``call_pool_t``."""


class xlator_t(Structure):
    """ctypes stand-in for the C ``xlator_t``."""

    def _getFirstChild(self):
        # First entry of the ``children`` list.
        first_entry = self.children[0]
        return first_entry.xlator

    firstChild = property(fget=_getFirstChild)


class xlator_list_t(Structure):
    """ctypes stand-in for the C ``xlator_list_t``."""


class xlator_fops(Structure):
    """ctypes stand-in for the C ``struct xlator_fops``."""


class xlator_mops(Structure):
    """ctypes stand-in for the C ``struct xlator_mops``."""


class glusterfs_ctx_t(Structure):
    """ctypes stand-in for the C ``glusterfs_ctx_t``."""


class list_head(Structure):
    """ctypes stand-in for the C ``struct list_head``."""


class dict_t(Structure):
    """ctypes stand-in for the C ``dict_t``."""


class inode_table_t(Structure):
    """ctypes stand-in for the C ``inode_table_t``."""


class fd_t(Structure):
    """ctypes stand-in for the C ``fd_t``."""


class iovec(Structure):
    """A ``struct iovec``: a base pointer plus a byte length."""

    _fields_ = [
        ("iov_base", c_void_p),
        ("iov_len", c_size_t),
    ]

    def __init__(self, s):
        # Point the vector at the bytes of ``s`` and record its length.
        buf = c_char_p(s)
        self.iov_base = cast(buf, c_void_p)
        self.iov_len = len(s)

    def getBytes(self):
        """Return the ``iov_len`` bytes found at ``iov_base``."""
        start = self.iov_base
        length = self.iov_len
        return string_at(start, length)
+gf_lock_t = c_int + +uid_t = c_uint32 +gid_t = c_uint32 +pid_t = c_int32 + +off_t = c_int64 + +# +# Function pointer types +# +ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t), + POINTER(xlator_t), c_int32, c_int32) + +fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t)) +init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t)) +event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p) + +list_head._fields_ = [ + ("next", POINTER(list_head)), + ("prev", POINTER(list_head)), + ] + +call_frame_t._fields_ = [ + ("root", POINTER(call_ctx_t)), + ("parent", POINTER(call_frame_t)), + ("next", POINTER(call_frame_t)), + ("prev", POINTER(call_frame_t)), + ("local", c_void_p), + ("this", POINTER(xlator_t)), + ("ret", ret_fn_t), + ("ref_count", c_int32), + ("lock", gf_lock_t), + ("cookie", c_void_p), + ("op", c_int32), + ("type", c_int8), + ] + +call_ctx_t._fields_ = [ + ("all_frames", list_head), + ("trans", c_void_p), + ("pool", call_pool_t), + ("unique", c_uint64), + ("state", c_void_p), + ("uid", uid_t), + ("gid", gid_t), + ("pid", pid_t), + ("frames", call_frame_t), + ("req_refs", POINTER(dict_t)), + ("rsp_refs", POINTER(dict_t)), + ] + +xlator_t._fields_ = [ + ("name", c_char_p), + ("type", c_char_p), + ("next", POINTER(xlator_t)), + ("prev", POINTER(xlator_t)), + ("parent", POINTER(xlator_t)), + ("children", POINTER(xlator_list_t)), + ("fops", POINTER(xlator_fops)), + ("mops", POINTER(xlator_mops)), + ("fini", fini_fn_t), + ("init", init_fn_t), + ("notify", event_notify_fn_t), + ("options", POINTER(dict_t)), + ("ctx", POINTER(glusterfs_ctx_t)), + ("itable", POINTER(inode_table_t)), + ("ready", c_char), + ("private", c_void_p), + ] + +xlator_list_t._fields_ = [ + ("xlator", POINTER(xlator_t)), + ("next", POINTER(xlator_list_t)), + ] + +fop_functions = collections.defaultdict(lambda: c_void_p) +fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod', + 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access', + 
'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink', + 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush', + 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir', + 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir', + # TODO: Call backs? + ] + +fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t), + POINTER(fd_t), POINTER(iovec), c_int32, + off_t) + +fop_functions['writev'] = fop_writev_t +xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names] diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c new file mode 100644 index 000000000..739ef7329 --- /dev/null +++ b/xlators/bindings/python/src/python.c @@ -0,0 +1,235 @@ +/* + Copyright (c) 2007 Chris AtLee <chris@atlee.ca> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <Python.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" +#include "defaults.h" + +typedef struct +{ + char *scriptname; + PyObject *pXlator; + PyObject *pScriptModule; + PyObject *pGlusterModule; + PyThreadState *pInterp; + + PyObject *pFrameType, *pVectorType, *pFdType; +} python_private_t; + +int32_t +python_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + python_private_t *priv = (python_private_t *)this->private; + gf_log("python", GF_LOG_DEBUG, "In writev"); + if (PyObject_HasAttrString(priv->pXlator, "writev")) + { + + PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev", + "O O O i l", + PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame), + PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd), + PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector), + count, + offset); + if (PyErr_Occurred()) + { + PyErr_Print(); + } + Py_XDECREF(retval); + } + else + { + return default_writev(frame, this, fd, vector, count, offset); + } + return 0; +} + +struct xlator_fops fops = { + .writev = python_writev +}; + +struct xlator_mops mops = { +}; + +static PyObject * +AnonModule_FromFile (const char* fname) +{ + // Get the builtins + PyThreadState* pThread = PyThreadState_Get(); + PyObject *pBuiltins = pThread->interp->builtins; + + if (PyErr_Occurred()) + { + PyErr_Print(); + return NULL; + } + + // Create a new dictionary for running code in + PyObject *pModuleDict = PyDict_New(); + PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins); + Py_INCREF(pBuiltins); + + // Run the file in the new context + FILE* fp = fopen(fname, "r"); + PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict); + fclose(fp); + if (PyErr_Occurred()) + { + PyErr_Print(); + Py_DECREF(pModuleDict); + 
Py_DECREF(pBuiltins); + return NULL; + } + + // Create an object to hold the new context + PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict); + if (PyErr_Occurred()) + { + PyErr_Print(); + Py_DECREF(pModuleDict); + Py_DECREF(pBuiltins); + return NULL; + } + PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict); + if (PyErr_Occurred()) + { + PyErr_Print(); + Py_DECREF(pModuleDict); + Py_DECREF(pBuiltins); + Py_XDECREF(pModule); + return NULL; + } + + // Set the new context's dictionary to the one we used to run the code + // inside + PyObject_SetAttrString(pModule, "__dict__", pModuleDict); + if (PyErr_Occurred()) + { + PyErr_Print(); + Py_DECREF(pModuleDict); + Py_DECREF(pBuiltins); + Py_DECREF(pModule); + return NULL; + } + + return pModule; +} + +int32_t +init (xlator_t *this) +{ + // This is ok to call more than once per process + Py_InitializeEx(0); + + if (!this->children) { + gf_log ("python", GF_LOG_ERROR, + "FATAL: python should have exactly one child"); + return -1; + } + + python_private_t *priv = CALLOC (sizeof (python_private_t), 1); + ERR_ABORT (priv); + + data_t *scriptname = dict_get (this->options, "scriptname"); + if (scriptname) { + priv->scriptname = data_to_str(scriptname); + } else { + gf_log("python", GF_LOG_ERROR, + "FATAL: python requires the scriptname parameter"); + return -1; + } + + priv->pInterp = Py_NewInterpreter(); + + // Adjust python's path + PyObject *syspath = PySys_GetObject("path"); + PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH); + PyList_Append(syspath, path); + Py_DECREF(path); + + gf_log("python", GF_LOG_DEBUG, + "Loading gluster module"); + + priv->pGlusterModule = PyImport_ImportModule("gluster"); + if (PyErr_Occurred()) + { + PyErr_Print(); + return -1; + } + + priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t"); + priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t"); + 
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec"); + + gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname); + + priv->pScriptModule = AnonModule_FromFile(priv->scriptname); + if (!priv->pScriptModule || PyErr_Occurred()) + { + gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname); + PyErr_Print(); + return -1; + } + + if (!PyObject_HasAttrString(priv->pScriptModule, "xlator")) + { + gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname); + return -1; + } + gf_log("python", GF_LOG_DEBUG, "Instantiating translator"); + priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&", + PyLong_FromVoidPtr, this); + if (PyErr_Occurred() || !priv->pXlator) + { + PyErr_Print(); + return -1; + } + + this->private = priv; + + gf_log ("python", GF_LOG_DEBUG, "python xlator loaded"); + return 0; +} + +void +fini (xlator_t *this) +{ + python_private_t *priv = (python_private_t*)(this->private); + Py_DECREF(priv->pXlator); + Py_DECREF(priv->pScriptModule); + Py_DECREF(priv->pGlusterModule); + Py_DECREF(priv->pFrameType); + Py_DECREF(priv->pFdType); + Py_DECREF(priv->pVectorType); + Py_EndInterpreter(priv->pInterp); + return; +} diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py new file mode 100644 index 000000000..507455c85 --- /dev/null +++ b/xlators/bindings/python/src/testxlator.py @@ -0,0 +1,56 @@ +# Copyright (c) 2007 Chris AtLee <chris@atlee.ca> +# This file is part of GlusterFS. +# +# GlusterFS is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3 of the License, +# or (at your option) any later version. +# +# GlusterFS is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
class MyXlator(ComplexTranslator):
    """Test translator that zlib-compresses data on its way to the child."""

    name = "MyXlator"

    def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
        """Pass the child's writev result back up the stack."""
        stack_unwind(frame, op_ret, op_errno, buf)
        return 0

    def writev(self, frame, fd, vector, count, offset):
        """Compress the written data and wind it to the first child.

        frame  -- current ``call_frame_t``
        fd     -- ``fd_t`` being written to
        vector -- first ``iovec`` of an array of ``count`` entries
        count  -- number of entries in the array
        offset -- file offset of the write
        """
        # ``vector`` wraps the address of the first array element, so the
        # whole array is viewed through that address and every buffer is
        # gathered, not just the first.
        vectors = (iovec * count).from_address(addressof(vector))
        payload = b"".join(v.getBytes() for v in vectors)

        # TODO: Use cookie to pass this to writev_cbk
        old_count = len(payload)
        gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", old_count)

        data = payload.encode("zlib")

        vector = iovec(data)
        gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)

        # NOTE(review): ``data``, ``vector`` and ``rfn`` are only referenced
        # from this call; if the child completes the write after this
        # method returns they may already have been collected -- confirm.
        @ret_fn_t
        def rfn(frame, prev, this, op_ret, op_errno, *params):
            if len(params) == 0:
                params = [0]
            # Report the caller's byte count on success, but let a failure
            # from the child through unchanged.
            reported = old_count if op_ret >= 0 else op_ret
            return self.writev_cbk(frame, prev, reported, op_errno, *params)

        # Exactly one iovec is handed down, so the count must be 1.
        stack_wind(frame, rfn, self.firstChild,
                   self.firstChild[0].fops[0].writev, fd, vector, 1, offset)
        return 0


xlator = MyXlator
# Automake rules for the AFR (replicate) translator.
xlator_LTLIBRARIES = afr.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster

# libtool spells the flag "-avoid-version"; "-avoidversion" is not a
# libtool option.
afr_la_LDFLAGS = -module -avoid-version

afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la

noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h

AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)

CLEANFILES =

# The translator is also loadable under the name "replicate".
uninstall-local:
	rm -f $(DESTDIR)$(xlatordir)/replicate.so

install-data-hook:
	ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
\ No newline at end of file diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c new file mode 100644 index 000000000..0c65ca852 --- /dev/null +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -0,0 +1,345 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" + +#include "afr.h" + + +int32_t +afr_opendir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd) +{ + afr_local_t * local = NULL; + + int call_count = -1; + + LOCK (&frame->lock); + { + local = frame->local; + + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + AFR_STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } + + return 0; +} + + 
+int32_t +afr_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int child_count = 0; + int i = 0; + + int ret = -1; + int call_count = -1; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + child_count = priv->child_count; + + ALLOC_OR_GOTO (local, afr_local_t, out); + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + local->fd = fd_ref (fd); + + call_count = local->call_count; + + for (i = 0; i < child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_opendir_cbk, + priv->children[i], + priv->children[i]->fops->opendir, + loc, fd); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, fd); + } + + return 0; +} + + +/** + * Common algorithm for directory read calls: + * + * - Try the fop on the first child that is up + * - if we have failed due to ENOTCONN: + * try the next child + * + * Applicable to: readdir + */ + +int32_t +afr_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + gf_dirent_t *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + if (op_ret == -1) { + last_tried = local->cont.readdir.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + + this_try = ++local->cont.readdir.last_tried; + unwind = 0; + + STACK_WIND (frame, afr_readdir_cbk, + children[this_try], + children[this_try]->fops->readdir, + local->fd, local->cont.readdir.size, + local->cont.readdir.offset); + } + +out: + if (unwind) { + 
AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); + } + + return 0; +} + + +int32_t +afr_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + + int ret = -1; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.readdir.last_tried = call_child; + + local->fd = fd_ref (fd); + local->cont.readdir.size = size; + local->cont.readdir.offset = offset; + + STACK_WIND (frame, afr_readdir_cbk, + children[call_child], children[call_child]->fops->readdir, + fd, size, offset); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + return 0; +} + + +int32_t +afr_getdents_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dir_entry_t *entry, int32_t count) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + if (op_ret == -1) { + last_tried = local->cont.getdents.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + + this_try = ++local->cont.getdents.last_tried; + unwind = 0; + + STACK_WIND (frame, afr_getdents_cbk, + children[this_try], + children[this_try]->fops->getdents, + local->fd, local->cont.getdents.size, + 
local->cont.getdents.offset, local->cont.getdents.flag); + } + +out: + if (unwind) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, entry, count); + } + + return 0; +} + + +int32_t +afr_getdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset, int32_t flag) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.getdents.last_tried = call_child; + + local->fd = fd_ref (fd); + + local->cont.getdents.size = size; + local->cont.getdents.offset = offset; + local->cont.getdents.flag = flag; + + frame->local = local; + + STACK_WIND (frame, afr_getdents_cbk, + children[call_child], children[call_child]->fops->getdents, + fd, size, offset, flag); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + + diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h new file mode 100644 index 000000000..172ec3c90 --- /dev/null +++ b/xlators/cluster/afr/src/afr-dir-read.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __DIR_READ_H__ +#define __DIR_READ_H__ + + +int32_t +afr_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd); + +int32_t +afr_closedir (call_frame_t *frame, xlator_t *this, + fd_t *fd); + +int32_t +afr_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset); + + +int32_t +afr_getdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset, int32_t flag); + + +int32_t +afr_checksum (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags); + + +#endif /* __DIR_READ_H__ */ diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c new file mode 100644 index 000000000..87a6e09b5 --- /dev/null +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -0,0 +1,1786 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" + +#include "afr.h" +#include "afr-transaction.h" + + +void +afr_build_parent_loc (loc_t *parent, loc_t *child) +{ + char *tmp = NULL; + + if (!child->parent) { + loc_copy (parent, child); + return; + } + + tmp = strdup (child->path); + parent->path = strdup (dirname (tmp)); + FREE (tmp); + + parent->name = strrchr (parent->path, '/'); + if (parent->name) + parent->name++; + + parent->inode = inode_ref (child->parent); + parent->parent = inode_parent (parent->inode, 0, NULL); + parent->ino = parent->inode->ino; +} + + +/* {{{ create */ + +int +afr_create_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + local->cont.create.fd, + local->cont.create.inode, + &local->cont.create.buf); + return 0; +} + + +int +afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + 
afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + local->op_ret = op_ret; + + if ((local->success_count == 0) + || (child_index == priv->read_child)) { + local->cont.create.buf = *buf; + local->cont.create.buf.st_ino = + afr_itransform (buf->st_ino, + priv->child_count, + child_index); + } + local->cont.create.inode = inode; + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_create_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_create_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->create, + &local->loc, + local->cont.create.flags, + local->cont.create.mode, + local->cont.create.fd); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_create_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + 
priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->cont.create.flags = flags; + local->cont.create.mode = mode; + local->cont.create.fd = fd_ref (fd); + + local->transaction.fop = afr_create_wind; + local->transaction.done = afr_create_done; + local->transaction.unwind = afr_create_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ mknod */ + +int +afr_mknod_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + local->cont.mknod.inode, + &local->cont.mknod.buf); + return 0; +} + + +int +afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + 
{ + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + local->op_ret = op_ret; + + if ((local->success_count == 0) + || (child_index == priv->read_child)) { + local->cont.mknod.buf = *buf; + local->cont.mknod.buf.st_ino = + afr_itransform (buf->st_ino, + priv->child_count, + child_index); + } + local->cont.mknod.inode = inode; + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_mknod_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->mknod, + &local->loc, local->cont.mknod.mode, + local->cont.mknod.dev); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_mknod_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t dev) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = 
this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->cont.mknod.mode = mode; + local->cont.mknod.dev = dev; + + local->transaction.fop = afr_mknod_wind; + local->transaction.done = afr_mknod_done; + local->transaction.unwind = afr_mknod_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ mkdir */ + + +int +afr_mkdir_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + local->cont.mkdir.inode, + &local->cont.mkdir.buf); + return 0; +} + + +int +afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + 
afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + local->op_ret = op_ret; + + if ((local->success_count == 0) + || (child_index == priv->read_child)) { + local->cont.mkdir.buf = *buf; + local->cont.mkdir.buf.st_ino = + afr_itransform (buf->st_ino, priv->child_count, + child_index); + } + local->cont.mkdir.inode = inode; + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_mkdir_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->mkdir, + &local->loc, local->cont.mkdir.mode); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_mkdir_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) 
{ + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->cont.mkdir.mode = mode; + + local->transaction.fop = afr_mkdir_wind; + local->transaction.done = afr_mkdir_done; + local->transaction.unwind = afr_mkdir_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ link */ + + +int +afr_link_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.link.buf.st_ino = local->cont.link.ino; + + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + local->cont.link.inode, + &local->cont.link.buf); + } + + return 0; +} + + +int +afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if 
(op_ret != -1) { + local->op_ret = op_ret; + + if ((local->success_count == 0) + || (child_index == priv->read_child)) { + local->cont.link.buf = *buf; + local->cont.link.buf.st_ino = + afr_itransform (buf->st_ino, priv->child_count, + child_index); + } + local->cont.link.inode = inode; + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_link_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->link, + &local->loc, + &local->newloc); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_link_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + 
ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, oldloc); + loc_copy (&local->newloc, newloc); + + local->cont.link.ino = oldloc->inode->ino; + + local->transaction.fop = afr_link_wind; + local->transaction.done = afr_link_done; + local->transaction.unwind = afr_link_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, oldloc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (oldloc->path); + local->transaction.new_basename = AFR_BASENAME (newloc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ + +/* {{{ symlink */ + + +int +afr_symlink_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + local->cont.symlink.inode, + &local->cont.symlink.buf); + return 0; +} + + +int +afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { 
+ local->op_ret = op_ret; + + if ((local->success_count == 0) + || (child_index == priv->read_child)) { + local->cont.symlink.buf = *buf; + local->cont.symlink.buf.st_ino = + afr_itransform (buf->st_ino, priv->child_count, + child_index); + } + local->cont.symlink.inode = inode; + + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_symlink_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->symlink, + local->cont.symlink.linkpath, + &local->loc); + + if (!--call_count) + break; + + } + } + + return 0; +} + + +int +afr_symlink_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *loc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory 
:("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->cont.symlink.ino = loc->inode->ino; + local->cont.symlink.linkpath = strdup (linkpath); + + local->transaction.fop = afr_symlink_wind; + local->transaction.done = afr_symlink_done; + local->transaction.unwind = afr_symlink_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ rename */ + +int +afr_rename_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.rename.buf.st_ino = local->cont.rename.ino; + + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.rename.buf); + } + + return 0; +} + + +int +afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if ((op_ret != 
-1) && (local->success_count == 0)) { + local->op_ret = op_ret; + + if (buf) { + local->cont.rename.buf = *buf; + local->cont.rename.buf.st_ino = + afr_itransform (buf->st_ino, priv->child_count, + child_index); + } + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_rename_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_rename_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->rename, + &local->loc, + &local->newloc); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_rename_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT 
(local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, oldloc); + loc_copy (&local->newloc, newloc); + + local->cont.rename.ino = oldloc->inode->ino; + + local->transaction.fop = afr_rename_wind; + local->transaction.done = afr_rename_done; + local->transaction.unwind = afr_rename_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, oldloc); + afr_build_parent_loc (&local->transaction.new_parent_loc, newloc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (oldloc->path); + local->transaction.new_basename = AFR_BASENAME (newloc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ unlink */ + +int +afr_unlink_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + 
local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_unlink_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->unlink, + &local->loc); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int32_t +afr_unlink_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int32_t +afr_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->transaction.fop = 
afr_unlink_wind; + local->transaction.done = afr_unlink_done; + local->transaction.unwind = afr_unlink_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ + +/* {{{ rmdir */ + + + +int +afr_rmdir_unwind (call_frame_t *frame, xlator_t *this) +{ + call_frame_t *main_frame = NULL; + afr_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) { + main_frame = local->transaction.main_frame; + } + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + local->success_count++; + + if (local->success_count == priv->wait_count) + need_unwind = 1; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_rmdir_wind (call_frame_t *frame, xlator_t *this) +{ + 
afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->rmdir, + &local->loc); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_rmdir_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + loc_copy (&local->loc, loc); + + local->transaction.fop = afr_rmdir_wind; + local->transaction.done = afr_rmdir_done; + local->transaction.unwind = afr_rmdir_unwind; + + afr_build_parent_loc (&local->transaction.parent_loc, loc); + + local->transaction.main_frame = frame; + local->transaction.basename = AFR_BASENAME (loc->path); + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == 
-1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ + +/* {{{ setdents */ + +int32_t +afr_setdents_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if ((op_ret != -1) && (local->success_count == 0)) { + local->op_ret = op_ret; + local->success_count++; + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_setdents_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_setdents_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->setdents, + local->fd, local->cont.setdents.flags, + local->cont.setdents.entries, + local->cont.setdents.count); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int32_t +afr_setdents_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int32_t +afr_setdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t flags, dir_entry_t *entries, 
int32_t count) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + + local->fd = fd_ref (fd); + + local->cont.setdents.flags = flags; + local->cont.setdents.entries = entries; + local->cont.setdents.count = count; + + local->transaction.fop = afr_setdents_wind; + local->transaction.done = afr_setdents_done; + + local->transaction.basename = NULL; + local->transaction.pending = AFR_ENTRY_PENDING; + + afr_transaction (frame, this, AFR_ENTRY_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h new file mode 100644 index 000000000..e6e8a5e79 --- /dev/null +++ b/xlators/cluster/afr/src/afr-dir-write.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __DIR_WRITE_H__ +#define __DIR_WRITE_H__ + +int32_t +afr_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd); + +int32_t +afr_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t dev); + +int32_t +afr_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode); + +int32_t +afr_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc); + +int32_t +afr_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc); + +int32_t +afr_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc); + +int32_t +afr_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc); + +int32_t +afr_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *oldloc); + +int32_t +afr_setdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count); + +#endif /* __DIR_WRITE_H__ */ diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c new file mode 100644 index 000000000..a6c99ec05 --- /dev/null +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -0,0 +1,721 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" + +#include "afr.h" + + +/** + * Common algorithm for inode read calls: + * + * - Try the fop on the first child that is up + * - if we have failed due to ENOTCONN: + * try the next child + * + * Applicable to: access, stat, fstat, readlink, getxattr + */ + +/* {{{ access */ + +int32_t +afr_access_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + if (op_ret == -1) { + last_tried = local->cont.access.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.access.last_tried; + + unwind = 0; + + STACK_WIND_COOKIE (frame, afr_access_cbk, + (void *) (long) this_try, + children[this_try], + children[this_try]->fops->access, + &local->loc, local->cont.access.mask); + } + +out: + if (unwind) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + + +/* + * Wind an access call to the first child that is up. + * + * The loc and mask are saved in the frame-local state so that + * afr_access_cbk can re-wind the call to the following children + * when a child returns failure. + * + * If validation or allocation fails, or no child is up (ENOTCONN), + * the frame is unwound here with op_ret -1. Always returns 0. + */ +int32_t +afr_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + 
VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + /* + afr_access_cbk reads its retry state from frame->local, + so the local must be attached to the frame before winding + */ + frame->local = local; + + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.access.last_tried = call_child; + loc_copy (&local->loc, loc); + local->cont.access.mask = mask; + + STACK_WIND_COOKIE (frame, afr_access_cbk, + (void *) (long) call_child, + children[call_child], children[call_child]->fops->access, + loc, mask); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + +/* }}} */ + +/* {{{ stat */ + +int32_t +afr_stat_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int deitransform_child = -1; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + deitransform_child = (long) cookie; + + local = frame->local; + + if (op_ret == -1) { + retry: + last_tried = local->cont.stat.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.stat.last_tried; + + if (this_try == deitransform_child) { + goto retry; + } + + unwind = 0; + + STACK_WIND_COOKIE (frame, afr_stat_cbk, + (void *) (long) deitransform_child, + children[this_try], + children[this_try]->fops->stat, + &local->loc); + } + +out: + if (unwind) { + if (op_ret != -1) + buf->st_ino = local->cont.stat.ino; + + AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); + } + + return 0; +} + + +int32_t +afr_stat (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int call_child = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + 
VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + frame->local = local; + + call_child = afr_deitransform (loc->inode->ino, priv->child_count); + loc_copy (&local->loc, loc); + + /* + if stat fails from the deitransform'd child, we try + all children starting with the first one + */ + local->cont.stat.last_tried = -1; + local->cont.stat.ino = loc->inode->ino; + + STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child, + children[call_child], + children[call_child]->fops->stat, + loc); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + + +/* }}} */ + +/* {{{ fstat */ + +/* + * Callback for fstat. The cookie carries the index of the child that + * afr_fstat tried first. On failure (op_ret == -1) the next untried + * child is picked, skipping that first child, and fstat is wound + * again on the saved fd. Once a call succeeds or all_tried() reports + * no children are left, the frame is unwound; on success st_ino is + * overwritten with the inode number saved by afr_fstat. + */ +int32_t +afr_fstat_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int deitransform_child = -1; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + deitransform_child = (long) cookie; + + local = frame->local; + + if (op_ret == -1) { + retry: + last_tried = local->cont.fstat.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.fstat.last_tried; + + if (this_try == deitransform_child) { + /* + skip the deitransform'd child since if we are here + we must have already tried that child + */ + goto retry; + } + + + unwind = 0; + + STACK_WIND_COOKIE (frame, afr_fstat_cbk, + (void *) (long) deitransform_child, + children[this_try], + children[this_try]->fops->fstat, + local->fd); + } + +out: + if (unwind) { + if (op_ret != -1) + buf->st_ino = local->cont.fstat.ino; + + AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); + } + + return 0; +} + + +int32_t +afr_fstat (call_frame_t 
*frame, xlator_t *this, + fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int call_child = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + frame->local = local; + + VALIDATE_OR_GOTO (fd->inode, out); + + call_child = afr_deitransform (fd->inode->ino, priv->child_count); + + /* + if fstat fails from the deitransform'd child, we try + all children starting with the first one + */ + local->cont.fstat.last_tried = -1; + local->cont.fstat.ino = fd->inode->ino; + local->fd = fd_ref (fd); + + STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child, + children[call_child], + children[call_child]->fops->fstat, + fd); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ readlink */ + +int32_t +afr_readlink_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + const char *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + if (op_ret == -1) { + last_tried = local->cont.readlink.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.readlink.last_tried; + + unwind = 0; + STACK_WIND_COOKIE (frame, afr_readlink_cbk, + (void *) (long) this_try, + children[this_try], + children[this_try]->fops->readlink, + &local->loc, + local->cont.readlink.size); + } + +out: + if (unwind) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, buf); + } + + return 0; +} + + 
+int32_t +afr_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + frame->local = local; + + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.readlink.last_tried = call_child; + loc_copy (&local->loc, loc); + local->cont.readlink.size = size; + + STACK_WIND_COOKIE (frame, afr_readlink_cbk, + (void *) (long) call_child, + children[call_child], children[call_child]->fops->readlink, + loc, size); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + return 0; +} + + +/* }}} */ + +/* {{{ getxattr */ + +int32_t +afr_getxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + priv = this->private; + children = priv->children; + + local = frame->local; + + if (op_ret == -1) { + last_tried = local->cont.getxattr.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.getxattr.last_tried; + + unwind = 0; + STACK_WIND_COOKIE (frame, afr_getxattr_cbk, + (void *) (long) this_try, + children[this_try], + children[this_try]->fops->getxattr, + &local->loc, + local->cont.getxattr.name); + } + +out: + if (unwind) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, dict); + } + + return 0; +} + + +int32_t 
+afr_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name) +{ + afr_private_t * priv = NULL; + xlator_t ** children = NULL; + int call_child = 0; + afr_local_t * local = NULL; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + frame->local = local; + + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.getxattr.last_tried = call_child; + loc_copy (&local->loc, loc); + if (name) + local->cont.getxattr.name = strdup (name); + + STACK_WIND_COOKIE (frame, afr_getxattr_cbk, + (void *) (long) call_child, + children[call_child], children[call_child]->fops->getxattr, + loc, name); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + return 0; +} + + +/* }}} */ + +/* {{{ readv */ + +/** + * read algorithm: + * + * if the user has specified a read subvolume, use it + * otherwise - + * use the inode number to hash it to one of the subvolumes, and + * read from there (to balance read load) + * + * if any of the above read's fail, try the children in sequence + * beginning at the beginning + */ + +int32_t +afr_readv_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int unwind = 1; + int last_tried = -1; + int this_try = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv->children, out); + + children = priv->children; + + local 
= frame->local; + + if (op_ret == -1) { + retry: + last_tried = local->cont.readv.last_tried; + + if (all_tried (last_tried, priv->child_count)) { + goto out; + } + this_try = ++local->cont.readv.last_tried; + + if (this_try == priv->read_child) { + /* + skip the read child since if we are here + we must have already tried that child + */ + goto retry; + } + + unwind = 0; + + STACK_WIND_COOKIE (frame, afr_readv_cbk, + (void *) (long) this_try, + children[this_try], + children[this_try]->fops->readv, + local->fd, local->cont.readv.size, + local->cont.readv.offset); + } + +out: + if (unwind) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf); + } + + return 0; +} + + +int32_t +afr_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + + int call_child = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + children = priv->children; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + frame->local = local; + + if (priv->read_child != -1) { + call_child = priv->read_child; + + /* + if read fails from the read child, we try + all children starting with the first one + */ + local->cont.readv.last_tried = -1; + } else { + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "no child is up :("); + goto out; + } + + local->cont.readv.last_tried = call_child; + } + + local->fd = fd_ref (fd); + + local->cont.readv.size = size; + local->cont.readv.offset = offset; + + STACK_WIND_COOKIE (frame, afr_readv_cbk, + (void *) (long) call_child, + children[call_child], + children[call_child]->fops->readv, + fd, size, offset); + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, 
NULL, 0, NULL); + } + return 0; +} + +/* }}} */ diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h new file mode 100644 index 000000000..6b3bd2da8 --- /dev/null +++ b/xlators/cluster/afr/src/afr-inode-read.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __INODE_READ_H__ +#define __INODE_READ_H__ + +int32_t +afr_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask); + +int32_t +afr_stat (call_frame_t *frame, xlator_t *this, + loc_t *loc); + +int32_t +afr_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd); + +int32_t +afr_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size); + +int32_t +afr_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset); + +int32_t +afr_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name); + +#endif /* __INODE_READ_H__ */ diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c new file mode 100644 index 000000000..267350b2c --- /dev/null +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -0,0 +1,2024 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" + +#include "afr.h" +#include "afr-transaction.h" + + +/* {{{ chmod */ + + +int +afr_chmod_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.chmod.buf.st_ino = local->cont.chmod.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.chmod.buf); + } + return 0; +} + + +int +afr_chmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = 
-1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.chmod.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + afr_chmod_unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_chmod_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int i = 0; + int call_count = -1; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_chmod_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->chmod, + &local->loc, + local->cont.chmod.mode); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_chmod_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int32_t +afr_chmod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, 
out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->cont.chmod.mode = mode; + local->cont.chmod.ino = loc->inode->ino; + + local->transaction.fop = afr_chmod_wind; + local->transaction.done = afr_chmod_done; + local->transaction.unwind = afr_chmod_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + + +/* {{{ fchmod */ + +int +afr_fchmod_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.fchmod.buf.st_ino = local->cont.fchmod.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.fchmod.buf); + } + return 0; +} + + +int +afr_fchmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + 
if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.fchmod.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + afr_fchmod_unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_fchmod_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int i = 0; + int call_count = -1; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_fchmod_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fchmod, + local->fd, + local->cont.fchmod.mode); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_fchmod_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int32_t +afr_fchmod (call_frame_t *frame, xlator_t *this, + fd_t *fd, mode_t mode) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t * transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + 
"out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->cont.fchmod.mode = mode; + local->cont.fchmod.ino = fd->inode->ino; + + local->transaction.fop = afr_fchmod_wind; + local->transaction.done = afr_fchmod_done; + local->transaction.unwind = afr_fchmod_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ chown */ + +int +afr_chown_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.chown.buf.st_ino = local->cont.chown.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.chown.buf); + } + return 0; +} + + +int +afr_chown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 
0) { + local->op_ret = op_ret; + local->cont.chown.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) { + local->transaction.unwind (frame, this); + } + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_chown_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_chown_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->chown, + &local->loc, local->cont.chown.uid, + local->cont.chown.gid); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_chown_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_chown (call_frame_t *frame, xlator_t *this, + loc_t *loc, uid_t uid, gid_t gid) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, 
priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->cont.chown.uid = uid; + local->cont.chown.gid = gid; + local->cont.chown.ino = loc->inode->ino; + + local->transaction.fop = afr_chown_wind; + local->transaction.done = afr_chown_done; + local->transaction.unwind = afr_chown_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + + +/* }}} */ + +/* {{{ fchown */ + +/* + * Unwind the caller's (main) frame of an fchown transaction. + * + * The main frame pointer is taken and cleared under the frame lock, + * so a later call on the same transaction frame finds it NULL and + * does nothing. The reply carries the op_ret/op_errno recorded in the + * local state and the saved stat buffer, with st_ino replaced by the + * inode number stored in cont.fchown.ino. + */ +int +afr_fchown_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.fchown.buf.st_ino = local->cont.fchown.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.fchown.buf); + } + return 0; +} + + +int +afr_fchown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int child_index = (long) cookie; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.fchown.buf = *buf; + } + 
local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) { + local->transaction.unwind (frame, this); + } + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_fchown_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_fchown_wind_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fchown, + local->fd, local->cont.fchown.uid, + local->cont.fchown.gid); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_fchown_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + 
transaction_frame->local = local; + + local->cont.fchown.uid = uid; + local->cont.fchown.gid = gid; + local->cont.fchown.ino = fd->inode->ino; + + local->transaction.fop = afr_fchown_wind; + local->transaction.done = afr_fchown_done; + local->transaction.unwind = afr_fchown_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ writev */ + +int +afr_writev_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.writev.buf.st_ino = local->cont.writev.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.writev.buf); + } + return 0; +} + + +int +afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.writev.buf = *buf; + } + local->success_count++; + + if (local->success_count == 
priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.unwind (frame, this); + + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_writev_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int i = 0; + int call_count = -1; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_writev_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->writev, + local->fd, + local->cont.writev.vector, + local->cont.writev.count, + local->cont.writev.offset); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_writev_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (local->cont.writev.refs) + dict_unref (local->cont.writev.refs); + local->cont.writev.refs = NULL; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret 
= AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->op = GF_FOP_WRITE; + local->cont.writev.vector = iov_dup (vector, count); + local->cont.writev.count = count; + local->cont.writev.offset = offset; + local->cont.writev.ino = fd->inode->ino; + + if (frame->root->req_refs) + local->cont.writev.refs = dict_ref (frame->root->req_refs); + + local->transaction.fop = afr_writev_wind; + local->transaction.done = afr_writev_done; + local->transaction.unwind = afr_writev_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + if (fd->flags & O_APPEND) { + local->transaction.start = 0; + local->transaction.len = 0; + } else { + local->transaction.start = offset; + local->transaction.len = iov_length (vector, count); + } + + local->transaction.pending = AFR_DATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + + +/* }}} */ + +/* {{{ truncate */ + +int +afr_truncate_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.truncate.buf.st_ino = local->cont.truncate.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.truncate.buf); + } + return 0; +} + + +int +afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int child_index = (long) cookie; 
+ int call_count = -1; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.truncate.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int32_t +afr_truncate_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->truncate, + &local->loc, + local->cont.truncate.offset); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_truncate_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO 
(this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->op_ret = -1; + + local->cont.truncate.offset = offset; + local->cont.truncate.ino = loc->inode->ino; + + local->transaction.fop = afr_truncate_wind; + local->transaction.done = afr_truncate_done; + local->transaction.unwind = afr_truncate_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = offset; + local->transaction.pending = AFR_DATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + + +/* }}} */ + +/* {{{ ftruncate */ + + +int +afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.ftruncate.buf.st_ino = local->cont.ftruncate.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.ftruncate.buf); + } + return 0; +} + + +int +afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + + 
local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.ftruncate.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_ftruncate_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->ftruncate, + local->fd, local->cont.ftruncate.offset); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_ftruncate_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + 
transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->op = GF_FOP_FTRUNCATE; + local->op_ret = -1; + + local->cont.ftruncate.offset = offset; + local->cont.ftruncate.ino = fd->inode->ino; + + local->transaction.fop = afr_ftruncate_wind; + local->transaction.done = afr_ftruncate_done; + local->transaction.unwind = afr_ftruncate_unwind; + + local->fd = fd_ref (fd); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = offset; + local->transaction.pending = AFR_DATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ utimens */ + + +int +afr_utimens_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + local->cont.utimens.buf.st_ino = local->cont.utimens.ino; + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno, + &local->cont.utimens.buf); + } + return 0; +} + + +/** + * utimens_wind_cbk - per-child reply handler for the utimens wind: + * records the first successful reply, unwinds the main frame once + * priv->wait_count children have succeeded, and resumes the + * transaction after the last reply + */ + +int +afr_utimens_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int child_index = (long) cookie; + int call_count = -1; + /* must start at 0 (as in the other *_wind_cbk handlers): it is raised + only when success_count reaches priv->wait_count, otherwise the main + frame would be unwound on the very first reply, even a failed one */ + int need_unwind = 0; + + local = frame->local; + priv = 
this->private; + + LOCK (&frame->lock); + { + if (child_went_down (op_ret, op_errno)) + afr_transaction_child_died (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.utimens.buf = *buf; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_utimens_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_utimens_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->utimens, + &local->loc, + local->cont.utimens.tv); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_utimens_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_utimens (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct timespec tv[2]) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame 
(frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->op_ret = -1; + + local->cont.utimens.tv[0] = tv[0]; + local->cont.utimens.tv[1] = tv[1]; + + local->cont.utimens.ino = loc->inode->ino; + + local->transaction.fop = afr_utimens_wind; + local->transaction.done = afr_utimens_done; + local->transaction.unwind = afr_utimens_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ setxattr */ + + +int +afr_setxattr_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno) + } + return 0; +} + + +int +afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + local->success_count++; + + if 
(local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_setxattr_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->setxattr, + &local->loc, + local->cont.setxattr.dict, + local->cont.setxattr.flags); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_setxattr_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int32_t flags) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = 
local; + + local->op_ret = -1; + + local->cont.setxattr.dict = dict_ref (dict); + local->cont.setxattr.flags = flags; + + local->transaction.fop = afr_setxattr_wind; + local->transaction.done = afr_setxattr_done; + local->transaction.unwind = afr_setxattr_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame = frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + +/* }}} */ + +/* {{{ removexattr */ + + +int +afr_removexattr_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno) + } + return 0; +} + + +int +afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int call_count = -1; + int need_unwind = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + } + local->success_count++; + + if (local->success_count == priv->wait_count) { + need_unwind = 1; + } + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + 
+ return 0; +} + + +int32_t +afr_removexattr_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->removexattr, + &local->loc, + local->cont.removexattr.name); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_removexattr_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + + +int +afr_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + + int ret = -1; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + transaction_frame->local = local; + + local->op_ret = -1; + + local->cont.removexattr.name = strdup (name); + + local->transaction.fop = afr_removexattr_wind; + local->transaction.done = afr_removexattr_done; + local->transaction.unwind = afr_removexattr_unwind; + + loc_copy (&local->loc, loc); + + local->transaction.main_frame 
= frame; + local->transaction.start = 0; + local->transaction.len = 0; + local->transaction.pending = AFR_METADATA_PENDING; + + afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); + + op_ret = 0; +out: + if (op_ret == -1) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h new file mode 100644 index 000000000..9c0b5cad3 --- /dev/null +++ b/xlators/cluster/afr/src/afr-inode-write.h @@ -0,0 +1,63 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __INODE_WRITE_H__ +#define __INODE_WRITE_H__ + +int32_t +afr_chmod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode); + +int32_t +afr_chown (call_frame_t *frame, xlator_t *this, + loc_t *loc, uid_t uid, gid_t gid); + +int +afr_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid); + +int32_t +afr_fchmod (call_frame_t *frame, xlator_t *this, + fd_t *fd, mode_t mode); + +int32_t +afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset); + +int32_t +afr_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset); + +int32_t +afr_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset); + +int32_t +afr_utimens (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct timespec tv[2]); + +int32_t +afr_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int32_t flags); + +int32_t +afr_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name); + +#endif /* __INODE_WRITE_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c new file mode 100644 index 000000000..45d065169 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -0,0 +1,1073 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include "glusterfs.h" +#include "xlator.h" +#include "byte-order.h" + +#include "afr.h" +#include "afr-transaction.h" +#include "afr-self-heal-common.h" +#include "afr-self-heal.h" + + +/** + * select_source - select a source and return it + * TODO: take into account option 'favorite-child' + */ + +int +afr_sh_select_source (int sources[], int child_count) +{ + int i; + for (i = 0; i < child_count; i++) + if (sources[i]) + return i; + + return -1; +} + + +/** + * sink_count - return number of sinks in sources array + */ + +int +afr_sh_sink_count (int sources[], int child_count) +{ + int i; + int sinks = 0; + for (i = 0; i < child_count; i++) + if (!sources[i]) + sinks++; + return sinks; +} + +int +afr_sh_source_count (int sources[], int child_count) +{ + int i; + int nsource = 0; + + for (i = 0; i < child_count; i++) + if (sources[i]) + nsource++; + return nsource; +} + + +int +afr_sh_supress_errenous_children (int sources[], int child_errno[], + int child_count) +{ + int i = 0; + + for (i = 0; i < child_count; i++) { + if (child_errno[i] && sources[i]) { + sources[i] = 0; + } + } + + return 0; +} + + +int +afr_sh_supress_empty_children (int sources[], dict_t *xattr[], + struct stat *buf, + int child_count, const char *key) +{ + int i = 0; + int32_t *pending = NULL; + int ret = 0; + int all_xattr_missing = 1; + + /* if the file was created by afr with xattrs */ + for (i = 0; i < child_count; i++) { + if (!xattr[i]) + continue; + + ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); + if (ret != 0) { + continue; + } + + all_xattr_missing = 0; + break; + } + + if (all_xattr_missing) { + /* supress 0byte files.. 
this avoids empty file created + by dir selfheal to overwrite the 'good' file */ + for (i = 0; i < child_count; i++) { + if (!buf[i].st_size) + sources[i] = 0; + } + goto out; + } + + + for (i = 0; i < child_count; i++) { + if (!xattr[i]) { + sources[i] = 0; + continue; + } + + ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); + if (ret != 0) { + sources[i] = 0; + continue; + } + + if (!pending) { + sources[i] = 0; + continue; + } + } + +out: + return 0; +} + + +/** + * print_pending_matrix - log the pending matrix at DEBUG level, + * one row per log message + */ + +void +afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this) +{ + afr_private_t * priv = this->private; + + char *buf = NULL; + char *ptr = NULL; + + int i, j; + + /* "%d " needs up to 12 chars per entry (sign + 10 digits + space); + the extra 8 covers "[ ", "]" and the terminating NUL */ + buf = MALLOC (priv->child_count * 12 + 8); + if (!buf) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + return; + } + + for (i = 0; i < priv->child_count; i++) { + /* the same buffer is reused for every row */ + ptr = buf; + ptr += sprintf (ptr, "[ "); + for (j = 0; j < priv->child_count; j++) { + ptr += sprintf (ptr, "%d ", pending_matrix[i][j]); + } + ptr += sprintf (ptr, "]"); + gf_log (this->name, GF_LOG_DEBUG, + "pending_matrix: %s", buf); + } + + FREE (buf); +} + + +void +afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], + int child_count, const char *key) +{ + int i = 0; + int j = 0; + int32_t *pending = NULL; + int ret = -1; + + /* start clean */ + for (i = 0; i < child_count; i++) { + for (j = 0; j < child_count; j++) { + pending_matrix[i][j] = 0; + } + } + + for (i = 0; i < child_count; i++) { + if (!xattr[i]) + continue; + + pending = NULL; + + ret = dict_get_ptr (xattr[i], (char *) key, + VOID(&pending)); + if (ret != 0) + continue; + + for (j = 0; j < child_count; j++) { + pending_matrix[i][j] = ntoh32 (pending[j]); + } + } +} + + +/** + * mark_sources: Mark all 'source' nodes and return number of source + * nodes found + */ + +int +afr_sh_mark_sources (int32_t *pending_matrix[], int sources[], int child_count) +{ + int i = 0; + int j = 0; + + int nsources = 0; + + + /* start clean */ + for (i = 0; i < child_count; i++) { + sources[i] = 0; + } + + /* + 
Let's 'normalize' the pending matrix first, + by disregarding all pending entries that refer + to themselves + */ + for (i = 0; i < child_count; i++) { + pending_matrix[i][i] = 0; + } + + for (i = 0; i < child_count; i++) { + for (j = 0; j < child_count; j++) { + if (pending_matrix[j][i]) + break; + } + + if (j == child_count) { + nsources++; + sources[i] = 1; + } + } + + return nsources; +} + + +void +afr_sh_pending_to_delta (int32_t *pending_matrix[], int32_t *delta_matrix[], + int success[], int child_count) +{ + int i = 0; + int j = 0; + + /* start clean */ + for (i = 0; i < child_count; i++) { + for (j = 0; j < child_count; j++) { + delta_matrix[i][j] = 0; + } + } + + for (i = 0; i < child_count; i++) { + for (j = 0; j < child_count; j++) { + if (!success[j]) + continue; + delta_matrix[i][j] = -pending_matrix[i][j]; + } + } +} + + +/** + * delta_to_xattr - for every child that has an xattr dict, store row i + * of the delta matrix (converted with hton32) in that dict under 'key' + * + * returns 0, or -ENOMEM if a row buffer cannot be allocated + */ + +int +afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], + int child_count, const char *key) +{ + int i = 0; + int j = 0; + + int ret = 0; + + int32_t *pending = 0; + + for (i = 0; i < child_count; i++) { + if (!xattr[i]) + continue; + + pending = CALLOC (sizeof (int32_t), child_count); + if (!pending) { + /* stop here instead of writing through a NULL pointer */ + return -ENOMEM; + } + + for (j = 0; j < child_count; j++) { + pending[j] = hton32 (delta_matrix[i][j]); + } + + ret = dict_set_bin (xattr[i], (char *) key, pending, + child_count * sizeof (int32_t)); + } + + return 0; +} + + +int +afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this) +{ + afr_private_t *priv = NULL; + int32_t *pending = NULL; + void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ + + int ret = -1; + int i = 0; + + priv = this->private; + + ret = dict_get_ptr (xattr, AFR_METADATA_PENDING, &tmp_pending); + + if (ret != 0) + return 0; + + pending = tmp_pending; + for (i = 0; i < priv->child_count; i++) { + if (i == child_count) + continue; + if (pending[i]) + return 1; + } + + return 0; +} + + +int +afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this) +{ + 
afr_private_t *priv = NULL; + int32_t *pending = NULL; + void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ + + int ret = -1; + int i = 0; + + priv = this->private; + + ret = dict_get_ptr (xattr, AFR_DATA_PENDING, &tmp_pending); + + if (ret != 0) + return 0; + + pending = tmp_pending; + for (i = 0; i < priv->child_count; i++) { + if (i == child_count) + continue; + if (pending[i]) + return 1; + } + + return 0; +} + + +int +afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this) +{ + afr_private_t *priv = NULL; + int32_t *pending = NULL; + void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ + + int ret = -1; + int i = 0; + + priv = this->private; + + ret = dict_get_ptr (xattr, AFR_ENTRY_PENDING, &tmp_pending); + + if (ret != 0) + return 0; + + pending = tmp_pending; + for (i = 0; i < priv->child_count; i++) { + if (i == child_count) + continue; + if (pending[i]) + return 1; + } + + return 0; +} + + + +/** + * is_matrix_zero - return true if pending matrix is all zeroes + */ + +int +afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count) +{ + int i, j; + + for (i = 0; i < child_count; i++) + for (j = 0; j < child_count; j++) + if (pending_matrix[i][j]) + return 0; + return 1; +} + + +int +afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + +// memset (sh->child_errno, 0, sizeof (int) * priv->child_count); + memset (sh->buf, 0, sizeof (struct stat) * priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) + dict_unref (sh->xattr[i]); + sh->xattr[i] = NULL; + } + + if (local->govinda_gOvinda) { + gf_log (this->name, GF_LOG_WARNING, + "aborting selfheal of %s", + local->loc.path); + sh->completion_cbk (frame, this); + } else { + gf_log 
(this->name, GF_LOG_DEBUG, + "proceeding to metadata check on %s", + local->loc.path); + afr_self_heal_metadata (frame, this); + } + + return 0; +} + + +int +sh_missing_entries_unlck_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_missing_entries_done (frame, this); + } + + return 0; +} + + +static int +sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int i = 0; + int call_count = 0; + afr_self_heal_t *sh = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking %"PRId64"/%s on subvolume %s", + sh->parent_loc.inode->ino, local->loc.name, + priv->children[i]->name); + + STACK_WIND (frame, sh_missing_entries_unlck_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + &sh->parent_loc, local->loc.name, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + + if (!--call_count) + break; + } + } + return 0; +} + + +static int +sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int op_errno, struct stat *stbuf) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +static int +sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + call_frame_t *chown_frame = NULL; + int call_count = 0; + 
int child_index = 0; + struct stat *buf = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + buf = &sh->buf[sh->source]; + child_index = (long) cookie; + + if (op_ret == 0) { + chown_frame = copy_frame (frame); + + gf_log (this->name, GF_LOG_DEBUG, + "chown %s to %d %d on subvolume %s", + local->loc.path, buf->st_uid, buf->st_gid, + priv->children[child_index]->name); + + STACK_WIND (chown_frame, sh_destroy_cbk, + priv->children[child_index], + priv->children[child_index]->fops->chown, + &local->loc, + buf->st_uid, buf->st_gid); + } + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + sh_missing_entries_finish (frame, this); + } + + return 0; +} + + +static int +sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int enoent_count = 0; + int call_count = 0; + mode_t st_mode = 0; + dev_t st_dev = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) + if (sh->child_errno[i] == ENOENT) + enoent_count++; + + call_count = enoent_count; + local->call_count = call_count; + + st_mode = sh->buf[sh->source].st_mode; + st_dev = sh->buf[sh->source].st_dev; + + gf_log (this->name, GF_LOG_DEBUG, + "mknod %s mode 0%o on %d subvolumes", + local->loc.path, st_mode, enoent_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->child_errno[i] == ENOENT) { + STACK_WIND_COOKIE (frame, + sh_missing_entries_newentry_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->mknod, + &local->loc, st_mode, st_dev); + if (!--call_count) + break; + } + } + + return 0; +} + + +static int +sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int enoent_count 
= 0; + int call_count = 0; + mode_t st_mode = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) + if (sh->child_errno[i] == ENOENT) + enoent_count++; + + call_count = enoent_count; + local->call_count = call_count; + + st_mode = sh->buf[sh->source].st_mode; + + gf_log (this->name, GF_LOG_DEBUG, + "mkdir %s mode 0%o on %d subvolumes", + local->loc.path, st_mode, enoent_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->child_errno[i] == ENOENT) { + STACK_WIND_COOKIE (frame, + sh_missing_entries_newentry_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->mkdir, + &local->loc, st_mode); + if (!--call_count) + break; + } + } + + return 0; +} + + +static int +sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this, + const char *link) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int enoent_count = 0; + int call_count = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) + if (sh->child_errno[i] == ENOENT) + enoent_count++; + + call_count = enoent_count; + local->call_count = call_count; + + gf_log (this->name, GF_LOG_DEBUG, + "symlink %s -> %s on %d subvolumes", + local->loc.path, link, enoent_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->child_errno[i] == ENOENT) { + STACK_WIND_COOKIE (frame, + sh_missing_entries_newentry_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->symlink, + link, &local->loc); + if (!--call_count) + break; + } + } + + return 0; +} + + +static int +sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + const char *link) +{ + if (op_ret > 0) + sh_missing_entries_symlink (frame, this, link); + else + sh_missing_entries_finish (frame, this); + + return 0; +} + + +static int 
+sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + STACK_WIND (frame, sh_missing_entries_readlink_cbk, + priv->children[sh->source], + priv->children[sh->source]->fops->readlink, + &local->loc, 4096); + + return 0; +} + + +static int +sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int type = 0; + int i = 0; + afr_private_t *priv = NULL; + int enoent_count = 0; + int govinda_gOvinda = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (sh->child_errno[i]) { + if (sh->child_errno[i] == ENOENT) + enoent_count++; + } else { + if (type) { + if (type != (sh->buf[i].st_mode & S_IFMT)) + govinda_gOvinda = 1; + } else { + sh->source = i; + type = sh->buf[i].st_mode & S_IFMT; + } + } + } + + if (govinda_gOvinda) { + gf_log (this->name, GF_LOG_ERROR, + "conflicing filetypes exist for path %s. returning.", + local->loc.path); + + local->govinda_gOvinda = 1; + sh_missing_entries_finish (frame, this); + return 0; + } + + if (!type) { + gf_log (this->name, GF_LOG_ERROR, + "no source found for %s. all nodes down?. returning.", + local->loc.path); + /* subvolumes down and/or file does not exist */ + sh_missing_entries_finish (frame, this); + return 0; + } + + if (enoent_count == 0) { + gf_log (this->name, GF_LOG_ERROR, + "no missing files - %s. 
proceeding to metadata check", + local->loc.path); + /* proceed to next step - metadata self-heal */ + sh_missing_entries_finish (frame, this); + return 0; + } + + switch (type) { + case S_IFSOCK: + case S_IFREG: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + sh_missing_entries_mknod (frame, this); + break; + case S_IFLNK: + sh_missing_entries_readlink (frame, this); + break; + case S_IFDIR: + sh_missing_entries_mkdir (frame, this); + break; + default: + gf_log (this->name, GF_LOG_ERROR, + "unknown file type: 0%o", type); + local->govinda_gOvinda = 1; + sh_missing_entries_finish (frame, this); + } + + return 0; +} + + +static int +sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + int child_index = 0; + afr_local_t *local = NULL; + int call_count = 0; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "path %s on subvolume %s is of mode 0%o", + local->loc.path, + priv->children[child_index]->name, + buf->st_mode); + + local->self_heal.buf[child_index] = *buf; + } else { + gf_log (this->name, GF_LOG_WARNING, + "path %s on subvolume %s => -1 (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + + local->self_heal.child_errno[child_index] = op_errno; + } + + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + sh_missing_entries_create (frame, this); + } + + return 0; +} + + +static int +sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + int i = 0; + int call_count = 0; + afr_private_t *priv = NULL; + dict_t *xattr_req = NULL; + int ret = -1; + + local = frame->local; + call_count = local->child_count; + priv = 
this->private; + + local->call_count = call_count; + + xattr_req = dict_new(); + + if (xattr_req) + ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, + priv->child_count * sizeof(int32_t)); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, + sh_missing_entries_lookup_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + &local->loc, xattr_req); + + if (!--call_count) + break; + } + } + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + + +static int +sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + int child_index = (long) cookie; + + + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + sh->op_failed = 1; + + gf_log (this->name, + (op_errno == EAGAIN ? 
GF_LOG_DEBUG : GF_LOG_ERROR), + "locking inode of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "inode of %s on child %d locked", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed == 1) { + sh_missing_entries_finish (frame, this); + return 0; + } + + sh_missing_entries_lookup (frame, this); + } + + return 0; +} + + +static int +afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + gf_log (this->name, GF_LOG_DEBUG, + "attempting to recreate missing entries for path=%s", + local->loc.path); + + afr_build_parent_loc (&sh->parent_loc, &local->loc); + + call_count = local->child_count; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, sh_missing_entries_lk_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + &sh->parent_loc, local->loc.name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_self_heal (call_frame_t *frame, xlator_t *this, + int (*completion_cbk) (call_frame_t *, xlator_t *)) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + gf_log (this->name, GF_LOG_DEBUG, + "performing self heal on %s (metadata=%d data=%d entry=%d)", + local->loc.path, + local->need_metadata_self_heal, + local->need_data_self_heal, + local->need_entry_self_heal); + + sh->completion_cbk = completion_cbk; + + sh->buf = CALLOC (priv->child_count, sizeof (struct stat)); + sh->child_errno = CALLOC 
(priv->child_count, sizeof (int)); + sh->success = CALLOC (priv->child_count, sizeof (int)); + sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *)); + sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count); + + sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count); + for (i = 0; i < priv->child_count; i++) { + sh->pending_matrix[i] = CALLOC (sizeof (int32_t), + priv->child_count); + } + + sh->delta_matrix = CALLOC (sizeof (int32_t *), priv->child_count); + for (i = 0; i < priv->child_count; i++) { + sh->delta_matrix[i] = CALLOC (sizeof (int32_t), + priv->child_count); + } + + if (local->success_count && local->enoent_count) { + afr_self_heal_missing_entries (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "proceeding to metadata check on %s", + local->loc.path); + afr_sh_missing_entries_done (frame, this); + } + + return 0; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h new file mode 100644 index 000000000..9dd597f07 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -0,0 +1,66 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __AFR_SELF_HEAL_COMMON_H__ +#define __AFR_SELF_HEAL_COMMON_H__ + +#define FILE_HAS_HOLES(buf) (((buf)->st_size) > ((buf)->st_blocks * 512)) + +int +afr_sh_select_source (int sources[], int child_count); + +int +afr_sh_sink_count (int sources[], int child_count); + +int +afr_sh_source_count (int sources[], int child_count); + +int +afr_sh_supress_errenous_children (int sources[], int child_errno[], + int child_count); + +int +afr_sh_supress_empty_children (int sources[], dict_t *xattr[], + struct stat *buf, + int child_count, const char *key); + +void +afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this); + +void +afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], + int child_count, const char *key); + +void +afr_sh_pending_to_delta (int32_t *pending_matrix[], int32_t *delta_matrix[], + int32_t success[], int child_count); + +int +afr_sh_mark_sources (int32_t *pending_matrix[], int sources[], + int child_count); + +int +afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], + int child_count, const char *key); + +int +afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count); + + +#endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c new file mode 100644 index 000000000..3a48da485 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -0,0 +1,1030 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" + +#include "afr-transaction.h" +#include "afr-self-heal.h" +#include "afr-self-heal-common.h" + + + +int +afr_sh_data_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + /* + TODO: cleanup sh->* + */ + + gf_log (this->name, GF_LOG_DEBUG, + "self heal of %s completed", + local->loc.path); + + sh->completion_cbk (frame, this); + + return 0; +} + + +int +afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + fd_unref (sh->healing_fd); + sh->healing_fd = NULL; + afr_sh_data_done (frame, this); + } + + return 0; +} + + +int +afr_sh_data_close (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + afr_self_heal_t *sh = NULL; + int i = 0; + int call_count = 0; + + + local = frame->local; + sh = 
&local->self_heal; + priv = this->private; + + if (!sh->healing_fd) { + afr_sh_data_done (frame, this); + return 0; + } + + call_count = sh->active_sinks + 1; + local->call_count = call_count; + + + /* closed source */ + gf_log (this->name, GF_LOG_DEBUG, + "closing fd of %s on %s", + local->loc.path, priv->children[sh->source]->name); + + STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk, + (void *) (long) sh->source, + priv->children[sh->source], + priv->children[sh->source]->fops->flush, + sh->healing_fd); + call_count--; + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] || !local->child_up[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "closing fd of %s on %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->flush, + sh->healing_fd); + if (!--call_count) + break; + } + + return 0; +} + + +int +afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + int call_count = 0; + int child_index = (long) cookie; + + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "locking inode of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "inode of %s on child %d locked", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_data_close (frame, this); + } + + return 0; +} + + +int +afr_sh_data_unlock (call_frame_t *frame, xlator_t *this) +{ + struct flock flock; + int i = 0; + int call_count = 0; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t * sh = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + + 
local->call_count = call_count; + + flock.l_start = 0; + flock.l_len = 0; + flock.l_type = F_UNLCK; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + &local->loc, F_SETLK, &flock); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_sh_data_finish (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + gf_log (this->name, GF_LOG_DEBUG, + "finishing data selfheal of %s", local->loc.path); + + afr_sh_data_unlock (frame, this); + + return 0; +} + + +int +afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_data_finish (frame, this); + + return 0; +} + + +int +afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int i = 0; + dict_t **erase_xattr = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + + afr_sh_pending_to_delta (sh->pending_matrix, sh->delta_matrix, + sh->success, priv->child_count); + + erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) { + call_count++; + + erase_xattr[i] = get_new_dict(); + dict_ref (erase_xattr[i]); + } + } + + afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, + 
priv->child_count, AFR_DATA_PENDING); + + local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (!erase_xattr[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "erasing pending flags from %s on %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY, erase_xattr[i]); + if (!--call_count) + break; + } + + for (i = 0; i < priv->child_count; i++) { + if (erase_xattr[i]) { + dict_unref (erase_xattr[i]); + } + } + FREE (erase_xattr); + + return 0; +} + + +int +afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + int child_index = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "ftruncate of %s on subvolume %s failed (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + else + gf_log (this->name, GF_LOG_DEBUG, + "ftruncate of %s on subvolume %s completed", + local->loc.path, + priv->children[child_index]->name); + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_data_erase_pending (frame, this); + } + + return 0; +} + + +int +afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + int *sources = NULL; + int call_count = 0; + int i = 0; + + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + sources = sh->sources; + call_count = sh->active_sinks; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; 
i++) { + if (sources[i] || !local->child_up[i]) + continue; + + STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->ftruncate, + sh->healing_fd, sh->file_size); + + if (!--call_count) + break; + } + + return 0; +} + + +int +afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this); + +int +afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + int child_index = (long) cookie; + int call_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + gf_log (this->name, GF_LOG_DEBUG, + "wrote %d bytes of data from %s to child %d, offset %"PRId64"", + op_ret, local->loc.path, child_index, sh->offset - op_ret); + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "write to %s failed on subvolume %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_data_read_write_iter (frame, this); + } + + return 0; +} + + +int +afr_sh_data_read_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *buf) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + int child_index = (long) cookie; + int i = 0; + int call_count = 0; + + off_t offset; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + call_count = sh->active_sinks; + + local->call_count = call_count; + + gf_log (this->name, GF_LOG_DEBUG, + "read %d bytes of data from %s on child %d, offset %"PRId64"", + op_ret, local->loc.path, child_index, sh->offset); + + if (op_ret <= 0) { + afr_sh_data_trim_sinks 
(frame, this); + return 0; + } + + /* what if we read less than block size? */ + offset = sh->offset; + sh->offset += op_ret; + + frame->root->req_refs = frame->root->rsp_refs; + + if (sh->file_has_holes) { + if (iov_0filled (vector, count) == 0) { + /* the iter function depends on the + sh->offset already being updated + above + */ + afr_sh_data_read_write_iter (frame, this); + goto out; + } + } + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] || !local->child_up[i]) + continue; + + /* this is a sink, so write to it */ + STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->writev, + sh->healing_fd, vector, count, offset); + + if (!--call_count) + break; + } + +out: + return 0; +} + + +int +afr_sh_data_read_write (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk, + (void *) (long) sh->source, + priv->children[sh->source], + priv->children[sh->source]->fops->readv, + sh->healing_fd, sh->block_size, + sh->offset); + + return 0; +} + + +int +afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + if (sh->op_failed) { + afr_sh_data_finish (frame, this); + goto out; + } + + if (sh->offset >= sh->file_size) { + gf_log (this->name, GF_LOG_DEBUG, + "closing fd's of %s", + local->loc.path); + afr_sh_data_trim_sinks (frame, this); + + goto out; + } + + afr_sh_data_read_write (frame, this); + +out: + return 0; +} + + +int +afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + 
afr_private_t *priv = NULL; + int call_count = 0; + int child_index = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + /* TODO: some of the open's might fail. + In that case, modify cleanup fn to send flush on those + fd's which are already open */ + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "open of %s failed on child %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } + + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed) { + afr_sh_data_finish (frame, this); + return 0; + } + gf_log (this->name, GF_LOG_DEBUG, + "fd for %s opened, commencing sync", + local->loc.path); + + gf_log (this->name, GF_LOG_WARNING, + "sourcing file %s from %s to other sinks", + local->loc.path, priv->children[sh->source]->name); + + afr_sh_data_read_write (frame, this); + } + + return 0; +} + + +int +afr_sh_data_open (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + + int source = -1; + int *sources = NULL; + + fd_t *fd = NULL; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = sh->active_sinks + 1; + local->call_count = call_count; + + fd = fd_create (local->loc.inode, frame->root->pid); + sh->healing_fd = fd; + + source = local->self_heal.source; + sources = local->self_heal.sources; + + sh->block_size = 65536; + sh->file_size = sh->buf[source].st_size; + + if (FILE_HAS_HOLES (&sh->buf[source])) + sh->file_has_holes = 1; + + /* open source */ + STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk, + (void *) (long) source, + priv->children[source], + priv->children[source]->fops->open, + &local->loc, O_RDONLY|O_LARGEFILE, fd); + call_count--; + + /* open sinks */ + for (i = 0; i < 
priv->child_count; i++) { + if(sources[i] || !local->child_up[i]) + continue; + + STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->open, + &local->loc, + O_WRONLY|O_LARGEFILE, fd); + + if (!--call_count) + break; + } + + return 0; +} + + +int +afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int active_sinks = 0; + int source = 0; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = sh->source; + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] == 0 && local->child_up[i] == 1) { + active_sinks++; + sh->success[i] = 1; + } + } + sh->success[source] = 1; + + if (active_sinks == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "no active sinks for performing self-heal on file %s", + local->loc.path); + afr_sh_data_finish (frame, this); + return 0; + } + sh->active_sinks = active_sinks; + + gf_log (this->name, GF_LOG_DEBUG, + "syncing data of %s from subvolume %s to %d active sinks", + local->loc.path, priv->children[source]->name, active_sinks); + + afr_sh_data_open (frame, this); + + return 0; +} + + +int +afr_sh_data_fix (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int nsources = 0; + int source = 0; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, + priv->child_count, AFR_DATA_PENDING); + + afr_sh_print_pending_matrix (sh->pending_matrix, this); + + + afr_sh_mark_sources (sh->pending_matrix, sh->sources, + priv->child_count); + + afr_sh_supress_empty_children (sh->sources, sh->xattr, sh->buf, + priv->child_count, AFR_DATA_PENDING); + + afr_sh_supress_errenous_children (sh->sources, sh->child_errno, + priv->child_count); + + nsources = 
afr_sh_source_count (sh->sources, priv->child_count); + + if ((nsources == 0) + && (priv->favorite_child != -1) + && (sh->child_errno[priv->favorite_child] == 0)) { + + gf_log (this->name, GF_LOG_WARNING, + "Picking favorite child %s as authentic source to resolve conflicting data of %s", + priv->children[priv->favorite_child]->name, + local->loc.path); + + sh->sources[priv->favorite_child] = 1; + + nsources = afr_sh_source_count (sh->sources, + priv->child_count); + } + + if (nsources == 0) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to resolve conflicting data of %s. " + "Please resolve manually by deleting the file %s " + "from all but the preferred subvolume. " + "Please consider 'option favorite-child <>'", + local->loc.path, local->loc.path); + + local->govinda_gOvinda = 1; + + afr_sh_data_finish (frame, this); + return 0; + } + + source = afr_sh_select_source (sh->sources, priv->child_count); + sh->source = source; + + /* detect changes not visible through pending flags -- JIC */ + for (i = 0; i < priv->child_count; i++) { + if (i == source || sh->child_errno[i]) + continue; + + if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source])) + sh->sources[i] = 0; + } + + afr_sh_data_sync_prepare (frame, this); + + return 0; +} + + +int +afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + int call_count = -1; + int child_index = (long) cookie; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + sh->xattr[child_index] = dict_ref (xattr); + sh->buf[child_index] = *buf; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_data_fix (frame, this); + } + + return 0; +} + + +int +afr_sh_data_lookup (call_frame_t *frame, xlator_t *this) +{ + 
afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xattr_req = NULL; + + int call_count = 0; + int i = 0; + int ret = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + call_count = local->child_count; + + local->call_count = call_count; + + xattr_req = dict_new(); + if (xattr_req) + ret = dict_set_uint64 (xattr_req, AFR_DATA_PENDING, + priv->child_count * sizeof(int32_t)); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_sh_data_lookup_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + &local->loc, xattr_req); + if (!--call_count) + break; + } + } + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + + +int +afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + int child_index = (long) cookie; + + /* TODO: what if lock fails? */ + + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + sh->op_failed = 1; + + gf_log (this->name, + (op_errno == EAGAIN ? 
GF_LOG_DEBUG : GF_LOG_ERROR), + "locking of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "inode of %s on child %d locked", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed) { + afr_sh_data_finish (frame, this); + return 0; + } + + afr_sh_data_lookup (frame, this); + } + + return 0; +} + + +int +afr_sh_data_lock (call_frame_t *frame, xlator_t *this) +{ + struct flock flock; + int i = 0; + int call_count = 0; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t * sh = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + + local->call_count = call_count; + + flock.l_start = 0; + flock.l_len = 0; + flock.l_type = F_WRLCK; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "locking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + &local->loc, F_SETLK, &flock); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_self_heal_data (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = this->private; + + + local = frame->local; + sh = &local->self_heal; + + if (local->need_data_self_heal && priv->data_self_heal) { + afr_sh_data_lock (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "not doing data self heal on %s", + local->loc.path); + afr_sh_data_done (frame, this); + } + + return 0; +} + diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c new file mode 100644 index 000000000..ec341922e --- /dev/null +++ 
b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -0,0 +1,2038 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" + +#include "afr-transaction.h" +#include "afr-self-heal.h" +#include "afr-self-heal-common.h" + + + +int +afr_sh_entry_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + /* + TODO: cleanup sh->* + */ + + gf_log (this->name, GF_LOG_DEBUG, + "self heal of %s completed", + local->loc.path); + + sh->completion_cbk (frame, this); + + return 0; +} + + +int +afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t 
*sh = NULL; + int call_count = 0; + int child_index = (long) cookie; + + /* TODO: what if lock fails? */ + + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "unlocking inode of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "unlocked inode of %s on child %d", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->healing_fd) + fd_unref (sh->healing_fd); + sh->healing_fd = NULL; + afr_sh_entry_done (frame, this); + } + + return 0; +} + + +int +afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t * sh = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + &local->loc, NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_sh_entry_finish (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + gf_log (this->name, GF_LOG_DEBUG, + "finishing entry selfheal of %s", local->loc.path); + + afr_sh_entry_unlock (frame, this); + + return 0; +} + + +int +afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + 
int call_count = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_entry_finish (frame, this); + + return 0; +} + + +int +afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int i = 0; + dict_t **erase_xattr = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + + afr_sh_pending_to_delta (sh->pending_matrix, sh->delta_matrix, + sh->success, priv->child_count); + + erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) { + call_count++; + + erase_xattr[i] = get_new_dict(); + dict_ref (erase_xattr[i]); + } + } + + afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, + priv->child_count, AFR_ENTRY_PENDING); + + local->call_count = call_count; + for (i = 0; i < priv->child_count; i++) { + if (!erase_xattr[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "erasing pending flags from %s on %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY, erase_xattr[i]); + if (!--call_count) + break; + } + + for (i = 0; i < priv->child_count; i++) { + if (erase_xattr[i]) { + dict_unref (erase_xattr[i]); + } + } + FREE (erase_xattr); + + return 0; +} + + + +static int +next_active_source (call_frame_t *frame, xlator_t *this, + int current_active_source) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int source = -1; + int next_active_source = -1; + int i = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + source = sh->source; + + 
if (source != -1) { + if (current_active_source != source) + next_active_source = source; + goto out; + } + + /* + the next active sink becomes the source for the + 'conservative decision' of merging all entries + */ + + for (i = 0; i < priv->child_count; i++) { + if ((sh->sources[i] == 0) + && (local->child_up[i] == 1) + && (i > current_active_source)) { + + next_active_source = i; + break; + } + } +out: + return next_active_source; +} + + + +static int +next_active_sink (call_frame_t *frame, xlator_t *this, + int current_active_sink) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int next_active_sink = -1; + int i = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + /* + the next active sink becomes the source for the + 'conservative decision' of merging all entries + */ + + for (i = 0; i < priv->child_count; i++) { + if ((sh->sources[i] == 0) + && (local->child_up[i] == 1) + && (i > current_active_sink)) { + + next_active_sink = i; + break; + } + } + + return next_active_sink; +} + + +int +build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name) +{ + int ret = -1; + + if (!child) { + goto out; + } + + if (strcmp (parent->path, "/") == 0) + asprintf ((char **)&child->path, "/%s", name); + else + asprintf ((char **)&child->path, "%s/%s", parent->path, name); + + if (!child->path) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + child->name = strrchr (child->path, '/'); + if (child->name) + child->name++; + + child->parent = inode_ref (parent->inode); + child->inode = inode_new (parent->inode->table); + + if (!child->inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ret = 0; +out: + if (ret == -1) + loc_wipe (child); + + return ret; +} + + +int +afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this); + +int +afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this, + int active_src); + 
+int +afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_entry_expunge_subvol (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *expunge_sh = NULL; + int active_src = 0; + call_frame_t *frame = NULL; + + + priv = this->private; + expunge_local = expunge_frame->local; + expunge_sh = &expunge_local->self_heal; + frame = expunge_sh->sh_frame; + + active_src = (long) cookie; + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "removed %s on %s", + expunge_local->loc.path, + priv->children[active_src]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, + "removing %s on %s failed (%s)", + expunge_local->loc.path, + priv->children[active_src]->name, + strerror (op_errno)); + } + + AFR_STACK_DESTROY (expunge_frame); + afr_sh_entry_expunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + + priv = this->private; + expunge_local = expunge_frame->local; + + gf_log (this->name, GF_LOG_WARNING, + "removing directory %s on %s", + expunge_local->loc.path, priv->children[active_src]->name); + + STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk, + (void *) (long) active_src, + priv->children[active_src], + priv->children[active_src]->fops->rmdir, + &expunge_local->loc); + + return 0; +} + + +int 
+afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + + priv = this->private; + expunge_local = expunge_frame->local; + + gf_log (this->name, GF_LOG_WARNING, + "unlinking file %s on %s", + expunge_local->loc.path, priv->children[active_src]->name); + + STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk, + (void *) (long) active_src, + priv->children[active_src], + priv->children[active_src]->fops->unlink, + &expunge_local->loc); + + return 0; +} + + +int +afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, + int active_src, struct stat *buf) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *expunge_sh = NULL; + int source = 0; + call_frame_t *frame = NULL; + int type = 0; + + priv = this->private; + expunge_local = expunge_frame->local; + expunge_sh = &expunge_local->self_heal; + frame = expunge_sh->sh_frame; + source = expunge_sh->source; + + type = (buf->st_mode & S_IFMT); + + switch (type) { + case S_IFSOCK: + case S_IFREG: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + case S_IFLNK: + afr_sh_entry_expunge_unlink (expunge_frame, this, active_src); + + break; + case S_IFDIR: + afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src); + break; + default: + gf_log (this->name, GF_LOG_ERROR, + "%s has unknown file type on %s: 0%o", + expunge_local->loc.path, + priv->children[source]->name, type); + goto out; + break; + } + + return 0; +out: + AFR_STACK_DESTROY (expunge_frame); + afr_sh_entry_expunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *x) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *expunge_sh = NULL; + call_frame_t *frame = NULL; + 
int active_src = 0; + + priv = this->private; + expunge_local = expunge_frame->local; + expunge_sh = &expunge_local->self_heal; + frame = expunge_sh->sh_frame; + active_src = (long) cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "lookup of %s on %s failed (%s)", + expunge_local->loc.path, + priv->children[active_src]->name, + strerror (op_errno)); + goto out; + } + + afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf); + + return 0; +out: + AFR_STACK_DESTROY (expunge_frame); + afr_sh_entry_expunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + + priv = this->private; + expunge_local = expunge_frame->local; + + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s on %s", + expunge_local->loc.path, priv->children[active_src]->name); + + STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk, + (void *) (long) active_src, + priv->children[active_src], + priv->children[active_src]->fops->lookup, + &expunge_local->loc, 0); + + return 0; +} + + +int +afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *x) +{ + afr_private_t *priv = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *expunge_sh = NULL; + int source = 0; + call_frame_t *frame = NULL; + int active_src = 0; + + + priv = this->private; + expunge_local = expunge_frame->local; + expunge_sh = &expunge_local->self_heal; + frame = expunge_sh->sh_frame; + active_src = expunge_sh->active_source; + source = (long) cookie; + + if (op_ret == -1 && op_errno == ENOENT) { + + gf_log (this->name, GF_LOG_DEBUG, + "missing entry %s on %s", + expunge_local->loc.path, + priv->children[source]->name); + + afr_sh_entry_expunge_purge (expunge_frame, this, active_src); + + return 
0; + } + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s exists under %s", + expunge_local->loc.path, + priv->children[source]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, + "looking up %s under %s failed (%s)", + expunge_local->loc.path, + priv->children[source]->name, + strerror (op_errno)); + } + + AFR_STACK_DESTROY (expunge_frame); + afr_sh_entry_expunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this, + char *name) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int ret = -1; + call_frame_t *expunge_frame = NULL; + afr_local_t *expunge_local = NULL; + afr_self_heal_t *expunge_sh = NULL; + int active_src = 0; + int source = 0; + int op_errno = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + active_src = sh->active_source; + source = sh->source; + + if ((strcmp (name, ".") == 0) + || (strcmp (name, "..") == 0)) { + gf_log (this->name, GF_LOG_DEBUG, + "skipping inspection of %s under %s", + name, local->loc.path); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "inspecting existance of %s under %s", + name, local->loc.path); + + expunge_frame = copy_frame (frame); + if (!expunge_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (expunge_local, afr_local_t, out); + + expunge_frame->local = expunge_local; + expunge_sh = &expunge_local->self_heal; + expunge_sh->sh_frame = frame; + expunge_sh->active_source = active_src; + + ret = build_child_loc (this, &expunge_local->loc, &local->loc, name); + if (ret != 0) { + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s on %s", expunge_local->loc.path, + priv->children[source]->name); + + STACK_WIND_COOKIE (expunge_frame, + afr_sh_entry_expunge_entry_cbk, + (void *) (long) source, + priv->children[source], + priv->children[source]->fops->lookup, + 
&expunge_local->loc, 0); + + ret = 0; +out: + if (ret == -1) + afr_sh_entry_expunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *entries) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + gf_dirent_t *entry = NULL; + off_t last_offset = 0; + int active_src = 0; + int entry_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + active_src = sh->active_source; + + if (op_ret <= 0) { + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "readdir of %s on subvolume %s failed (%s)", + local->loc.path, + priv->children[active_src]->name, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "readdir of %s on subvolume %s complete", + local->loc.path, + priv->children[active_src]->name); + } + + afr_sh_entry_expunge_all (frame, this); + return 0; + } + + list_for_each_entry (entry, &entries->list, list) { + last_offset = entry->d_off; + entry_count++; + } + + gf_log (this->name, GF_LOG_DEBUG, + "readdir'ed %d entries from %s", + entry_count, priv->children[active_src]->name); + + sh->offset = last_offset; + local->call_count = entry_count; + + list_for_each_entry (entry, &entries->list, list) { + afr_sh_entry_expunge_entry (frame, this, entry->d_name); + } + + return 0; +} + +int +afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk, + priv->children[active_src], + priv->children[active_src]->fops->readdir, + sh->healing_fd, sh->block_size, sh->offset); + + return 0; +} + + +int +afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv 
= NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int active_src = -1; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + sh->offset = 0; + + if (sh->source == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "no active sources for %s to expunge entries", + local->loc.path); + goto out; + } + + active_src = next_active_sink (frame, this, sh->active_source); + sh->active_source = active_src; + + if (sh->op_failed) { + goto out; + } + + if (active_src == -1) { + /* completed creating missing files on all subvolumes */ + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "expunging entries of %s on %s to other sinks", + local->loc.path, priv->children[active_src]->name); + + afr_sh_entry_expunge_subvol (frame, this, active_src); + + return 0; +out: + afr_sh_entry_erase_pending (frame, this); + return 0; + +} + + +int +afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this); + +int +afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this, + int active_src); + +int +afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_entry_impunge_subvol (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_impunge_utimens_cbk (call_frame_t *impunge_frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct stat *stbuf) +{ + int call_count = 0; + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + call_frame_t *frame = NULL; + int active_src = 0; + int child_index = 0; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = 
impunge_sh->sh_frame; + child_index = (long) cookie; + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "utimes set for %s on %s", + impunge_local->loc.path, + priv->children[child_index]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, + "setting utimes of %s on %s failed (%s)", + impunge_local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + } + + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY (impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_chown_cbk (call_frame_t *impunge_frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct stat *stbuf) +{ + int call_count = 0; + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + call_frame_t *frame = NULL; + int active_src = 0; + int child_index = 0; + struct timespec ts[2]; + + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + child_index = (long) cookie; + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "ownership of %s on %s changed", + impunge_local->loc.path, + priv->children[child_index]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, + "setting ownership of %s on %s failed (%s)", + impunge_local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + goto out; + } + +#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC + ts[0] = impunge_local->cont.lookup.buf.st_atim; + ts[1] = impunge_local->cont.lookup.buf.st_mtim; +#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC + ts[0] = impunge_local->cont.lookup.buf.st_atimespec; + ts[1] = impunge_local->cont.lookup.buf.st_mtimespec; +#else + ts[0].tv_sec = impunge_local->cont.lookup.buf.st_atime; + ts[1].tv_sec = impunge_local->cont.lookup.buf.st_mtime; 
+#endif + STACK_WIND_COOKIE (impunge_frame, + afr_sh_entry_impunge_utimens_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->utimens, + &impunge_local->loc, ts); + + return 0; + +out: + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY (impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + int call_count = 0; + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + call_frame_t *frame = NULL; + int active_src = 0; + int child_index = 0; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + + child_index = (long) cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "creation of %s on %s failed (%s)", + impunge_local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "setting ownership of %s on %s to %d/%d", + impunge_local->loc.path, + priv->children[child_index]->name, + impunge_local->cont.lookup.buf.st_uid, + impunge_local->cont.lookup.buf.st_gid); + + inode->st_mode = stbuf->st_mode; + + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_chown_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->chown, + &impunge_local->loc, + impunge_local->cont.lookup.buf.st_uid, + impunge_local->cont.lookup.buf.st_gid); + return 0; + +out: + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY 
(impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this, + int child_index, struct stat *stbuf) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + + gf_log (this->name, GF_LOG_WARNING, + "creating file %s mode=0%o dev=0x%"GF_PRI_DEV" on %s", + impunge_local->loc.path, + stbuf->st_mode, stbuf->st_rdev, + priv->children[child_index]->name); + + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->mknod, + &impunge_local->loc, + stbuf->st_mode, stbuf->st_rdev); + + return 0; +} + + + +int +afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this, + int child_index, struct stat *stbuf) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + + gf_log (this->name, GF_LOG_WARNING, + "creating directory %s mode=0%o on %s", + impunge_local->loc.path, + stbuf->st_mode, + priv->children[child_index]->name); + + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->mkdir, + &impunge_local->loc, stbuf->st_mode); + + return 0; +} + + +int +afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this, + int child_index, const char *linkname) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + + gf_log (this->name, 
GF_LOG_WARNING, + "creating symlink %s -> %s on %s", + impunge_local->loc.path, linkname, + priv->children[child_index]->name); + + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->symlink, + linkname, &impunge_local->loc); + + return 0; +} + + +int +afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + const char *linkname) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int child_index = -1; + call_frame_t *frame = NULL; + int call_count = -1; + int active_src = -1; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + active_src = impunge_sh->active_source; + + child_index = (long) cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "readlink of %s on %s failed (%s)", + impunge_local->loc.path, + priv->children[active_src]->name, + strerror (op_errno)); + goto out; + } + + afr_sh_entry_impunge_symlink (impunge_frame, this, child_index, + linkname); + return 0; + +out: + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY (impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this, + int child_index, struct stat *stbuf) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int active_src = -1; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + active_src = impunge_sh->active_source; + + STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk, + (void 
*) (long) child_index, + priv->children[active_src], + priv->children[active_src]->fops->readlink, + &impunge_local->loc, 4096); + + return 0; +} + + +int +afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, + void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, + dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int active_src = 0; + int type = 0; + int child_index = 0; + call_frame_t *frame = NULL; + int call_count = 0; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + + child_index = (long) cookie; + + active_src = impunge_sh->active_source; + + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "looking up %s on %s (for %s) failed (%s)", + impunge_local->loc.path, + priv->children[active_src]->name, + priv->children[child_index]->name, + strerror (op_errno)); + goto out; + } + + impunge_local->cont.lookup.buf = *buf; + type = (buf->st_mode & S_IFMT); + + switch (type) { + case S_IFSOCK: + case S_IFREG: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + afr_sh_entry_impunge_mknod (impunge_frame, this, + child_index, buf); + break; + case S_IFLNK: + afr_sh_entry_impunge_readlink (impunge_frame, this, + child_index, buf); + break; + case S_IFDIR: + afr_sh_entry_impunge_mkdir (impunge_frame, this, + child_index, buf); + break; + default: + gf_log (this->name, GF_LOG_ERROR, + "%s has unknown file type on %s: 0%o", + impunge_local->loc.path, + priv->children[active_src]->name, type); + goto out; + break; + } + + return 0; + +out: + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY (impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_recreate 
(call_frame_t *impunge_frame, xlator_t *this, + int child_index) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int active_src = 0; + + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + + active_src = impunge_sh->active_source; + + STACK_WIND_COOKIE (impunge_frame, + afr_sh_entry_impunge_recreate_lookup_cbk, + (void *) (long) child_index, + priv->children[active_src], + priv->children[active_src]->fops->lookup, + &impunge_local->loc, 0); + + return 0; +} + + +int +afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *x) +{ + afr_private_t *priv = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int call_count = 0; + int child_index = 0; + call_frame_t *frame = NULL; + int active_src = 0; + + priv = this->private; + impunge_local = impunge_frame->local; + impunge_sh = &impunge_local->self_heal; + frame = impunge_sh->sh_frame; + child_index = (long) cookie; + active_src = impunge_sh->active_source; + + if (op_ret == -1 && op_errno == ENOENT) { + /* decrease call_count in recreate-callback */ + gf_log (this->name, GF_LOG_DEBUG, + "missing entry %s on %s", + impunge_local->loc.path, + priv->children[child_index]->name); + + afr_sh_entry_impunge_recreate (impunge_frame, this, + child_index); + return 0; + } + + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s exists under %s", + impunge_local->loc.path, + priv->children[child_index]->name); + } else { + gf_log (this->name, GF_LOG_ERROR, + "looking up %s under %s failed (%s)", + impunge_local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + } + + LOCK (&impunge_frame->lock); + { + call_count = --impunge_local->call_count; + } + UNLOCK (&impunge_frame->lock); + + if (call_count == 0) { + AFR_STACK_DESTROY 
(impunge_frame); + afr_sh_entry_impunge_entry_done (frame, this, active_src); + } + + return 0; +} + + +int +afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this, + char *name) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int ret = -1; + call_frame_t *impunge_frame = NULL; + afr_local_t *impunge_local = NULL; + afr_self_heal_t *impunge_sh = NULL; + int active_src = 0; + int i = 0; + int call_count = 0; + int op_errno = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + active_src = sh->active_source; + + if ((strcmp (name, ".") == 0) + || (strcmp (name, "..") == 0)) { + gf_log (this->name, GF_LOG_DEBUG, + "skipping inspection of %s under %s", + name, local->loc.path); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "inspecting existance of %s under %s", + name, local->loc.path); + + impunge_frame = copy_frame (frame); + if (!impunge_frame) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + ALLOC_OR_GOTO (impunge_local, afr_local_t, out); + + impunge_frame->local = impunge_local; + impunge_sh = &impunge_local->self_heal; + impunge_sh->sh_frame = frame; + impunge_sh->active_source = active_src; + + ret = build_child_loc (this, &impunge_local->loc, &local->loc, name); + if (ret != 0) { + goto out; + } + + for (i = 0; i < priv->child_count; i++) { + if (i == active_src) + continue; + if (local->child_up[i] == 0) + continue; + if (sh->sources[i] == 1) + continue; + call_count++; + } + + impunge_local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (i == active_src) + continue; + if (local->child_up[i] == 0) + continue; + if (sh->sources[i] == 1) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s on %s", impunge_local->loc.path, + priv->children[i]->name); + + STACK_WIND_COOKIE (impunge_frame, + afr_sh_entry_impunge_entry_cbk, + (void *) (long) i, + priv->children[i], + 
priv->children[i]->fops->lookup, + &impunge_local->loc, 0); + + if (!--call_count) + break; + } + + ret = 0; +out: + if (ret == -1) + afr_sh_entry_impunge_entry_done (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *entries) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + gf_dirent_t *entry = NULL; + off_t last_offset = 0; + int active_src = 0; + int entry_count = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + active_src = sh->active_source; + + if (op_ret <= 0) { + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "readdir of %s on subvolume %s failed (%s)", + local->loc.path, + priv->children[active_src]->name, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "readdir of %s on subvolume %s complete", + local->loc.path, + priv->children[active_src]->name); + } + + afr_sh_entry_impunge_all (frame, this); + return 0; + } + + list_for_each_entry (entry, &entries->list, list) { + last_offset = entry->d_off; + entry_count++; + } + + gf_log (this->name, GF_LOG_DEBUG, + "readdir'ed %d entries from %s", + entry_count, priv->children[active_src]->name); + + sh->offset = last_offset; + local->call_count = entry_count; + + list_for_each_entry (entry, &entries->list, list) { + afr_sh_entry_impunge_entry (frame, this, entry->d_name); + } + + return 0; +} + + +int +afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this, + int active_src) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk, + priv->children[active_src], + priv->children[active_src]->fops->readdir, + sh->healing_fd, sh->block_size, sh->offset); + + return 0; +} + + +int 
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int active_src = -1; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + sh->offset = 0; + + active_src = next_active_source (frame, this, sh->active_source); + sh->active_source = active_src; + + if (sh->op_failed) { + afr_sh_entry_finish (frame, this); + return 0; + } + + if (active_src == -1) { + /* completed creating missing files on all subvolumes */ + afr_sh_entry_expunge_all (frame, this); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "impunging entries of %s on %s to other sinks", + local->loc.path, priv->children[active_src]->name); + + afr_sh_entry_impunge_subvol (frame, this, active_src); + + return 0; +} + + +int +afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + /* TODO: some of the open's might fail. 
+ In that case, modify cleanup fn to send flush on those + fd's which are already open */ + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "opendir of %s failed on child %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed) { + afr_sh_entry_finish (frame, this); + return 0; + } + gf_log (this->name, GF_LOG_DEBUG, + "fd for %s opened, commencing sync", + local->loc.path); + + sh->active_source = -1; + afr_sh_entry_impunge_all (frame, this); + } + + return 0; +} + + +int +afr_sh_entry_open (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + + int source = -1; + int *sources = NULL; + + fd_t *fd = NULL; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = local->self_heal.source; + sources = local->self_heal.sources; + + sh->block_size = 131072; + sh->offset = 0; + + call_count = sh->active_sinks; + if (source != -1) + call_count++; + + local->call_count = call_count; + + fd = fd_create (local->loc.inode, frame->root->pid); + sh->healing_fd = fd; + + if (source != -1) { + gf_log (this->name, GF_LOG_DEBUG, + "opening directory %s on subvolume %s (source)", + local->loc.path, priv->children[source]->name); + + /* open source */ + STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk, + (void *) (long) source, + priv->children[source], + priv->children[source]->fops->opendir, + &local->loc, fd); + call_count--; + } + + /* open sinks */ + for (i = 0; i < priv->child_count; i++) { + if (sources[i] || !local->child_up[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "opening directory %s on subvolume %s (sink)", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, 
afr_sh_entry_opendir_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->opendir, + &local->loc, fd); + + if (!--call_count) + break; + } + + return 0; +} + + +int +afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int active_sinks = 0; + int source = 0; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = sh->source; + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] == 0 && local->child_up[i] == 1) { + active_sinks++; + sh->success[i] = 1; + } + } + if (source != -1) + sh->success[source] = 1; + + if (active_sinks == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "no active sinks for self-heal on dir %s", + local->loc.path); + afr_sh_entry_finish (frame, this); + return 0; + } + if (source == -1 && active_sinks < 2) { + gf_log (this->name, GF_LOG_WARNING, + "cannot sync with 0 sources and 1 sink on dir %s", + local->loc.path); + afr_sh_entry_finish (frame, this); + return 0; + } + sh->active_sinks = active_sinks; + + if (source != -1) + gf_log (this->name, GF_LOG_DEBUG, + "syncing %s from subvolume %s to %d active sinks", + local->loc.path, priv->children[source]->name, + active_sinks); + else + gf_log (this->name, GF_LOG_DEBUG, + "no active sources for %s found. 
" + "merging all entries as a conservative decision", + local->loc.path); + + afr_sh_entry_open (frame, this); + + return 0; +} + + +int +afr_sh_entry_fix (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int source = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, + priv->child_count, AFR_ENTRY_PENDING); + + afr_sh_print_pending_matrix (sh->pending_matrix, this); + + + afr_sh_mark_sources (sh->pending_matrix, sh->sources, + priv->child_count); + + afr_sh_supress_errenous_children (sh->sources, sh->child_errno, + priv->child_count); + + source = afr_sh_select_source (sh->sources, priv->child_count); + sh->source = source; + + afr_sh_entry_sync_prepare (frame, this); + + return 0; +} + + + +int +afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + int call_count = -1; + int child_index = (long) cookie; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + sh->xattr[child_index] = dict_ref (xattr); + sh->buf[child_index] = *buf; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_entry_fix (frame, this); + } + + return 0; +} + + + +int +afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) +{ + afr_self_heal_t * sh = NULL; + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + dict_t *xattr_req = NULL; + int ret = 0; + int call_count = 0; + int i = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + call_count = local->child_count; + + local->call_count = call_count; + + xattr_req = dict_new(); + if (xattr_req) + ret 
= dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, + priv->child_count * sizeof(int32_t)); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, + afr_sh_entry_lookup_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + &local->loc, xattr_req); + if (!--call_count) + break; + } + } + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + + + +int +afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + int call_count = 0; + int child_index = (long) cookie; + + /* TODO: what if lock fails? */ + + local = frame->local; + sh = &local->self_heal; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + sh->op_failed = 1; + + gf_log (this->name, + (op_errno == EAGAIN ? GF_LOG_DEBUG : GF_LOG_ERROR), + "locking inode of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "inode of %s on child %d locked", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed == 1) { + afr_sh_entry_finish (frame, this); + return 0; + } + + afr_sh_entry_lookup (frame, this); + } + + return 0; +} + + +int +afr_sh_entry_lock (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t * sh = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "locking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk, + (void *) (long) i, + priv->children[i], + 
priv->children[i]->fops->entrylk, + &local->loc, NULL, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_self_heal_entry (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + if (local->need_entry_self_heal && priv->entry_self_heal) { + afr_sh_entry_lock (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "proceeding to completion on %s", + local->loc.path); + afr_sh_entry_done (frame, this); + } + + return 0; +} + diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c new file mode 100644 index 000000000..e65a426db --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -0,0 +1,791 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" + +#include "afr-transaction.h" +#include "afr-self-heal.h" +#include "afr-self-heal-common.h" + + +int +afr_sh_metadata_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + +// memset (sh->child_errno, 0, sizeof (int) * priv->child_count); + memset (sh->buf, 0, sizeof (struct stat) * priv->child_count); + memset (sh->success, 0, sizeof (int) * priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) + dict_unref (sh->xattr[i]); + sh->xattr[i] = NULL; + } + + if (local->govinda_gOvinda) { + gf_log (this->name, GF_LOG_WARNING, + "aborting selfheal of %s", + local->loc.path); + sh->completion_cbk (frame, this); + } else { + if (S_ISREG (local->cont.lookup.buf.st_mode)) { + gf_log (this->name, GF_LOG_DEBUG, + "proceeding to data check on %s", + local->loc.path); + afr_self_heal_data (frame, this); + return 0; + } + + if (S_ISDIR (local->cont.lookup.buf.st_mode)) { + gf_log (this->name, GF_LOG_DEBUG, + "proceeding to entry check on %s", + local->loc.path); + afr_self_heal_entry (frame, this); + return 0; + } + gf_log (this->name, GF_LOG_DEBUG, + "completed self heal of %s", + local->loc.path); + + sh->completion_cbk (frame, this); + } + + return 0; +} + + +int +afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t 
op_errno) +{ + afr_local_t *local = NULL; + int call_count = 0; + + + local = frame->local; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_metadata_done (frame, this); + + return 0; +} + + +int +afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; + struct flock flock = {0, }; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + flock.l_start = 0; + flock.l_len = 0; + flock.l_type = F_UNLCK; + + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "unlocking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND (frame, afr_sh_metadata_unlck_cbk, + priv->children[i], + priv->children[i]->fops->inodelk, + &local->loc, F_SETLK, &flock); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_metadata_finish (frame, this); + + return 0; +} + + +int +afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int i = 0; + dict_t **erase_xattr = NULL; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + + afr_sh_pending_to_delta (sh->pending_matrix, 
sh->delta_matrix, + sh->success, priv->child_count); + + erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) { + call_count++; + + erase_xattr[i] = get_new_dict(); + dict_ref (erase_xattr[i]); + } + } + + afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, + priv->child_count, AFR_METADATA_PENDING); + + local->call_count = call_count; + + if (call_count == 0) { + gf_log (this->name, GF_LOG_WARNING, + "metadata of %s not healed on any subvolume", + local->loc.path); + + afr_sh_metadata_finish (frame, this); + } + + for (i = 0; i < priv->child_count; i++) { + if (!erase_xattr[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "erasing pending flags from %s on %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY, erase_xattr[i]); + if (!--call_count) + break; + } + + for (i = 0; i < priv->child_count; i++) { + if (erase_xattr[i]) { + dict_unref (erase_xattr[i]); + } + } + FREE (erase_xattr); + + return 0; +} + + +int +afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "setting attributes failed for %s on %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + + sh->success[child_index] = 0; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_metadata_erase_pending (frame, this); + + return 0; +} + + +int 
+afr_sh_metadata_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno); + + return 0; +} + + +int +afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno); + + return 0; +} + + +int +afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int source = 0; + int active_sinks = 0; + int call_count = 0; + int i = 0; + struct timespec ts[2]; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = sh->source; + active_sinks = sh->active_sinks; + + /* + * 4 calls per sink - chown, chmod, utimes, setxattr + */ + if (xattr) + call_count = active_sinks * 4; + else + call_count = active_sinks * 3; + + local->call_count = call_count; + +#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC + ts[0] = sh->buf[source].st_atim; + ts[1] = sh->buf[source].st_mtim; +#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC + ts[0] = sh->buf[source].st_atimespec; + ts[1] = sh->buf[source].st_mtimespec; +#else + ts[0].tv_sec = sh->buf[source].st_atime; + ts[1].tv_sec = sh->buf[source].st_mtime; +#endif + + for (i = 0; i < priv->child_count; i++) { + if (call_count == 0) { + break; + } + if (sh->sources[i] || !local->child_up[i]) + continue; + + gf_log (this->name, GF_LOG_DEBUG, + "syncing metadata of %s from %s to %s", + local->loc.path, priv->children[source]->name, + priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->chown, + &local->loc, + sh->buf[source].st_uid, + sh->buf[source].st_gid); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk, + (void *) (long) i, + priv->children[i], + 
priv->children[i]->fops->chmod, + &local->loc, sh->buf[source].st_mode); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->utimens, + &local->loc, ts); + + call_count = call_count - 3; + + if (!xattr) + continue; + + STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->setxattr, + &local->loc, xattr, 0); + call_count--; + } + + return 0; +} + + +int +afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int source = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = sh->source; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "getxattr of %s failed on subvolume %s (%s). proceeding without xattr", + local->loc.path, priv->children[source]->name, + strerror (op_errno)); + + afr_sh_metadata_sync (frame, this, NULL); + } else { + dict_del (xattr, AFR_DATA_PENDING); + dict_del (xattr, AFR_METADATA_PENDING); + dict_del (xattr, AFR_ENTRY_PENDING); + afr_sh_metadata_sync (frame, this, xattr); + } + + return 0; +} + + +int +afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int active_sinks = 0; + int source = 0; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + source = sh->source; + + for (i = 0; i < priv->child_count; i++) { + if (sh->sources[i] == 0 && local->child_up[i] == 1) { + active_sinks++; + sh->success[i] = 1; + } + } + sh->success[source] = 1; + + if (active_sinks == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "no active sinks for performing self-heal on file %s", + local->loc.path); + afr_sh_metadata_finish (frame, this); + return 0; + } + 
sh->active_sinks = active_sinks; + + gf_log (this->name, GF_LOG_DEBUG, + "syncing metadata of %s from subvolume %s to %d active sinks", + local->loc.path, priv->children[source]->name, active_sinks); + + STACK_WIND (frame, afr_sh_metadata_getxattr_cbk, + priv->children[source], + priv->children[source]->fops->getxattr, + &local->loc, NULL); + + return 0; +} + + +int +afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int nsources = 0; + int source = 0; + int i = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr, + priv->child_count, AFR_METADATA_PENDING); + + afr_sh_print_pending_matrix (sh->pending_matrix, this); + + afr_sh_mark_sources (sh->pending_matrix, sh->sources, + priv->child_count); + + afr_sh_supress_errenous_children (sh->sources, sh->child_errno, + priv->child_count); + + nsources = afr_sh_source_count (sh->sources, priv->child_count); + + if ((nsources == 0) + && (priv->favorite_child != -1) + && (sh->child_errno[priv->favorite_child] == 0)) { + + gf_log (this->name, GF_LOG_WARNING, + "Picking favorite child %s as authentic source to resolve conflicting metadata of %s", + priv->children[priv->favorite_child]->name, + local->loc.path); + + sh->sources[priv->favorite_child] = 1; + + nsources = afr_sh_source_count (sh->sources, + priv->child_count); + } + + if (nsources == 0) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to resolve conflicting metadata of %s. " + "Please resolve manually by fixing the " + "permissions/ownership of %s on your subvolumes. 
" + "You can also consider 'option favorite-child <>'", + local->loc.path, local->loc.path); + + local->govinda_gOvinda = 1; + + afr_sh_metadata_finish (frame, this); + return 0; + } + + source = afr_sh_select_source (sh->sources, priv->child_count); + sh->source = source; + + /* detect changes not visible through pending flags -- JIC */ + for (i = 0; i < priv->child_count; i++) { + if (i == source || sh->child_errno[i]) + continue; + + if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source])) + sh->sources[i] = 0; + + if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source])) + sh->sources[i] = 0; + } + + afr_sh_metadata_sync_prepare (frame, this); + + return 0; +} + + +int +afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = 0; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + LOCK (&frame->lock); + { + if (op_ret == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "path %s on subvolume %s is of mode 0%o", + local->loc.path, + priv->children[child_index]->name, + buf->st_mode); + + sh->buf[child_index] = *buf; + if (xattr) + sh->xattr[child_index] = dict_ref (xattr); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "path %s on subvolume %s => -1 (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + + sh->child_errno[child_index] = op_errno; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + afr_sh_metadata_fix (frame, this); + + return 0; +} + + +int +afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; + dict_t *xattr_req = NULL; + int ret = 0; + + 
local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + local->call_count = call_count; + + xattr_req = dict_new(); + + if (xattr_req) + ret = dict_set_uint64 (xattr_req, AFR_METADATA_PENDING, + priv->child_count * sizeof(int32_t)); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "looking up %s on %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + &local->loc, xattr_req); + if (!--call_count) + break; + } + } + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + + +int +afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = (long) cookie; + + /* TODO: what if lock fails? */ + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + sh->op_failed = 1; + + gf_log (this->name, + (op_errno == EAGAIN ? 
GF_LOG_DEBUG : GF_LOG_ERROR), + "locking of %s on child %d failed: %s", + local->loc.path, child_index, + strerror (op_errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "inode of %s on child %d locked", + local->loc.path, child_index); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed) { + afr_sh_metadata_finish (frame, this); + return 0; + } + + afr_sh_metadata_lookup (frame, this); + } + + return 0; +} + + +int +afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + int call_count = 0; + struct flock flock = {0, }; + + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + call_count = local->child_count; + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + flock.l_start = 0; + flock.l_len = 0; + flock.l_type = F_WRLCK; + + if (local->child_up[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "locking %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + &local->loc, F_SETLK, &flock); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_self_heal_metadata (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = this->private; + + + local = frame->local; + sh = &local->self_heal; + + if (local->need_metadata_self_heal && priv->metadata_self_heal) { + afr_sh_metadata_lock (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "proceeding to data check on %s", + local->loc.path); + afr_sh_metadata_done (frame, this); + } + + return 0; +} + diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h new file mode 100644 index 000000000..1c97a9bc1 --- /dev/null +++ 
b/xlators/cluster/afr/src/afr-self-heal.h @@ -0,0 +1,52 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __AFR_SELF_HEAL_H__ +#define __AFR_SELF_HEAL_H__ + +#include <sys/stat.h> + +#define FILETYPE_DIFFERS(buf1,buf2) ((S_IFMT & ((struct stat *)buf1)->st_mode) != (S_IFMT & ((struct stat *)buf2)->st_mode)) +#define PERMISSION_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_mode) != (((struct stat *)buf2)->st_mode)) +#define OWNERSHIP_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_uid) != (((struct stat *)buf2)->st_uid) || (((struct stat *)buf1)->st_gid != (((struct stat *)buf2)->st_gid))) +#define SIZE_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_size) != (((struct stat *)buf2)->st_size)) + + + +int +afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this); +int +afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this); +int +afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this); + +int +afr_self_heal_entry (call_frame_t *frame, xlator_t *this); + +int +afr_self_heal_data (call_frame_t *frame, xlator_t *this); + +int +afr_self_heal_metadata (call_frame_t *frame, xlator_t *this); + +int +afr_self_heal (call_frame_t *frame, xlator_t *this, + int (*completion_cbk) (call_frame_t *, xlator_t *)); + +#endif /* 
__AFR_SELF_HEAL_H__ */ diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c new file mode 100644 index 000000000..3df9f07e5 --- /dev/null +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -0,0 +1,957 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include "dict.h" +#include "byte-order.h" + +#include "afr.h" +#include "afr-transaction.h" + +#include <signal.h> + + +static void +__mark_all_pending (int32_t *pending, int child_count) +{ + int i; + + for (i = 0; i < child_count; i++) + pending[i] = hton32 (1); +} + + +static void +__mark_child_dead (int32_t *pending, int child_count, int child) +{ + pending[child] = 0; +} + + +static void +__mark_down_children (int32_t *pending, int child_count, unsigned char *child_up) +{ + int i; + + for (i = 0; i < child_count; i++) + if (!child_up[i]) + pending[i] = 0; +} + + +static void +__mark_all_success (int32_t *pending, int child_count) +{ + int i; + + for (i = 0; i < child_count; i++) + pending[i] = hton32 (-1); +} + + +static int +__is_first_write_on_fd (xlator_t *this, fd_t *fd) +{ + int op_ret = 0; + int _ret = -1; + + _ret = fd_ctx_get (fd, this, NULL); + if (_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "first writev() on fd=%p, writing changelog", + fd); + + _ret = 
fd_ctx_set (fd, this, 0xaf1); + op_ret = 1; + } + + return op_ret; +} + + +static int +__changelog_enabled (afr_private_t *priv, afr_transaction_type type) +{ + int ret = 0; + + switch (type) { + case AFR_DATA_TRANSACTION: + if (priv->data_change_log) + ret = 1; + + break; + + case AFR_METADATA_TRANSACTION: + if (priv->metadata_change_log) + ret = 1; + + break; + + case AFR_ENTRY_TRANSACTION: + case AFR_ENTRY_RENAME_TRANSACTION: + if (priv->entry_change_log) + ret = 1; + + break; + + case AFR_FLUSH_TRANSACTION: + ret = 1; + } + + return ret; +} + + +static int +__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + fd_t * fd = NULL; + + int op_ret = 0; + + priv = this->private; + local = frame->local; + + if (__changelog_enabled (priv, local->transaction.type)) { + switch (local->op) { + + case GF_FOP_WRITE: + case GF_FOP_FTRUNCATE: + /* + if it's a data transaction, we write the changelog + only on the first write on an fd + */ + + fd = local->fd; + if (!fd || __is_first_write_on_fd (this, fd)) + op_ret = 1; + + break; + + case GF_FOP_FLUSH: + /* only do post-op on flush() */ + + op_ret = 0; + break; + + default: + op_ret = 1; + } + } + + return op_ret; +} + + +static int +__changelog_needed_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int ret = 0; + afr_transaction_type type = -1; + + priv = this->private; + local = frame->local; + type = local->transaction.type; + + if (__changelog_enabled (priv, type) + && (local->op != GF_FOP_WRITE) + && (local->op != GF_FOP_FTRUNCATE)) + ret = 1; + + return ret; +} + + +static int +afr_lock_server_count (afr_private_t *priv, afr_transaction_type type) +{ + int ret = 0; + + switch (type) { + case AFR_FLUSH_TRANSACTION: + case AFR_DATA_TRANSACTION: + ret = priv->data_lock_server_count; + break; + + case AFR_METADATA_TRANSACTION: + ret = priv->metadata_lock_server_count; + break; + + 
case AFR_ENTRY_TRANSACTION: + case AFR_ENTRY_RENAME_TRANSACTION: + ret = priv->entry_lock_server_count; + break; + } + + return ret; +} + + +/* {{{ unlock */ + +int32_t +afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local; + int call_count = 0; + + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + local->transaction.done (frame, this); + } + + return 0; +} + +/* afr_unlock - wind an unlock to every child marked in transaction.locked_nodes; transaction.done runs once all replies are in (or at once if nothing is locked) */ +int +afr_unlock (call_frame_t *frame, xlator_t *this) +{ + struct flock flock = {0, }; /* zero-init: only l_start/l_len/l_type are assigned below */ + + int i = 0; + int call_count = 0; + + afr_local_t *local = NULL; + afr_private_t * priv = this->private; + + local = frame->local; + + call_count = afr_locked_nodes_count (local->transaction.locked_nodes, + priv->child_count); + + if (call_count == 0) { + local->transaction.done (frame, this); + return 0; + } + + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) + call_count *= 2; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + flock.l_start = local->transaction.start; + flock.l_len = local->transaction.len; + flock.l_type = F_UNLCK; + + if (local->transaction.locked_nodes[i]) { + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + + if (local->fd) { + STACK_WIND (frame, afr_unlock_common_cbk, + priv->children[i], + priv->children[i]->fops->finodelk, + local->fd, F_SETLK, &flock); + } else { + STACK_WIND (frame, afr_unlock_common_cbk, + priv->children[i], + priv->children[i]->fops->inodelk, + &local->loc, F_SETLK, &flock); + } + + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + + STACK_WIND (frame, afr_unlock_common_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + &local->transaction.new_parent_loc, + local->transaction.new_basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + + call_count--; + + /* fall through */ + + case 
AFR_ENTRY_TRANSACTION: + if (local->fd) { + STACK_WIND (frame, afr_unlock_common_cbk, + priv->children[i], + priv->children[i]->fops->fentrylk, + local->fd, + local->transaction.basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + } else { + STACK_WIND (frame, afr_unlock_common_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + &local->transaction.parent_loc, + local->transaction.basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + + } + break; + } + + if (!--call_count) + break; + } + } + + return 0; +} + +/* }}} */ + + +/* {{{ pending */ + +int32_t +afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int call_count = -1; + + priv = this->private; + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + if (afr_lock_server_count (priv, local->transaction.type) == 0) { + local->transaction.done (frame, this); + } else { + afr_unlock (frame, this); + } + } + + return 0; +} + + +int +afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = this->private; + + int ret = 0; + int i = 0; + int call_count = 0; + + afr_local_t * local = NULL; + dict_t * xattr = dict_ref (get_new_dict ()); + + local = frame->local; + + __mark_all_success (local->pending_array, priv->child_count); + __mark_down_children (local->pending_array, priv->child_count, local->child_up); + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + call_count *= 2; + } + + local->call_count = call_count; + + if (call_count == 0) { + /* no child is up */ + dict_unref (xattr); + afr_unlock (frame, this); + return 0; + } + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + ret = dict_set_static_bin (xattr, local->transaction.pending, + 
local->pending_array, + priv->child_count * sizeof (int32_t)); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "failed to set pending entry"); + + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + { + if (local->fd) + STACK_WIND (frame, afr_changelog_post_op_cbk, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr); + else + STACK_WIND (frame, afr_changelog_post_op_cbk, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->loc, + GF_XATTROP_ADD_ARRAY, xattr); + } + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + { + STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr); + + call_count--; + } + + /* + set it again because previous stack_wind + might have already returned (think of case + where subvolume is posix) and would have + used the dict as placeholder for return + value + */ + ret = dict_set_static_bin (xattr, local->transaction.pending, + local->pending_array, + priv->child_count * sizeof (int32_t)); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "failed to set pending entry"); + + /* fall through */ + + case AFR_ENTRY_TRANSACTION: + { + if (local->fd) + STACK_WIND (frame, afr_changelog_post_op_cbk, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr); + else + STACK_WIND (frame, afr_changelog_post_op_cbk, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.parent_loc, + GF_XATTROP_ADD_ARRAY, xattr); + } + break; + } + + if (!--call_count) + break; + } + } + + dict_unref (xattr); + return 0; +} + + +int32_t +afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + afr_local_t * local = NULL; + afr_private_t * priv 
= this->private; + loc_t * loc = NULL; + + int call_count = -1; + int child_index = (long) cookie; + + local = frame->local; + loc = &local->loc; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->child_up[child_index] = 0; + + if (op_errno == ENOTSUP) { + gf_log (this->name, GF_LOG_ERROR, + "xattrop not supported by %s", + priv->children[child_index]->name); + local->op_ret = -1; + } else if (!child_went_down (op_ret, op_errno)) { + gf_log (this->name, GF_LOG_ERROR, + "xattrop failed on child %s: %s", + priv->children[child_index]->name, + strerror (op_errno)); + } + local->op_errno = op_errno; + } + + call_count = --local->call_count; + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + if ((local->op_ret == -1) && + (local->op_errno == ENOTSUP)) { + local->transaction.resume (frame, this); + } else { + local->transaction.fop (frame, this); + } + } + + return 0; +} + + +int +afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) +{ + afr_private_t * priv = this->private; + + int i = 0; + int ret = 0; + int call_count = 0; + dict_t *xattr = NULL; + + afr_local_t *local = NULL; + + local = frame->local; + xattr = get_new_dict (); + dict_ref (xattr); + + call_count = afr_up_children_count (priv->child_count, + local->child_up); + + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + call_count *= 2; + } + + if (call_count == 0) { + /* no child is up */ + dict_unref (xattr); + afr_unlock (frame, this); + return 0; + } + + local->call_count = call_count; + + __mark_all_pending (local->pending_array, priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + ret = dict_set_static_bin (xattr, + local->transaction.pending, + local->pending_array, + (priv->child_count * + sizeof (int32_t))); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "failed to set pending entry"); + + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: 
+ { + if (local->fd) + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr); + else + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &(local->loc), + GF_XATTROP_ADD_ARRAY, xattr); + } + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + { + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr); + + call_count--; + } + + + /* + set it again because previous stack_wind + might have already returned (think of case + where subvolume is posix) and would have + used the dict as placeholder for return + value + */ + + ret = dict_set_static_bin (xattr, local->transaction.pending, + local->pending_array, + priv->child_count * sizeof (int32_t)); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "failed to set pending entry"); + + /* fall through */ + + case AFR_ENTRY_TRANSACTION: + { + if (local->fd) + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fxattrop, + local->fd, + GF_XATTROP_ADD_ARRAY, xattr); + else + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.parent_loc, + GF_XATTROP_ADD_ARRAY, xattr); + } + + break; + } + + if (!--call_count) + break; + } + } + + dict_unref (xattr); + return 0; +} + +/* }}} */ + +/* {{{ lock */ + +static +int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index); + +int32_t +afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + int done = 0; + int child_index = (long) cookie; + + int 
call_count = 0; + + local = frame->local; + priv = this->private; + + LOCK (&frame->lock); + { + if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) { + /* wait for the other lock to return */ + call_count = --local->call_count; + } + + if (op_ret == -1) { + if (op_errno == ENOSYS) { + /* return ENOTSUP */ + gf_log (this->name, GF_LOG_ERROR, + "subvolume does not support locking. " + "please load features/posix-locks xlator on server"); + local->op_ret = op_ret; + done = 1; + } + + local->child_up[child_index] = 0; + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + if ((local->op_ret == -1) && + (local->op_errno == ENOSYS)) { + afr_unlock (frame, this); + } else { /* NOTE(review): also reached when this lock failed with an errno other than ENOSYS, so the child is still recorded as locked -- confirm this is intended */ + local->transaction.locked_nodes[child_index] = 1; + local->transaction.lock_count++; + afr_lock_rec (frame, this, child_index + 1); + } + } + + return 0; +} + +/* lower_path - return whichever loc sorts first by path, ties broken by basename; l1 wins a full tie */ +static loc_t * +lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2) +{ + int ret = 0; + + ret = strcmp (l1->path, l2->path); + + if (ret == 0) + ret = strcmp (b1, b2); + + if (ret <= 0) + return l1; + else + return l2; +} + +/* afr_lock_rec - lock the next child that is up, starting at child_index; afr_lock_cbk re-enters with child_index + 1 */ +static +int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + struct flock flock = {0, }; /* zero-init: only l_start/l_len/l_type are assigned below */ + + loc_t * lower = NULL; + loc_t * higher = NULL; + + const char *lower_name = NULL; + const char *higher_name = NULL; + + local = frame->local; + priv = this->private; + + flock.l_start = local->transaction.start; + flock.l_len = local->transaction.len; + flock.l_type = F_WRLCK; + + /* skip over children that are down */ + while ((child_index < priv->child_count) + && !local->child_up[child_index]) + child_index++; + + if ((child_index == priv->child_count) && + local->transaction.lock_count == 0) { + + gf_log (this->name, GF_LOG_DEBUG, + "unable to lock on even one child"); + + local->op_ret = -1; + local->op_errno = EAGAIN; + + local->transaction.done (frame, this); + + return 0; + + } + + 
if ((child_index == priv->child_count) + || (local->transaction.lock_count == + afr_lock_server_count (priv, local->transaction.type))) { + + /* we're done locking */ + + if (__changelog_needed_pre_op (frame, this)) { + afr_changelog_pre_op (frame, this); + } else { + local->transaction.fop (frame, this); + } + + return 0; + } + + switch (local->transaction.type) { + case AFR_DATA_TRANSACTION: + case AFR_METADATA_TRANSACTION: + case AFR_FLUSH_TRANSACTION: + + if (local->fd) { + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->finodelk, + local->fd, F_SETLKW, &flock); + + } else { + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->inodelk, + &local->loc, F_SETLKW, &flock); + } + + break; + + case AFR_ENTRY_RENAME_TRANSACTION: + { + local->call_count = 2; + + lower = lower_path (&local->transaction.parent_loc, + local->transaction.basename, + &local->transaction.new_parent_loc, + local->transaction.new_basename); + + lower_name = (lower == &local->transaction.parent_loc ? + local->transaction.basename : + local->transaction.new_basename); + + higher = (lower == &local->transaction.parent_loc ? + &local->transaction.new_parent_loc : + &local->transaction.parent_loc); + + higher_name = (higher == &local->transaction.parent_loc ? 
+ local->transaction.basename : + local->transaction.new_basename); + + + /* TODO: these locks should be blocking */ + + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + lower, lower_name, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + higher, higher_name, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + + break; + } + + case AFR_ENTRY_TRANSACTION: + if (local->fd) { + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->fentrylk, + local->fd, + local->transaction.basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + } else { + STACK_WIND_COOKIE (frame, afr_lock_cbk, + (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->entrylk, + &local->transaction.parent_loc, + local->transaction.basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK); + } + + break; + } + + return 0; +} + + +int32_t afr_lock (call_frame_t *frame, xlator_t *this) +{ + return afr_lock_rec (frame, this, 0); +} + + +/* }}} */ + +int32_t +afr_transaction_resume (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + local = frame->local; + priv = this->private; + + if (__changelog_needed_post_op (frame, this)) { + afr_changelog_post_op (frame, this); + } else { + if (afr_lock_server_count (priv, local->transaction.type) == 0) { + local->transaction.done (frame, this); + } else { + afr_unlock (frame, this); + } + } + + return 0; +} + + +/** + * afr_transaction_child_died - inform that a child died during an fop + */ + +void +afr_transaction_child_died (call_frame_t *frame, xlator_t *this, int child_index) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + local = frame->local; + priv = this->private; + + 
__mark_child_dead (local->pending_array, priv->child_count, child_index); +} + + +int32_t +afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + local = frame->local; + priv = this->private; + + afr_transaction_local_init (local, priv); + + local->transaction.resume = afr_transaction_resume; + local->transaction.type = type; + + if (afr_lock_server_count (priv, local->transaction.type) == 0) { + if (__changelog_needed_pre_op (frame, this)) { + afr_changelog_pre_op (frame, this); + } else { + local->transaction.fop (frame, this); + } + } else { + afr_lock (frame, this); + } + + return 0; +} diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h new file mode 100644 index 000000000..49cdd219f --- /dev/null +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -0,0 +1,36 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __TRANSACTION_H__ +#define __TRANSACTION_H__ + +#define AFR_METADATA_PENDING "trusted.glusterfs.afr.metadata-pending" + +#define AFR_DATA_PENDING "trusted.glusterfs.afr.data-pending" + +#define AFR_ENTRY_PENDING "trusted.glusterfs.afr.entry-pending" + +void +afr_transaction_child_died (call_frame_t *frame, xlator_t *this, + int child_index); + +int32_t +afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type); + +#endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c new file mode 100644 index 000000000..e4c1a8479 --- /dev/null +++ b/xlators/cluster/afr/src/afr.c @@ -0,0 +1,2338 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <libgen.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/time.h> +#include <stdlib.h> +#include <signal.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "afr.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "list.h" +#include "call-stub.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" + +#include "afr-inode-read.h" +#include "afr-inode-write.h" +#include "afr-dir-read.h" +#include "afr-dir-write.h" +#include "afr-transaction.h" + +#include "afr-self-heal.h" + + +/** + * afr_local_cleanup - cleanup everything in frame->local + */ + +void +afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) +{ + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int i = 0; + + + sh = &local->self_heal; + priv = this->private; + + if (sh->buf) + FREE (sh->buf); + + if (sh->xattr) { + for (i = 0; i < priv->child_count; i++) { + if (sh->xattr[i]) { + dict_unref (sh->xattr[i]); + sh->xattr[i] = NULL; + } + } + FREE (sh->xattr); + } + + if (sh->child_errno) + FREE (sh->child_errno); + + if (sh->pending_matrix) { + for (i = 0; i < priv->child_count; i++) { + FREE (sh->pending_matrix[i]); + } + FREE (sh->pending_matrix); + } + + if (sh->delta_matrix) { + for (i = 0; i < priv->child_count; i++) { + FREE (sh->delta_matrix[i]); + } + FREE (sh->delta_matrix); + } + + if (sh->sources) + FREE (sh->sources); + + if (sh->success) + FREE (sh->success); + + if (sh->healing_fd) { + fd_unref (sh->healing_fd); + sh->healing_fd = NULL; + } + + loc_wipe (&sh->parent_loc); +} + + +void +afr_local_cleanup (afr_local_t *local, xlator_t *this) +{ + if (!local) + return; + + afr_local_sh_cleanup (local, this); + + FREE (local->child_errno); + FREE (local->pending_array); + + loc_wipe (&local->loc); + loc_wipe (&local->newloc); + + FREE 
(local->transaction.locked_nodes); + FREE (local->transaction.child_errno); + + FREE (local->transaction.basename); + FREE (local->transaction.new_basename); + + loc_wipe (&local->transaction.parent_loc); + loc_wipe (&local->transaction.new_parent_loc); + + if (local->fd) + fd_unref (local->fd); + + if (local->xattr_req) + dict_unref (local->xattr_req); + + FREE (local->child_up); + + { /* lookup */ + if (local->cont.lookup.xattr) + dict_unref (local->cont.lookup.xattr); + } + + { /* getxattr */ + if (local->cont.getxattr.name) + FREE (local->cont.getxattr.name); + } + + { /* lk */ + if (local->cont.lk.locked_nodes) + FREE (local->cont.lk.locked_nodes); + } + + { /* checksum */ + if (local->cont.checksum.file_checksum) + FREE (local->cont.checksum.file_checksum); + if (local->cont.checksum.dir_checksum) + FREE (local->cont.checksum.dir_checksum); + } + + { /* create */ + if (local->cont.create.fd) + fd_unref (local->cont.create.fd); + } + + { /* writev */ + FREE (local->cont.writev.vector); + } + + { /* setxattr */ + if (local->cont.setxattr.dict) + dict_unref (local->cont.setxattr.dict); + } + + { /* removexattr */ + FREE (local->cont.removexattr.name); + } + + { /* symlink */ + FREE (local->cont.symlink.linkpath); + } +} + + +int +afr_frame_return (call_frame_t *frame) +{ + afr_local_t *local = NULL; + int call_count = 0; + + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK (&frame->lock); + + return call_count; +} + +/** + * first_up_child - return the index of the first child that is up + */ + +int +afr_first_up_child (afr_private_t *priv) +{ + xlator_t ** children = NULL; + int ret = -1; + int i = 0; + + LOCK (&priv->lock); + { + children = priv->children; + for (i = 0; i < priv->child_count; i++) { + if (priv->child_up[i]) { + ret = i; + break; + } + } + } + UNLOCK (&priv->lock); + + return ret; +} + + +/** + * up_children_count - return the number of children that are up + */ + +int +afr_up_children_count 
(int child_count, unsigned char *child_up) +{ + int i = 0; + int ret = 0; + + for (i = 0; i < child_count; i++) + if (child_up[i]) + ret++; + return ret; +} + + +int +afr_locked_nodes_count (unsigned char *locked_nodes, int child_count) +{ + int ret = 0; + int i; + + for (i = 0; i < child_count; i++) + if (locked_nodes[i]) + ret++; + + return ret; +} + + +ino64_t +afr_itransform (ino64_t ino, int child_count, int child_index) +{ + ino64_t scaled_ino = -1; + + if (ino == ((uint64_t) -1)) { + scaled_ino = ((uint64_t) -1); + goto out; + } + + scaled_ino = (ino * child_count) + child_index; + +out: + return scaled_ino; +} + + +int +afr_deitransform_orig (ino64_t ino, int child_count) +{ + int index = -1; + + index = ino % child_count; + + return index; +} + + +int +afr_deitransform (ino64_t ino, int child_count) +{ + return 0; +} + + +int +afr_self_heal_cbk (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + int ret = -1; + + local = frame->local; + + if (local->govinda_gOvinda) { + ret = inode_ctx_put (local->cont.lookup.inode, this, 1); + + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + } else { + inode_ctx_del (local->cont.lookup.inode, this, NULL); + } + + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->cont.lookup.inode, + &local->cont.lookup.buf, + local->cont.lookup.xattr); + + return 0; +} + + +int +afr_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + struct stat * lookup_buf = NULL; + int call_count = -1; + int child_index = -1; + int prev_child_index = -1; + uint32_t open_fd_count = 0; + int ret = 0; + + child_index = (long) cookie; + priv = this->private; + + LOCK (&frame->lock); + { + local = frame->local; + + lookup_buf = &local->cont.lookup.buf; + + if (op_ret == -1) { + if (op_errno == ENOENT) + local->enoent_count++; + + if 
(op_errno != ENOTCONN) + local->op_errno = op_errno; + + goto unlock; + } + /* NOTE(review): afr-self-heal.h names the second parameter of the afr_sh_has_*_pending() helpers child_count, but child_index is passed here -- confirm */ + if (afr_sh_has_metadata_pending (xattr, child_index, this)) + local->need_metadata_self_heal = 1; + + if (afr_sh_has_entry_pending (xattr, child_index, this)) + local->need_entry_self_heal = 1; + + if (afr_sh_has_data_pending (xattr, child_index, this)) + local->need_data_self_heal = 1; + + ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT, + &open_fd_count); + local->open_fd_count += open_fd_count; + + /* in case of revalidate, we need to send stat of the + * child whose stat was sent during the first lookup + * (so that time stamp does not vary with revalidate). + * in case it is down, stat of the first success will + * be replied */ + + /* inode number should be preserved across revalidates */ + + if (local->success_count == 0) { + local->op_ret = op_ret; + + local->cont.lookup.inode = inode; + local->cont.lookup.xattr = dict_ref (xattr); + + *lookup_buf = *buf; + lookup_buf->st_ino = afr_itransform (buf->st_ino, + priv->child_count, + child_index); + } else { + if (FILETYPE_DIFFERS (buf, lookup_buf)) { + /* mismatching filetypes with same name + -- Govinda !! GOvinda !!! 
+ */ + local->govinda_gOvinda = 1; + } + + if (PERMISSION_DIFFERS (buf, lookup_buf)) { + /* mismatching st_mode */ + local->need_metadata_self_heal = 1; + } + + if (OWNERSHIP_DIFFERS (buf, lookup_buf)) { + /* mismatching ownership (uid or gid) */ + local->need_metadata_self_heal = 1; + } + + if (SIZE_DIFFERS (buf, lookup_buf) + && S_ISREG (buf->st_mode)) { + local->need_data_self_heal = 1; + } + + prev_child_index = afr_deitransform_orig (lookup_buf->st_ino, + priv->child_count); + if (child_index < prev_child_index) { + *lookup_buf = *buf; + lookup_buf->st_ino = afr_itransform (buf->st_ino, + priv->child_count, + child_index); + } + } + + local->success_count++; + } +unlock: + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (local->op_ret == 0) { + /* KLUDGE: assuming DHT will not itransform in + revalidate */ + if (local->cont.lookup.inode->ino) + lookup_buf->st_ino = + local->cont.lookup.inode->ino; + } + + if (local->success_count && local->enoent_count) { + local->need_metadata_self_heal = 1; + local->need_data_self_heal = 1; + local->need_entry_self_heal = 1; + } + + if (local->success_count) { + /* check for govinda_gOvinda case in previous lookup */ + if (!inode_ctx_get (local->cont.lookup.inode, + this, NULL)) + local->need_data_self_heal = 1; + } + + if ((local->need_metadata_self_heal + || local->need_data_self_heal + || local->need_entry_self_heal) + && (!local->open_fd_count)) { + + if (!local->cont.lookup.inode->st_mode) { + /* fix for RT #602 */ + local->cont.lookup.inode->st_mode = + lookup_buf->st_mode; + } + + afr_self_heal (frame, this, afr_self_heal_cbk); + } else { + AFR_STACK_UNWIND (frame, local->op_ret, + local->op_errno, + local->cont.lookup.inode, + &local->cont.lookup.buf, + local->cont.lookup.xattr); + } + } + + return 0; +} + +/* afr_lookup - lookup fop entry point; replies are collected in afr_lookup_cbk */ +int +afr_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int ret = -1; + 
int i = 0; + int32_t op_errno = 0; + + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + local->op_ret = -1; + + frame->local = local; + + loc_copy (&local->loc, loc); + + local->reval_child_index = 0; + + local->call_count = priv->child_count; + + local->child_up = memdup (priv->child_up, priv->child_count); + local->child_count = afr_up_children_count (priv->child_count, + local->child_up); + + /* By default assume ENOTCONN. On success it will be set to 0. */ + local->op_errno = ENOTCONN; + + if ((xattr_req == NULL) + && (priv->metadata_self_heal + || priv->data_self_heal + || priv->entry_self_heal)) + local->xattr_req = dict_new (); + else + local->xattr_req = dict_ref (xattr_req); + + if (priv->metadata_self_heal) { + ret = dict_set_uint64 (local->xattr_req, AFR_METADATA_PENDING, + priv->child_count * sizeof(int32_t)); + } + + if (priv->data_self_heal) { + ret = dict_set_uint64 (local->xattr_req, AFR_DATA_PENDING, + priv->child_count * sizeof(int32_t)); + } + + if (priv->entry_self_heal) { + ret = dict_set_uint64 (local->xattr_req, AFR_ENTRY_PENDING, + priv->child_count * sizeof(int32_t)); + } + + ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0); + + for (i = 0; i < priv->child_count; i++) { + STACK_WIND_COOKIE (frame, afr_lookup_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->lookup, + loc, local->xattr_req); + } + + ret = 0; +out: + if (ret == -1) + AFR_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + + +/* {{{ open */ + +int +afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + afr_local_t * local = frame->local; + + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd); + return 0; +} + + +int +afr_open_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + 
int call_count = -1; + + priv = this->private; + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if ((local->cont.open.flags & O_TRUNC) + && (local->op_ret >= 0)) { + STACK_WIND (frame, afr_open_ftruncate_cbk, + this, this->fops->ftruncate, + fd, 0); + } else { + AFR_STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } + } + + return 0; +} + + +int +afr_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int i = 0; + int ret = -1; + + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t wind_flags = flags & (~O_TRUNC); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + + ret = inode_ctx_get (loc->inode, this, NULL); + if (ret == 0) { + /* if ctx is set it means self-heal failed */ + + gf_log (this->name, GF_LOG_WARNING, + "returning EIO, file has to be manually corrected " + "in backend"); + op_errno = EIO; + goto out; + } + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + call_count = local->call_count; + + local->cont.open.flags = flags; + local->fd = fd_ref (fd); + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->open, + loc, wind_flags, fd); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, fd); + } + + return 0; +} + +/* }}} */ + +/* {{{ flush */ + +int +afr_flush_wind_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + afr_local_t * local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + + +int +afr_flush_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int i = 0; + int call_count = -1; + + local = frame->local; + priv = this->private; + + call_count = afr_up_children_count (priv->child_count, local->child_up); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_flush_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->flush, + local->fd); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_flush_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +afr_simple_flush_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +static int +__is_fd_ctx_set (xlator_t *this, fd_t *fd) +{ + int _ret = 0; + int op_ret = 0; + + _ret = fd_ctx_get (fd, this, NULL); + if (_ret == 0) + op_ret = 1; + + return op_ret; +} + + +int 
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + + int ret = -1; + int i = 0; + int call_count = 0; + + int op_ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + + if (__is_fd_ctx_set (this, fd)) { + local->op = GF_FOP_FLUSH; + local->transaction.fop = afr_flush_wind; + local->transaction.done = afr_flush_done; + + local->fd = fd_ref (fd); + + local->transaction.start = 0; + local->transaction.len = 0; + + local->transaction.pending = AFR_DATA_PENDING; + + afr_transaction (frame, this, AFR_FLUSH_TRANSACTION); + } else { + /* + * if fd's ctx is not set, then there is no need + * to erase changelog. So just send the flush + */ + + call_count = local->call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_simple_flush_cbk, + priv->children[i], + priv->children[i]->fops->flush, + fd); + + if (!--call_count) + break; + } + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + + return 0; +} + +/* }}} */ + +/* {{{ fsync */ + +int +afr_fsync_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync) +{ + afr_private_t *priv = NULL; + afr_local_t 
*local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fsync_cbk, + priv->children[i], + priv->children[i]->fops->fsync, + fd, datasync); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + +/* }}} */ + +/* {{{ fsync */ + +int32_t +afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int32_t +afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fsync_cbk, + 
priv->children[i], + priv->children[i]->fops->fsyncdir, + fd, datasync); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + +/* }}} */ + +/* {{{ xattrop */ + +int32_t +afr_xattrop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xattr) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr); + + return 0; +} + + +int32_t +afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_xattrop_cbk, + priv->children[i], + priv->children[i]->fops->xattrop, + loc, optype, xattr); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + +/* }}} */ + +/* {{{ fxattrop */ + +int32_t +afr_fxattrop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xattr) +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if 
(op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr); + + return 0; +} + + +int32_t +afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fxattrop_cbk, + priv->children[i], + priv->children[i]->fops->fxattrop, + fd, optype, xattr); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + +/* }}} */ + + +int32_t +afr_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int32_t +afr_inodelk (call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t cmd, struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO 
(frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_inodelk_cbk, + priv->children[i], + priv->children[i]->fops->inodelk, + loc, cmd, flock); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int32_t +afr_finodelk (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t cmd, struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_finodelk_cbk, + priv->children[i], + priv->children[i]->fops->finodelk, + fd, cmd, flock); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if 
(op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int32_t +afr_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_entrylk_cbk, + priv->children[i], + priv->children[i]->fops->entrylk, + loc, basename, cmd, type); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + + +int32_t +afr_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0) + local->op_ret = 0; + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, 
local->op_errno); + + return 0; +} + + +int32_t +afr_fentrylk (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_fentrylk_cbk, + priv->children[i], + priv->children[i]->fops->fentrylk, + fd, basename, cmd, type); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_checksum_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + uint8_t *file_checksum, uint8_t *dir_checksum) + +{ + afr_local_t *local = NULL; + + int call_count = -1; + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == 0 && (local->op_ret != 0)) { + local->op_ret = 0; + + local->cont.checksum.file_checksum = MALLOC (ZR_FILENAME_MAX); + memcpy (local->cont.checksum.file_checksum, file_checksum, + ZR_FILENAME_MAX); + + local->cont.checksum.dir_checksum = MALLOC (ZR_FILENAME_MAX); + memcpy (local->cont.checksum.dir_checksum, dir_checksum, + ZR_FILENAME_MAX); + + } + + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->cont.checksum.file_checksum, + local->cont.checksum.dir_checksum); + + return 0; +} + + +int32_t +afr_checksum (call_frame_t *frame, 
xlator_t *this, loc_t *loc, + int32_t flag) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int ret = -1; + + int i = 0; + int32_t call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + call_count = local->call_count; + frame->local = local; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_checksum_cbk, + priv->children[i], + priv->children[i]->fops->checksum, + loc, flag); + + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + + +int32_t +afr_statfs_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct statvfs *statvfs) +{ + afr_local_t *local = NULL; + + int call_count = 0; + + LOCK (&frame->lock); + { + local = frame->local; + + if (op_ret == 0) { + local->op_ret = op_ret; + + if (local->cont.statfs.buf_set) { + if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail) + local->cont.statfs.buf = *statvfs; + } else { + local->cont.statfs.buf = *statvfs; + local->cont.statfs.buf_set = 1; + } + } + + if (op_ret == -1) + local->op_errno = op_errno; + + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->cont.statfs.buf); + + return 0; +} + + +int32_t +afr_statfs (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + afr_private_t * priv = NULL; + int child_count = 0; + afr_local_t * local = NULL; + int i = 0; + + int ret = -1; + int call_count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + 
VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + child_count = priv->child_count; + + ALLOC_OR_GOTO (local, afr_local_t, out); + + ret = AFR_LOCAL_INIT (local, priv); + if (ret < 0) { + op_errno = -ret; + goto out; + } + + frame->local = local; + call_count = local->call_count; + + for (i = 0; i < child_count; i++) { + if (local->child_up[i]) { + STACK_WIND (frame, afr_statfs_cbk, + priv->children[i], + priv->children[i]->fops->statfs, + loc); + if (!--call_count) + break; + } + } + + op_ret = 0; +out: + if (op_ret == -1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + return 0; +} + + +int32_t +afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + afr_local_t * local = NULL; + + int call_count = -1; + + local = frame->local; + call_count = afr_frame_return (frame); + + if (call_count == 0) + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + lock); + + return 0; +} + + +int32_t +afr_lk_unlock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + + int i; + int call_count = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes, + priv->child_count); + + if (call_count == 0) { + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->cont.lk.flock); + return 0; + } + + local->call_count = call_count; + + local->cont.lk.flock.l_type = F_UNLCK; + + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lk.locked_nodes[i]) { + STACK_WIND (frame, afr_lk_unlock_cbk, + priv->children[i], + priv->children[i]->fops->lk, + local->fd, F_SETLK, + &local->cont.lk.flock); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int32_t +afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = -1; 
+ int child_index = -1; + + local = frame->local; + priv = this->private; + + child_index = (long) cookie; + + call_count = --local->call_count; + + if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) { + local->op_ret = -1; + local->op_errno = op_errno; + + afr_lk_unlock (frame, this); + return 0; + } + + if (op_ret == 0) { + local->op_ret = 0; + local->op_errno = 0; + local->cont.lk.flock = *lock; + local->cont.lk.locked_nodes[child_index] = 1; + } + + child_index++; + + if (child_index < priv->child_count) { + STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->lk, + local->fd, local->cont.lk.cmd, + &local->cont.lk.flock); + } else if (local->op_ret == -1) { + /* all nodes have gone down */ + + AFR_STACK_UNWIND (frame, -1, ENOTCONN, &local->cont.lk.flock); + } else { + /* locking has succeeded on all nodes that are up */ + + AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->cont.lk.flock); + } + + return 0; +} + + +int +afr_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, + struct flock *flock) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + int i = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ALLOC_OR_GOTO (local, afr_local_t, out); + AFR_LOCAL_INIT (local, priv); + + frame->local = local; + + local->cont.lk.locked_nodes = CALLOC (priv->child_count, + sizeof (*local->cont.lk.locked_nodes)); + + if (!local->cont.lk.locked_nodes) { + gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + op_errno = ENOMEM; + goto out; + } + + local->fd = fd_ref (fd); + local->cont.lk.cmd = cmd; + local->cont.lk.flock = *flock; + + STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0, + priv->children[i], + priv->children[i]->fops->lk, + fd, cmd, flock); + + op_ret = 0; +out: + if (op_ret == 
-1) { + AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL); + } + return 0; +} + + +/** + * find_child_index - find the child's index in the array of subvolumes + * @this: AFR + * @child: child + */ + +static int +find_child_index (xlator_t *this, xlator_t *child) +{ + afr_private_t *priv = NULL; + + int i = -1; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if ((xlator_t *) child == priv->children[i]) + break; + } + + return i; +} + + +int32_t +notify (xlator_t *this, int32_t event, + void *data, ...) +{ + afr_private_t * priv = NULL; + unsigned char * child_up = NULL; + + int i = -1; + int up_children = 0; + + priv = this->private; + + if (!priv) + return 0; + + child_up = priv->child_up; + + switch (event) { + case GF_EVENT_CHILD_UP: + i = find_child_index (this, data); + + child_up[i] = 1; + + /* + if all the children were down, and one child came up, + send notify to parent + */ + + for (i = 0; i < priv->child_count; i++) + if (child_up[i]) + up_children++; + + if (up_children == 1) + default_notify (this, event, data); + + break; + + case GF_EVENT_CHILD_DOWN: + i = find_child_index (this, data); + + child_up[i] = 0; + + /* + if all children are down, and this was the last to go down, + send notify to parent + */ + + for (i = 0; i < priv->child_count; i++) + if (child_up[i]) + up_children++; + + if (up_children == 0) + default_notify (this, event, data); + + break; + + default: + default_notify (this, event, data); + } + + return 0; +} + + +static const char *favorite_child_warning_str = "You have specified subvolume '%s' " + "as the 'favorite child'. This means that if a discrepancy in the content " + "or attributes (ownership, permission, etc.) of a file is detected among " + "the subvolumes, the file on '%s' will be considered the definitive " + "version and its contents will OVERWRITE the contents of the file on other " + "subvolumes. 
/**
 * init - set up the private state of an AFR translator instance.
 * @this: translator being initialised
 *
 * Reads the volume options (self-heal switches, change-log switches,
 * lock-server counts, read-subvolume, favorite-child), counts the
 * subvolumes and allocates the child_up and children arrays.
 *
 * Returns 0 on success and -1 on failure.
 */
int32_t
init (xlator_t *this)
{
        afr_private_t * priv        = NULL;
        int             child_count = 0;
        xlator_list_t * trav        = NULL;
        int             i           = 0;
        int             ret         = -1;
        /* NOTE(review): only written below, never read here; presumably
         * ALLOC_OR_GOTO also assigns it -- confirm in afr.h */
        int             op_errno    = 0;

        char * read_subvol = NULL;
        char * fav_child   = NULL;
        char * self_heal   = NULL;
        char * change_log  = NULL;

        int32_t lock_server_count = 1;

        int    fav_ret       = -1;
        int    read_ret      = -1;
        int    dict_ret      = -1;

        /* no subvolume at all: nothing to replicate to */
        if (!this->children) {
                gf_log (this->name, GF_LOG_ERROR,
                        "AFR needs more than one child defined");
                return -1;
        }

        if (!this->parents) {
                gf_log (this->name, GF_LOG_WARNING,
                        "dangling volume. check volfile ");
        }

        ALLOC_OR_GOTO (this->private, afr_private_t, out);

        priv = this->private;

        /* -1 means "not configured"; the matching index is filled in
         * while walking the children further down */
        read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
        priv->read_child = -1;

        fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
        priv->favorite_child = -1;

        /* Default values */

        priv->data_self_heal     = 1;
        priv->metadata_self_heal = 1;
        priv->entry_self_heal    = 1;

        /* each option below falls back to its default when the configured
         * value cannot be parsed as a boolean */
        dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
        if (dict_ret == 0) {
                ret = gf_string2boolean (self_heal, &priv->data_self_heal);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option data-self-heal %s' "
                                "defaulting to data-self-heal as 'on'",
                                self_heal);
                        priv->data_self_heal = 1;
                }
        }

        dict_ret = dict_get_str (this->options, "metadata-self-heal",
                                 &self_heal);
        if (dict_ret == 0) {
                ret = gf_string2boolean (self_heal, &priv->metadata_self_heal);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option metadata-self-heal %s' "
                                "defaulting to metadata-self-heal as 'on'",
                                self_heal);
                        priv->metadata_self_heal = 1;
                }
        }

        dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
        if (dict_ret == 0) {
                ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option entry-self-heal %s' "
                                "defaulting to entry-self-heal as 'on'",
                                self_heal);
                        priv->entry_self_heal = 1;
                }
        }

        /* Change log options */

        /* data and entry change logs default to on, metadata to off */
        priv->data_change_log     = 1;
        priv->metadata_change_log = 0;
        priv->entry_change_log    = 1;

        dict_ret = dict_get_str (this->options, "data-change-log",
                                 &change_log);
        if (dict_ret == 0) {
                ret = gf_string2boolean (change_log, &priv->data_change_log);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option data-change-log %s'. "
                                "defaulting to data-change-log as 'on'",
                                change_log);
                        priv->data_change_log = 1;
                }
        }

        dict_ret = dict_get_str (this->options, "metadata-change-log",
                                 &change_log);
        if (dict_ret == 0) {
                ret = gf_string2boolean (change_log,
                                         &priv->metadata_change_log);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option metadata-change-log %s'. "
                                "defaulting to metadata-change-log as 'off'",
                                change_log);
                        priv->metadata_change_log = 0;
                }
        }

        dict_ret = dict_get_str (this->options, "entry-change-log",
                                 &change_log);
        if (dict_ret == 0) {
                ret = gf_string2boolean (change_log, &priv->entry_change_log);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "invalid 'option entry-change-log %s'. "
                                "defaulting to entry-change-log as 'on'",
                                change_log);
                        priv->entry_change_log = 1;
                }
        }

        /* Locking options */

        /* data and entry default to one lock server, metadata to none */
        priv->data_lock_server_count = 1;
        priv->metadata_lock_server_count = 0;
        priv->entry_lock_server_count = 1;

        dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
                                   &lock_server_count);
        if (dict_ret == 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "setting data lock server count to %d",
                        lock_server_count);

                /* only the data lock count triggers the warning */
                if (lock_server_count == 0)
                        gf_log (this->name, GF_LOG_WARNING,
                                no_lock_servers_warning_str);

                priv->data_lock_server_count = lock_server_count;
        }


        dict_ret = dict_get_int32 (this->options,
                                   "metadata-lock-server-count",
                                   &lock_server_count);
        if (dict_ret == 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "setting metadata lock server count to %d",
                        lock_server_count);
                priv->metadata_lock_server_count = lock_server_count;
        }


        dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
                                   &lock_server_count);
        if (dict_ret == 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "setting entry lock server count to %d",
                        lock_server_count);

                priv->entry_lock_server_count = lock_server_count;
        }


        /* first pass over the children: count them and resolve the
         * read-subvolume / favorite-child names to indices */
        trav = this->children;
        while (trav) {
                if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
                        gf_log (this->name, GF_LOG_DEBUG,
                                "subvolume '%s' specified as read child",
                                trav->xlator->name);

                        priv->read_child = child_count;
                }

                if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
                        gf_log (this->name, GF_LOG_WARNING,
                                favorite_child_warning_str, trav->xlator->name,
                                trav->xlator->name, trav->xlator->name);
                        priv->favorite_child = child_count;
                }

                child_count++;
                trav = trav->next;
        }

        /* XXX: return inode numbers from 1st subvolume till
           afr supports read-subvolume based on inode's ctx
           (and not itransform) for this reason afr_deitransform()
           returns 0 always
        */
        /* this overrides whatever read-subvolume resolved to above */
        priv->read_child = 0;

        priv->wait_count = 1;

        priv->child_count = child_count;
        LOCK_INIT (&priv->lock);

        /* one up/down flag per child; CALLOC leaves them all "down" */
        priv->child_up = CALLOC (sizeof (unsigned char), child_count);
        if (!priv->child_up) {
                gf_log (this->name, GF_LOG_ERROR,
                        "out of memory :(");
                op_errno = ENOMEM;
                goto out;
        }

        priv->children = CALLOC (sizeof (xlator_t *), child_count);
        if (!priv->children) {
                gf_log (this->name, GF_LOG_ERROR,
                        "out of memory :(");
                op_errno = ENOMEM;
                goto out;
        }

        /* second pass: record the child translators in list order, so
         * the indices match those resolved in the first pass */
        trav = this->children;
        i = 0;
        while (i < child_count) {
                priv->children[i] = trav->xlator;

                trav = trav->next;
                i++;
        }

        ret = 0;
out:
        /* NOTE(review): on the failure paths nothing allocated above is
         * released here and fini() is empty -- confirm whether the caller
         * cleans up this->private */
        return ret;
}
afr_fchmod, + .fchown = afr_fchown, + .writev = afr_writev, + .truncate = afr_truncate, + .ftruncate = afr_ftruncate, + .utimens = afr_utimens, + .setxattr = afr_setxattr, + .removexattr = afr_removexattr, + + /* dir read */ + .opendir = afr_opendir, + .readdir = afr_readdir, + .getdents = afr_getdents, + + /* dir write */ + .create = afr_create, + .mknod = afr_mknod, + .mkdir = afr_mkdir, + .unlink = afr_unlink, + .rmdir = afr_rmdir, + .link = afr_link, + .symlink = afr_symlink, + .rename = afr_rename, + .setdents = afr_setdents, +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"read-subvolume" }, + .type = GF_OPTION_TYPE_XLATOR + }, + { .key = {"favorite-child"}, + .type = GF_OPTION_TYPE_XLATOR + }, + { .key = {"data-self-heal"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"metadata-self-heal"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"entry-self-heal"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"data-change-log"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"metadata-change-log"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"entry-change-log"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"data-lock-server-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 0 + }, + { .key = {"metadata-lock-server-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 0 + }, + { .key = {"entry-lock-server-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 0 + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h new file mode 100644 index 000000000..4cf6cdf9d --- /dev/null +++ b/xlators/cluster/afr/src/afr.h @@ -0,0 +1,523 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __AFR_H__ +#define __AFR_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "scheduler.h" +#include "call-stub.h" +#include "compat-errno.h" + + +typedef struct _afr_private { + gf_lock_t lock; /* to guard access to child_count, etc */ + unsigned int child_count; /* total number of children */ + + xlator_t **children; + + unsigned char *child_up; + + gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t metadata_self_heal; /* on/off */ + gf_boolean_t entry_self_heal; /* on/off */ + + + gf_boolean_t data_change_log; /* on/off */ + gf_boolean_t metadata_change_log; /* on/off */ + gf_boolean_t entry_change_log; /* on/off */ + + unsigned int read_child; /* read-subvolume */ + unsigned int favorite_child; /* subvolume to be preferred in resolving + split-brain cases */ + + unsigned int data_lock_server_count; + unsigned int metadata_lock_server_count; + unsigned int entry_lock_server_count; + + unsigned int wait_count; /* # of servers to wait for success */ +} afr_private_t; + +typedef struct { + /* array of stat's, one for each child */ + struct stat *buf; + + /* array of xattr's, one for each child */ + dict_t **xattr; + + /* array of errno's, one for each child */ + int *child_errno; + + int32_t **pending_matrix; + int32_t **delta_matrix; + + int *sources; + int source; + int active_source; + int 
active_sinks; + int *success; + + fd_t *healing_fd; + int op_failed; + + int file_has_holes; + blksize_t block_size; + off_t file_size; + off_t offset; + + loc_t parent_loc; + int (*completion_cbk) (call_frame_t *frame, xlator_t *this); + call_frame_t *sh_frame; +} afr_self_heal_t; + + +typedef enum { + AFR_DATA_TRANSACTION, /* truncate, write, ... */ + AFR_METADATA_TRANSACTION, /* chmod, chown, ... */ + AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */ + AFR_ENTRY_RENAME_TRANSACTION, /* rename */ + AFR_FLUSH_TRANSACTION, /* flush */ +} afr_transaction_type; + +typedef struct _afr_local { + unsigned int call_count; + unsigned int success_count; + unsigned int enoent_count; + + unsigned int need_metadata_self_heal; + unsigned int need_entry_self_heal; + unsigned int need_data_self_heal; + unsigned int govinda_gOvinda; + + unsigned int reval_child_index; + int32_t op_ret; + int32_t op_errno; + + int32_t *pending_array; + + loc_t loc; + loc_t newloc; + + fd_t *fd; + + glusterfs_fop_t fop; + + unsigned char *child_up; + int child_count; + + int32_t *child_errno; + + dict_t *xattr_req; + int open_fd_count; + /* + This struct contains the arguments for the "continuation" + (scheme-like) of fops + */ + + int op; + struct { + struct { + unsigned char buf_set; + struct statvfs buf; + } statfs; + + struct { + inode_t *inode; + struct stat buf; + dict_t *xattr; + } lookup; + + struct { + int32_t flags; + } open; + + struct { + int32_t cmd; + struct flock flock; + unsigned char *locked_nodes; + } lk; + + struct { + uint8_t *file_checksum; + uint8_t *dir_checksum; + } checksum; + + /* inode read */ + + struct { + int32_t mask; + int last_tried; /* index of the child we tried previously */ + } access; + + struct { + int last_tried; + ino_t ino; + } stat; + + struct { + int last_tried; + ino_t ino; + } fstat; + + struct { + size_t size; + int last_tried; + } readlink; + + struct { + const char *name; + int last_tried; + } getxattr; + + struct { + size_t size; + off_t offset; + int 
last_tried; + } readv; + + /* dir read */ + + struct { + int success_count; + int32_t op_ret; + int32_t op_errno; + } opendir; + + struct { + int32_t op_ret; + int32_t op_errno; + size_t size; + off_t offset; + + int last_tried; + } readdir; + + struct { + int32_t op_ret; + int32_t op_errno; + + size_t size; + off_t offset; + int32_t flag; + + int last_tried; + } getdents; + + /* inode write */ + + struct { + ino_t ino; + mode_t mode; + struct stat buf; + } chmod; + + struct { + ino_t ino; + mode_t mode; + struct stat buf; + } fchmod; + + struct { + ino_t ino; + uid_t uid; + gid_t gid; + struct stat buf; + } chown; + + struct { + ino_t ino; + uid_t uid; + gid_t gid; + struct stat buf; + } fchown; + + struct { + ino_t ino; + struct stat buf; + + int32_t op_ret; + + struct iovec *vector; + dict_t *refs; + int32_t count; + off_t offset; + } writev; + + struct { + ino_t ino; + off_t offset; + struct stat buf; + } truncate; + + struct { + ino_t ino; + off_t offset; + struct stat buf; + } ftruncate; + + struct { + ino_t ino; + struct timespec tv[2]; + struct stat buf; + } utimens; + + struct { + dict_t *dict; + int32_t flags; + } setxattr; + + struct { + const char *name; + } removexattr; + + /* dir write */ + + struct { + ino_t ino; + fd_t *fd; + int32_t flags; + mode_t mode; + inode_t *inode; + struct stat buf; + } create; + + struct { + ino_t ino; + dev_t dev; + mode_t mode; + inode_t *inode; + struct stat buf; + } mknod; + + struct { + ino_t ino; + int32_t mode; + inode_t *inode; + struct stat buf; + } mkdir; + + struct { + int32_t op_ret; + int32_t op_errno; + } unlink; + + struct { + int32_t op_ret; + int32_t op_errno; + } rmdir; + + struct { + ino_t ino; + struct stat buf; + } rename; + + struct { + ino_t ino; + inode_t *inode; + struct stat buf; + } link; + + struct { + ino_t ino; + inode_t *inode; + struct stat buf; + char *linkpath; + } symlink; + + struct { + int32_t flags; + dir_entry_t *entries; + int32_t count; + } setdents; + } cont; + + struct { + off_t 
start, len; + + unsigned char *locked_nodes; + int lock_count; + + const char *basename; + const char *new_basename; + + char *pending; + + loc_t parent_loc; + loc_t new_parent_loc; + + afr_transaction_type type; + + int success_count; + int erase_pending; + int failure_count; + + int last_tried; + int32_t *child_errno; + + call_frame_t *main_frame; + + int (*fop) (call_frame_t *frame, xlator_t *this); + + int (*done) (call_frame_t *frame, xlator_t *this); + + int (*resume) (call_frame_t *frame, xlator_t *this); + + int (*unwind) (call_frame_t *frame, xlator_t *this); + } transaction; + + afr_self_heal_t self_heal; +} afr_local_t; + +/* try alloc and if it fails, goto label */ +#define ALLOC_OR_GOTO(var, type, label) do { \ + var = CALLOC (sizeof (type), 1); \ + if (!var) { \ + gf_log (this->name, GF_LOG_ERROR, \ + "out of memory :("); \ + op_errno = ENOMEM; \ + goto label; \ + } \ + } while (0); + + +/* did a call fail due to a child failing? */ +#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \ + ((op_errno == ENOTCONN) || \ + (op_errno == EBADFD))) + +/* have we tried all children? */ +#define all_tried(i, count) ((i) == (count) - 1) + +void +afr_build_parent_loc (loc_t *parent, loc_t *child); + +int +afr_up_children_count (int child_count, unsigned char *child_up); + +int +afr_locked_nodes_count (unsigned char *locked_nodes, int child_count); + +int +afr_first_up_child (afr_private_t *priv); + +ino64_t +afr_itransform (ino64_t ino, int child_count, int child_index); + +int +afr_deitransform (ino64_t ino, int child_count); + +void +afr_local_cleanup (afr_local_t *local, xlator_t *this); + +int +afr_frame_return (call_frame_t *frame); + +#define AFR_STACK_UNWIND(frame, params ...) 
\ + do { \ + afr_local_t *__local = NULL; \ + xlator_t *__this = NULL; \ + __local = frame->local; \ + __this = frame->this; \ + frame->local = NULL; \ + STACK_UNWIND (frame, params); \ + afr_local_cleanup (__local, __this); \ + free (__local); \ +} while (0); + +#define AFR_STACK_DESTROY(frame) \ + do { \ + afr_local_t *__local = NULL; \ + xlator_t *__this = NULL; \ + __local = frame->local; \ + __this = frame->this; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + afr_local_cleanup (__local, __this); \ + free (__local); \ +} while (0); + +/* allocate and return a string that is the basename of argument */ +static inline char * +AFR_BASENAME (const char *str) +{ + char *__tmp_str = NULL; + char *__basename_str = NULL; + __tmp_str = strdup (str); + __basename_str = strdup (basename (__tmp_str)); + FREE (__tmp_str); + return __basename_str; +} + +/* initialize local_t */ +static inline int +AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) +{ + local->child_up = CALLOC (sizeof (*local->child_up), + priv->child_count); + if (!local->child_up) { + return -ENOMEM; + } + + memcpy (local->child_up, priv->child_up, + sizeof (*local->child_up) * priv->child_count); + + + local->call_count = afr_up_children_count (priv->child_count, local->child_up); + if (local->call_count == 0) + return -ENOTCONN; + + local->transaction.erase_pending = 1; + + local->op_ret = -1; + local->op_errno = EUCLEAN; + + return 0; +} + + +static inline int +afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) +{ + local->child_errno = CALLOC (sizeof (*local->child_errno), + priv->child_count); + if (!local->child_errno) { + return -ENOMEM; + } + + local->pending_array = CALLOC (sizeof (*local->pending_array), + priv->child_count); + if (!local->pending_array) { + return -ENOMEM; + } + + local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes), + priv->child_count); + + local->transaction.child_errno = CALLOC (sizeof 
(*local->transaction.child_errno), + priv->child_count); + + return 0; +} + +#endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/Makefile.am b/xlators/cluster/dht/Makefile.am new file mode 100644 index 000000000..f963effea --- /dev/null +++ b/xlators/cluster/dht/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src
\ No newline at end of file diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am new file mode 100644 index 000000000..b7d07d137 --- /dev/null +++ b/xlators/cluster/dht/src/Makefile.am @@ -0,0 +1,30 @@ + +xlator_LTLIBRARIES = dht.la nufa.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + + +dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \ + dht-selfheal.c dht-rename.c dht-hashfn.c dht-hashfn-tea.c + +dht_la_SOURCES = $(dht_common_source) dht.c + +nufa_la_SOURCES = $(dht_common_source) nufa.c + +dht_la_LDFLAGS = -module -avoidversion +dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +nufa_la_LDFLAGS = -module -avoidversion +nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = dht-common.h dht-common.c + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/distribute.so + +install-data-hook: + ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
\ No newline at end of file diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c new file mode 100644 index 000000000..5e4979e31 --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.c @@ -0,0 +1,3470 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +/* TODO: + - use volumename in xattr instead of "dht" + - use NS locks + - handle all cases in self heal layout reconstruction + - complete linkfile selfheal +*/ + +int +dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int ret = 0; + + local = frame->local; + ret = op_ret; + + if (ret == 0) { + layout = local->selfheal.layout; + ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + + if (ret == 0) + local->selfheal.layout = NULL; + + if (local->st_ino) { + local->stbuf.st_ino = local->st_ino; + } else { + gf_log (this->name, GF_LOG_WARNING, + "could not find hashed subvolume for %s", + local->loc.path); + } + } + + DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode, 
+ &local->stbuf, local->xattr); + + return 0; +} + + +int +dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = 0; + int is_dir = 0; + + conf = this->private; + local = frame->local; + prev = cookie; + + layout = local->layout; + + LOCK (&frame->lock); + { + /* TODO: assert equal mode on stbuf->st_mode and + local->stbuf->st_mode + + else mkdir/chmod/chown and fix + */ + /* TODO: assert equal hash type in xattr, local->xattr */ + + /* TODO: always ensure same subvolume is in layout->list[0] */ + + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, xattr); + + if (op_ret == -1) { + local->op_errno = ENOENT; + gf_log (this->name, GF_LOG_WARNING, + "lookup of %s on %s returned error (%s)", + local->loc.path, prev->this->name, + strerror (op_errno)); + + goto unlock; + } + + is_dir = check_is_dir (inode, stbuf, xattr); + if (!is_dir) + goto unlock; + + local->op_ret = 0; + if (local->xattr == NULL) + local->xattr = dict_ref (xattr); + if (local->inode == NULL) + local->inode = inode_ref (inode); + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + if (prev->this == local->hashed_subvol) + local->st_ino = local->stbuf.st_ino; + + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + if (local->op_ret == 0) { + ret = dht_layout_normalize (this, &local->loc, layout); + + local->layout = NULL; + + if (ret != 0) { + layout->gen = conf->gen; + + gf_log (this->name, GF_LOG_WARNING, + "fixing assignment on %s", + local->loc.path); + goto selfheal; + } + + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + + if (local->st_ino) { + local->stbuf.st_ino = local->st_ino; + } else { + gf_log (this->name, GF_LOG_WARNING, 
+ "could not find hashed subvolume for %s", + local->loc.path); + } + } + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + } + + return 0; + +selfheal: + ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, + &local->loc, layout); + + return 0; +} + +int +dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + int is_linkfile = 0; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + + if (op_errno != ENOTCONN && op_errno != ENOENT) { + gf_log (this->name, GF_LOG_WARNING, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + } + + goto unlock; + } + + if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) { + gf_log (this->name, GF_LOG_WARNING, + "mismatching filetypes 0%o v/s 0%o for %s", + (stbuf->st_mode & S_IFMT), + (local->inode->st_mode & S_IFMT), + local->loc.path); + + local->op_ret = -1; + local->op_errno = EINVAL; + + goto unlock; + } + + layout = dht_layout_get (this, inode); + + is_dir = check_is_dir (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + + if (is_linkfile) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile found in revalidate for %s", + local->loc.path); + local->layout_mismatch = 1; + + goto unlock; + } + + if (is_dir) { + ret = dht_layout_dir_mismatch (this, layout, + prev->this, &local->loc, + xattr); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "mismatching layouts for %s", + local->loc.path); + + local->layout_mismatch = 1; + + goto unlock; + } + } + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + local->op_ret = 0; + local->stbuf.st_ino = local->st_ino; + + if 
(!local->xattr) + local->xattr = dict_ref (xattr); + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + if (!S_ISDIR (local->stbuf.st_mode) + && (local->hashed_subvol != local->cached_subvol) + && (local->stbuf.st_nlink == 1)) + local->stbuf.st_mode |= S_ISVTX; + + if (local->layout_mismatch) { + local->op_ret = -1; + local->op_errno = ESTALE; + } + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + } + + return 0; +} + + +int +dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + xlator_t *cached_subvol = NULL; + + local = frame->local; + cached_subvol = local->cached_subvol; + + layout = dht_layout_for_subvol (this, local->cached_subvol); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + cached_subvol ? 
cached_subvol->name : "<nil>"); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + local->op_ret = 0; + if (local->stbuf.st_nlink == 1) + local->stbuf.st_mode |= S_ISVTX; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + return 0; +} + + +int +dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + int is_linkfile = 0; + int is_dir = 0; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + xlator_t *link_subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + + conf = this->private; + + local = frame->local; + loc = &local->loc; + + prev = cookie; + subvol = prev->this; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + if (op_errno != ENOENT) + local->op_errno = op_errno; + goto unlock; + } + + is_linkfile = check_is_linkfile (inode, buf, xattr); + is_dir = check_is_dir (inode, buf, xattr); + + if (is_linkfile) { + link_subvol = dht_linkfile_subvol (this, inode, buf, + xattr); + gf_log (this->name, GF_LOG_DEBUG, + "found on %s linkfile %s (-> %s)", + subvol->name, loc->path, + link_subvol ? 
link_subvol->name : "''"); + goto unlock; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "found on %s file %s", + subvol->name, loc->path); + } + + if (!local->cached_subvol) { + /* found one file */ + dht_stat_merge (this, &local->stbuf, buf, subvol); + local->xattr = dict_ref (xattr); + local->cached_subvol = subvol; + } else { + gf_log (this->name, GF_LOG_WARNING, + "multiple subvolumes (%s and %s atleast) have " + "file %s", local->cached_subvol->name, + subvol->name, local->loc.path); + } + } +unlock: + UNLOCK (&frame->lock); + + if (is_linkfile) { + gf_log (this->name, GF_LOG_WARNING, + "deleting stale linkfile %s on %s", + loc->path, subvol->name); + dht_linkfile_unlink (frame, this, subvol, loc); + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + hashed_subvol = local->hashed_subvol; + cached_subvol = local->cached_subvol; + + if (!cached_subvol) { + DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL); + return 0; + } + + gf_log (this->name, GF_LOG_WARNING, + "linking file %s existing on %s to %s (hash)", + loc->path, cached_subvol->name, hashed_subvol->name); + + dht_linkfile_create (frame, dht_lookup_linkfile_create_cbk, + cached_subvol, hashed_subvol, loc); + } + + return 0; +} + + +int +dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int call_cnt = 0; + + conf = this->private; + local = frame->local; + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + if (!local->inode) + local->inode = inode_ref (loc->inode); + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_everywhere_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + loc, local->xattr_req); + } + + return 0; +} + + +int +dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + call_frame_t *prev = NULL; + 
dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + + prev = cookie; + subvol = prev->this; + + local = frame->local; + loc = &local->loc; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "lookup of %s on %s (following linkfile) failed (%s)", + local->loc.path, subvol->name, strerror (op_errno)); + + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + /* TODO: assert type is non-dir and non-linkfile */ + + if (stbuf->st_nlink == 1) + stbuf->st_mode |= S_ISVTX; + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + + return 0; +} + + +int +dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_layout_t *layout = NULL; + char is_linkfile = 0; + char is_dir = 0; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + call_frame_t *prev = NULL; + int call_cnt = 0; + + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING (op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere (frame, this, loc); + return 0; + } + } + + if (op_ret == 0) { + is_dir = check_is_dir (inode, stbuf, xattr); + if (is_dir) { + local->inode = inode_ref (inode); + local->xattr = dict_ref (xattr); + } + } + + if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + 
if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_dir = check_is_dir (inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + + dht_itransform (this, prev->this, stbuf->st_ino, + &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + goto out; + } + + if (is_linkfile) { + subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + + if (!subvol) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile not having link subvolume. 
path=%s", + loc->path); + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + STACK_WIND (frame, dht_lookup_linkfile_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); + } + + return 0; + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + return 0; +} + + +int +dht_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "copying location failed for path=%s", + loc->path); + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref (xattr_req); + } else { + local->xattr_req = dict_new (); + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + cached_subvol = dht_subvol_get_cached (this, loc->inode); + + local->cached_subvol = cached_subvol; + local->hashed_subvol = hashed_subvol; + + if (is_revalidate (loc)) { + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "revalidate without cache. 
path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + if (layout->gen && (layout->gen < conf->gen)) { + gf_log (this->name, GF_LOG_WARNING, + "incomplete layout failure for path=%s", + loc->path); + op_errno = EAGAIN; + goto err; + } + + local->inode = inode_ref (loc->inode); + local->st_ino = loc->inode->ino; + + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; + + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. + */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_revalidate_cbk, + subvol, subvol->fops->lookup, + loc, local->xattr_req); + + if (!--call_cnt) + break; + } + } else { + /* TODO: remove the hard-coding */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht.linkto", 256); + + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s, " + "checking on all the subvols to see if " + "it is a directory", loc->path); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; + } + + STACK_WIND (frame, dht_lookup_cbk, + hashed_subvol, hashed_subvol->fops->lookup, + loc, local->xattr_req); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + +int +dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + if (local->inode) + local->stbuf.st_ino = local->inode->ino; + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_stat (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->stat, + loc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "local allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt;; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->fstat, + fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_chmod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + 
for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->chmod, + loc, mode); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_chown (call_frame_t *frame, xlator_t *this, + loc_t *loc, uid_t uid, gid_t gid) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->chown, + loc, uid, gid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fchmod (call_frame_t *frame, xlator_t *this, + fd_t *fd, mode_t mode) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fchmod, + fd, mode); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fchown, + fd, uid, gid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_utimens (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct timespec tv[2]) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->utimens, + loc, tv); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->truncate, + loc, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->ftruncate, + fd, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->access, + loc, mask); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, const char *path) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, path); + + return 0; +} + + +int +dht_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_readlink_cbk, + subvol, subvol->fops->readlink, + loc, size); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr); + + return 0; +} + + +int +dht_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_getxattr_cbk, + subvol, subvol->fops->getxattr, + loc, key); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr, int flags) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->setxattr, + loc, xattr, flags); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->removexattr, + loc, key); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd); + + return 0; +} + + +int +dht_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, fd_t *fd) +{ + xlator_t *subvol = NULL; + int ret = -1; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_fd_cbk, + subvol, subvol->fops->open, + loc, flags, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + struct iovec *vector, int count, struct stat *stbuf) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + + +int +dht_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_readv_cbk, + subvol, subvol->fops->readv, + fd, size, off); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; +} + + +int +dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct stat *stbuf) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf); + + return 0; +} + + +int +dht_writev (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iovec *vector, int count, off_t off) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_writev_cbk, + subvol, subvol->fops->writev, + fd, vector, count, off); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0); + + return 0; +} + + +int +dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->flush, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocatoin failed :("); + goto err; + } + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->fsync, + fd, datasync); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct flock *flock) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + + return 0; +} + + +int +dht_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int cmd, struct flock *flock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_lk_cbk, + subvol, subvol->fops->lk, + fd, cmd, flock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + +/* gf_lk no longer exists +int +dht_gf_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct flock *flock) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + + return 0; +} + + +int +dht_gf_lk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int cmd, struct flock *flock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_gf_lk_cbk, + subvol, subvol->fops->gf_lk, + fd, cmd, flock); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} +*/ + +int +dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct statvfs *statvfs) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + local->op_ret = 0; + + /* TODO: normalize sizes */ + local->statvfs.f_bsize = statvfs->f_bsize; + local->statvfs.f_frsize = statvfs->f_frsize; + + local->statvfs.f_blocks += statvfs->f_blocks; + local->statvfs.f_bfree += statvfs->f_bfree; + local->statvfs.f_bavail += statvfs->f_bavail; + local->statvfs.f_files += statvfs->f_files; + local->statvfs.f_ffree += statvfs->f_ffree; + local->statvfs.f_favail += statvfs->f_favail; + local->statvfs.f_fsid = statvfs->f_fsid; + local->statvfs.f_flag = statvfs->f_flag; + local->statvfs.f_namemax = statvfs->f_namemax; + + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->statvfs); + + return 0; +} + + +int +dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_statfs_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, loc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fd_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *orig_entries) +{ + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + xlator_t *next = NULL; + dht_layout_t *layout = NULL; + int count = 0; + + + INIT_LIST_HEAD (&entries.list); + prev = cookie; + local = frame->local; + + if (op_ret < 0) + goto done; + + layout = dht_layout_get (this, local->fd->inode); + + list_for_each_entry (orig_entry, &orig_entries->list, list) { + subvol = dht_layout_search (this, layout, orig_entry->d_name); + + if (!subvol || subvol == prev->this) { + entry = gf_dirent_for_name (orig_entry->d_name); + if (!entry) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto unwind; + } + + dht_itransform (this, subvol, orig_entry->d_ino, + &entry->d_ino); + dht_itransform (this, subvol, orig_entry->d_off, + &entry->d_off); + + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; + + list_add_tail (&entry->list, &entries.list); + count++; + } + } + op_ret = count; + +done: + if (count == 0) { + next = dht_subvol_next (this, prev->this); + if (!next) { + goto unwind; + } + + STACK_WIND (frame, dht_readdir_cbk, + next, next->fops->readdir, + local->fd, local->size, 0); + return 0; + } + +unwind: + if (op_ret < 0) + op_ret = 0; + + DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + + +int +dht_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t yoff) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + xlator_t *xvol = NULL; + off_t xoff = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = 
dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_ref (fd); + local->size = size; + + dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + + /* TODO: do proper readdir */ + STACK_WIND (frame, dht_readdir_cbk, + xvol, xvol->fops->readdir, + fd, size, xoff); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) + local->op_errno = op_errno; + + if (op_ret == 0) + local->op_ret = 0; + } + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fsyncdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->fsyncdir, + fd, datasync); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + + + if (op_ret == -1) + goto out; + + prev = cookie; + + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + layout = dht_layout_for_subvol (this, prev->this); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not set inode context"); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +dht_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->mknod, + loc, mode, rdev); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_symlink (call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->symlink, + linkname, loc); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + cached_subvol = dht_subvol_get_cached (this, loc->inode); + if (!cached_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + if (hashed_subvol != cached_subvol) + local->call_cnt++; + + STACK_WIND (frame, dht_err_cbk, + cached_subvol, cached_subvol->fops->unlink, loc); + + if (hashed_subvol != cached_subvol) + STACK_WIND (frame, dht_err_cbk, + hashed_subvol, 
hashed_subvol->fops->unlink, loc); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + + prev = cookie; + local = frame->local; + + if (op_ret == -1) + goto out; + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + stbuf->st_ino = local->loc.inode->ino; + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + + return 0; +} + + +int +dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + xlator_t *srcvol = NULL; + + + if (op_ret == -1) + goto err; + + local = frame->local; + srcvol = local->linkfile.srcvol; + + STACK_WIND (frame, dht_link_cbk, + srcvol, srcvol->fops->link, + &local->loc, &local->loc2); + + return 0; + +err: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + + return 0; +} + + +int +dht_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (newloc, err); + + cached_subvol = dht_subvol_get_cached (this, oldloc->inode); + if (!cached_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", oldloc->path); + op_errno = EINVAL; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, newloc); + if (!hashed_subvol) { + gf_log 
(this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + newloc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc, oldloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + if (hashed_subvol != cached_subvol) { + dht_linkfile_create (frame, dht_link_linkfile_cbk, + cached_subvol, hashed_subvol, newloc); + } else { + STACK_WIND (frame, dht_link_cbk, + cached_subvol, cached_subvol->fops->link, + oldloc, newloc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + + + if (op_ret == -1) + goto out; + + prev = cookie; + + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + layout = dht_layout_for_subvol (this, prev->this); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not set inode context"); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf); + return 0; +} + + +int +dht_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + 
xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + + +int +dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + + + local = frame->local; + layout = local->selfheal.layout; + + if (op_ret == 0) { + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + local->selfheal.layout = NULL; + local->stbuf.st_ino = local->st_ino; + } + + DHT_STACK_UNWIND (frame, op_ret, op_errno, + local->inode, &local->stbuf); + + return 0; +} + + +int +dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + int ret = -1; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + + local = frame->local; + prev = cookie; + layout = local->layout; + + LOCK (&frame->lock); + { + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + local->layout = NULL; + dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, + &local->loc, layout); + } + + return 
0; +} + +int +dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int ret = -1; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol = NULL; + + local = frame->local; + prev = cookie; + layout = local->layout; + conf = this->private; + hashed_subvol = local->hashed_subvol; + + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } + local->op_ret = 0; + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + local->st_ino = local->stbuf.st_ino; + + local->call_cnt = conf->subvolume_cnt - 1; + + if (local->call_cnt == 0) { + local->layout = NULL; + dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, + &local->loc, layout); + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == hashed_subvol) + continue; + STACK_WIND (frame, dht_mkdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->mkdir, + &local->loc, local->mode); + } + return 0; +err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; +} + +int +dht_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int ret = -1; + xlator_t *hashed_subvol = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + + if (hashed_subvol == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "hashed subvol not found"); + op_errno = EINVAL; + 
goto err; + } + + local->hashed_subvol = hashed_subvol; + local->inode = inode_ref (loc->inode); + ret = loc_copy (&local->loc, loc); + local->mode = mode; + + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + STACK_WIND (frame, dht_mkdir_hashed_cbk, + hashed_subvol, + hashed_subvol->fops->mkdir, + loc, mode); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + + local = frame->local; + local->layout = NULL; + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + uint64_t tmp_layout = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + + if (op_errno != ENOENT) + local->need_selfheal = 1; + + gf_log (this->name, GF_LOG_ERROR, + "rmdir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto unlock; + } + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->need_selfheal) { + inode_ctx_get (local->loc.inode, this, + &tmp_layout); + layout = (dht_layout_t *)(long)tmp_layout; + + /* TODO: neater interface needed below */ + local->stbuf.st_mode = local->loc.inode->st_mode; + + dht_selfheal_restore (frame, 
dht_rmdir_selfheal_cbk, + &local->loc, layout); + } else { + DHT_STACK_UNWIND (frame, local->op_ret, + local->op_errno); + } + } + + return 0; +} + + +int +dht_rmdir_do (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + + conf = this->private; + local = frame->local; + + if (local->op_ret == -1) + goto err; + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rmdir, + &local->loc); + } + + return 0; + +err: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + return 0; +} + + +int +dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if (op_ret > 2) { + gf_log (this->name, GF_LOG_DEBUG, + "readdir on %s for %s returned %d entries", + prev->this->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } + + return 0; +} + + +int +dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "opendir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto err; + } + + STACK_WIND (frame, dht_rmdir_readdir_cbk, + prev->this, prev->this->fops->readdir, + local->fd, 4096, 0); + + return 0; + +err: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } + + return 0; +} + + +int +dht_rmdir 
(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + int ret = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_create (local->loc.inode, frame->root->pid); + if (!local->fd) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_opendir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, local->fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_xattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +dht_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, + dht_xattrop_cbk, + subvol, subvol->fops->xattrop, + loc, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +dht_fxattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +dht_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + dht_fxattrop_cbk, + subvol, subvol->fops->fxattrop, + fd, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +static int32_t +dht_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +dht_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, + 
dht_inodelk_cbk, + subvol, subvol->fops->inodelk, + loc, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +dht_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + + STACK_WIND (frame, + dht_finodelk_cbk, + subvol, subvol->fops->finodelk, + fd, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +dht_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + 
local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_entrylk_cbk, + subvol, subvol->fops->entrylk, + loc, basename, cmd, type); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + +static int32_t +dht_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +dht_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_fentrylk_cbk, + subvol, subvol->fops->fentrylk, + fd, basename, cmd, type); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno;
+        DHT_STACK_UNWIND (frame, -1, op_errno);
+
+        return 0;
+}
+
+
+/*
+ * Inode forget hook: release the layout this translator stored in the
+ * inode context.  A layout flagged `preset` is left alone; any other
+ * layout is freed.  Always returns 0.
+ */
+int
+dht_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t      tmp_layout = 0;
+        dht_layout_t *layout = NULL;
+
+        inode_ctx_get (inode, this, &tmp_layout);
+
+        /* Decode the context value BEFORE testing it.  Testing `layout`
+           ahead of this assignment always saw the NULL initialiser and
+           returned early, so the layout was never released. */
+        layout = (dht_layout_t *)(long)tmp_layout;
+        if (!layout)
+                return 0;
+
+        if (!layout->preset)
+                FREE (layout);
+
+        return 0;
+}
+
+
+
+/*
+ * Fill conf->subvolumes / conf->subvolume_cnt from this->children and
+ * allocate one zeroed status byte per subvolume.
+ * Returns 0 on success, -1 if either allocation fails.
+ */
+static int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+        xlator_list_t *subvols = NULL;
+        int            cnt = 0;
+
+
+        /* first pass: count the children */
+        for (subvols = this->children; subvols; subvols = subvols->next)
+                cnt++;
+
+        conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *));
+        if (!conf->subvolumes) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+        conf->subvolume_cnt = cnt;
+
+        /* second pass: record each child xlator in list order */
+        cnt = 0;
+        for (subvols = this->children; subvols; subvols = subvols->next)
+                conf->subvolumes[cnt++] = subvols->xlator;
+
+        conf->subvolume_status = CALLOC (cnt, sizeof (char));
+        if (!conf->subvolume_status) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                return -1;
+        }
+
+        return 0;
+}
+
+
+int
+dht_notify (xlator_t *this, int event, void *data, ...)
+{ + xlator_t *subvol = NULL; + int cnt = -1; + int i = -1; + dht_conf_t *conf = NULL; + int ret = -1; + + + conf = this->private; + + switch (event) { + case GF_EVENT_CHILD_UP: + subvol = data; + + conf->gen++; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } + + if (cnt == -1) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_UP bad subvolume %s", + subvol->name); + break; + } + + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 1; + } + UNLOCK (&conf->subvolume_lock); + + break; + + case GF_EVENT_CHILD_DOWN: + subvol = data; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } + + if (cnt == -1) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + subvol->name); + break; + } + + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 0; + } + UNLOCK (&conf->subvolume_lock); + + break; + } + + ret = default_notify (this, event, data); + + return ret; +} + diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h new file mode 100644 index 000000000..17017381b --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.h @@ -0,0 +1,212 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef _DHT_H +#define _DHT_H + + +typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno); + + +struct dht_layout { + int cnt; + int preset; + int gen; + int type; + struct { + int err; /* 0 = normal + -1 = dir exists and no xattr + >0 = dir lookup failed with errno + */ + uint32_t start; + uint32_t stop; + xlator_t *xlator; + } list[0]; +}; +typedef struct dht_layout dht_layout_t; + + +struct dht_local { + int call_cnt; + loc_t loc; + loc_t loc2; + int op_ret; + int op_errno; + int layout_mismatch; + struct stat stbuf; + struct statvfs statvfs; + fd_t *fd; + inode_t *inode; + dict_t *xattr; + dict_t *xattr_req; + dht_layout_t *layout; + size_t size; + ino_t st_ino; + xlator_t *src_hashed, *src_cached; + xlator_t *dst_hashed, *dst_cached; + xlator_t *cached_subvol; + xlator_t *hashed_subvol; + char need_selfheal; + struct { + fop_mknod_cbk_t linkfile_cbk; + struct stat stbuf; + loc_t loc; + inode_t *inode; + dict_t *xattr; + xlator_t *srcvol; + } linkfile; + struct { + uint32_t hole_cnt; + uint32_t overlaps_cnt; + uint32_t missing; + uint32_t down; + uint32_t misc; + dht_selfheal_dir_cbk_t dir_cbk; + dht_layout_t *layout; + } selfheal; + + /* needed by nufa */ + int32_t flags; + mode_t mode; + dev_t rdev; +}; +typedef struct dht_local dht_local_t; + + +struct dht_conf { + gf_lock_t subvolume_lock; + int subvolume_cnt; + xlator_t **subvolumes; + xlator_t *local_volume; /* Needed by NUFA */ + char *subvolume_status; + dht_layout_t **file_layouts; + dht_layout_t **dir_layouts; + dht_layout_t *default_dir_layout; + gf_boolean_t search_unhashed; + int gen; +}; +typedef struct dht_conf dht_conf_t; + + +struct dht_disk_layout { + uint32_t cnt; + uint32_t type; + struct { + uint32_t start; + uint32_t stop; + } list[1]; +}; +typedef struct dht_disk_layout dht_disk_layout_t; + 
+#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT) + +#define is_fs_root(loc) (strcmp (loc->path, "/") == 0) + +#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0) + +#define is_last_call(cnt) (cnt == 0) + +#define DHT_LINKFILE_MODE (S_ISVTX) +#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE) + +#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode)) + +#define layout_is_sane(layout) ((layout) && (layout->cnt > 0)) + +#define DHT_STACK_UNWIND(frame, params ...) do { \ + dht_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_UNWIND (frame, params); \ + dht_local_wipe (__local); \ + } while (0) + +#define DHT_STACK_DESTROY(frame) do { \ + dht_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + dht_local_wipe (__local); \ + } while (0) + +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, + const char *name); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, + uint32_t *misc_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, dict_t *xattr); + +xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, + struct stat *buf, dict_t *xattr); +int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc); + +int dht_layouts_init (xlator_t *this, dht_conf_t *conf); +int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + int op_ret, int op_errno, dict_t *xattr); + +int dht_disk_layout_extract (xlator_t 
*this, dht_layout_t *layout, + int pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, int32_t *disk_layout); + + +int dht_frame_return (call_frame_t *frame); + +int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol, + uint64_t *x); + +void dht_local_wipe (dht_local_t *local); +dht_local_t *dht_local_init (call_frame_t *frame); +int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from, + xlator_t *subvol); + +xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); +xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); +xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); +int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); + +int dht_hash_compute (int type, const char *name, uint32_t *hash_p); + +int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc); +int +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); +int +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); + +int dht_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc); +#endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-hashfn-tea.c b/xlators/cluster/dht/src/dht-hashfn-tea.c new file mode 100644 index 000000000..8437b4955 --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn-tea.c @@ -0,0 +1,146 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6 /* 6 gets complete mixing */ + + +static int +tearound (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1) +{ + uint32_t sum = 0; + int n = 0; + uint32_t b0 = 0; + uint32_t b1 = 0; + + b0 = *h0; + b1 = *h1; + + n = rounds; + + do { + sum += DELTA; + b0 += ((b1 << 4) + array[0]) + ^ (b1 + sum) + ^ ((b1 >> 5) + array[1]); + b1 += ((b0 << 4) + array[2]) + ^ (b0 + sum) + ^ ((b0 >> 5) + array[3]); + } while (--n); + + *h0 += b0; + *h1 += b1; + + return 0; +} + + +uint32_t +__pad (int len) +{ + uint32_t pad = 0; + + pad = (uint32_t) len | ((uint32_t) len << 8); + pad |= pad << 16; + + return pad; +} + + +uint32_t +dht_hashfn_tea (const char *msg, int len) +{ + uint32_t h0 = 0x9464a485; + uint32_t h1 = 0x542e1a94; + uint32_t array[4]; + uint32_t pad = 0; + int i = 0; + int j = 0; + int full_quads = 0; + int full_words = 0; + int full_bytes = 0; + uint32_t *intmsg = NULL; + int word = 0; + + + intmsg = (uint32_t *) msg; + pad = __pad (len); + + full_bytes = len; + full_words = len / 4; + full_quads = len / 16; + + for (i = 0; i < full_quads; i++) { + for (j = 0; j < 4; j++) { + word = *intmsg; + array[j] = word; + intmsg++; + full_words--; + full_bytes -= 4; + } + tearound (PARTROUNDS, &array[0], &h0, &h1); + } + + if ((len % 16) == 0) { + goto done; + } + + for (j = 0; j < 4; j++) { + if (full_words) { + word = *intmsg; + array[j] = word; + intmsg++; + full_words--; + full_bytes -= 4; + } else { + array[j] 
= pad; + while (full_bytes) { + array[j] <<= 8; + array[j] |= msg[len - full_bytes]; + full_bytes--; + } + } + } + tearound (FULLROUNDS, &array[0], &h0, &h1); + +done: + return h0 ^ h1; +} + + +#if 0 +int +main (int argc, char *argv[]) +{ + int i = 0; + int hashval = 0; + + for (i = 1; i < argc; i++) { + hashval = tea (argv[i], strlen (argv[i])); + printf ("%s: %x\n", argv[i], hashval); + } +} +#endif diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c new file mode 100644 index 000000000..9e321a43c --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn.c @@ -0,0 +1,88 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +uint32_t dht_hashfn_tea (const char *name, int len); + + +typedef enum { + DHT_HASH_TYPE_TEA, +} dht_hashfn_type_t; + + +int +dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p) +{ + int ret = 0; + uint32_t hash = 0; + + switch (type) { + case DHT_HASH_TYPE_TEA: + hash = dht_hashfn_tea (name, strlen (name)); + break; + default: + ret = -1; + break; + } + + if (ret == 0) { + *hash_p = hash; + } + + return ret; +} + + +#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \ + rsync_frndly_name = (char *) name; \ + if (name[0] == '.') { \ + char *dot = 0; \ + int namelen = 0; \ + \ + dot = strrchr (name, '.'); \ + if (dot && dot > (name + 1) && *(dot + 1)) { \ + namelen = (dot - name); \ + rsync_frndly_name = alloca (namelen); \ + strncpy (rsync_frndly_name, name + 1, \ + namelen); \ + rsync_frndly_name[namelen - 1] = 0; \ + } \ + } \ + } while (0); + + +int +dht_hash_compute (int type, const char *name, uint32_t *hash_p) +{ + char *rsync_friendly_name = NULL; + + MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name); + + return dht_hash_compute_internal (type, rsync_friendly_name, hash_p); +} diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c new file mode 100644 index 000000000..52d072002 --- /dev/null +++ b/xlators/cluster/dht/src/dht-helper.c @@ -0,0 +1,326 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_frame_return (call_frame_t *frame) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + + if (!frame) + return -1; + + local = frame->local; + + LOCK (&frame->lock); + { + this_call_cnt = --local->call_cnt; + } + UNLOCK (&frame->lock); + + return this_call_cnt; +} + + +int +dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) +{ + dht_conf_t *conf = NULL; + int cnt = 0; + int max = 0; + uint64_t y = 0; + + + if (x == ((uint64_t) -1)) { + y = (uint64_t) -1; + goto out; + } + + conf = this->private; + + max = conf->subvolume_cnt; + cnt = dht_subvol_cnt (this, subvol); + + y = ((x * max) + cnt); + +out: + if (y_p) + *y_p = y; + + return 0; +} + + +int +dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, + uint64_t *x_p) +{ + dht_conf_t *conf = NULL; + int cnt = 0; + int max = 0; + uint64_t x = 0; + xlator_t *subvol = 0; + + + conf = this->private; + max = conf->subvolume_cnt; + + cnt = y % max; + x = y / max; + + subvol = conf->subvolumes[cnt]; + + if (subvol_p) + *subvol_p = subvol; + + if (x_p) + *x_p = x; + + return 0; +} + + +void +dht_local_wipe (dht_local_t *local) +{ + if (!local) + return; + + loc_wipe (&local->loc); + loc_wipe (&local->loc2); + + if (local->xattr) + dict_unref (local->xattr); + + if (local->inode) + inode_unref (local->inode); + + if (local->layout) + FREE (local->layout); + + loc_wipe (&local->linkfile.loc); + + if (local->linkfile.xattr) + dict_unref 
(local->linkfile.xattr); + + if (local->linkfile.inode) + inode_unref (local->linkfile.inode); + + if (local->fd) { + fd_unref (local->fd); + local->fd = NULL; + } + + if (local->xattr_req) + dict_unref (local->xattr_req); + + FREE (local); +} + + +dht_local_t * +dht_local_init (call_frame_t *frame) +{ + dht_local_t *local = NULL; + + /* TODO: use mem-pool */ + local = CALLOC (1, sizeof (*local)); + + if (!local) + return NULL; + + local->op_ret = -1; + local->op_errno = EUCLEAN; + + frame->local = local; + + return local; +} + + +char * +basestr (const char *str) +{ + char *basestr = NULL; + + basestr = strrchr (str, '/'); + if (basestr) + basestr ++; + + return basestr; +} + +xlator_t * +dht_first_up_child (xlator_t *this) +{ + dht_conf_t *conf = NULL; + xlator_t *child = NULL; + int i = 0; + + conf = this->private; + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolume_status[i]) { + child = conf->subvolumes[i]; + break; + } + } + } + UNLOCK (&conf->subvolume_lock); + + return child; +} + +xlator_t * +dht_subvol_get_hashed (xlator_t *this, loc_t *loc) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + + if (is_fs_root (loc)) { + subvol = dht_first_up_child (this); + goto out; + } + + layout = dht_layout_get (this, loc->parent); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "layout missing path=%s parent=%"PRId64, + loc->path, loc->parent->ino); + goto out; + } + + subvol = dht_layout_search (this, layout, loc->name); + + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "could not find subvolume for path=%s", + loc->path); + goto out; + } + +out: + return subvol; +} + + +xlator_t * +dht_subvol_get_cached (xlator_t *this, inode_t *inode) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + + + layout = dht_layout_get (this, inode); + + if (!layout) { + goto out; + } + + subvol = layout->list[0].xlator; + +out: + return subvol; +} + + +xlator_t * +dht_subvol_next (xlator_t 
*this, xlator_t *prev) +{ + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *next = NULL; + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) { + if ((i + 1) < conf->subvolume_cnt) + next = conf->subvolumes[i + 1]; + break; + } + } + + return next; +} + + +int +dht_subvol_cnt (xlator_t *this, xlator_t *subvol) +{ + int i = 0; + int ret = -1; + dht_conf_t *conf = NULL; + + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + ret = i; + break; + } + } + + return ret; +} + + +#define set_if_greater(a, b) do { \ + if ((a) < (b)) \ + (a) = (b); \ + } while (0) + +int +dht_stat_merge (xlator_t *this, struct stat *to, + struct stat *from, xlator_t *subvol) +{ + to->st_dev = from->st_dev; + + dht_itransform (this, subvol, from->st_ino, &to->st_ino); + + to->st_mode = from->st_mode; + to->st_nlink = from->st_nlink; + to->st_uid = from->st_uid; + to->st_gid = from->st_gid; + to->st_rdev = from->st_rdev; + to->st_size += from->st_size; + to->st_blksize = from->st_blksize; + to->st_blocks += from->st_blocks; + + set_if_greater (to->st_atime, from->st_atime); + set_if_greater (to->st_mtime, from->st_mtime); + set_if_greater (to->st_ctime, from->st_ctime); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c new file mode 100644 index 000000000..08b4a2746 --- /dev/null +++ b/xlators/cluster/dht/src/dht-layout.c @@ -0,0 +1,543 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "byte-order.h" + +#define layout_base_size (sizeof (dht_layout_t)) + +#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0]) + +#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size)) + + +dht_layout_t * +dht_layout_new (xlator_t *this, int cnt) +{ + dht_layout_t *layout = NULL; + + + layout = CALLOC (1, layout_size (cnt)); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + layout->cnt = cnt; + +out: + return layout; +} + + +dht_layout_t * +dht_layout_get (xlator_t *this, inode_t *inode) +{ + uint64_t layout = 0; + int ret = -1; + + ret = inode_ctx_get (inode, this, &layout); + + return (dht_layout_t *)(long)layout; +} + + +xlator_t * +dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) +{ + uint32_t hash = 0; + xlator_t *subvol = NULL; + int i = 0; + int ret = 0; + + + ret = dht_hash_compute (layout->type, name, &hash); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "hash computation failed for type=%d name=%s", + layout->type, name); + goto out; + } + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].start <= hash + && layout->list[i].stop >= hash) { + subvol = layout->list[i].xlator; + break; + } + } + + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume for hash (value) = %u", hash); + } + +out: + return subvol; +} + + +dht_layout_t * +dht_layout_for_subvol (xlator_t *this, xlator_t *subvol) 
+{ + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == subvol) { + layout = conf->file_layouts[i]; + break; + } + } + + return layout; +} + + +int +dht_layouts_init (xlator_t *this, dht_conf_t *conf) +{ + dht_layout_t *layout = NULL; + int i = 0; + int ret = -1; + + + conf->file_layouts = CALLOC (conf->subvolume_cnt, + sizeof (dht_layout_t *)); + if (!conf->file_layouts) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + layout = dht_layout_new (this, 1); + + if (!layout) { + goto out; + } + + layout->preset = 1; + + layout->list[0].xlator = conf->subvolumes[i]; + + conf->file_layouts[i] = layout; + } + + ret = 0; +out: + return ret; +} + + +int +dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, + int pos, int32_t **disk_layout_p) +{ + int ret = -1; + int32_t *disk_layout = NULL; + + disk_layout = CALLOC (5, sizeof (int)); + if (!disk_layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + disk_layout[0] = hton32 (1); + disk_layout[1] = hton32 (layout->type); + disk_layout[2] = hton32 (layout->list[pos].start); + disk_layout[3] = hton32 (layout->list[pos].stop); + + if (disk_layout_p) + *disk_layout_p = disk_layout; + ret = 0; + +out: + return ret; +} + + +int +dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, int32_t *disk_layout) +{ + int cnt = 0; + int type = 0; + int start_off = 0; + int stop_off = 0; + + + /* TODO: assert disk_layout_ptr is of required length */ + + cnt = ntoh32 (disk_layout[0]); + if (cnt != 1) { + gf_log (this->name, GF_LOG_ERROR, + "disk layout has invalid count %d", cnt); + return -1; + } + + /* TODO: assert type is compatible */ + type = ntoh32 (disk_layout[1]); + start_off = ntoh32 (disk_layout[2]); + stop_off = ntoh32 (disk_layout[3]); + + 
layout->list[pos].start = start_off; + layout->list[pos].stop = stop_off; + + gf_log (this->name, GF_LOG_DEBUG, + "merged to layout: %u - %u (type %d) from %s", + start_off, stop_off, type, + layout->list[pos].xlator->name); + + return 0; +} + + +int +dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + int op_ret, int op_errno, dict_t *xattr) +{ + int i = 0; + int ret = -1; + int err = -1; + int32_t *disk_layout = NULL; + + + if (op_ret != 0) { + err = op_errno; + } + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == NULL) { + layout->list[i].err = err; + layout->list[i].xlator = subvol; + break; + } + } + + if (op_ret != 0) { + ret = 0; + goto out; + } + + if (xattr) { + /* during lookup and not mkdir */ + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", + VOID(&disk_layout)); + } + + if (ret != 0) { + layout->list[i].err = -1; + gf_log (this->name, GF_LOG_DEBUG, + "missing disk layout on %s. err = %d", + subvol->name, err); + ret = 0; + goto out; + } + + ret = dht_disk_layout_merge (this, layout, i, disk_layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "layout merge from subvolume %s failed", + subvol->name); + goto out; + } + layout->list[i].err = 0; + +out: + return ret; +} + + +void +dht_layout_entry_swap (dht_layout_t *layout, int i, int j) +{ + uint32_t start_swap = 0; + uint32_t stop_swap = 0; + xlator_t *xlator_swap = 0; + int err_swap = 0; + + + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; + xlator_swap = layout->list[i].xlator; + err_swap = layout->list[i].err; + + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; + layout->list[i].xlator = layout->list[j].xlator; + layout->list[i].err = layout->list[j].err; + + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; + layout->list[j].xlator = xlator_swap; + layout->list[j].err = err_swap; +} + + +int64_t +dht_layout_entry_cmp (dht_layout_t *layout, int i, 
int j) +{ + int64_t diff = 0; + + if (layout->list[i].err || layout->list[j].err) + diff = layout->list[i].err - layout->list[j].err; + else + diff = (int64_t) layout->list[i].start + - (int64_t) layout->list[j].start; + + return diff; +} + + +int +dht_layout_sort (dht_layout_t *layout) +{ + int i = 0; + int j = 0; + int64_t ret = 0; + + /* TODO: O(n^2) -- bad bad */ + + for (i = 0; i < layout->cnt - 1; i++) { + for (j = i + 1; j < layout->cnt; j++) { + ret = dht_layout_entry_cmp (layout, i, j); + if (ret > 0) + dht_layout_entry_swap (layout, i, j); + } + } + + return 0; +} + + +int +dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) +{ + dht_conf_t *conf = NULL; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + uint32_t hole_cnt = 0; + uint32_t overlap_cnt = 0; + int i = 0; + int ret = 0; + uint32_t prev_stop = 0; + uint32_t last_stop = 0; + char is_virgin = 1; + + + conf = this->private; + + /* TODO: explain WTF is happening */ + + last_stop = layout->list[0].start - 1; + prev_stop = last_stop; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err) { + switch (layout->list[i].err) { + case -1: + case ENOENT: + missing++; + break; + case ENOTCONN: + down++; + break; + default: + misc++; + } + continue; + } + + is_virgin = 0; + + if ((prev_stop + 1) < layout->list[i].start) { + hole_cnt++; + holes += (layout->list[i].start - (prev_stop + 1)); + } + + if ((prev_stop + 1) > layout->list[i].start) { + overlap_cnt++; + overlaps += ((prev_stop + 1) - layout->list[i].start); + } + prev_stop = layout->list[i].stop; + } + + if ((last_stop - prev_stop) || is_virgin) + hole_cnt++; + holes += (last_stop - prev_stop); + + if (holes_p) + *holes_p = hole_cnt; + + if (overlaps_p) + *overlaps_p = overlap_cnt; + + if (missing_p) + *missing_p = missing; + + if (down_p) + *down_p = down; 
+ + if (misc_p) + *misc_p = misc; + + return ret; +} + + +int +dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) +{ + int ret = 0; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + + + ret = dht_layout_sort (layout); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "sort failed?! how the ...."); + goto out; + } + + ret = dht_layout_anomalies (this, loc, layout, + &holes, &overlaps, + &missing, &down, &misc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "error while finding anomalies in %s -- not good news", + loc->path); + goto out; + } + + if (holes || overlaps) { + if (missing == layout->cnt) { + gf_log (this->name, GF_LOG_WARNING, + "directory %s looked up first time", + loc->path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "found anomalies in %s. holes=%d overlaps=%d", + loc->path, holes, overlaps); + } + ret = 1; + } + +out: + return ret; +} + + +int +dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + loc_t *loc, dict_t *xattr) +{ + int idx = 0; + int pos = -1; + int ret = -1; + int32_t *disk_layout = NULL; + int32_t count = -1; + uint32_t start_off = -1; + uint32_t stop_off = -1; + + + for (idx = 0; idx < layout->cnt; idx++) { + if (layout->list[idx].xlator == subvol) { + pos = idx; + break; + } + } + + if (pos == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s - no layout info for subvolume %s", + loc->path, subvol->name); + ret = 1; + goto out; + } + + if (xattr == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "%s - xattr dictionary is NULL", + loc->path); + ret = -1; + goto out; + } + + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", + VOID(&disk_layout)); + + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s - disk layout missing", loc->path); + ret = -1; + goto out; + } + + count = ntoh32 (disk_layout[0]); + if (count != 1) { + gf_log (this->name, GF_LOG_ERROR, + "%s - disk layout has invalid count 
%d", + loc->path, count); + ret = -1; + goto out; + } + + start_off = ntoh32 (disk_layout[2]); + stop_off = ntoh32 (disk_layout[3]); + + if ((layout->list[pos].start != start_off) + || (layout->list[pos].stop != stop_off)) { + gf_log (this->name, GF_LOG_DEBUG, + "subvol: %s; inode layout - %"PRId32" - %"PRId32"; " + "disk layout - %"PRId32" - %"PRId32, + layout->list[pos].xlator->name, + layout->list[pos].start, layout->list[pos].stop, + start_off, stop_off); + ret = 1; + } else { + ret = 0; + } +out: + return ret; +} + diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c new file mode 100644 index 000000000..9cc24ccf6 --- /dev/null +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -0,0 +1,224 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "compat.h" +#include "dht-common.h" + + + +int +dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + + + local = frame->local; + local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno, + local->linkfile.inode, + &local->linkfile.stbuf); + + return 0; +} + + +int +dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dict_t *xattr = NULL; + data_t *str_data = NULL; + int ret = -1; + + local = frame->local; + prev = cookie; + + if (op_ret == -1) + goto err; + + xattr = get_new_dict (); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->linkfile.xattr = dict_ref (xattr); + local->linkfile.inode = inode_ref (inode); + + str_data = str_to_data (local->linkfile.srcvol->name); + if (!str_data) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize linkfile data"); + op_errno = EINVAL; + } + str_data = NULL; + + local->linkfile.stbuf = *stbuf; + + STACK_WIND (frame, dht_linkfile_xattr_cbk, + prev->this, prev->this->fops->setxattr, + &local->linkfile.loc, local->linkfile.xattr, 0); + + return 0; + +err: + if (str_data) { + data_destroy (str_data); + str_data = NULL; + } + + local->linkfile.linkfile_cbk (frame, cookie, this, + op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc) +{ + dht_local_t 
*local = NULL; + + + local = frame->local; + local->linkfile.linkfile_cbk = linkfile_cbk; + local->linkfile.srcvol = tovol; + loc_copy (&local->linkfile.loc, loc); + + STACK_WIND (frame, dht_linkfile_create_cbk, + fromvol, fromvol->fops->mknod, loc, + S_IFREG | DHT_LINKFILE_MODE, 0); + + return 0; +} + + +int +dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + prev = cookie; + subvol = prev->this; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "unlinking linkfile %s on %s failed (%s)", + local->loc.path, subvol->name, strerror (op_errno)); + } + + DHT_STACK_DESTROY (frame); + + return 0; +} + + +int +dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc) +{ + call_frame_t *unlink_frame = NULL; + dht_local_t *unlink_local = NULL; + + unlink_frame = copy_frame (frame); + if (!unlink_frame) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + unlink_local = dht_local_init (unlink_frame); + if (!unlink_local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + loc_copy (&unlink_local->loc, loc); + + STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk, + subvol, subvol->fops->unlink, + &unlink_local->loc); + + return 0; +err: + if (unlink_frame) + DHT_STACK_DESTROY (unlink_frame); + + return -1; +} + + +xlator_t * +dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf, + dict_t *xattr) +{ + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + void *volname = NULL; + int i = 0, ret = 0; + + + conf = this->private; + + if (!xattr) + goto out; + + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname); + + if ((-1 == ret) || !volname) + goto out; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (strcmp (conf->subvolumes[i]->name, (char 
*)volname) == 0) { + subvol = conf->subvolumes[i]; + break; + } + } + +out: + return subvol; +} + + diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c new file mode 100644 index 000000000..e5532f1bc --- /dev/null +++ b/xlators/cluster/dht/src/dht-rename.c @@ -0,0 +1,562 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should + * delete the newpath if it gets EEXISTS from link() call. 
+ */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +int +dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + /* TODO: undo the damage */ + + gf_log (this->name, GF_LOG_ERROR, + "rename %s -> %s on %s failed (%s)", + local->loc.path, local->loc2.path, + prev->this->name, strerror (op_errno)); + + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + /* TODO: construct proper stbuf for dir */ + local->stbuf = *stbuf; + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + + + +int +dht_rename_dir_do (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + + conf = this->private; + local = frame->local; + + if (local->op_ret == -1) + goto err; + + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rename_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, + &local->loc, &local->loc2); + } + + return 0; + +err: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + return 0; +} + + +int +dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if (op_ret > 2) { + gf_log (this->name, GF_LOG_DEBUG, + "readdir on %s for %s returned %d entries", + prev->this->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno 
= ENOTEMPTY; + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rename_dir_do (frame, this); + } + + return 0; +} + + +int +dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "opendir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto err; + } + + STACK_WIND (frame, dht_rename_readdir_cbk, + prev->this, prev->this->fops->readdir, + local->fd, 4096, 0); + + return 0; + +err: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rename_dir_do (frame, this); + } + + return 0; +} + + +int +dht_rename_dir (call_frame_t *frame, xlator_t *this) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int op_errno = -1; + + + conf = frame->this->private; + local = frame->local; + + local->call_cnt = conf->subvolume_cnt; + + local->fd = fd_create (local->loc.inode, frame->root->pid); + if (!local->fd) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->op_ret = 0; + + if (!local->dst_cached) { + dht_rename_dir_do (frame, this); + return 0; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rename_opendir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + &local->loc2, local->fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + +int +dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + + local = frame->local; + prev = cookie; + + this_call_cnt = dht_frame_return (frame); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "unlink on %s failed (%s)", + prev->this->name, strerror (op_errno)); + } + + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *rename_subvol = NULL; + + local = frame->local; + prev = cookie; + + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "rename on %s failed (%s)", prev->this->name, + strerror (op_errno)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk + * is called. since rename has already happened on rename_subvol, + * unlink should not be sent for oldpath (either linkfile or cached-file) + * on rename_subvol. 
*/ + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + /* TODO: delete files in background */ + + if (src_cached != dst_hashed && src_cached != dst_cached) + local->call_cnt++; + + if (src_hashed != rename_subvol && src_hashed != src_cached) + local->call_cnt++; + + if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) + local->call_cnt++; + + if (local->call_cnt == 0) + goto unwind; + + if (src_cached != dst_hashed && src_cached != dst_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old src datafile %s @ %s", + local->loc.path, src_cached->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + src_cached, src_cached->fops->unlink, + &local->loc); + } + + if (src_hashed != rename_subvol && src_hashed != src_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old src linkfile %s @ %s", + local->loc.path, src_hashed->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + src_hashed, src_hashed->fops->unlink, + &local->loc); + } + + if (dst_cached + && (dst_cached != dst_hashed) + && (dst_cached != src_cached)) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old dst datafile %s @ %s", + local->loc2.path, dst_cached->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + dst_cached, dst_cached->fops->unlink, + &local->loc2); + } + return 0; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_do_rename (call_frame_t *frame) +{ + dht_local_t *local = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_cached = NULL; + xlator_t *this = NULL; + xlator_t *rename_subvol = NULL; + + + local = frame->local; + this = frame->this; + + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + src_cached = local->src_cached; + + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + gf_log (this->name, GF_LOG_DEBUG, + "renaming %s => %s (%s)", 
+ local->loc.path, local->loc2.path, rename_subvol->name); + + STACK_WIND (frame, dht_rename_cbk, + rename_subvol, rename_subvol->fops->rename, + &local->loc, &local->loc2); + + return 0; +} + + +int +dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "link/file on %s failed (%s)", + prev->this->name, strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->op_ret == -1) + goto unwind; + + dht_do_rename (frame); + } + + return 0; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_rename_create_links (call_frame_t *frame) +{ + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + + + local = frame->local; + this = frame->this; + + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (src_cached == dst_cached) + goto nolinks; + + if (dst_hashed != src_hashed && dst_hashed != src_cached) + call_cnt++; + + if (src_cached != dst_hashed) + call_cnt++; + + local->call_cnt = call_cnt; + + if (dst_hashed != src_hashed && dst_hashed != src_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "linkfile %s @ %s => %s", + local->loc.path, dst_hashed->name, src_cached->name); + dht_linkfile_create (frame, dht_rename_links_cbk, + src_cached, dst_hashed, &local->loc); + } + + if (src_cached != dst_hashed) { + gf_log (this->name, GF_LOG_DEBUG, + "link %s => %s (%s)", local->loc.path, + 
local->loc2.path, src_cached->name); + STACK_WIND (frame, dht_rename_links_cbk, + src_cached, src_cached->fops->link, + &local->loc, &local->loc2); + } + +nolinks: + if (!call_cnt) { + /* skip to next step */ + dht_do_rename (frame); + } + + return 0; +} + + +int +dht_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + xlator_t *src_cached = NULL; + xlator_t *src_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *dst_hashed = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (newloc, err); + + src_hashed = dht_subvol_get_hashed (this, oldloc); + if (!src_hashed) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + oldloc->path); + op_errno = EINVAL; + goto err; + } + + src_cached = dht_subvol_get_cached (this, oldloc->inode); + if (!src_cached) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", oldloc->path); + op_errno = EINVAL; + goto err; + } + + dst_hashed = dht_subvol_get_hashed (this, newloc); + if (!dst_hashed) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + newloc->path); + op_errno = EINVAL; + goto err; + } + + if (newloc->inode) + dst_cached = dht_subvol_get_cached (this, newloc->inode); + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc, oldloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->src_hashed = src_hashed; + local->src_cached = src_cached; + local->dst_hashed = dst_hashed; + local->dst_cached 
= dst_cached; + + gf_log (this->name, GF_LOG_DEBUG, + "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)", + oldloc->path, src_hashed->name, src_cached->name, + newloc->path, dst_hashed->name, + dst_cached ? dst_cached->name : "<nul>"); + + if (S_ISDIR (oldloc->inode->st_mode)) { + dht_rename_dir (frame, this); + } else { + local->op_ret = 0; + dht_rename_create_links (frame); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c new file mode 100644 index 000000000..ee32b2253 --- /dev/null +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -0,0 +1,460 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret) +{ + dht_local_t *local = NULL; + + + local = frame->local; + local->selfheal.dir_cbk (frame, NULL, frame->this, ret, + local->op_errno); + + return 0; +} + + +int +dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int err = 0; + int this_call_cnt = 0; + + local = frame->local; + layout = local->selfheal.layout; + prev = cookie; + subvol = prev->this; + + if (op_ret == 0) + err = 0; + else + err = op_errno; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = err; + break; + } + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_selfheal_dir_finish (frame, this, 0); + } + + return 0; +} + + +int +dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout, int i) +{ + xlator_t *subvol = NULL; + dict_t *xattr = NULL; + int ret = 0; + xlator_t *this = NULL; + int32_t *disk_layout = NULL; + + + subvol = layout->list[i].xlator; + this = frame->this; + + xattr = get_new_dict (); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = dht_disk_layout_extract (this, layout, i, &disk_layout); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to extract disk layout"); + goto err; + } + + ret = dict_set_bin (xattr, "trusted.glusterfs.dht", + disk_layout, 4 * 4); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set xattr dictionary"); + goto err; + } + disk_layout = NULL; + + gf_log (this->name, GF_LOG_DEBUG, + "setting hash range %u - %u (type %d) on 
subvolume %s for %s", + layout->list[i].start, layout->list[i].stop, + layout->type, subvol->name, loc->path); + + dict_ref (xattr); + + STACK_WIND (frame, dht_selfheal_dir_xattr_cbk, + subvol, subvol->fops->setxattr, + loc, xattr, 0); + + dict_unref (xattr); + + return 0; + +err: + if (xattr) + dict_destroy (xattr); + + if (disk_layout) + FREE (disk_layout); + + dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this, + -1, ENOMEM); + return 0; +} + + +int +dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +{ + dht_local_t *local = NULL; + int missing_xattr = 0; + int i = 0; + int ret = 0; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + /* attr missing and layout present */ + missing_xattr++; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%d subvolumes missing xattr for %s", + missing_xattr, loc->path); + + if (missing_xattr == 0) { + dht_selfheal_dir_finish (frame, this, 0); + return 0; + } + + local->call_cnt = missing_xattr; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + + ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i); + + if (--missing_xattr == 0) + break; + } + return 0; +} + + +int +dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + int i = 0; + int this_call_cnt = 0; + + + local = frame->local; + layout = local->selfheal.layout; + prev = cookie; + subvol = prev->this; + + if ((op_ret == 0) || (op_errno == EEXIST)) { + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = -1; + break; + } + } + } + + this_call_cnt = dht_frame_return (frame); + + if 
(is_last_call (this_call_cnt)) { + dht_selfheal_dir_xattr (frame, &local->loc, layout); + } + + return 0; +} + + +int +dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout, int force) +{ + int missing_dirs = 0; + int i = 0; + dht_local_t *local = NULL; + xlator_t *this = NULL; + + + local = frame->local; + this = frame->this; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || force) + missing_dirs++; + } + + if (missing_dirs == 0) { + dht_selfheal_dir_xattr (frame, loc, layout); + return 0; + } + + local->call_cnt = missing_dirs; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || force) { + gf_log (this->name, GF_LOG_DEBUG, + "creating directory %s on subvol %s", + loc->path, layout->list[i].xlator->name); + + STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->mkdir, + loc, local->stbuf.st_mode); + } + } + + return 0; +} + +void +dht_selfheal_fix_this_virgin (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_conf_t *conf = NULL; + xlator_t *this = NULL; + uint32_t chunk = 0; + int i = 0; + uint32_t start = 0; + int cnt = 0; + int err = 0; + + this = frame->this; + conf = this->private; + + for (i = 0; i < layout->cnt; i++) { + err = layout->list[i].err; + if (err == -1) { + cnt++; + } + } + + chunk = ((unsigned long) 0xffffffff) / cnt; + + start = 0; + for (i = 0; i < layout->cnt; i++) { + err = layout->list[i].err; + if (err == -1) { + layout->list[i].start = start; + layout->list[i].stop = start + chunk - 1; + + start = start + chunk; + + gf_log (this->name, GF_LOG_DEBUG, + "gave fix: %u - %u on %s for %s", + layout->list[i].start, layout->list[i].stop, + layout->list[i].xlator->name, loc->path); + if (--cnt == 0) { + layout->list[i].stop = 0xffffffff; + break; + } + } + } +} + + +int +dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_conf_t *conf = NULL; + xlator_t *this = 
NULL; + dht_local_t *local = NULL; + int missing = -1; + int down = -1; + int holes = -1; + int ret = -1; + int i = -1; + + this = frame->this; + conf = this->private; + local = frame->local; + + missing = local->selfheal.missing; + down = local->selfheal.down; + holes = local->selfheal.hole_cnt; + + if ((missing + down) == conf->subvolume_cnt) { + dht_selfheal_fix_this_virgin (frame, loc, layout); + ret = 0; + } + + if (holes <= down) { + /* the down subvol might fill up the holes */ + ret = 0; + } + + for (i = 0; i < layout->cnt; i++) { + /* directory not present */ + if (layout->list[i].err == ENOENT) { + ret = 0; + break; + } + } + + /* TODO: give a fix to these non-virgins */ + + return ret; +} + + +int +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) +{ + dht_local_t *local = NULL; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; + xlator_t *this = NULL; + + + local = frame->local; + this = frame->this; + + ret = dht_layout_anomalies (this, loc, layout, + &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + &local->selfheal.missing, + &local->selfheal.down, + &local->selfheal.misc); + + holes = local->selfheal.hole_cnt; + overlaps = local->selfheal.overlaps_cnt; + missing = local->selfheal.missing; + down = local->selfheal.down; + misc = local->selfheal.misc; + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = layout; + +/* + if (down) { + gf_log (this->name, GF_LOG_ERROR, + "%d subvolumes down -- not fixing", down); + ret = 0; + goto sorry_no_fix; + } + + if (overlaps) { + gf_log (this->name, GF_LOG_ERROR, + "not fixing overlaps in %s", loc->path); + local->op_errno = EINVAL; + ret = -1; + goto sorry_no_fix; + } + + if (misc) { + gf_log (this->name, GF_LOG_ERROR, + "%d subvolumes have unrecoverable errors", misc); + ret = 0; + goto sorry_no_fix; + } + + if (holes > missing) { + gf_log 
(this->name, GF_LOG_ERROR, + "%d holes and %d pigeons -- not fixing", + holes, missing); + ret = 0; + goto sorry_no_fix; + } +*/ + ret = dht_selfheal_dir_getafix (frame, loc, layout); + + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "the directory is not a virgin"); + goto sorry_no_fix; + } + + dht_selfheal_dir_mkdir (frame, loc, layout, 0); + + return 0; + +sorry_no_fix: + /* TODO: need to put appropriate local->op_errno */ + dht_selfheal_dir_finish (frame, this, ret); + + return 0; +} + + +int +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) +{ + int ret = 0; + dht_local_t *local = NULL; + + + local = frame->local; + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = layout; + + ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c new file mode 100644 index 000000000..836e7a4e8 --- /dev/null +++ b/xlators/cluster/dht/src/dht.c @@ -0,0 +1,222 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "dht-common.c" + +/* TODO: + - use volumename in xattr instead of "dht" + - use NS locks + - handle all cases in self heal layout reconstruction + - complete linkfile selfheal +*/ + + + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = -1; + + ret = dht_notify (this, event, data); + + return ret; +} + +void +fini (xlator_t *this) +{ + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return; +} + +int +init (xlator_t *this) +{ + dht_conf_t *conf = NULL; + char *lookup_unhashed_str = NULL; + int ret = -1; + int i = 0; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "DHT needs more than one child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + conf = CALLOC (1, sizeof (*conf)); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + conf->search_unhashed = 0; + + if (dict_get_str (this->options, "lookup-unhashed", + &lookup_unhashed_str) == 0) { + gf_string2boolean (lookup_unhashed_str, + &conf->search_unhashed); + } + + ret = dht_init_subvolumes (this, conf); + if (ret == -1) { + goto err; + } + + ret = dht_layouts_init (this, conf); + if (ret == -1) { + goto err; + } + + LOCK_INIT (&conf->subvolume_lock); + + conf->gen = 1; + + this->private = conf; + + return 0; + +err: + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return -1; +} + + +struct xlator_fops fops = { + .lookup = dht_lookup, + .mknod = dht_mknod, + .create = dht_create, + + .stat = dht_stat, + .chmod = dht_chmod, + .chown = dht_chown, + .fchown = dht_fchown, + .fchmod = dht_fchmod, + .fstat = dht_fstat, + .utimens = dht_utimens, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, +#if 0 
+ .setdents = dht_setdents, + .getdents = dht_getdents, + .checksum = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +// .release = dht_release, +// .releasedir = dht_releasedir, + .forget = dht_forget +}; + + +struct volume_options options[] = { + { .key = {"lookup-unhashed"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c new file mode 100644 index 000000000..6333e002f --- /dev/null +++ b/xlators/cluster/dht/src/nufa.c @@ -0,0 +1,684 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.c" + +/* TODO: all 'TODO's in dht.c holds good */ + +int +nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + char is_linkfile = 0; + char is_dir = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + call_frame_t *prev = NULL; + int call_cnt = 0; + + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING (op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere (frame, this, loc); + return 0; + } + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_dir = check_is_dir (inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + + dht_itransform (this, prev->this, stbuf->st_ino, + &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + goto out; + } + + if (is_dir) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->inode = inode_ref (inode); + local->xattr = dict_ref (xattr); + + local->op_ret = 0; + local->op_errno = 0; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + } + + if 
(is_linkfile) { + subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + + if (!subvol) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile not having link subvolume. path=%s", + loc->path); + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + STACK_WIND (frame, dht_lookup_linkfile_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); + } + + return 0; + +out: + if (!local->hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + local->loc.path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_lookup_cbk, + local->hashed_subvol, local->hashed_subvol->fops->lookup, + &local->loc, local->xattr_req); + + return 0; + + err: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + return 0; +} + +int +nufa_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "copying location failed for path=%s", + loc->path); + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref (xattr_req); + } else { + local->xattr_req = dict_new (); + } + + hashed_subvol = dht_subvol_get_hashed (this, &local->loc); + cached_subvol = dht_subvol_get_cached (this, local->loc.inode); + + local->cached_subvol = cached_subvol; + local->hashed_subvol = 
hashed_subvol; + + if (is_revalidate (loc)) { + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "revalidate without cache. path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + if (layout->gen && (layout->gen < conf->gen)) { + gf_log (this->name, GF_LOG_WARNING, + "incomplete layout failure for path=%s", + loc->path); + op_errno = EAGAIN; + goto err; + } + + local->inode = inode_ref (loc->inode); + local->st_ino = loc->inode->ino; + + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; + + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. + */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_revalidate_cbk, + subvol, subvol->fops->lookup, + loc, local->xattr_req); + + if (!--call_cnt) + break; + } + } else { + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht.linkto", 256); + + /* Send it to only local volume */ + STACK_WIND (frame, nufa_local_lookup_cbk, + conf->local_volume, + conf->local_volume->fops->lookup, + loc, local->xattr_req); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret == -1) + goto err; + + STACK_WIND (frame, dht_create_cbk, + conf->local_volume, conf->local_volume->fops->create, + &local->loc, local->flags, local->mode, local->fd); + + return 0; + + err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +nufa_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + int op_errno = -1; + int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + if (subvol != conf->local_volume) { + /* create a link file instead of actual file */ + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_ref (fd); + local->mode = mode; + local->flags = flags; + + dht_linkfile_create (frame, nufa_create_linkfile_create_cbk, + conf->local_volume, subvol, loc); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + 
loc, flags, mode, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + +int +nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret >= 0) { + STACK_WIND (frame, dht_newfile_cbk, + conf->local_volume, + conf->local_volume->fops->mknod, + &local->loc, local->mode, local->rdev); + + return 0; + } + + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +nufa_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + int op_errno = -1; + int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + conf = this->private; + + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + + if (conf->local_volume != subvol) { + /* Create linkfile first */ + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->mode = mode; + local->rdev = rdev; + + dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, + conf->local_volume, subvol, loc); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->mknod, + loc, mode, rdev); + 
+ return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = -1; + + ret = dht_notify (this, event, data); + + return ret; +} + +void +fini (xlator_t *this) +{ + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return; +} + +int +init (xlator_t *this) +{ + dht_conf_t *conf = NULL; + xlator_list_t *trav = NULL; + data_t *data = NULL; + char *local_volname = NULL; + char *lookup_unhashed_str = NULL; + int ret = -1; + int i = 0; + char my_hostname[256]; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "DHT needs more than one child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + conf = CALLOC (1, sizeof (*conf)); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + conf->search_unhashed = 0; + + if (dict_get_str (this->options, "lookup-unhashed", + &lookup_unhashed_str) == 0) { + gf_string2boolean (lookup_unhashed_str, + &conf->search_unhashed); + } + + ret = dht_init_subvolumes (this, conf); + if (ret == -1) { + goto err; + } + + ret = dht_layouts_init (this, conf); + if (ret == -1) { + goto err; + } + + LOCK_INIT (&conf->subvolume_lock); + + conf->gen = 1; + + local_volname = "localhost"; + ret = gethostname (my_hostname, 256); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not find hostname (%s)", + strerror (errno)); + } + + if (ret == 0) + local_volname = my_hostname; + + data = dict_get (this->options, "local-volume-name"); + if (data) { + local_volname = data->data; + } + + trav = this->children; + while (trav) { + if (strcmp (trav->xlator->name, local_volname) == 0) + break; + trav = trav->next; + } + + if (!trav) { + gf_log (this->name, GF_LOG_ERROR, + "Could not find subvolume named '%s'. 
" + "Please define volume with the name as the hostname " + "or override it with 'option local-volume-name'", + local_volname); + goto err; + } + /* The volume specified exists */ + conf->local_volume = trav->xlator; + + this->private = conf; + + return 0; + +err: + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return -1; +} + + +struct xlator_fops fops = { + .lookup = nufa_lookup, + .create = nufa_create, + .mknod = nufa_mknod, + + .stat = dht_stat, + .chmod = dht_chmod, + .chown = dht_chown, + .fchown = dht_fchown, + .fchmod = dht_fchmod, + .fstat = dht_fstat, + .utimens = dht_utimens, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, +#if 0 + .setdents = dht_setdents, + .getdents = dht_getdents, + .checksum = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +// .release = dht_release, +// .releasedir = dht_releasedir, + .forget = dht_forget +}; + + +struct volume_options options[] = { + { .key = {"local-volume-name"}, + .type = 
GF_OPTION_TYPE_XLATOR + }, + { .key = {"lookup-unhashed"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/ha/Makefile.am b/xlators/cluster/ha/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/ha/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am new file mode 100644 index 000000000..069a0dcde --- /dev/null +++ b/xlators/cluster/ha/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = ha.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +ha_la_LDFLAGS = -module -avoidversion + +ha_la_SOURCES = ha-helpers.c ha.c +ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = ha.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c new file mode 100644 index 000000000..8193caf27 --- /dev/null +++ b/xlators/cluster/ha/src/ha-helpers.c @@ -0,0 +1,191 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include "xlator.h" +#include "call-stub.h" +#include "defaults.h" +#include "dict.h" +#include "compat-errno.h" +#include "ha.h" + +int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd) +{ + ha_local_t *local = NULL; + int i = -1; + ha_private_t *pvt = NULL; + int child_count = 0; + int ret = -1; + hafd_t *hafdp = NULL; + xlator_t *this = NULL; + uint64_t tmp_hafdp = 0; + + this = frame->this; + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + + if (local == NULL) { + ret = fd_ctx_get (fd, this, &tmp_hafdp); + if (ret < 0) { + goto out; + } + hafdp = (hafd_t *)(long)tmp_hafdp; + local = frame->local = CALLOC (1, sizeof (*local)); + if (local == NULL) { + ret = -ENOMEM; + goto out; + } + local->state = CALLOC (1, child_count); + if (local->state == NULL) { + ret = -ENOMEM; + goto out; + } + + /* take care of the preferred subvolume */ + if (pvt->pref_subvol == -1) + local->active = hafdp->active; + else + local->active = pvt->pref_subvol; + + LOCK (&hafdp->lock); + memcpy (local->state, hafdp->fdstate, child_count); + UNLOCK (&hafdp->lock); + + /* in case the preferred subvolume is down */ + if ((local->active != -1) && (local->state[local->active] == 0)) + local->active = -1; + + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + if (local->active == -1) + local->active = i; + local->tries++; + } + } + if (local->active == -1) { + ret = -ENOTCONN; + goto out; + } + local->fd = fd_ref (fd); + } + ret = 0; +out: + return ret; +} + +int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno) +{ + xlator_t *xl = NULL; + ha_private_t *pvt = NULL; + xlator_t **children = NULL; + int prev_child = -1; + hafd_t *hafdp = NULL; + int ret = -1; + call_stub_t *stub = NULL; + ha_local_t *local = NULL; + uint64_t tmp_hafdp = 0; + + xl = frame->this; + pvt = xl->private; + children = pvt->children; + prev_child = (long) cookie; + local = frame->local; + + if (op_ret == -1) { + gf_log (xl->name, GF_LOG_ERROR 
,"(child=%s) (op_ret=%d op_errno=%s)", + children[prev_child]->name, op_ret, strerror (op_errno)); + } + if (op_ret == -1 && (op_errno == ENOTCONN)) { + ret = 0; + if (local->fd) { + ret = fd_ctx_get (local->fd, xl, &tmp_hafdp); + } + hafdp = (hafd_t *)(long)tmp_hafdp; + if (ret == 0) { + if (local->fd) { + LOCK(&hafdp->lock); + hafdp->fdstate[prev_child] = 0; + UNLOCK(&hafdp->lock); + } + local->tries--; + if (local->tries != 0) { + while (1) { + local->active = (local->active + 1) % pvt->child_count; + if (local->state[local->active]) + break; + } + stub = local->stub; + local->stub = NULL; + call_resume (stub); + return -1; + } + } + } + if (local->stub) + call_stub_destroy (local->stub); + if (local->fd) { + FREE (local->state); + fd_unref (local->fd); + } + return 0; +} + +int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode) +{ + int i = -1; + ha_private_t *pvt = NULL; + xlator_t *xl = NULL; + int ret = -1; + ha_local_t *local = NULL; + uint64_t tmp_state = 0; + + xl = frame->this; + pvt = xl->private; + local = frame->local; + + if (local == NULL) { + local = frame->local = CALLOC (1, sizeof (*local)); + if (local == NULL) { + ret = -ENOMEM; + goto out; + } + local->active = pvt->pref_subvol; + ret = inode_ctx_get (inode, xl, &tmp_state); + if (ret < 0) { + goto out; + } + local->state = (char *)(long)tmp_state; + if (local->active != -1 && local->state[local->active] == 0) + local->active = -1; + for (i = 0; i < pvt->child_count; i++) { + if (local->state[i]) { + if (local->active == -1) + local->active = i; + local->tries++; + } + } + if (local->active == -1) { + ret = -ENOTCONN; + goto out; + } + } + ret = 0; +out: + return ret; +} diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c new file mode 100644 index 000000000..4542bdc7e --- /dev/null +++ b/xlators/cluster/ha/src/ha.c @@ -0,0 +1,3479 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* generate errors randomly, code is simple now, better algorithm + * can be written to decide what error to be returned and when + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "call-stub.h" +#include "defaults.h" +#include "dict.h" +#include "compat-errno.h" +#include "ha.h" + +/* + * TODO: + * - dbench fails if ha over server side afr + * - lock calls - lock on all subvols. + * - support preferred-subvolume option. code already there. + * - do not alloc the call-stub in case only one subvol is up. 
+ */
+
+/*
+ * ha_forget - drop the per-inode state kept by this translator.
+ *
+ * Removes this translator's value from the inode context and, when one
+ * was stored, frees the per-child state array it points to.
+ * Always returns 0.
+ */
+int
+ha_forget (xlator_t *this,
+           inode_t *inode)
+{
+        uint64_t stateino = 0;
+        char *state = NULL;
+        if (!inode_ctx_del (inode, this, &stateino)) {
+                state = ((char *)(long)stateino);
+                FREE (state);
+        }
+
+        return 0;
+
+}
+
+/*
+ * ha_lookup_cbk - collect one child's reply to the fan-out in ha_lookup().
+ *
+ * cookie is the frame of the child that replied; it is matched against
+ * pvt->children to find the child's index i.  The per-child state array
+ * is read from the inode context:
+ *   - on a first lookup a successful reply sets state[i] to 1;
+ *   - on a revalidate a reply that disagrees with state[i] sets
+ *     local->revalidate_error.
+ * The first successful reply provides the stat buffer and dict that are
+ * passed up.  A failure other than ENOTCONN replaces the errno to report
+ * (which starts out as ENOTCONN).  When the last reply arrives the result
+ * is unwound (EIO on a revalidate error) and the references taken on the
+ * inode and dict are dropped.
+ */
+int32_t
+ha_lookup_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               inode_t *inode,
+               struct stat *buf,
+               dict_t *dict)
+{
+        ha_local_t *local = NULL;
+        ha_private_t *pvt = NULL;
+        int child_count = 0, i = 0, callcnt = 0;
+        char *state = NULL;
+        call_frame_t *prev_frame = NULL;
+        xlator_t **children = NULL;
+        uint64_t tmp_state = 0;
+
+        local = frame->local;
+        pvt = this->private;
+        child_count = pvt->child_count;
+        prev_frame = cookie;
+        children = pvt->children;
+
+        /* NOTE(review): assumes the replying frame always belongs to one
+         * of pvt->children; otherwise i ends up equal to child_count.
+         */
+        for (i = 0; i < child_count; i++) {
+                if (pvt->children[i] == prev_frame->this)
+                        break;
+        }
+        if ((op_ret == -1) && (op_errno != ENOENT)) {
+                gf_log (this->name, GF_LOG_ERROR, "(child=%s) (op_ret=%d op_errno=%s)",
+                        children[i]->name, op_ret, strerror (op_errno));
+        }
+        inode_ctx_get (local->inode, this, &tmp_state);
+        state = (char *)(long)tmp_state;
+
+        LOCK (&frame->lock);
+        if (local->revalidate == 1) {
+                if ((!op_ret) != state[i]) {
+                        local->revalidate_error = 1;
+                        gf_log (this->name, GF_LOG_DEBUG, "revalidate error on %s",
+                                pvt->children[i]->name);
+                }
+        } else {
+                if (op_ret == 0) {
+                        state[i] = 1;
+                }
+        }
+        if (local->op_ret == -1 && op_ret == 0) {
+                local->op_ret = 0;
+                local->buf = *buf;
+                if (dict)
+                        local->dict = dict_ref (dict);
+        }
+        /* Keep a real error (e.g. ENOENT) in preference to ENOTCONN from a
+         * child that is merely down.  The test must look at op_errno: the
+         * previous "op_ret != ENOTCONN" was always true for op_ret == -1.
+         */
+        if (op_ret == -1 && op_errno != ENOTCONN)
+                local->op_errno = op_errno;
+        callcnt = --local->call_count;
+        UNLOCK (&frame->lock);
+
+        if (callcnt == 0) {
+                /* copy what is needed after the unwind into locals first */
+                dict_t *ctx = local->dict;
+                inode_t *inode = local->inode;
+                if (local->revalidate_error == 1) {
+                        local->op_ret = -1;
+                        local->op_errno = EIO;
+                        gf_log (this->name, GF_LOG_DEBUG, "revalidate error, returning EIO");
+                }
+                STACK_UNWIND (frame,
+                              local->op_ret,
+                              local->op_errno,
+                              inode,
+                              &local->buf,
+                              ctx);
+                if (inode)
+                        inode_unref (inode);
+                if (ctx)
+                        dict_unref (ctx);
+        }
+        return 0;
+}
+
+/*
+ * ha_lookup - send a lookup to every child.
+ *
+ * Allocates the frame local and takes a reference on loc->inode.  When
+ * the inode has no context for this translator yet, a zeroed per-child
+ * state array is allocated and stored in it; otherwise the call is
+ * flagged as a revalidate.  The result defaults to -1/ENOTCONN and is
+ * refined by ha_lookup_cbk(), which unwinds after child_count replies.
+ */
+int32_t
+ha_lookup (call_frame_t *frame,
+           xlator_t *this,
+           loc_t *loc,
+           dict_t *xattr_req)
+{
+        ha_local_t *local = NULL;
+        ha_private_t *pvt = NULL;
+        int child_count = 0, i = 0;
+        char *state = NULL;
+        xlator_t **children = NULL;
+        int ret = -1;
+
+        pvt = this->private;
+        child_count = pvt->child_count;
+        children = pvt->children;
+
+        /* NOTE(review): neither CALLOC result below is checked. */
+        frame->local = local = CALLOC (1, sizeof (*local));
+        local->inode = inode_ref (loc->inode);
+
+        ret = inode_ctx_get (loc->inode, this, NULL);
+        if (ret) {
+                state = CALLOC (1, child_count);
+                inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
+        } else
+                local->revalidate = 1;
+
+        local->op_ret = -1;
+        local->op_errno = ENOTCONN;
+        local->call_count = child_count;
+
+        for (i = 0; i < child_count; i++) {
+                STACK_WIND (frame,
+                            ha_lookup_cbk,
+                            children[i],
+                            children[i]->fops->lookup,
+                            loc,
+                            xattr_req);
+        }
+        return 0;
+}
+
+/*
+ * ha_stat_cbk - reply handler for ha_stat().
+ *
+ * ha_handle_cbk() returns 0 when the reply is to be passed up; any other
+ * value means the fop was resumed on another child, so nothing is unwound.
+ */
+int32_t
+ha_stat_cbk (call_frame_t *frame,
+             void *cookie,
+             xlator_t *this,
+             int32_t op_ret,
+             int32_t op_errno,
+             struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+/*
+ * ha_stat - stat on the active child.
+ *
+ * Saves a stub of this call in local->stub so that ha_handle_cbk() can
+ * resume it on another child, then winds to the active child with the
+ * child index as cookie.  Unwinds with -1 and the errno reported by
+ * ha_alloc_init_inode() when the local cannot be set up.
+ */
+int32_t
+ha_stat (call_frame_t *frame,
+         xlator_t *this,
+         loc_t *loc)
+{
+        ha_local_t *local = NULL;
+        int op_errno = ENOTCONN;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_stat_stub (frame, ha_stat, loc);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_stat_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->stat,
+                           loc);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * ha_chmod_cbk - reply handler for ha_chmod(); unwinds only when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_chmod_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+/*
+ * ha_chmod - chmod on the active child.
+ *
+ * Saves a stub of this call in local->stub, then winds to the active
+ * child with the child index as cookie.  Unwinds with -1 and the errno
+ * reported by ha_alloc_init_inode() when the local cannot be set up.
+ */
+int32_t
+ha_chmod (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc,
+          mode_t mode)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_chmod_stub (frame, ha_chmod, loc, mode);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_chmod_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->chmod,
+                           loc,
+                           mode);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * ha_fchmod_cbk - reply handler for ha_fchmod(); unwinds only when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_fchmod_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+/*
+ * ha_fchmod - fchmod on the active child of an open fd.
+ *
+ * Saves a stub of this call in local->stub, then winds to the active
+ * child with the child index as cookie.  Unwinds with -1 and the errno
+ * reported by ha_alloc_init_fd() when the local cannot be set up.
+ */
+int32_t
+ha_fchmod (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           mode_t mode)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_fd (frame, fd);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_fchmod_stub (frame, ha_fchmod, fd, mode);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fchmod_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fchmod,
+                           fd,
+                           mode);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * ha_chown_cbk - reply handler for ha_chown(); unwinds only when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_chown_cbk (call_frame_t *frame,
+              void *cookie,
+              xlator_t *this,
+              int32_t op_ret,
+              int32_t op_errno,
+              struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+/*
+ * ha_chown - chown on the active child.
+ *
+ * Saves a stub of this call in local->stub so that ha_handle_cbk() can
+ * resume it on another child, then winds to the active child with the
+ * child index as cookie.  Unwinds with -1 and the errno reported by
+ * ha_alloc_init_inode() when the local cannot be set up.
+ */
+int32_t
+ha_chown (call_frame_t *frame,
+          xlator_t *this,
+          loc_t *loc,
+          uid_t uid,
+          gid_t gid)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_inode (frame, loc->inode);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_chown_stub (frame, ha_chown, loc, uid, gid);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_chown_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->chown,
+                           loc,
+                           uid,
+                           gid);
+        return 0;
+err:
+        /* report the actual failure (e.g. ENOMEM) like the sibling fops,
+         * not a hard-coded ENOTCONN
+         */
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * ha_fchown_cbk - reply handler for ha_fchown(); unwinds only when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_fchown_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+/*
+ * ha_fchown - fchown on the active child of an open fd.
+ *
+ * Saves a stub of this call in local->stub, then winds to the active
+ * child with the child index as cookie.  Unwinds with -1 and the errno
+ * reported by ha_alloc_init_fd() when the local cannot be set up.
+ */
+int32_t
+ha_fchown (call_frame_t *frame,
+           xlator_t *this,
+           fd_t *fd,
+           uid_t uid,
+           gid_t gid)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = ha_alloc_init_fd (frame, fd);
+        if (op_errno < 0) {
+                op_errno = -op_errno;
+                goto err;
+        }
+        local = frame->local;
+        local->stub = fop_fchown_stub (frame, ha_fchown, fd, uid, gid);
+
+        STACK_WIND_COOKIE (frame,
+                           ha_fchown_cbk,
+                           (void *)(long)local->active,
+                           HA_ACTIVE_CHILD(this, local),
+                           HA_ACTIVE_CHILD(this, local)->fops->fchown,
+                           fd,
+                           uid,
+                           gid);
+        return 0;
+err:
+        STACK_UNWIND (frame, -1, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * ha_truncate_cbk - reply handler for ha_truncate(); unwinds only when
+ * ha_handle_cbk() returns 0.
+ */
+int32_t
+ha_truncate_cbk (call_frame_t *frame,
+                 void *cookie,
+                 xlator_t *this,
+                 int32_t op_ret,
+                 int32_t op_errno,
+                 struct stat *buf)
+{
+        int ret = -1;
+
+        ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
+        if (ret == 0) {
+                STACK_UNWIND (frame,
+                              op_ret,
+                              op_errno,
+                              buf);
+        }
+        return 0;
+}
+
+int32_t
+ha_truncate (call_frame_t *frame,
+             xlator_t *this,
+             loc_t *loc,
+             off_t offset)
+{
+        ha_local_t *local = NULL;
+        int op_errno = 0;
+
+        op_errno = 
ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset); + + STACK_WIND_COOKIE (frame, + ha_truncate_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->truncate, + loc, + offset); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + int32_t +ha_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + } + return 0; +} + +int32_t +ha_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset); + + STACK_WIND_COOKIE (frame, + ha_ftruncate_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->ftruncate, + fd, + offset); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int32_t +ha_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + } + return 0; +} + +int32_t +ha_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = 
fop_utimens_stub (frame, ha_utimens, loc, tv); + + STACK_WIND_COOKIE (frame, + ha_utimens_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->utimens, + loc, + tv); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int32_t +ha_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_access_stub (frame, ha_access, loc, mask); + + STACK_WIND_COOKIE (frame, + ha_access_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->access, + loc, + mask); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + + int32_t +ha_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + path); + } + return 0; +} + +int32_t +ha_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + ha_local_t *local = frame->local; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_readlink_stub (frame, ha_readlink, loc, size); + + STACK_WIND_COOKIE (frame, + ha_readlink_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, 
local)->fops->readlink, + loc, + size); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int +ha_mknod_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0, ret = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "(path=%s) (op_ret=%d op_errno=%d)", + local->stub->args.mknod.loc.path, op_ret, op_errno); + } + ret = inode_ctx_get (local->stub->args.mknod.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "unwind(-1), inode_ctx_get() error"); + /* It is difficult to handle this error at this stage + * as we still expect more cbks, we can't return as + * of now + */ + } else if (op_ret == 0) { + stateino[i] = 1; + } + LOCK (&frame->lock); + cnt = --local->call_count; + UNLOCK (&frame->lock); + + if (cnt == 0) { + call_stub_t *stub = local->stub; + FREE (local->state); + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->stub->args.mknod.loc.inode, + &local->buf); + call_stub_destroy (stub); + } + return 0; +} + +int32_t +ha_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0, ret = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = 
frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mknod.loc.path, op_ret, op_errno); + } + + ret = inode_ctx_get (local->stub->args.mknod.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error"); + /* FIXME: handle the case */ + } + if (op_ret == 0) { + stateino[i] = 1; + local->op_ret = 0; + local->first_success = 1; + local->buf = *buf; + } + cnt = --local->call_count; + for (i = local->active + 1; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (cnt == 0 || i == child_count) { + call_stub_t *stub = local->stub; + FREE (local->state); + stub = local->stub; + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mknod.loc.inode, &local->buf); + call_stub_destroy (stub); + return 0; + } + + local->active = i; + + if (local->first_success == 0) { + STACK_WIND (frame, + ha_mknod_cbk, + children[i], + children[i]->fops->mknod, + &local->stub->args.mknod.loc, + local->stub->args.mknod.mode, + local->stub->args.mknod.rdev); + return 0; + } + cnt = local->call_count; + + for (; i < child_count; i++) { + if (local->state[i]) { + STACK_WIND (frame, + ha_mknod_lookup_cbk, + children[i], + children[i]->fops->lookup, + &local->stub->args.mknod.loc, + 0); + if (--cnt == 0) + break; + } + } + return 0; +} + +int32_t +ha_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int child_count = 0, i = 0; + char *stateino = NULL; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + + frame->local = local = CALLOC (1, sizeof 
(*local)); + local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->state = CALLOC (1, child_count); + memcpy (local->state, pvt->state, child_count); + local->active = -1; + + stateino = CALLOC (1, child_count); + inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino); + + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + local->call_count++; + if (local->active == -1) + local->active = i; + } + } + + STACK_WIND (frame, + ha_mknod_cbk, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->mknod, + loc, mode, rdev); + return 0; +} + + +int +ha_mkdir_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno); + } + inode_ctx_get (local->stub->args.mkdir.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) + stateino[i] = 1; + + LOCK (&frame->lock); + cnt = --local->call_count; + UNLOCK (&frame->lock); + + if (cnt == 0) { + call_stub_t *stub = local->stub; + FREE (local->state); + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->stub->args.mkdir.loc.inode, + &local->buf); + call_stub_destroy (stub); + } + return 0; +} + +int32_t +ha_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t 
op_errno, + inode_t *inode, + struct stat *buf) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno); + } + + inode_ctx_get (local->stub->args.mkdir.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) { + stateino[i] = 1; + local->op_ret = 0; + local->first_success = 1; + local->buf = *buf; + } + cnt = --local->call_count; + for (i = local->active + 1; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (cnt == 0 || i == child_count) { + call_stub_t *stub = local->stub; + FREE (local->state); + stub = local->stub; + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mkdir.loc.inode, &local->buf); + call_stub_destroy (stub); + return 0; + } + + local->active = i; + + if (local->first_success == 0) { + STACK_WIND (frame, + ha_mkdir_cbk, + children[i], + children[i]->fops->mkdir, + &local->stub->args.mkdir.loc, + local->stub->args.mkdir.mode); + return 0; + } + cnt = local->call_count; + + for (; i < child_count; i++) { + if (local->state[i]) { + STACK_WIND (frame, + ha_mkdir_lookup_cbk, + children[i], + children[i]->fops->lookup, + &local->stub->args.mkdir.loc, + 0); + if (--cnt == 0) + break; + } + } + return 0; +} + +int32_t +ha_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int child_count = 0, i = 0; + char *stateino = NULL; + + local = 
frame->local; + pvt = this->private; + child_count = pvt->child_count; + + frame->local = local = CALLOC (1, sizeof (*local)); + local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->state = CALLOC (1, child_count); + memcpy (local->state, pvt->state, child_count); + local->active = -1; + + stateino = CALLOC (1, child_count); + inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino); + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + local->call_count++; + if (local->active == -1) + local->active = i; + } + } + + STACK_WIND (frame, + ha_mkdir_cbk, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->mkdir, + loc, mode); + return 0; +} + + int32_t +ha_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, op_ret, op_errno); + } + return 0; +} + +int32_t +ha_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_unlink_stub (frame, ha_unlink, loc); + + STACK_WIND_COOKIE (frame, + ha_unlink_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->unlink, + loc); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + int32_t +ha_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ha_local_t *local = frame->local; + int op_errno = 
0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_rmdir_stub (frame, ha_rmdir, loc); + + STACK_WIND_COOKIE (frame, + ha_rmdir_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->rmdir, + loc); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + +int +ha_symlink_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno); + } + inode_ctx_get (local->stub->args.symlink.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) + stateino[i] = 1; + + LOCK (&frame->lock); + cnt = --local->call_count; + UNLOCK (&frame->lock); + + if (cnt == 0) { + call_stub_t *stub = local->stub; + FREE (local->state); + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->stub->args.symlink.loc.inode, + &local->buf); + call_stub_destroy (stub); + } + return 0; +} + +int32_t +ha_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; 
+ xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno); + } + inode_ctx_get (local->stub->args.symlink.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) { + stateino[i] = 1; + local->op_ret = 0; + local->first_success = 1; + local->buf = *buf; + } + cnt = --local->call_count; + for (i = local->active + 1; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (cnt == 0 || i == child_count) { + call_stub_t *stub = local->stub; + FREE (local->state); + stub = local->stub; + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->stub->args.symlink.loc.inode, &local->buf); + call_stub_destroy (stub); + return 0; + } + + local->active = i; + + if (local->first_success == 0) { + STACK_WIND (frame, + ha_symlink_cbk, + children[i], + children[i]->fops->symlink, + local->stub->args.symlink.linkname, + &local->stub->args.symlink.loc); + return 0; + } + cnt = local->call_count; + + for (; i < child_count; i++) { + if (local->state[i]) { + STACK_WIND (frame, + ha_symlink_lookup_cbk, + children[i], + children[i]->fops->lookup, + &local->stub->args.symlink.loc, + 0); + if (--cnt == 0) + break; + } + } + return 0; +} + +int32_t +ha_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkname, + loc_t *loc) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int child_count = 0, i = 0; + char *stateino = NULL; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + + frame->local = local = CALLOC (1, sizeof (*local)); + local->stub = fop_symlink_stub (frame, ha_symlink, 
linkname, loc); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->state = CALLOC (1, child_count); + memcpy (local->state, pvt->state, child_count); + local->active = -1; + + stateino = CALLOC (1, child_count); + inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino); + + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + local->call_count++; + if (local->active == -1) { + local->active = i; + } + } + } + + STACK_WIND (frame, + ha_symlink_cbk, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->symlink, + linkname, loc); + return 0; +} + + int32_t +ha_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, op_ret, op_errno, buf); + } + return 0; +} + +int32_t +ha_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, oldloc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc); + STACK_WIND_COOKIE (frame, + ha_rename_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->rename, + oldloc, newloc); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int +ha_link_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; 
+ children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno); + } + inode_ctx_get (local->stub->args.link.newloc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) + stateino[i] = 1; + + LOCK (&frame->lock); + cnt = --local->call_count; + UNLOCK (&frame->lock); + + if (cnt == 0) { + call_stub_t *stub = local->stub; + FREE (local->state); + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->stub->args.link.oldloc.inode, + &local->buf); + call_stub_destroy (stub); + } + return 0; +} + +int32_t +ha_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + int child_count = 0, i = 0, cnt = 0; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + for (i = 0; i < child_count; i++) + if (prev_frame->this == children[i]) + break; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno); + } + inode_ctx_get (local->stub->args.link.newloc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (op_ret == 0) { + stateino[i] = 1; + local->op_ret = 0; + local->first_success = 1; + local->buf = *buf; + } + cnt = --local->call_count; + for (i = local->active + 1; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (cnt == 0 || i == child_count) { + call_stub_t *stub = local->stub; + FREE (local->state); + stub = local->stub; 
+ STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.link.oldloc.inode, &local->buf); + call_stub_destroy (stub); + return 0; + } + + local->active = i; + + if (local->first_success == 0) { + STACK_WIND (frame, + ha_link_cbk, + children[i], + children[i]->fops->link, + &local->stub->args.link.oldloc, + &local->stub->args.link.newloc); + return 0; + } + cnt = local->call_count; + + for (; i < child_count; i++) { + if (local->state[i]) { + STACK_WIND (frame, + ha_link_lookup_cbk, + children[i], + children[i]->fops->lookup, + &local->stub->args.link.newloc, + 0); + if (--cnt == 0) + break; + } + } + return 0; +} + +int32_t +ha_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int child_count = 0, i = 0; + char *stateino = NULL; + int32_t ret = 0; + uint64_t tmp_stateino = 0; + + ret = inode_ctx_get (newloc->inode, this, &tmp_stateino); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()"); + } + stateino = (char *)(long)tmp_stateino; + + if (stateino == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "newloc->inode's ctx is NULL, returning EINVAL"); + STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL); + return 0; + } + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + + frame->local = local = CALLOC (1, sizeof (*local)); + local->stub = fop_link_stub (frame, ha_link, oldloc, newloc); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->state = CALLOC (1, child_count); + memcpy (local->state, pvt->state, child_count); + local->active = -1; + + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + local->call_count++; + if (local->active == -1) + local->active = i; + } + } + + STACK_WIND (frame, + ha_link_cbk, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->link, + oldloc, + newloc); + return 0; +} + +int32_t +ha_create_cbk (call_frame_t *frame, + void *cookie, + 
xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int i, child_count = 0, cnt = 0, ret = 0; + char *stateino = NULL; + hafd_t *hafdp = NULL; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + uint64_t tmp_stateino = 0; + uint64_t tmp_hafdp = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + prev_frame = cookie; + children = pvt->children; + + ret = inode_ctx_get (local->stub->args.create.loc.inode, + this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error"); + /* FIXME: handle */ + } + ret = fd_ctx_get (local->stub->args.create.fd, this, &tmp_hafdp); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error"); + /* FIXME: handle */ + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + for (i = 0; i < child_count; i++) { + if (prev_frame->this == children[i]) + break; + } + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.create.loc.path, op_ret, op_errno); + } + if (op_ret != -1) { + stateino[i] = 1; + hafdp->fdstate[i] = 1; + if (local->op_ret == -1) { + local->op_ret = 0; + local->buf = *buf; + local->first_success = 1; + } + local->stub->args.create.flags &= (~O_EXCL); + } + LOCK (&frame->lock); + cnt = --local->call_count; + UNLOCK (&frame->lock); + + for (i = local->active + 1; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (cnt == 0 || i == child_count) { + char *state = local->state; + call_stub_t *stub = local->stub; + STACK_UNWIND (frame, local->op_ret, local->op_errno, + stub->args.create.fd, + stub->args.create.loc.inode, &local->buf); + FREE (state); + call_stub_destroy (stub); + return 0; + } + local->active = i; + cnt = local->call_count; + for (; i < child_count; i++) { + if 
(local->state[i]) { + STACK_WIND (frame, + ha_create_cbk, + children[i], + children[i]->fops->create, + &local->stub->args.create.loc, + local->stub->args.create.flags, + local->stub->args.create.mode, + local->stub->args.create.fd); + if ((local->first_success == 0) || (cnt == 0)) + break; + } + } + return 0; +} + +int32_t +ha_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, fd_t *fd) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + int i, child_count = 0; + char *stateino = NULL; + xlator_t **children = NULL; + hafd_t *hafdp = NULL; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + children = pvt->children; + + if (local == NULL) { + local = frame->local = CALLOC (1, sizeof (*local)); + local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd); + local->state = CALLOC (1, child_count); + local->active = -1; + local->op_ret = -1; + local->op_errno = ENOTCONN; + memcpy (local->state, pvt->state, child_count); + + for (i = 0; i < pvt->child_count; i++) { + if (local->state[i]) { + local->call_count++; + if (local->active == -1) + local->active = i; + } + } + /* FIXME handle active -1 */ + stateino = CALLOC (1, child_count); + hafdp = CALLOC (1, sizeof (*hafdp)); + hafdp->fdstate = CALLOC (1, child_count); + hafdp->path = strdup(loc->path); + LOCK_INIT (&hafdp->lock); + fd_ctx_set (fd, this, (uint64_t)(long)hafdp); + inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino); + } + + STACK_WIND (frame, + ha_create_cbk, + children[local->active], + children[local->active]->fops->create, + loc, flags, mode, fd); + return 0; +} + + int32_t +ha_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + xlator_t **children = NULL; + int i = 0, child_count = 0, callcnt = 0, ret = 0; + call_frame_t *prev_frame = NULL; + hafd_t *hafdp = NULL; + uint64_t 
tmp_hafdp = 0; + + local = frame->local; + pvt = this->private; + children = pvt->children; + child_count = pvt->child_count; + prev_frame = cookie; + + ret = fd_ctx_get (local->fd, this, &tmp_hafdp); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()"); + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + for (i = 0; i < child_count; i++) + if (children[i] == prev_frame->this) + break; + LOCK (&frame->lock); + if (op_ret != -1) { + hafdp->fdstate[i] = 1; + local->op_ret = 0; + } + if (op_ret == -1 && op_errno != ENOTCONN) + local->op_errno = op_errno; + callcnt = --local->call_count; + UNLOCK (&frame->lock); + + if (callcnt == 0) { + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->fd); + } + return 0; +} + +int32_t +ha_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + xlator_t **children = NULL; + int cnt = 0, i, child_count = 0, ret = 0; + hafd_t *hafdp = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + children = pvt->children; + child_count = pvt->child_count; + + + local = frame->local = CALLOC (1, sizeof (*local)); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->fd = fd; + + hafdp = CALLOC (1, sizeof (*hafdp)); + hafdp->fdstate = CALLOC (1, child_count); + hafdp->path = strdup (loc->path); + hafdp->active = -1; + if (pvt->pref_subvol == -1) { + hafdp->active = fd->inode->ino % child_count; + } + + LOCK_INIT (&hafdp->lock); + fd_ctx_set (fd, this, (uint64_t)(long)hafdp); + ret = inode_ctx_get (loc->inode, this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + for (i = 0; i < child_count; i++) + if (stateino[i]) + cnt++; + local->call_count = cnt; + for (i = 0; i < child_count; i++) { + if (stateino[i]) { + STACK_WIND (frame, + ha_open_cbk, + children[i], + children[i]->fops->open, + loc, flags, fd); + if (--cnt == 0) + break; + } + } + return 0; +} 
+ + int32_t +ha_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + vector, + count, + stbuf); + } + return 0; +} + +int32_t +ha_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset); + + STACK_WIND_COOKIE (frame, + ha_readv_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->readv, + fd, + size, + offset); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + int32_t +ha_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + int ret = 0; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + stbuf); + } + return 0; +} + +int32_t +ha_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off); + + STACK_WIND_COOKIE (frame, + ha_writev_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->writev, + fd, + vector, + count, + off); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + int32_t +ha_flush_cbk (call_frame_t *frame, + 
void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = 0; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_flush_stub (frame, ha_flush, fd); + STACK_WIND_COOKIE (frame, + ha_flush_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->flush, + fd); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + + int32_t +ha_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = 0; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags); + STACK_WIND_COOKIE (frame, + ha_fsync_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->fsync, + fd, + flags); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + int32_t +ha_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + } + return 0; +} + +int32_t +ha_fstat (call_frame_t *frame, + xlator_t *this, + fd_t 
*fd) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_fstat_stub (frame, ha_fstat, fd); + STACK_WIND_COOKIE (frame, + ha_fstat_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->fstat, + fd); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int32_t +ha_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + xlator_t **children = NULL; + int i = 0, child_count = 0, callcnt = 0, ret = 0; + call_frame_t *prev_frame = NULL; + hafd_t *hafdp = NULL; + uint64_t tmp_hafdp = 0; + + local = frame->local; + pvt = this->private; + children = pvt->children; + child_count = pvt->child_count; + prev_frame = cookie; + + ret = fd_ctx_get (local->fd, this, &tmp_hafdp); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()"); + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + for (i = 0; i < child_count; i++) + if (children[i] == prev_frame->this) + break; + LOCK (&frame->lock); + if (op_ret != -1) { + hafdp->fdstate[i] = 1; + local->op_ret = 0; + } + if (op_ret == -1 && op_errno != ENOTCONN) + local->op_errno = op_errno; + callcnt = --local->call_count; + UNLOCK (&frame->lock); + + if (callcnt == 0) { + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + local->fd); + } + return 0; +} + +int32_t +ha_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + char *stateino = NULL; + xlator_t **children = NULL; + int cnt = 0, i, child_count = 0, ret = 0; + hafd_t *hafdp = NULL; + uint64_t tmp_stateino = 0; + + local = frame->local; + pvt = this->private; + children = pvt->children; + child_count = pvt->child_count; + + local = 
frame->local = CALLOC (1, sizeof (*local)); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->fd = fd; + + hafdp = CALLOC (1, sizeof (*hafdp)); + hafdp->fdstate = CALLOC (1, child_count); + hafdp->path = strdup (loc->path); + LOCK_INIT (&hafdp->lock); + fd_ctx_set (fd, this, (uint64_t)(long)hafdp); + ret = inode_ctx_get (loc->inode, this, &tmp_stateino); + stateino = (char *)(long)tmp_stateino; + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error"); + } + for (i = 0; i < child_count; i++) + if (stateino[i]) + cnt++; + local->call_count = cnt; + for (i = 0; i < child_count; i++) { + if (stateino[i]) { + STACK_WIND (frame, + ha_opendir_cbk, + children[i], + children[i]->fops->opendir, + loc, fd); + if (--cnt == 0) + break; + } + } + return 0; +} + + int32_t +ha_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + entries, + count); + } + return 0; +} + +int32_t +ha_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset, flag); + STACK_WIND_COOKIE (frame, + ha_getdents_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->getdents, + fd, + size, + offset, + flag); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL, 0); + return 0; +} + + int32_t +ha_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if 
(ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + + local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries, count); + + STACK_WIND_COOKIE (frame, + ha_setdents_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->setdents, + fd, + flags, + entries, + count); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + int32_t +ha_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags); + STACK_WIND_COOKIE (frame, + ha_fsyncdir_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->fsyncdir, + fd, + flags); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + + int32_t +ha_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + } + return 0; +} + +int32_t +ha_statfs (call_frame_t *frame, + xlator_t *this, + loc_t 
*loc) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + + local->stub = fop_statfs_stub (frame, ha_statfs, loc); + STACK_WIND_COOKIE (frame, + ha_statfs_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->statfs, + loc); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + int32_t +ha_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags); + STACK_WIND_COOKIE (frame, + ha_setxattr_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->setxattr, + loc, + dict, + flags); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + int32_t +ha_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + dict); + } + return 0; +} + +int32_t +ha_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local 
= frame->local; + local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name); + STACK_WIND_COOKIE (frame, + ha_getxattr_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->getxattr, + loc, + name); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + +int32_t +ha_xattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + int ret = -1; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, op_ret, op_errno, dict); + } + return 0; +} + + +int32_t +ha_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + + local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict); + + STACK_WIND_COOKIE (frame, + ha_xattrop_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->xattrop, + loc, + flags, + dict); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, dict); + return 0; +} + +int32_t +ha_fxattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + int ret = -1; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) + STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +ha_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict); + + STACK_WIND_COOKIE (frame, + ha_fxattrop_cbk, 
+ (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->fxattrop, + fd, + flags, + dict); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, dict); + return 0; +} + + int32_t +ha_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + + local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name); + + STACK_WIND_COOKIE (frame, + ha_removexattr_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->removexattr, + loc, + name); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + +int32_t +ha_lk_setlk_unlck_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + ha_local_t *local = NULL; + int cnt = 0; + call_stub_t *stub = NULL; + + local = frame->local; + + LOCK (&frame->lock); + cnt = --local->call_count; + if (op_ret == 0) + local->op_ret = 0; + UNLOCK (&frame->lock); + + if (cnt == 0) { + stub = local->stub; + FREE (local->state); + if (stub->args.lk.lock.l_type == F_UNLCK) { + STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock); + } else { + STACK_UNWIND (frame, -1, EIO, NULL); + } + call_stub_destroy (stub); + } + return 0; +} + +int32_t +ha_lk_setlk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + xlator_t 
**children = NULL; + int i = 0, cnt = 0, j = 0; + int child_count = 0; + call_frame_t *prev_frame = NULL; + char *state = NULL; + + local = frame->local; + pvt = this->private; + children = pvt->children; + child_count = pvt->child_count; + prev_frame = cookie; + state = local->state; + + if (op_ret == 0) + local->op_ret = 0; + + if ((op_ret == 0) || (op_ret == -1 && op_errno == ENOTCONN)) { + for (i = 0; i < child_count; i++) { + if (prev_frame->this == cookie) + break; + } + i++; + for (; i < child_count; i++) { + if (local->state[i]) + break; + } + if (i == child_count) { + call_stub_t *stub = local->stub; + FREE (local->state); + STACK_UNWIND (frame, 0, op_errno, &stub->args.lk.lock); + call_stub_destroy (stub); + return 0; + } + STACK_WIND (frame, + ha_lk_setlk_cbk, + children[i], + children[i]->fops->lk, + local->stub->args.lk.fd, + local->stub->args.lk.cmd, + &local->stub->args.lk.lock); + return 0; + } else { + for (i = 0; i < child_count; i++) { + if (prev_frame->this == cookie) + break; + } + cnt = 0; + for (j = 0; j < i; j++) { + if (state[i]) + cnt++; + } + if (cnt) { + struct flock lock; + lock = local->stub->args.lk.lock; + for (i = 0; i < child_count; i++) { + if (state[i]) { + STACK_WIND (frame, + ha_lk_setlk_unlck_cbk, + children[i], + children[i]->fops->lk, + local->stub->args.lk.fd, + local->stub->args.lk.cmd, + &lock); + if (--cnt == 0) + break; + } + } + return 0; + } else { + FREE (local->state); + call_stub_destroy (local->stub); + STACK_UNWIND (frame, + op_ret, + op_errno, + lock); + return 0; + } + } +} + +int32_t +ha_lk_getlk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + fd_t *fd = NULL; + int child_count = 0, i = 0; + xlator_t **children = NULL; + call_frame_t *prev_frame = NULL; + + local = frame->local; + pvt = this->private; + fd = local->stub->args.lk.fd; + child_count = pvt->child_count; + children = 
pvt->children; + prev_frame = cookie; + + if (op_ret == 0) { + FREE (local->state); + call_stub_destroy (local->stub); + STACK_UNWIND (frame, 0, 0, lock); + return 0; + } + + for (i = 0; i < child_count; i++) { + if (prev_frame->this == children[i]) + break; + } + + for (; i < child_count; i++) { + if (local->state[i]) + break; + } + + if (i == child_count) { + FREE (local->state); + call_stub_destroy (local->stub); + STACK_UNWIND (frame, op_ret, op_errno, lock); + return 0; + } + + STACK_WIND (frame, + ha_lk_getlk_cbk, + children[i], + children[i]->fops->lk, + fd, + local->stub->args.lk.cmd, + &local->stub->args.lk.lock); + return 0; +} + +int32_t +ha_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + hafd_t *hafdp = NULL; + char *state = NULL; + int child_count = 0, i = 0, cnt = 0, ret = 0; + xlator_t **children = NULL; + uint64_t tmp_hafdp = 0; + + local = frame->local; + pvt = this->private; + child_count = pvt->child_count; + children = pvt->children; + ret = fd_ctx_get (fd, this, &tmp_hafdp); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed"); + + if (local == NULL) { + local = frame->local = CALLOC (1, sizeof (*local)); + local->active = -1; + local->op_ret = -1; + local->op_errno = ENOTCONN; + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + if (local->active == -1) { + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock); + local->state = CALLOC (1, child_count); + state = hafdp->fdstate; + LOCK (&hafdp->lock); + memcpy (local->state, state, child_count); + UNLOCK (&hafdp->lock); + if (cmd == F_GETLK) { + for (i = 0; i < child_count; i++) { + if (local->state[i]) + break; + } + STACK_WIND (frame, + ha_lk_getlk_cbk, + children[i], + children[i]->fops->lk, + fd, + cmd, + lock); + } else if (cmd == F_SETLK && lock->l_type == F_UNLCK) { + for (i = 0; i < child_count; i++) { + if 
(local->state[i]) + local->call_count++; + } + cnt = local->call_count; + for (i = 0; i < child_count; i++) { + if (local->state[i]) { + STACK_WIND (frame, + ha_lk_setlk_unlck_cbk, + children[i], + children[i]->fops->lk, + fd, cmd, lock); + if (--cnt == 0) + break; + } + } + } else { + for (i = 0; i < child_count; i++) { + if (local->state[i]) + break; + } + STACK_WIND (frame, + ha_lk_setlk_cbk, + children[i], + children[i]->fops->lk, + fd, + cmd, + lock); + } + return 0; +} + + int32_t +ha_inode_entry_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno); + } + return 0; +} + +int32_t +ha_inodelk (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t cmd, + struct flock *lock) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_inodelk_stub (frame, ha_inodelk, loc, cmd, lock); + STACK_WIND_COOKIE (frame, + ha_inode_entry_lk_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->inodelk, + loc, + cmd, + lock); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + +int32_t +ha_entrylk (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *basename, + entrylk_cmd cmd, + entrylk_type type) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_entrylk_stub (frame, ha_entrylk, loc, basename, cmd, type); + STACK_WIND_COOKIE (frame, + ha_inode_entry_lk_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->entrylk, + loc, basename, 
cmd, type); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno); + return 0; +} + + int32_t +ha_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + int ret = -1; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) { + STACK_UNWIND (frame, + op_ret, + op_errno, + file_checksum, + dir_checksum); + } + return 0; +} + +int32_t +ha_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + int op_errno = 0; + ha_local_t *local = NULL; + + op_errno = ha_alloc_init_inode (frame, loc->inode); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag); + + STACK_WIND_COOKIE (frame, + ha_checksum_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->checksum, + loc, + flag); + return 0; +err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; +} + +int32_t +ha_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + int ret = 0; + + ret = ha_handle_cbk (frame, cookie, op_ret, op_errno); + if (ret == 0) + STACK_UNWIND (frame, op_ret, op_errno, entries); + return 0; +} + +int32_t +ha_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off) +{ + ha_local_t *local = NULL; + int op_errno = 0; + + op_errno = ha_alloc_init_fd (frame, fd); + if (op_errno < 0) { + op_errno = -op_errno; + goto err; + } + local = frame->local; + local->stub = fop_readdir_stub (frame, ha_readdir, fd, size, off); + STACK_WIND_COOKIE (frame, + ha_readdir_cbk, + (void *)(long)local->active, + HA_ACTIVE_CHILD(this, local), + HA_ACTIVE_CHILD(this, local)->fops->readdir, + fd, size, off); + return 0; +err: + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; +} + +/* Management 
operations */ + + int32_t +ha_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + int i = 0; + + local = frame->local; + pvt = this->private; + prev_frame = cookie; + children = pvt->children; + + if (op_ret == -1 && op_errno == ENOTCONN) { + for (i = 0; i < pvt->child_count; i++) { + if (prev_frame->this == children[i]) + break; + } + i++; + for (; i < pvt->child_count; i++) { + if (pvt->state[i]) + break; + } + + if (i == pvt->child_count) { + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + STACK_WIND (frame, + ha_stats_cbk, + children[i], + children[i]->mops->stats, + local->flags); + return 0; + } + + STACK_UNWIND (frame, + op_ret, + op_errno, + stats); + return 0; +} + +int32_t +ha_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + xlator_t **children = NULL; + int i = 0; + + local = frame->local = CALLOC (1, sizeof (*local)); + pvt = this->private; + children = pvt->children; + for (i = 0; i < pvt->child_count; i++) { + if (pvt->state[i]) + break; + } + + if (i == pvt->child_count) { + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + local->flags = flags; + + STACK_WIND (frame, + ha_stats_cbk, + children[i], + children[i]->mops->stats, + flags); + return 0; +} + + +int32_t +ha_getspec_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + char *spec_data) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + call_frame_t *prev_frame = NULL; + xlator_t **children = NULL; + int i = 0; + + local = frame->local; + pvt = this->private; + prev_frame = cookie; + children = pvt->children; + + if (op_ret == -1 && op_errno == ENOTCONN) { + for (i = 0; i < pvt->child_count; i++) { + if (prev_frame->this == children[i]) + 
break; + } + i++; + for (; i < pvt->child_count; i++) { + if (pvt->state[i]) + break; + } + + if (i == pvt->child_count) { + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + STACK_WIND (frame, + ha_getspec_cbk, + children[i], + children[i]->mops->getspec, + local->pattern, + local->flags); + return 0; + } + + STACK_UNWIND (frame, + op_ret, + op_errno, + spec_data); + return 0; +} + +int32_t +ha_getspec (call_frame_t *frame, + xlator_t *this, + const char *key, + int32_t flags) +{ + ha_local_t *local = NULL; + ha_private_t *pvt = NULL; + xlator_t **children = NULL; + int i = 0; + + local = frame->local = CALLOC (1, sizeof (*local)); + pvt = this->private; + children = pvt->children; + + local = frame->local = CALLOC (1, sizeof (*local)); + for (i = 0; i < pvt->child_count; i++) { + if (pvt->state[i]) + break; + } + + if (i == pvt->child_count) { + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + local->flags = flags; + local->pattern = (char *)key; + + STACK_WIND (frame, + ha_getspec_cbk, + children[i], + children[i]->mops->getspec, + key, flags); + return 0; +} + +int32_t +ha_closedir (xlator_t *this, + fd_t *fd) +{ + hafd_t *hafdp = NULL; + int op_errno = 0; + uint64_t tmp_hafdp = 0; + + op_errno = fd_ctx_del (fd, this, &tmp_hafdp); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error"); + return 0; + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + FREE (hafdp->fdstate); + FREE (hafdp->path); + LOCK_DESTROY (&hafdp->lock); + return 0; +} + +int32_t +ha_close (xlator_t *this, + fd_t *fd) +{ + hafd_t *hafdp = NULL; + int op_errno = 0; + uint64_t tmp_hafdp = 0; + + op_errno = fd_ctx_del (fd, this, &tmp_hafdp); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error"); + return 0; + } + hafdp = (hafd_t *)(long)tmp_hafdp; + + FREE (hafdp->fdstate); + FREE (hafdp->path); + LOCK_DESTROY (&hafdp->lock); + return 0; +} + +/* notify */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + ha_private_t *pvt = NULL; + int32_t i = 0, upcnt = 0; + + pvt = this->private; + if (pvt == NULL) { + gf_log (this->name, GF_LOG_DEBUG, "got notify before init()"); + return 0; + } + + switch (event) + { + case GF_EVENT_CHILD_DOWN: + { + for (i = 0; i < pvt->child_count; i++) { + if (data == pvt->children[i]) + break; + } + gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_DOWN from %s", pvt->children[i]->name); + pvt->state[i] = 0; + for (i = 0; i < pvt->child_count; i++) { + if (pvt->state[i]) + break; + } + if (i == pvt->child_count) { + default_notify (this, event, data); + } + } + break; + case GF_EVENT_CHILD_UP: + { + for (i = 0; i < pvt->child_count; i++) { + if (data == pvt->children[i]) + break; + } + + gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_UP from %s", pvt->children[i]->name); + + pvt->state[i] = 1; + + for (i = 0; i < pvt->child_count; i++) { + if (pvt->state[i]) + upcnt++; + } + + if (upcnt == 1) { + default_notify (this, event, data); + } + } + break; + + default: + { + default_notify (this, event, data); + } + } + + return 0; +} + +int +init (xlator_t *this) +{ + ha_private_t *pvt = NULL; + xlator_list_t *trav = NULL; + int count = 0, ret = 0; + + if (!this->children) { + gf_log (this->name,GF_LOG_ERROR, + "FATAL: ha should have one or more child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + trav = this->children; + pvt = CALLOC (1, sizeof (ha_private_t)); + + ret = dict_get_int32 (this->options, "preferred-subvolume", + &pvt->pref_subvol); + if (ret < 0) { + pvt->pref_subvol = -1; + } + + trav = this->children; + while (trav) { + count++; + trav = trav->next; + } + + pvt->child_count = count; + pvt->children = CALLOC (count, sizeof (xlator_t*)); + + trav = this->children; + count = 0; + while (trav) { + pvt->children[count] = trav->xlator; + count++; + trav = trav->next; + } + + pvt->state = CALLOC (1, count); + this->private = pvt; + return 0; +} + +void +fini (xlator_t *this) +{ + ha_private_t *priv = NULL; + priv = this->private; + FREE (priv); + return; +} + + +struct xlator_fops fops = { + .lookup = ha_lookup, + .stat = ha_stat, + .readlink = ha_readlink, + .mknod = ha_mknod, + .mkdir = ha_mkdir, + .unlink = ha_unlink, + .rmdir = ha_rmdir, + .symlink = ha_symlink, + .rename = ha_rename, + .link = ha_link, + .chmod = ha_chmod, + .chown = ha_chown, + .truncate = ha_truncate, + .utimens = ha_utimens, + .create = ha_create, + .open = ha_open, + .readv = ha_readv, + .writev = ha_writev, + .statfs = ha_statfs, + .flush = ha_flush, + .fsync = ha_fsync, + .setxattr = ha_setxattr, + .getxattr = ha_getxattr, + .removexattr = ha_removexattr, + .opendir = ha_opendir, + .readdir = ha_readdir, + .getdents = ha_getdents, + .fsyncdir = ha_fsyncdir, + .access = ha_access, + .ftruncate = ha_ftruncate, + .fstat = ha_fstat, + .lk = ha_lk, + .fchmod = ha_fchmod, + .fchown = ha_fchown, + .setdents = ha_setdents, + .lookup_cbk = ha_lookup_cbk, + .checksum = ha_checksum, + .xattrop = ha_xattrop, + .fxattrop = ha_fxattrop +}; + +struct xlator_mops mops = { + .stats = ha_stats, + .getspec = ha_getspec, +}; + +struct xlator_cbks cbks = { + .release = ha_close, + .releasedir = ha_closedir, + .forget = ha_forget, +}; diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h new file mode 100644 index 000000000..77a04f165 --- /dev/null 
+++ b/xlators/cluster/ha/src/ha.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __HA_H_ +#define __HA_H_ + /* Per-call state kept in frame->local by the ha translator. */ +typedef struct { + call_stub_t *stub; + int32_t op_ret, op_errno; + int32_t active, tries, revalidate, revalidate_error; /* 'active' is the index HA_ACTIVE_CHILD uses into the children array */ + int32_t call_count; + char *state, *pattern; /* 'pattern' holds the getspec key saved by ha_getspec() for a retry */ + dict_t *dict; + loc_t *loc; + struct stat buf; + fd_t *fd; + inode_t *inode; + int32_t flags; /* flags saved by ha_stats()/ha_getspec() so the callback can re-wind the call */ + int32_t first_success; +} ha_local_t; + /* Translator-wide private data, built in init(). */ +typedef struct { + char *state; /* one byte per child: notify() sets 1 on CHILD_UP and 0 on CHILD_DOWN */ + xlator_t **children; /* child translators, in this->children order */ + int child_count, pref_subvol; /* pref_subvol comes from option "preferred-subvolume", -1 when the option is absent */ +} ha_private_t; + /* Per-fd context; fdstate and path are freed and lock destroyed in ha_close()/ha_closedir(). */ +typedef struct { + char *fdstate; + char *path; + gf_lock_t lock; + int active; +} hafd_t; + /* Expands to the child translator at index local->active of this translator's children array. */ +#define HA_ACTIVE_CHILD(this, local) (((ha_private_t *)this->private)->children[local->active]) + +extern int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd); + +extern int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno) ; + +extern int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode); + +#endif diff --git a/xlators/cluster/map/Makefile.am b/xlators/cluster/map/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/map/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am new file mode
100644 index 000000000..44ee4d9ee --- /dev/null +++ b/xlators/cluster/map/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = map.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +map_la_LDFLAGS = -module -avoidversion + +map_la_SOURCES = map.c map-helper.c +map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = map.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c new file mode 100644 index 000000000..4e51219d4 --- /dev/null +++ b/xlators/cluster/map/src/map-helper.c @@ -0,0 +1,357 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "map.h" + + +xlator_t * +map_subvol_next (xlator_t *this, xlator_t *prev) +{ + map_private_t *priv = NULL; + xlator_t *next = NULL; + int i = 0; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (priv->xlarray[i].xl == prev) { + if ((i + 1) < priv->child_count) + next = priv->xlarray[i + 1].xl; + break; + } + } + + return next; +} + +int +map_subvol_cnt (xlator_t *this, xlator_t *subvol) +{ + int i = 0; + int ret = -1; + map_private_t *priv = NULL; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (subvol == priv->xlarray[i].xl) { + ret = i; + break; + } + } + + return ret; +} + +int +map_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) +{ + map_private_t *priv = NULL; + int cnt = 0; + int max = 0; + uint64_t y = 0; + + if (x == ((uint64_t) -1)) { + y = (uint64_t) -1; + goto out; + } + + priv = this->private; + + max = priv->child_count; + cnt = map_subvol_cnt (this, subvol); + + y = ((x * max) + cnt); + +out: + if (y_p) + *y_p = y; + + return 0; +} + + +int +map_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, + uint64_t *x_p) +{ + int cnt = 0; + int max = 0; + uint64_t x = 0; + xlator_t *subvol = 0; + map_private_t *priv = NULL; + + priv = this->private; + max = priv->child_count; + + cnt = y % max; + x = y / max; + + subvol = priv->xlarray[cnt].xl; + + if (subvol_p) + *subvol_p = subvol; + + if (x_p) + *x_p = x; + + return 0; +} + + +xlator_t * +get_mapping_subvol_from_path (xlator_t *this, const char *path) +{ + map_private_t *priv = NULL; + struct map_pattern *map = NULL; + + /* To make sure we handle '/' properly */ + if (!strcmp (path, "/")) + return NULL; + + priv = this->private; + + map = priv->map; + while (map) { + if (!strncmp (map->directory, path, map->dir_len)) { + if ((path[map->dir_len] == '/') || + (path[map->dir_len] == '\0')) { + return map->xl; + } 
+ } + + map = map->next; + } + + return priv->default_xl; +} + +xlator_t * +get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode) +{ + uint64_t subvol = 0; + int ret = -1; + + ret = inode_ctx_get (inode, this, &subvol); + if (ret != 0) + return NULL; + + return (xlator_t *)(long)subvol; +} + +int +check_multiple_volume_entry (xlator_t *this, + xlator_t *subvol) +{ + int ret = -1; + int idx = 0; + map_private_t *priv = NULL; + + priv = this->private; + + for (idx = 0; idx < priv->child_count; idx++) { + if (priv->xlarray[idx].xl == subvol) { + if (priv->xlarray[idx].mapped) { + gf_log (this->name, GF_LOG_ERROR, + "subvolume '%s' is already mapped", + subvol->name); + goto out; + } + priv->xlarray[idx].mapped = 1; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_ERROR, + "subvolume '%s' is not found", + subvol->name); + + out: + return ret; +} + +int +verify_dir_and_assign_subvol (xlator_t *this, + const char *directory, + const char *subvol) +{ + int default_flag = 0; + int ret = -1; + int idx = 0; + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + struct map_pattern *tmp_map = NULL; + + priv = this->private; + + /* check if directory is valid, ie, its a top level dir, and + * not includes a '*' in it. + */ + if (!strcmp ("*", directory)) { + default_flag = 1; + } else { + if (directory[0] != '/') { + gf_log (this->name, GF_LOG_ERROR, + "map takes absolute path, starting with '/'. " + "not '%s'", directory); + goto out; + } + for (idx = 1; idx < (strlen (directory) - 1); idx++) { + if (directory[idx] == '/') { + gf_log (this->name, GF_LOG_ERROR, + "map takes only top level directory, " + "not '%s'", directory); + goto out; + } + } + } + + /* Assign proper subvolume */ + trav = this->children; + while (trav) { + if (!strcmp (trav->xlator->name, subvol)) { + + /* Check if there is another directory for + * same volume, if yes, return error. 
+ */ + ret = check_multiple_volume_entry (this, + trav->xlator); + if (ret != 0) { + goto out; + } + + ret = 0; + if (default_flag) { + if (priv->default_xl) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "'*' specified more than " + "once. don't confuse me!!!"); + } + + priv->default_xl = trav->xlator; + goto out; + } + + tmp_map = CALLOC (1, sizeof (struct map_pattern)); + tmp_map->xl = trav->xlator; + tmp_map->dir_len = strlen (directory); + + /* make sure that the top level directory starts + * with '/' and ends without '/' + */ + tmp_map->directory = strdup (directory); + if (directory[tmp_map->dir_len - 1] == '/') { + tmp_map->dir_len--; + } + + if (!priv->map) + priv->map = tmp_map; + else { + struct map_pattern *trav_map = NULL; + trav_map = priv->map; + while (trav_map->next) + trav_map = trav_map->next; + trav_map->next = tmp_map; + } + + goto out; + } + + trav = trav->next; + } + + gf_log (this->name, GF_LOG_ERROR, + "map volume '%s' is not proper subvolume", subvol); + + out: + return ret; +} + +int +assign_default_subvol (xlator_t *this, const char *default_xl) +{ + int ret = -1; + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + + priv = this->private; + trav = this->children; + + while (trav) { + if (!strcmp (trav->xlator->name, default_xl)) { + ret = check_multiple_volume_entry (this, + trav->xlator); + if (ret != 0) { + goto out; + } + if (priv->default_xl) + gf_log (this->name, GF_LOG_WARNING, + "default-volume option provided, " + "overriding earlier '*' option"); + priv->default_xl = trav->xlator; + return 0; + } + trav = trav->next; + } + + gf_log (this->name, GF_LOG_ERROR, + "default-volume value is not an valid subvolume. 
check again"); + out: + return -1; +} + +void +verify_if_all_subvolumes_got_used (xlator_t *this) +{ + int idx = 0; + map_private_t *priv = NULL; + + priv = this->private; + + for (idx = 0; idx < priv->child_count; idx++) { + if (!priv->xlarray[idx].mapped) { + if (!priv->default_xl) { + priv->default_xl = priv->xlarray[idx].xl; + priv->xlarray[idx].mapped = 1; + } else { + gf_log (this->name, GF_LOG_WARNING, + "subvolume '%s' is not mapped to " + "any directory", + priv->xlarray[idx].xl->name); + } + } + } + + if (!priv->default_xl) { + gf_log (this->name, GF_LOG_WARNING, + "default subvolume not specified, filesystem " + "may not work properly. Check 'map' translator " + "documentation for more info"); + } + + return ; +} diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c new file mode 100644 index 000000000..8c4b7c83c --- /dev/null +++ b/xlators/cluster/map/src/map.c @@ -0,0 +1,2193 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "map.h" + +/* For <op>_cbk functions */ +#include "defaults.c" + + +int32_t +map_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_stat_cbk, + subvol, + subvol->fops->stat, + loc); + + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_chmod_cbk, + subvol, + subvol->fops->chmod, + loc, + mode); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fchmod_cbk, + subvol, + subvol->fops->fchmod, + fd, + mode); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_chown (call_frame_t *frame, + xlator_t 
*this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_chown_cbk, + subvol, + subvol->fops->chown, + loc, + uid, + gid); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fchown_cbk, + subvol, + subvol->fops->fchown, + fd, + uid, + gid); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_truncate_cbk, + subvol, + subvol->fops->truncate, + loc, + offset); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, 
err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_ftruncate_cbk, + subvol, + subvol->fops->ftruncate, + fd, + offset); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_utimens_cbk, + subvol, + subvol->fops->utimens, + loc, + tv); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_access_cbk, + subvol, + subvol->fops->access, + loc, + mask); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, 
loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_readlink_cbk, + subvol, + subvol->fops->readlink, + loc, + size); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_unlink_cbk, + subvol, + subvol->fops->unlink, + loc); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_rmdir_cbk, + subvol, + subvol->fops->rmdir, + loc); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int32_t op_errno = 1; + xlator_t *old_subvol = NULL; + xlator_t *new_subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (oldloc->inode, err); + VALIDATE_OR_GOTO (oldloc->path, err); + VALIDATE_OR_GOTO (newloc, err); + + old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode); + if (!old_subvol) { + op_errno = EINVAL; + goto err; + } + + if (newloc->path) { + new_subvol 
= get_mapping_subvol_from_path (this, + newloc->path); + if (new_subvol && (new_subvol != old_subvol)) { + op_errno = EXDEV; + goto err; + } + } + + STACK_WIND (frame, + default_rename_cbk, + old_subvol, + old_subvol->fops->rename, + oldloc, newloc); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int32_t op_errno = 1; + xlator_t *old_subvol = NULL; + xlator_t *new_subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (oldloc->inode, err); + VALIDATE_OR_GOTO (oldloc->path, err); + VALIDATE_OR_GOTO (newloc, err); + + old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode); + if (!old_subvol) { + op_errno = EINVAL; + goto err; + } + + if (newloc->path) { + new_subvol = get_mapping_subvol_from_path (this, + newloc->path); + if (new_subvol && (new_subvol != old_subvol)) { + op_errno = EXDEV; + goto err; + } + } + + STACK_WIND (frame, + default_link_cbk, + old_subvol, + old_subvol->fops->link, + oldloc, newloc); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_open_cbk, + subvol, + subvol->fops->open, + loc, flags, fd); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + int32_t op_errno = 1; + 
xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_readv_cbk, + subvol, + subvol->fops->readv, + fd, + size, + offset); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_writev_cbk, + subvol, + subvol->fops->writev, + fd, + vector, + count, + off); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_flush_cbk, + subvol, + subvol->fops->flush, + fd); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + 
op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fsync_cbk, + subvol, + subvol->fops->fsync, + fd, + flags); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fstat_cbk, + subvol, + subvol->fops->fstat, + fd); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_getdents_cbk, + subvol, + subvol->fops->getdents, + fd, + size, + offset, + flag); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_setdents_cbk, + subvol, + subvol->fops->setdents, + fd, + flags, + entries, + count); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, 
NULL, NULL); + + return 0; +} + + +int32_t +map_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fsyncdir_cbk, + subvol, + subvol->fops->fsyncdir, + fd, + flags); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + + + +int32_t +map_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + /* TODO: support for 'get' 'put' API */ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_setxattr_cbk, + subvol, + subvol->fops->setxattr, + loc, + dict, + flags); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + /* TODO: support for 'get' 'put' API */ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_getxattr_cbk, + subvol, + subvol->fops->getxattr, + loc, + name); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t 
+map_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_xattrop_cbk, + subvol, + subvol->fops->xattrop, + loc, + flags, + dict); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_fxattrop_cbk, + subvol, + subvol->fops->fxattrop, + fd, + flags, + dict); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_removexattr_cbk, + subvol, + subvol->fops->removexattr, + loc, + name); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock 
*lock) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_lk_cbk, + subvol, + subvol->fops->lk, + fd, + cmd, + lock); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_inodelk_cbk, + subvol, + subvol->fops->inodelk, + loc, cmd, lock); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_finodelk_cbk, + subvol, + subvol->fops->finodelk, + fd, cmd, lock); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + 
VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, default_entrylk_cbk, + subvol, + subvol->fops->entrylk, + loc, basename, cmd, type); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, default_fentrylk_cbk, + subvol, + subvol->fops->fentrylk, + fd, basename, cmd, type); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + default_checksum_cbk, + subvol, + subvol->fops->checksum, + loc, + flag); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +static int32_t +map_newentry_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + call_frame_t *prev = NULL; + prev = cookie; + + map_itransform (this, prev->this, buf->st_ino, &buf->st_ino); + + STACK_UNWIND (frame, op_ret, op_errno, inode, 
buf); + return 0; + +} + + +int32_t +map_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + subvol = get_mapping_subvol_from_path (this, loc->path); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set subvolume ptr in inode ctx", + loc->path); + } + + STACK_WIND (frame, + map_newentry_cbk, + subvol, + subvol->fops->mknod, + loc, mode, rdev); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + subvol = get_mapping_subvol_from_path (this, loc->path); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set subvolume ptr in inode ctx", + loc->path); + } + + STACK_WIND (frame, + map_newentry_cbk, + subvol, + subvol->fops->mkdir, + loc, mode); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int32_t +map_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + 
subvol = get_mapping_subvol_from_path (this, loc->path); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set subvolume ptr in inode ctx", + loc->path); + } + + STACK_WIND (frame, + map_newentry_cbk, + subvol, + subvol->fops->symlink, + linkpath, loc); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +static int32_t +map_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + call_frame_t *prev = NULL; + prev = cookie; + + map_itransform (this, prev->this, buf->st_ino, &buf->st_ino); + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + +int32_t +map_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, fd_t *fd) +{ + int32_t op_errno = 1; + xlator_t *subvol = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + subvol = get_mapping_subvol_from_path (this, loc->path); + if (!subvol) { + op_errno = EINVAL; + goto err; + } + + op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set subvolume ptr in inode ctx", + loc->path); + } + + STACK_WIND (frame, map_create_cbk, + subvol, + subvol->fops->create, + loc, flags, mode, fd); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int32_t +map_single_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + call_frame_t *prev = NULL; + prev = cookie; + + map_itransform (this, prev->this, buf->st_ino, 
&buf->st_ino); + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict); + + return 0; +} + +int32_t +map_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int callcnt = 0; + map_local_t *local = NULL; + inode_t *tmp_inode = NULL; + dict_t *tmp_dict = NULL; + + local = frame->local; + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if ((op_ret == 0) && (local->op_ret == -1)) { + local->op_ret = 0; + local->stbuf = *buf; + if (dict) + local->dict = dict_ref (dict); + local->inode = inode_ref (inode); + } + if (op_ret == -1) + local->op_errno = op_errno; + + } + UNLOCK (&frame->lock); + + if (!callcnt) { + tmp_dict = local->dict; + tmp_inode = local->inode; + + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, local->dict); + + inode_unref (local->inode); + if (tmp_dict) + dict_unref (tmp_dict); + } + + return 0; +} + +int32_t +map_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + int32_t op_errno = EINVAL; + xlator_t *subvol = NULL; + map_local_t *local = NULL; + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + priv = this->private; + + if (loc->inode->ino == 1) + goto root_inode; + + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + subvol = get_mapping_subvol_from_path (this, loc->path); + if (!subvol) { + goto err; + } + + op_errno = inode_ctx_put (loc->inode, this, + (uint64_t)(long)subvol); + if (op_errno != 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set subvolume in inode ctx", + loc->path); + } + } + + /* Just one callback */ + STACK_WIND (frame, + map_single_lookup_cbk, + subvol, + subvol->fops->lookup, + loc, + xattr_req); + + return 
/*
 * map_normalize_stats - rescale the counters of a statvfs buffer to a
 * common block size and fragment size, so that buffers coming from
 * different subvolumes can be added together.
 *
 * @buf:    buffer to rescale in place; NULL is ignored
 * @bsize:  target f_bsize; f_bfree and f_bavail are scaled by
 *          old_bsize / bsize
 * @frsize: target f_frsize; f_blocks is scaled by old_frsize / frsize
 *
 * A field group is left untouched when it already uses the target size.
 * A target size of 0 is ignored as well, since scaling to it would
 * divide by zero.
 */
void
map_normalize_stats (struct statvfs *buf,
                     unsigned long bsize,
                     unsigned long frsize)
{
        double factor = 0.0;

        if (!buf)
                return;

        if ((bsize != 0) && (buf->f_bsize != bsize)) {
                factor        = ((double) buf->f_bsize) / bsize;
                buf->f_bsize  = bsize;
                buf->f_bfree  = (fsblkcnt_t) (factor * buf->f_bfree);
                buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
        }

        if ((frsize != 0) && (buf->f_frsize != frsize)) {
                factor        = ((double) buf->f_frsize) / frsize;
                buf->f_frsize = frsize;
                buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
        }
}
stbuf->f_files; + dict_buf->f_ffree += stbuf->f_ffree; + dict_buf->f_favail += stbuf->f_favail; + dict_buf->f_fsid = stbuf->f_fsid; + dict_buf->f_flag = stbuf->f_flag; + dict_buf->f_namemax = stbuf->f_namemax; + } +unlock: + UNLOCK (&frame->lock); + + if (!this_call_cnt) { + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->statvfs); + } + + return 0; +} + +int32_t +map_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_errno = EINVAL; + xlator_t *subvol = NULL; + map_local_t *local = NULL; + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + if (loc->inode->ino == 1) + goto root_inode; + subvol = get_mapping_subvol_from_ctx (this, loc->inode); + if (!subvol) { + goto err; + } + + /* Just one callback */ + STACK_WIND (frame, + default_statfs_cbk, + subvol, + subvol->fops->statfs, + loc); + + return 0; + + root_inode: + local = CALLOC (1, sizeof (map_local_t)); + + priv = this->private; + frame->local = local; + local->call_count = priv->child_count; + local->op_ret = -1; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + map_statfs_cbk, + trav->xlator, + trav->xlator->fops->statfs, + loc); + trav = trav->next; + } + + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + +int32_t +map_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int callcnt = 0; + map_local_t *local = NULL; + fd_t *local_fd = NULL; + + local = frame->local; + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + + local->op_ret = 0; + } + unlock: + UNLOCK (&frame->lock); + + if (!callcnt) { + local_fd = local->fd; + local->fd = NULL; + + STACK_UNWIND (frame, local->op_ret, + 
local->op_errno, local_fd); + + fd_unref (local_fd); + } + return 0; +} + + +int32_t +map_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd) +{ + int32_t op_errno = EINVAL; + xlator_t *subvol = NULL; + map_local_t *local = NULL; + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + if (loc->inode->ino == 1) + goto root_inode; + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + goto err; + } + + /* Just one callback */ + STACK_WIND (frame, + default_opendir_cbk, + subvol, + subvol->fops->opendir, + loc, fd); + return 0; + + root_inode: + local = CALLOC (1, sizeof (map_local_t)); + + priv = this->private; + frame->local = local; + local->call_count = priv->child_count; + local->op_ret = -1; + local->fd = fd_ref (fd); + + trav = this->children; + while (trav) { + STACK_WIND (frame, + map_opendir_cbk, + trav->xlator, + trav->xlator->fops->opendir, + loc, fd); + trav = trav->next; + } + + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int32_t +map_single_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + call_frame_t *prev = NULL; + gf_dirent_t *orig_entry = NULL; + + prev = cookie; + + list_for_each_entry (orig_entry, &entries->list, list) { + map_itransform (this, prev->this, orig_entry->d_ino, + &orig_entry->d_ino); + } + STACK_UNWIND (frame, op_ret, op_errno, entries); + + return 0; +} + + +int +map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *orig_entries) +{ + map_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + xlator_t *next = NULL; + int count = 0; + fd_t *local_fd = NULL; + + 
INIT_LIST_HEAD (&entries.list); + prev = cookie; + local = frame->local; + + if (op_ret < 0) + goto done; + + list_for_each_entry (orig_entry, &orig_entries->list, list) { + subvol = prev->this; + + entry = gf_dirent_for_name (orig_entry->d_name); + if (!entry) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto unwind; + } + + map_itransform (this, subvol, orig_entry->d_ino, + &entry->d_ino); + map_itransform (this, subvol, orig_entry->d_off, + &entry->d_off); + + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; + + list_add_tail (&entry->list, &entries.list); + count++; + } + + op_ret = count; + +done: + if (count == 0) { + next = map_subvol_next (this, prev->this); + if (!next) { + goto unwind; + } + + STACK_WIND (frame, map_readdir_cbk, + next, next->fops->readdir, + local->fd, local->size, 0); + return 0; + } + +unwind: + if (op_ret < 0) + op_ret = 0; + + local_fd = local->fd; + local->fd = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, &entries); + + fd_unref (local_fd); + + gf_dirent_free (&entries); + + return 0; +} + + +int32_t +map_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t yoff) +{ + int32_t op_errno = EINVAL; + xlator_t *subvol = NULL; + map_local_t *local = NULL; + map_private_t *priv = NULL; + xlator_t *xvol = NULL; + off_t xoff = 0; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + if (fd->inode->ino == 1) + goto root_inode; + + subvol = get_mapping_subvol_from_ctx (this, fd->inode); + if (!subvol) { + goto err; + } + + /* Just one callback */ + + STACK_WIND (frame, + map_single_readdir_cbk, + subvol, + subvol->fops->readdir, + fd, size, yoff); + return 0; + + root_inode: + /* readdir on '/' */ + local = CALLOC (1, sizeof (map_local_t)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + priv = 
this->private; + frame->local = local; + local->op_errno = ENOENT; + local->op_ret = -1; + + local->fd = fd_ref (fd); + local->size = size; + + map_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + + STACK_WIND (frame, map_readdir_cbk, + xvol, xvol->fops->readdir, + fd, size, xoff); + + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +#if 0 +/* TODO : do it later as currently only unify uses this mop and mostly + unify will be used below map */ +int32_t +map_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + STACK_UNWIND (frame, op_ret, op_errno, stats); + return 0; +} + + +int32_t +map_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + STACK_WIND (frame, + map_stats_cbk, + subvol, + subvol->mops->stats, + flags); + return 0; + err: + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} +#endif /* if 0 */ + + +/* TODO: define the behavior of notify */ + + +void +fini (xlator_t *this) +{ + map_private_t *priv = NULL; + struct map_pattern *trav_map = NULL; + struct map_pattern *tmp_map = NULL; + + priv = this->private; + + if (priv) { + if (priv->xlarray) + FREE (priv->xlarray); + + trav_map = priv->map; + while (trav_map) { + tmp_map = trav_map; + trav_map = trav_map->next; + FREE (tmp_map); + } + + FREE(priv); + } + + return; +} + +int +init (xlator_t *this) +{ + map_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int count = 0; + int ret = -1; + char *pattern_string = NULL; + char *map_pair_str = NULL; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *dup_map_pair = NULL; + char *dir_str = NULL; + char *subvol_str = NULL; + char *default_xl = NULL; + + if (!this->children) { + gf_log (this->name,GF_LOG_ERROR, + "FATAL: map should have one or more child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + priv = CALLOC (1, sizeof (map_private_t)); + this->private = priv; + + /* allocate xlator array */ + trav = this->children; + while (trav) { + count++; + trav = trav->next; + } + priv->xlarray = CALLOC (1, sizeof (struct map_xlator_array) * count); + priv->child_count = count; + + /* build xlator array */ + count = 0; + trav = this->children; + while (trav) { + priv->xlarray[count++].xl = trav->xlator; + trav = trav->next; + } + + /* map dir1:brick1;dir2:brick2;dir3:brick3;*:brick4 */ + ret = dict_get_str (this->options, "map-directory", &pattern_string); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "map.pattern not given, can't continue"); + goto err; + } + map_pair_str = strtok_r (pattern_string, ";", &tmp_str); + while (map_pair_str) { + dup_map_pair = strdup (map_pair_str); + dir_str = strtok_r (dup_map_pair, ":", &tmp_str1); + if (!dir_str) { + gf_log (this->name, GF_LOG_ERROR, + "directory string invalid"); + goto err; + } + subvol_str = strtok_r (NULL, ":", &tmp_str1); + if (!subvol_str) { + gf_log (this->name, GF_LOG_ERROR, + "mapping subvolume string invalid"); + goto err; + } + ret = verify_dir_and_assign_subvol (this, + dir_str, + subvol_str); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "verification failed"); + goto err; + } + + FREE (dup_map_pair); + + map_pair_str = strtok_r (NULL, ";", &tmp_str); + } + + /* default-volume brick4 */ + ret = dict_get_str (this->options, "default-volume", &default_xl); + if (ret == 0) { + ret = assign_default_subvol (this, default_xl); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "assigning default failed"); + goto err; + } + } + + verify_if_all_subvolumes_got_used (this); + + return 0; + err: + fini (this); + return -1; +} + + +struct xlator_fops fops = { + .lookup = map_lookup, + .mknod = map_mknod, + .create = map_create, + + .stat = map_stat, + .chmod = map_chmod, + .chown = map_chown, + .fchown = map_fchown, + .fchmod = map_fchmod, + .fstat = map_fstat, + 
.utimens = map_utimens, + .truncate = map_truncate, + .ftruncate = map_ftruncate, + .access = map_access, + .readlink = map_readlink, + .setxattr = map_setxattr, + .getxattr = map_getxattr, + .removexattr = map_removexattr, + .open = map_open, + .readv = map_readv, + .writev = map_writev, + .flush = map_flush, + .fsync = map_fsync, + .statfs = map_statfs, + .lk = map_lk, + .opendir = map_opendir, + .readdir = map_readdir, + .fsyncdir = map_fsyncdir, + .symlink = map_symlink, + .unlink = map_unlink, + .link = map_link, + .mkdir = map_mkdir, + .rmdir = map_rmdir, + .rename = map_rename, + .inodelk = map_inodelk, + .finodelk = map_finodelk, + .entrylk = map_entrylk, + .fentrylk = map_fentrylk, + .xattrop = map_xattrop, + .fxattrop = map_fxattrop, + .setdents = map_setdents, + .getdents = map_getdents, + .checksum = map_checksum, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"map-directory"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"default-volume"}, + .type = GF_OPTION_TYPE_XLATOR + }, + + { .key = {NULL} } +}; diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h new file mode 100644 index 000000000..0f1aabfd6 --- /dev/null +++ b/xlators/cluster/map/src/map.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __MAP_H__ +#define __MAP_H__ + +#include "xlator.h" + +struct map_pattern { + struct map_pattern *next; + xlator_t *xl; + char *directory; + int dir_len; +}; + +struct map_xlator_array { + xlator_t *xl; + int mapped; /* yes/no */ +}; + +typedef struct { + struct map_pattern *map; + xlator_t *default_xl; + struct map_xlator_array *xlarray; + int child_count; +} map_private_t; + +typedef struct { + int32_t op_ret; + int32_t op_errno; + int call_count; + struct statvfs statvfs; + struct stat stbuf; + inode_t *inode; + dict_t *dict; + fd_t *fd; + + size_t size; +} map_local_t; + +xlator_t *map_subvol_next (xlator_t *this, xlator_t *prev); +int map_subvol_cnt (xlator_t *this, xlator_t *subvol); + +int map_itransform (xlator_t *this, xlator_t *subvol, + uint64_t x, uint64_t *y_p); +int map_deitransform (xlator_t *this, uint64_t y, + xlator_t **subvol_p, uint64_t *x_p); + + +xlator_t *get_mapping_subvol_from_path (xlator_t *this, const char *path); +xlator_t *get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode); + +int check_multiple_volume_entry (xlator_t *this, xlator_t *subvol); +int verify_dir_and_assign_subvol (xlator_t *this, + const char *directory, const char *subvol); +int assign_default_subvol (xlator_t *this, const char *default_xl); +void verify_if_all_subvolumes_got_used (xlator_t *this); + + +#endif /* __MAP_H__ */ diff --git a/xlators/cluster/stripe/Makefile.am b/xlators/cluster/stripe/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/stripe/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am new file mode 100644 index 000000000..60e0a1568 --- /dev/null +++ b/xlators/cluster/stripe/src/Makefile.am @@ -0,0 +1,14 @@ + +xlator_LTLIBRARIES = stripe.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +stripe_la_LDFLAGS = -module 
-avoidversion + +stripe_la_SOURCES = stripe.c +stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c new file mode 100644 index 000000000..83787ca2a --- /dev/null +++ b/xlators/cluster/stripe/src/stripe.c @@ -0,0 +1,3286 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/** + * xlators/cluster/stripe: + * Stripe translator, stripes the data accross its child nodes, + * as per the options given in the volfile. The striping works + * fairly simple. It writes files at different offset as per + * calculation. So, 'ls -l' output at the real posix level will + * show file size bigger than the actual size. But when one does + * 'df' or 'du <file>', real size of the file on the server is shown. + * + * WARNING: + * Stripe translator can't regenerate data if a child node gets disconnected. + * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its + * very much necessary, or else, use it in combination with AFR, to have a + * backup copy. 
+ */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "logging.h" +#include "defaults.h" +#include "compat.h" +#include "compat-errno.h" +#include <fnmatch.h> +#include <signal.h> + +#define STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \ + if (!(_loc && _loc->inode)) { \ + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \ + return 0; \ + } \ +} while(0) + +/** + * struct stripe_options : This keeps the pattern and the block-size + * information, which is used for striping on a file. + */ +struct stripe_options { + struct stripe_options *next; + char path_pattern[256]; + uint64_t block_size; +}; + +/** + * Private structure for stripe translator + */ +struct stripe_private { + struct stripe_options *pattern; + xlator_t **xl_array; + uint64_t block_size; + gf_lock_t lock; + uint8_t nodes_down; + int8_t first_child_down; + int8_t child_count; + int8_t state[256]; /* Current state of the child node, + 0 for down, 1 for up */ + gf_boolean_t xattr_supported; /* 0 for no, 1 for yes, default yes */ +}; + +/** + * Used to keep info about the replies received from fops->readv calls + */ +struct readv_replies { + struct iovec *vector; + int32_t count; //count of vector + int32_t op_ret; //op_ret of readv + int32_t op_errno; + struct stat stbuf; /* 'stbuf' is also a part of reply */ +}; + +/** + * Local structure to be passed with all the frames in case of STACK_WIND + */ +struct stripe_local; /* this itself is used inside the structure; */ + +struct stripe_local { + struct stripe_local *next; + call_frame_t *orig_frame; + + /* Used by _cbk functions */ + struct stat stbuf; + struct readv_replies *replies; + struct statvfs statvfs_buf; + dir_entry_t *entry; + struct xlator_stats stats; + + int8_t revalidate; + int8_t failed; + int8_t unwind; + + int32_t node_index; + int32_t call_count; + int32_t wind_count; /* used instead of child_cound + in case of read and write */ + int32_t op_ret; + int32_t op_errno; + 
int32_t count; + int32_t flags; + char *name; + inode_t *inode; + + loc_t loc; + loc_t loc2; + + /* For File I/O fops */ + dict_t *dict; + + /* General usage */ + off_t offset; + off_t stripe_size; + + int8_t *list; + struct flock lock; + fd_t *fd; + void *value; +}; + +typedef struct stripe_local stripe_local_t; +typedef struct stripe_private stripe_private_t; + +/** + * stripe_get_matching_bs - Get the matching block size for the given path. + */ +int32_t +stripe_get_matching_bs (const char *path, + struct stripe_options *opts, + uint64_t default_bs) +{ + struct stripe_options *trav = NULL; + char *pathname = NULL; + uint64_t block_size = 0; + + block_size = default_bs; + pathname = strdup (path); + trav = opts; + + while (trav) { + if (fnmatch (trav->path_pattern, + pathname, FNM_NOESCAPE) == 0) { + block_size = trav->block_size; + break; + } + trav = trav->next; + } + free (pathname); + + return block_size; +} + + +/* + * stripe_common_cbk - + */ +int32_t +stripe_common_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * stripe_stack_unwind_cbk - This function is used for all the _cbk without + * any extra arguments (other than the minimum given) + * This is called from functions like fsync,unlink,rmdir etc. 
+ * + */ +int32_t +stripe_stack_unwind_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->loc.path) + loc_wipe (&local->loc); + if (local->loc2.path) + loc_wipe (&local->loc2); + + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + return 0; +} + +int32_t +stripe_common_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +/** + * stripe_stack_unwind_buf_cbk - This function is used for all the _cbk with + * 'struct stat *buf' as extra argument (other than minimum) + * This is called from functions like, chmod, fchmod, chown, fchown, + * truncate, ftruncate, utimens etc. 
+ * + * @cookie - this argument should be always 'xlator_t *' of child node + */ +int32_t +stripe_stack_unwind_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + + if (op_ret == 0) { + local->op_ret = 0; + if (local->stbuf.st_blksize == 0) { + local->stbuf = *buf; + /* Because st_blocks gets added again */ + local->stbuf.st_blocks = 0; + } + + if (FIRST_CHILD(this) == + ((call_frame_t *)cookie)->this) { + /* Always, pass the inode number of + first child to the above layer */ + local->stbuf.st_ino = buf->st_ino; + local->stbuf.st_mtime = buf->st_mtime; + } + + local->stbuf.st_blocks += buf->st_blocks; + if (local->stbuf.st_size < buf->st_size) + local->stbuf.st_size = buf->st_size; + if (local->stbuf.st_blksize != buf->st_blksize) { + /* TODO: add to blocks in terms of + original block size */ + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->loc.path) + loc_wipe (&local->loc); + if (local->loc2.path) + loc_wipe (&local->loc2); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + +/* In case of symlink, mknod, the file is created on just first node */ +int32_t +stripe_common_inode_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +/** + * stripe_stack_unwind_inode_cbk - This is called by the function like, + * link (), symlink (), mkdir (), mknod () + * This creates a 
inode for new inode. It keeps a list of all + * the inodes received from the child nodes. It is used while + * forwarding any fops to child nodes. + * + */ +int32_t +stripe_stack_unwind_inode_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + if (local->stbuf.st_blksize == 0) { + local->inode = inode; + local->stbuf = *buf; + /* Because st_blocks gets added again */ + local->stbuf.st_blocks = 0; + } + if (FIRST_CHILD(this) == + ((call_frame_t *)cookie)->this) { + local->stbuf.st_ino = buf->st_ino; + local->stbuf.st_mtime = buf->st_mtime; + } + + local->stbuf.st_blocks += buf->st_blocks; + if (local->stbuf.st_size < buf->st_size) + local->stbuf.st_size = buf->st_size; + if (local->stbuf.st_blksize != buf->st_blksize) { + /* TODO: add to blocks in terms of + original block size */ + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf); + } + + return 0; +} + +int32_t +stripe_stack_unwind_inode_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + dict_t *tmp_dict = NULL; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + if (op_errno != ENOENT) + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t 
*)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + if (local->stbuf.st_blksize == 0) { + local->inode = inode; + local->stbuf = *buf; + /* Because st_blocks gets added again */ + local->stbuf.st_blocks = 0; + } + if (FIRST_CHILD(this) == + ((call_frame_t *)cookie)->this) { + local->stbuf.st_ino = buf->st_ino; + local->stbuf.st_mtime = buf->st_mtime; + if (local->dict) + dict_unref (local->dict); + local->dict = dict_ref (dict); + } else { + if (!local->dict) + local->dict = dict_ref (dict); + } + local->stbuf.st_blocks += buf->st_blocks; + if (local->stbuf.st_size < buf->st_size) + local->stbuf.st_size = buf->st_size; + if (local->stbuf.st_blksize != buf->st_blksize) { + /* TODO: add to blocks in terms of + original block size */ + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + tmp_dict = local->dict; + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->dict); + if (tmp_dict) + dict_unref (tmp_dict); + } + + return 0; +} + + +/** + * stripe_lookup - + */ +int32_t +stripe_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = this->private; + char send_lookup_to_all = 0; + + if (!(loc && loc->inode)) { + gf_log (this->name, GF_LOG_ERROR, + "wrong argument, returning EINVAL"); + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + + if ((!loc->inode->st_mode) || + S_ISDIR (loc->inode->st_mode) || + S_ISREG (loc->inode->st_mode)) + send_lookup_to_all = 1; + + if (send_lookup_to_all) { + /* Everytime in stripe lookup, all child nodes + should be looked up */ + local->call_count = 
priv->child_count; + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_inode_lookup_cbk, + trav->xlator, + trav->xlator->fops->lookup, + loc, xattr_req); + trav = trav->next; + } + } else { + local->call_count = 1; + + STACK_WIND (frame, + stripe_stack_unwind_inode_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + } + + return 0; +} + +/** + * stripe_stat - + */ +int32_t +stripe_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int send_lookup_to_all = 0; + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_lookup_to_all = 1; + + if (!send_lookup_to_all) { + STACK_WIND (frame, + stripe_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + local->call_count = priv->child_count; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->stat, + loc); + trav = trav->next; + } + } + return 0; +} + + +/** + * stripe_chmod - + */ +int32_t +stripe_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int send_fop_to_all = 0; + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_fop_to_all = 1; + + if (!send_fop_to_all) { + STACK_WIND (frame, + 
stripe_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + loc, mode); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + local->call_count = priv->child_count; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->chmod, + loc, mode); + trav = trav->next; + } + } + return 0; +} + + +/** + * stripe_chown - + */ +int32_t +stripe_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + int send_fop_to_all = 0; + xlator_list_t *trav = NULL; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_fop_to_all = 1; + + trav = this->children; + if (!send_fop_to_all) { + STACK_WIND (frame, + stripe_common_buf_cbk, + trav->xlator, + trav->xlator->fops->chown, + loc, uid, gid); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + local->call_count = priv->child_count; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->chown, + loc, uid, gid); + trav = trav->next; + } + } + + return 0; +} + + +/** + * stripe_statfs_cbk - + */ +int32_t +stripe_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *stbuf) +{ + stripe_local_t *local = (stripe_local_t *)frame->local; + int32_t callcnt; + LOCK(&frame->lock); + { + callcnt = 
--local->call_count; + + if (op_ret != 0 && op_errno != ENOTCONN) { + local->op_errno = op_errno; + } + if (op_ret == 0) { + struct statvfs *dict_buf = &local->statvfs_buf; + dict_buf->f_bsize = stbuf->f_bsize; + dict_buf->f_frsize = stbuf->f_frsize; + dict_buf->f_blocks += stbuf->f_blocks; + dict_buf->f_bfree += stbuf->f_bfree; + dict_buf->f_bavail += stbuf->f_bavail; + dict_buf->f_files += stbuf->f_files; + dict_buf->f_ffree += stbuf->f_ffree; + dict_buf->f_favail += stbuf->f_favail; + dict_buf->f_fsid = stbuf->f_fsid; + dict_buf->f_flag = stbuf->f_flag; + dict_buf->f_namemax = stbuf->f_namemax; + local->op_ret = 0; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, + local->op_errno, &local->statvfs_buf); + } + + return 0; +} + + +/** + * stripe_statfs - + */ +int32_t +stripe_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + local->op_errno = ENOTCONN; + frame->local = local; + + local->call_count = ((stripe_private_t *)this->private)->child_count; + while (trav) { + STACK_WIND (frame, + stripe_statfs_cbk, + trav->xlator, + trav->xlator->fops->statfs, + loc); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_truncate - + */ +int32_t +stripe_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + int send_fop_to_all = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_fop_to_all = 1; + + if (!send_fop_to_all) { + 
STACK_WIND (frame, + stripe_common_buf_cbk, + trav->xlator, + trav->xlator->fops->truncate, + loc, + offset); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->truncate, + loc, + offset); + trav = trav->next; + } + } + + return 0; +} + + +/** + * stripe_utimens - + */ +int32_t +stripe_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + int send_fop_to_all = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_fop_to_all = 1; + + if (!send_fop_to_all) { + STACK_WIND (frame, + stripe_common_buf_cbk, + trav->xlator, + trav->xlator->fops->utimens, + loc, tv); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->utimens, + loc, tv); + trav = trav->next; + } + } + return 0; +} + + +int32_t +stripe_first_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + stripe_local_t *local = frame->local; + xlator_list_t *trav = this->children; + + if (op_ret == -1) + { + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; + } + + local->op_ret = 0; + 
local->stbuf = *buf; + local->call_count--; + trav = trav->next; /* Skip first child */ + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->rename, + &local->loc, &local->loc2); + trav = trav->next; + } + + return 0; +} +/** + * stripe_rename - + */ +int32_t +stripe_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, EIO, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + local->inode = oldloc->inode; + loc_copy (&local->loc, oldloc); + loc_copy (&local->loc2, newloc); + + local->call_count = priv->child_count; + + frame->local = local; + + STACK_WIND (frame, + stripe_first_rename_cbk, + trav->xlator, + trav->xlator->fops->rename, + oldloc, newloc); + + return 0; +} + + +/** + * stripe_access - + */ +int32_t +stripe_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, + stripe_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + loc, mask); + + return 0; +} + + +/** + * stripe_readlink_cbk - + */ +int32_t +stripe_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + STACK_UNWIND (frame, op_ret, op_errno, path); + + return 0; +} + + +/** + * stripe_readlink - + */ +int32_t +stripe_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log 
(this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + STACK_WIND (frame, + stripe_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, size); + + return 0; +} + + +/** + * stripe_unlink - + */ +int32_t +stripe_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int send_fop_to_all = 0; + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO); + return 0; + } + + if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) + send_fop_to_all = 1; + + if (!send_fop_to_all) { + STACK_WIND (frame, + stripe_common_cbk, + trav->xlator, + trav->xlator->fops->unlink, + loc); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->unlink, + loc); + trav = trav->next; + } + } + + return 0; +} + + +int32_t +stripe_first_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + xlator_list_t *trav = this->children; + stripe_local_t *local = frame->local; + + if (op_ret == -1) + { + STACK_UNWIND (frame, op_ret, op_errno); + return 0; + } + + local->call_count--; /* First child successful */ + trav = trav->next; /* Skip first child */ + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->rmdir, + &local->loc); + trav = trav->next; + } + + return 0; +} + +/** + * stripe_rmdir - + */ +int32_t +stripe_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + 
stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = loc->inode; + loc_copy (&local->loc, loc); + local->call_count = priv->child_count; + + STACK_WIND (frame, + stripe_first_rmdir_cbk, + trav->xlator, + trav->xlator->fops->rmdir, + loc); + + return 0; +} + + +/** + * stripe_setxattr - + */ +int32_t +stripe_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN); + return 0; + } + + STACK_WIND (frame, + stripe_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags); + + return 0; +} + + +int32_t +stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf); + } + + return 0; +} + + +/** + */ +int32_t +stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + LOCK 
(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->op_ret == -1) { + local->call_count = priv->child_count; + while (trav) { + STACK_WIND (frame, + stripe_mknod_ifreg_fail_unlink_cbk, + trav->xlator, + trav->xlator->fops->unlink, + &local->loc); + trav = trav->next; + } + return 0; + } + + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf); + } + return 0; +} + +/** + */ +int32_t +stripe_mknod_ifreg_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + stripe_private_t *priv = this->private; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + /* Get the mapping in inode private */ + /* Get the stat buf right */ + if (local->stbuf.st_blksize == 0) { + local->stbuf = *buf; + /* Because st_blocks gets added again */ + local->stbuf.st_blocks = 0; + } + + /* Always, pass the inode number of first child + to the above layer */ + if (FIRST_CHILD(this) == + ((call_frame_t *)cookie)->this) + local->stbuf.st_ino = buf->st_ino; + + local->stbuf.st_blocks += buf->st_blocks; + if (local->stbuf.st_size < buf->st_size) + local->stbuf.st_size = buf->st_size; + if (local->stbuf.st_blksize != buf->st_blksize) { + /* TODO: add to blocks in terms of + original block size */ + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if 
(local->failed) + local->op_ret = -1; + + if ((local->op_ret != -1) && priv->xattr_supported) { + /* Send a setxattr request to nodes where the + files are created */ + int32_t index = 0; + char size_key[256] = {0,}; + char index_key[256] = {0,}; + char count_key[256] = {0,}; + xlator_list_t *trav = this->children; + dict_t *dict = NULL; + + sprintf (size_key, + "trusted.%s.stripe-size", this->name); + sprintf (count_key, + "trusted.%s.stripe-count", this->name); + sprintf (index_key, + "trusted.%s.stripe-index", this->name); + + local->call_count = priv->child_count; + + while (trav) { + dict = get_new_dict (); + dict_ref (dict); + /* TODO: check return value */ + ret = dict_set_int64 (dict, size_key, + local->stripe_size); + ret = dict_set_int32 (dict, count_key, + local->call_count); + ret = dict_set_int32 (dict, index_key, index); + + STACK_WIND (frame, + stripe_mknod_ifreg_setxattr_cbk, + trav->xlator, + trav->xlator->fops->setxattr, + &local->loc, dict, 0); + + dict_unref (dict); + index++; + trav = trav->next; + } + } else { + /* Create itself has failed.. so return + without setxattring */ + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf); + } + } + + return 0; +} + + +/** + * stripe_mknod - + */ +int32_t +stripe_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, NULL, NULL); + return 0; + } + + if (S_ISREG(mode)) { + /* NOTE: on older kernels (older than 2.6.9), + creat() fops is sent as mknod() + open(). 
Hence handling + S_IFREG files is necessary */ + if (priv->nodes_down) { + gf_log (this->name, GF_LOG_WARNING, + "Some node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, loc->inode, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->stripe_size = stripe_get_matching_bs (loc->path, + priv->pattern, + priv->block_size); + frame->local = local; + local->inode = loc->inode; + loc_copy (&local->loc, loc); + + /* Everytime in stripe lookup, all child nodes should + be looked up */ + local->call_count = + ((stripe_private_t *)this->private)->child_count; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_mknod_ifreg_cbk, + trav->xlator, + trav->xlator->fops->mknod, + loc, mode, rdev); + trav = trav->next; + } + + /* This case is handled, no need to continue further. */ + return 0; + } + + + STACK_WIND (frame, + stripe_common_inode_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev); + + return 0; +} + + +/** + * stripe_mkdir - + */ +int32_t +stripe_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, NULL, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + local->call_count = priv->child_count; + frame->local = local; + + /* Everytime in stripe lookup, all child nodes should be looked up */ + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_inode_cbk, + trav->xlator, + trav->xlator->fops->mkdir, + loc, mode); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_symlink - + */ +int32_t 
+stripe_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + stripe_private_t *priv = this->private; + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, NULL, NULL); + return 0; + } + + /* send symlink to only first node */ + STACK_WIND (frame, + stripe_common_inode_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, loc); + + return 0; +} + +/** + * stripe_link - + */ +int32_t +stripe_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int send_fop_to_all = 0; + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, NULL, NULL); + return 0; + } + + + if (S_ISREG (oldloc->inode->st_mode)) + send_fop_to_all = 1; + + if (!send_fop_to_all) { + STACK_WIND (frame, + stripe_common_inode_cbk, + trav->xlator, + trav->xlator->fops->link, + oldloc, newloc); + } else { + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + /* Everytime in stripe lookup, all child + nodes should be looked up */ + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_inode_cbk, + trav->xlator, + trav->xlator->fops->link, + oldloc, newloc); + trav = trav->next; + } + } + + return 0; +} + +int32_t +stripe_create_fail_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + fd_t *lfd = NULL; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + lfd = local->fd; + loc_wipe 
(&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf); + fd_unref (lfd); + } + return 0; +} + + +/** + * stripe_create_setxattr_cbk - + */ +int32_t +stripe_create_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + fd_t *lfd = NULL; + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->op_ret == -1) { + local->call_count = priv->child_count; + while (trav) { + STACK_WIND (frame, + stripe_create_fail_unlink_cbk, + trav->xlator, + trav->xlator->fops->unlink, + &local->loc); + trav = trav->next; + } + + return 0; + } + + lfd = local->fd; + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf); + fd_unref (lfd); + } + + return 0; +} + +/** + * stripe_create_cbk - + */ +int32_t +stripe_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + stripe_private_t *priv = this->private; + fd_t *lfd = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + /* Get the mapping in inode private */ + /* Get the stat buf right */ + if 
(local->stbuf.st_blksize == 0) { + local->stbuf = *buf; + /* Because st_blocks gets added again */ + local->stbuf.st_blocks = 0; + } + + /* Always, pass the inode number of first + child to the above layer */ + if (FIRST_CHILD(this) == + ((call_frame_t *)cookie)->this) + local->stbuf.st_ino = buf->st_ino; + + local->stbuf.st_blocks += buf->st_blocks; + if (local->stbuf.st_size < buf->st_size) + local->stbuf.st_size = buf->st_size; + if (local->stbuf.st_blksize != buf->st_blksize) { + /* TODO: add to blocks in terms of + original block size */ + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret >= 0) { + fd_ctx_set (local->fd, this, local->stripe_size); + } + + if ((local->op_ret != -1) && + local->stripe_size && priv->xattr_supported) { + /* Send a setxattr request to nodes where + the files are created */ + int ret = 0; + int32_t index = 0; + char size_key[256] = {0,}; + char index_key[256] = {0,}; + char count_key[256] = {0,}; + xlator_list_t *trav = this->children; + dict_t *dict = NULL; + + sprintf (size_key, + "trusted.%s.stripe-size", this->name); + sprintf (count_key, + "trusted.%s.stripe-count", this->name); + sprintf (index_key, + "trusted.%s.stripe-index", this->name); + + local->call_count = priv->child_count; + + while (trav) { + dict = get_new_dict (); + dict_ref (dict); + + /* TODO: check return values */ + ret = dict_set_int64 (dict, size_key, + local->stripe_size); + ret = dict_set_int32 (dict, count_key, + local->call_count); + ret = dict_set_int32 (dict, index_key, index); + + STACK_WIND (frame, + stripe_create_setxattr_cbk, + trav->xlator, + trav->xlator->fops->setxattr, + &local->loc, + dict, + 0); + + dict_unref (dict); + index++; + trav = trav->next; + } + } else { + /* Create itself has failed.. 
so return + without setxattring */ + lfd = local->fd; + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf); + + fd_unref (lfd); + } + } + + return 0; +} + + +/** + * stripe_create - If a block-size is specified for the 'name', create the + * file in all the child nodes. If not, create it in only first child. + * + * @name- complete path of the file to be created. + */ +int32_t +stripe_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + + /* files created in O_APPEND mode does not allow lseek() on fd */ + flags &= ~O_APPEND; + + if (priv->first_child_down || priv->nodes_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, fd, loc->inode, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + local->op_errno = ENOTCONN; + local->stripe_size = stripe_get_matching_bs (loc->path, + priv->pattern, + priv->block_size); + frame->local = local; + local->inode = loc->inode; + loc_copy (&local->loc, loc); + local->fd = fd_ref (fd); + + local->call_count = ((stripe_private_t *)this->private)->child_count; + + trav = this->children; + while (trav) { + STACK_WIND (frame, + stripe_create_cbk, + trav->xlator, + trav->xlator->fops->create, + loc, flags, mode, fd); + trav = trav->next; + } + + return 0; +} + +/** + * stripe_open_cbk - + */ +int32_t +stripe_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + local->failed = 1; + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + 
((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret >= 0) { + fd_ctx_set (local->fd, this, local->stripe_size); + } + loc_wipe (&local->loc); + STACK_UNWIND (frame, local->op_ret, local->op_errno, fd); + } + + return 0; +} + + +/** + * stripe_getxattr_cbk - + */ +int32_t +stripe_open_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + xlator_list_t *trav = this->children; + stripe_private_t *priv = this->private; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (!local->failed && (local->op_ret != -1)) { + /* If getxattr doesn't fails, call open */ + char size_key[256] = {0,}; + data_t *stripe_size_data = NULL; + + sprintf (size_key, + "trusted.%s.stripe-size", this->name); + stripe_size_data = dict_get (dict, size_key); + + if (stripe_size_data) { + local->stripe_size = + data_to_int64 (stripe_size_data); + /* + if (local->stripe_size != priv->block_size) { + gf_log (this->name, GF_LOG_WARNING, + "file(%s) is having different " + "block-size", local->loc.path); + } + */ + } else { + /* if the file was created using earlier + versions of stripe */ + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] Seems like file(%s) " + "created using earlier version", + local->loc.path); + } + } + + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_open_cbk, + 
trav->xlator, + trav->xlator->fops->open, + &local->loc, local->flags, local->fd); + trav = trav->next; + } + } + + return 0; +} + +/** + * stripe_open - + */ +int32_t +stripe_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* files opened in O_APPEND mode does not allow lseek() on fd */ + flags &= ~O_APPEND; + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->fd = fd; + frame->local = local; + local->inode = loc->inode; + loc_copy (&local->loc, loc); + + /* Striped files */ + local->flags = flags; + local->call_count = priv->child_count; + local->stripe_size = stripe_get_matching_bs (loc->path, + priv->pattern, + priv->block_size); + + if (priv->xattr_supported) { + while (trav) { + STACK_WIND (frame, + stripe_open_getxattr_cbk, + trav->xlator, + trav->xlator->fops->getxattr, + loc, NULL); + trav = trav->next; + } + } else { + while (trav) { + STACK_WIND (frame, + stripe_open_cbk, + trav->xlator, + trav->xlator->fops->open, + &local->loc, local->flags, local->fd); + trav = trav->next; + } + } + + return 0; +} + +/** + * stripe_opendir_cbk - + */ +int32_t +stripe_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->failed = 1; + local->op_errno = op_errno; + } + + if 
(op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } + + return 0; +} + + +/** + * stripe_opendir - + */ +int32_t +stripe_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning EIO"); + STACK_UNWIND (frame, -1, EIO, NULL); + return 0; + } + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + frame->local = local; + local->inode = loc->inode; + local->fd = fd; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_opendir_cbk, + trav->xlator, + trav->xlator->fops->opendir, + loc, fd); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_getxattr_cbk - + */ +int32_t +stripe_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *value) +{ + STACK_UNWIND (frame, op_ret, op_errno, value); + return 0; +} + + +/** + * stripe_getxattr - + */ +int32_t +stripe_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, + stripe_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, name); + + return 0; +} + +/** + * stripe_removexattr - + */ +int32_t +stripe_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + if (priv->first_child_down) { + gf_log (this->name, GF_LOG_WARNING, + "First node down, returning ENOTCONN"); + STACK_UNWIND (frame, -1, ENOTCONN, NULL); + return 0; + } + + 
STACK_WIND (frame, + stripe_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, name); + + return 0; +} + + +/** + * stripe_lk_cbk - + */ +int32_t +stripe_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno == ENOTCONN) + local->failed = 1; + } + if (op_ret == 0 && local->op_ret == -1) { + /* First successful call, copy the *lock */ + local->op_ret = 0; + local->lock = *lock; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + STACK_UNWIND (frame, local->op_ret, + local->op_errno, &local->lock); + } + return 0; +} + + +/** + * stripe_lk - + */ +int32_t +stripe_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + stripe_private_t *priv = this->private; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_lk_cbk, + trav->xlator, + trav->xlator->fops->lk, + fd, cmd, lock); + trav = trav->next; + } + + return 0; +} + +/** + * stripe_writedir - + */ +int32_t +stripe_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + 
local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->setdents, + fd, flags, entries, count); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_flush - + */ +int32_t +stripe_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->flush, + fd); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_close - + */ +int32_t +stripe_release (xlator_t *this, + fd_t *fd) +{ + return 0; +} + + +/** + * stripe_fsync - + */ +int32_t +stripe_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->fsync, + fd, flags); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_fstat - + */ +int32_t +stripe_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* 
Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = fd->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->fstat, + fd); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_fchmod - + */ +int32_t +stripe_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = fd->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->fchmod, + fd, mode); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_fchown - + */ +int32_t +stripe_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = fd->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->fchown, + fd, uid, gid); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_ftruncate - + */ +int32_t +stripe_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + 
STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->inode = fd->inode; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_buf_cbk, + trav->xlator, + trav->xlator->fops->ftruncate, + fd, offset); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_releasedir - + */ +int32_t +stripe_releasedir (xlator_t *this, + fd_t *fd) +{ + return 0; +} + + +/** + * stripe_fsyncdir - + */ +int32_t +stripe_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = this->private; + xlator_list_t *trav = this->children; + + STRIPE_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (fd); + + /* Initialization */ + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, + stripe_stack_unwind_cbk, + trav->xlator, + trav->xlator->fops->fsyncdir, + fd, + flags); + trav = trav->next; + } + + return 0; +} + + +/** + * stripe_single_readv_cbk - This function is used as return fn, when the + * file name doesn't match the pattern specified for striping. + */ +int32_t +stripe_single_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + +/** + * stripe_readv_cbk - get all the striped reads, and order it properly, send it + * to above layer after putting it in a single vector. 
+ */ +int32_t +stripe_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + int32_t index = 0; + int32_t callcnt = 0; + call_frame_t *main_frame = NULL; + stripe_local_t *main_local = NULL; + stripe_local_t *local = frame->local; + + index = local->node_index; + main_frame = local->orig_frame; + main_local = main_frame->local; + + LOCK (&main_frame->lock); + { + main_local->replies[index].op_ret = op_ret; + main_local->replies[index].op_errno = op_errno; + if (op_ret >= 0) { + main_local->replies[index].stbuf = *stbuf; + main_local->replies[index].count = count; + main_local->replies[index].vector = + iov_dup (vector, count); + + if (frame->root->rsp_refs) + dict_copy (frame->root->rsp_refs, + main_frame->root->rsp_refs); + } + callcnt = ++main_local->call_count; + } + UNLOCK(&main_frame->lock); + + if (callcnt == main_local->wind_count) { + int32_t final_count = 0; + struct iovec *final_vec = NULL; + struct stat tmp_stbuf = {0,}; + dict_t *refs = main_frame->root->rsp_refs; + + op_ret = 0; + memcpy (&tmp_stbuf, &main_local->replies[0].stbuf, + sizeof (struct stat)); + for (index=0; index < main_local->wind_count; index++) { + /* TODO: check whether each stripe returned 'expected' + * number of bytes + */ + if (main_local->replies[index].op_ret == -1) { + op_ret = -1; + op_errno = main_local->replies[index].op_errno; + break; + } + op_ret += main_local->replies[index].op_ret; + final_count += main_local->replies[index].count; + /* TODO: Do I need to send anything more in stbuf? 
*/ + if (tmp_stbuf.st_size < + main_local->replies[index].stbuf.st_size) { + tmp_stbuf.st_size = + main_local->replies[index].stbuf.st_size; + } + } + if (op_ret != -1) { + final_vec = CALLOC (final_count, + sizeof (struct iovec)); + ERR_ABORT (final_vec); + final_count = 0; + + for (index=0; + index < main_local->wind_count; index++) { + memcpy (final_vec + final_count, + main_local->replies[index].vector, + (main_local->replies[index].count * + sizeof (struct iovec))); + final_count += + main_local->replies[index].count; + + free (main_local->replies[index].vector); + } + } else { + final_vec = NULL; + final_count = 0; + } + /* */ + FREE (main_local->replies); + refs = main_frame->root->rsp_refs; + STACK_UNWIND (main_frame, op_ret, op_errno, + final_vec, final_count, &tmp_stbuf); + + dict_unref (refs); + if (final_vec) + free (final_vec); + } + + STACK_DESTROY (frame->root); + return 0; +} + +/** + * stripe_readv - + */ +int32_t +stripe_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + int32_t index = 0; + int32_t num_stripe = 0; + size_t frame_size = 0; + off_t rounded_end = 0; + uint64_t stripe_size = 0; + off_t rounded_start = 0; + off_t frame_offset = offset; + stripe_local_t *local = NULL; + call_frame_t *rframe = NULL; + stripe_local_t *rlocal = NULL; + xlator_list_t *trav = this->children; + stripe_private_t *priv = this->private; + + fd_ctx_get (fd, this, &stripe_size); + if (!stripe_size) { + STACK_UNWIND (frame, -1, EINVAL, NULL, 0, NULL); + return 0; + } + + /* The file is stripe across the child nodes. Send the read request + * to the child nodes appropriately after checking which region of + * the file is in which child node. Always '0-<stripe_size>' part of + * the file resides in the first child. 
+ */ + rounded_start = floor (offset, stripe_size); + rounded_end = roof (offset+size, stripe_size); + num_stripe = (rounded_end - rounded_start) / stripe_size; + + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + local->wind_count = num_stripe; + frame->local = local; + frame->root->rsp_refs = dict_ref (get_new_dict ()); + + /* This is where all the vectors should be copied. */ + local->replies = CALLOC (1, num_stripe * + sizeof (struct readv_replies)); + ERR_ABORT (local->replies); + + for (index = 0; + index < ((offset / stripe_size) % priv->child_count); + index++) { + trav = trav->next; + } + + for (index = 0; index < num_stripe; index++) { + rframe = copy_frame (frame); + rlocal = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (rlocal); + + frame_size = min (roof (frame_offset+1, stripe_size), + (offset + size)) - frame_offset; + + rlocal->node_index = index; + rlocal->orig_frame = frame; + rframe->local = rlocal; + STACK_WIND (rframe, + stripe_readv_cbk, + trav->xlator, + trav->xlator->fops->readv, + fd, frame_size, frame_offset); + + frame_offset += frame_size; + + trav = trav->next ? 
trav->next : this->children; + } + + return 0; +} + + +/** + * stripe_writev_cbk - + */ +int32_t +stripe_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + LOCK(&frame->lock); + { + callcnt = ++local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_errno = op_errno; + local->op_ret = -1; + } + if (op_ret >= 0) { + local->op_ret += op_ret; + local->stbuf = *stbuf; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == local->wind_count) && local->unwind) { + STACK_UNWIND (frame, local->op_ret, + local->op_errno, &local->stbuf); + } + return 0; +} + + +/** + * stripe_single_writev_cbk - + */ +int32_t +stripe_single_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} +/** + * stripe_writev - + */ +int32_t +stripe_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + int32_t idx = 0; + int32_t total_size = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + int32_t tmp_count = count; + off_t fill_size = 0; + uint64_t stripe_size = 0; + struct iovec *tmp_vec = vector; + stripe_private_t *priv = this->private; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + + fd_ctx_get (fd, this, &stripe_size); + if (!stripe_size) { + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + } + + /* File has to be stripped across the child nodes */ + for (idx = 0; idx< count; idx ++) { + total_size += tmp_vec[idx].iov_len; + } + remaining_size = total_size; + + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + frame->local = local; + local->stripe_size = stripe_size; + + 
while (1) { + /* Send striped chunk of the vector to child + nodes appropriately. */ + trav = this->children; + + idx = (((offset + offset_offset) / + local->stripe_size) % priv->child_count); + while (idx) { + trav = trav->next; + idx--; + } + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + tmp_count = iov_subset (vector, count, offset_offset, + offset_offset + fill_size, NULL); + tmp_vec = CALLOC (tmp_count, sizeof (struct iovec)); + ERR_ABORT (tmp_vec); + tmp_count = iov_subset (vector, count, offset_offset, + offset_offset + fill_size, tmp_vec); + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + STACK_WIND(frame, + stripe_writev_cbk, + trav->xlator, + trav->xlator->fops->writev, + fd, tmp_vec, tmp_count, offset + offset_offset); + FREE (tmp_vec); + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +} + + + +/* Management operations */ + +/** + * stripe_stats_cbk - Add all the fields received from different clients. + * Once all the clients return, send stats to above layer. 
+ * + */ +int32_t +stripe_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + int32_t callcnt = 0; + stripe_local_t *local = frame->local; + + LOCK(&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + ((call_frame_t *)cookie)->this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + if (op_ret == 0) { + if (local->op_ret == -2) { + /* This is to make sure this is the + first time */ + local->stats = *stats; + local->op_ret = 0; + } else { + local->stats.nr_files += stats->nr_files; + local->stats.free_disk += stats->free_disk; + local->stats.disk_usage += stats->disk_usage; + local->stats.nr_clients += stats->nr_clients; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stats); + } + + return 0; +} + +/** + * stripe_stats - + */ +int32_t +stripe_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = this->children; + + local = CALLOC (1, sizeof (stripe_local_t)); + ERR_ABORT (local); + frame->local = local; + local->op_ret = -2; /* to be used as a flag in _cbk */ + local->call_count = ((stripe_private_t*)this->private)->child_count; + while (trav) { + STACK_WIND (frame, + stripe_stats_cbk, + trav->xlator, + trav->xlator->mops->stats, + flags); + trav = trav->next; + } + return 0; +} + +/** + * notify + */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + stripe_private_t *priv = this->private; + int down_client = 0; + int i = 0; + + if (!priv) + return 0; + + switch (event) + { + case GF_EVENT_CHILD_UP: + { + /* get an index number to set */ + for (i = 0; i < priv->child_count; i++) { + if (data == priv->xl_array[i]) + break; + } + priv->state[i] = 1; + for (i = 0; i < priv->child_count; i++) { + if (!priv->state[i]) + down_client++; + } + + LOCK (&priv->lock); + { + priv->nodes_down = down_client; + + if (data == FIRST_CHILD (this)) { + priv->first_child_down = 0; + default_notify (this, event, data); + } + } + UNLOCK (&priv->lock); + } + break; + case GF_EVENT_CHILD_DOWN: + { + /* get an index number to set */ + for (i = 0; i < priv->child_count; i++) { + if (data == priv->xl_array[i]) + break; + } + priv->state[i] = 0; + for (i = 0; i < priv->child_count; i++) { + if (!priv->state[i]) + down_client++; + } + + LOCK (&priv->lock); + { + priv->nodes_down = down_client; + + if (data == FIRST_CHILD (this)) { + priv->first_child_down = 1; + default_notify (this, event, data); + } + } + UNLOCK (&priv->lock); + } + break; + + default: + { + /* */ + default_notify (this, event, data); + } + break; + } + + return 0; +} +/** + * init - This function is called when xlator-graph gets initialized. + * The option given in volfiles are parsed here. + * @this - + */ +int32_t +init (xlator_t *this) +{ + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + data_t *data = NULL; + int32_t count = 0; + + trav = this->children; + while (trav) { + count++; + trav = trav->next; + } + + if (!count) { + gf_log (this->name, GF_LOG_ERROR, + "stripe configured without \"subvolumes\" option. " + "exiting"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + priv = CALLOC (1, sizeof (stripe_private_t)); + ERR_ABORT (priv); + priv->xl_array = CALLOC (1, count * sizeof (xlator_t *)); + ERR_ABORT (priv->xl_array); + priv->child_count = count; + LOCK_INIT (&priv->lock); + + trav = this->children; + count = 0; + while (trav) { + priv->xl_array[count++] = trav->xlator; + trav = trav->next; + } + + if (count > 256) { + gf_log (this->name, GF_LOG_ERROR, + "maximum number of stripe subvolumes supported " + "is 256"); + return -1; + } + + priv->block_size = (128 * GF_UNIT_KB); + /* option stripe-pattern *avi:1GB,*pdf:4096 */ + data = dict_get (this->options, "block-size"); + if (!data) { + gf_log (this->name, GF_LOG_DEBUG, + "No \"option block-size <x>\" given, defaulting " + "to 128KB"); + } else { + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *dup_str = NULL; + char *stripe_str = NULL; + char *pattern = NULL; + char *num = NULL; + struct stripe_options *temp_stripeopt = NULL; + struct stripe_options *stripe_opt = NULL; + + /* Get the pattern for striping. 
+ "option block-size *avi:10MB" etc */ + stripe_str = strtok_r (data->data, ",", &tmp_str); + while (stripe_str) { + dup_str = strdup (stripe_str); + stripe_opt = CALLOC (1, + sizeof (struct stripe_options)); + ERR_ABORT (stripe_opt); + pattern = strtok_r (dup_str, ":", &tmp_str1); + num = strtok_r (NULL, ":", &tmp_str1); + if (num && + (gf_string2bytesize (num, + &stripe_opt->block_size) + != 0)) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + num); + return -1; + } else if (!num && (gf_string2bytesize ( + pattern, + &stripe_opt->block_size) + != 0)) { + /* Possible that there is no pattern given */ + stripe_opt->block_size = (128 * GF_UNIT_KB); + pattern = "*"; + } + memcpy (stripe_opt->path_pattern, + pattern, strlen (pattern)); + + gf_log (this->name, GF_LOG_DEBUG, + "block-size : pattern %s : size %"PRId64, + stripe_opt->path_pattern, + stripe_opt->block_size); + + if (!priv->pattern) { + priv->pattern = stripe_opt; + } else { + temp_stripeopt = priv->pattern; + while (temp_stripeopt->next) + temp_stripeopt = temp_stripeopt->next; + temp_stripeopt->next = stripe_opt; + } + stripe_str = strtok_r (NULL, ",", &tmp_str); + } + } + + priv->xattr_supported = 1; + data = dict_get (this->options, "use-xattr"); + if (data) { + if (gf_string2boolean (data->data, + &priv->xattr_supported) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "error setting hard check for extended " + "attribute"); + //return -1; + } + } + + /* notify related */ + priv->nodes_down = priv->child_count; + this->private = priv; + + return 0; +} + +/** + * fini - Free all the private variables + * @this - + */ +void +fini (xlator_t *this) +{ + stripe_private_t *priv = this->private; + struct stripe_options *prev = NULL; + struct stripe_options *trav = priv->pattern; + while (trav) { + prev = trav; + trav = trav->next; + FREE (prev); + } + FREE (priv->xl_array); + LOCK_DESTROY (&priv->lock); + FREE (priv); + return; +} + + +struct xlator_fops fops = { + .stat = stripe_stat, 
+ .unlink = stripe_unlink, + .symlink = stripe_symlink, + .rename = stripe_rename, + .link = stripe_link, + .chmod = stripe_chmod, + .chown = stripe_chown, + .truncate = stripe_truncate, + .utimens = stripe_utimens, + .create = stripe_create, + .open = stripe_open, + .readv = stripe_readv, + .writev = stripe_writev, + .statfs = stripe_statfs, + .flush = stripe_flush, + .fsync = stripe_fsync, + .setxattr = stripe_setxattr, + .getxattr = stripe_getxattr, + .removexattr = stripe_removexattr, + .access = stripe_access, + .ftruncate = stripe_ftruncate, + .fstat = stripe_fstat, + .readlink = stripe_readlink, + .mkdir = stripe_mkdir, + .rmdir = stripe_rmdir, + .lk = stripe_lk, + .opendir = stripe_opendir, + .fsyncdir = stripe_fsyncdir, + .fchmod = stripe_fchmod, + .fchown = stripe_fchown, + .lookup = stripe_lookup, + .setdents = stripe_setdents, + .mknod = stripe_mknod, +}; + +struct xlator_mops mops = { + .stats = stripe_stats, +}; + +struct xlator_cbks cbks = { + .release = stripe_release, + .releasedir = stripe_releasedir +}; + + +struct volume_options options[] = { + { .key = {"block-size"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"use-xattr"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/unify/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/unify/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am new file mode 100644 index 000000000..b9e6f63e9 --- /dev/null +++ b/xlators/cluster/unify/src/Makefile.am @@ -0,0 +1,16 @@ + +xlator_LTLIBRARIES = unify.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +unify_la_LDFLAGS = -module -avoidversion + +unify_la_SOURCES = unify.c unify-self-heal.c +unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = unify.h + +AM_CFLAGS = -fPIC 
-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c new file mode 100644 index 000000000..4885dd91a --- /dev/null +++ b/xlators/cluster/unify/src/unify-self-heal.c @@ -0,0 +1,1225 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/** + * unify-self-heal.c : + * This file implements a few functions which enable the 'unify' translator + * to be consistent in its behaviour when + * > a node fails, + * > a node gets added, + * > a failed node comes back + * > a new namespace server is added (ie, a fresh namespace server). + * + * This functionality of 'unify' will enable glusterfs to support storage + * system failure, and maintain consistency. This works both ways, ie, when + * an entry (either file or directory) is found on namespace server, and not + * on storage nodes, it is created in storage nodes and vice versa. 
+ * + * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()' + * + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "unify.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "common-utils.h" + +int32_t +unify_sh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_sh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_bgsh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_bgsh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +/** + * unify_local_wipe - free all the extra allocation of local->* here. 
+ */
+static void
+unify_local_wipe (unify_local_t *local)
+{
+        /* Free the strdup'd variables in the local structure */
+        if (local->name) {
+                FREE (local->name);
+        }
+
+        /* Self-heal bookkeeping: release the three per-node arrays first,
+         * then the structure that holds them. */
+        if (local->sh_struct) {
+                if (local->sh_struct->offset_list)
+                        FREE (local->sh_struct->offset_list);
+
+                if (local->sh_struct->entry_list)
+                        FREE (local->sh_struct->entry_list);
+
+                if (local->sh_struct->count_list)
+                        FREE (local->sh_struct->count_list);
+
+                FREE (local->sh_struct);
+        }
+
+        loc_wipe (&local->loc1);
+        loc_wipe (&local->loc2);
+}
+
+/**
+ * unify_sh_free_entries - release a dentry chain kept in
+ *                         local->sh_struct->entry_list[] by the foreground
+ *                         self-heal callbacks.
+ *
+ * @head: placeholder node allocated by the getdents callbacks; the nodes
+ *        linked from head->next are the ones taken over from the getdents
+ *        reply. NULL is accepted and ignored.
+ *
+ * Every linked node gives up its name, its link target (symlinks only) and
+ * itself; the placeholder is released last.
+ */
+static void
+unify_sh_free_entries (dir_entry_t *head)
+{
+        dir_entry_t *trav = NULL;
+
+        if (!head)
+                return;
+
+        while ((trav = head->next) != NULL) {
+                head->next = trav->next;
+                FREE (trav->name);
+                if (S_ISLNK (trav->buf.st_mode))
+                        FREE (trav->link);
+                FREE (trav);
+        }
+        FREE (head);
+}
+
+/**
+ * unify_sh_setdents_cbk - reply to the setdents() sent to a storage node
+ *                         with the directory entries read from namespace.
+ *
+ * Once every storage node has answered, the entry chain of this round is
+ * freed. If the namespace had returned a full batch, the next batch is
+ * requested from it; otherwise (local->flags set) the self-heal is over and
+ * the pending lookup is unwound.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_sh_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno)
+{
+        int32_t        callcnt  = -1;
+        unify_local_t *local    = frame->local;
+        inode_t       *inode    = NULL;
+        dict_t        *tmp_dict = NULL;
+
+        LOCK (&frame->lock);
+        {
+                /* if local->call_count == 0, that means, setdents on
+                 * storagenodes is still pending.
+                 */
+                if (local->call_count)
+                        callcnt = --local->call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (callcnt != 0)
+                return 0;
+
+        unify_sh_free_entries (local->sh_struct->entry_list[0]);
+
+        if (!local->flags) {
+                if (local->sh_struct->count_list[0] >=
+                    UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                        /* count == size, that means, there are more
+                           entries to read from */
+                        local->sh_struct->offset_list[0] +=
+                                UNIFY_SELF_HEAL_GETDENTS_COUNT;
+                        STACK_WIND (frame,
+                                    unify_sh_ns_getdents_cbk,
+                                    NS(this),
+                                    NS(this)->fops->getdents,
+                                    local->fd,
+                                    UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                    local->sh_struct->offset_list[0],
+                                    GF_GET_DIR_ONLY);
+                }
+                return 0;
+        }
+
+        /* Last batch handled: finish the lookup. Everything needed after the
+         * unwind is saved first, because 'local' must not be read once
+         * STACK_UNWIND has handed control back to the caller. */
+        inode = local->loc1.inode;
+        fd_unref (local->fd);
+        tmp_dict = local->dict;
+
+        unify_local_wipe (local);
+
+        STACK_UNWIND (frame, local->op_ret, local->op_errno,
+                      inode, &local->stbuf, tmp_dict);
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        return 0;
+}
+
+
+/**
+ * unify_sh_ns_getdents_cbk - reply to getdents() on the namespace node.
+ *
+ * The returned chain is detached from @entry, parked in entry_list[0] behind
+ * a freshly allocated placeholder and pushed to every storage node with
+ * setdents(). A short batch (or no entry at all) marks this as the final
+ * round through local->flags.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_sh_ns_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno,
+                          dir_entry_t *entry, int32_t count)
+{
+        unify_local_t   *local = frame->local;
+        unify_private_t *priv  = this->private;
+        long             index = 0;
+        unsigned long    final = 0;
+        dir_entry_t     *tmp   = NULL;
+
+        tmp = CALLOC (1, sizeof (dir_entry_t));
+        ERR_ABORT (tmp);
+
+        local->sh_struct->entry_list[0] = tmp;
+        local->sh_struct->count_list[0] = count;
+        if (entry) {
+                tmp->next = entry->next;
+                entry->next = NULL;
+        }
+
+        if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
+                final = 1;
+        }
+
+        LOCK (&frame->lock);
+        {
+                /* call_count is 0 at this point; arm it with the number of
+                   setdents replies expected from the storage nodes. */
+                local->call_count = priv->child_count;
+                if (final)
+                        local->flags = 1;
+        }
+        UNLOCK (&frame->lock);
+
+        for (index = 0; index < priv->child_count; index++) {
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_setdents_cbk,
+                                   (void *)index,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->setdents,
+                                   local->fd, GF_SET_DIR_ONLY,
+                                   local->sh_struct->entry_list[0], count);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_sh_ns_setdents_cbk - reply to the setdents() sent to the namespace
+ *                            with the entries read from one storage node.
+ *
+ * @cookie: index of that storage node in priv->xl_array.
+ *
+ * Frees the node's entry chain, then either asks the same node for its next
+ * batch or, when the node had returned a short batch, counts it as done.
+ * When the last node is done, the namespace is read from offset 0 so its
+ * directories can be replayed on the storage nodes.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_sh_ns_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno)
+{
+        int32_t          callcnt = -1;
+        unify_local_t   *local   = frame->local;
+        unify_private_t *priv    = this->private;
+        long             index   = (long)cookie;
+
+        LOCK (&frame->lock);
+        {
+                unify_sh_free_entries (local->sh_struct->entry_list[index]);
+        }
+        UNLOCK (&frame->lock);
+
+        if (local->sh_struct->count_list[index] <
+            UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                LOCK (&frame->lock);
+                {
+                        callcnt = --local->call_count;
+                }
+                UNLOCK (&frame->lock);
+        } else {
+                /* count == size, that means, there are more entries
+                   to read from */
+                local->sh_struct->offset_list[index] +=
+                        UNIFY_SELF_HEAL_GETDENTS_COUNT;
+
+                /* Log before winding: 'local' must not be touched after the
+                 * wind, since the reply may already have been processed. */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on (%s) with offset %"PRId64"",
+                        priv->xl_array[index]->name,
+                        local->sh_struct->offset_list[index]);
+
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_getdents_cbk,
+                                   cookie,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->getdents,
+                                   local->fd,
+                                   UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                   local->sh_struct->offset_list[index],
+                                   GF_GET_ALL);
+        }
+
+        if (!callcnt) {
+                /* All storage nodes have done unified setdents on NS node.
+                 * Now, do getdents from NS and do setdents on storage nodes.
+                 */
+
+                /* sh_struct->offset_list is no longer required for
+                   storage nodes now */
+                local->sh_struct->offset_list[0] = 0; /* reset */
+
+                STACK_WIND (frame,
+                            unify_sh_ns_getdents_cbk,
+                            NS(this),
+                            NS(this)->fops->getdents,
+                            local->fd,
+                            UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                            0, /* In this call, do send '0' as offset */
+                            GF_GET_DIR_ONLY);
+        }
+
+        return 0;
+}
+
+
+/**
+ * unify_sh_getdents_cbk - reply to getdents() on one storage node.
+ *
+ * @cookie: index of that storage node in priv->xl_array.
+ *
+ * A successful, non-empty reply is forwarded to the namespace with
+ * setdents(GF_SET_IF_NOT_PRESENT). Otherwise the node is either counted as
+ * done (count below the batch size) or asked for its next batch. When the
+ * last node is done, the namespace is read from offset 0.
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_sh_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int32_t op_ret, int32_t op_errno,
+                       dir_entry_t *entry, int32_t count)
+{
+        int32_t          callcnt = -1;
+        unify_local_t   *local   = frame->local;
+        unify_private_t *priv    = this->private;
+        long             index   = (long)cookie;
+        dir_entry_t     *tmp     = NULL;
+
+        if (op_ret >= 0 && count > 0) {
+                /* There is some dentry found, just send the dentry to NS */
+                tmp = CALLOC (1, sizeof (dir_entry_t));
+                ERR_ABORT (tmp);
+
+                local->sh_struct->entry_list[index] = tmp;
+                local->sh_struct->count_list[index] = count;
+                if (entry) {
+                        tmp->next = entry->next;
+                        entry->next = NULL;
+                }
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_ns_setdents_cbk,
+                                   cookie,
+                                   NS(this),
+                                   NS(this)->fops->setdents,
+                                   local->fd,
+                                   GF_SET_IF_NOT_PRESENT,
+                                   local->sh_struct->entry_list[index],
+                                   count);
+                return 0;
+        }
+
+        if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
+                LOCK (&frame->lock);
+                {
+                        callcnt = --local->call_count;
+                }
+                UNLOCK (&frame->lock);
+        } else {
+                /* count == size, that means, there are more entries
+                   to read from */
+                local->sh_struct->offset_list[index] +=
+                        UNIFY_SELF_HEAL_GETDENTS_COUNT;
+
+                /* Log before winding: 'local' must not be touched after the
+                 * wind, since the reply may already have been processed. */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "readdir on (%s) with offset %"PRId64"",
+                        priv->xl_array[index]->name,
+                        local->sh_struct->offset_list[index]);
+
+                STACK_WIND_COOKIE (frame,
+                                   unify_sh_getdents_cbk,
+                                   cookie,
+                                   priv->xl_array[index],
+                                   priv->xl_array[index]->fops->getdents,
+                                   local->fd,
+                                   UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                   local->sh_struct->offset_list[index],
+                                   GF_GET_ALL);
+        }
+
+        if (!callcnt) {
+                /* All storage nodes have done unified setdents on NS node.
+                 * Now, do getdents from NS and do setdents on storage nodes.
+                 */
+
+                /* sh_struct->offset_list is no longer required for
+                   storage nodes now */
+                local->sh_struct->offset_list[0] = 0; /* reset */
+
+                STACK_WIND (frame,
+                            unify_sh_ns_getdents_cbk,
+                            NS(this),
+                            NS(this)->fops->getdents,
+                            local->fd,
+                            UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                            0, /* In this call, do send '0' as offset */
+                            GF_GET_DIR_ONLY);
+        }
+
+        return 0;
+}
+
+/**
+ * unify_sh_opendir_cbk - reply to the opendir() sent to every node
+ *                        (storage nodes and namespace).
+ *
+ * After the last reply: if every opendir succeeded, the per-node offset,
+ * entry and count arrays are allocated and getdents() is sent to each
+ * storage node. If any opendir failed, the self-heal is abandoned and the
+ * pending lookup is unwound as successful (op_ret forced to 0).
+ *
+ * Always returns 0.
+ */
+int32_t
+unify_sh_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int32_t          callcnt  = 0;
+        unify_local_t   *local    = frame->local;
+        unify_private_t *priv     = this->private;
+        int16_t          index    = 0;
+        inode_t         *inode    = NULL;
+        dict_t          *tmp_dict = NULL;
+
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+
+                if (op_ret >= 0) {
+                        local->op_ret = op_ret;
+                } else {
+                        gf_log (this->name, GF_LOG_WARNING, "failed");
+                        local->failed = 1;
+                }
+        }
+        UNLOCK (&frame->lock);
+
+        if (callcnt)
+                return 0;
+
+        local->call_count = priv->child_count + 1;
+
+        if (!local->failed) {
+                /* send getdents() namespace after finishing
+                   storage nodes */
+                local->call_count--;
+
+                fd_bind (fd);
+
+                if (local->call_count) {
+                        /* Used as the offset index. This list keeps
+                         * track of offset sent to each node during
+                         * STACK_WIND.
+                         */
+                        local->sh_struct->offset_list =
+                                calloc (priv->child_count, sizeof (off_t));
+                        ERR_ABORT (local->sh_struct->offset_list);
+
+                        local->sh_struct->entry_list =
+                                calloc (priv->child_count,
+                                        sizeof (dir_entry_t *));
+                        ERR_ABORT (local->sh_struct->entry_list);
+
+                        local->sh_struct->count_list =
+                                calloc (priv->child_count, sizeof (int));
+                        ERR_ABORT (local->sh_struct->count_list);
+
+                        /* Send getdents on all the fds */
+                        for (index = 0; index < priv->child_count; index++) {
+                                STACK_WIND_COOKIE (frame,
+                                                   unify_sh_getdents_cbk,
+                                                   (void *)(long)index,
+                                                   priv->xl_array[index],
+                                                   priv->xl_array[index]->fops->getdents,
+                                                   local->fd,
+                                                   UNIFY_SELF_HEAL_GETDENTS_COUNT,
+                                                   0, /* In this call, do send '0' as offset */
+                                                   GF_GET_ALL);
+                        }
+
+                        /* did stack wind, so no need to unwind here */
+                        return 0;
+                } /* (local->call_count) */
+        } /* (!local->failed) */
+
+        /* Opendir failed on one node. */
+        inode = local->loc1.inode;
+        fd_unref (local->fd);
+        tmp_dict = local->dict;
+
+        unify_local_wipe (local);
+        /* Only 'self-heal' failed, lookup() was successful. */
+        local->op_ret = 0;
+
+        /* This is lookup_cbk ()'s UNWIND. */
+        STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
+                      &local->stbuf, tmp_dict);
+        if (tmp_dict)
+                dict_unref (tmp_dict);
+
+        return 0;
+}
+
+/**
+ * gf_sh_checksum_cbk -
+ *
+ * @frame: frame used in lookup. get a copy of it, and use that copy.
+ * @this: pointer to unify xlator.
+ * @inode: pointer to inode, for which the consistency check is required.
+ * + */ +int32_t +unify_sh_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + int32_t callcnt = 0; + inode_t *inode = NULL; + dict_t *tmp_dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (NS(this) == (xlator_t *)cookie) { + memcpy (local->sh_struct->ns_file_checksum, + file_checksum, ZR_FILENAME_MAX); + memcpy (local->sh_struct->ns_dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } else { + if (local->entry_count == 0) { + /* Initialize the dir_checksum to be + * used for comparision with other + * storage nodes. Should be done for + * the first successful call *only*. + */ + /* Using 'entry_count' as a flag */ + local->entry_count = 1; + memcpy (local->sh_struct->dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } + + /* Reply from the storage nodes */ + for (index = 0; + index < ZR_FILENAME_MAX; index++) { + /* Files should be present in + only one node */ + local->sh_struct->file_checksum[index] ^= file_checksum[index]; + + /* directory structure should be + same accross */ + if (local->sh_struct->dir_checksum[index] != dir_checksum[index]) + local->failed = 1; + } + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + for (index = 0; index < ZR_FILENAME_MAX ; index++) { + if (local->sh_struct->file_checksum[index] != + local->sh_struct->ns_file_checksum[index]) { + local->failed = 1; + break; + } + if (local->sh_struct->dir_checksum[index] != + local->sh_struct->ns_dir_checksum[index]) { + local->failed = 1; + break; + } + } + + if (local->failed) { + /* Log it, it should be a rare event */ + gf_log (this->name, GF_LOG_WARNING, + "Self-heal triggered on directory %s", + local->loc1.path); + + /* Any self heal will be done at directory level */ + local->call_count = 0; + local->op_ret = -1; + 
local->failed = 0; + + local->fd = fd_create (local->loc1.inode, + frame->root->pid); + + local->call_count = priv->child_count + 1; + + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_sh_opendir_cbk, + priv->xl_array[index]->name, + priv->xl_array[index], + priv->xl_array[index]->fops->opendir, + &local->loc1, + local->fd); + } + /* opendir can be done on the directory */ + return 0; + } + + /* no mismatch */ + inode = local->loc1.inode; + tmp_dict = local->dict; + + unify_local_wipe (local); + + /* This is lookup_cbk ()'s UNWIND. */ + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + inode, + &local->stbuf, + local->dict); + if (tmp_dict) + dict_unref (tmp_dict); + } + + return 0; +} + +/* Foreground self-heal part over */ + +/* Background self-heal part */ + +int32_t +unify_bgsh_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + dir_entry_t *prev, *entry, *trav; + + LOCK (&frame->lock); + { + /* if local->call_count == 0, that means, setdents + on storagenodes is still pending. 
*/ + if (local->call_count) + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + + if (callcnt == 0) { + if (local->sh_struct->entry_list[0]) { + prev = entry = local->sh_struct->entry_list[0]; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + + if (!local->flags) { + if (local->sh_struct->count_list[0] >= + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + /* count == size, that means, there are more + entries to read from */ + //local->call_count = 0; + local->sh_struct->offset_list[0] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[0], + GF_GET_DIR_ONLY); + } + } else { + fd_unref (local->fd); + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + } + + return 0; +} + + +int32_t +unify_bgsh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = 0; + unsigned long final = 0; + dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t)); + + local->sh_struct->entry_list[0] = tmp; + local->sh_struct->count_list[0] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + + if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) { + final = 1; + } + + LOCK (&frame->lock); + { + /* local->call_count will be '0' till now. make it 1 so, + it can be UNWIND'ed for the last call. 
*/ + local->call_count = priv->child_count; + if (final) + local->flags = 1; + } + UNLOCK (&frame->lock); + + for (index = 0; index < priv->child_count; index++) + { + STACK_WIND_COOKIE (frame, + unify_bgsh_setdents_cbk, + (void *)index, + priv->xl_array[index], + priv->xl_array[index]->fops->setdents, + local->fd, GF_SET_DIR_ONLY, + local->sh_struct->entry_list[0], count); + } + + return 0; +} + +int32_t +unify_bgsh_ns_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *prev, *entry, *trav; + + if (local->sh_struct->entry_list[index]) { + prev = entry = local->sh_struct->entry_list[index]; + if (!entry) + return 0; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + + if (local->sh_struct->count_list[index] < + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries + to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. 
+ */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + + +/** + * unify_bgsh_getdents_cbk - + */ +int32_t +unify_bgsh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *tmp = NULL; + + if (op_ret >= 0 && count > 0) { + /* There is some dentry found, just send the dentry to NS */ + tmp = CALLOC (1, sizeof (dir_entry_t)); + local->sh_struct->entry_list[index] = tmp; + local->sh_struct->count_list[index] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + STACK_WIND_COOKIE (frame, + unify_bgsh_ns_setdents_cbk, + cookie, + NS(this), + NS(this)->fops->setdents, + local->fd, + GF_SET_IF_NOT_PRESENT, + local->sh_struct->entry_list[index], + count); + return 0; + } + + if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified 
setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. + */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + +/** + * unify_bgsh_opendir_cbk - + * + * @cookie: + */ +int32_t +unify_bgsh_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int32_t callcnt = 0; + int16_t index = 0; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret >= 0) { + local->op_ret = op_ret; + } else { + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->call_count = priv->child_count + 1; + + if (!local->failed) { + /* send getdents() namespace after finishing + storage nodes */ + local->call_count--; + callcnt = local->call_count; + + fd_bind (fd); + + if (local->call_count) { + /* Used as the offset index. This list keeps + track of offset sent to each node during + STACK_WIND. 
*/ + local->sh_struct->offset_list = + calloc (priv->child_count, + sizeof (off_t)); + ERR_ABORT (local->sh_struct->offset_list); + + local->sh_struct->entry_list = + calloc (priv->child_count, + sizeof (dir_entry_t *)); + ERR_ABORT (local->sh_struct->entry_list); + + local->sh_struct->count_list = + calloc (priv->child_count, + sizeof (int)); + ERR_ABORT (local->sh_struct->count_list); + + /* Send getdents on all the fds */ + for (index = 0; + index < priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_ALL); + } + /* did a stack wind, so no need to unwind here */ + return 0; + } /* (local->call_count) */ + } /* (!local->failed) */ + + /* Opendir failed on one node. */ + fd_unref (local->fd); + + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + + return 0; +} + +/** + * gf_bgsh_checksum_cbk - + * + * @frame: frame used in lookup. get a copy of it, and use that copy. + * @this: pointer to unify xlator. + * @inode: pointer to inode, for which the consistency check is required. + * + */ +int32_t +unify_bgsh_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + int32_t callcnt = 0; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (NS(this) == (xlator_t *)cookie) { + memcpy (local->sh_struct->ns_file_checksum, + file_checksum, ZR_FILENAME_MAX); + memcpy (local->sh_struct->ns_dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } else { + if (local->entry_count == 0) { + /* Initialize the dir_checksum to be + * used for comparision with other + * storage nodes. 
Should be done for + * the first successful call *only*. + */ + /* Using 'entry_count' as a flag */ + local->entry_count = 1; + memcpy (local->sh_struct->dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } + + /* Reply from the storage nodes */ + for (index = 0; + index < ZR_FILENAME_MAX; index++) { + /* Files should be present in only + one node */ + local->sh_struct->file_checksum[index] ^= file_checksum[index]; + + /* directory structure should be same + accross */ + if (local->sh_struct->dir_checksum[index] != dir_checksum[index]) + local->failed = 1; + } + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + for (index = 0; index < ZR_FILENAME_MAX ; index++) { + if (local->sh_struct->file_checksum[index] != + local->sh_struct->ns_file_checksum[index]) { + local->failed = 1; + break; + } + if (local->sh_struct->dir_checksum[index] != + local->sh_struct->ns_dir_checksum[index]) { + local->failed = 1; + break; + } + } + + if (local->failed) { + /* Log it, it should be a rare event */ + gf_log (this->name, GF_LOG_WARNING, + "Self-heal triggered on directory %s", + local->loc1.path); + + /* Any self heal will be done at the directory level */ + local->op_ret = -1; + local->failed = 0; + + local->fd = fd_create (local->loc1.inode, + frame->root->pid); + local->call_count = priv->child_count + 1; + + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_bgsh_opendir_cbk, + priv->xl_array[index]->name, + priv->xl_array[index], + priv->xl_array[index]->fops->opendir, + &local->loc1, + local->fd); + } + + /* opendir can be done on the directory */ + return 0; + } + + /* no mismatch */ + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + + return 0; +} + +/* Background self-heal part over */ + + + + +/** + * zr_unify_self_heal - + * + * @frame: frame used in lookup. get a copy of it, and use that copy. + * @this: pointer to unify xlator. + * @inode: pointer to inode, for which the consistency check is required. 
+ * + */ +int32_t +zr_unify_self_heal (call_frame_t *frame, + xlator_t *this, + unify_local_t *local) +{ + unify_private_t *priv = this->private; + call_frame_t *bg_frame = NULL; + unify_local_t *bg_local = NULL; + inode_t *tmp_inode = NULL; + dict_t *tmp_dict = NULL; + int16_t index = 0; + + if (local->inode_generation < priv->inode_generation) { + /* Any self heal will be done at the directory level */ + /* Update the inode's generation to the current generation + value. */ + local->inode_generation = priv->inode_generation; + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->inode_generation); + + if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) { + local->op_ret = 0; + local->failed = 0; + local->call_count = priv->child_count + 1; + local->sh_struct = + calloc (1, sizeof (struct unify_self_heal_struct)); + + /* +1 is for NS */ + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_sh_checksum_cbk, + priv->xl_array[index], + priv->xl_array[index], + priv->xl_array[index]->fops->checksum, + &local->loc1, + 0); + } + + /* Self-heal in foreground, hence no need + to UNWIND here */ + return 0; + } + + /* Self Heal done in background */ + bg_frame = copy_frame (frame); + INIT_LOCAL (bg_frame, bg_local); + loc_copy (&bg_local->loc1, &local->loc1); + bg_local->op_ret = 0; + bg_local->failed = 0; + bg_local->call_count = priv->child_count + 1; + bg_local->sh_struct = + calloc (1, sizeof (struct unify_self_heal_struct)); + + /* +1 is for NS */ + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (bg_frame, + unify_bgsh_checksum_cbk, + priv->xl_array[index], + priv->xl_array[index], + priv->xl_array[index]->fops->checksum, + &bg_local->loc1, + 0); + } + } + + /* generation number matches, self heal already done or + * self heal done in background: just do STACK_UNWIND + */ + tmp_inode = local->loc1.inode; + tmp_dict = local->dict; + + unify_local_wipe (local); + + /* This is lookup_cbk 
()'s UNWIND. */ + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + tmp_inode, + &local->stbuf, + local->dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + return 0; +} + diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c new file mode 100644 index 000000000..e2a5e14b1 --- /dev/null +++ b/xlators/cluster/unify/src/unify.c @@ -0,0 +1,4451 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/** + * xlators/cluster/unify: + * - This xlator is one of the main translator in GlusterFS, which + * actually does the clustering work of the file system. One need to + * understand that, unify assumes file to be existing in only one of + * the child node, and directories to be present on all the nodes. + * + * NOTE: + * Now, unify has support for global namespace, which is used to keep a + * global view of fs's namespace tree. The stat for directories are taken + * just from the namespace, where as for files, just 'st_ino' is taken from + * Namespace node, and other stat info is taken from the actual storage node. + * Also Namespace node helps to keep consistant inode for files across + * glusterfs (re-)mounts. 
+ */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "unify.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "defaults.h" +#include "common-utils.h" +#include <signal.h> +#include <libgen.h> +#include "compat-errno.h" +#include "compat.h" + +#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \ + if (!(_loc && _loc->inode)) { \ + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \ + return 0; \ + } \ +} while(0) + + +#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \ + if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \ + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \ + return 0; \ + } \ +} while(0) + +#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \ + if (!_fd) { \ + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \ + return 0; \ + } \ +} while(0) + +/** + * unify_local_wipe - free all the extra allocation of local->* here. + */ +static void +unify_local_wipe (unify_local_t *local) +{ + /* Free the strdup'd variables in the local structure */ + if (local->name) { + FREE (local->name); + } + loc_wipe (&local->loc1); + loc_wipe (&local->loc2); +} + + + +/* + * unify_normalize_stats - + */ +void +unify_normalize_stats (struct statvfs *buf, + unsigned long bsize, + unsigned long frsize) +{ + double factor; + + if (buf->f_bsize != bsize) { + factor = ((double) buf->f_bsize) / bsize; + buf->f_bsize = bsize; + buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree); + buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail); + } + + if (buf->f_frsize != frsize) { + factor = ((double) buf->f_frsize) / frsize; + buf->f_frsize = frsize; + buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks); + } +} + + +xlator_t * +unify_loc_subvol (loc_t *loc, xlator_t *this) +{ + unify_private_t *priv = NULL; + xlator_t *subvol = NULL; + int16_t *list = NULL; + long index = 0; + xlator_t *subvol_i = NULL; + int ret = 0; + uint64_t tmp_list = 0; + + priv = 
this->private; + subvol = NS (this); + + if (!S_ISDIR (loc->inode->st_mode)) { + ret = inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + if (!list) + goto out; + + for (index = 0; list[index] != -1; index++) { + subvol_i = priv->xl_array[list[index]]; + if (subvol_i != NS (this)) { + subvol = subvol_i; + break; + } + } + } +out: + return subvol; +} + + + +/** + * unify_statfs_cbk - + */ +int32_t +unify_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *stbuf) +{ + int32_t callcnt = 0; + struct statvfs *dict_buf = NULL; + unsigned long bsize; + unsigned long frsize; + unify_local_t *local = (unify_local_t *)frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + /* when a call is successfull, add it to local->dict */ + dict_buf = &local->statvfs_buf; + + if (dict_buf->f_bsize != 0) { + bsize = max (dict_buf->f_bsize, + stbuf->f_bsize); + + frsize = max (dict_buf->f_frsize, + stbuf->f_frsize); + unify_normalize_stats(dict_buf, bsize, frsize); + unify_normalize_stats(stbuf, bsize, frsize); + } else { + dict_buf->f_bsize = stbuf->f_bsize; + dict_buf->f_frsize = stbuf->f_frsize; + } + + dict_buf->f_blocks += stbuf->f_blocks; + dict_buf->f_bfree += stbuf->f_bfree; + dict_buf->f_bavail += stbuf->f_bavail; + dict_buf->f_files += stbuf->f_files; + dict_buf->f_ffree += stbuf->f_ffree; + dict_buf->f_favail += stbuf->f_favail; + dict_buf->f_fsid = stbuf->f_fsid; + dict_buf->f_flag = stbuf->f_flag; + dict_buf->f_namemax = stbuf->f_namemax; + local->op_ret = op_ret; + } else { + /* fop on storage node has failed due to some error */ + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): %s", + prev_frame->this->name, + strerror (op_errno)); + } + local->op_errno = op_errno; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, 
local->op_errno, + &local->statvfs_buf); + } + + return 0; +} + +/** + * unify_statfs - + */ +int32_t +unify_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_local_t *local = NULL; + xlator_list_t *trav = this->children; + + INIT_LOCAL (frame, local); + local->call_count = ((unify_private_t *)this->private)->child_count; + + while(trav) { + STACK_WIND (frame, + unify_statfs_cbk, + trav->xlator, + trav->xlator->fops->statfs, + loc); + trav = trav->next; + } + + return 0; +} + +/** + * unify_buf_cbk - + */ +int32_t +unify_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "%s(): child(%s): path(%s): %s", + gf_fop_list[frame->root->op], + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + + local->op_errno = op_errno; + if ((op_errno == ENOENT) && priv->optimist) + local->op_ret = 0; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + if (NS (this) == prev_frame->this) { + local->st_ino = buf->st_ino; + /* If the entry is directory, get the stat + from NS node */ + if (S_ISDIR (buf->st_mode) || + !local->stbuf.st_blksize) { + local->stbuf = *buf; + } + } + + if ((!S_ISDIR (buf->st_mode)) && + (NS (this) != prev_frame->this)) { + /* If file, take the stat info from Storage + node. 
*/ + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + /* If the inode number is not filled, operation should + fail */ + if (!local->st_ino) + local->op_ret = -1; + + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + +#define check_if_dht_linkfile(s) ((s->st_mode & ~S_IFMT) == S_ISVTX) + +/** + * unify_lookup_cbk - + */ +int32_t +unify_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + inode_t *tmp_inode = NULL; + dict_t *local_dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + if ((op_errno != ENOTCONN) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + + } else if (local->revalidate && + !(priv->optimist && (op_errno == ENOENT))) { + + gf_log (this->name, + (op_errno == ENOTCONN) ? 
+ GF_LOG_DEBUG:GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + } + + if (op_ret == 0) { + local->op_ret = 0; + + if (check_if_dht_linkfile(buf)) { + gf_log (this->name, GF_LOG_CRITICAL, + "file %s may be DHT link file on %s, " + "make sure the backend is not shared " + "between unify and DHT", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + } + + if (local->stbuf.st_mode && local->stbuf.st_blksize) { + /* make sure we already have a stbuf + stored in local->stbuf */ + if (S_ISDIR (local->stbuf.st_mode) && + !S_ISDIR (buf->st_mode)) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] '%s' is directory " + "on namespace, non-directory " + "on node '%s', returning EIO", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + local->return_eio = 1; + } + if (!S_ISDIR (local->stbuf.st_mode) && + S_ISDIR (buf->st_mode)) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] '%s' is directory " + "on node '%s', non-directory " + "on namespace, returning EIO", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + local->return_eio = 1; + } + } + + if (!local->revalidate && !S_ISDIR (buf->st_mode)) { + /* This is the first time lookup on file*/ + if (!local->list) { + /* list is not allocated, allocate + the max possible range */ + local->list = CALLOC (1, 2 * (priv->child_count + 2)); + if (!local->list) { + gf_log (this->name, + GF_LOG_CRITICAL, + "Not enough memory"); + STACK_UNWIND (frame, -1, + ENOMEM, inode, + NULL, NULL); + return 0; + } + } + /* update the index of the list */ + local->list [local->index++] = + (int16_t)(long)cookie; + } + + if ((!local->dict) && dict && + (priv->xl_array[(long)cookie] != NS(this))) { + local->dict = dict_ref (dict); + } + + /* index of NS node is == total child count */ + if (priv->child_count == (int16_t)(long)cookie) { + /* Take the inode number from namespace */ + 
local->st_ino = buf->st_ino; + if (S_ISDIR (buf->st_mode) || + !(local->stbuf.st_blksize)) { + local->stbuf = *buf; + } + } else if (!S_ISDIR (buf->st_mode)) { + /* If file, then get the stat from + storage node */ + local->stbuf = *buf; + } + + if (local->st_nlink < buf->st_nlink) { + local->st_nlink = buf->st_nlink; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local_dict = local->dict; + if (local->return_eio) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] Unable to fix the path (%s) with " + "self-heal, try manual verification. " + "returning EIO.", local->loc1.path); + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL); + if (local_dict) { + dict_unref (local_dict); + } + return 0; + } + + if (!local->stbuf.st_blksize) { + /* Inode not present */ + local->op_ret = -1; + } else { + if (!local->revalidate && + !S_ISDIR (local->stbuf.st_mode)) { + /* If its a file, big array is useless, + allocate the smaller one */ + int16_t *list = NULL; + list = CALLOC (1, 2 * (local->index + 1)); + ERR_ABORT (list); + memcpy (list, local->list, 2 * local->index); + /* Make the end of the list as -1 */ + FREE (local->list); + local->list = list; + local->list [local->index] = -1; + /* Update the inode's ctx with proper array */ + /* TODO: log on failure */ + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->list); + } + + if (S_ISDIR(local->loc1.inode->st_mode)) { + /* lookup is done for directory */ + if (local->failed && priv->self_heal) { + /* Triggering self-heal */ + /* means, self-heal required for this + inode */ + local->inode_generation = 0; + priv->inode_generation++; + } + } else { + local->stbuf.st_ino = local->st_ino; + } + + local->stbuf.st_nlink = local->st_nlink; + } + if (local->op_ret == -1) { + if (!local->revalidate && local->list) + FREE (local->list); + } + + if ((local->op_ret >= 0) && local->failed && + local->revalidate) { + /* Done revalidate, but it failed */ + if (op_errno != ENOTCONN) { 
+ gf_log (this->name, GF_LOG_ERROR, + "Revalidate failed for path(%s): %s", + local->loc1.path, strerror (op_errno)); + } + local->op_ret = -1; + } + + if ((priv->self_heal && !priv->optimist) && + (!local->revalidate && (local->op_ret == 0) && + S_ISDIR(local->stbuf.st_mode))) { + /* Let the self heal be done here */ + zr_unify_self_heal (frame, this, local); + local_dict = NULL; + } else { + /* either no self heal, or op_ret == -1 (failure) */ + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + tmp_inode, &local->stbuf, local->dict); + } + if (local_dict) { + dict_unref (local_dict); + } + } + + return 0; +} + +/** + * unify_lookup - + */ +int32_t +unify_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int16_t *list = NULL; + long index = 0; + + if (!(loc && loc->inode)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: Argument not right", loc?loc->path:"(null)"); + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); + return 0; + } + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL); + return 0; + } + + if (!inode_ctx_get (loc->inode, this, NULL) && + loc->inode->st_mode && + !S_ISDIR (loc->inode->st_mode)) { + uint64_t tmp_list = 0; + /* check if revalidate or fresh lookup */ + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + } + + if (local->list) { + list = local->list; + for (index = 0; list[index] != -1; index++); + if (index != 2) { + if (index < 2) { + gf_log (this->name, GF_LOG_ERROR, + "returning ESTALE for %s: file " + "count is %ld", loc->path, index); + /* Print where all the file is present */ + for (index = 0; + local->list[index] != -1; index++) { + 
gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", loc->path, + priv->xl_array[list[index]]->name); + } + unify_local_wipe (local); + STACK_UNWIND (frame, -1, ESTALE, + NULL, NULL, NULL); + return 0; + } else { + /* There are more than 2 presences */ + /* Just log and continue */ + gf_log (this->name, GF_LOG_ERROR, + "%s: file count is %ld", + loc->path, index); + /* Print where all the file is present */ + for (index = 0; + local->list[index] != -1; index++) { + gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", loc->path, + priv->xl_array[list[index]]->name); + } + } + } + + /* is revalidate */ + local->revalidate = 1; + + for (index = 0; list[index] != -1; index++) + local->call_count++; + + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_lookup_cbk, + (void *)(long)list[index], //cookie + priv->xl_array [list[index]], + priv->xl_array [list[index]]->fops->lookup, + loc, + xattr_req); + if (need_break) + break; + } + } else { + if (loc->inode->st_mode) { + if (inode_ctx_get (loc->inode, this, NULL)) { + inode_ctx_get (loc->inode, this, + &local->inode_generation); + } + } + /* This is first call, there is no list */ + /* call count should be all child + 1 namespace */ + local->call_count = priv->child_count + 1; + + for (index = 0; index <= priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_lookup_cbk, + (void *)index, //cookie + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + loc, + xattr_req); + } + } + + return 0; +} + +/** + * unify_stat - if directory, get the stat directly from NameSpace child. + * if file, check for a hint and send it only there (also to NS). + * if its a fresh stat, then do it on all the nodes. + * + * NOTE: for all the call, sending cookie as xlator pointer, which will be + * used in cbk. 
+ */
+int32_t
+unify_stat (call_frame_t *frame,
+            xlator_t *this,
+            loc_t *loc)
+{
+        unify_local_t   *local = NULL;
+        unify_private_t *priv = this->private;
+        int16_t          index = 0;
+        int16_t         *list = NULL;
+        uint64_t         tmp_list = 0;
+
+        UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
+
+        /* Initialization */
+        INIT_LOCAL (frame, local);
+        loc_copy (&local->loc1, loc);
+        if (local->loc1.path == NULL) {
+                gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
+                STACK_UNWIND (frame, -1, ENOMEM, NULL);
+                return 0;
+        }
+        /* The inode number reported to the caller is the one already
+         * cached on the inode; unify_buf_cbk copies it into the reply. */
+        local->st_ino = loc->inode->ino;
+        if (S_ISDIR (loc->inode->st_mode)) {
+                /* Directory: a single stat on the namespace child */
+                local->call_count = 1;
+                STACK_WIND (frame, unify_buf_cbk, NS(this),
+                            NS(this)->fops->stat, loc);
+        } else {
+                /* File: the inode ctx holds a -1 terminated array of
+                 * indices into priv->xl_array naming the children that
+                 * hold this file. */
+                inode_ctx_get (loc->inode, this, &tmp_list);
+                list = (int16_t *)(long)tmp_list;
+
+                /* tmp_list stays 0 when no ctx is stored, which would make
+                 * list NULL and crash the loops below; an empty list would
+                 * wind nothing and leave the frame waiting forever.
+                 * Fail the call instead.  ESTALE mirrors what unify_lookup
+                 * returns for an unusable list. */
+                if ((list == NULL) || (list[0] == -1)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "%s: no subvolume list in inode context, "
+                                "returning ESTALE", loc->path);
+                        unify_local_wipe (local);
+                        STACK_UNWIND (frame, -1, ESTALE, NULL);
+                        return 0;
+                }
+
+                /* Count every reply we expect before winding anything, so
+                 * the callback's decrement cannot reach zero early. */
+                for (index = 0; list[index] != -1; index++)
+                        local->call_count++;
+
+                for (index = 0; list[index] != -1; index++) {
+                        /* Decide whether this is the last entry BEFORE
+                         * winding, so list is not read again afterwards.
+                         * NOTE(review): presumably the list may no longer
+                         * be valid once the final reply has been
+                         * processed -- confirm. */
+                        char need_break = (list[index+1] == -1);
+                        STACK_WIND (frame,
+                                    unify_buf_cbk,
+                                    priv->xl_array[list[index]],
+                                    priv->xl_array[list[index]]->fops->stat,
+                                    loc);
+                        if (need_break)
+                                break;
+                }
+        }
+
+        return 0;
+}
+
+/**
+ * unify_access_cbk - reply handler for unify_access; passes the
+ *                    namespace child's op_ret/op_errno straight up to
+ *                    the caller without touching them.
+ */
+int32_t
+unify_access_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+                  int32_t op_ret,
+                  int32_t op_errno)
+{
+        STACK_UNWIND (frame, op_ret, op_errno);
+        return 0;
+}
+
+
+/**
+ * unify_access - Send request to only namespace, which has all the
+ *                attributes set for the file.
+ */ +int32_t +unify_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, + unify_access_cbk, + NS(this), + NS(this)->fops->access, + loc, + mask); + + return 0; +} + +int32_t +unify_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + inode_t *tmp_inode = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if ((op_ret == -1) && !(priv->optimist && + (op_errno == ENOENT || + op_errno == EEXIST))) { + /* TODO: Decrement the inode_generation of + * this->inode's parent inode, hence the missing + * directory is created properly by self-heal. + * Currently, there is no way to get the parent + * inode directly. + */ + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + if (op_errno != EEXIST) + local->failed = 1; + local->op_errno = op_errno; + } + + if (op_ret >= 0) + local->op_ret = 0; + + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (!local->failed) { + inode_ctx_put (local->loc1.inode, this, + priv->inode_generation); + } + + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + tmp_inode, &local->stbuf); + } + + return 0; +} + +/** + * unify_ns_mkdir_cbk - + */ +int32_t +unify_ns_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + long index = 0; + + if (op_ret == -1) { + /* No need to send mkdir request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + 
local->name, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, NULL); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->stbuf = *buf; + + local->call_count = priv->child_count; + + /* Send mkdir request to all the nodes now */ + for (index = 0; index < priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_mkdir_cbk, + (void *)index, //cookie + priv->xl_array[index], + priv->xl_array[index]->fops->mkdir, + &local->loc1, + local->mode); + } + + return 0; +} + + +/** + * unify_mkdir - + */ +int32_t +unify_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + + loc_copy (&local->loc1, loc); + + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_mkdir_cbk, + NS(this), + NS(this)->fops->mkdir, + loc, + mode); + return 0; +} + +/** + * unify_rmdir_cbk - + */ +int32_t +unify_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT))) + local->op_ret = 0; + if (op_ret == -1) + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_ns_rmdir_cbk - + */ +int32_t +unify_ns_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { 
+ /* No need to send rmdir request to other servers, + * as namespace action failed + */ + gf_log (this->name, + ((op_errno != ENOTEMPTY) ? + GF_LOG_ERROR : GF_LOG_DEBUG), + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno); + return 0; + } + + local->call_count = priv->child_count; + + for (index = 0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_rmdir_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->rmdir, + &local->loc1); + } + + return 0; +} + +/** + * unify_rmdir - + */ +int32_t +unify_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_local_t *local = NULL; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + + STACK_WIND (frame, + unify_ns_rmdir_cbk, + NS(this), + NS(this)->fops->rmdir, + loc); + + return 0; +} + +/** + * unify_open_cbk - + */ +int32_t +unify_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + local->op_ret = op_ret; + if (NS(this) != (xlator_t *)cookie) { + /* Store child node's ptr, used in + all the f*** / FileIO calls */ + fd_ctx_set (fd, this, (uint64_t)(long)cookie); + } + } + if (op_ret == -1) { + local->op_errno = op_errno; + local->failed = 1; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if ((local->failed == 1) && (local->op_ret >= 0)) { + local->call_count = 1; + /* return -1 to user */ + local->op_ret = -1; + //local->op_errno = EIO; + + if (!fd_ctx_get (local->fd, this, NULL)) { + gf_log (this->name, GF_LOG_ERROR, + "Open success on child 
node, " + "failed on namespace"); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Open success on namespace, " + "failed on child node"); + } + } + + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +/** + * unify_create_lookup_cbk - + */ +int32_t +unify_open_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->index++; + if (NS(this) == priv->xl_array[(long)cookie]) { + local->list[0] = (int16_t)(long)cookie; + } else { + local->list[1] = (int16_t)(long)cookie; + } + if (S_ISDIR (buf->st_mode)) + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + int16_t file_list[3] = {0,}; + local->op_ret = -1; + + file_list[0] = local->list[0]; + file_list[1] = local->list[1]; + file_list[2] = -1; + + if (local->index != 2) { + /* Lookup failed, can't do open */ + gf_log (this->name, GF_LOG_ERROR, + "%s: present on %d nodes", + local->name, local->index); + + if (local->index < 2) { + unify_local_wipe (local); + gf_log (this->name, GF_LOG_ERROR, + "returning as file found on less " + "than 2 nodes"); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + return 0; + } + } + + if (local->failed) { + /* Open on directory, return EISDIR */ + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EISDIR, local->fd); + return 0; + } + + /* Everything is perfect :) */ + 
local->call_count = 2; + + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_open_cbk, + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + &local->loc1, + local->flags, + local->fd); + if (need_break) + break; + } + } + + return 0; +} + + +int32_t +unify_open_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { + STACK_UNWIND (frame, -1, ENOENT); + return 0; + } + + if (path[0] == '/') { + local->name = strdup (path); + ERR_ABORT (local->name); + } else { + char *tmp_str = strdup (local->loc1.path); + char *tmp_base = dirname (tmp_str); + local->name = CALLOC (1, ZR_PATH_MAX); + strcpy (local->name, tmp_base); + strncat (local->name, "/", 1); + strcat (local->name, path); + FREE (tmp_str); + } + + local->list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (local->list); + local->call_count = priv->child_count + 1; + local->op_ret = -1; + for (index = 0; index <= priv->child_count; index++) { + /* Send the lookup to all the nodes including namespace */ + STACK_WIND_COOKIE (frame, + unify_open_lookup_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + &local->loc1, + NULL); + } + + return 0; +} +#endif /* GF_DARWIN_HOST_OS */ + +/** + * unify_open - + */ +int32_t +unify_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int16_t file_list[3] = {0,}; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Init */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->fd = fd; + 
local->flags = flags; + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + local->list = list; + file_list[0] = priv->child_count; /* Thats namespace */ + file_list[2] = -1; + for (index = 0; list[index] != -1; index++) { + local->call_count++; + if (list[index] != priv->child_count) + file_list[1] = list[index]; + } + + if (local->call_count != 2) { + /* If the lookup was done for file */ + gf_log (this->name, GF_LOG_ERROR, + "%s: entry_count is %d", + loc->path, local->call_count); + for (index = 0; local->list[index] != -1; index++) + gf_log (this->name, GF_LOG_ERROR, "%s: found on %s", + loc->path, priv->xl_array[list[index]]->name); + + if (local->call_count < 2) { + gf_log (this->name, GF_LOG_ERROR, + "returning EIO as file found on onlyone node"); + STACK_UNWIND (frame, -1, EIO, fd); + return 0; + } + } + +#ifdef GF_DARWIN_HOST_OS + /* Handle symlink here */ + if (S_ISLNK (loc->inode->st_mode)) { + /* Callcount doesn't matter here */ + STACK_WIND (frame, + unify_open_readlink_cbk, + NS(this), + NS(this)->fops->readlink, + loc, ZR_PATH_MAX); + return 0; + } +#endif /* GF_DARWIN_HOST_OS */ + + local->call_count = 2; + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_open_cbk, + priv->xl_array[file_list[index]], //cookie + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + loc, + flags, + fd); + if (need_break) + break; + } + + return 0; +} + + +int32_t +unify_create_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + inode_t *inode = local->loc1.inode; + + unify_local_wipe (local); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd, + inode, &local->stbuf); + + return 0; +} + +/** + * unify_create_open_cbk - + */ +int32_t +unify_create_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, 
+ int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int ret = 0; + int32_t callcnt = 0; + unify_local_t *local = frame->local; + inode_t *inode = NULL; + xlator_t *child = NULL; + uint64_t tmp_value = 0; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + local->op_ret = op_ret; + if (NS(this) != (xlator_t *)cookie) { + /* Store child node's ptr, used in all + the f*** / FileIO calls */ + /* TODO: log on failure */ + ret = fd_ctx_get (fd, this, &tmp_value); + cookie = (void *)(long)tmp_value; + } else { + /* NOTE: open successful on namespace. + * fd's ctx can be used to identify open + * failure on storage subvolume. cool + * ide ;) */ + local->failed = 0; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + ((xlator_t *)cookie)->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed == 1 && (local->op_ret >= 0)) { + local->call_count = 1; + /* return -1 to user */ + local->op_ret = -1; + local->op_errno = EIO; + local->fd = fd; + local->call_count = 1; + + if (!fd_ctx_get (local->fd, this, &tmp_value)) { + child = (xlator_t *)(long)tmp_value; + + gf_log (this->name, GF_LOG_ERROR, + "Create success on child node, " + "failed on namespace"); + + STACK_WIND (frame, + unify_create_unlink_cbk, + child, + child->fops->unlink, + &local->loc1); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Create success on namespace, " + "failed on child node"); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + } + return 0; + } + inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, fd, + inode, &local->stbuf); + } + return 0; +} + +/** + * unify_create_lookup_cbk - + */ +int32_t +unify_create_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t 
op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->list[local->index++] = (int16_t)(long)cookie; + if (NS(this) == priv->xl_array[(long)cookie]) { + local->st_ino = buf->st_ino; + } else { + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + int16_t *list = local->list; + int16_t file_list[3] = {0,}; + local->op_ret = -1; + + local->list [local->index] = -1; + file_list[0] = list[0]; + file_list[1] = list[1]; + file_list[2] = -1; + + local->stbuf.st_ino = local->st_ino; + /* TODO: log on failure */ + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->list); + + if (local->index != 2) { + /* Lookup failed, can't do open */ + gf_log (this->name, GF_LOG_ERROR, + "%s: present on %d nodes", + local->loc1.path, local->index); + file_list[0] = priv->child_count; + for (index = 0; list[index] != -1; index++) { + gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", local->loc1.path, + priv->xl_array[list[index]]->name); + if (list[index] != priv->child_count) + file_list[1] = list[index]; + } + + if (local->index < 2) { + unify_local_wipe (local); + gf_log (this->name, GF_LOG_ERROR, + "returning EIO as file found on " + "only one node"); + STACK_UNWIND (frame, -1, EIO, + local->fd, inode, NULL); + return 0; + } + } + /* Everything is perfect :) */ + local->call_count = 2; + + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_create_open_cbk, + 
priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + &local->loc1, + local->flags, + local->fd); + if (need_break) + break; + } + } + + return 0; +} + + +/** + * unify_create_cbk - + */ +int32_t +unify_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + inode_t *tmp_inode = NULL; + + if (op_ret == -1) { + /* send unlink () on Namespace */ + local->op_errno = op_errno; + local->op_ret = -1; + local->call_count = 1; + gf_log (this->name, GF_LOG_ERROR, + "create failed on %s (file %s, error %s), " + "sending unlink to namespace", + prev_frame->this->name, + local->loc1.path, strerror (op_errno)); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->stbuf = *buf; + /* Just inode number should be from NS node */ + local->stbuf.st_ino = local->st_ino; + + /* TODO: log on failure */ + ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this); + } + + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd, + tmp_inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_create_cbk - + * + */ +int32_t +unify_ns_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send create request to other servers, as + namespace action failed. Handle exclusive create here. 
*/ + if ((op_errno != EEXIST) || + ((op_errno == EEXIST) && + ((local->flags & O_EXCL) == O_EXCL))) { + /* If its just a create call without O_EXCL, + don't do this */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; + } + } + + if (op_ret >= 0) { + /* Get the inode number from the NS node */ + local->st_ino = buf->st_ino; + + local->op_ret = -1; + + /* Start the mapping list */ + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + inode_ctx_put (inode, this, (uint64_t)(long)list); + list[0] = priv->child_count; + list[2] = -1; + + /* This means, file doesn't exist anywhere in the Filesystem */ + sched_ops = priv->sched_ops; + + /* Send create request to the scheduled node now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (sched_xl == NULL) + { + /* send unlink () on Namespace */ + local->op_errno = ENOTCONN; + local->op_ret = -1; + local->call_count = 1; + gf_log (this->name, GF_LOG_ERROR, + "no node online to schedule create:(file %s) " + "sending unlink to namespace", + (local->loc1.path)?local->loc1.path:""); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, unify_create_cbk, + sched_xl, sched_xl->fops->create, + &local->loc1, local->flags, local->mode, fd); + } else { + /* File already exists, and there is no O_EXCL flag */ + + gf_log (this->name, GF_LOG_DEBUG, + "File(%s) already exists on namespace, sending " + "open instead", local->loc1.path); + + local->list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (local->list); + local->call_count = priv->child_count + 1; + local->op_ret = -1; + for (index = 0; index <= priv->child_count; index++) { + /* Send 
lookup() to all nodes including namespace */ + STACK_WIND_COOKIE (frame, + unify_create_lookup_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + &local->loc1, + NULL); + } + } + return 0; +} + +/** + * unify_create - create a file in global namespace first, so other + * clients can see them. Create the file in storage nodes in background. + */ +int32_t +unify_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + local->flags = flags; + local->fd = fd; + + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_create_cbk, + NS(this), + NS(this)->fops->create, + loc, + flags | O_EXCL, + mode, + fd); + + return 0; +} + + +/** + * unify_opendir_cbk - + */ +int32_t +unify_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + +/** + * unify_opendir - + */ +int32_t +unify_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, unify_opendir_cbk, + NS(this), NS(this)->fops->opendir, loc, fd); + + return 0; +} + + +/** + * unify_chmod - + */ +int32_t +unify_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR 
(loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->chmod, + loc, mode); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->chmod, + loc, + mode); + if (!--callcnt) + break; + } + } + + return 0; +} + +/** + * unify_chown - + */ +int32_t +unify_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->chown, + loc, uid, gid); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->chown, + loc, uid, gid); + if (!--callcnt) + break; + } + } + + return 0; +} + + +/** + * unify_truncate_cbk - + */ +int32_t +unify_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, 
+ int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + local->op_errno = op_errno; + if (!((op_errno == ENOENT) && priv->optimist)) + local->op_ret = -1; + } + + if (op_ret >= 0) { + if (NS (this) == prev_frame->this) { + local->st_ino = buf->st_ino; + /* If the entry is directory, get the + stat from NS node */ + if (S_ISDIR (buf->st_mode) || + !local->stbuf.st_blksize) { + local->stbuf = *buf; + } + } + + if ((!S_ISDIR (buf->st_mode)) && + (NS (this) != prev_frame->this)) { + /* If file, take the stat info from + Storage node. */ + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->st_ino) + local->stbuf.st_ino = local->st_ino; + else + local->op_ret = -1; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + +/** + * unify_truncate - + */ +int32_t +unify_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = 1; + + STACK_WIND (frame, + unify_buf_cbk, + NS(this), + NS(this)->fops->stat, + loc); + } else { + local->op_ret = 0; + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + 
local->call_count++; + callcnt++; + } + + /* Don't send truncate to NS node */ + STACK_WIND (frame, unify_truncate_cbk, NS(this), + NS(this)->fops->stat, loc); + callcnt--; + + for (index = 0; local->list[index] != -1; index++) { + if (NS(this) != priv->xl_array[local->list[index]]) { + STACK_WIND (frame, + unify_truncate_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->truncate, + loc, + offset); + if (!--callcnt) + break; + } + } + } + + return 0; +} + +/** + * unify_utimens - + */ +int32_t +unify_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->utimens, + loc, tv); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->utimens, + loc, + tv); + if (!--callcnt) + break; + } + } + + return 0; +} + +/** + * unify_readlink_cbk - + */ +int32_t +unify_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + STACK_UNWIND (frame, op_ret, op_errno, path); + return 0; +} + +/** + * unify_readlink - Read the link only from the storage node. 
+ */ +int32_t +unify_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + unify_private_t *priv = this->private; + int32_t entry_count = 0; + int16_t *list = NULL; + int16_t index = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) + entry_count++; + + if (entry_count >= 2) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_readlink_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->readlink, + loc, + size); + break; + } + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "returning ENOENT, no softlink files found " + "on storage node"); + STACK_UNWIND (frame, -1, ENOENT, NULL); + } + + return 0; +} + + +/** + * unify_unlink_cbk - + */ +int32_t +unify_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist)) + local->op_ret = 0; + if (op_ret == -1) + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + + +/** + * unify_unlink - + */ +int32_t +unify_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for 
(index = 0; list[index] != -1; index++) + local->call_count++; + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + STACK_WIND (frame, + unify_unlink_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->unlink, + loc); + if (need_break) + break; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "%s: returning ENOENT", loc->path); + STACK_UNWIND (frame, -1, ENOENT); + } + + return 0; +} + + +/** + * unify_readv_cbk - + */ +int32_t +unify_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + +/** + * unify_readv - + */ +int32_t +unify_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, + unify_readv_cbk, + child, + child->fops->readv, + fd, + size, + offset); + + + return 0; +} + +/** + * unify_writev_cbk - + */ +int32_t +unify_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +/** + * unify_writev - + */ +int32_t +unify_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, + unify_writev_cbk, + child, + child->fops->writev, + fd, + vector, + count, + off); + + return 0; +} + +/** + * unify_ftruncate - + */ +int32_t +unify_ftruncate (call_frame_t 
*frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + xlator_t *child = NULL; + unify_local_t *local = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->op_ret = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_truncate_cbk, + child, child->fops->ftruncate, + fd, offset); + + STACK_WIND (frame, unify_truncate_cbk, + NS(this), NS(this)->fops->fstat, + fd); + + return 0; +} + + +/** + * unify_fchmod - + */ +int32_t +unify_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fchmod, fd, mode); + + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fchmod, fd, mode); + + } else { + /* this is an directory */ + local->call_count = 1; + + STACK_WIND (frame, unify_buf_cbk, + NS(this), NS(this)->fops->fchmod, fd, mode); + } + + return 0; +} + +/** + * unify_fchown - + */ +int32_t +unify_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fchown, fd, uid, gid); + + STACK_WIND (frame, unify_buf_cbk, 
NS(this), + NS(this)->fops->fchown, fd, uid, gid); + } else { + local->call_count = 1; + + STACK_WIND (frame, unify_buf_cbk, + NS(this), NS(this)->fops->fchown, + fd, uid, gid); + } + + return 0; +} + +/** + * unify_flush_cbk - + */ +int32_t +unify_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_flush - + */ +int32_t +unify_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_flush_cbk, child, + child->fops->flush, fd); + + return 0; +} + + +/** + * unify_fsync_cbk - + */ +int32_t +unify_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_fsync - + */ +int32_t +unify_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fsync_cbk, child, + child->fops->fsync, fd, flags); + + return 0; +} + +/** + * unify_fstat - Send fstat FOP to Namespace only if its directory, and to + * both namespace and the storage node if its a file. 
+ */ +int32_t +unify_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fstat, fd); + + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fstat, fd); + + } else { + /* this is an directory */ + local->call_count = 1; + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fstat, fd); + } + + return 0; +} + +/** + * unify_getdents_cbk - + */ +int32_t +unify_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + STACK_UNWIND (frame, op_ret, op_errno, entry, count); + return 0; +} + +/** + * unify_getdents - send the FOP request to all the nodes. + */ +int32_t +unify_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_getdents_cbk, NS(this), + NS(this)->fops->getdents, fd, size, offset, flag); + + return 0; +} + + +/** + * unify_readdir_cbk - + */ +int32_t +unify_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + + return 0; +} + +/** + * unify_readdir - send the FOP request to all the nodes. 
+ */ +int32_t +unify_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_readdir_cbk, NS(this), + NS(this)->fops->readdir, fd, size, offset); + + return 0; +} + + +/** + * unify_fsyncdir_cbk - + */ +int32_t +unify_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/** + * unify_fsyncdir - + */ +int32_t +unify_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_fsyncdir_cbk, + NS(this), NS(this)->fops->fsyncdir, fd, flags); + + return 0; +} + +/** + * unify_lk_cbk - UNWIND frame with the proper return arguments. + */ +int32_t +unify_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + STACK_UNWIND (frame, op_ret, op_errno, lock); + return 0; +} + +/** + * unify_lk - Send it to all the storage nodes, (should be 1) which has file. 
+ */ +int32_t +unify_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_lk_cbk, child, + child->fops->lk, fd, cmd, lock); + + return 0; +} + + +int32_t +unify_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno); + +static int32_t +unify_setxattr_file_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_private_t *private = this->private; + unify_local_t *local = frame->local; + xlator_t *sched_xl = NULL; + struct sched_ops *sched_ops = NULL; + + if (op_ret == -1) { + if (!ENOTSUP) + gf_log (this->name, GF_LOG_ERROR, + "setxattr with XATTR_CREATE on ns: " + "path(%s) key(%s): %s", + local->loc1.path, local->name, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno); + return 0; + } + + LOCK (&frame->lock); + { + local->failed = 0; + local->op_ret = 0; + local->op_errno = 0; + local->call_count = 1; + } + UNLOCK (&frame->lock); + + /* schedule XATTR_CREATE on one of the child node */ + sched_ops = private->sched_ops; + + /* Send create request to the scheduled node now */ + sched_xl = sched_ops->schedule (this, local->name); + if (!sched_xl) { + STACK_UNWIND (frame, -1, ENOTCONN); + return 0; + } + + STACK_WIND (frame, + unify_setxattr_cbk, + sched_xl, + sched_xl->fops->setxattr, + &local->loc1, + local->dict, + local->flags); + return 0; +} + +/** + * unify_setxattr_cbk - When all the child nodes return, UNWIND frame. 
+ */ +int32_t +unify_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + dict_t *dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, (((op_errno == ENOENT) || + (op_errno == ENOTSUP))? + GF_LOG_DEBUG : GF_LOG_ERROR), + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + if (local->failed == -1) { + local->failed = 1; + } + local->op_errno = op_errno; + } else { + local->failed = 0; + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed && local->name && + ZR_FILE_CONTENT_REQUEST(local->name)) { + dict = get_new_dict (); + dict_set (dict, local->dict->members_list->key, + data_from_dynptr(NULL, 0)); + dict_ref (dict); + + local->call_count = 1; + + STACK_WIND (frame, + unify_setxattr_file_cbk, + NS(this), + NS(this)->fops->setxattr, + &local->loc1, + dict, + XATTR_CREATE); + + dict_unref (dict); + return 0; + } + + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_sexattr - This function should be sent to all the storage nodes, + * which contains the file, (excluding namespace). 
+ */ +int32_t +unify_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int32_t call_count = 0; + uint64_t tmp_list = 0; + data_pair_t *trav = dict->members_list; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->failed = -1; + loc_copy (&local->loc1, loc); + + if (S_ISDIR (loc->inode->st_mode)) { + + if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) { + /* direct the storage xlators to change file + content only if file exists */ + local->flags = flags; + local->dict = dict; + local->name = strdup (trav->key); + flags |= XATTR_REPLACE; + } + + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_setxattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->setxattr, + loc, dict, flags); + } + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + call_count++; + } + } + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_setxattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->setxattr, + loc, + dict, + flags); + if (!--call_count) + break; + } + } + return 0; + } + + /* No entry in storage nodes */ + gf_log (this->name, GF_LOG_DEBUG, + "returning ENOENT, file not found on storage node."); + STACK_UNWIND (frame, -1, ENOENT); + + return 0; +} + + +/** + * unify_getxattr_cbk - This function is called from only one child, so, no + * need of any lock or anything else, just send it to above layer + */ +int32_t +unify_getxattr_cbk (call_frame_t *frame, + void 
*cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *value) +{ + int32_t callcnt = 0; + dict_t *local_value = NULL; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, + (((op_errno == ENOENT) || + (op_errno == ENODATA) || + (op_errno == ENOTSUP)) ? + GF_LOG_DEBUG : GF_LOG_ERROR), + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + } else { + if (!local->dict) + local->dict = dict_ref (value); + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local_value = local->dict; + local->dict = NULL; + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local_value); + + if (local_value) + dict_unref (local_value); + } + + return 0; +} + + +/** + * unify_getxattr - This FOP is sent to only the storage node. + */ +int32_t +unify_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + int16_t count = 0; + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + INIT_LOCAL (frame, local); + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) + STACK_WIND (frame, + unify_getxattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->getxattr, + loc, + name); + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + count++; + } + } + + if (count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + 
unify_getxattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->getxattr, + loc, + name); + if (!--count) + break; + } + } + } else { + dict_t *tmp_dict = get_new_dict (); + gf_log (this->name, GF_LOG_DEBUG, + "%s: returning ENODATA, no file found on storage node", + loc->path); + STACK_UNWIND (frame, -1, ENODATA, tmp_dict); + dict_destroy (tmp_dict); + } + + return 0; +} + +/** + * unify_removexattr_cbk - Wait till all the child node returns the call + * and then UNWIND to above layer. + */ +int32_t +unify_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + local->op_errno = op_errno; + if (op_errno != ENOTSUP) + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, + local->loc1.path, strerror (op_errno)); + } else { + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_removexattr - Send it to all the child nodes which has the files. 
+ */ +int32_t +unify_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int32_t call_count = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) + STACK_WIND (frame, + unify_removexattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->removexattr, + loc, + name); + + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + call_count++; + } + } + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_removexattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->removexattr, + loc, + name); + if (!--call_count) + break; + } + } + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%s: returning ENOENT, not found on storage node.", loc->path); + STACK_UNWIND (frame, -1, ENOENT); + + return 0; +} + + +int32_t +unify_mknod_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "%s: %s", local->loc1.path, strerror (op_errno)); + + unify_local_wipe (local); + /* No log required here as this -1 is for mknod call */ + STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL); + return 0; +} + +/** + * unify_mknod_cbk - + */ +int32_t +unify_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + 
struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "mknod failed on storage node, sending unlink to " + "namespace"); + local->op_errno = op_errno; + STACK_WIND (frame, + unify_mknod_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + return 0; + } + + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + return 0; +} + +/** + * unify_ns_mknod_cbk - + */ +int32_t +unify_ns_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + call_frame_t *prev_frame = cookie; + + if (op_ret == -1) { + /* No need to send mknod request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, local->loc1.path, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->stbuf = *buf; + local->st_ino = buf->st_ino; + + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + list[0] = priv->child_count; + list[2] = -1; + inode_ctx_put (inode, this, (uint64_t)(long)list); + + sched_ops = priv->sched_ops; + + /* Send mknod request to scheduled node now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (!sched_xl) { + gf_log (this->name, GF_LOG_ERROR, + "mknod failed on storage node, no node online " + "at the moment, sending unlink to NS"); + local->op_errno = ENOTCONN; + STACK_WIND (frame, + unify_mknod_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; 
+ } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, unify_mknod_cbk, + sched_xl, sched_xl->fops->mknod, + &local->loc1, local->mode, local->dev); + + return 0; +} + +/** + * unify_mknod - Create a device on namespace first, and later create on + * the storage node. + */ +int32_t +unify_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + local->dev = rdev; + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_mknod_cbk, + NS(this), + NS(this)->fops->mknod, + loc, + mode, + rdev); + + return 0; +} + +int32_t +unify_symlink_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "%s: %s", local->loc1.path, strerror (op_errno)); + + unify_local_wipe (local); + STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL); + return 0; +} + +/** + * unify_symlink_cbk - + */ +int32_t +unify_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + /* Symlink on storage node failed, hence send unlink + to the NS node */ + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "symlink on storage node failed, sending unlink " + "to namespace"); + + STACK_WIND (frame, + unify_symlink_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + unify_local_wipe 
(local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_symlink_cbk - + */ +int32_t +unify_ns_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + int16_t *list = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send symlink request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, NULL, buf); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->st_ino = buf->st_ino; + + /* Start the mapping list */ + + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + list[0] = priv->child_count; //namespace's index + list[2] = -1; + inode_ctx_put (inode, this, (uint64_t)(long)list); + + sched_ops = priv->sched_ops; + + /* Send symlink request to all the nodes now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (!sched_xl) { + /* Symlink on storage node failed, hence send unlink + to the NS node */ + local->op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "symlink on storage node failed, no node online, " + "sending unlink to namespace"); + + STACK_WIND (frame, + unify_symlink_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, + unify_symlink_cbk, + sched_xl, + sched_xl->fops->symlink, + local->name, + &local->loc1); + + return 0; +} + +/** + * unify_symlink - + */ +int32_t +unify_symlink (call_frame_t *frame, + xlator_t *this, + const 
char *linkpath, + loc_t *loc) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->name = strdup (linkpath); + + if ((local->name == NULL) || + (local->loc1.path == NULL)) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_symlink_cbk, + NS(this), + NS(this)->fops->symlink, + linkpath, + loc); + + return 0; +} + + +int32_t +unify_rename_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s -> %s): %s", + prev_frame->this->name, + local->loc1.path, local->loc2.path, + strerror (op_errno)); + + } + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + return 0; +} + +int32_t +unify_ns_rename_undo_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + } + + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf); + return 0; +} + +int32_t +unify_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t index = 0; + int32_t callcnt = 0; + int16_t *list = NULL; + unify_private_t *priv = this->private; + unify_local_t *local = 
frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (!S_ISDIR (buf->st_mode)) + local->stbuf = *buf; + local->op_ret = op_ret; + } else { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s -> %s): %s", + prev_frame->this->name, + local->loc1.path, local->loc2.path, + strerror (op_errno)); + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->stbuf.st_ino = local->st_ino; + if (S_ISDIR (local->loc1.inode->st_mode)) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf); + return 0; + } + + if (local->op_ret == -1) { + /* TODO: check this logic */ + + /* Rename failed in storage node, successful on NS, + * hence, rename back the entries in NS */ + /* NOTE: this will be done only if the destination + * doesn't exists, if the destination exists, the + * job of correcting NS is left to self-heal + */ + if (!local->index) { + loc_t tmp_oldloc = { + /* its actual 'newloc->path' */ + .path = local->loc2.path, + .inode = local->loc1.inode, + .parent = local->loc2.parent + }; + + loc_t tmp_newloc = { + /* Actual 'oldloc->path' */ + .path = local->loc1.path, + .parent = local->loc1.parent + }; + + gf_log (this->name, GF_LOG_ERROR, + "rename succussful on namespace, on " + "stroage node failed, reverting back"); + + STACK_WIND (frame, + unify_ns_rename_undo_cbk, + NS(this), + NS(this)->fops->rename, + &tmp_oldloc, + &tmp_newloc); + return 0; + } + } else { + /* Rename successful on storage nodes */ + + int32_t idx = 0; + int16_t *tmp_list = NULL; + uint64_t tmp_list_int64 = 0; + if (local->loc2.inode) { + inode_ctx_get (local->loc2.inode, + this, &tmp_list_int64); + list = (int16_t *)(long)tmp_list_int64; + + } + + if (list) { + for (index = 0; list[index] != -1; index++); + tmp_list = CALLOC (1, index * 2); + memcpy (tmp_list, list, index * 2); + + for (index = 0; list[index] != -1; index++) { + /* 
TODO: Check this logic. */ + /* If the destination file exists in + * the same storage node where we sent + * 'rename' call, no need to send + * unlink + */ + for (idx = 0; + local->list[idx] != -1; idx++) { + if (tmp_list[index] == local->list[idx]) { + tmp_list[index] = priv->child_count; + continue; + } + } + + if (NS(this) != priv->xl_array[tmp_list[index]]) { + local->call_count++; + callcnt++; + } + } + + if (local->call_count) { + if (callcnt > 1) + gf_log (this->name, + GF_LOG_ERROR, + "%s->%s: more (%d) " + "subvolumes have the " + "newloc entry", + local->loc1.path, + local->loc2.path, + callcnt); + + for (index=0; + tmp_list[index] != -1; index++) { + if (NS(this) != priv->xl_array[tmp_list[index]]) { + STACK_WIND (frame, + unify_rename_unlink_cbk, + priv->xl_array[tmp_list[index]], + priv->xl_array[tmp_list[index]]->fops->unlink, + &local->loc2); + if (!--callcnt) + break; + } + } + + FREE (tmp_list); + return 0; + } + if (tmp_list) + FREE (tmp_list); + } + } + + /* Need not send 'unlink' to storage node */ + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, &local->stbuf); + } + + return 0; +} + +int32_t +unify_ns_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t index = 0; + int32_t callcnt = 0; + int16_t *list = NULL; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { + /* Free local->new_inode */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; + } + + local->stbuf = *buf; + local->st_ino = buf->st_ino; + + /* Everything is fine. 
*/ + if (S_ISDIR (buf->st_mode)) { + local->call_count = priv->child_count; + for (index=0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_rename_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->rename, + &local->loc1, + &local->loc2); + } + + return 0; + } + + local->call_count = 0; + /* send rename */ + list = local->list; + for (index=0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + callcnt++; + } + } + + if (local->call_count) { + for (index=0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + STACK_WIND (frame, + unify_rename_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->rename, + &local->loc1, + &local->loc2); + if (!--callcnt) + break; + } + } + } else { + /* file doesn't seem to be present in storage nodes */ + gf_log (this->name, GF_LOG_CRITICAL, + "CRITICAL: source file not in storage node, " + "rename successful on namespace :O"); + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EIO, NULL); + } + return 0; +} + + +/** + * unify_rename - One of the tricky function. 
The deadliest of all :O + */ +int32_t +unify_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, oldloc); + loc_copy (&local->loc2, newloc); + + if ((local->loc1.path == NULL) || + (local->loc2.path == NULL)) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + + inode_ctx_get (oldloc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + STACK_WIND (frame, + unify_ns_rename_cbk, + NS(this), + NS(this)->fops->rename, + oldloc, + newloc); + return 0; +} + +/** + * unify_link_cbk - + */ +int32_t +unify_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret >= 0) + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_link_cbk - + */ +int32_t +unify_ns_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + int16_t *list = local->list; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send link request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; + } + + /* Update inode for this entry */ + local->op_ret = 0; + local->st_ino = buf->st_ino; + + /* Send link request to the node now */ + for (index = 0; list[index] != -1; index++) { + 
char need_break = (list[index+1] == -1); + if (priv->xl_array[list[index]] != NS (this)) { + STACK_WIND (frame, + unify_link_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->link, + &local->loc1, + &local->loc2); + } + if (need_break) + break; + } + + return 0; +} + +/** + * unify_link - + */ +int32_t +unify_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc); + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, oldloc); + loc_copy (&local->loc2, newloc); + + inode_ctx_get (oldloc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + STACK_WIND (frame, + unify_ns_link_cbk, + NS(this), + NS(this)->fops->link, + oldloc, + newloc); + + return 0; +} + + +/** + * unify_checksum_cbk - + */ +int32_t +unify_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *fchecksum, + uint8_t *dchecksum) +{ + STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); + + return 0; +} + +/** + * unify_checksum - + */ +int32_t +unify_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + STACK_WIND (frame, + unify_checksum_cbk, + NS(this), + NS(this)->fops->checksum, + loc, + flag); + + return 0; +} + + +/** + * unify_finodelk_cbk - + */ +int +unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_finodelk + */ +int +unify_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int cmd, struct flock *flock) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, 
unify_finodelk_cbk, + child, child->fops->finodelk, + fd, cmd, flock); + + return 0; +} + + + +/** + * unify_fentrylk_cbk - + */ +int +unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_fentrylk + */ +int +unify_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) + +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fentrylk_cbk, + child, child->fops->fentrylk, + fd, basename, cmd, type); + + return 0; +} + + + +/** + * unify_fxattrop_cbk - + */ +int +unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + STACK_UNWIND (frame, op_ret, op_errno, xattr); + return 0; +} + +/** + * unify_fxattrop + */ +int +unify_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fxattrop_cbk, + child, child->fops->fxattrop, + fd, optype, xattr); + + return 0; +} + + +/** + * unify_inodelk_cbk - + */ +int +unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/** + * unify_inodelk + */ +int +unify_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int cmd, struct flock *flock) +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_inodelk_cbk, + child, child->fops->inodelk, + loc, cmd, flock); + + return 0; +} + + + +/** + * unify_entrylk_cbk - + 
*/ +int +unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_entrylk + */ +int +unify_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) + +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_entrylk_cbk, + child, child->fops->entrylk, + loc, basename, cmd, type); + + return 0; +} + + + +/** + * unify_xattrop_cbk - + */ +int +unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + STACK_UNWIND (frame, op_ret, op_errno, xattr); + return 0; +} + +/** + * unify_xattrop + */ +int +unify_xattrop (call_frame_t *frame, xlator_t *this, + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_xattrop_cbk, + child, child->fops->xattrop, + loc, optype, xattr); + + return 0; +} + + +/** + * notify + */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + unify_private_t *priv = this->private; + struct sched_ops *sched = NULL; + + if (!priv) { + return 0; + } + + sched = priv->sched_ops; + if (!sched) { + gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O"); + raise (SIGTERM); + return 0; + } + if (priv->namespace == data) { + if (event == GF_EVENT_CHILD_UP) { + sched->notify (this, event, data); + } + return 0; + } + + switch (event) + { + case GF_EVENT_CHILD_UP: + { + /* Call scheduler's update () to enable it for scheduling */ + sched->notify (this, event, data); + + LOCK (&priv->lock); + { + /* Increment the inode's generation, which is + used for self_heal */ + ++priv->inode_generation; + ++priv->num_child_up; + } + UNLOCK (&priv->lock); + + if (!priv->is_up) { + default_notify (this, event, data); + priv->is_up = 1; + } + } + break; + case GF_EVENT_CHILD_DOWN: + { + /* Call scheduler's update () to disable the child node + * for scheduling + */ + sched->notify (this, event, data); + LOCK (&priv->lock); + { + --priv->num_child_up; + } + UNLOCK (&priv->lock); + + if (priv->num_child_up == 0) { + /* Send CHILD_DOWN to upper layer */ + default_notify (this, event, data); + priv->is_up = 0; + } + } + break; + + default: + { + default_notify (this, event, data); + } + break; + } + + return 0; +} + +/** + * init - This function is called first in the xlator, while initializing. + * All the config file options are checked and appropriate flags are set. + * + * @this - + */ +int32_t +init (xlator_t *this) +{ + int32_t ret = 0; + int32_t count = 0; + data_t *scheduler = NULL; + data_t *data = NULL; + xlator_t *ns_xl = NULL; + xlator_list_t *trav = NULL; + xlator_list_t *xlparent = NULL; + xlator_list_t *parent = NULL; + unify_private_t *_private = NULL; + + /* Check for number of child nodes, if there is no child nodes, exit */ + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "No child nodes specified. 
check \"subvolumes \" " + "option in volfile"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + /* Check for 'scheduler' in volume */ + scheduler = dict_get (this->options, "scheduler"); + if (!scheduler) { + gf_log (this->name, GF_LOG_ERROR, + "\"option scheduler <x>\" is missing in volfile"); + return -1; + } + + /* Setting "option namespace <node>" */ + data = dict_get (this->options, "namespace"); + if(!data) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace option not specified, Exiting"); + return -1; + } + /* Search namespace in the child node, if found, exit */ + trav = this->children; + while (trav) { + if (strcmp (trav->xlator->name, data->data) == 0) + break; + trav = trav->next; + } + if (trav) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace node used as a subvolume, Exiting"); + return -1; + } + + /* Search for the namespace node, if found, continue */ + ns_xl = this->next; + while (ns_xl) { + if (strcmp (ns_xl->name, data->data) == 0) + break; + ns_xl = ns_xl->next; + } + if (!ns_xl) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace node not found in volfile, Exiting"); + return -1; + } + + gf_log (this->name, GF_LOG_DEBUG, + "namespace node specified as %s", data->data); + + _private = CALLOC (1, sizeof (*_private)); + ERR_ABORT (_private); + _private->sched_ops = get_scheduler (this, scheduler->data); + if (!_private->sched_ops) { + gf_log (this->name, GF_LOG_CRITICAL, + "Error while loading scheduler. Exiting"); + FREE (_private); + return -1; + } + + if (ns_xl->parents) { + gf_log (this->name, GF_LOG_CRITICAL, + "Namespace node should not be a child of any other node. 
Exiting"); + FREE (_private); + return -1; + } + + _private->namespace = ns_xl; + + /* update _private structure */ + { + count = 0; + trav = this->children; + /* Get the number of child count */ + while (trav) { + count++; + trav = trav->next; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Child node count is %d", count); + + _private->child_count = count; + if (count == 1) { + /* TODO: Should I error out here? */ + gf_log (this->name, GF_LOG_CRITICAL, + "WARNING: You have defined only one " + "\"subvolumes\" for unify volume. It may not " + "be the desired config, review your volume " + "volfile. If this is how you are testing it," + " you may hit some performance penalty"); + } + + _private->xl_array = CALLOC (1, + sizeof (xlator_t) * (count + 1)); + ERR_ABORT (_private->xl_array); + + count = 0; + trav = this->children; + while (trav) { + _private->xl_array[count++] = trav->xlator; + trav = trav->next; + } + _private->xl_array[count] = _private->namespace; + + /* self-heal part, start with generation '1' */ + _private->inode_generation = 1; + /* Because, Foreground part is tested well */ + _private->self_heal = ZR_UNIFY_FG_SELF_HEAL; + data = dict_get (this->options, "self-heal"); + if (data) { + if (strcasecmp (data->data, "off") == 0) + _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF; + + if (strcasecmp (data->data, "foreground") == 0) + _private->self_heal = ZR_UNIFY_FG_SELF_HEAL; + + if (strcasecmp (data->data, "background") == 0) + _private->self_heal = ZR_UNIFY_BG_SELF_HEAL; + } + + /* optimist - ask bulde for more about it */ + data = dict_get (this->options, "optimist"); + if (data) { + if (gf_string2boolean (data->data, + &_private->optimist) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "optimist excepts only boolean " + "options"); + } + } + + LOCK_INIT (&_private->lock); + } + + /* Now that everything is fine. 
*/ + this->private = (void *)_private; + { + /* Initialize scheduler, if everything else is successful */ + ret = _private->sched_ops->init (this); + if (ret == -1) { + gf_log (this->name, GF_LOG_CRITICAL, + "Initializing scheduler failed, Exiting"); + FREE (_private); + return -1; + } + + ret = 0; + + /* This section is required because some fops may look + * for 'xl->parent' variable + */ + xlparent = CALLOC (1, sizeof (*xlparent)); + xlparent->xlator = this; + if (!ns_xl->parents) { + ns_xl->parents = xlparent; + } else { + parent = ns_xl->parents; + while (parent->next) + parent = parent->next; + parent->next = xlparent; + } + /* Initialize the namespace volume */ + if (!ns_xl->ready) { + ret = xlator_tree_init (ns_xl); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing namespace node failed, " + "Exiting"); + FREE (_private); + return -1; + } + } + } + + /* Tell namespace node that init is done */ + ns_xl->notify (ns_xl, GF_EVENT_PARENT_UP, this); + + return 0; +} + +/** + * fini - Free all the allocated memory + */ +void +fini (xlator_t *this) +{ + unify_private_t *priv = this->private; + priv->sched_ops->fini (this); + this->private = NULL; + LOCK_DESTROY (&priv->lock); + FREE (priv->xl_array); + FREE (priv); + return; +} + + +struct xlator_fops fops = { + .stat = unify_stat, + .chmod = unify_chmod, + .readlink = unify_readlink, + .mknod = unify_mknod, + .mkdir = unify_mkdir, + .unlink = unify_unlink, + .rmdir = unify_rmdir, + .symlink = unify_symlink, + .rename = unify_rename, + .link = unify_link, + .chown = unify_chown, + .truncate = unify_truncate, + .create = unify_create, + .open = unify_open, + .readv = unify_readv, + .writev = unify_writev, + .statfs = unify_statfs, + .flush = unify_flush, + .fsync = unify_fsync, + .setxattr = unify_setxattr, + .getxattr = unify_getxattr, + .removexattr = unify_removexattr, + .opendir = unify_opendir, + .readdir = unify_readdir, + .fsyncdir = unify_fsyncdir, + .access = unify_access, + .ftruncate = 
unify_ftruncate, + .fstat = unify_fstat, + .lk = unify_lk, + .fchown = unify_fchown, + .fchmod = unify_fchmod, + .utimens = unify_utimens, + .lookup = unify_lookup, + .getdents = unify_getdents, + .checksum = unify_checksum, + .inodelk = unify_inodelk, + .finodelk = unify_finodelk, + .entrylk = unify_entrylk, + .fentrylk = unify_fentrylk, + .xattrop = unify_xattrop, + .fxattrop = unify_fxattrop +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = { "namespace" }, + .type = GF_OPTION_TYPE_XLATOR + }, + { .key = { "scheduler" }, + .value = { "alu", "rr", "random", "nufa", "switch" }, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"self-heal"}, + .value = { "foreground", "background", "off" }, + .type = GF_OPTION_TYPE_STR + }, + /* TODO: remove it some time later */ + { .key = {"optimist"}, + .type = GF_OPTION_TYPE_BOOL + }, + + { .key = {NULL} }, +}; diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h new file mode 100644 index 000000000..bc18dc53f --- /dev/null +++ b/xlators/cluster/unify/src/unify.h @@ -0,0 +1,132 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef _UNIFY_H +#define _UNIFY_H + +#include "scheduler.h" +#include "list.h" + +#define MAX_DIR_ENTRY_STRING (32 * 1024) + +#define ZR_UNIFY_SELF_HEAL_OFF 0 +#define ZR_UNIFY_FG_SELF_HEAL 1 +#define ZR_UNIFY_BG_SELF_HEAL 2 + +/* Sometimes one should use completely random numbers.. its good :p */ +#define UNIFY_SELF_HEAL_GETDENTS_COUNT 1024 + +#define NS(xl) (((unify_private_t *)xl->private)->namespace) + +/* This is used to allocate memory for local structure */ +#define INIT_LOCAL(fr, loc) \ +do { \ + loc = CALLOC (1, sizeof (unify_local_t)); \ + ERR_ABORT (loc); \ + if (!loc) { \ + STACK_UNWIND (fr, -1, ENOMEM); \ + return 0; \ + } \ + fr->local = loc; \ + loc->op_ret = -1; \ + loc->op_errno = ENOENT; \ +} while (0) + + + +struct unify_private { + /* Update this structure depending on requirement */ + void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE, + if xlator is using scheduler */ + struct sched_ops *sched_ops; /* Scheduler options */ + xlator_t *namespace; /* ptr to namespace xlator */ + xlator_t **xl_array; + gf_boolean_t optimist; + int16_t child_count; + int16_t num_child_up; + uint8_t self_heal; + uint8_t is_up; + uint64_t inode_generation; + gf_lock_t lock; +}; +typedef struct unify_private unify_private_t; + +struct unify_self_heal_struct { + uint8_t dir_checksum[ZR_FILENAME_MAX]; + uint8_t ns_dir_checksum[ZR_FILENAME_MAX]; + uint8_t file_checksum[ZR_FILENAME_MAX]; + uint8_t ns_file_checksum[ZR_FILENAME_MAX]; + off_t *offset_list; + int *count_list; + dir_entry_t **entry_list; +}; + + +struct _unify_local_t { + int32_t call_count; + int32_t op_ret; + int32_t op_errno; + mode_t mode; + off_t offset; + dev_t dev; + uid_t uid; + gid_t gid; + int32_t flags; + int32_t entry_count; + int32_t count; // dir_entry_t count; + fd_t *fd; + struct stat stbuf; + struct statvfs statvfs_buf; + struct timespec tv[2]; + char *name; + int32_t revalidate; + + ino_t st_ino; + nlink_t 
st_nlink; + + dict_t *dict; + + int16_t *list; + int16_t *new_list; /* Used only in case of rename */ + int16_t index; + + int32_t failed; + int32_t return_eio; /* Used in case of different st-mode + present for a given path */ + + uint64_t inode_generation; /* used to store the per directory + * inode_generation. Got from inode's ctx + * of directory inodes + */ + + struct unify_self_heal_struct *sh_struct; + loc_t loc1, loc2; +}; +typedef struct _unify_local_t unify_local_t; + +int32_t zr_unify_self_heal (call_frame_t *frame, + xlator_t *this, + unify_local_t *local); + +#endif /* _UNIFY_H */ diff --git a/xlators/debug/Makefile.am b/xlators/debug/Makefile.am new file mode 100644 index 000000000..16cf893a1 --- /dev/null +++ b/xlators/debug/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = trace error-gen + +CLEANFILES = diff --git a/xlators/debug/error-gen/Makefile.am b/xlators/debug/error-gen/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/debug/error-gen/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am new file mode 100644 index 000000000..1bd7f332c --- /dev/null +++ b/xlators/debug/error-gen/src/Makefile.am @@ -0,0 +1,14 @@ + +xlator_LTLIBRARIES = error-gen.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug + +error_gen_la_LDFLAGS = -module -avoidversion + +error_gen_la_SOURCES = error-gen.c +error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c new file mode 100644 index 000000000..9c0b3253e --- /dev/null +++ b/xlators/debug/error-gen/src/error-gen.c @@ -0,0 +1,1780 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. 
<http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" + +typedef struct { + int op_count; +} eg_t; + +int error_gen (xlator_t *this) +{ + eg_t *egp = NULL; + int count = 0; + egp = this->private; + count = ++egp->op_count; + if((count % 10) == 0) { + count = count / 10; + if ((count % 2) == 0) + return ENOTCONN; + else + return EIO; + } + return 0; +} + +static int32_t +error_gen_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + inode, + buf, + dict); + return 0; +} + +int32_t +error_gen_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, + xattr_req); + return 0; +} + + +int32_t +error_gen_forget (xlator_t *this, + inode_t *inode) +{ + return 0; +} + +int32_t +error_gen_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + 
int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; +} + +int32_t +error_gen_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_chmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + loc, + mode); + return 0; +} + + +int32_t +error_gen_fchmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_fchmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fchmod, + fd, + mode); + return 0; +} + +int32_t +error_gen_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t 
+error_gen_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_chown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + loc, + uid, + gid); + return 0; +} + +int32_t +error_gen_fchown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_fchown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fchown, + fd, + uid, + gid); + return 0; +} + +int32_t +error_gen_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; +} + +int32_t +error_gen_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_ftruncate (call_frame_t *frame, + 
xlator_t *this, + fd_t *fd, + off_t offset) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} + +int32_t +error_gen_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + + +int32_t +error_gen_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_utimens_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + return 0; +} + +int32_t +error_gen_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +error_gen_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + STACK_WIND (frame, + error_gen_access_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + loc, + mask); + return 0; +} + + +int32_t +error_gen_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + path); + return 0; +} + +int32_t +error_gen_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + int op_errno = 0; + op_errno = 
error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, + size); + return 0; +} + + +int32_t +error_gen_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + inode, + buf); + return 0; +} + +int32_t +error_gen_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev); + return 0; +} + +int32_t +error_gen_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + inode, + buf); + return 0; +} + +int32_t +error_gen_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, mode); + return 0; +} + +int32_t +error_gen_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +error_gen_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, 
"unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + STACK_WIND (frame, + error_gen_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + return 0; +} + +int32_t +error_gen_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +error_gen_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + STACK_WIND (frame, + error_gen_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc); + return 0; +} + + +int32_t +error_gen_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +error_gen_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, loc); + return 0; +} + + +int32_t +error_gen_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +error_gen_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + STACK_WIND (frame, + 
error_gen_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldloc, newloc); + return 0; +} + + +int32_t +error_gen_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +error_gen_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; + } + STACK_WIND (frame, + error_gen_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldloc, newloc); + return 0; +} + + +int32_t +error_gen_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + +int32_t +error_gen_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, fd_t *fd) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; + } + + STACK_WIND (frame, error_gen_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + return 0; +} + +int32_t +error_gen_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + fd); + return 0; +} + +int32_t +error_gen_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } 
+ + STACK_WIND (frame, + error_gen_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, flags, fd); + return 0; +} + +int32_t +error_gen_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + vector, + count, + stbuf); + return 0; +} + +int32_t +error_gen_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + return 0; + } + + + STACK_WIND (frame, + error_gen_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, + size, + offset); + return 0; +} + + +int32_t +error_gen_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + stbuf); + return 0; +} + +int32_t +error_gen_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + + STACK_WIND (frame, + error_gen_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + off); + return 0; +} + +int32_t +error_gen_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +error_gen_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, 
op_errno); + return 0; + } + + STACK_WIND (frame, + error_gen_flush_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + return 0; +} + + +int32_t +error_gen_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +error_gen_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, + error_gen_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, + flags); + return 0; +} + +int32_t +error_gen_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd); + return 0; +} + +int32_t +error_gen_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + fd); + return 0; +} + +int32_t +error_gen_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, fd); + return 0; +} + + +int32_t 
+error_gen_getdents_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno,
+                        dir_entry_t *entries,
+                        int32_t count)
+{
+        /* Hand the child's getdents reply to the caller unchanged. */
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno,
+                      entries,
+                      count);
+        return 0;
+}
+
+/*
+ * getdents fop: if error_gen() returns a non-zero errno, fail the call
+ * here with that errno; otherwise wind it to the first child.
+ */
+int32_t
+error_gen_getdents (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    size_t size,
+                    off_t offset,
+                    int32_t flag)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                /* Same argument list as error_gen_getdents_cbk: no entries. */
+                STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_getdents_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->getdents,
+                    fd,
+                    size,
+                    offset,
+                    flag);
+        return 0;
+}
+
+
+/* setdents reply: only op_ret and op_errno travel back to the caller. */
+int32_t
+error_gen_setdents_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * setdents fop: if error_gen() returns a non-zero errno, fail the call
+ * here with that errno; otherwise wind it to the first child.
+ *
+ * Always returns 0; the outcome is delivered through the unwind.
+ */
+int32_t
+error_gen_setdents (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    int32_t flags,
+                    dir_entry_t *entries,
+                    int32_t count)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                /*
+                 * Unwind with exactly the arguments that
+                 * error_gen_setdents_cbk declares (op_ret, op_errno).
+                 * An (entries, count) pair belongs to the getdents
+                 * reply, not to this one.
+                 */
+                STACK_UNWIND (frame, -1, op_errno);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_setdents_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->setdents,
+                    fd,
+                    flags,
+                    entries,
+                    count);
+        return 0;
+}
+
+
+/* fsyncdir reply: only op_ret and op_errno travel back to the caller. */
+int32_t
+error_gen_fsyncdir_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * fsyncdir fop: if error_gen() returns a non-zero errno, fail the call
+ * here with that errno; otherwise wind it to the first child.
+ */
+int32_t
+error_gen_fsyncdir (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    int32_t flags)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                STACK_UNWIND (frame, -1, op_errno);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_fsyncdir_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fsyncdir,
+                    fd,
+
flags); + return 0; +} + + +int32_t +error_gen_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +error_gen_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_statfs_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, + loc); + return 0; +} + + +int32_t +error_gen_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +error_gen_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, + error_gen_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, + dict, + flags); + return 0; +} + +int32_t +error_gen_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + dict); + return 0; +} + +int32_t +error_gen_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, + name); + return 0; +} + +int32_t +error_gen_xattrop_cbk (call_frame_t *frame, + void 
*cookie,
+                       xlator_t *this,
+                       int32_t op_ret,
+                       int32_t op_errno,
+                       dict_t *dict)
+{
+        /* Hand the child's xattrop reply to the caller unchanged. */
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/*
+ * xattrop fop: if error_gen() returns a non-zero errno, fail the call
+ * here with that errno; otherwise wind it to the first child.
+ */
+int32_t
+error_gen_xattrop (call_frame_t *frame,
+                   xlator_t *this,
+                   loc_t *loc,
+                   gf_xattrop_flags_t flags,
+                   dict_t *dict)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                /* Same argument list as error_gen_xattrop_cbk: no dict. */
+                STACK_UNWIND (frame, -1, op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_xattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->xattrop,
+                    loc, flags, dict);
+        return 0;
+}
+
+/* fxattrop reply: passed to the caller unchanged. */
+int32_t
+error_gen_fxattrop_cbk (call_frame_t *frame,
+                        void *cookie,
+                        xlator_t *this,
+                        int32_t op_ret,
+                        int32_t op_errno,
+                        dict_t *dict)
+{
+        STACK_UNWIND (frame, op_ret, op_errno, dict);
+        return 0;
+}
+
+/*
+ * fxattrop fop: if error_gen() returns a non-zero errno, fail the call
+ * here with that errno; otherwise wind it to the first child.
+ */
+int32_t
+error_gen_fxattrop (call_frame_t *frame,
+                    xlator_t *this,
+                    fd_t *fd,
+                    gf_xattrop_flags_t flags,
+                    dict_t *dict)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                /* Same argument list as error_gen_fxattrop_cbk: no dict. */
+                STACK_UNWIND (frame, -1, op_errno, NULL);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_fxattrop_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->fxattrop,
+                    fd, flags, dict);
+        return 0;
+}
+
+/* removexattr reply: only op_ret and op_errno travel back to the caller. */
+int32_t
+error_gen_removexattr_cbk (call_frame_t *frame,
+                           void *cookie,
+                           xlator_t *this,
+                           int32_t op_ret,
+                           int32_t op_errno)
+{
+        STACK_UNWIND (frame,
+                      op_ret,
+                      op_errno);
+        return 0;
+}
+
+/*
+ * removexattr fop: if error_gen() returns a non-zero errno, fail the
+ * call here with that errno; otherwise wind it to the first child.
+ *
+ * Always returns 0; the outcome is delivered through the unwind.
+ */
+int32_t
+error_gen_removexattr (call_frame_t *frame,
+                       xlator_t *this,
+                       loc_t *loc,
+                       const char *name)
+{
+        int op_errno = 0;
+        op_errno = error_gen(this);
+        if (op_errno) {
+                GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+                /*
+                 * Unwind with exactly the arguments that
+                 * error_gen_removexattr_cbk declares (op_ret, op_errno);
+                 * this reply carries no payload pointer.
+                 */
+                STACK_UNWIND (frame, -1, op_errno);
+                return 0;
+        }
+
+        STACK_WIND (frame,
+                    error_gen_removexattr_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->removexattr,
+                    loc,
+                    name);
+        return 0;
+}
+
+int32_t
+error_gen_lk_cbk (call_frame_t *frame,
+                  void *cookie,
+                  xlator_t *this,
+
int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + lock); + return 0; +} + +int32_t +error_gen_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_lk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, + fd, + cmd, + lock); + return 0; +} + + +int32_t +error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +error_gen_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, + error_gen_inodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + loc, cmd, lock); + return 0; +} + + +int32_t +error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +error_gen_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, + error_gen_finodelk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->finodelk, + fd, cmd, lock); + return 0; +} + + +int32_t +error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} 
+ +int32_t +error_gen_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, error_gen_entrylk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, + loc, basename, cmd, type); + return 0; +} + +int32_t +error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +error_gen_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno); + return 0; + } + + STACK_WIND (frame, error_gen_fentrylk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fentrylk, + fd, basename, cmd, type); + return 0; +} + + +/* Management operations */ + +int32_t +error_gen_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + stats); + return 0; +} + + +int32_t +error_gen_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_stats_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->mops->stats, + flags); + return 0; +} + + + +int32_t +error_gen_getspec_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + char *spec_data) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + 
spec_data); + return 0; +} + + +int32_t +error_gen_getspec (call_frame_t *frame, + xlator_t *this, + const char *key, + int32_t flags) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_getspec_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->mops->getspec, + key, flags); + return 0; +} + + +int32_t +error_gen_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + file_checksum, + dir_checksum); + return 0; +} + + +int32_t +error_gen_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_checksum_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->checksum, + loc, + flag); + return 0; +} + +int32_t +error_gen_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + STACK_UNWIND (frame, op_ret, op_errno, entries); + return 0; +} + + +int32_t +error_gen_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off) +{ + int op_errno = 0; + op_errno = error_gen(this); + if (op_errno) { + GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno)); + STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; + } + + STACK_WIND (frame, + error_gen_readdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, + fd, size, off); + return 0; +} + +int32_t +error_gen_closedir (xlator_t *this, + fd_t *fd) +{ + return 0; +} + +int32_t +error_gen_close (xlator_t *this, + fd_t *fd) +{ + return 0; +} + +int +init 
(xlator_t *this)
+{
+        /*
+         * Translator set-up: validate the volume graph around this
+         * xlator and allocate its private eg_t state.
+         * Returns 0 on success, -1 on misconfiguration or allocation
+         * failure.
+         */
+        eg_t *pvt = NULL;
+
+        /* Exactly one child is required: reject none, reject several. */
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "error-gen not configured with one subvolume");
+                return -1;
+        }
+
+        /* Having no parent is only worth a warning, not a failure. */
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        pvt = CALLOC (1, sizeof (eg_t));
+        if (!pvt) {
+                /* Do not report success with a NULL this->private. */
+                gf_log (this->name, GF_LOG_ERROR, "out of memory");
+                return -1;
+        }
+        this->private = pvt;
+        return 0;
+}
+
+/*
+ * Translator tear-down: only logs that it was called.
+ * NOTE(review): the eg_t stored in this->private by init() is not
+ * released here - confirm whether something else frees it.
+ */
+void
+fini (xlator_t *this)
+{
+        gf_log (this->name, GF_LOG_DEBUG, "fini called");
+        return;
+}
+
+
+/* File operation table: every entry points at an error_gen_* function. */
+struct xlator_fops fops = {
+        .lookup      = error_gen_lookup,
+        .stat        = error_gen_stat,
+        .readlink    = error_gen_readlink,
+        .mknod       = error_gen_mknod,
+        .mkdir       = error_gen_mkdir,
+        .unlink      = error_gen_unlink,
+        .rmdir       = error_gen_rmdir,
+        .symlink     = error_gen_symlink,
+        .rename      = error_gen_rename,
+        .link        = error_gen_link,
+        .chmod       = error_gen_chmod,
+        .chown       = error_gen_chown,
+        .truncate    = error_gen_truncate,
+        .utimens     = error_gen_utimens,
+        .create      = error_gen_create,
+        .open        = error_gen_open,
+        .readv       = error_gen_readv,
+        .writev      = error_gen_writev,
+        .statfs      = error_gen_statfs,
+        .flush       = error_gen_flush,
+        .fsync       = error_gen_fsync,
+        .setxattr    = error_gen_setxattr,
+        .getxattr    = error_gen_getxattr,
+        .removexattr = error_gen_removexattr,
+        .opendir     = error_gen_opendir,
+        .readdir     = error_gen_readdir,
+        .getdents    = error_gen_getdents,
+        .fsyncdir    = error_gen_fsyncdir,
+        .access      = error_gen_access,
+        .ftruncate   = error_gen_ftruncate,
+        .fstat       = error_gen_fstat,
+        .lk          = error_gen_lk,
+        .fchmod      = error_gen_fchmod,
+        .fchown      = error_gen_fchown,
+        .setdents    = error_gen_setdents,
+        .lookup_cbk  = error_gen_lookup_cbk,
+        .checksum    = error_gen_checksum,
+        .xattrop     = error_gen_xattrop,
+        .fxattrop    = error_gen_fxattrop,
+        .inodelk     = error_gen_inodelk,
+        .finodelk    = error_gen_finodelk,
+        .entrylk     = error_gen_entrylk,
+        .fentrylk    = error_gen_fentrylk
+};
+
+/* Management operation table. */
+struct xlator_mops mops = {
+        .stats   = error_gen_stats,
+        .getspec = error_gen_getspec,
+};
+
+struct xlator_cbks cbks = {
+        .release = error_gen_close,
+ .releasedir = error_gen_closedir, +}; diff --git a/xlators/debug/trace/Makefile.am b/xlators/debug/trace/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/debug/trace/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am new file mode 100644 index 000000000..0f1679a04 --- /dev/null +++ b/xlators/debug/trace/src/Makefile.am @@ -0,0 +1,14 @@ + +xlator_LTLIBRARIES = trace.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug + +trace_la_LDFLAGS = -module -avoid-version + +trace_la_SOURCES = trace.c +trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c new file mode 100644 index 000000000..3ccf11a83 --- /dev/null +++ b/xlators/debug/trace/src/trace.c @@ -0,0 +1,2321 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/** + * xlators/debug/trace : + * This translator logs all the arguments to the fops/mops and also + * their _cbk functions, which later passes the call to next layer. + * Very helpful translator for debugging. + */ + +#include <time.h> +#include <errno.h> +#include "glusterfs.h" +#include "xlator.h" +#include "common-utils.h" + +#define ERR_EINVAL_NORETURN(cond) \ +do \ + { \ + if ((cond)) \ + { \ + gf_log ("ERROR", GF_LOG_ERROR, \ + "%s: %s: (%s) is true", \ + __FILE__, __FUNCTION__, #cond); \ + } \ + } while (0) + +typedef struct trace_private { + int32_t debug_flag; +} trace_private_t; + +struct { + char *name; + int enabled; +} trace_fop_names[GF_FOP_MAXVALUE]; + +int32_t +trace_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_CREATE].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, fd=%p, ino=%"PRIu64"), " + "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", " + "st_mode=%d, st_nlink=%"GF_PRI_NLINK", st_uid=%d, " + "st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", " + "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", " + "st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, fd, inode->ino, buf->st_dev, + buf->st_ino, buf->st_mode, buf->st_nlink, + buf->st_uid, buf->st_gid, buf->st_rdev, buf->st_size, + buf->st_blksize, + buf->st_blocks, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + 
frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + +int32_t +trace_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_OPEN].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, *fd=%p)", + frame->root->unique, op_ret, op_errno, fd); + } + + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + +int32_t +trace_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_STAT].enabled) { + + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, buf {st_dev=%"GF_PRI_DEV", " + "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", " + "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64 + ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", " + "st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_dev, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_rdev, buf->st_size, buf->st_blksize, + buf->st_blocks, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *buf) +{ + 
char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_READ].enabled) { + + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", " + "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", " + "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", " + "st_size=%"PRId64", st_blksize=%"GF_PRI_BLKSIZE", " + "st_blocks=%"PRId64", st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_dev, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks, + atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf); + return 0; +} + +int32_t +trace_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_WRITE].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", " + "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, " + "st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_ino, buf->st_size, + buf->st_blocks, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, 
GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_GETDENTS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, count=%d)", + frame->root->unique, op_ret, op_errno, count); + } + + STACK_UNWIND (frame, op_ret, op_errno, entries, count); + return 0; +} + +int32_t +trace_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_READDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64" :(op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + + return 0; +} + +int32_t +trace_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FSYNC].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_CHOWN].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log 
(this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, " + "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_ino, buf->st_mode, + buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_CHMOD].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, " + "st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_ino, buf->st_mode, + atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_fchmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FCHMOD].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + 
gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, " + "st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_ino, buf->st_mode, + atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_fchown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FCHOWN].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%d, " + "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_ino, buf->st_mode, + buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_UNLINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat 
*buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_RENAME].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, buf {st_ino=%"PRIu64"})", + frame->root->unique, op_ret, op_errno, + (buf? buf->st_ino : 0)); + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_READLINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s)", + frame->root->unique, op_ret, op_errno, buf); + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *xattr) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_LOOKUP].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, ino=%"PRIu64", " + "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", st_mode=%d, " + "st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, " + "st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", " + "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})", + frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr); + return 0; +} + +int32_t +trace_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN 
(!this ); + + if (trace_fop_names[GF_FOP_SYMLINK].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_ino=%"PRIu64", " + "st_mode=%d, st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, " + "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, " + "st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, inode->ino, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_size, buf->st_blocks, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +trace_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_MKNOD].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_dev=%"GF_PRI_DEV + ", st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", " + "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64 + ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, " + "st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_rdev, 
buf->st_size, buf->st_blksize, buf->st_blocks, + atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int32_t +trace_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_MKDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, ino=%"PRIu64"", + frame->root->unique, op_ret, op_errno, + (inode? inode->ino : 0)); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +trace_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_LINK].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, ino=%"PRIu64", " + "*buf {st_nlink=%"GF_PRI_NLINK"})", + frame->root->unique, op_ret, inode->ino, buf->st_nlink); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +trace_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FLUSH].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +trace_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this ); + + if 
(trace_fop_names[GF_FOP_OPENDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, fd=%p)", + frame->root->unique, op_ret, op_errno, fd); + } + + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + +int32_t +trace_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_RMDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_TRUNCATE].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", st_blksize=%" + GF_PRI_BLKSIZE", st_blocks=%"PRId64"})", + frame->root->unique, op_ret, buf->st_size, buf->st_blksize, + buf->st_blocks); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_UTIMENS].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_atime=%s, st_mtime=%s, " + "st_ctime=%s})", + frame->root->unique, op_ret, 
atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + ERR_EINVAL_NORETURN (!this); + + if (trace_fop_names[GF_FOP_STATFS].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK + ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", " + "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%" + GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d", + frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks, + buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree, + buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_SETXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + ERR_EINVAL_NORETURN (!this || !dict); + + if (trace_fop_names[GF_FOP_GETXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d, dict=%p)", + frame->root->unique, op_ret, op_errno, dict); + } + + STACK_UNWIND (frame, op_ret, op_errno, 
dict); + return 0; +} + +int32_t +trace_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +trace_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_ACCESS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", " + "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})", + frame->root->unique, op_ret, buf->st_size, buf->st_blksize, + buf->st_blocks); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, 
+ int32_t op_errno, + struct stat *buf) +{ + char atime_buf[256], mtime_buf[256], ctime_buf[256]; + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FSTAT].enabled) { + if (op_ret >= 0) { + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime)); + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime)); + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", " + "st_ino=%"PRIu64", st_mode=%d, st_nlink=%"GF_PRI_NLINK", " + "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", " + "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, " + "st_mtime=%s, st_ctime=%s})", + frame->root->unique, op_ret, buf->st_dev, buf->st_ino, + buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid, + buf->st_rdev, buf->st_size, buf->st_blksize, + buf->st_blocks, atime_buf, mtime_buf, ctime_buf); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trace_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_LK].enabled) { + if (op_ret >= 0) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, {l_type=%d, l_whence=%d, " + "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})", + frame->root->unique, op_ret, lock->l_type, lock->l_whence, + lock->l_start, lock->l_len, lock->l_pid); + } else { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, lock); + return 0; +} + + +int32_t +trace_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + 
ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_SETDENTS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret=%d, op_errno=%d", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_entrylk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_ENTRYLK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret=%d, op_errno=%d", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +trace_xattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + ERR_EINVAL_NORETURN (!this || !dict); + + if (trace_fop_names[GF_FOP_XATTROP].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +trace_fxattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + ERR_EINVAL_NORETURN (!this || !dict); + + if (trace_fop_names[GF_FOP_FXATTROP].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (op_ret=%d, op_errno=%d)", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +trace_inodelk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_INODELK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret=%d, op_errno=%d", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +trace_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + 
entrylk_cmd cmd, entrylk_type type) +{ + ERR_EINVAL_NORETURN (!this || !loc || !basename); + + if (trace_fop_names[GF_FOP_ENTRYLK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc= {path=%s, ino=%"PRIu64"} basename=%s, cmd=%s, type=%s)", + frame->root->unique, loc->path, loc->inode->ino, basename, + ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"), + ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK")); + } + + STACK_WIND (frame, + trace_entrylk_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->entrylk, + loc, basename, cmd, type); + return 0; +} + +int32_t +trace_inodelk (call_frame_t *frame, + xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *flock) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_INODELK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, cmd=%s)", + frame->root->unique, loc->path, loc->inode->ino, + ((cmd == F_SETLK)? "F_SETLK" : "unknown")); + } + + STACK_WIND (frame, + trace_inodelk_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->inodelk, + loc, cmd, flock); + return 0; +} + + +int32_t +trace_finodelk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + ERR_EINVAL_NORETURN (!this ); + + if (trace_fop_names[GF_FOP_FINODELK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret=%d, op_errno=%d", + frame->root->unique, op_ret, op_errno); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +trace_finodelk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FINODELK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (fd=%p, cmd=%s)", + frame->root->unique, fd, + ((cmd == F_SETLK) ? 
"F_SETLK" : "unknown")); + } + + STACK_WIND (frame, + trace_finodelk_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->finodelk, + fd, cmd, flock); + return 0; +} + + +int32_t +trace_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_XATTROP].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (path=%s, ino=%"PRIu64" flags=%d)", + frame->root->unique, loc->path, loc->inode->ino, flags); + + } + + STACK_WIND (frame, trace_xattrop_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, + loc, flags, dict); + + return 0; +} + +int32_t +trace_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FXATTROP].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (fd=%p, flags=%d)", + frame->root->unique, fd, flags); + + } + + STACK_WIND (frame, trace_fxattrop_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, + fd, flags, dict); + + return 0; +} + +int32_t +trace_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_LOOKUP].enabled) { + /* TODO: print all the keys mentioned in xattr_req */ + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, loc->path, + loc->inode->ino); + } + + STACK_WIND (frame, trace_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + + return 0; +} + +int32_t +trace_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ERR_EINVAL_NORETURN (!this || !loc ); + + + if (trace_fop_names[GF_FOP_STAT].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, loc->path, loc->inode->ino); + } + + STACK_WIND (frame, + 
trace_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + + return 0; +} + +int32_t +trace_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + ERR_EINVAL_NORETURN (!this || !loc || (size < 1)); + + if (trace_fop_names[GF_FOP_READLINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, size=%"GF_PRI_SIZET")", + frame->root->unique, loc->path, loc->inode->ino, size); + } + + STACK_WIND (frame, + trace_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, + size); + + return 0; +} + +int32_t +trace_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t dev) +{ + ERR_EINVAL_NORETURN (!this || !loc->path); + + if (trace_fop_names[GF_FOP_MKNOD].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%d, dev=%"GF_PRI_DEV")", + frame->root->unique, loc->path, loc->inode->ino, mode, dev); + } + + STACK_WIND (frame, + trace_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, + mode, + dev); + + return 0; +} + +int32_t +trace_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + ERR_EINVAL_NORETURN (!this || !loc || !loc->path); + + if (trace_fop_names[GF_FOP_MKDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (path=%s, ino=%"PRIu64", mode=%d)", + frame->root->unique, loc->path, + ((loc->inode)? 
loc->inode->ino : 0), mode); + } + + STACK_WIND (frame, + trace_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, + mode); + return 0; +} + +int32_t +trace_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_UNLINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, loc->path, loc->inode->ino); + } + + STACK_WIND (frame, + trace_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + return 0; +} + +int32_t +trace_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_RMDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, loc->path, loc->inode->ino); + } + + STACK_WIND (frame, + trace_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc); + + return 0; +} + +int32_t +trace_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + ERR_EINVAL_NORETURN (!this || !linkpath || !loc || !loc->path); + + if (trace_fop_names[GF_FOP_SYMLINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (linkpath=%s, loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, linkpath, loc->path, + ((loc->inode)? 
loc->inode->ino : 0)); + } + + STACK_WIND (frame, + trace_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, + loc); + + return 0; +} + +int32_t +trace_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + ERR_EINVAL_NORETURN (!this || !oldloc || !newloc); + + if (trace_fop_names[GF_FOP_RENAME].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, " + "newloc{path=%s, ino=%"PRIu64"})", + frame->root->unique, oldloc->path, oldloc->ino, + newloc->path, newloc->ino); + } + + STACK_WIND (frame, + trace_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldloc, + newloc); + + return 0; +} + +int32_t +trace_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + ERR_EINVAL_NORETURN (!this || !oldloc || !newloc); + + if (trace_fop_names[GF_FOP_LINK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, " + "newloc {path=%s, ino=%"PRIu64"})", + frame->root->unique, oldloc->path, oldloc->inode->ino, + newloc->path, newloc->inode->ino); + } + + STACK_WIND (frame, + trace_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldloc, + newloc); + return 0; +} + +int32_t +trace_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_CHMOD].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%o)", + frame->root->unique, loc->path, loc->inode->ino, mode); + } + + STACK_WIND (frame, + trace_chmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + loc, + mode); + + return 0; +} + +int32_t +trace_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_CHOWN].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, 
ino=%"PRIu64"}, uid=%d, gid=%d)", + frame->root->unique, loc->path, loc->inode->ino, uid, gid); + } + + STACK_WIND (frame, + trace_chown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + loc, + uid, + gid); + + return 0; +} + +int32_t +trace_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_TRUNCATE].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, offset=%"PRId64")", + frame->root->unique, loc->path, loc->inode->ino, offset); + } + + STACK_WIND (frame, + trace_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + + return 0; +} + +int32_t +trace_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + char actime_str[256]; + char modtime_str[256]; + + ERR_EINVAL_NORETURN (!this || !loc || !tv); + + if (trace_fop_names[GF_FOP_UTIMENS].enabled) { + strftime (actime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[0].tv_sec)); + strftime (modtime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[1].tv_sec)); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, " + "*tv=%p {actime=%s, modtime=%s})", + frame->root->unique, loc->path, loc->inode->ino, + tv, actime_str, modtime_str); + } + + STACK_WIND (frame, + trace_utimens_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + + return 0; +} + +int32_t +trace_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_OPEN].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=%d, fd=%p)", + frame->root->unique, loc->path, loc->inode->ino, flags, fd); + } + + STACK_WIND (frame, + trace_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, + flags, + fd); + return 0; +} + +int32_t 
+trace_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this || !loc->path); + + if (trace_fop_names[GF_FOP_CREATE].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=0%o mode=0%o)", + frame->root->unique, loc->path, loc->inode->ino, flags, mode); + } + + STACK_WIND (frame, + trace_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, + flags, + mode, + fd); + return 0; +} + +int32_t +trace_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + ERR_EINVAL_NORETURN (!this || !fd || (size < 1)); + + if (trace_fop_names[GF_FOP_READ].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + frame->root->unique, fd, size, offset); + } + + STACK_WIND (frame, + trace_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, + size, + offset); + return 0; +} + +int32_t +trace_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + ERR_EINVAL_NORETURN (!this || !fd || !vector || (count < 1)); + + if (trace_fop_names[GF_FOP_WRITE].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p, *vector=%p, count=%d, offset=%"PRId64")", + frame->root->unique, fd, vector, count, offset); + } + + STACK_WIND (frame, + trace_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + offset); + return 0; +} + +int32_t +trace_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_STATFS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"})", + frame->root->unique, loc->path, + ((loc->inode)? 
loc->inode->ino : 0)); + } + + STACK_WIND (frame, + trace_statfs_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs, + loc); + return 0; +} + +int32_t +trace_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FLUSH].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p)", + frame->root->unique, fd); + } + + STACK_WIND (frame, + trace_flush_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + return 0; +} + + +int32_t +trace_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FSYNC].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (flags=%d, *fd=%p)", + frame->root->unique, flags, fd); + } + + STACK_WIND (frame, + trace_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, + flags); + return 0; +} + +int32_t +trace_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + ERR_EINVAL_NORETURN (!this || !loc || !dict); + + if (trace_fop_names[GF_FOP_SETXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, dict=%p, flags=%d)", + frame->root->unique, loc->path, + ((loc->inode)? loc->inode->ino : 0), dict, flags); + } + + STACK_WIND (frame, + trace_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, + dict, + flags); + return 0; +} + +int32_t +trace_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_GETXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}), name=%s", + frame->root->unique, loc->path, + ((loc->inode)? 
loc->inode->ino : 0), name); + } + + STACK_WIND (frame, + trace_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, + name); + return 0; +} + +int32_t +trace_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + ERR_EINVAL_NORETURN (!this || !loc || !name); + + if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)", + frame->root->unique, loc->path, + ((loc->inode)? loc->inode->ino : 0), name); + } + + STACK_WIND (frame, + trace_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, + name); + + return 0; +} + +int32_t +trace_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this || !loc ); + + if (trace_fop_names[GF_FOP_OPENDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64":( loc {path=%s, ino=%"PRIu64"}, fd=%p)", + frame->root->unique, loc->path, loc->inode->ino, fd); + } + + STACK_WIND (frame, + trace_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, + fd); + return 0; +} + +int32_t +trace_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_GETDENTS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64", flag=0x%x)", + frame->root->unique, fd, size, offset, flag); + } + + STACK_WIND (frame, + trace_getdents_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getdents, + fd, + size, + offset, + flag); + return 0; +} + + +int32_t +trace_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_READDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", 
offset=%"PRId64")", + frame->root->unique, fd, size, offset); + } + + STACK_WIND (frame, + trace_readdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, + fd, + size, + offset); + + return 0; +} + + +int32_t +trace_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (datasync=%d, *fd=%p)", + frame->root->unique, datasync, fd); + } + + STACK_WIND (frame, + trace_fsyncdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, + fd, + datasync); + return 0; +} + +int32_t +trace_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + ERR_EINVAL_NORETURN (!this || !loc); + + if (trace_fop_names[GF_FOP_ACCESS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*loc {path=%s, ino=%"PRIu64"}, mask=0%o)", + frame->root->unique, loc->path, + ((loc->inode)? loc->inode->ino : 0), mask); + } + + STACK_WIND (frame, + trace_access_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + loc, + mask); + return 0; +} + +int32_t +trace_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (offset=%"PRId64", *fd=%p)", + frame->root->unique, offset, fd); + } + + STACK_WIND (frame, + trace_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + + return 0; +} + +int32_t +trace_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FCHOWN].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p, uid=%d, gid=%d)", + frame->root->unique, fd, uid, gid); + } + + STACK_WIND (frame, + trace_fchown_cbk, + FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->fchown, + fd, + uid, + gid); + return 0; +} + +int32_t +trace_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FCHMOD].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (mode=%o, *fd=%p)", + frame->root->unique, mode, fd); + } + + STACK_WIND (frame, + trace_fchmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fchmod, + fd, + mode); + return 0; +} + +int32_t +trace_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_FSTAT].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p)", + frame->root->unique, fd); + } + + STACK_WIND (frame, + trace_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd); + return 0; +} + +int32_t +trace_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + ERR_EINVAL_NORETURN (!this || !fd); + + if (trace_fop_names[GF_FOP_LK].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, " + "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})", + frame->root->unique, fd, cmd, lock->l_type, lock->l_whence, + lock->l_start, lock->l_len, lock->l_pid); + } + + STACK_WIND (frame, + trace_lk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, + fd, + cmd, + lock); + return 0; +} + +int32_t +trace_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + if (trace_fop_names[GF_FOP_SETDENTS].enabled) { + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (*fd=%p, flags=%d, count=%d", + frame->root->unique, fd, flags, count); + } + + STACK_WIND (frame, + trace_setdents_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setdents, + fd, + flags, + entries, + count); + return 0; +} + + +int32_t +trace_checksum_cbk (call_frame_t *frame, + void *cookie, + 
xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *fchecksum, + uint8_t *dchecksum) +{ + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret (%d), op_errno(%d)", + frame->root->unique, op_ret, op_errno); + + STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); + + return 0; +} + +int32_t +trace_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": loc->path (%s) flag (%d)", + frame->root->unique, loc->path, flag); + + STACK_WIND (frame, + trace_checksum_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->checksum, + loc, + flag); + + return 0; +} + + +int32_t +trace_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": op_ret (%d), op_errno(%d)", + frame->root->unique, op_ret, op_errno); + + STACK_UNWIND (frame, op_ret, op_errno, stats); + return 0; +} + +int32_t +trace_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + ERR_EINVAL_NORETURN (!this); + + gf_log (this->name, GF_LOG_NORMAL, + "%"PRId64": (flags=%d)", + frame->root->unique, flags); + + STACK_WIND (frame, + trace_stats_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->mops->stats, + flags); + + return 0; +} + +void +enable_all_calls (int enabled) +{ + int i; + for (i = 0; i < GF_FOP_MAXVALUE; i++) + trace_fop_names[i].enabled = enabled; +} + +void +enable_call (const char *name, int enabled) +{ + int i; + for (i = 0; i < GF_FOP_MAXVALUE; i++) + if (!strcasecmp(trace_fop_names[i].name, name)) + trace_fop_names[i].enabled = enabled; +} + + +/* + include = 1 for "include-ops" + = 0 for "exclude-ops" +*/ +void +process_call_list (const char *list, int include) +{ + enable_all_calls (include ? 
0 : 1); + + char *call = strsep ((char **)&list, ","); + while (call) { + enable_call (call, include); + call = strsep ((char **)&list, ","); + } +} + + +int32_t +init (xlator_t *this) +{ + dict_t *options = this->options; + char *includes = NULL, *excludes = NULL; + + if (!this) + return -1; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "trace translator requires one subvolume"); + return -1; + } + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + + includes = data_to_str (dict_get (options, "include-ops")); + excludes = data_to_str (dict_get (options, "exclude-ops")); + + { + int i; + for (i = 0; i < GF_FOP_MAXVALUE; i++) { + trace_fop_names[i].name = (gf_fop_list[i] ? + gf_fop_list[i] : ":O"); + trace_fop_names[i].enabled = 1; + } + } + + if (includes && excludes) { + gf_log (this->name, + GF_LOG_ERROR, + "must specify only one of 'include-ops' and 'exclude-ops'"); + return -1; + } + if (includes) + process_call_list (includes, 1); + if (excludes) + process_call_list (excludes, 0); + + gf_log_set_loglevel (GF_LOG_NORMAL); + + /* Set this translator's inode table pointer to child node's pointer. 
*/ + this->itable = FIRST_CHILD (this)->itable; + + return 0; +} + +void +fini (xlator_t *this) +{ + if (!this) + return; + + gf_log (this->name, GF_LOG_NORMAL, + "trace translator unloaded"); + return; +} + +struct xlator_fops fops = { + .stat = trace_stat, + .readlink = trace_readlink, + .mknod = trace_mknod, + .mkdir = trace_mkdir, + .unlink = trace_unlink, + .rmdir = trace_rmdir, + .symlink = trace_symlink, + .rename = trace_rename, + .link = trace_link, + .chmod = trace_chmod, + .chown = trace_chown, + .truncate = trace_truncate, + .utimens = trace_utimens, + .open = trace_open, + .readv = trace_readv, + .writev = trace_writev, + .statfs = trace_statfs, + .flush = trace_flush, + .fsync = trace_fsync, + .setxattr = trace_setxattr, + .getxattr = trace_getxattr, + .removexattr = trace_removexattr, + .opendir = trace_opendir, + .readdir = trace_readdir, + .fsyncdir = trace_fsyncdir, + .access = trace_access, + .ftruncate = trace_ftruncate, + .fstat = trace_fstat, + .create = trace_create, + .fchown = trace_fchown, + .fchmod = trace_fchmod, + .lk = trace_lk, + .inodelk = trace_inodelk, + .finodelk = trace_finodelk, + .entrylk = trace_entrylk, + .lookup = trace_lookup, + .setdents = trace_setdents, + .getdents = trace_getdents, + .checksum = trace_checksum, + .xattrop = trace_xattrop, + .fxattrop = trace_fxattrop, +}; + +struct xlator_mops mops = { + .stats = trace_stats, +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"include-ops", "include"}, + .type = GF_OPTION_TYPE_STR, + /*.value = { ""} */ + }, + { .key = {"exclude-ops", "exclude"}, + .type = GF_OPTION_TYPE_STR + /*.value = { ""} */ + }, + { .key = {NULL} }, +}; + diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am new file mode 100644 index 000000000..2cbde680f --- /dev/null +++ b/xlators/encryption/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = rot-13 + +CLEANFILES = diff --git a/xlators/encryption/rot-13/Makefile.am 
b/xlators/encryption/rot-13/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/encryption/rot-13/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am new file mode 100644 index 000000000..ba5e623d8 --- /dev/null +++ b/xlators/encryption/rot-13/src/Makefile.am @@ -0,0 +1,14 @@ +xlator_LTLIBRARIES = rot-13.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption + +rot_13_la_LDFLAGS = -module -avoidversion + +rot_13_la_SOURCES = rot-13.c +rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = rot-13.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c new file mode 100644 index 000000000..7cae46134 --- /dev/null +++ b/xlators/encryption/rot-13/src/rot-13.c @@ -0,0 +1,200 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <ctype.h> +#include <sys/uio.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" + +#include "rot-13.h" + +/* + * This is a rot13 ``encryption'' xlator. It rot13's data when + * writing to disk and rot13's it back when reading it. + * This xlator is meant as an example, NOT FOR PRODUCTION + * USE ;) (hence no error-checking) + */ + +void +rot13 (char *buf, int len) +{ + int i; + for (i = 0; i < len; i++) { + if (buf[i] >= 'a' && buf[i] <= 'z') + buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26); + else if (buf[i] >= 'A' && buf[i] <= 'Z') + buf[i] = 'A' + ((buf[i] - 'A' + 13) % 26); + } +} + +void +rot13_iovec (struct iovec *vector, int count) +{ + int i; + for (i = 0; i < count; i++) { + rot13 (vector[i].iov_base, vector[i].iov_len); + } +} + +int32_t +rot13_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + rot_13_private_t *priv = (rot_13_private_t *)this->private; + + if (priv->decrypt_read) + rot13_iovec (vector, count); + + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + +int32_t +rot13_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + STACK_WIND (frame, + rot13_readv_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->readv, + fd, size, offset); + return 0; +} + +int32_t +rot13_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +int32_t +rot13_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + rot_13_private_t *priv = (rot_13_private_t *)this->private; + if (priv->encrypt_write) + rot13_iovec (vector, count); + + STACK_WIND (frame, + rot13_writev_cbk, + 
FIRST_CHILD (this), + FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset); + return 0; +} + +int32_t +init (xlator_t *this) +{ + data_t *data = NULL; + rot_13_private_t *priv = NULL; + + if (!this->children || this->children->next) { + gf_log ("rot13", GF_LOG_ERROR, + "FATAL: rot13 should have exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + priv = CALLOC (sizeof (rot_13_private_t), 1); + ERR_ABORT (priv); + priv->decrypt_read = 1; + priv->encrypt_write = 1; + + data = dict_get (this->options, "encrypt-write"); + if (data) { + if (gf_string2boolean (data->data, &priv->encrypt_write) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "encrypt-write takes only boolean options"); + return -1; + } + } + + data = dict_get (this->options, "decrypt-read"); + if (data) { + if (gf_string2boolean (data->data, &priv->decrypt_read) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "decrypt-read takes only boolean options"); + return -1; + } + } + + this->private = priv; + gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded"); + return 0; +} + +void +fini (xlator_t *this) +{ + rot_13_private_t *priv = this->private; + + FREE (priv); + + return; +} + +struct xlator_fops fops = { + .readv = rot13_readv, + .writev = rot13_writev +}; + +struct xlator_mops mops = { +}; + + +struct volume_options options[] = { + { .key = {"encrypt-write"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"decrypt-read"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h new file mode 100644 index 000000000..43e60c326 --- /dev/null +++ b/xlators/encryption/rot-13/src/rot-13.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __ROT_13_H__ +#define __ROT_13_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +typedef struct { + gf_boolean_t encrypt_write; + gf_boolean_t decrypt_read; +} rot_13_private_t; + +#endif /* __ROT_13_H__ */ diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am new file mode 100644 index 000000000..9ac9b6f19 --- /dev/null +++ b/xlators/features/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = locks trash path-convertor filter quota + +CLEANFILES = diff --git a/xlators/features/filter/Makefile.am b/xlators/features/filter/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/filter/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am new file mode 100644 index 000000000..fa0b92214 --- /dev/null +++ b/xlators/features/filter/src/Makefile.am @@ -0,0 +1,13 @@ +xlator_LTLIBRARIES = filter.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +filter_la_LDFLAGS = -module -avoidversion + +filter_la_SOURCES = filter.c +filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + 
+CLEANFILES = + diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c new file mode 100644 index 000000000..67ea45d3a --- /dev/null +++ b/xlators/features/filter/src/filter.c @@ -0,0 +1,1768 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" + +#define GF_FILTER_NOBODY_UID 65534 +#define GF_FILTER_NOBODY_GID 65534 +#define GF_FILTER_ROOT_UID 0 +#define GF_FILTER_ROOT_GID 0 + +#define GF_MAXIMUM_FILTERING_ALLOWED 32 + +/* + option root-filtering on (off by default) + option translate-uid <uid-range=newuid,uid=newuid> + option translate-gid <gid-range=newgid,gid=newgid> + option read-only <yes|true> + option fixed-uid <uid> + option fixed-gid <gid> + option filter-uid <uid-range,uid> + option filter-gid <gid-range,gid> // not supported yet + +*/ + +struct gf_filter { + /* Flags */ + gf_boolean_t complete_read_only; + char fixed_uid_set; + char fixed_gid_set; + char partial_filter; + + /* Options */ + /* Mapping/Filtering/Translate whatever you want to call */ + int translate_num_uid_entries; + int translate_num_gid_entries; + int 
translate_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2]; + int translate_output_uid[GF_MAXIMUM_FILTERING_ALLOWED]; + int translate_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2]; + int translate_output_gid[GF_MAXIMUM_FILTERING_ALLOWED]; + + /* Fixed uid/gid */ + int fixed_uid; + int fixed_gid; + + /* Filter */ + int filter_num_uid_entries; + int filter_num_gid_entries; + int filter_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2]; + int filter_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2]; + +}; + +/* update_frame: The main logic of the whole translator. + Return values: + 0: no change + // TRANSLATE + 1: only uid changed + 2: only gid changed + 3: both uid/gid changed + // FILTER + 4: uid in filter range + 5: gid in filter range // not supported yet + 6: complete fs is readonly +*/ + +#define GF_FILTER_NO_CHANGE 0 +#define GF_FILTER_MAP_UID 1 +#define GF_FILTER_MAP_GID 2 +#define GF_FILTER_MAP_BOTH 3 +#define GF_FILTER_FILTER_UID 4 +#define GF_FILTER_FILTER_GID 5 +#define GF_FILTER_RO_FS 6 + +static int32_t +update_frame (call_frame_t *frame, + inode_t *inode, + struct gf_filter *filter) +{ + uid_t uid = 0; + int32_t idx = 0; + int32_t ret = 0; + int32_t dictret = 0; + uint64_t tmp_uid = 0; + + for (idx = 0; idx < filter->translate_num_uid_entries; idx++) { + if ((frame->root->uid >=filter->translate_input_uid[idx][0]) && + (frame->root->uid <=filter->translate_input_uid[idx][1])) { + dictret = inode_ctx_get (inode, frame->this, &tmp_uid); + uid = (uid_t)tmp_uid; + if (dictret == 0) { + if (frame->root->uid != uid) + ret = GF_FILTER_MAP_UID; + } else { + ret = GF_FILTER_MAP_UID; + } + break; + } + } + + for (idx = 0; idx < filter->translate_num_gid_entries; idx++) { + if ((frame->root->gid >=filter->translate_input_gid[idx][0]) && + (frame->root->gid <=filter->translate_input_gid[idx][1])) { + if (ret == GF_FILTER_NO_CHANGE) + ret = GF_FILTER_MAP_GID; + else + ret = GF_FILTER_MAP_BOTH; + break; + } + } + + + if (filter->complete_read_only) + return GF_FILTER_RO_FS; + + if 
(filter->partial_filter) { + dictret = inode_ctx_get (inode, frame->this, &tmp_uid); + uid = (uid_t)tmp_uid; + if (dictret != -1) { + for (idx = 0; idx < filter->filter_num_uid_entries; + idx++) { + if ((uid >=filter->filter_input_uid[idx][0]) && + (uid <=filter->filter_input_uid[idx][1])) { + return GF_FILTER_FILTER_UID; + } + } + } + } + + return ret; +} + +/* if 'root' don't change the uid/gid */ +static int32_t +update_stat (struct stat *stbuf, + struct gf_filter *filter) +{ + int32_t idx = 0; + for (idx = 0; idx < filter->translate_num_uid_entries; idx++) { + if (stbuf->st_uid == GF_FILTER_ROOT_UID) + continue; + if ((stbuf->st_uid >= filter->translate_input_uid[idx][0]) && + (stbuf->st_uid <= filter->translate_input_uid[idx][1])) { + stbuf->st_uid = filter->translate_output_uid[idx]; + break; + } + } + + for (idx = 0; idx < filter->translate_num_gid_entries; idx++) { + if (stbuf->st_gid == GF_FILTER_ROOT_GID) + continue; + if ((stbuf->st_gid >= filter->translate_input_gid[idx][0]) && + (stbuf->st_gid <= filter->translate_input_gid[idx][1])) { + stbuf->st_gid = filter->translate_output_gid[idx]; + break; + } + } + + if (filter->fixed_uid_set) { + stbuf->st_uid = filter->fixed_uid; + } + + if (filter->fixed_gid_set) { + stbuf->st_gid = filter->fixed_gid; + } + + return 0; +} + +static int32_t +filter_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int ret = 0; + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict); + return 0; +} + +int32_t +filter_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + STACK_WIND (frame, + filter_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, 
+ xattr_req); + return 0; +} + + +static int32_t +filter_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +filter_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + STACK_WIND (frame, + filter_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; +} + +static int32_t +filter_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +filter_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + default: + break; + } + + STACK_WIND (frame, + filter_chmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + loc, + mode); + return 0; +} + + +static int32_t +filter_fchmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +filter_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + STACK_WIND (frame, + filter_fchmod_cbk, + FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->fchmod, + fd, + mode); + return 0; +} +/* filter_chown_cbk: on success translate the uid/gid in buf with update_stat, then unwind. */ +static int32_t +filter_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} +/* filter_chown: gate chown on the verdict returned by update_frame. GF_FILTER_MAP_UID proceeds if the inode mode has S_IWGRP or (after falling through) S_IWOTH; GF_FILTER_MAP_BOTH proceeds only with S_IWOTH; otherwise EPERM. GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID and GF_FILTER_RO_FS yield EROFS. Any other verdict winds to the first child. */ +int32_t +filter_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + default: + break; + } + + STACK_WIND (frame, + filter_chown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + loc, + uid, + gid); + return 0; +} +/* filter_fchown_cbk: on success translate the uid/gid in buf with update_stat, then unwind. */ +static int32_t +filter_fchown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} +/* filter_fchown: fd-based chown, wound to the first child. NOTE(review): unlike filter_chown this path does not call update_frame, so no EPERM/EROFS gate is applied - confirm that is intended. */ +int32_t +filter_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + STACK_WIND (frame, + filter_fchown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fchown, + fd, + uid, + gid); + return 0; +} +/* filter_truncate_cbk: on success translate the uid/gid in buf with update_stat, then unwind. */ +static int32_t +filter_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +filter_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + 
off_t offset) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + } + + STACK_WIND (frame, + filter_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; +} +/* filter_ftruncate_cbk: on success translate the uid/gid in buf with update_stat, then unwind. */ +static int32_t +filter_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} +/* filter_ftruncate: fd-based truncate, wound to the first child. NOTE(review): this path does not call update_frame, so no EPERM/EROFS gate is applied - confirm that is intended. */ +int32_t +filter_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + STACK_WIND (frame, + filter_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} +/* filter_utimens_cbk: on success translate the uid/gid in buf with update_stat, then unwind. Not declared static, unlike the other callbacks in this chunk. */ +int32_t +filter_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +/* filter_utimens: gate utimens on the verdict returned by update_frame; MAP_UID and MAP_BOTH require write bits on the inode mode (else EPERM). */ +int32_t +filter_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + + case GF_FILTER_FILTER_UID: + case 
GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + } + + STACK_WIND (frame, + filter_utimens_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + return 0; +} +/* filter_readlink_cbk: pass the readlink reply through unchanged. */ +static int32_t +filter_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + STACK_UNWIND (frame, op_ret, op_errno, path); + return 0; +} +/* filter_readlink: read-side gate. GF_FILTER_MAP_UID proceeds if the inode mode has S_IRGRP or (after falling through) S_IROTH; GF_FILTER_MAP_BOTH proceeds only with S_IROTH; otherwise EPERM. No EROFS case is present: every other verdict winds to the first child. */ +int32_t +filter_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IRGRP) + break; /* no group read bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IROTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + } + STACK_WIND (frame, + filter_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, + size); + return 0; +} + +/* filter_mknod_cbk: on success translate the uid/gid in buf with update_stat and store the translated st_uid in the inode context (a failed inode_ctx_put is only logged), then unwind. */ +static int32_t +filter_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} +/* filter_mknod: gate node creation on the verdict returned by update_frame; for MAP_UID / MAP_BOTH the write bits are checked on the mode of loc->parent, not on the new inode. */ +int32_t +filter_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + int ret = 0; + inode_t *parent = loc->parent; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + 
STACK_UNWIND (frame, -1, EPERM); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL, NULL); + return 0; + } + STACK_WIND (frame, + filter_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev); + return 0; +} + +static int32_t +filter_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +filter_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int ret = 0; + inode_t *parent = loc->parent; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL, NULL); + return 0; + } + STACK_WIND (frame, + filter_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, mode); + return 0; +} + +static int32_t +filter_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +filter_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t ret = 0; + inode_t *parent = loc->parent; + if (!parent) + parent = inode_parent (loc->inode, 0, NULL); + ret = update_frame (frame, loc->inode, this->private); + 
switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; + if (loc->inode->st_mode & S_IWGRP) + break; /* neither group write bit: fall through to the other-bits checks */ + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS); + return 0; + } + STACK_WIND (frame, + filter_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + return 0; +} +/* filter_rmdir_cbk: pass the rmdir reply through unchanged. */ +static int32_t +filter_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} +/* filter_rmdir: gate rmdir on the verdict returned by update_frame. For GF_FILTER_MAP_UID a write bit for group on either the parent or the inode itself is enough, then (falling through) a write bit for others on either; GF_FILTER_MAP_BOTH checks only the bits for others; otherwise EPERM. GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID and GF_FILTER_RO_FS yield EROFS. NOTE(review): the result of inode_parent is dereferenced without a NULL check and is not released in this function - confirm whether it can be NULL or carries a reference. */ +int32_t +filter_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t ret = 0; + inode_t *parent = loc->parent; + if (!parent) + parent = inode_parent (loc->inode, 0, NULL); + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; + if (loc->inode->st_mode & S_IWGRP) + break; /* neither group write bit: fall through to the other-bits checks */ + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS); + return 0; + } + STACK_WIND (frame, + filter_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc); + return 0; +} +/* filter_symlink_cbk: on success translate the uid/gid in buf with update_stat and store the translated st_uid in the inode context, then unwind. */ +static int32_t +filter_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, 
(uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +filter_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + int ret = 0; + inode_t *parent = loc->parent; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL, NULL); + return 0; + } + STACK_WIND (frame, + filter_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, loc); + return 0; +} + + +static int32_t +filter_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +filter_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int32_t ret = 0; + inode_t *parent = oldloc->parent; + if (!parent) + parent = inode_parent (oldloc->inode, 0, NULL); + ret = update_frame (frame, oldloc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; + if (oldloc->inode->st_mode & S_IWGRP) + break; + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & S_IWOTH) + break; + if (oldloc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, + "%s -> %s: returning permission denied", oldloc->path, newloc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + 
case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + } + STACK_WIND (frame, + filter_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldloc, newloc); + return 0; +} + +/* filter_link_cbk: on success translate the uid/gid in buf with update_stat and store the translated st_uid in the inode context (a failed inode_ctx_put is only logged), then unwind. */ +static int32_t +filter_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} +/* filter_link: hard link creation. Only GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID and GF_FILTER_RO_FS verdicts from update_frame (evaluated on oldloc->inode) are rejected, with EROFS; there is no MAP_UID / MAP_BOTH mode-bit check here, every other verdict winds to the first child. */ +int32_t +filter_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int ret = 0; + ret = update_frame (frame, oldloc->inode, this->private); + switch (ret) { + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL, NULL); + return 0; + } + STACK_WIND (frame, + filter_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldloc, newloc); + return 0; +} + +/* filter_create_cbk: on success translate the uid/gid in buf with update_stat and store the translated st_uid in the inode context (a failed inode_ctx_put is only logged), then unwind with fd, inode and buf. */ +static int32_t +filter_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + if (op_ret >= 0) { + update_stat (buf, this->private); + ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "couldn't set context"); + } + } + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} +/* filter_create: gate file creation on the verdict returned by update_frame; for MAP_UID / MAP_BOTH the write bits are checked on the mode of loc->parent. */ +int32_t +filter_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, fd_t *fd) +{ + int ret = 0; + inode_t *parent = loc->parent; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (parent->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (parent->st_mode & 
S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL); /* was missing the fd, inode and buf arguments that the EROFS branch already passes */ + return 0; + + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL); + return 0; + } + STACK_WIND (frame, filter_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + return 0; +} +/* filter_open_cbk: pass the open reply through unchanged. */ +static int32_t +filter_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} +/* filter_open: gate open on the verdict returned by update_frame. For GF_FILTER_MAP_UID the open proceeds if the inode mode has S_IWGRP, or if the flags request neither O_WRONLY nor O_RDWR and the mode has S_IRGRP; otherwise it falls through to the same test with S_IWOTH / S_IROTH, which is also the test for GF_FILTER_MAP_BOTH; failing that the call is unwound with EPERM. GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID and GF_FILTER_RO_FS reject only opens whose flags contain O_WRONLY or O_RDWR, with EROFS. */ +int32_t +filter_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; + if (!((flags & O_WRONLY) || (flags & O_RDWR)) + && (loc->inode->st_mode & S_IRGRP)) + break; /* group bits insufficient: fall through to the other-bits checks */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + if (!((flags & O_WRONLY) || (flags & O_RDWR)) + && (loc->inode->st_mode & S_IROTH)) + break; + gf_log (this->name, GF_LOG_DEBUG, + "%s: returning permission denied (mode: 0%o, flag=0%o)", + loc->path, loc->inode->st_mode, flags); + STACK_UNWIND (frame, -1, EPERM, fd); + return 0; + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + if (!((flags & O_WRONLY) || (flags & O_RDWR))) + break; /* opens without a write flag are still allowed */ + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + + } + STACK_WIND (frame, + filter_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, flags, fd); + return 0; +} +/* filter_readv_cbk: on success translate the uid/gid in stbuf with update_stat, then unwind with the vector, count and stbuf. */ +static int32_t +filter_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + if (op_ret >= 0) { + update_stat (stbuf, this->private); + } + STACK_UNWIND (frame, + op_ret, + op_errno, 
+ vector, + count, + stbuf); + return 0; +} +/* filter_readv: wind the read to the first child; no permission gate is applied. */ +int32_t +filter_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + STACK_WIND (frame, + filter_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, + size, + offset); + return 0; +} + +/* filter_writev_cbk: on success translate the uid/gid in stbuf with update_stat, then unwind. */ +static int32_t +filter_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + if (op_ret >= 0) { + update_stat (stbuf, this->private); + } + STACK_UNWIND (frame, + op_ret, + op_errno, + stbuf); + return 0; +} +/* filter_writev: reject the write with EROFS when update_frame (evaluated on fd->inode) returns GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID or GF_FILTER_RO_FS; every other verdict winds to the first child. No mode-bit check is made here. */ +int32_t +filter_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + int32_t ret = 0; + ret = update_frame (frame, fd->inode, this->private); + switch (ret) { + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS, NULL); + return 0; + } + + STACK_WIND (frame, + filter_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + off); + return 0; +} +/* filter_fstat_cbk: on success translate the uid/gid in buf with update_stat, then unwind. */ +static int32_t +filter_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + if (op_ret >= 0) { + update_stat (buf, this->private); + } + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} +/* filter_fstat: wind fstat to the first child; no permission gate is applied. */ +int32_t +filter_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + STACK_WIND (frame, + filter_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd); + return 0; +} +/* filter_opendir_cbk: pass the opendir reply through unchanged. */ +static int32_t +filter_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + fd); + return 0; +} +/* filter_opendir: gate opendir on the verdict returned by update_frame for loc->inode. */ +int32_t +filter_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case 
GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; + if (loc->inode->st_mode & S_IRGRP) + break; /* no group read or write bit: fall through to the other-bits checks */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + if (loc->inode->st_mode & S_IROTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, fd); + return 0; + } + STACK_WIND (frame, + filter_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, fd); + return 0; +} + +/* filter_setxattr_cbk: pass the setxattr reply through unchanged. */ +static int32_t +filter_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} +/* filter_setxattr: gate setxattr on the verdict returned by update_frame. GF_FILTER_MAP_UID proceeds if the inode mode has S_IWGRP or (after falling through) S_IWOTH; GF_FILTER_MAP_BOTH proceeds only with S_IWOTH; otherwise EPERM. GF_FILTER_FILTER_UID, GF_FILTER_FILTER_GID and GF_FILTER_RO_FS yield EROFS. Any other verdict winds to the first child. */ +int32_t +filter_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; /* no group write bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS); + return 0; + } + + STACK_WIND (frame, + filter_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, + dict, + flags); + return 0; +} +/* filter_getxattr_cbk: pass the getxattr reply (including dict) through unchanged. */ +static int32_t +filter_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + dict); + return 0; +} +/* filter_getxattr: read-side gate on the verdict returned by update_frame; MAP_UID / MAP_BOTH check read bits on the inode mode. */ +int32_t +filter_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IRGRP) + break; /* no group read bit: fall through to the other-bits check */ + case GF_FILTER_MAP_BOTH: + if 
(loc->inode->st_mode & S_IROTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM, NULL); + return 0; + } + + STACK_WIND (frame, + filter_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, + name); + return 0; +} + +static int32_t +filter_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +filter_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int32_t ret = 0; + ret = update_frame (frame, loc->inode, this->private); + switch (ret) { + case GF_FILTER_MAP_UID: + if (loc->inode->st_mode & S_IWGRP) + break; + case GF_FILTER_MAP_BOTH: + if (loc->inode->st_mode & S_IWOTH) + break; + gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); + STACK_UNWIND (frame, -1, EPERM); + return 0; + case GF_FILTER_FILTER_UID: + case GF_FILTER_FILTER_GID: + case GF_FILTER_RO_FS: + STACK_UNWIND (frame, -1, EROFS); + return 0; + } + + STACK_WIND (frame, + filter_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, + name); + return 0; +} + +int32_t +init (xlator_t *this) +{ + char *value = NULL; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *tmp_str2 = NULL; + char *dup_str = NULL; + char *input_value_str1 = NULL; + char *input_value_str2 = NULL; + char *output_value_str = NULL; + int32_t input_value = 0; + int32_t output_value = 0; + data_t *option_data = NULL; + struct gf_filter *filter = NULL; + gf_boolean_t tmp_bool = 0; + + if (!this->children || this->children->next) { + gf_log (this->name, + GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + filter = CALLOC (sizeof (*filter), 1); + ERR_ABORT (filter); + + if (dict_get (this->options, "read-only")) { + value = data_to_str (dict_get (this->options, "read-only")); + if (gf_string2boolean (value, &filter->complete_read_only) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "wrong value provided for 'read-only'"); + return -1; + } + } + + if (dict_get (this->options, "root-squashing")) { + value = data_to_str (dict_get (this->options, "root-squashing")); + if (gf_string2boolean (value, &tmp_bool) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "wrong value provided for 'root-squashing'"); + return -1; + } + if (tmp_bool) { + filter->translate_num_uid_entries = 1; + filter->translate_num_gid_entries = 1; + filter->translate_input_uid[0][0] = GF_FILTER_ROOT_UID; /* root */ + filter->translate_input_uid[0][1] = GF_FILTER_ROOT_UID; /* root */ + filter->translate_input_gid[0][0] = GF_FILTER_ROOT_GID; /* root */ + filter->translate_input_gid[0][1] = GF_FILTER_ROOT_GID; /* root */ + filter->translate_output_uid[0] = GF_FILTER_NOBODY_UID; + filter->translate_output_gid[0] = GF_FILTER_NOBODY_GID; + } + } + + if (dict_get (this->options, "translate-uid")) { + option_data = dict_get (this->options, "translate-uid"); + value = strtok_r (option_data->data, ",", &tmp_str); + while (value) { + dup_str = strdup (value); + input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); + if (input_value_str1) { + /* Check for n-m */ + char *temp_string = strdup (input_value_str1); + input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); + if (gf_string2int (input_value_str2, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str2); + return -1; + } + filter->translate_input_uid[filter->translate_num_uid_entries][0] = input_value; + input_value_str2 = strtok_r (NULL, "-", &tmp_str2); + if (input_value_str2) { + if (gf_string2int (input_value_str2, &input_value) != 0) { + gf_log (this->name, 
GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str2); + return -1; + } + } + filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_value; + FREE (temp_string); + output_value_str = strtok_r (NULL, "=", &tmp_str1); + if (output_value_str) { + if (gf_string2int (output_value_str, &output_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + output_value_str); + return -1; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "mapping string not valid"); + return -1; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "mapping string not valid"); + return -1; + } + filter->translate_output_uid[filter->translate_num_uid_entries] = output_value; + gf_log (this->name, + GF_LOG_DEBUG, + "pair %d: input uid '%d' will be changed to uid '%d'", + filter->translate_num_uid_entries, input_value, output_value); + + filter->translate_num_uid_entries++; + if (filter->translate_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) + break; + value = strtok_r (NULL, ",", &tmp_str); + FREE (dup_str); + } + } + + tmp_str1 = NULL; + tmp_str2 = NULL; + tmp_str = NULL; + + if (dict_get (this->options, "translate-gid")) { + option_data = dict_get (this->options, "translate-gid"); + value = strtok_r (option_data->data, ",", &tmp_str); + while (value) { + dup_str = strdup (value); + input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); + if (input_value_str1) { + /* Check for n-m */ + char *temp_string = strdup (input_value_str1); + input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); + if (gf_string2int (input_value_str2, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str2); + return -1; + } + filter->translate_input_gid[filter->translate_num_gid_entries][0] = input_value; + input_value_str2 = strtok_r (NULL, "-", &tmp_str2); + if (input_value_str2) { + if (gf_string2int (input_value_str2, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid 
number format \"%s\"", + input_value_str2); + return -1; + } + } + filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_value; + FREE (temp_string); + output_value_str = strtok_r (NULL, "=", &tmp_str1); + if (output_value_str) { + if (gf_string2int (output_value_str, &output_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + output_value_str); + return -1; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "translate-gid value not valid"); + return -1; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "translate-gid value not valid"); + return -1; + } + + filter->translate_output_gid[filter->translate_num_gid_entries] = output_value; + + gf_log (this->name, GF_LOG_DEBUG, + "pair %d: input gid '%d' will be changed to gid '%d'", + filter->translate_num_gid_entries, input_value, output_value); + + filter->translate_num_gid_entries++; + if (filter->translate_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) + break; + value = strtok_r (NULL, ",", &tmp_str); + FREE (dup_str); + } + } + + tmp_str = NULL; + tmp_str1 = NULL; + + if (dict_get (this->options, "filter-uid")) { + option_data = dict_get (this->options, "filter-uid"); + value = strtok_r (option_data->data, ",", &tmp_str); + while (value) { + dup_str = strdup (value); + /* Check for n-m */ + input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); + if (gf_string2int (input_value_str1, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str1); + return -1; + } + filter->filter_input_uid[filter->filter_num_uid_entries][0] = input_value; + input_value_str1 = strtok_r (NULL, "-", &tmp_str1); + if (input_value_str1) { + if (gf_string2int (input_value_str1, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str1); + return -1; + } + } + filter->filter_input_uid[filter->filter_num_uid_entries][1] = input_value; + + gf_log (this->name, + GF_LOG_DEBUG, + 
"filter [%d]: input uid(s) '%s' will be filtered", + filter->filter_num_uid_entries, dup_str); + + filter->filter_num_uid_entries++; + if (filter->filter_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) + break; + value = strtok_r (NULL, ",", &tmp_str); + FREE (dup_str); + } + filter->partial_filter = 1; + } + + tmp_str = NULL; + tmp_str1 = NULL; + + if (dict_get (this->options, "filter-gid")) { + option_data = dict_get (this->options, "filter-gid"); + value = strtok_r (option_data->data, ",", &tmp_str); + while (value) { + dup_str = strdup (value); + /* Check for n-m */ + input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); + if (gf_string2int (input_value_str1, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str1); + return -1; + } + filter->filter_input_gid[filter->filter_num_gid_entries][0] = input_value; + input_value_str1 = strtok_r (NULL, "-", &tmp_str1); + if (input_value_str1) { + if (gf_string2int (input_value_str1, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + input_value_str1); + return -1; + } + } + filter->filter_input_gid[filter->filter_num_gid_entries][1] = input_value; + + gf_log (this->name, + GF_LOG_DEBUG, + "filter [%d]: input gid(s) '%s' will be filtered", + filter->filter_num_gid_entries, dup_str); + + filter->filter_num_gid_entries++; + if (filter->filter_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) + break; + value = strtok_r (NULL, ",", &tmp_str); + FREE (dup_str); + } + gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. 
exiting"); + return -1; + filter->partial_filter = 1; + } + + if (dict_get (this->options, "fixed-uid")) { + option_data = dict_get (this->options, "fixed-uid"); + if (gf_string2int (option_data->data, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + option_data->data); + return -1; + } + filter->fixed_uid = input_value; + filter->fixed_uid_set = 1; + } + + if (dict_get (this->options, "fixed-gid")) { + option_data = dict_get (this->options, "fixed-gid"); + if (gf_string2int (option_data->data, &input_value) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\"", + option_data->data); + return -1; + } + filter->fixed_gid = input_value; + filter->fixed_gid_set = 1; + } + + this->private = filter; + return 0; +} + + +void +fini (xlator_t *this) +{ + struct gf_filter *filter = this->private; + + FREE (filter); + + return; +} + + +struct xlator_fops fops = { + .lookup = filter_lookup, + .stat = filter_stat, + .fstat = filter_fstat, + .chmod = filter_chmod, + .fchmod = filter_fchmod, + .readlink = filter_readlink, + .mknod = filter_mknod, + .mkdir = filter_mkdir, + .unlink = filter_unlink, + .rmdir = filter_rmdir, + .symlink = filter_symlink, + .rename = filter_rename, + .link = filter_link, + .chown = filter_chown, + .fchown = filter_fchown, + .truncate = filter_truncate, + .ftruncate = filter_ftruncate, + .create = filter_create, + .open = filter_open, + .readv = filter_readv, + .writev = filter_writev, + .setxattr = filter_setxattr, + .getxattr = filter_getxattr, + .removexattr = filter_removexattr, + .opendir = filter_opendir, + .utimens = filter_utimens, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = { "root-squashing" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = { "read-only" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = { "fixed-uid" }, + .type = GF_OPTION_TYPE_INT + }, + { .key = { "fixed-gid" }, + .type = 
GF_OPTION_TYPE_INT + }, + { .key = { "translate-uid" }, + .type = GF_OPTION_TYPE_ANY + }, + { .key = { "translate-gid" }, + .type = GF_OPTION_TYPE_ANY + }, + { .key = { "filter-uid" }, + .type = GF_OPTION_TYPE_ANY + }, + { .key = { "filter-gid" }, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/locks/Makefile.am b/xlators/features/locks/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/locks/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am new file mode 100644 index 000000000..ec4a953eb --- /dev/null +++ b/xlators/features/locks/src/Makefile.am @@ -0,0 +1,20 @@ +xlator_LTLIBRARIES = locks.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features +# libtool spells the flag -avoid-version; the previous -avoidversion is not a libtool option +locks_la_LDFLAGS = -module -avoid-version + +locks_la_SOURCES = common.c posix.c internal.c +locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = locks.h common.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles + +CLEANFILES = + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/posix-locks.so + +install-data-hook: + ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
\ No newline at end of file diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c new file mode 100644 index 000000000..9ac1250cc --- /dev/null +++ b/xlators/features/locks/src/common.c @@ -0,0 +1,561 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" + + +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); + + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + mode_t st_mode = 0; + uint64_t tmp_pl_inode = 0; + int ret = 0; + + LOCK (&inode->lock); + { + ret = inode_ctx_get (inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto out; + } + + pl_inode = CALLOC (1, sizeof (*pl_inode)); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + st_mode = inode->st_mode; + if 
((st_mode & S_ISGID) && !(st_mode & S_IXGRP)) + pl_inode->mandatory = 1; + + + pthread_mutex_init (&pl_inode->mutex, NULL); + + INIT_LIST_HEAD (&pl_inode->dir_list); + INIT_LIST_HEAD (&pl_inode->ext_list); + INIT_LIST_HEAD (&pl_inode->int_list); + INIT_LIST_HEAD (&pl_inode->rw_list); + + ret = inode_ctx_put (inode, this, (uint64_t)(long)pl_inode); + } +out: + UNLOCK (&inode->lock); + return pl_inode; +} + + +/* Create a new posix_lock_t */ +posix_lock_t * +new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid) +{ + posix_lock_t *lock = NULL; + + lock = CALLOC (1, sizeof (posix_lock_t)); + if (!lock) { + return NULL; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->transport = transport; + lock->client_pid = client_pid; + + INIT_LIST_HEAD (&lock->list); + + return lock; +} + + +/* Delete a lock from the inode's lock list */ +void +__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +{ + list_del_init (&lock->list); +} + + +/* Destroy a posix_lock */ +void +__destroy_lock (posix_lock_t *lock) +{ + free (lock); +} + + +/* + Convert a posix_lock to a struct flock. + + This is the inverse of the range mapping done in new_posix_lock: + a lock stored with fl_end == LLONG_MAX (created from l_len == 0) + is reported back as l_len == 0; any other lock gets the + inclusive length fl_end - fl_start + 1. + */ +void +posix_lock_to_flock (posix_lock_t *lock, struct flock *flock) +{ + flock->l_pid = lock->client_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; +} + + +/* Insert the lock into the inode's lock list */ +void +pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom)); + + return; +} + + +/* Return true if the locks overlap, false otherwise */ +int +locks_overlap (posix_lock_t *l1, posix_lock_t *l2) +{ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ + +
return ((l1->fl_end >= l2->fl_start) && + (l2->fl_end >= l1->fl_start)); +} + + +/* Return true if the locks have the same owner */ +int +same_owner (posix_lock_t *l1, posix_lock_t *l2) +{ + return ((l1->client_pid == l2->client_pid) && + (l1->transport == l2->transport)); +} + + +/* Delete all F_UNLCK locks */ +void +__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->fl_type == F_UNLCK) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } +} + + +/* Add two locks */ +static posix_lock_t * +add_locks (posix_lock_t *l1, posix_lock_t *l2) +{ + posix_lock_t *sum = NULL; + + sum = CALLOC (1, sizeof (posix_lock_t)); + if (!sum) + return NULL; + + sum->fl_start = min (l1->fl_start, l2->fl_start); + sum->fl_end = max (l1->fl_end, l2->fl_end); + + return sum; +} + +/* Subtract two locks */ +struct _values { + posix_lock_t *locks[3]; +}; + +/* {big} must always be contained inside {small} */ +static struct _values +subtract_locks (posix_lock_t *big, posix_lock_t *small) +{ + struct _values v = { .locks = {0, 0, 0} }; + + if ((big->fl_start == small->fl_start) && + (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_type = small->fl_type; + } + else if ((small->fl_start > big->fl_start) && + (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + v.locks[2] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[2]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + memcpy (v.locks[2], 
big, sizeof (posix_lock_t)); + v.locks[2]->fl_start = small->fl_end + 1; + } + /* one edge coincides with big */ + else if (small->fl_start == big->fl_start) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_start = small->fl_end + 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else if (small->fl_end == big->fl_end) { + v.locks[0] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[0]); + v.locks[1] = CALLOC (1, sizeof (posix_lock_t)); + ERR_ABORT (v.locks[1]); + + memcpy (v.locks[0], big, sizeof (posix_lock_t)); + v.locks[0]->fl_end = small->fl_start - 1; + + memcpy (v.locks[1], small, sizeof (posix_lock_t)); + } + else { + gf_log ("posix-locks", GF_LOG_DEBUG, + "unexpected case in subtract_locks"); + } + + return v; +} + +/* + Start searching from {begin}, and return the first lock that + conflicts, NULL if no conflict + If {begin} is NULL, then start from the beginning of the list +*/ +static posix_lock_t * +first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) + continue; + + if (locks_overlap (l, lock)) + return l; + } + + return NULL; +} + + + +/* Return true if lock is grantable */ +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom) +{ + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) { + if (!l->blocked && locks_overlap (lock, l)) { + if (((l->fl_type == F_WRLCK) + || (lock->fl_type == F_WRLCK)) + && (lock->fl_type != F_UNLCK) + && !same_owner (l, lock)) { + ret = 0; + break; + } + } + } + return ret; +} + + +extern void do_blocked_rw (pl_inode_t *); + + +static void +__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock, + 
gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = { .locks = {0, 0, 0} }; + + list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) { + if (!locks_overlap (conf, lock)) + continue; + + if (same_owner (conf, lock)) { + if (conf->fl_type == lock->fl_type) { + sum = add_locks (lock, conf); + + sum->fl_type = lock->fl_type; + sum->transport = lock->transport; + sum->client_pid = lock->client_pid; + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __destroy_lock (lock); + __insert_and_merge (pl_inode, sum, dom); + + return; + } else { + sum = add_locks (lock, conf); + + sum->fl_type = conf->fl_type; + sum->transport = conf->transport; + sum->client_pid = conf->client_pid; + + v = subtract_locks (sum, lock); + + __delete_lock (pl_inode, conf); + __destroy_lock (conf); + + __delete_lock (pl_inode, lock); + __destroy_lock (lock); + + __destroy_lock (sum); + + for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; + + if (v.locks[i]->fl_type == F_UNLCK) { + __destroy_lock (v.locks[i]); + continue; + } + __insert_and_merge (pl_inode, + v.locks[i], dom); + } + + __delete_unlck_locks (pl_inode, dom); + return; + } + } + + if (lock->fl_type == F_UNLCK) { + continue; + } + + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + pl_insert_lock (pl_inode, lock, dom); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + pl_insert_lock (pl_inode, lock, dom); + } else { + __destroy_lock (lock); + } +} + + +void +__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, + gf_lk_domain_t dom, struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD (&tmp_list); + + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) { + if (l->blocked) { + conf = first_overlap (pl_inode, l, dom); + if (conf) + 
continue; + + l->blocked = 0; + list_move_tail (&l->list, &tmp_list); + } + } + + list_for_each_entry_safe (l, tmp, &tmp_list, list) { + list_del_init (&l->list); + + if (pl_is_lock_grantable (pl_inode, l, dom)) { + conf = CALLOC (1, sizeof (*conf)); + + if (!conf) { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + continue; + } + + conf->frame = l->frame; + l->frame = NULL; + + posix_lock_to_flock (l, &conf->user_flock); + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", + l->client_pid, + l->user_flock.l_start, + l->user_flock.l_len); + + __insert_and_merge (pl_inode, l, dom); + + list_add (&conf->list, granted); + } else { + l->blocked = 1; + pl_insert_lock (pl_inode, l, dom); + } + } +} + + +void +grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom) +{ + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_locks (this, pl_inode, dom, &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, list) { + list_del_init (&lock->list); + + STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock); + + FREE (lock); + } + + return; +} + + +int +pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block, gf_lk_domain_t dom) +{ + int ret = 0; + + errno = 0; + + pthread_mutex_lock (&pl_inode->mutex); + { + if (pl_is_lock_grantable (pl_inode, lock, dom)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + __insert_and_merge (pl_inode, lock, dom); + } else if (can_block) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + lock->blocked = 1; + pl_insert_lock (pl_inode, lock, dom); + ret = -1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, + lock->user_flock.l_start, + lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode, dom); + + do_blocked_rw (pl_inode); + + return ret; +} + + +posix_lock_t * +pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom) +{ + posix_lock_t *conf = NULL; + + conf = first_overlap (pl_inode, lock, dom); + + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } + + return conf; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h new file mode 100644 index 000000000..135f33011 --- /dev/null +++ b/xlators/features/locks/src/common.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +posix_lock_t * +new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid); + +pl_inode_t * +pl_inode_get (xlator_t *this, inode_t *inode); + +posix_lock_t * +pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain); + +int +pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, + int can_block, gf_lk_domain_t domain); + +int +pl_is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock, + gf_lk_domain_t dom); + +void +pl_insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom); + +void +grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain); + +void +posix_lock_to_flock (posix_lock_t *lock, struct flock *flock); + +int +locks_overlap (posix_lock_t *l1, posix_lock_t *l2); + +int +same_owner (posix_lock_t *l1, posix_lock_t *l2); + +void __delete_lock (pl_inode_t *, posix_lock_t *); + +void __destroy_lock (posix_lock_t *); + +#endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c new file mode 100644 index 000000000..7f454a78e --- /dev/null +++ b/xlators/features/locks/src/internal.c @@ -0,0 +1,762 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + + + +/* + Release every internal-domain (inodelk) lock on this inode that + was taken through the given transport. Always returns 0. + */ +static int +delete_locks_of_transport (pl_inode_t *pinode, transport_t *trans) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + /* inodelk locks are inserted on the internal-domain list; + dir_list holds pl_entry_lock_t and must not be walked as posix_lock_t */ + list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL), list) { + if (l->transport == trans) { + /* remove the matching entry itself; tmp is only the saved successor */ + __delete_lock (pinode, l); + __destroy_lock (l); + } + } + + return 0; +} + + +static posix_lock_t * +__find_exact_matching_lock (pl_inode_t *pinode, posix_lock_t *lock) +{ + posix_lock_t *l = NULL; + posix_lock_t *match = NULL; + + list_for_each_entry (l, DOMAIN_HEAD (pinode, GF_LOCK_INTERNAL), list) { + if (same_owner (l, lock) + && (l->fl_start == lock->fl_start) + && (l->fl_end == lock->fl_end)) { + match = l; + break; + } + } + + return match; +} + +/** + * pl_inodelk: + * + * This fop provides fcntl-style locking on files for internal + * purposes. Locks held through this fop reside in a domain different + * from those held by applications. This fop is for the use of AFR. + */ + + +static int +pl_inodelk_common (call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t cmd, struct flock *flock) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int can_block = 0; + + posix_locks_private_t * priv = NULL; + transport_t * transport = NULL; + pid_t client_pid = -1; + pl_inode_t * pinode = NULL; + + posix_lock_t * reqlock = NULL; + posix_lock_t * matchlock = NULL; /* steady, fire! 
*/ + + VALIDATE_OR_GOTO (frame, unwind); + VALIDATE_OR_GOTO (inode, unwind); + VALIDATE_OR_GOTO (flock, unwind); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + transport = frame->root->trans; + client_pid = frame->root->pid; + + priv = (posix_locks_private_t *) this->private; + + VALIDATE_OR_GOTO (priv, unwind); + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto unwind; + } + + if (client_pid == 0) { + /* + special case: this means release all locks + from this transport + */ + + gf_log (this->name, GF_LOG_DEBUG, + "releasing all locks from transport %p", transport); + + delete_locks_of_transport (pinode, transport); + goto unwind; + } + + reqlock = new_posix_lock (flock, transport, client_pid); + if (!reqlock) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock (&pinode->mutex); + { + switch (cmd) { + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + + /* fall through */ + + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + + switch (flock->l_type) { + + case F_WRLCK: + if (!pl_is_lock_grantable (pinode, reqlock, GF_LOCK_INTERNAL)) { + if (can_block) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => blocked", + reqlock->fl_type == F_UNLCK ? "unlock" : "lock", + reqlock->client_pid, + reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL); + + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => NOK", + reqlock->fl_type == F_UNLCK ? 
"unlock" : "lock", + reqlock->client_pid, reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + + /* free the request only after the log above has read its fields */ + __destroy_lock (reqlock); + op_errno = EAGAIN; + + goto unlock; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%s (pid=%d) %"PRId64" - %"PRId64" => OK", + reqlock->fl_type == F_UNLCK ? "unlock" : "lock", + reqlock->client_pid, + reqlock->user_flock.l_start, + reqlock->user_flock.l_len); + pl_insert_lock (pinode, reqlock, GF_LOCK_INTERNAL); + + break; + + case F_UNLCK: + matchlock = __find_exact_matching_lock (pinode, reqlock); + + __destroy_lock (reqlock); + if (!matchlock) { + op_errno = EINVAL; + goto unlock; + } + + __delete_lock (pinode, matchlock); + __destroy_lock (matchlock); + + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_ERROR, + "lock type %d not supported for [F]INODELK", + flock->l_type); + /* the request was never inserted in any list; do not leak it */ + __destroy_lock (reqlock); + goto unlock; + } + + + break; + + default: + op_errno = ENOTSUP; + gf_log (this->name, GF_LOG_ERROR, + "lock command F_GETLK not supported for [F]INODELK (cmd=%d)", + cmd); + /* the request was never inserted in any list; do not leak it */ + __destroy_lock (reqlock); + goto unlock; + } + + op_ret = 0; + + unlock: + if (pinode) + pthread_mutex_unlock (&pinode->mutex); + } + +unwind: + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *flock) +{ + return pl_inodelk_common (frame, this, loc->inode, cmd, flock); +} + + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + return pl_inodelk_common (frame, this, fd->inode, cmd, flock); +} + + +/** + * types_conflict - do two types of lock conflict? + * @t1: type + * @t2: type + * + * two read locks do not conflict + * any other case conflicts + */ + +static int +types_conflict (entrylk_type t1, entrylk_type t2) +{ + return !((t1 == ENTRYLK_RDLCK) && (t2 == ENTRYLK_RDLCK)); +} + +/** + * all_names - does a basename represent all names? + * @basename: name to check + */ + +#define all_names(basename) ((basename == NULL) ? 
1 : 0) + +/** + * names_conflict - do two names conflict? + * @n1: name + * @n2: name + */ + +static int +names_conflict (const char *n1, const char *n2) +{ + return all_names (n1) || all_names (n2) || !strcmp (n1, n2); +} + + +static int +names_equal (const char *n1, const char *n2) +{ + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); +} + +/** + * lock_grantable - is this lock grantable? + * @inode: inode in which to look + * @basename: name we're trying to lock + * @type: type of lock + */ + +static pl_entry_lock_t * +__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + + if (list_empty (&pinode->dir_list)) + return NULL; + + list_for_each_entry (lock, &pinode->dir_list, inode_list) { + if (names_conflict (lock->basename, basename) && + types_conflict (lock->type, type)) + return lock; + } + + return NULL; +} + +/** + * find_most_matching_lock - find the lock struct which most matches in order of: + * lock on the exact basename || + * an all_names lock + * + * + * @inode: inode in which to look + * @basename: name to search for + */ + +static pl_entry_lock_t * +__find_most_matching_lock (pl_inode_t *pinode, const char *basename) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; + + if (list_empty (&pinode->dir_list)) + return NULL; + + list_for_each_entry (lock, &pinode->dir_list, inode_list) { + if (all_names (lock->basename)) + all = lock; + else if (names_equal (lock->basename, basename)) + exact = lock; + } + + return (exact ? 
exact : all); +} + + +/** + * insert_new_lock - insert a new dir lock into the inode with the given parameters + * @pinode: inode to insert into + * @basename: basename for the lock + * @type: type of the lock + */ + +static pl_entry_lock_t * +new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, + transport_t *trans) +{ + pl_entry_lock_t *newlock = NULL; + + newlock = CALLOC (sizeof (pl_entry_lock_t), 1); + if (!newlock) { + goto out; + } + + newlock->basename = basename ? strdup (basename) : NULL; + newlock->type = type; + newlock->trans = trans; + + if (type == ENTRYLK_RDLCK) + newlock->read_count = 1; + + INIT_LIST_HEAD (&newlock->inode_list); + INIT_LIST_HEAD (&newlock->blocked_locks); + +out: + return newlock; +} + +/** + * lock_name - lock a name in a directory + * @inode: inode for the directory in which to lock + * @basename: name of the entry to lock + * if null, lock the entire directory + * + * the entire directory being locked is represented as: a single + * pl_entry_lock_t present in the entrylk_locks list with its + * basename = NULL + */ + +int +__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, + call_frame_t *frame, xlator_t *this, int nonblock) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *conf = NULL; + + transport_t *trans = NULL; + + int ret = -EINVAL; + + trans = frame->root->trans; + + conf = __lock_grantable (pinode, basename, type); + if (conf) { + ret = -EAGAIN; + if (nonblock) + goto out; + + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "blocking lock: {pinode=%p, basename=%s}", + pinode, basename); + + lock->frame = frame; + lock->this = this; + lock->blocked = 1; + + list_add (&lock->blocked_locks, &conf->blocked_locks); + + + goto out; + } + + switch (type) { + case ENTRYLK_RDLCK: + lock = __find_most_matching_lock (pinode, basename); + + if (lock && names_equal (lock->basename, 
basename)) { + /* an identical read lock is already held in dir_list: + share it by bumping its count. It must stay allocated + and linked, since __unlock_name later looks it up and + decrements read_count. */ + lock->read_count++; + + lock = NULL; + } else { + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + list_add (&lock->inode_list, &pinode->dir_list); + } + break; + + case ENTRYLK_WRLCK: + lock = new_entrylk_lock (pinode, basename, type, trans); + + if (!lock) { + ret = -ENOMEM; + goto out; + } + + list_add (&lock->inode_list, &pinode->dir_list); + break; + } + + ret = 0; +out: + return ret; +} + + +/** + * unlock_name - unlock a name in a directory + * @inode: inode for the directory to unlock in + * @basename: name of the entry to unlock + * if null, unlock the entire directory + */ + +pl_entry_lock_t * +__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type) +{ + pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *ret_lock = NULL; + + lock = __find_most_matching_lock (pinode, basename); + + if (!lock) { + gf_log ("locks", GF_LOG_DEBUG, + "unlock on %s (type=%s) attempted but no matching lock found", + basename, type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : + "ENTRYLK_WRLCK"); + goto out; + } + + if (names_equal (lock->basename, basename) + && lock->type == type) { + if (type == ENTRYLK_RDLCK) { + lock->read_count--; + } + if (type == ENTRYLK_WRLCK || lock->read_count == 0) { + list_del (&lock->inode_list); + ret_lock = lock; + } + } else { + gf_log ("locks", GF_LOG_ERROR, + "unlock for a non-existing lock!"); + goto out; + } + +out: + return ret_lock; +} + + +void +__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *lock, + struct list_head *granted) +{ + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; + + list_for_each_entry_safe (bl, tmp, &lock->blocked_locks, + blocked_locks) { + list_del_init (&bl->blocked_locks); + + /* TODO: error checking */ + + gf_log ("locks", GF_LOG_DEBUG, + "trying to unblock: {pinode=%p, basename=%s}", + pl_inode, bl->basename); + + bl_ret = __lock_name (pl_inode, bl->basename, bl->type, + bl->frame, bl->this, 0); + + if (bl_ret == 0) { + list_add (&bl->blocked_locks, granted); + } else { + if (bl->basename) + FREE (bl->basename); + FREE (bl); + } + } + return; +} + + +void +grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, + pl_entry_lock_t *unlocked) +{ + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD (&granted_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + __grant_blocked_entry_locks (this, pl_inode, unlocked, + &granted_list); + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + FREE (lock->basename); + FREE (lock); + } + + FREE (unlocked->basename); + FREE (unlocked); + + return; +} + + +/** + * release_entry_locks_for_transport: release all entry locks from this + * transport for this loc_t + */ + +static int +release_entry_locks_for_transport (xlator_t *this, 
pl_inode_t *pinode, + transport_t *trans) +{ + pl_entry_lock_t *lock; + pl_entry_lock_t *tmp; + struct list_head granted; + + INIT_LIST_HEAD (&granted); + + pthread_mutex_lock (&pinode->mutex); + { + if (list_empty (&pinode->dir_list)) { + goto unlock; + } + + list_for_each_entry_safe (lock, tmp, &pinode->dir_list, + inode_list) { + if (lock->trans != trans) + continue; + + list_del_init (&lock->inode_list); + __grant_blocked_entry_locks (this, pinode, lock, + &granted); + + FREE (lock->basename); + FREE (lock); + } + } +unlock: + pthread_mutex_unlock (&pinode->mutex); + + list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { + list_del_init (&lock->blocked_locks); + + STACK_UNWIND (lock->frame, 0, 0); + + FREE (lock->basename); + FREE (lock); + } + + return 0; +} + + +/** + * pl_entrylk: + * + * Locking on names (directory entries) + */ + +int +pl_entrylk_common (call_frame_t *frame, xlator_t *this, + inode_t *inode, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + + transport_t * transport = NULL; + pid_t pid = -1; + + pl_inode_t * pinode = NULL; + int ret = -1; + pl_entry_lock_t *unlocked = NULL; + char unwind = 1; + + pinode = pl_inode_get (this, inode); + if (!pinode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto out; + } + + pid = frame->root->pid; + transport = frame->root->trans; + + if (pid == 0) { + /* + this is a special case that means release + all locks from this transport + */ + + gf_log (this->name, GF_LOG_DEBUG, + "releasing locks for transport %p", transport); + + release_entry_locks_for_transport (this, pinode, transport); + op_ret = 0; + + goto out; + } + + switch (cmd) { + case ENTRYLK_LOCK: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, this, 0); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + if (ret == -EAGAIN) + unwind = 0; + op_errno = -ret; + goto out; + } + 
+ break; + + case ENTRYLK_LOCK_NB: + pthread_mutex_lock (&pinode->mutex); + { + ret = __lock_name (pinode, basename, type, + frame, this, 1); + } + pthread_mutex_unlock (&pinode->mutex); + + if (ret < 0) { + op_errno = -ret; + goto out; + } + + break; + + case ENTRYLK_UNLOCK: + pthread_mutex_lock (&pinode->mutex); + { + unlocked = __unlock_name (pinode, basename, type); + } + pthread_mutex_unlock (&pinode->mutex); + + if (unlocked) + grant_blocked_entry_locks (this, pinode, unlocked); + + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "unexpected case!"); + goto out; + } + + op_ret = 0; +out: + if (unwind) { + STACK_UNWIND (frame, op_ret, op_errno); + } + + return 0; +} + + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + return pl_entrylk_common (frame, this, loc->inode, basename, cmd, type); +} + + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + return pl_entrylk_common (frame, this, fd->inode, basename, cmd, type); +} diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h new file mode 100644 index 000000000..8ed7bb63f --- /dev/null +++ b/xlators/features/locks/src/locks.h @@ -0,0 +1,111 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __POSIX_LOCKS_H__ +#define __POSIX_LOCKS_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "compat-errno.h" +#include "transport.h" +#include "stack.h" +#include "call-stub.h" + +struct __pl_fd; + +struct __posix_lock { + struct list_head list; + + short fl_type; + off_t fl_start; + off_t fl_end; + + short blocked; /* waiting to acquire */ + struct flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + fd_t *fd; + + call_frame_t *frame; + + /* These two together serve to uniquely identify each process + across nodes */ + + transport_t *transport; /* to identify client node */ + pid_t client_pid; /* pid of client process */ +}; +typedef struct __posix_lock posix_lock_t; + +struct __pl_rw_req_t { + struct list_head list; + call_stub_t *stub; + posix_lock_t region; +}; +typedef struct __pl_rw_req_t pl_rw_req_t; + + +struct __entry_lock { + struct list_head inode_list; /* list_head back to pl_inode_t */ + struct list_head blocked_locks; /* locks blocked due to this lock */ + + call_frame_t *frame; + xlator_t *this; + int blocked; + + const char *basename; + entrylk_type type; + unsigned int read_count; /* number of read locks */ + transport_t *trans; +}; +typedef struct __entry_lock pl_entry_lock_t; + + +/* The "simulated" inode. 
This contains a list of all the locks associated + with this file */ + +struct __pl_inode { + pthread_mutex_t mutex; + + struct list_head dir_list; /* list of entry locks */ + struct list_head ext_list; /* list of fcntl locks */ + struct list_head int_list; /* list of internal locks */ + struct list_head rw_list; /* list of waiting r/w requests */ + int mandatory; /* if mandatory locking is enabled */ +}; +typedef struct __pl_inode pl_inode_t; + +#define DOMAIN_HEAD(pl_inode, dom) (dom == GF_LOCK_POSIX \ + ? &pl_inode->ext_list \ + : &pl_inode->int_list) + + +struct __pl_fd { + gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ +}; +typedef struct __pl_fd pl_fd_t; + + +typedef struct { + gf_boolean_t mandatory; /* if mandatory locking is enabled */ +} posix_locks_private_t; + + +#endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c new file mode 100644 index 000000000..e2b336607 --- /dev/null +++ b/xlators/features/locks/src/posix.c @@ -0,0 +1,834 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" + +#include "locks.h" +#include "common.h" + +#ifndef LLONG_MAX +#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ +#endif /* LLONG_MAX */ + +/* Forward declarations */ + + +void do_blocked_rw (pl_inode_t *); +static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t); + +struct _truncate_ops { + loc_t loc; + fd_t *fd; + off_t offset; + enum {TRUNCATE, FTRUNCATE} op; +}; + + +int +pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct _truncate_ops *local = NULL; + + local = frame->local; + + if (local->op == TRUNCATE) + loc_wipe (&local->loc); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +/* Returns 1 when no non-blocked fcntl lock held by a different owner + * overlaps the range [offset, LLONG_MAX]; returns 0 as soon as one such + * lock is found. The ext_list walk is done under pl_inode->mutex. + */ +static int +truncate_allowed (pl_inode_t *pl_inode, + transport_t *transport, pid_t client_pid, + off_t offset) +{ + posix_lock_t *l = NULL; + posix_lock_t region = {.list = {0, }, }; + int ret = 1; + + /* describe the truncated range as a lock region owned by the caller */ + region.fl_start = offset; + region.fl_end = LLONG_MAX; + region.transport = transport; + region.client_pid = client_pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (!l->blocked + && locks_overlap (&region, l) + && !same_owner (&region, l)) { + ret = 0; + break; + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + return ret; +} + + +static int +truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + posix_locks_private_t *priv = NULL; + struct _truncate_ops *local = NULL; + inode_t *inode = NULL; + pl_inode_t *pl_inode = NULL; + + + priv = this->private; + local = frame->local; + + if (op_ret != 0) { + gf_log (this->name, 
GF_LOG_ERROR, + "got error (errno=%d, stderror=%s) from child", + op_errno, strerror (op_errno)); + goto unwind; + } + + if (local->op == TRUNCATE) + inode = local->loc.inode; + else + inode = local->fd->inode; + + pl_inode = pl_inode_get (this, inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "unable to get pl_inode from %p", inode); + op_errno = ENOMEM; + goto unwind; + } + + if (priv->mandatory + && pl_inode->mandatory + && !truncate_allowed (pl_inode, frame->root->trans, + frame->root->pid, local->offset)) { + op_errno = EAGAIN; + goto unwind; + } + + switch (local->op) { + case TRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->truncate, + &local->loc, local->offset); + break; + case FTRUNCATE: + STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ftruncate, + local->fd, local->offset); + break; + } + + return 0; + +unwind: + if (local->op == TRUNCATE) + loc_wipe (&local->loc); + + STACK_UNWIND (frame, -1, ENOMEM, buf); + return 0; +} + + +int +pl_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + struct _truncate_ops *local = NULL; + + local = CALLOC (1, sizeof (struct _truncate_ops)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + local->op = TRUNCATE; + local->offset = offset; + loc_copy (&local->loc, loc); + + frame->local = local; + + STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->stat, loc); + + return 0; + +unwind: + STACK_UNWIND (frame, -1, ENOMEM, NULL); + + return 0; +} + + +int +pl_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + struct _truncate_ops *local = NULL; + + local = CALLOC (1, sizeof (struct _truncate_ops)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + local->op = FTRUNCATE; + local->offset = offset; + local->fd = fd; + + frame->local = local; + + 
STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd); + return 0; + +unwind: + STACK_UNWIND (frame, -1, ENOMEM, NULL); + + return 0; +} + + +static void +__delete_locks_of_owner (pl_inode_t *pl_inode, + transport_t *transport, pid_t pid) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + /* TODO: what if it is a blocked lock with pending l->frame */ + + list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { + if ((l->transport == transport) + && (l->client_pid == pid)) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } + + list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) { + if ((l->transport == transport) + && (l->client_pid == pid)) { + __delete_lock (pl_inode, l); + __destroy_lock (l); + } + } + + return; +} + + +int +pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + +int +pl_flush (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + pthread_mutex_lock (&pl_inode->mutex); + { + __delete_locks_of_owner (pl_inode, frame->root->trans, + frame->root->pid); + } + pthread_mutex_unlock (&pl_inode->mutex); + + grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX); + grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL); + + do_blocked_rw (pl_inode); + + STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd); + return 0; +} + + +int +pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + + +int +pl_open (call_frame_t *frame, xlator_t *this, + loc_t 
*loc, int32_t flags, fd_t *fd) +{ + /* why isn't O_TRUNC being handled ? */ + STACK_WIND (frame, pl_open_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, + loc, flags & ~O_TRUNC, fd); + + return 0; +} + + +int +pl_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + + return 0; +} + + +int +pl_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + STACK_WIND (frame, pl_create_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, + loc, flags, mode, fd); + return 0; +} + + +int +pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + +int +pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + + return 0; +} + + +void +do_blocked_rw (pl_inode_t *pl_inode) +{ + struct list_head wind_list; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *tmp = NULL; + + INIT_LIST_HEAD (&wind_list); + + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) { + if (__rw_allowable (pl_inode, &rw->region, + rw->stub->fop)) { + list_del_init (&rw->list); + list_add_tail (&rw->list, &wind_list); + } + } + } + pthread_mutex_unlock (&pl_inode->mutex); + + list_for_each_entry_safe (rw, tmp, &wind_list, list) { + list_del_init (&rw->list); + call_resume (rw->stub); + free (rw); + } + + return; +} + + +static int +__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region, + glusterfs_fop_t op) +{ + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry (l, &pl_inode->ext_list, list) { + if (locks_overlap (l, region) && 
!same_owner (l, region)) { + if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) + continue; + ret = 0; + break; + } + } + + return ret; +} + + +int +pl_readv_cont (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + STACK_WIND (frame, pl_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset); + + return 0; +} + + +/* readv entry point. Without mandatory locking the read is wound straight + * to the child. With it, the read is wound only if __rw_allowable accepts + * the region; otherwise it fails with EWOULDBLOCK for O_NONBLOCK fds or is + * queued on pl_inode->rw_list as a call stub. + */ +int +pl_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char allowable = 0; + + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } + + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + size - 1; + region.transport = frame->root->trans; + region.client_pid = frame->root->pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + allowable = __rw_allowable (pl_inode, &region, + GF_FOP_READ); + if (allowable) + goto unlock; + + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_DEBUG, + "returning EWOULDBLOCK"); + op_errno = EWOULDBLOCK; + op_ret = -1; + goto unlock; + } + + rw = CALLOC (1, sizeof (*rw)); + if (!rw) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_readv_stub (frame, pl_readv_cont, + fd, size, offset); + if (!rw->stub) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + free (rw); + goto unlock; + } + + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + /* an allowable read must fall through to the STACK_WIND below; + * only a failed or queued request leaves through unwind */ + if (!allowable) + goto unwind; + } + + + STACK_WIND (frame, pl_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset); + return 0; + +unwind: + if (op_ret == -1) + STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; 
+} + + +int +pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int count, off_t offset) +{ + STACK_WIND (frame, pl_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset); + + return 0; +} + + +/* writev entry point. Without mandatory locking the write is wound straight + * to the child. With it, the write is wound only if __rw_allowable accepts + * the region; otherwise it fails with EWOULDBLOCK for O_NONBLOCK fds or is + * queued on pl_inode->rw_list as a call stub. + */ +int +pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = {.list = {0, }, }; + int op_ret = 0; + int op_errno = 0; + char allowable = 0; + + + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } + + if (priv->mandatory && pl_inode->mandatory) { + region.fl_start = offset; + region.fl_end = offset + iov_length (vector, count) - 1; + region.transport = frame->root->trans; + region.client_pid = frame->root->pid; + + pthread_mutex_lock (&pl_inode->mutex); + { + allowable = __rw_allowable (pl_inode, &region, + GF_FOP_WRITE); + if (allowable) + goto unlock; + + if (fd->flags & O_NONBLOCK) { + gf_log (this->name, GF_LOG_DEBUG, + "returning EWOULDBLOCK"); + op_errno = EWOULDBLOCK; + op_ret = -1; + goto unlock; + } + + rw = CALLOC (1, sizeof (*rw)); + if (!rw) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_writev_stub (frame, pl_writev_cont, + fd, vector, count, offset); + if (!rw->stub) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + op_ret = -1; + free (rw); + goto unlock; + } + + rw->region = region; + + list_add_tail (&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock (&pl_inode->mutex); + + /* an allowable write must fall through to the STACK_WIND below; + * only a failed or queued request leaves through unwind */ + if (!allowable) + goto unwind; + } + + + STACK_WIND (frame, pl_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset); + return 0; + +unwind: + /* same argument list as pl_writev_cbk: op_ret, op_errno, stbuf */ + if (op_ret == -1) + STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +pl_lk (call_frame_t 
*frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + transport_t *transport = NULL; + pid_t client_pid = 0; + posix_locks_private_t *priv = NULL; + pl_inode_t *pl_inode = NULL; + int op_ret = 0; + int op_errno = 0; + int can_block = 0; + posix_lock_t *reqlock = NULL; + posix_lock_t *conf = NULL; + int ret = 0; + + transport = frame->root->trans; + client_pid = frame->root->pid; + priv = this->private; + + pl_inode = pl_inode_get (this, fd->inode); + if (!pl_inode) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + reqlock = new_posix_lock (flock, transport, client_pid); + if (!reqlock) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + switch (cmd) { + +#if F_GETLK != F_GETLK64 + case F_GETLK64: +#endif + case F_GETLK: + conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX); + posix_lock_to_flock (conf, flock); + __destroy_lock (reqlock); + + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + reqlock->fd = fd; + + /* fall through */ + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + memcpy (&reqlock->user_flock, flock, sizeof (struct flock)); + ret = pl_setlk (this, pl_inode, reqlock, + can_block, GF_LOCK_POSIX); + + if (ret == -1) { + /* a blocking request stays pending: return without + * unwinding the frame here */ + if (can_block) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock (reqlock); + } + + break; + + default: + /* unknown command: fail with EINVAL and release reqlock, + * which would otherwise leak while the call unwinds as + * a success */ + gf_log (this->name, GF_LOG_ERROR, + "unexpected lock command %d", cmd); + op_ret = -1; + op_errno = EINVAL; + __destroy_lock (reqlock); + + break; + } + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, flock); +out: + return 0; +} + + +/* TODO: this function just logs, no action required?? 
*/ +int +pl_forget (xlator_t *this, + inode_t *inode) +{ + pl_inode_t *pl_inode = NULL; + + pl_inode = pl_inode_get (this, inode); + + /* pl_inode_get can fail (its other callers check for NULL); + * there is then nothing to inspect or free */ + if (!pl_inode) + return 0; + + if (!list_empty (&pl_inode->rw_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "pending R/W requests found!"); + } + + if (!list_empty (&pl_inode->ext_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending fcntl locks found!"); + } + + if (!list_empty (&pl_inode->int_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending internal locks found!"); + } + + if (!list_empty (&pl_inode->dir_list)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Pending entry locks found!"); + } + + FREE (pl_inode); + + return 0; +} + + +int +init (xlator_t *this) +{ + posix_locks_private_t *priv = NULL; + xlator_list_t *trav = NULL; + data_t *mandatory = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: posix-locks should have exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + /* walk down the first-child chain to the bottom-most translator */ + trav = this->children; + while (trav->xlator->children) + trav = trav->xlator->children; + + if (strncmp ("storage/", trav->xlator->type, 8)) { + gf_log (this->name, GF_LOG_ERROR, + "'posix-locks' not loaded over storage translator"); + return -1; + } + + priv = CALLOC (1, sizeof (*priv)); + if (!priv) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + return -1; + } + + mandatory = dict_get (this->options, "mandatory-locks"); + if (mandatory) { + if (gf_string2boolean (mandatory->data, + &priv->mandatory) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'mandatory-locks' takes only boolean " + "options"); + /* priv is not yet published in this->private, + * so release it before failing */ + FREE (priv); + return -1; + } + } + + this->private = priv; + return 0; +} + + +int +fini (xlator_t *this) +{ + posix_locks_private_t *priv = NULL; + + priv = this->private; + free (priv); + + return 0; +} + + +int +pl_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *flock); + +int +pl_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *flock); + +int +pl_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int +pl_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +struct xlator_fops fops = { + .create = pl_create, + .truncate = pl_truncate, + .ftruncate = pl_ftruncate, + .open = pl_open, + .readv = pl_readv, + .writev = pl_writev, + .lk = pl_lk, + .inodelk = pl_inodelk, + .finodelk = pl_finodelk, + .entrylk = pl_entrylk, + .fentrylk = pl_fentrylk, + .flush = pl_flush, +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { + .forget = pl_forget, +}; + + +struct volume_options options[] = { + { .key = { "mandatory-locks", "mandatory" }, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c new file mode 100644 index 000000000..6a1bfbf68 --- /dev/null +++ b/xlators/features/locks/tests/unit-test.c @@ 
-0,0 +1,75 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "compat.h" +#include "xlator.h" +#include "inode.h" +#include "logging.h" +#include "common-utils.h" +#include "list.h" + +#include "locks.h" +#include "common.h" + +#define expect(cond) if (!(cond)) { goto out; } + +extern int lock_name (pl_inode_t *, const char *, entrylk_type); +extern int unlock_name (pl_inode_t *, const char *, entrylk_type); + +int main (int argc, char **argv) +{ + int ret = 1; + int r = -1; + + pl_inode_t *pinode = CALLOC (sizeof (pl_inode_t), 1); + pthread_mutex_init (&pinode->dir_lock_mutex, NULL); + INIT_LIST_HEAD (&pinode->gf_dir_locks); + + r = lock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0); + { + r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN); + } + r = unlock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0); + + r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); + { + r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); + { + r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN); + } + r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); + } + r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 
0); + + r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0); + r = unlock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0); + + r = lock_name (pinode, "baz", ENTRYLK_WRLCK); expect (r == 0); + r = lock_name (pinode, "baz", ENTRYLK_RDLCK); expect (r == -EAGAIN); + + ret = 0; +out: + return ret; +} diff --git a/xlators/features/path-convertor/Makefile.am b/xlators/features/path-convertor/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/path-convertor/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am new file mode 100644 index 000000000..1fde19352 --- /dev/null +++ b/xlators/features/path-convertor/src/Makefile.am @@ -0,0 +1,14 @@ + +xlator_LTLIBRARIES = path-converter.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +path_converter_la_LDFLAGS = -module -avoidversion + +path_converter_la_SOURCES = path.c +path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c new file mode 100644 index 000000000..41ef1d8a8 --- /dev/null +++ b/xlators/features/path-convertor/src/path.c @@ -0,0 +1,1217 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* TODO: add gf_log to all the cases returning errors */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/** + * xlators/features/path-translator: + * This translator converts the path it gets into user specified targets. + */ + +#include <sys/types.h> +#include <regex.h> +#include <time.h> +#include <errno.h> +#include "glusterfs.h" +#include "xlator.h" + +typedef struct path_private +{ + int32_t this_len; + int32_t start_off; + int32_t end_off; + char *this; + char *that; + char *path; + regex_t *preg; +} path_private_t; + +/* Builds ZR_FILE_CONTENT_STR followed by "path + name-without-prefix" with + * every '/' between start_off and end_off removed, in a newly allocated + * buffer. Returns 'name' itself when path is already end_off bytes or longer, + * or when the combined length does not exceed end_off. + */ +static char * +name_this_to_that (xlator_t *xl, const char *path, const char *name) +{ + path_private_t *priv = xl->private; + char priv_path[ZR_PATH_MAX] = {0,}; + char *tmp_name = NULL; + int32_t path_len = strlen (path); + int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN; + int32_t total_len = path_len + name_len; + int32_t i = 0, j = 0; + + if (path_len >= priv->end_off) + return (char *)name; + + if (priv->end_off && (total_len > priv->end_off)) { + j = priv->start_off; + /* +1 for the NUL: the final string is ZR_FILE_CONTENT_STR plus up + * to total_len bytes of priv_path (assumes ZR_FILE_CONTENT_STRLEN + * == strlen (ZR_FILE_CONTENT_STR) -- TODO confirm) */ + tmp_name = CALLOC (1, (total_len + ZR_FILE_CONTENT_STRLEN + 1)); + ERR_ABORT (tmp_name); + + /* Get the complete path for the file first */ + strcpy (tmp_name, path); + strcat (tmp_name, name + ZR_FILE_CONTENT_STRLEN); + + strncpy (priv_path, tmp_name, priv->start_off); + for (i = priv->start_off; i < priv->end_off; i++) { + if (tmp_name[i] == '/') + continue; + priv_path[j++] = tmp_name[i]; + } + memcpy ((priv_path + j), + (tmp_name + priv->end_off), + (total_len - priv->end_off)); + priv_path[(total_len - (priv->end_off - j))] = '\0'; + + strcpy (tmp_name, 
ZR_FILE_CONTENT_STR); + strcat (tmp_name, priv_path); + + return tmp_name; + } + + return (char *)name; +} + +/* This function should return + * NULL - + * converted path - if path match + * same path - if it doesn't match + * + * A converted path is a newly allocated copy of 'path' with every '/' + * between start_off and end_off removed; callers free it when it differs + * from the pointer they passed in. + * NOTE(review): when start_off < path_len <= end_off only the first + * start_off bytes are copied -- confirm that this is intended. + */ +static char * +path_this_to_that (xlator_t *xl, const char *path) +{ + path_private_t *priv = xl->private; + char *priv_path = NULL; + int32_t path_len = strlen (path); + int32_t i = 0, j = 0; + + if (priv->end_off && (path_len > priv->start_off)) { + /* +1 for the terminating NUL: when no '/' is dropped the + * converted path is path_len bytes long */ + priv_path = CALLOC (1, path_len + 1); + ERR_ABORT (priv_path); + + if (priv->start_off && (path_len > priv->start_off)) + memcpy (priv_path, path, priv->start_off); + if (path_len > priv->end_off) { + j = priv->start_off; + for (i = priv->start_off; i < priv->end_off; i++) { + if (path[i] == '/') + continue; + priv_path[j++] = path[i]; + } + memcpy ((priv_path + j), + (path + priv->end_off), + (path_len - priv->end_off)); + priv_path[(path_len - (priv->end_off - j))] = '\0'; + } + return priv_path; + } + return (char *)path; +} + +int32_t +path_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + +int32_t +path_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + +int32_t +path_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + STACK_UNWIND (frame, op_ret, op_errno, entries, count); + return 0; +} + +int32_t +path_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int32_t +path_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + 
int32_t op_ret, + int32_t op_errno, + const char *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +path_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *xattr) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr); + return 0; +} + + +int32_t +path_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +path_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int32_t +path_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +path_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + +int32_t +path_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + + +int32_t +path_common_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +path_common_dict_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +path_common_cbk (call_frame_t *frame, + void *cookie, + 
xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/* */ +int32_t +path_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, path_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, + size); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t dev) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + 
path_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, + mode, + dev); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, + mode); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + 
STACK_WIND (frame, + path_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, + loc); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + char *oldloc_path = (char *)oldloc->path; + char *tmp_oldloc_path = NULL; + + char *newloc_path = (char *)newloc->path; + char *tmp_newloc_path = NULL; + + if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + oldloc->path = tmp_oldloc_path; + + if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + newloc->path = tmp_newloc_path; + + STACK_WIND (frame, + path_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldloc, + newloc); + + oldloc->path = oldloc_path; + if (tmp_oldloc_path != oldloc_path) + FREE (tmp_oldloc_path); + + newloc->path = newloc_path; + if (tmp_newloc_path != newloc_path) + FREE (tmp_newloc_path); + + return 0; +} + +int32_t +path_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + char *oldloc_path = (char *)oldloc->path; + char *tmp_oldloc_path = NULL; + + char *newloc_path = (char *)newloc->path; + char *tmp_newloc_path = NULL; + + if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + oldloc->path = tmp_oldloc_path; + + if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + newloc->path = tmp_newloc_path; + + STACK_WIND (frame, + path_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldloc, + newloc); + + oldloc->path = oldloc_path; + if (tmp_oldloc_path != oldloc_path) + FREE (tmp_oldloc_path); + + newloc->path = newloc_path; + if (tmp_newloc_path != 
newloc_path) + FREE (tmp_newloc_path); + + return 0; +} + +int32_t +path_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + loc, + mode); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + loc, + uid, + gid); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_buf_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_buf_cbk, + 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, + flags, + fd); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, + flags, + mode, + fd); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + char *tmp_name = NULL; + data_pair_t *trav = dict->members_list; + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + if (ZR_FILE_CONTENT_REQUEST(trav->key)) { + tmp_name = name_this_to_that (this, loc->path, trav->key); + if (tmp_name != trav->key) { + trav->key = tmp_name; + } else { + tmp_name = NULL; + } + } + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, + dict, + flags); + + loc->path = loc_path; + if 
(tmp_path != loc_path) + FREE (tmp_path); + + if (tmp_name) + FREE (tmp_name); + + return 0; +} + +int32_t +path_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + char *tmp_name = (char *)name; + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + if (ZR_FILE_CONTENT_REQUEST(name)) { + tmp_name = name_this_to_that (this, loc->path, name); + } + + STACK_WIND (frame, + path_common_dict_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, + tmp_name); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + if (tmp_name != name) + FREE (tmp_name); + + return 0; +} + +int32_t +path_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + char *tmp_name = (char *)name; + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + if (ZR_FILE_CONTENT_REQUEST(name)) { + tmp_name = name_this_to_that (this, loc->path, name); + } + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, + tmp_name); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + if (tmp_name != name) + FREE (tmp_name); + + return 0; +} + +int32_t +path_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_opendir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, + loc, + fd); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE 
(tmp_path); + + return 0; +} + +int32_t +path_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + loc, + mask); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *fchecksum, + uint8_t *dchecksum) +{ + STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); + return 0; +} + +int32_t +path_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_checksum_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->checksum, + loc, + flag); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + + +int32_t +path_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, + loc, basename, cmd, type); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + +int32_t +path_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + 
char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, + loc, cmd, lock); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + + +int32_t +path_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + char *loc_path = (char *)loc->path; + char *tmp_path = NULL; + + if (!(tmp_path = path_this_to_that (this, loc->path))) { + STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); + return 0; + } + loc->path = tmp_path; + + STACK_WIND (frame, + path_common_dict_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, + loc, + flags, + dict); + + loc->path = loc_path; + if (tmp_path != loc_path) + FREE (tmp_path); + + return 0; +} + + +int32_t +init (xlator_t *this) +{ + dict_t *options = this->options; + path_private_t *priv = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "path translator requires exactly one subvolume"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + priv = CALLOC (1, sizeof (*priv)); + ERR_ABORT (priv); + if (dict_get (options, "start-offset")) { + priv->start_off = data_to_int32 (dict_get (options, + "start-offset")); + } + if (dict_get (options, "end-offset")) { + priv->end_off = data_to_int32 (dict_get (options, + "end-offset")); + } + + if (dict_get (options, "regex")) { + int32_t ret = 0; + priv->preg = CALLOC (1, sizeof (regex_t)); + ERR_ABORT (priv->preg); + ret = regcomp (priv->preg, + data_to_str (dict_get (options, "regex")), + REG_EXTENDED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to compile the 'option regex'"); + FREE (priv); + return -1; + } + if (dict_get (options, "replace-with")) { + priv->that = data_to_str (dict_get (options, + "replace-with")); + } else { + priv->that = ""; + } + } + + this->private = priv; + return 0; +} + +void +fini (xlator_t *this) +{ + return; +} + +struct xlator_fops fops = { + .stat = path_stat, + .readlink = path_readlink, + .mknod = path_mknod, + .mkdir = path_mkdir, + .unlink = path_unlink, + .rmdir = path_rmdir, + .symlink = path_symlink, + .rename = path_rename, + .link = path_link, + .chmod = path_chmod, + .chown = path_chown, + .truncate = path_truncate, + .utimens = path_utimens, + .open = path_open, + .setxattr = path_setxattr, + .getxattr = path_getxattr, + .removexattr = path_removexattr, + .opendir = path_opendir, + .access = path_access, + .create = path_create, + .lookup = path_lookup, + .checksum = path_checksum, + .xattrop = path_xattrop, + .entrylk = path_entrylk, + .inodelk = path_inodelk, +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"start-offset"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 4095 + }, + { .key = {"end-offset"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 4096 + }, + { .key = {"replace-with"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {NULL} }, +}; diff --git 
a/xlators/features/quota/Makefile.am b/xlators/features/quota/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/quota/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am new file mode 100644 index 000000000..886d83964 --- /dev/null +++ b/xlators/features/quota/src/Makefile.am @@ -0,0 +1,13 @@ +xlator_LTLIBRARIES = quota.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +quota_la_LDFLAGS = -module -avoidversion + +quota_la_SOURCES = quota.c +quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c new file mode 100644 index 000000000..c898899b5 --- /dev/null +++ b/xlators/features/quota/src/quota.c @@ -0,0 +1,1056 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/time.h> + +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" + +struct quota_local { + struct stat stbuf; + inode_t *inode; + char *path; + fd_t *fd; + off_t offset; + int32_t count; + struct iovec *vector; + dict_t *refs; + loc_t loc; +}; + + +struct quota_priv { + char only_first_time; /* Used to make sure a call is done only one time */ + gf_lock_t lock; /* Used while updating variables */ + + uint64_t disk_usage_limit; /* Used for Disk usage quota */ + uint64_t current_disk_usage; /* Keep the current usage value */ + + uint32_t min_free_disk_limit; /* user specified limit, in %*/ + uint32_t current_free_disk; /* current free disk space available, in % */ + uint32_t refresh_interval; /* interval in seconds */ + uint32_t min_disk_last_updated_time; /* used for interval calculation */ +}; + + +int +quota_statvfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *stbuf) +{ + struct quota_priv *priv = this->private; + + if (op_ret >= 0) { + priv->current_free_disk = + (stbuf->f_bavail * 100) / stbuf->f_blocks; + } + + STACK_DESTROY (frame->root); + return 0; +} + + +static void +build_root_loc (xlator_t *this, loc_t *loc) +{ + loc->path = "/"; +} + + +void +gf_quota_usage_subtract (xlator_t *this, size_t size) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + LOCK (&priv->lock); + { + if (priv->current_disk_usage < size) + priv->current_disk_usage = 0; + else + priv->current_disk_usage -= size; + } + UNLOCK (&priv->lock); +} + + +void +gf_quota_usage_add (xlator_t *this, size_t size) +{ + struct quota_priv *priv = this->private; + + LOCK (&priv->lock); + { + priv->current_disk_usage += size; + } + UNLOCK (&priv->lock); +} + + +void +gf_quota_update_current_free_disk (xlator_t *this) +{ + call_frame_t *frame = NULL; + call_pool_t *pool = NULL; + loc_t loc; + + pool = this->ctx->pool; + 
frame = create_frame (this, pool); + + build_root_loc (this, &loc); + + STACK_WIND (frame, quota_statvfs_cbk, + this->children->xlator, + this->children->xlator->fops->statfs, &loc); + + return ; +} + + +int +gf_quota_check_free_disk (xlator_t *this) +{ + struct quota_priv * priv = NULL; + struct timeval tv = {0, 0}; + + priv = this->private; + if (priv->min_free_disk_limit) { + gettimeofday (&tv, NULL); + if (tv.tv_sec > (priv->refresh_interval + + priv->min_disk_last_updated_time)) { + priv->min_disk_last_updated_time = tv.tv_sec; + gf_quota_update_current_free_disk (this); + } + if (priv->current_free_disk <= priv->min_free_disk_limit) + return -1; + } + + return 0; +} + + +int +quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_priv *priv = this->private; + struct quota_local *local = NULL; + + local = frame->local; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_subtract (this, (local->stbuf.st_blocks - + buf->st_blocks) * 512); + loc_wipe (&local->loc); + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int +quota_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + local = frame->local; + + if (op_ret >= 0) { + local->stbuf = *buf; + } + + STACK_WIND (frame, quota_truncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, + &local->loc, local->offset); + return 0; +} + + +int +quota_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + + if (priv->disk_usage_limit) { + local = CALLOC (1, sizeof (struct quota_local)); + frame->local = local; + + loc_copy (&local->loc, loc); + local->offset = offset; + + STACK_WIND (frame, 
quota_truncate_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc); + return 0; + } + + STACK_WIND (frame, quota_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, offset); + return 0; +} + + +int +quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_priv *priv = NULL; + struct quota_local *local = NULL; + + local = frame->local; + priv = this->private; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_subtract (this, (local->stbuf.st_blocks - + buf->st_blocks) * 512); + fd_unref (local->fd); + } + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int +quota_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + local = frame->local; + + if (op_ret >= 0) { + local->stbuf = *buf; + } + + STACK_WIND (frame, quota_ftruncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, + local->fd, local->offset); + return 0; +} + + +int +quota_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + + priv = this->private; + + if (priv->disk_usage_limit) { + local = CALLOC (1, sizeof (struct quota_local)); + frame->local = local; + + local->fd = fd_ref (fd); + local->offset = offset; + + STACK_WIND (frame, quota_ftruncate_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd); + return 0; + } + + STACK_WIND (frame, quota_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, offset); + return 0; +} + + +int +quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf) +{ + struct quota_priv *priv = NULL; + + priv = 
this->private; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_add (this, buf->st_blocks * 512); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int +quota_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if (gf_quota_check_free_disk (this) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "min-free-disk limit (%u) crossed, current available is %u", + priv->min_free_disk_limit, priv->current_free_disk); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL); + return 0; + } + + if (priv->current_disk_usage > priv->disk_usage_limit) { + gf_log (this->name, GF_LOG_ERROR, + "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", + priv->disk_usage_limit, priv->current_disk_usage); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL); + return 0; + } + + STACK_WIND (frame, quota_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev); + return 0; +} + + +int +quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct stat *buf) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_subtract (this, buf->st_blocks * 512); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int +quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if (gf_quota_check_free_disk (this) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "min-free-disk limit (%u) crossed, current available is %u", + priv->min_free_disk_limit, priv->current_free_disk); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL); + return 0; + + } + + if (priv->current_disk_usage > priv->disk_usage_limit) { + gf_log (this->name, GF_LOG_ERROR, + "Disk usage limit (%"PRIu64") crossed, current usage 
is %"PRIu64"", + priv->disk_usage_limit, priv->current_disk_usage); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL); + return 0; + } + + STACK_WIND (frame, quota_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + loc, mode); + + return 0; +} + + +int +quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + struct quota_local *local = NULL; + + local = frame->local; + + if (local) { + if (op_ret >= 0) { + gf_quota_usage_subtract (this, + local->stbuf.st_blocks * 512); + } + loc_wipe (&local->loc); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int +quota_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_local *local = NULL; + + local = frame->local; + + if (op_ret >= 0) { + if (buf->st_nlink == 1) { + local->stbuf = *buf; + } + } + + STACK_WIND (frame, quota_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + &local->loc); + + return 0; +} + + +int +quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + + if (priv->disk_usage_limit) { + local = CALLOC (1, sizeof (struct quota_local)); + frame->local = local; + + loc_copy (&local->loc, loc); + + STACK_WIND (frame, + quota_unlink_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; + } + + STACK_WIND (frame, quota_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + return 0; +} + + +int +quota_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + struct quota_local *local = NULL; + + local = frame->local; + + if (local) { + if (op_ret >= 0) { + gf_quota_usage_subtract (this, local->stbuf.st_blocks * 512); + } + loc_wipe (&local->loc); + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int 
+quota_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_local *local = NULL; + + local = frame->local; + + if (op_ret >= 0) { + local->stbuf = *buf; + } + + STACK_WIND (frame, quota_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + &local->loc); + + return 0; +} + + +int +quota_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + + if (priv->disk_usage_limit) { + local = CALLOC (1, sizeof (struct quota_local)); + frame->local = local; + + loc_copy (&local->loc, loc); + + STACK_WIND (frame, quota_rmdir_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc); + return 0; + } + + STACK_WIND (frame, quota_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + loc); + return 0; +} + + +int +quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct stat *buf) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_add (this, buf->st_blocks * 512); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int +quota_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *loc) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if (gf_quota_check_free_disk (this) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "min-free-disk limit (%u) crossed, current available is %u", + priv->min_free_disk_limit, priv->current_free_disk); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL); + return 0; + + } + if (priv->current_disk_usage > priv->disk_usage_limit) { + gf_log (this->name, GF_LOG_ERROR, + "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", + priv->disk_usage_limit, priv->current_disk_usage); + STACK_UNWIND (frame, -1, ENOSPC, 
NULL, NULL); + return 0; + } + + STACK_WIND (frame, quota_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + linkpath, loc); + return 0; +} + + +int +quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct stat *buf) +{ + struct quota_priv *priv = this->private; + int ret = 0; + + if ((op_ret >= 0) && priv->disk_usage_limit) { + gf_quota_usage_add (this, buf->st_blocks * 512); + + ret = fd_ctx_set (fd, this, 1); + } + + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + + +int +quota_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + struct quota_priv *priv = NULL; + + priv = this->private; + + if (gf_quota_check_free_disk (this) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "min-free-disk limit (%u) crossed, current available is %u", + priv->min_free_disk_limit, priv->current_free_disk); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL); + return 0; + + } + if (priv->current_disk_usage > priv->disk_usage_limit) { + gf_log (this->name, GF_LOG_ERROR, + "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"", + priv->disk_usage_limit, priv->current_disk_usage); + STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL); + return 0; + } + + STACK_WIND (frame, quota_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + return 0; +} + + +int +quota_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + int ret = 0; + + if (op_ret >= 0) + ret = fd_ctx_set (fd, this, 1); + + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + + +int +quota_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + STACK_WIND (frame, quota_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, flags, fd); + return 0; +} + + +int +quota_writev_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + struct quota_priv *priv = NULL; + struct quota_local *local = NULL; + + + priv = this->private; + local = frame->local; + + if (priv->disk_usage_limit) { + if (op_ret >= 0) { + gf_quota_usage_add (this, (stbuf->st_blocks - + local->stbuf.st_blocks) * 512); + } + fd_unref (local->fd); + dict_unref (local->refs); + } + + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + + +int +quota_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + int iovlen = 0; + + + local = frame->local; + priv = this->private; + + if (op_ret >= 0) { + if (priv->current_disk_usage > priv->disk_usage_limit) { + iovlen = iov_length (local->vector, local->count); + + if (iovlen > (buf->st_blksize - (buf->st_size % buf->st_blksize))) { + fd_unref (local->fd); + dict_unref (local->refs); + STACK_UNWIND (frame, -1, ENOSPC, NULL); + return 0; + } + } + local->stbuf = *buf; + } + + STACK_WIND (frame, quota_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + local->fd, local->vector, local->count, local->offset); + + return 0; +} + + +int +quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off) +{ + struct quota_local *local = NULL; + struct quota_priv *priv = NULL; + + priv = this->private; + + if (gf_quota_check_free_disk (this) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "min-free-disk limit (%u) crossed, current available is %u", + priv->min_free_disk_limit, priv->current_free_disk); + STACK_UNWIND (frame, -1, ENOSPC, NULL); + return 0; + } + + if (priv->disk_usage_limit) { + local = CALLOC (1, sizeof (struct quota_local)); + local->fd = fd_ref (fd); + local->refs = dict_ref (frame->root->req_refs); + local->vector = vector; + local->count = count; + 
local->offset = off; + frame->local = local; + + STACK_WIND (frame, quota_writev_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd); + return 0; + } + + STACK_WIND (frame, quota_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, vector, count, off); + return 0; +} + + +int +quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + if (op_ret == -1) { + gf_log (this->name, GF_LOG_CRITICAL, + "failed to remove the disk-usage value: %s", + strerror (op_errno)); + } + + STACK_DESTROY (frame->root); + return 0; +} + + +int +quota_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + if (op_ret == -1) { + gf_log (this->name, GF_LOG_CRITICAL, + "failed to set the disk-usage value: %s", + strerror (op_errno)); + } + + STACK_DESTROY (frame->root); + return 0; +} + + +int +quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *statvfs) +{ + struct quota_priv *priv = NULL; + uint64_t f_blocks = 0; + int64_t f_bfree = 0; + uint64_t f_bused = 0; + + + priv = this->private; + + if (op_ret != 0) + goto unwind; + + f_blocks = priv->disk_usage_limit / statvfs->f_frsize; + f_bused = priv->current_disk_usage / statvfs->f_frsize; + + if (f_blocks && (f_blocks < statvfs->f_blocks)) + statvfs->f_blocks = f_blocks; + + f_bfree = (statvfs->f_blocks - f_bused); + + if (f_bfree >= 0) + statvfs->f_bfree = statvfs->f_bavail = f_bfree; + else + statvfs->f_bfree = statvfs->f_bavail = 0; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, statvfs); + return 0; +} + + +int +quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + STACK_WIND (frame, quota_statfs_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->statfs, loc); + + return 0; +} + + +int +quota_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *value) 
+{ + data_t *data = NULL; + struct quota_priv *priv = this->private; + + if (op_ret >= 0) { + data = dict_get (value, "trusted.glusterfs-quota-du"); + if (data) { + LOCK (&priv->lock); + { + priv->current_disk_usage = data_to_uint64 (data); + } + UNLOCK (&priv->lock); + + return 0; + } + } + + STACK_DESTROY (frame->root); + + return 0; +} + + +void +gf_quota_get_disk_usage (xlator_t *this) +{ + call_frame_t *frame = NULL; + call_pool_t *pool = NULL; + loc_t loc; + + pool = this->ctx->pool; + frame = create_frame (this, pool); + build_root_loc (this, &loc); + + STACK_WIND (frame, quota_getxattr_cbk, + this->children->xlator, + this->children->xlator->fops->getxattr, + &loc, + "trusted.glusterfs-quota-du"); + return ; +} + + +void +gf_quota_cache_sync (xlator_t *this) +{ + struct quota_priv *priv = NULL; + call_frame_t *frame = NULL; + dict_t *dict = get_new_dict (); + loc_t loc; + + + priv = this->private; + build_root_loc (this, &loc); + + frame = create_frame (this, this->ctx->pool); + dict_set (dict, "trusted.glusterfs-quota-du", + data_from_uint64 (priv->current_disk_usage)); + + STACK_WIND (frame, quota_setxattr_cbk, + this->children->xlator, + this->children->xlator->fops->setxattr, + &loc, dict, 0); +} + + +int +quota_release (xlator_t *this, fd_t *fd) +{ + gf_quota_cache_sync (this); + + return 0; +} + + +/* notify */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + struct quota_priv *priv = this->private; + + switch (event) + { + case GF_EVENT_CHILD_UP: + if (priv->only_first_time) { + priv->only_first_time = 0; + if (priv->disk_usage_limit) { + gf_quota_get_disk_usage (this); + } + } + default: + default_notify (this, event, data); + break; + } + + return 0; +} + + +int32_t +init (xlator_t *this) +{ + int ret = 0; + data_t *data = NULL; + struct quota_priv *_private = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: quota should have exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + _private = CALLOC (1, sizeof (struct quota_priv)); + _private->disk_usage_limit = 0; + data = dict_get (this->options, "disk-usage-limit"); + if (data) { + if (gf_string2bytesize (data->data, &_private->disk_usage_limit) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number '%s' for disk-usage limit", data->data); + ret = -1; + goto out; + } + + LOCK_INIT (&_private->lock); + _private->current_disk_usage = 0; + } + + _private->min_free_disk_limit = 0; + data = dict_get (this->options, "min-free-disk-limit"); + if (data) { + if (gf_string2percent (data->data, &_private->min_free_disk_limit) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid percent '%s' for min-free-disk limit", data->data); + ret = -1; + goto out; + } + _private->refresh_interval = 20; /* 20seconds is default */ + data = dict_get (this->options, "refresh-interval"); + if (data) { + if (gf_string2time (data->data, + &_private->refresh_interval)!= 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid time '%s' for refresh " + "interval", data->data); + ret = -1; + goto out; + } + } + } + + _private->only_first_time = 1; + this->private = (void *)_private; + ret = 0; + out: + return ret; +} + +void +fini (xlator_t *this) +{ + struct quota_priv *_private = this->private; + + if (_private) { + gf_quota_cache_sync (this); 
+ this->private = NULL; + } + + return ; +} + +struct xlator_fops fops = { + .create = quota_create, + .open = quota_open, + .truncate = quota_truncate, + .ftruncate = quota_ftruncate, + .writev = quota_writev, + .unlink = quota_unlink, + .rmdir = quota_rmdir, + .mknod = quota_mknod, + .mkdir = quota_mkdir, + .symlink = quota_symlink, + .statfs = quota_statfs, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .release = quota_release +}; + +struct volume_options options[] = { + { .key = {"min-free-disk-limit"}, + .type = GF_OPTION_TYPE_PERCENT + }, + { .key = {"refresh-interval"}, + .type = GF_OPTION_TYPE_TIME + }, + { .key = {"disk-usage-limit"}, + .type = GF_OPTION_TYPE_SIZET + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/trash/Makefile.am b/xlators/features/trash/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/features/trash/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am new file mode 100644 index 000000000..d61f608aa --- /dev/null +++ b/xlators/features/trash/src/Makefile.am @@ -0,0 +1,13 @@ +xlator_LTLIBRARIES = trash.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +trash_la_LDFLAGS = -module -avoidversion + +trash_la_SOURCES = trash.c +trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c new file mode 100644 index 000000000..c8e7357ee --- /dev/null +++ b/xlators/features/trash/src/trash.c @@ -0,0 +1,596 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "defaults.h" + +#include <libgen.h> + +/* TODO: currently it can work only above posix, no other translators + * between them. Not a good thing. Try making more reliable methods. + */ + +struct trash_struct { + inode_t *inode; + loc_t loc1; + loc_t loc2; + char origpath[ZR_PATH_MAX]; + char newpath[ZR_PATH_MAX]; + char oldpath[ZR_PATH_MAX]; // used only in case of rename +}; +typedef struct trash_struct trash_local_t; + +struct trash_priv { + char trash_dir[ZR_PATH_MAX]; +}; +typedef struct trash_priv trash_private_t; + +int32_t +trash_unlink_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf); +int32_t +trash_rename_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf); + +/** + * trash_common_unwind_cbk - + */ +int32_t +trash_common_unwind_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + trash_local_t *local = frame->local; + + if (local->loc1.path) + loc_wipe (&local->loc1); + + if (local->loc2.path) + loc_wipe (&local->loc2); + + STACK_UNWIND (frame, op_ret, op_errno); + return 
0; +} + +/** + * trash_common_unwind_buf_cbk - + */ +int32_t +trash_common_unwind_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + trash_local_t *local = frame->local; + + if (local->loc1.path) + loc_wipe (&local->loc1); + + if (local->loc2.path) + loc_wipe (&local->loc2); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +trash_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + trash_local_t *local = frame->local; + char *tmp_str = strdup (local->newpath); + int32_t count = 0; + char *tmp_path = NULL; + char *tmp_dirname = NULL; + + if (op_ret == -1 && op_errno == ENOENT) { + tmp_dirname = strchr (tmp_str, '/'); + while (tmp_dirname) { + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + tmp_path = CALLOC (1, count + 1); + ERR_ABORT (tmp_path); + memcpy (tmp_path, local->newpath, count); + loc_t tmp_loc = { + .inode = NULL, + .path = tmp_path, + }; + + /* TODO:create the directory with proper permissions */ + STACK_WIND_COOKIE (frame, + trash_mkdir_cbk, + tmp_path, + this->children->xlator, + this->children->xlator->fops->mkdir, + &tmp_loc, + 0777); + tmp_dirname = strchr (tmp_str + count + 1, '/'); + } + free (cookie); + free (tmp_str); + return 0; + } + char *dir_name = dirname (tmp_str); + if (strcmp((char*)cookie, dir_name) == 0) { + loc_t new_loc = { + .inode = NULL, + .path = local->newpath + }; + STACK_WIND (frame, + trash_unlink_rename_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + &local->loc2, + &new_loc); + + } + free (cookie); /* strdup (dir_name) was sent here :) */ + free (tmp_str); + return 0; +} + +/** + * trash_unlink_rename_cbk - + */ +int32_t +trash_unlink_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + trash_local_t *local = 
frame->local; + if (op_ret == -1 && op_errno == ENOENT) { + /* check for the errno, if its ENOENT create directory and call + * rename later + */ + char *tmp_str = strdup (local->newpath); + char *dir_name = dirname (tmp_str); + loc_t tmp_loc = { + .inode = NULL, + .path = dir_name, + }; + /* TODO: create the directory with proper permissions */ + STACK_WIND_COOKIE (frame, + trash_mkdir_cbk, + strdup (dir_name), + this->children->xlator, + this->children->xlator->fops->mkdir, + &tmp_loc, + 0777); + free (tmp_str); + } else if (op_ret == -1 && op_errno == ENOTDIR) { + gf_log (this->name, GF_LOG_WARNING, + "Target exists, cannot keep the copy, deleting"); + STACK_WIND (frame, + trash_common_unwind_cbk, + this->children->xlator, + this->children->xlator->fops->unlink, + &local->loc2); + } else if (op_ret == -1 && op_errno == EISDIR) { + gf_log (this->name, GF_LOG_WARNING, + "Target exists as a directory, cannot keep the copy, " + "deleting"); + STACK_WIND (frame, + trash_common_unwind_cbk, + this->children->xlator, + this->children->xlator->fops->unlink, + &local->loc2); + } else { + /* */ + STACK_UNWIND (frame, 0, op_errno); + } + + return 0; +} + + +/** + * trash_unlink - + */ +int32_t +trash_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + trash_private_t *priv = this->private; + trash_local_t *local = NULL; + time_t utime = 0; + struct tm *tm = NULL; + char timestr[256]; + + if (strncmp (loc->path, priv->trash_dir, + strlen(priv->trash_dir)) == 0) { + /* Trying to rename from the trash can dir, do the + actual unlink */ + STACK_WIND (frame, + trash_common_unwind_cbk, + this->children->xlator, + this->children->xlator->fops->unlink, + loc); + } else { + local = CALLOC (1, sizeof (trash_local_t)); + if (!local) { + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + frame->local = local; + + loc_copy (&local->loc2, loc); + + strcpy (local->newpath, priv->trash_dir); + strcat (local->newpath, loc->path); + + utime = time (NULL); + tm = localtime 
(&utime); + strftime (timestr, 256, ".%Y%m%d%H%M%S", tm); + strcat (local->newpath, timestr); + + { + loc_t new_loc = { + .inode = NULL, + .path = local->newpath + }; + STACK_WIND (frame, + trash_unlink_rename_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + loc, + &new_loc); + } + } + return 0; +} + +/* */ +int32_t +trash_rename_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + trash_local_t *local = frame->local; + char *tmp_str = strdup (local->newpath); + + if (op_ret == -1 && op_errno == ENOENT) { + int32_t count = 0; + char *tmp_path = NULL; + char *tmp_dirname = strchr (tmp_str, '/'); + + while (tmp_dirname) { + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + tmp_path = CALLOC (1, count + 2); + ERR_ABORT (tmp_path); + memcpy (tmp_path, local->newpath, count); + loc_t tmp_loc = { + .inode = NULL, + .path = tmp_path, + }; + + /* TODO:create the directory with proper permissions */ + STACK_WIND_COOKIE (frame, + trash_rename_mkdir_cbk, + tmp_path, + this->children->xlator, + this->children->xlator->fops->mkdir, + &tmp_loc, + 0777); + tmp_dirname = strchr (tmp_str + count + 1, '/'); + } + free (cookie); + free (tmp_str); + return 0; + } + char *dir_name = dirname (tmp_str); + if (strcmp((char*)cookie, dir_name) == 0) { + loc_t new_loc = { + .inode = NULL, + .path = local->newpath + }; + STACK_WIND (frame, + trash_rename_rename_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + &local->loc2, + &new_loc); + + } + free (cookie); /* strdup (dir_name) was sent here :) */ + free (tmp_str); + return 0; +} + + +/** + * trash_unlink_rename_cbk - + */ +int32_t +trash_rename_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + trash_local_t *local = frame->local; + if (op_ret == -1 && op_errno == ENOENT) { + /* check for the errno, if its 
ENOENT create directory and call + * rename later + */ + char *tmp_str = strdup (local->newpath); + char *dir_name = dirname (tmp_str); + loc_t tmp_loc = { + .inode = NULL, + .path = dir_name, + }; + /* TODO: create the directory with proper permissions */ + STACK_WIND_COOKIE (frame, + trash_rename_mkdir_cbk, + strdup (dir_name), + this->children->xlator, + this->children->xlator->fops->mkdir, + &tmp_loc, + 0777); + free (tmp_str); + return 0; + } else if (op_ret == -1 && op_errno == ENOTDIR) { + gf_log (this->name, GF_LOG_WARNING, + "Target exists, cannot keep the dest entry %s, " + "renaming", + local->loc2.path); + } else if (op_ret == -1 && op_errno == EISDIR) { + gf_log (this->name, GF_LOG_WARNING, + "Target exists as a directory, cannot keep the " + "copy %s, renaming", + local->loc2.path); + } + loc_t new_loc = { + .inode = NULL, + .parent = local->loc2.parent, + .path = local->loc2.path, + }; + STACK_WIND (frame, + trash_common_unwind_buf_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + &local->loc1, + &new_loc); + + return 0; +} + +/** + * trash_rename_lookup_cbk - + */ +int32_t +trash_rename_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *xattr) +{ + trash_local_t *local = frame->local; + + if (op_ret == -1) { + STACK_WIND (frame, + trash_common_unwind_buf_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + &local->loc1, + &local->loc2); + return 0; + } + + loc_t oldloc = { + .parent = local->loc2.parent, + .inode = inode, + .path = local->loc2.path, + }; + loc_t newloc = { + .inode = NULL, + .path = local->newpath + }; + STACK_WIND (frame, + trash_rename_rename_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + &oldloc, + &newloc); + + return 0; +} + + +/** + * trash_rename - + */ +int32_t +trash_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + 
trash_private_t *priv = this->private; + trash_local_t *local = NULL; + time_t utime = 0; + struct tm *tm = NULL; + char timestr[256]; + + if (strncmp (oldloc->path, priv->trash_dir, + strlen(priv->trash_dir)) == 0) { + /* Trying to rename from the trash can dir, + do the actual rename */ + STACK_WIND (frame, + trash_common_unwind_buf_cbk, + this->children->xlator, + this->children->xlator->fops->rename, + oldloc, + newloc); + } else { + /* Trying to rename a regular file from GlusterFS */ + local = CALLOC (1, sizeof (trash_local_t)); + if (!local) { + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + frame->local = local; + loc_copy (&local->loc1, oldloc); + loc_copy (&local->loc2, newloc); + + strcpy (local->newpath, priv->trash_dir); + strcat (local->newpath, newloc->path); + + utime = time (NULL); + tm = localtime (&utime); + strftime (timestr, 256, ".%Y%m%d%H%M%S", tm); + strcat (local->newpath, timestr); + + /* Send a lookup call on newloc, to ensure we are not + overwriting */ + STACK_WIND (frame, + trash_rename_lookup_cbk, + this->children->xlator, + this->children->xlator->fops->lookup, + newloc, + 0); + } + return 0; +} + +/** + * trash_init - + */ +int32_t +init (xlator_t *this) +{ + data_t *trash_dir = NULL; + xlator_list_t *trav = NULL; + trash_private_t *_priv = NULL; + + /* Create .trashcan directory in init */ + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "not configured with exactly one child. exiting"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + trav = this->children; + while (trav->xlator->children) + trav = trav->xlator->children; + + if (strncmp ("storage/", trav->xlator->type, 8)) + { + gf_log (this->name, GF_LOG_ERROR, + "'trash' translator not loaded over storage " + "translator, not a supported setup"); + return -1; + } + + _priv = CALLOC (1, sizeof (*_priv)); + ERR_ABORT (_priv); + + trash_dir = dict_get (this->options, "trash-dir"); + if (!trash_dir) { + gf_log (this->name, GF_LOG_WARNING, + "no option specified for 'trash-dir', " + "using \"/.trashcan/\""); + strcpy (_priv->trash_dir, "/.trashcan"); + } else { + /* Need a path with '/' as the first char, if not + given, append it */ + if (trash_dir->data[0] == '/') { + strcpy (_priv->trash_dir, trash_dir->data); + } else { + strcpy (_priv->trash_dir, "/"); + strcat (_priv->trash_dir, trash_dir->data); + } + } + + this->private = (void *)_priv; + return 0; +} + +void +fini (xlator_t *this) +{ + trash_private_t *priv = this->private; + FREE (priv); + return; +} + + +struct xlator_fops fops = { + .unlink = trash_unlink, + .rename = trash_rename, +}; + +struct xlator_mops mops = { + +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = { "trash-dir" }, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {NULL} }, +}; diff --git a/xlators/meta/Makefile.am b/xlators/meta/Makefile.am new file mode 100644 index 000000000..e1c45f305 --- /dev/null +++ b/xlators/meta/Makefile.am @@ -0,0 +1 @@ +SUBDIRS=src
\ No newline at end of file diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am new file mode 100644 index 000000000..385ff553f --- /dev/null +++ b/xlators/meta/src/Makefile.am @@ -0,0 +1,10 @@ +xlator_PROGRAMS = meta.so +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/ + +meta_so_SOURCES = meta.c tree.c misc.c view.c +noinst_HEADERS = meta.h tree.h misc.h view.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles + +CLEANFILES = diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c new file mode 100644 index 000000000..ce49ed2c4 --- /dev/null +++ b/xlators/meta/src/meta.c @@ -0,0 +1,1285 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "dict.h" +#include "xlator.h" + +#include "meta.h" +#include "view.h" + +int32_t +meta_getattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +meta_getattr (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file) { + if (file->fops && file->fops->getattr) { + STACK_WIND (frame, meta_getattr_cbk, + this, file->fops->getattr, path); + return 0; + } + else { + STACK_UNWIND (frame, 0, 0, file->stbuf); + return 0; + } + } + else { + STACK_WIND (frame, meta_getattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getattr, + path); + return 0; + } +} + +int32_t +meta_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_chmod (call_frame_t *frame, + xlator_t *this, + const char *path, + mode_t mode) +{ + STACK_WIND (frame, + meta_chmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + path, + mode); + return 0; +} + +int32_t +meta_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_chown (call_frame_t *frame, + xlator_t *this, + const char *path, + uid_t uid, + gid_t gid) +{ + STACK_WIND (frame, + meta_chown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + path, + uid, + gid); + return 0; +} + + +int32_t +meta_truncate_cbk (call_frame_t 
*frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_truncate (call_frame_t *frame, + xlator_t *this, + const char *path, + off_t offset) +{ + STACK_WIND (frame, + meta_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + path, + offset); + return 0; +} + + +int32_t +meta_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_ftruncate (call_frame_t *frame, + xlator_t *this, + dict_t *fd, + off_t offset) +{ + STACK_WIND (frame, + meta_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} + + +int32_t +meta_utimes_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_utimes (call_frame_t *frame, + xlator_t *this, + const char *path, + struct timespec *buf) +{ + STACK_WIND (frame, + meta_utimes_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimes, + path, + buf); + return 0; +} + + +int32_t +meta_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_access (call_frame_t *frame, + xlator_t *this, + const char *path, + mode_t mode) +{ + STACK_WIND (frame, + meta_access_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, + path, + mode); + return 0; +} + +int32_t +meta_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + char *dest) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + dest); + return 0; +} + +int32_t +meta_readlink (call_frame_t *frame, + 
xlator_t *this, + const char *path, + size_t size) +{ + STACK_WIND (frame, + meta_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + path, + size); + return 0; +} + +int32_t +meta_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_mknod (call_frame_t *frame, + xlator_t *this, + const char *path, + mode_t mode, + dev_t dev) +{ + STACK_WIND (frame, + meta_mknod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, + path, + mode, + dev); + return 0; +} + +int32_t +meta_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_mkdir (call_frame_t *frame, + xlator_t *this, + const char *path, + mode_t mode) +{ + STACK_WIND (frame, + meta_mkdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, + path, + mode); + return 0; +} + +int32_t +meta_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_unlink (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + STACK_WIND (frame, + meta_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + path); + return 0; +} + +int32_t +meta_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_rmdir (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + STACK_WIND (frame, + meta_rmdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, + path); + return 0; +} + +int32_t +meta_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct 
stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_symlink (call_frame_t *frame, + xlator_t *this, + const char *oldpath, + const char *newpath) +{ + STACK_WIND (frame, + meta_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + oldpath, + newpath); + return 0; +} + +int32_t +meta_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_rename (call_frame_t *frame, + xlator_t *this, + const char *oldpath, + const char *newpath) +{ + STACK_WIND (frame, + meta_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldpath, + newpath); + return 0; +} + +int32_t +meta_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_link (call_frame_t *frame, + xlator_t *this, + const char *oldpath, + const char *newpath) +{ + STACK_WIND (frame, + meta_link_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, + oldpath, + newpath); + return 0; +} + +struct _open_local { + const char *path; +}; + +int32_t +meta_open_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *ctx, struct stat *buf) +{ + struct _open_local *local = frame->local; + if (local) + dict_set (ctx, this->name, str_to_data (local->path)); + STACK_UNWIND (frame, op_ret, op_errno, ctx, buf); + return 0; +} + +int32_t +meta_open (call_frame_t *frame, xlator_t *this, + const char *path, int32_t flags, mode_t mode) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file) { + if (file->fops && file->fops->open) { + struct _open_local *local = CALLOC (1, sizeof (struct _open_local)); + 
ERR_ABORT (local); + local->path = strdup (path); + frame->local = local; + STACK_WIND (frame, meta_open_cbk, + this, file->fops->open, + path, flags, mode); + return 0; + } + else { + dict_t *ctx = get_new_dict (); + dict_ref (ctx); + dict_set (ctx, this->name, str_to_data (strdup (path))); + STACK_UNWIND (frame, 0, 0, ctx, file->stbuf); + return 0; + } + } + else { + STACK_WIND (frame, meta_open_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, + path, flags, mode); + return 0; + } +} + +int32_t +meta_create (call_frame_t *frame, xlator_t *this, + const char *path, int32_t flags, mode_t mode) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file) { + if (file->fops && file->fops->create) { + struct _open_local *local = CALLOC (1, sizeof (struct _open_local)); + ERR_ABORT (local); + local->path = strdup (path); + frame->local = local; + STACK_WIND (frame, meta_open_cbk, + this, file->fops->create, + path, flags, mode); + return 0; + } + else { + STACK_UNWIND (frame, -1, 0, NULL, NULL); + return 0; + } + } + else { + STACK_WIND (frame, meta_open_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, + path, flags, mode); + return 0; + } +} + +int32_t +meta_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + vector, + count); + return 0; +} + +int32_t +meta_readv (call_frame_t *frame, + xlator_t *this, + dict_t *fd, + size_t size, + off_t offset) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file && file->fops && file->fops->readv) { + STACK_WIND (frame, 
meta_readv_cbk, + this, file->fops->readv, + fd, size, offset); + return 0; + } + } + else { + STACK_WIND (frame, meta_readv_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, + fd, size, offset); + return 0; + } +} + +int32_t +meta_writev_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +meta_writev (call_frame_t *frame, xlator_t *this, + dict_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file && file->fops && file->fops->writev) { + STACK_WIND (frame, meta_writev_cbk, + this, file->fops->writev, + fd, vector, count, offset); + return 0; + } + } + else { + STACK_WIND (frame, meta_readv_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset); + return 0; + } +} + +int32_t +meta_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_flush (call_frame_t *frame, + xlator_t *this, + dict_t *fd) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file) { + if (file->fops && file->fops->flush) { + STACK_WIND (frame, meta_flush_cbk, + this, file->fops->flush, + fd); + return 0; + } + else { + STACK_UNWIND (frame, 0, 0); + return 0; + } + } + } + else { + STACK_WIND (frame, meta_flush_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush, + fd); + return 0; + } +} + 
+int32_t +meta_release_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_release (call_frame_t *frame, + xlator_t *this, + dict_t *fd) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + if (file) { + dict_unref (fd); + STACK_UNWIND (frame, 0, 0); + return 0; + } + } + else { + STACK_WIND (frame, meta_release_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->release, + fd); + return 0; + } +} + +int32_t +meta_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_fsync (call_frame_t *frame, + xlator_t *this, + dict_t *fd, + int32_t flags) +{ + STACK_WIND (frame, + meta_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, + flags); + return 0; +} + +int32_t +meta_fgetattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_fgetattr (call_frame_t *frame, + xlator_t *this, + dict_t *fd) +{ + STACK_WIND (frame, + meta_fgetattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetattr, + fd); + return 0; +} + +int32_t +meta_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *fd) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + fd); + return 0; +} + +int32_t +meta_opendir (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + meta_dirent_t *dir = 
lookup_meta_entry (root, path, NULL); + + if (dir) { + dict_t *ctx = get_new_dict (); + dict_set (ctx, this->name, str_to_data (strdup (path))); + STACK_UNWIND (frame, 0, 0, ctx); + return 0; + } + else { + STACK_WIND (frame, meta_opendir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, + path); + return 0; + } +} + +int32_t +meta_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + meta_private_t *priv = (meta_private_t *)this->private; + + if ((int) cookie == 1) { + dir_entry_t *dir = CALLOC (1, sizeof (dir_entry_t)); + ERR_ABORT (dir); + + dir->name = strdup (".meta"); + memcpy (&dir->buf, priv->tree->stbuf, sizeof (struct stat)); + dir->next = entries->next; + entries->next = dir; + + STACK_UNWIND (frame, op_ret, op_errno, entries, count+1); + return 0; + } + + STACK_UNWIND (frame, op_ret, op_errno, entries, count); + return 0; +} + +int32_t +meta_readdir (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + + meta_dirent_t *dir = lookup_meta_entry (root, path, NULL); + if (dir) { + if (dir->fops && dir->fops->readdir) { + STACK_WIND (frame, meta_readdir_cbk, + this, dir->fops->readdir, path); + return 0; + } + else { + int count = 0; + dir = dir->children; + dir_entry_t *entries = NULL; + + while (dir) { + dir_entry_t *d = CALLOC (1, sizeof (dir_entry_t)); + ERR_ABORT (d); + d->name = dir->name; + d->buf = *dir->stbuf; + d->next = entries; + entries = d; + count++; + dir = dir->next; + } + + dir_entry_t *header = CALLOC (1, sizeof (dir_entry_t)); + ERR_ABORT (header); + header->next = entries; + STACK_UNWIND (frame, 0, 0, header, count); + return 0; + } + } + else { + if (!strcmp (path, "/")) { + STACK_WIND_COOKIE (frame, meta_readdir_cbk, + (int) 1, /* cookie to tell _cbk to add .meta entry */ + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, 
+ path); + } + else { + STACK_WIND (frame, meta_readdir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, + path); + } + } + return 0; +} + +int32_t +meta_releasedir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_releasedir (call_frame_t *frame, + xlator_t *this, + dict_t *fd) +{ + STACK_WIND (frame, + meta_releasedir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->releasedir, + fd); + return 0; +} + +int32_t +meta_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_fsyncdir (call_frame_t *frame, + xlator_t *this, + dict_t *fd, + int32_t flags) +{ + STACK_WIND (frame, + meta_fsyncdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, + fd, + flags); + return 0; +} + +int32_t +meta_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + buf); + return 0; +} + +int32_t +meta_statfs (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + STACK_WIND (frame, + meta_statfs_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, + path); + return 0; +} + +int32_t +meta_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_setxattr (call_frame_t *frame, + xlator_t *this, + const char *path, + const char *name, + const char *value, + size_t size, + int32_t flags) +{ + STACK_WIND (frame, + meta_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + path, + name, + value, + size, + flags); + return 0; +} + +int32_t +meta_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t 
op_ret, + int32_t op_errno, + char *value) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + value); + return 0; +} + +int32_t +meta_getxattr (call_frame_t *frame, + xlator_t *this, + const char *path, + const char *name, + size_t size) +{ + STACK_WIND (frame, + meta_getxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + path, + name, + size); + return 0; +} + +int32_t +meta_listxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + char *value) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + value); + return 0; +} + +int32_t +meta_listxattr (call_frame_t *frame, + xlator_t *this, + const char *path, + size_t size) +{ + STACK_WIND (frame, + meta_listxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->listxattr, + path, + size); + return 0; +} + +int32_t +meta_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, + op_ret, + op_errno); + return 0; +} + +int32_t +meta_removexattr (call_frame_t *frame, + xlator_t *this, + const char *path, + const char *name) +{ + STACK_WIND (frame, + meta_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + path, + name); + return 0; +} + +int32_t +meta_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + STACK_UNWIND (frame, + op_ret, + op_errno, + lock); + return 0; +} + +int32_t +meta_lk (call_frame_t *frame, + xlator_t *this, + dict_t *file, + int32_t cmd, + struct flock *lock) +{ + STACK_WIND (frame, + meta_lk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, + file, + cmd, + lock); + return 0; +} + +static void +add_xlator_to_tree (meta_dirent_t *tree, xlator_t *this, + const char *prefix) +{ + char *dir; + asprintf (&dir, "%s/%s", prefix, this->name); + + char *children; + asprintf (&children, "%s/%s", dir, "subvolumes"); + + char *type; + asprintf (&type, 
"%s/%s", dir, "type"); + + char *view; + asprintf (&view, "%s/%s", dir, "view"); + + insert_meta_entry (tree, dir, S_IFDIR, NULL, NULL); + insert_meta_entry (tree, children, S_IFDIR, NULL, NULL); + meta_dirent_t *v = insert_meta_entry (tree, view, S_IFDIR, NULL, + &meta_xlator_view_fops); + v->view_xlator = this; + meta_dirent_t *t = insert_meta_entry (tree, type, S_IFREG, NULL, + &meta_xlator_type_fops); + t->view_xlator = this; + + xlator_list_t *trav = this->children; + while (trav) { + add_xlator_to_tree (tree, trav->xlator, children); + trav = trav->next; + } +} + +static void +build_meta_tree (xlator_t *this) +{ + meta_private_t *priv = (meta_private_t *) this->private; + priv->tree = CALLOC (1, sizeof (meta_dirent_t)); + ERR_ABORT (priv->tree); + priv->tree->name = strdup (".meta"); + priv->tree->stbuf = new_stbuf (); + priv->tree->stbuf->st_mode = S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH | + S_IXUSR | S_IXGRP | S_IXOTH; + + insert_meta_entry (priv->tree, "/.meta/version", + S_IFREG, NULL, &meta_version_fops); + + insert_meta_entry (priv->tree, "/.meta/xlators", + S_IFDIR, NULL, NULL); + + xlator_list_t *trav = this->children; + while (trav) { + add_xlator_to_tree (priv->tree, trav->xlator, "/.meta/xlators"); + trav = trav->next; + } +} + +int32_t +init (xlator_t *this) +{ + if (this->parent != NULL) { + gf_log ("meta", GF_LOG_ERROR, "FATAL: meta should be the root of the xlator tree"); + return -1; + } + + meta_private_t *priv = CALLOC (1, sizeof (meta_private_t)); + ERR_ABORT (priv); + + data_t *directory = dict_get (this->options, "directory"); + if (directory) { + priv->directory = strdup (data_to_str (directory)); + } + else { + priv->directory = ".meta"; + } + + this->private = priv; + build_meta_tree (this); + + return 0; +} + +int32_t +fini (xlator_t *this) +{ + return 0; +} + +struct xlator_fops fops = { + .getattr = meta_getattr, + .readlink = meta_readlink, + .mknod = meta_mknod, + .mkdir = meta_mkdir, + .unlink = meta_unlink, + .rmdir = meta_rmdir, 
+ .symlink = meta_symlink, + .rename = meta_rename, + .link = meta_link, + .chmod = meta_chmod, + .chown = meta_chown, + .truncate = meta_truncate, + .utimes = meta_utimes, + .open = meta_open, + .readv = meta_readv, + .writev = meta_writev, + .statfs = meta_statfs, + .flush = meta_flush, + .release = meta_release, + .fsync = meta_fsync, + .setxattr = meta_setxattr, + .getxattr = meta_getxattr, + .listxattr = meta_listxattr, + .removexattr = meta_removexattr, + .opendir = meta_opendir, + .readdir = meta_readdir, + .releasedir = meta_releasedir, + .fsyncdir = meta_fsyncdir, + .access = meta_access, + .ftruncate = meta_ftruncate, + .fgetattr = meta_fgetattr, + .create = meta_create, + .lk = meta_lk, +}; + +struct xlator_mops mops = { +}; diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h new file mode 100644 index 000000000..6823ef85b --- /dev/null +++ b/xlators/meta/src/meta.h @@ -0,0 +1,48 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __META_H__ +#define __META_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct _meta_dirent { + const char *name; + int type; + struct _meta_dirent *children; + struct _meta_dirent *parent; + struct _meta_dirent *next; + struct stat *stbuf; + xlator_t *view_xlator; + struct xlator_fops *fops; +}; +typedef struct _meta_dirent meta_dirent_t; + +typedef struct { + const char *directory; + meta_dirent_t *tree; +} meta_private_t; + +#include "tree.h" +#include "misc.h" + +#endif /* __META_H__ */ diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c new file mode 100644 index 000000000..9c2f50d34 --- /dev/null +++ b/xlators/meta/src/misc.c @@ -0,0 +1,67 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <unistd.h> +#include <sys/uio.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "meta.h" + +#define min(x,y) ((x) < (y) ? 
(x) : (y)) + +/* /.meta/version */ +static const char *version_str = PACKAGE_NAME " " PACKAGE_VERSION "\n"; + +int32_t +meta_version_readv (call_frame_t *frame, xlator_t *this, + dict_t *fd, size_t size, off_t offset) +{ + static int version_size; + version_size = strlen (version_str); + + struct iovec vec; + vec.iov_base = version_str + offset; + vec.iov_len = min (version_size - offset, size); + + STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1); + return 0; +} + +int32_t +meta_version_getattr (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + file->stbuf->st_size = strlen (version_str); + STACK_UNWIND (frame, 0, 0, file->stbuf); +} + +struct xlator_fops meta_version_fops = { + .readv = meta_version_readv, + .getattr = meta_version_getattr +}; + diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h new file mode 100644 index 000000000..433c604eb --- /dev/null +++ b/xlators/meta/src/misc.h @@ -0,0 +1,31 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __MISC_H__ +#define __MISC_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +struct xlator_fops meta_version_fops; + +#endif /* __MISC_H__ */ diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c new file mode 100644 index 000000000..ec88c42a0 --- /dev/null +++ b/xlators/meta/src/tree.c @@ -0,0 +1,176 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <string.h> + +#include "glusterfs.h" +#include "xlator.h" + +#include "meta.h" + +static int +is_meta_path (const char *path) +{ + while (*path == '/') + path++; + if (!strncmp (path, ".meta", strlen (".meta"))) + return 1; + return 0; +} + +struct stat * +new_stbuf (void) +{ + static int next_inode = 0; + struct stat *stbuf = CALLOC (1, sizeof (struct stat)); + + ERR_ABORT (stbuf); + + stbuf->st_dev = 0; + stbuf->st_ino = next_inode++; + stbuf->st_mode = S_IRUSR | S_IRGRP | S_IROTH; + stbuf->st_nlink = 1; + stbuf->st_uid = 0; + stbuf->st_gid = 0; + stbuf->st_rdev = 0; + stbuf->st_size = 0; + stbuf->st_blksize = 0; + stbuf->st_blocks = 0; + stbuf->st_atime = time (NULL); + stbuf->st_atim.tv_nsec = 0; + stbuf->st_mtime = stbuf->st_atime; + stbuf->st_mtim.tv_nsec = 0; + stbuf->st_ctime = stbuf->st_ctime; + stbuf->st_ctim.tv_nsec = 0; + + return stbuf; +} + +/* find an entry among the siblings of an entry */ +static meta_dirent_t * +find_entry (meta_dirent_t *node, const char *dir) +{ + meta_dirent_t *trav = node; + while (trav) { + if (!strcmp (trav->name, dir)) + return trav; + trav = trav->next; + } + return NULL; +} + +/* + * Return the meta_dirent_t corresponding to the pathname. + * + * If pathname does not exist in the meta tree, try to return + * its highest parent that does exist. The part of the + * pathname that is left over is returned in the value-result + * variable {remain}. 
+ * For example, for "/.meta/xlators/brick1/view/foo/bar/baz", + * return the entry for "/.meta/xlators/brick1/view" + * and set remain to "/bar/baz" + */ + +meta_dirent_t * +lookup_meta_entry (meta_dirent_t *root, const char *path, + char **remain) +{ + char *_path = strdup (path); + + if (!is_meta_path (path)) + return NULL; + + meta_dirent_t *trav = root; + char *dir = strtok (_path, "/"); + dir = strtok (NULL, "/"); + + while (dir) { + meta_dirent_t *ntrav; + ntrav = find_entry (trav->children, dir); + if (!ntrav) { + /* we have reached bottom of the meta tree. + Unknown dragons lie further below */ + if (remain) { + char *piece = dir; + while (piece) { + char *tmp = *remain; + if (*remain) + asprintf (remain, "/%s/%s", *remain, piece); + else + asprintf (remain, "/%s", piece); + if (tmp) free (tmp); + piece = strtok (NULL, "/"); + } + } + return trav; + } + dir = strtok (NULL, "/"); + trav = ntrav; + } + + free (_path); + return trav; +} + +meta_dirent_t * +insert_meta_entry (meta_dirent_t *root, const char *path, + int type, struct stat *stbuf, struct xlator_fops *fops) +{ + if (!is_meta_path (path)) + return NULL; + char *slashpos = strrchr (path, '/'); + char *dir = strndup (path, slashpos - path); + meta_dirent_t *parent = lookup_meta_entry (root, dir, NULL); + if (!dir) + return NULL; + + meta_dirent_t *new = CALLOC (1, sizeof (meta_dirent_t)); + ERR_ABORT (new); + new->name = strdup (slashpos+1); + new->type = type; + new->parent = parent; + new->next = parent->children; + parent->children = new; + if (stbuf) + new->stbuf = stbuf; + else + new->stbuf = new_stbuf (); + + new->stbuf->st_mode |= type; + new->fops = fops; + return new; +} + +int main (void) +{ + meta_dirent_t *root = CALLOC (1, sizeof (meta_dirent_t)); + ERR_ABORT (root); + root->name = strdup (".meta"); + + insert_meta_entry (root, "/.meta/version", S_IFREG, NULL, NULL); + return 0; +} diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h new file mode 100644 index 
000000000..eb2cf0220 --- /dev/null +++ b/xlators/meta/src/tree.h @@ -0,0 +1,35 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __TREE_H__ +#define __TREE_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +meta_dirent_t * +insert_meta_entry (meta_dirent_t *root, const char *path, + int type, struct stat *stbuf, struct xlator_fops *fops); +meta_dirent_t * +lookup_meta_entry (meta_dirent_t *root, const char *path, + char **remain); + +#endif /* __TREE_H__ */ diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c new file mode 100644 index 000000000..7104d10e9 --- /dev/null +++ b/xlators/meta/src/view.c @@ -0,0 +1,258 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" + +#include "meta.h" + +/* + * This file contains fops for the files and directories in + * an xlator directory + */ + +/* /.meta/xlators/.../type */ + +int32_t +meta_xlator_type_readv (call_frame_t *frame, xlator_t *this, + dict_t *fd, size_t size, off_t offset) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + xlator_t *view_xlator = file->view_xlator; + + int type_size; + type_size = strlen (view_xlator->type); + + struct iovec vec; + vec.iov_base = view_xlator->type + offset; + vec.iov_len = min (type_size - offset, size); + + STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1); + return 0; + } +} + +int32_t +meta_xlator_type_getattr (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + xlator_t *view_xlator = file->view_xlator; + file->stbuf->st_size = strlen (view_xlator->type); + + STACK_UNWIND (frame, 0, 0, file->stbuf); + return 0; +} + +struct xlator_fops meta_xlator_type_fops = { + .readv = meta_xlator_type_readv, + .getattr = meta_xlator_type_getattr +}; + +/* + * fops for the "view" directory + * {xlator}/view shows the filesystem as it appears + * to {xlator} + */ + +static int32_t +meta_xlator_view_getattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t 
+meta_xlator_view_getattr (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + char *op_path = NULL; + + meta_dirent_t *file = lookup_meta_entry (root, path, &op_path); + + if (op_path) { + STACK_WIND (frame, meta_xlator_view_getattr_cbk, file->view_xlator, + file->view_xlator->fops->getattr, + op_path); + } + else { + STACK_UNWIND (frame, 0, 0, file->stbuf); + } + + return 0; +} + +static int32_t +meta_xlator_view_readdir_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dir_entry_t *entries, int32_t count) +{ + STACK_UNWIND (frame, op_ret, op_errno, entries, count); + return 0; +} + +int32_t +meta_xlator_view_readdir (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + char *op_path = NULL; + + meta_dirent_t *dir = lookup_meta_entry (root, path, &op_path); + + STACK_WIND (frame, meta_xlator_view_readdir_cbk, + dir->view_xlator, dir->view_xlator->fops->readdir, + op_path ? 
op_path : "/"); + return 0; +} + +static int32_t +meta_xlator_view_open_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + dict_t *ctx, struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, ctx, buf); + return 0; +} + +int32_t +meta_xlator_view_open (call_frame_t *frame, xlator_t *this, + const char *path, int32_t flags, mode_t mode) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + char *op_path = NULL; + + meta_dirent_t *file = lookup_meta_entry (root, path, &op_path); + STACK_WIND (frame, meta_xlator_view_open_cbk, + file->view_xlator, file->view_xlator->fops->open, + op_path, flags, mode); + return 0; +} + +int32_t +meta_xlator_view_create (call_frame_t *frame, xlator_t *this, + const char *path, int32_t flags, mode_t mode) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + char *op_path = NULL; + + meta_dirent_t *file = lookup_meta_entry (root, path, &op_path); + STACK_WIND (frame, meta_xlator_view_open_cbk, + file->view_xlator, file->view_xlator->fops->create, + op_path, flags, mode); + return 0; +} + +static int32_t +meta_xlator_view_readv_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, + int32_t count) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count); + return 0; +} + +int32_t +meta_xlator_view_readv (call_frame_t *frame, xlator_t *this, + dict_t *fd, size_t size, off_t offset) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + STACK_WIND (frame, meta_xlator_view_readv_cbk, + file->view_xlator, file->view_xlator->fops->readv, + fd, size, offset); + return 0; + } + + STACK_UNWIND (frame, -1, EBADFD, NULL, 
0); + return 0; +} + +static int32_t +meta_xlator_view_writev_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +meta_xlator_view_writev (call_frame_t *frame, xlator_t *this, + dict_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + meta_private_t *priv = (meta_private_t *) this->private; + meta_dirent_t *root = priv->tree; + data_t *path_data = dict_get (fd, this->name); + + if (path_data) { + const char *path = data_to_str (path_data); + meta_dirent_t *file = lookup_meta_entry (root, path, NULL); + + STACK_WIND (frame, meta_xlator_view_writev_cbk, + file->view_xlator, file->view_xlator->fops->writev, + fd, vector, count, offset); + return 0; + } + + STACK_UNWIND (frame, -1, EBADFD, NULL, 0); + return 0; +} + +struct xlator_fops meta_xlator_view_fops = { + .getattr = meta_xlator_view_getattr, + .readdir = meta_xlator_view_readdir, + .open = meta_xlator_view_open, + .create = meta_xlator_view_create, + .readv = meta_xlator_view_readv, + .writev = meta_xlator_view_writev +}; diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h new file mode 100644 index 000000000..2e1ac3ebf --- /dev/null +++ b/xlators/meta/src/view.h @@ -0,0 +1,32 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __VIEW_H__ +#define __VIEW_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +struct xlator_fops meta_xlator_type_fops; +struct xlator_fops meta_xlator_view_fops; + +#endif /* __VIEW_H__ */ diff --git a/xlators/mount/Makefile.am b/xlators/mount/Makefile.am new file mode 100644 index 000000000..945982d95 --- /dev/null +++ b/xlators/mount/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = @FUSE_CLIENT_SUBDIR@ + +CLEANFILES = diff --git a/xlators/mount/fuse/Makefile.am b/xlators/mount/fuse/Makefile.am new file mode 100644 index 000000000..3b344b1d7 --- /dev/null +++ b/xlators/mount/fuse/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src utils + +CLEANFILES = diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am new file mode 100644 index 000000000..9d8d45e4f --- /dev/null +++ b/xlators/mount/fuse/src/Makefile.am @@ -0,0 +1,14 @@ + +noinst_HEADERS = fuse-extra.h + +xlator_LTLIBRARIES = fuse.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount +fuse_la_SOURCES = fuse-bridge.c fuse-extra.c +fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles $(GF_FUSE_LDADD) + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \ + -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -DFUSE_USE_VERSION=26 + + +CLEANFILES = + diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c new file mode 100644 index 000000000..8e7055878 --- /dev/null +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -0,0 +1,2859 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* + * TODO: + * Need to free_state() when fuse_reply_err() + return. + * Check loc->path for "" after fuse_loc_fill in all fops + * (now being done in getattr, lookup) or better - make + * fuse_loc_fill() and inode_path() return success/failure. + */ + +#include <stdint.h> +#include <signal.h> +#include <pthread.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif /* _CONFIG_H */ + +#include "glusterfs.h" +#include "logging.h" +#include "xlator.h" +#include "glusterfs.h" +#include "defaults.h" +#include "common-utils.h" + +#include <fuse/fuse_lowlevel.h> + +#include "fuse-extra.h" +#include "list.h" +#include "dict.h" + +#include "compat.h" +#include "compat-errno.h" + +/* TODO: when supporting posix acl, remove this definition */ +#define DISABLE_POSIX_ACL + +#define ZR_MOUNTPOINT_OPT "mountpoint" +#define ZR_DIRECT_IO_OPT "direct-io-mode" + +#define BIG_FUSE_CHANNEL_SIZE 1048576 + +struct fuse_private { + int fd; + struct fuse *fuse; + struct fuse_session *se; + struct fuse_chan *ch; + char *volfile; + size_t volfile_size; + char *mount_point; + data_t *buf; + pthread_t fuse_thread; + char fuse_thread_started; + uint32_t direct_io_mode; + uint32_t entry_timeout; + uint32_t attribute_timeout; + +}; +typedef struct fuse_private fuse_private_t; + +#define _FI_TO_FD(fi) ((fd_t *)((long)fi->fh)) + +#define FI_TO_FD(fi) ((_FI_TO_FD (fi))?(fd_ref (_FI_TO_FD(fi))):((fd_t *) 0)) + +#define FUSE_FOP(state, ret, op_num, fop, args ...) \ + do { \ + call_frame_t *frame = get_call_frame_for_req (state, 1); \ + xlator_t *xl = frame->this->children ? 
\ + frame->this->children->xlator : NULL; \ + dict_t *refs = frame->root->req_refs; \ + frame->root->state = state; \ + frame->root->op = op_num; \ + STACK_WIND (frame, ret, xl, xl->fops->fop, args); \ + dict_unref (refs); \ + } while (0) + + +typedef struct { + void *pool; + xlator_t *this; + inode_table_t *itable; + loc_t loc; + loc_t loc2; + fuse_req_t req; + int32_t flags; + off_t off; + size_t size; + unsigned long nlookup; + fd_t *fd; + dict_t *dict; + char *name; + char is_revalidate; +} fuse_state_t; + +int fuse_chan_receive (struct fuse_chan *ch, + char *buf, + int32_t size); + + +static void +free_state (fuse_state_t *state) +{ + loc_wipe (&state->loc); + + loc_wipe (&state->loc2); + + if (state->dict) { + dict_unref (state->dict); + state->dict = (void *)0xaaaaeeee; + } + if (state->name) { + FREE (state->name); + state->name = NULL; + } + if (state->fd) { + fd_unref (state->fd); + state->fd = (void *)0xfdfdfdfd; + } +#ifdef DEBUG + memset (state, 0x90, sizeof (*state)); +#endif + FREE (state); + state = NULL; +} + + +fuse_state_t * +state_from_req (fuse_req_t req) +{ + fuse_state_t *state; + xlator_t *this = NULL; + + this = fuse_req_userdata (req); + + state = (void *)calloc (1, sizeof (*state)); + ERR_ABORT (state); + state->pool = this->ctx->pool; + state->itable = this->itable; + state->req = req; + state->this = this; + + return state; +} + +static pid_t +get_pid_from_req (fuse_req_t req) +{ + const struct fuse_ctx *ctx = NULL; + ctx = fuse_req_ctx(req); + return ctx->pid; +} + +static call_frame_t * +get_call_frame_for_req (fuse_state_t *state, char d) +{ + call_pool_t *pool = state->pool; + fuse_req_t req = state->req; + const struct fuse_ctx *ctx = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + + + if (req) { + this = fuse_req_userdata (req); + } else { + this = state->this; + } + priv = this->private; + + frame = create_frame (this, pool); + + if (req) { + ctx = fuse_req_ctx(req); + + 
frame->root->uid = ctx->uid; + frame->root->gid = ctx->gid; + frame->root->pid = ctx->pid; + frame->root->unique = req_callid (req); + } + + if (d) { + frame->root->req_refs = dict_ref (get_new_dict ()); + dict_set (frame->root->req_refs, NULL, priv->buf); + } + + frame->root->type = GF_OP_TYPE_FOP_REQUEST; + + return frame; +} + + +GF_MUST_CHECK static int32_t +fuse_loc_fill (loc_t *loc, + fuse_state_t *state, + ino_t ino, + ino_t par, + const char *name) +{ + inode_t *inode = NULL, *parent = NULL; + int32_t ret = -1; + char *path = NULL; + + /* resistance against multiple invocation of loc_fill not to get + reference leaks via inode_search() */ + + inode = loc->inode; + + if (!inode) { + if (ino) + inode = inode_search (state->itable, ino, NULL); + if (par && name) + inode = inode_search (state->itable, par, name); + + loc->inode = inode; + if (inode) + loc->ino = inode->ino; + } + + parent = loc->parent; + if (!parent) { + if (inode) + parent = inode_parent (inode, par, name); + else + parent = inode_search (state->itable, par, NULL); + loc->parent = parent; + } + + if (name && parent) { + ret = inode_path (parent, name, &path); + if (ret <= 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "inode_path failed for %"PRId64"/%s", + parent->ino, name); + goto fail; + } else { + loc->path = path; + } + } else if (inode) { + ret = inode_path (inode, NULL, &path); + if (ret <= 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "inode_path failed for %"PRId64, + inode->ino); + goto fail; + } else { + loc->path = path; + } + } + if (loc->path) { + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + else loc->name = ""; + } + + if ((ino != 1) && + (parent == NULL)) { + gf_log ("fuse-bridge", GF_LOG_ERROR, + "failed to search parent for %"PRId64"/%s (%"PRId64")", + (ino_t)par, name, (ino_t)ino); + ret = -1; + goto fail; + } + ret = 0; +fail: + return ret; +} + + +static int +need_fresh_lookup (int32_t op_ret, int32_t op_errno, + loc_t *loc, struct stat 
*buf) +{ + if (op_ret == -1) { + gf_log ("fuse-bridge", + (op_errno == ENOENT)? GF_LOG_DEBUG: GF_LOG_WARNING, + "revalidate of %s failed (%s)", + loc->path, strerror (op_errno)); + return 1; + } + + if (loc->inode->ino != buf->st_ino) { + gf_log ("fuse-bridge", GF_LOG_WARNING, + "inode num of %s changed %"PRId64" -> %"PRId64, + loc->path, loc->inode->ino, buf->st_ino); + return 1; + } + + if ((loc->inode->st_mode & S_IFMT) ^ (buf->st_mode & S_IFMT)) { + gf_log ("fuse-bridge", GF_LOG_WARNING, + "inode mode of %s changed 0%o -> 0%o", + loc->path, loc->inode->st_mode, buf->st_mode); + return 1; + } + + return 0; +} + + +static int +fuse_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stat, + dict_t *dict); + +static int +fuse_entry_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + fuse_state_t *state; + fuse_req_t req; + struct fuse_entry_param e = {0, }; + fuse_private_t *priv = this->private; + + state = frame->root->state; + req = state->req; + + if (!op_ret && state->loc.ino == 1) { + buf->st_ino = 1; + } + + if (state->is_revalidate == 1 + && need_fresh_lookup (op_ret, op_errno, &state->loc, buf)) { + inode_unref (state->loc.inode); + state->loc.inode = inode_new (state->itable); + state->is_revalidate = 2; + + STACK_WIND (frame, fuse_lookup_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, + &state->loc, state->dict); + + return 0; + } + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => %"PRId64" (%"PRId64")", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, buf->st_ino, state->loc.ino); + + inode_link (inode, state->loc.parent, state->loc.name, buf); + + inode_lookup (inode); + + /* TODO: make these timeouts configurable (via meta?) 
*/ + e.ino = inode->ino; + +#ifdef GF_DARWIN_HOST_OS + e.generation = 0; +#else + e.generation = buf->st_ctime; +#endif + + e.entry_timeout = priv->entry_timeout; + e.attr_timeout = priv->attribute_timeout; + e.attr = *buf; + e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE; + + if (!e.ino || !buf->st_ino) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s() %s returning inode 0", + frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path); + } + + if (state->loc.parent) + fuse_reply_entry (req, &e); + else + fuse_reply_attr (req, buf, priv->attribute_timeout); + } else { + gf_log ("glusterfs-fuse", + (op_errno == ENOENT ? GF_LOG_DEBUG : GF_LOG_ERROR), + "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror (op_errno)); + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + + +static int +fuse_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stat, + dict_t *dict) +{ + fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat); + return 0; +} + + +static void +fuse_lookup (fuse_req_t req, + fuse_ino_t par, + const char *name) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": LOOKUP %"PRId64"/%s (fuse_loc_fill() failed)", + req_callid (req), (ino_t)par, name); + free_state (state); + fuse_reply_err (req, EINVAL); + return; + } + + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": LOOKUP %s", req_callid (req), + state->loc.path); + + state->loc.inode = inode_new (state->itable); + /* to differntiate in entry_cbk what kind of call it is */ + state->is_revalidate = -1; + } else { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": LOOKUP 
%s(%"PRId64")", req_callid (req), + state->loc.path, state->loc.inode->ino); + state->is_revalidate = 1; + } + + state->dict = dict_new(); + + FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP, + lookup, &state->loc, state->dict); +} + + +static void +fuse_forget (fuse_req_t req, + fuse_ino_t ino, + unsigned long nlookup) +{ + inode_t *fuse_inode; + fuse_state_t *state; + + if (ino == 1) { + fuse_reply_none (req); + return; + } + + state = state_from_req (req); + fuse_inode = inode_search (state->itable, ino, NULL); + if (fuse_inode) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "got forget on inode (%lu)", ino); + inode_forget (fuse_inode, nlookup); + inode_unref (fuse_inode); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "got forget, but inode (%lu) not found", ino); + } + + free_state (state); + fuse_reply_none (req); +} + + +static int +fuse_attr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + fuse_state_t *state; + fuse_req_t req; + fuse_private_t *priv = this->private; + + state = frame->root->state; + req = state->req; + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", + (buf->st_ino ? GF_LOG_DEBUG : GF_LOG_ERROR), + "%"PRId64": %s() %s => %"PRId64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", + buf->st_ino); + + /* TODO: make these timeouts configurable via meta */ + /* TODO: what if the inode number has changed by now */ + buf->st_blksize = BIG_FUSE_CHANNEL_SIZE; + + fuse_reply_attr (req, buf, priv->attribute_timeout); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR", + strerror (op_errno)); + + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + + +static void +fuse_getattr (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + int32_t ret = -1; + + state = state_from_req (req); + + if (ino == 1) { + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)", + req_callid(req), (ino_t)ino); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + if (state->loc.inode) + state->is_revalidate = 1; + else + state->is_revalidate = -1; + + state->dict = dict_new(); + + FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP, + lookup, &state->loc, state->dict); + return; + } + + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + + if (!state->loc.inode) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": GETATTR %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)", + req_callid (req), (int64_t)ino, state->loc.path); + fuse_reply_err (req, EINVAL); + return; + } + + fd = fd_lookup (state->loc.inode, get_pid_from_req (req)); + state->fd = fd; + if (!fd || S_ISDIR (state->loc.inode->st_mode)) { + /* this is the @ret of fuse_loc_fill, checked here + to permit fstat() to happen even when fuse_loc_fill fails + */ + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": GETATTR %"PRId64" (fuse_loc_fill() failed)", + req_callid(req), (ino_t)ino); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": GETATTR %"PRId64" (%s)", + req_callid (req), (int64_t)ino, state->loc.path); + + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT, + stat, &state->loc); + } else { + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FGETATTR %"PRId64" (%s/%p)", + req_callid (req), (int64_t)ino, 
state->loc.path, fd); + + FUSE_FOP (state,fuse_attr_cbk, GF_FOP_FSTAT, + fstat, fd); + } +} + + +static int +fuse_fd_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + fuse_state_t *state; + fuse_req_t req; + fuse_private_t *priv = this->private; + + state = frame->root->state; + req = state->req; + + if (op_ret >= 0) { + struct fuse_file_info fi = {0, }; + + fi.fh = (unsigned long) fd; + fi.flags = state->flags; + + if (!S_ISDIR (fd->inode->st_mode)) { + if ((fi.flags & 3) && priv->direct_io_mode) + fi.direct_io = 1; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => %p", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, fd); + + fd_ref (fd); + if (fuse_reply_open (req, &fi) == -ENOENT) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "open() got EINTR"); + fd_unref (fd); + goto out; + } + + fd_bind (fd); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror (op_errno)); + + fuse_reply_err (req, op_errno); + } +out: + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + + + +static void +do_chmod (fuse_req_t req, + fuse_ino_t ino, + struct stat *attr, + struct fuse_file_info *fi) +{ + fuse_state_t *state = state_from_req (req); + fd_t *fd = NULL; + int32_t ret = -1; + + if (fi) { + fd = FI_TO_FD (fi); + state->fd = fd; + } + + if (fd) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FCHMOD %p", req_callid (req), fd); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHMOD, + fchmod, fd, attr->st_mode); + } else { + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": CHMOD %"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), (int64_t)ino, + state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); 
+ return; + } + + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": CHMOD %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHMOD, + chmod, &state->loc, attr->st_mode); + } +} + + +static void +do_chown (fuse_req_t req, + fuse_ino_t ino, + struct stat *attr, + int valid, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + int32_t ret = -1; + uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t) -1; + gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t) -1; + + state = state_from_req (req); + + if (fi) { + fd = FI_TO_FD (fi); + state->fd = fd; + } + + if (fd) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FCHOWN %p", req_callid (req), fd); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHOWN, + fchown, fd, uid, gid); + } else { + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": CHOWN %"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), (int64_t)ino, + state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": CHOWN %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHOWN, + chown, &state->loc, uid, gid); + } +} + + +static void +do_truncate (fuse_req_t req, + fuse_ino_t ino, + struct stat *attr, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + int32_t ret = -1; + + state = state_from_req (req); + + if (fi) { + fd = FI_TO_FD (fi); + state->fd = fd; + } + if (fd) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FTRUNCATE %p/%"PRId64, req_callid (req), + fd, attr->st_size); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FTRUNCATE, + ftruncate, fd, attr->st_size); + } else { + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log 
("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": TRUNCATE %s/%"PRId64" (fuse_loc_fill() failed)", + req_callid (req), state->loc.path, + attr->st_size); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": TRUNCATE %s/%"PRId64"(%lu)", + req_callid (req), + state->loc.path, attr->st_size, ino); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_TRUNCATE, + truncate, &state->loc, attr->st_size); + } + + return; +} + + +static void +do_utimes (fuse_req_t req, + fuse_ino_t ino, + struct stat *attr) +{ + fuse_state_t *state; + + struct timespec tv[2]; + int32_t ret = -1; + + tv[0].tv_sec = attr->st_atime; + tv[0].tv_nsec = ST_ATIM_NSEC(attr); + tv[1].tv_sec = attr->st_mtime; + tv[1].tv_nsec = ST_ATIM_NSEC(attr); + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": UTIMENS %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": UTIMENS (%lu)%s", req_callid (req), + ino, state->loc.path); + + FUSE_FOP (state, fuse_attr_cbk, GF_FOP_UTIMENS, + utimens, &state->loc, tv); +} + + +static void +fuse_setattr (fuse_req_t req, + fuse_ino_t ino, + struct stat *attr, + int valid, + struct fuse_file_info *fi) +{ + + if (valid & FUSE_SET_ATTR_MODE) + do_chmod (req, ino, attr, fi); + else if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) + do_chown (req, ino, attr, valid, fi); + else if (valid & FUSE_SET_ATTR_SIZE) + do_truncate (req, ino, attr, fi); + else if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) + do_utimes (req, ino, attr); + else + fuse_getattr (req, ino, fi); +} + + +static int gf_fuse_xattr_enotsup_log; + +static int +fuse_err_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t 
op_errno) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => 0", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR"); + + fuse_reply_err (req, 0); + } else { + if (frame->root->op == GF_FOP_SETXATTR) { + op_ret = gf_compat_setxattr (state->dict); + if (op_ret == 0) + op_errno = 0; + if (op_errno == ENOTSUP) { + gf_fuse_xattr_enotsup_log++; + if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER)) + gf_log ("glusterfs-fuse", GF_LOG_CRITICAL, + "[ ERROR ] Extended attribute not supported by the backend storage"); + } + } else { + if ((frame->root->op == GF_FOP_REMOVEXATTR) + && (op_errno == ENOATTR)) { + goto nolog; + } + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s() %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", + strerror (op_errno)); + } + nolog: + + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + + +static int +fuse_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret == 0) + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => 0", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path); + + fuse_reply_err (req, 0); + } else { + gf_log ("glusterfs-fuse", + (op_errno != ENOTEMPTY ? 
GF_LOG_ERROR : GF_LOG_DEBUG), + "%"PRId64": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror (op_errno)); + + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_access (fuse_req_t req, + fuse_ino_t ino, + int mask) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": ACCESS %"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), (int64_t)ino, state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64" ACCESS %s/%lu mask=%d", req_callid (req), + state->loc.path, ino, mask); + + FUSE_FOP (state, fuse_err_cbk, + GF_FOP_ACCESS, access, + &state->loc, mask); + + return; +} + + + +static int +fuse_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *linkname) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret > 0) { + ((char *)linkname)[op_ret] = '\0'; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s => %s", frame->root->unique, + state->loc.path, linkname); + + fuse_reply_readlink(req, linkname); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s => -1 (%s)", frame->root->unique, + state->loc.path, strerror(op_errno)); + + fuse_reply_err(req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_readlink (fuse_req_t req, + fuse_ino_t ino) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + 
"%"PRId64" READLINK %s/%"PRId64" (fuse_loc_fill() returned NULL inode)", + req_callid (req), state->loc.path, + state->loc.inode->ino); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64" READLINK %s/%"PRId64, req_callid (req), + state->loc.path, state->loc.inode->ino); + + FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK, + readlink, &state->loc, 4096); + + return; +} + + +static void +fuse_mknod (fuse_req_t req, + fuse_ino_t par, + const char *name, + mode_t mode, + dev_t rdev) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64" MKNOD %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->loc.inode = inode_new (state->itable); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": MKNOD %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKNOD, + mknod, &state->loc, mode, rdev); + + return; +} + + +static void +fuse_mkdir (fuse_req_t req, + fuse_ino_t par, + const char *name, + mode_t mode) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64" MKDIR %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->loc.inode = inode_new (state->itable); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": MKDIR %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKDIR, + mkdir, &state->loc, mode); + + return; +} + + +static void +fuse_unlink (fuse_req_t req, + fuse_ino_t par, + const char *name) +{ + fuse_state_t *state; + int32_t ret 
= -1; + + state = state_from_req (req); + + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": UNLINK %s (fuse_loc_fill() returned NULL inode)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": UNLINK %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK, + unlink, &state->loc); + + return; +} + + +static void +fuse_rmdir (fuse_req_t req, + fuse_ino_t par, + const char *name) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": RMDIR %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": RMDIR %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR, + rmdir, &state->loc); + + return; +} + + +static void +fuse_symlink (fuse_req_t req, + const char *linkname, + fuse_ino_t par, + const char *name) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64" SYMLINK %s -> %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path, linkname); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->loc.inode = inode_new (state->itable); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": SYMLINK %s -> %s", req_callid (req), + state->loc.path, linkname); + + FUSE_FOP (state, fuse_entry_cbk, GF_FOP_SYMLINK, + symlink, linkname, &state->loc); + + return; +} + + +int 
+fuse_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s -> %s => 0 (buf->st_ino=%"PRId64" , loc->ino=%"PRId64")", + frame->root->unique, state->loc.path, state->loc2.path, + buf->st_ino, state->loc.ino); + + { + /* ugly ugly - to stay blind to situation where + rename happens on a new inode + */ + buf->st_ino = state->loc.ino; + buf->st_mode = state->loc.inode->st_mode; + } + inode_rename (state->itable, + state->loc.parent, state->loc.name, + state->loc2.parent, state->loc2.name, + state->loc.inode, buf); + + fuse_reply_err (req, 0); + } else { + gf_log ("glusterfs-fuse", + (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), + "%"PRId64": %s -> %s => -1 (%s)", frame->root->unique, + state->loc.path, state->loc2.path, + strerror (op_errno)); + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + + +static void +fuse_rename (fuse_req_t req, + fuse_ino_t oldpar, + const char *oldname, + fuse_ino_t newpar, + const char *newname) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + + ret = fuse_loc_fill (&state->loc, state, 0, oldpar, oldname); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", + state->loc.path, req_callid (req), state->loc.path, + state->loc2.path); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + ret = fuse_loc_fill (&state->loc2, state, 0, newpar, newname); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", + state->loc.path, req_callid (req), state->loc.path, + state->loc2.path); + + fuse_reply_err (req, EINVAL); + 
free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": RENAME `%s (%"PRId64")' -> `%s (%"PRId64")'", + req_callid (req), state->loc.path, state->loc.ino, + state->loc2.path, state->loc2.ino); + + FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME, + rename, &state->loc, &state->loc2); + + return; +} + + +static void +fuse_link (fuse_req_t req, + fuse_ino_t ino, + fuse_ino_t par, + const char *name) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + ret = fuse_loc_fill (&state->loc2, state, ino, 0, NULL); + + if ((state->loc2.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "fuse_loc_fill() failed for %s %"PRId64": LINK %s %s", + state->loc2.path, req_callid (req), + state->loc2.path, state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->loc.inode = inode_ref (state->loc2.inode); + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": LINK() %s (%"PRId64") -> %s (%"PRId64")", + req_callid (req), state->loc2.path, state->loc2.ino, + state->loc.path, state->loc.ino); + + FUSE_FOP (state, fuse_entry_cbk, GF_FOP_LINK, + link, &state->loc2, &state->loc); + + return; +} + + +static int +fuse_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + fuse_private_t *priv = this->private; + + struct fuse_file_info fi = {0, }; + struct fuse_entry_param e = {0, }; + + fi.flags = state->flags; + if (op_ret >= 0) { + fi.fh = (unsigned long) fd; + + if ((fi.flags & 3) && priv->direct_io_mode) + fi.direct_io = 1; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => %p (ino=%"PRId64")", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, fd, buf->st_ino); + + e.ino = buf->st_ino; 
+ +#ifdef GF_DARWIN_HOST_OS + e.generation = 0; +#else + e.generation = buf->st_ctime; +#endif + + e.entry_timeout = priv->entry_timeout; + e.attr_timeout = priv->attribute_timeout; + e.attr = *buf; + e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE; + + fi.keep_cache = 0; + + inode_link (inode, state->loc.parent, + state->loc.name, buf); + + inode_lookup (inode); + + fd_ref (fd); + if (fuse_reply_create (req, &e, &fi) == -ENOENT) { + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "create() got EINTR"); + inode_forget (inode, 1); + fd_unref (fd); + goto out; + } + + fd_bind (fd); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s => -1 (%s)", req_callid (req), + state->loc.path, strerror (op_errno)); + fuse_reply_err (req, op_errno); + } +out: + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_create (fuse_req_t req, + fuse_ino_t par, + const char *name, + mode_t mode, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd; + int32_t ret = -1; + + state = state_from_req (req); + state->flags = fi->flags; + + ret = fuse_loc_fill (&state->loc, state, 0, par, name); + if (ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64" CREATE %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->loc.inode = inode_new (state->itable); + + fd = fd_create (state->loc.inode, get_pid_from_req (req)); + state->fd = fd; + fd->flags = state->flags; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": CREATE %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE, + create, &state->loc, state->flags, mode, fd); + + return; +} + + +static void +fuse_open (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd; + int32_t ret = -1; + + state = state_from_req (req); + state->flags = fi->flags; + + ret = fuse_loc_fill (&state->loc, state, 
ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": OPEN %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + + fd = fd_create (state->loc.inode, get_pid_from_req (req)); + state->fd = fd; + fd->flags = fi->flags; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": OPEN %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN, + open, &state->loc, fi->flags, fd); + + return; +} + + +static int +fuse_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret >= 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, + frame->root->unique, + op_ret, state->size, state->off, stbuf->st_size); + + fuse_reply_vec (req, vector, count); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": READ => %d (%s)", frame->root->unique, + op_ret, strerror (op_errno)); + + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + +static void +fuse_readv (fuse_req_t req, + fuse_ino_t ino, + size_t size, + off_t off, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + state = state_from_req (req); + state->size = size; + state->off = off; + + fd = FI_TO_FD (fi); + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": READ (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + req_callid (req), fd, size, off); + + FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ, + readv, fd, size, off); + +} + + +static int +fuse_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + 
fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + + if (op_ret >= 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, + frame->root->unique, + op_ret, state->size, state->off, stbuf->st_size); + + fuse_reply_write (req, op_ret); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": WRITE => -1 (%s)", frame->root->unique, + strerror(op_errno)); + + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_write (fuse_req_t req, + fuse_ino_t ino, + const char *buf, + size_t size, + off_t off, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + struct iovec vector; + fd_t *fd = NULL; + + state = state_from_req (req); + state->size = size; + state->off = off; + fd = FI_TO_FD (fi); + state->fd = fd; + vector.iov_base = (void *)buf; + vector.iov_len = size; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + req_callid (req), fd, size, off); + + FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, + writev, fd, &vector, 1, off); + return; +} + + +static void +fuse_flush (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + state = state_from_req (req); + fd = FI_TO_FD (fi); + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FLUSH %p", req_callid (req), fd); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, + flush, fd); + + return; +} + + +static void +fuse_release (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + + state = state_from_req (req); + state->fd = FI_TO_FD (fi); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": RELEASE %p", req_callid (req), state->fd); + + fd_unref (state->fd); + + fuse_reply_err (req, 0); + + free_state (state); + return; +} + + +static void +fuse_fsync (fuse_req_t req, + fuse_ino_t 
ino, + int datasync, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + state = state_from_req (req); + fd = FI_TO_FD (fi); + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": FSYNC %p", req_callid (req), fd); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNC, + fsync, fd, datasync); + + return; +} + + +static void +fuse_opendir (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": OPENDIR %s (fuse_loc_fill() failed)", + req_callid (req), state->loc.path); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + fd = fd_create (state->loc.inode, get_pid_from_req (req)); + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": OPENDIR %s", req_callid (req), + state->loc.path); + + FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR, + opendir, &state->loc, fd); +} + +static int +fuse_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + int size = 0; + int entry_size = 0; + char *buf = NULL; + gf_dirent_t *entry = NULL; + struct stat stbuf = {0, }; + + if (op_ret < 0) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": READDIR => -1 (%s)", frame->root->unique, + strerror (op_errno)); + + fuse_reply_err (req, op_errno); + goto out; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": READDIR => %d/%"GF_PRI_SIZET",%"PRId64, + frame->root->unique, op_ret, state->size, state->off); + + list_for_each_entry (entry, &entries->list, list) { + size += fuse_dirent_size (strlen (entry->d_name)); + } + + buf = CALLOC (1, size); + if (!buf) { + gf_log ("glusterfs-fuse", 
GF_LOG_ERROR, + "%"PRId64": READDIR => -1 (%s)", frame->root->unique, + strerror (ENOMEM)); + fuse_reply_err (req, -ENOMEM); + goto out; + } + + size = 0; + list_for_each_entry (entry, &entries->list, list) { + stbuf.st_ino = entry->d_ino; + entry_size = fuse_dirent_size (strlen (entry->d_name)); + fuse_add_direntry (req, buf + size, entry_size, + entry->d_name, &stbuf, + entry->d_off); + size += entry_size; + } + + fuse_reply_buf (req, (void *)buf, size); + +out: + free_state (state); + STACK_DESTROY (frame->root); + if (buf) + FREE (buf); + return 0; + +} + +static void +fuse_readdir (fuse_req_t req, + fuse_ino_t ino, + size_t size, + off_t off, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + state = state_from_req (req); + state->size = size; + state->off = off; + fd = FI_TO_FD (fi); + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")", + req_callid (req), fd, size, off); + + FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR, + readdir, fd, size, off); +} + + +static void +fuse_releasedir (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + + state = state_from_req (req); + state->fd = FI_TO_FD (fi); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": RELEASEDIR %p", req_callid (req), state->fd); + + fd_unref (state->fd); + + fuse_reply_err (req, 0); + + free_state (state); + + return; +} + + +static void +fuse_fsyncdir (fuse_req_t req, + fuse_ino_t ino, + int datasync, + struct fuse_file_info *fi) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + fd = FI_TO_FD (fi); + + state = state_from_req (req); + state->fd = fd; + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR, + fsyncdir, fd, datasync); + + return; +} + + +static int +fuse_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + fuse_state_t *state = frame->root->state; + 
fuse_req_t req = state->req; + + /* + Filesystems (like ZFS on solaris) reports + different ->f_frsize and ->f_bsize. Old coreutils + df tools use statfs() and do not see ->f_frsize. + the ->f_blocks, ->f_bavail and ->f_bfree are + w.r.t ->f_frsize and not ->f_bsize which makes the + df tools report wrong values. + + Scale the block counts to match ->f_bsize. + */ + /* TODO: with old coreutils, f_bsize is taken from stat()'s st_blksize + * so the df with old coreutils this wont work :( + */ + + if (op_ret == 0) { +#ifndef GF_DARWIN_HOST_OS + /* MacFUSE doesn't respect anyof these tweaks */ + buf->f_blocks *= buf->f_frsize; + buf->f_blocks /= BIG_FUSE_CHANNEL_SIZE; + + buf->f_bavail *= buf->f_frsize; + buf->f_bavail /= BIG_FUSE_CHANNEL_SIZE; + + buf->f_bfree *= buf->f_frsize; + buf->f_bfree /= BIG_FUSE_CHANNEL_SIZE; + + buf->f_frsize = buf->f_bsize = BIG_FUSE_CHANNEL_SIZE; +#endif /* GF_DARWIN_HOST_OS */ + fuse_reply_statfs (req, buf); + + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": ERR => -1 (%s)", frame->root->unique, + strerror(op_errno)); + fuse_reply_err (req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_statfs (fuse_req_t req, + fuse_ino_t ino) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, 1, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": STATFS (fuse_loc_fill() fail)", + req_callid (req)); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": STATFS", req_callid (req)); + + FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS, + statfs, &state->loc); +} + + +static void +fuse_setxattr (fuse_req_t req, + fuse_ino_t ino, + const char *name, + const char *value, + size_t size, + int flags) +{ + fuse_state_t *state; + char *dict_value = NULL; + int32_t ret = -1; + 
+#ifdef DISABLE_POSIX_ACL + if (!strncmp (name, "system.", 7)) { + fuse_reply_err (req, EOPNOTSUPP); + return; + } +#endif + + state = state_from_req (req); + state->size = size; + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": SETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), + state->loc.path, (int64_t)ino, name); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + state->dict = get_new_dict (); + + dict_value = memdup (value, size); + dict_set (state->dict, (char *)name, + data_from_dynptr ((void *)dict_value, size)); + dict_ref (state->dict); + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": SETXATTR %s/%"PRId64" (%s)", req_callid (req), + state->loc.path, (int64_t)ino, name); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_SETXATTR, + setxattr, &state->loc, state->dict, flags); + + return; +} + + +static int +fuse_xattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + int need_to_free_dict = 0; + int32_t ret = op_ret; + char *value = ""; + fuse_state_t *state = frame->root->state; + fuse_req_t req = state->req; + +#ifdef GF_DARWIN_HOST_OS + /* This is needed in MacFuse, where MacOSX Finder needs some specific + * keys to be supported from FS + */ + int32_t dummy_ret = 0; + if (state->name) { + if (!dict) { + dict = get_new_dict (); + need_to_free_dict = 1; + } + dummy_ret = gf_compat_getxattr (state->name, dict); + if (dummy_ret != -1) + ret = dummy_ret; + } else { + if (!dict) { + dict = get_new_dict (); + need_to_free_dict = 1; + } + dummy_ret = gf_compat_listxattr (ret, dict, state->size); + if (dummy_ret != -1) + ret = dummy_ret; + } +#endif /* DARWIN */ + + if (ret >= 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => %d", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, op_ret); + + 
/* if successful */ + if (state->name) { + /* if callback for getxattr */ + data_t *value_data = dict_get (dict, state->name); + if (value_data) { + ret = value_data->len; /* Don't return the value for '\0' */ + value = value_data->data; + + /* linux kernel limits the size of xattr value to 64k */ + if (ret > GLUSTERFS_XATTR_LEN_MAX) { + fuse_reply_err (req, E2BIG); + } else if (state->size) { + /* if callback for getxattr and asks for value */ + fuse_reply_buf (req, value, ret); + } else { + /* if callback for getxattr and asks for value length only */ + fuse_reply_xattr (req, ret); + } /* if(ret >...)...else if...else */ + } else if (!strcmp (state->name, "user.glusterfs-booster-volfile")) { + fuse_private_t *priv = this->private; + /* errno to reply with if the volfile cannot be loaded; 0 means the cached volfile is usable */ + int32_t volfile_err = 0; + + if (!priv->volfile) { + int32_t fd = -1, ret = -1; + struct stat st; + char *file = NULL; + + memset (&st, 0, sizeof (st)); + fd = fileno (this->ctx->specfp); + ret = fstat (fd, &st); + if (ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "fstat on fd (%d) failed (%s)", fd, strerror (errno)); + /* only record the failure here: the request must be answered exactly once, and nothing may be cached from the zeroed stat buffer */ + volfile_err = ENODATA; + } else { + /* first request for the volfile: read the whole spec file into priv->volfile so later requests are served from memory */ + priv->volfile_size = st.st_size; + file = priv->volfile = CALLOC (1, priv->volfile_size); + ret = lseek (fd, 0, SEEK_SET); + while ((ret = read (fd, file, GF_UNIT_KB)) > 0) { + file += ret; + } + } + } + + if (volfile_err) { + fuse_reply_err (req, volfile_err); + } else if (priv->volfile_size > GLUSTERFS_XATTR_LEN_MAX) { + fuse_reply_err (req, E2BIG); + } else if (state->size) { + /* if callback for getxattr and asks for value */ + fuse_reply_buf (req, priv->volfile, priv->volfile_size); + } else { + /* if callback for getxattr and asks for value length only */ + fuse_reply_xattr (req, priv->volfile_size); + } /* if(ret >...)...else if...else */ + } else if (!strcmp (state->name, "user.glusterfs-booster-path")) { + if (state->size) { + fuse_reply_buf (req, state->loc.path, strlen (state->loc.path) + 1); + } else { + fuse_reply_xattr (req, strlen (state->loc.path) + 1); + } + } else { + fuse_reply_err (req, ENODATA); + } /* if(value_data)...else */ + } else { + /* if
callback for listxattr */ + int32_t len = 0; + data_pair_t *trav = dict->members_list; + while (trav) { + len += strlen (trav->key) + 1; + trav = trav->next; + } /* while(trav) */ + value = alloca (len + 1); + ERR_ABORT (value); + len = 0; + trav = dict->members_list; + while (trav) { + strcpy (value + len, trav->key); + value[len + strlen(trav->key)] = '\0'; + len += strlen (trav->key) + 1; + trav = trav->next; + } /* while(trav) */ + if (state->size) { + /* if callback for listxattr and asks for list of keys */ + fuse_reply_buf (req, value, len); + } else { + /* if callback for listxattr and asks for length of keys only */ + fuse_reply_xattr (req, len); + } /* if(state->size)...else */ + } /* if(state->name)...else */ + } else { + /* if failure - no need to check if listxattr or getxattr */ + if (op_errno != ENODATA) { + if (op_errno == ENOTSUP) + { + gf_fuse_xattr_enotsup_log++; + if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER)) + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "[ ERROR ] Extended attribute not supported by the backend storage"); + } + else + { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": %s() %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path, strerror(op_errno)); + } + } else { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": %s() %s => -1 (%s)", + frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror(op_errno)); + } /* if(op_errno!= ENODATA)...else */ + + fuse_reply_err (req, op_errno); + } /* if(op_ret>=0)...else */ + + if (need_to_free_dict) + dict_unref (dict); + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_getxattr (fuse_req_t req, + fuse_ino_t ino, + const char *name, + size_t size) +{ + fuse_state_t *state; + int32_t ret = -1; + +#ifdef DISABLE_POSIX_ACL + if (!strncmp (name, "system.", 7)) { + fuse_reply_err (req, ENODATA); + return; + } +#endif + + state = state_from_req (req); + state->size = size; + 
state->name = strdup (name); + + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": GETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), state->loc.path, (int64_t)ino, name); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": GETXATTR %s/%"PRId64" (%s)", req_callid (req), + state->loc.path, (int64_t)ino, name); + + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, + getxattr, &state->loc, name); + + return; +} + + +static void +fuse_listxattr (fuse_req_t req, + fuse_ino_t ino, + size_t size) +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + state->size = size; + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": LISTXATTR %s/%"PRId64" (fuse_loc_fill() failed)", + req_callid (req), state->loc.path, (int64_t)ino); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": LISTXATTR %s/%"PRId64, req_callid (req), + state->loc.path, (int64_t)ino); + + FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, + getxattr, &state->loc, NULL); + + return; +} + + +static void +fuse_removexattr (fuse_req_t req, + fuse_ino_t ino, + const char *name) + +{ + fuse_state_t *state; + int32_t ret = -1; + + state = state_from_req (req); + ret = fuse_loc_fill (&state->loc, state, ino, 0, NULL); + if ((state->loc.inode == NULL) || + (ret < 0)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s) (fuse_loc_fill() failed)", + req_callid (req), state->loc.path, (int64_t)ino, name); + + fuse_reply_err (req, EINVAL); + free_state (state); + return; + } + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s)", 
req_callid (req), + state->loc.path, (int64_t)ino, name); + + FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR, + removexattr, &state->loc, name); + + return; +} + + +static int gf_fuse_lk_enosys_log; + +static int +fuse_getlk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + fuse_state_t *state = frame->root->state; + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": ERR => 0", frame->root->unique); + fuse_reply_lock (state->req, lock); + } else { + if (op_errno == ENOSYS) { + gf_fuse_lk_enosys_log++; + if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "[ ERROR ] loading 'features/posix-locks' on server side may help your application"); + } + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "%"PRId64": ERR => -1 (%s)", + frame->root->unique, strerror (op_errno)); + } + fuse_reply_err (state->req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_getlk (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi, + struct flock *lock) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + fd = FI_TO_FD (fi); + state = state_from_req (req); + state->req = req; + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": GETLK %p", req_callid (req), fd); + + FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK, + lk, fd, F_GETLK, lock); + + return; +} + + +static int +fuse_setlk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + fuse_state_t *state = frame->root->state; + + if (op_ret == 0) { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": ERR => 0", frame->root->unique); + fuse_reply_err (state->req, 0); + } else { + if (op_errno == ENOSYS) { + gf_fuse_lk_enosys_log++; + if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "[ 
ERROR ] loading 'features/posix-locks' on server side may help your application"); + } + } else { + gf_log ("glusterfs-fuse", + (op_errno == EAGAIN) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "%"PRId64": ERR => -1 (%s)", + frame->root->unique, strerror (op_errno)); + } + + fuse_reply_err (state->req, op_errno); + } + + free_state (state); + STACK_DESTROY (frame->root); + + return 0; +} + + +static void +fuse_setlk (fuse_req_t req, + fuse_ino_t ino, + struct fuse_file_info *fi, + struct flock *lock, + int sleep) +{ + fuse_state_t *state; + fd_t *fd = NULL; + + fd = FI_TO_FD (fi); + state = state_from_req (req); + state->req = req; + state->fd = fd; + + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, + "%"PRId64": SETLK %p (sleep=%d)", req_callid (req), fd, + sleep); + + FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, + lk, fd, (sleep ? F_SETLKW : F_SETLK), lock); + + return; +} + + +static void +fuse_init (void *data, struct fuse_conn_info *conn) +{ + xlator_t *this_xl = NULL; + + if (data == NULL) { + return ; + } + + this_xl = data; + + this_xl->itable = inode_table_new (0, this_xl); + + return ; +} + +static void +fuse_destroy (void *data) +{ + +} + +static struct fuse_lowlevel_ops fuse_ops = { + .init = fuse_init, + .destroy = fuse_destroy, + .lookup = fuse_lookup, + .forget = fuse_forget, + .getattr = fuse_getattr, + .setattr = fuse_setattr, + .opendir = fuse_opendir, + .readdir = fuse_readdir, + .releasedir = fuse_releasedir, + .access = fuse_access, + .readlink = fuse_readlink, + .mknod = fuse_mknod, + .mkdir = fuse_mkdir, + .unlink = fuse_unlink, + .rmdir = fuse_rmdir, + .symlink = fuse_symlink, + .rename = fuse_rename, + .link = fuse_link, + .create = fuse_create, + .open = fuse_open, + .read = fuse_readv, + .write = fuse_write, + .flush = fuse_flush, + .release = fuse_release, + .fsync = fuse_fsync, + .fsyncdir = fuse_fsyncdir, + .statfs = fuse_statfs, + .setxattr = fuse_setxattr, + .getxattr = fuse_getxattr, + .listxattr = fuse_listxattr, + .removexattr = fuse_removexattr, + 
.getlk = fuse_getlk, + .setlk = fuse_setlk +}; + + +static void * +fuse_thread_proc (void *data) +{ + char *mount_point = NULL; + xlator_t *this = data; + fuse_private_t *priv = this->private; + int32_t res = 0; + data_t *buf = priv->buf; + int32_t ref = 0; + size_t chan_size = fuse_chan_bufsize (priv->ch); + char *recvbuf = CALLOC (1, chan_size); + ERR_ABORT (recvbuf); + + while (!fuse_session_exited (priv->se)) { + + + res = fuse_chan_receive (priv->ch, + recvbuf, + chan_size); + + if (res == -1) { + if (errno != EINTR) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "fuse_chan_receive() returned -1 (%d)", errno); + } + if (errno == ENODEV) + break; + continue; + } + + buf = priv->buf; + + if (res && res != -1) { + if (buf->len < (res)) { + if (buf->data) { + FREE (buf->data); + buf->data = NULL; + } + buf->data = CALLOC (1, res); + ERR_ABORT (buf->data); + buf->len = res; + } + memcpy (buf->data, recvbuf, res); // evil evil + + fuse_session_process (priv->se, + buf->data, + res, + priv->ch); + } + + LOCK (&buf->lock); + ref = buf->refcount; + UNLOCK (&buf->lock); + if (1) { + data_unref (buf); + + priv->buf = data_ref (data_from_dynptr (NULL, 0)); + } + } + if (dict_get (this->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str (dict_get (this->options, + ZR_MOUNTPOINT_OPT)); + if (mount_point) { + gf_log (this->name, GF_LOG_WARNING, + "unmounting %s", mount_point); + dict_del (this->options, ZR_MOUNTPOINT_OPT); + } + fuse_session_remove_chan (priv->ch); + fuse_session_destroy (priv->se); + // fuse_unmount (priv->mount_point, priv->ch); + + raise (SIGTERM); + + return NULL; +} + + +int32_t +notify (xlator_t *this, int32_t event, + void *data, ...) +{ + + switch (event) + { + case GF_EVENT_CHILD_UP: + +#ifndef GF_DARWIN_HOST_OS + /* + * This is because macfuse sends statfs() once the fuse thread + * gets activated, and by that time if the client is not + * connected, it give 'Device not configured' error. 
Hence, + * create thread only when client sends CHILD_UP (ie, client + * is connected). + */ + + /* TODO: somehow, try to get the mountpoint active as soon as + * init() is complete, so that the hang effect when the + * server is not started is removed. + */ + + /* This code causes problem with 'automount' too */ + /* case GF_EVENT_CHILD_CONNECTING: */ +#endif /* DARWIN */ + + { + fuse_private_t *private = this->private; + int32_t ret = 0; + + /* start the fuse reader thread once; fuse_thread_started guards against starting it again on later events */ + if (!private->fuse_thread_started) + { + private->fuse_thread_started = 1; + + ret = pthread_create (&private->fuse_thread, NULL, + fuse_thread_proc, this); + + /* pthread_create() reports failure through its return value and does not set errno */ + if (ret != 0) + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "pthread_create() failed (%s)", strerror (ret)); + assert (ret == 0); + } + break; + } + case GF_EVENT_PARENT_UP: + { + default_notify (this, GF_EVENT_PARENT_UP, data); + /* explicit break: do not rely on falling through into 'default' */ + break; + } + default: + break; + } + return 0; +} + +/* init: validates the mountpoint option, mounts FUSE there with the option list chosen for the host OS below, and creates the lowlevel session; returns 0 on success and -1 on failure */ +int +init (xlator_t *this_xl) +{ + int ret = 0; + dict_t *options = NULL; + char *value_string = NULL; + fuse_private_t *priv = NULL; + struct stat stbuf = {0,}; + +#ifdef GF_DARWIN_HOST_OS + int fuse_argc = 9; + char *fuse_argv[] = {"glusterfs", + "-o", "allow_other", + "-o", "default_permissions", + "-o", "fsname=glusterfs", + "-o", "local", + NULL}; + +#elif GF_LINUX_HOST_OS /* ! DARWIN_OS */ + int fuse_argc = 19; + + char *fuse_argv[] = {"glusterfs", + "-o", "nonempty", + "-o", "max_readahead=1048576", + "-o", "max_read=1048576", + "-o", "max_write=1048576", + "-o", "allow_other", + "-o", "default_permissions", + "-o", "fsname=glusterfs", + "-o", "dev", + "-o", "suid", + NULL}; + +#else /* BSD || SOLARIS */ + /* BSD fuse doesn't support '-o dev', '-o nonempty' option */ + int fuse_argc = 15; + + char *fuse_argv[] = {"glusterfs", + "-o", "max_readahead=1048576", + "-o", "max_read=1048576", + "-o", "max_write=1048576", + "-o", "allow_other", + "-o", "default_permissions", + "-o", "fsname=glusterfs", + "-o", "suid", + NULL}; + +#endif /* ! DARWIN_OS || !
LINUX */ + struct fuse_args args = FUSE_ARGS_INIT (fuse_argc, fuse_argv); + + if (this_xl == NULL) + return -1; + + if (this_xl->options == NULL) + return -1; + + options = this_xl->options; + + if (this_xl->name == NULL) + this_xl->name = strdup ("fuse"); + + +#ifdef GF_DARWIN_HOST_OS + if (dict_get (options, "macfuse-local")) { + /* This way, GlusterFS will be detected as 'servers' instead + * of 'devices'. This method is useful if you want to do + * 'umount <mount_point>' over network, instead of 'eject'ing + * it from desktop. Works better for servers + */ + /* Make the '-o local' in argv as NULL, so that its not + in effect */ + fuse_argv[--args.argc] = NULL; + fuse_argv[--args.argc] = NULL; + } +#endif /* ! DARWIN */ + + /* get options from option dictionary */ + ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); + if (value_string == NULL) { + gf_log ("fuse", GF_LOG_ERROR, + "mandatory option mountpoint is not specified"); + return -1; + } + + if (stat (value_string, &stbuf) != 0) { + if (errno == ENOENT) { + gf_log (this_xl->name, GF_LOG_ERROR , + "%s %s does not exist", + ZR_MOUNTPOINT_OPT, value_string); + } else if (errno == ENOTCONN) { + gf_log (this_xl->name, GF_LOG_ERROR , + "mountpoint %s seems to have a stale " + "mount, run 'umount %s' and try again", + value_string, value_string); + } else { + gf_log (this_xl->name, GF_LOG_ERROR , + "%s %s : stat returned %s", + ZR_MOUNTPOINT_OPT, + value_string, strerror (errno)); + } + return -1; + } + + if (S_ISDIR (stbuf.st_mode) == 0) { + gf_log (this_xl->name, GF_LOG_ERROR , + "%s %s is not a directory", + ZR_MOUNTPOINT_OPT, value_string); + return -1; + } + + /* Allocate the private state only after the mountpoint has been + * validated: the 'return -1' paths above do not free anything, so + * allocating earlier left a zeroed, never-freed object installed in + * this_xl->private whenever validation failed. + */ + priv = CALLOC (1, sizeof (*priv)); + ERR_ABORT (priv); + this_xl->private = (void *) priv; + + priv->mount_point = strdup (value_string); + + + ret = dict_get_uint32 (options, "attribute-timeout", + &priv->attribute_timeout); + if (!priv->attribute_timeout) + priv->attribute_timeout = 1; /* default */ + + ret = dict_get_uint32 (options,
"entry-timeout", + &priv->entry_timeout); + if (!priv->entry_timeout) + priv->entry_timeout = 1; /* default */ + + + priv->direct_io_mode = 1; + ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string); + if (value_string) { + ret = gf_string2boolean (value_string, &priv->direct_io_mode); + } + + priv->ch = fuse_mount (priv->mount_point, &args); + if (priv->ch == NULL) { + if (errno == ENOTCONN) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "A stale mount present on %s. " + "run 'umount %s' and try again", + priv->mount_point, + priv->mount_point); + } else { + if (errno == ENOENT) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "unable to mount on %s. run " + "'modprobe fuse' and try again", + priv->mount_point); + } else { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "fuse_mount() failed with error %s " + "on mount point %s", + strerror (errno), + priv->mount_point); + } + } + + goto cleanup_exit; + } + + priv->se = fuse_lowlevel_new (&args, &fuse_ops, + sizeof (fuse_ops), this_xl); + if (priv->se == NULL) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "fuse_lowlevel_new() failed with error %s on " + "mount point %s", + strerror (errno), priv->mount_point); + goto umount_exit; + } + + ret = fuse_set_signal_handlers (priv->se); + if (ret == -1) { + gf_log ("glusterfs-fuse", GF_LOG_ERROR, + "fuse_set_signal_handlers() failed on mount point %s", + priv->mount_point); + goto umount_exit; + } + + fuse_opt_free_args (&args); + + fuse_session_add_chan (priv->se, priv->ch); + + priv->fd = fuse_chan_fd (priv->ch); + priv->buf = data_ref (data_from_dynptr (NULL, 0)); + + this_xl->ctx->top = this_xl; + return 0; + +umount_exit: + fuse_unmount (priv->mount_point, priv->ch); +cleanup_exit: + fuse_opt_free_args (&args); + FREE (priv->mount_point); + FREE (priv); + return -1; +} + + +void +fini (xlator_t *this_xl) +{ + fuse_private_t *priv = NULL; + char *mount_point = NULL; + + if (this_xl == NULL) + return; + + if ((priv = this_xl->private) == NULL) + return; + + if 
(dict_get (this_xl->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str (dict_get (this_xl->options, + ZR_MOUNTPOINT_OPT)); + if (mount_point != NULL) { + gf_log (this_xl->name, GF_LOG_WARNING, + "unmounting '%s'", mount_point); + + dict_del (this_xl->options, ZR_MOUNTPOINT_OPT); + fuse_session_exit (priv->se); + fuse_unmount (mount_point, priv->ch); + } +} + +struct xlator_fops fops = { +}; + +struct xlator_cbks cbks = { +}; + +struct xlator_mops mops = { +}; + +struct volume_options options[] = { + { .key = {"direct-io-mode"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"macfuse-local"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"mountpoint", "mount-point"}, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {"attribute-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 3600 + }, + { .key = {"entry-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 3600 + }, + { .key = {NULL} }, +}; diff --git a/xlators/mount/fuse/src/fuse-extra.c b/xlators/mount/fuse/src/fuse-extra.c new file mode 100644 index 000000000..93574d174 --- /dev/null +++ b/xlators/mount/fuse/src/fuse-extra.c @@ -0,0 +1,137 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif /* _CONFIG_H */ + +#include "fuse-extra.h" +#include "common-utils.h" +#include <stdio.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include "common-utils.h" + +struct fuse_req; +struct fuse_ll; + +struct fuse_req { + struct fuse_ll *f; + uint64_t unique; + int ctr; + pthread_mutex_t lock; + struct fuse_ctx ctx; + struct fuse_chan *ch; + int interrupted; + union { + struct { + uint64_t unique; + } i; + struct { + fuse_interrupt_func_t func; + void *data; + } ni; + } u; + struct fuse_req *next; + struct fuse_req *prev; +}; + +struct fuse_ll { + int debug; + int allow_root; + struct fuse_lowlevel_ops op; + int got_init; + void *userdata; + uid_t owner; + struct fuse_conn_info conn; + struct fuse_req list; + struct fuse_req interrupts; + pthread_mutex_t lock; + int got_destroy; +}; + +struct fuse_out_header { + uint32_t len; + int32_t error; + uint64_t unique; +}; + +uint64_t req_callid (fuse_req_t req) +{ + return req->unique; +} + +static void destroy_req(fuse_req_t req) +{ + pthread_mutex_destroy (&req->lock); + FREE (req); +} + +static void list_del_req(struct fuse_req *req) +{ + struct fuse_req *prev = req->prev; + struct fuse_req *next = req->next; + prev->next = next; + next->prev = prev; +} + +static void +free_req (fuse_req_t req) +{ + int ctr; + struct fuse_ll *f = req->f; + + pthread_mutex_lock(&req->lock); + req->u.ni.func = NULL; + req->u.ni.data = NULL; + pthread_mutex_unlock(&req->lock); + + pthread_mutex_lock(&f->lock); + list_del_req(req); + ctr = --req->ctr; + pthread_mutex_unlock(&f->lock); + if (!ctr) + destroy_req(req); +} + +int32_t +fuse_reply_vec (fuse_req_t req, + struct iovec *vector, + int32_t count) +{ + int32_t error = 0; + struct fuse_out_header out; + struct iovec *iov; + int res; + + iov = alloca ((count + 1) * sizeof (*vector)); + out.unique = req->unique; + out.error = error; + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct 
fuse_out_header); + memcpy (&iov[1], vector, count * sizeof (*vector)); + count++; + out.len = iov_length(iov, count); + res = fuse_chan_send(req->ch, iov, count); + free_req(req); + + return res; +} diff --git a/xlators/mount/fuse/src/fuse-extra.h b/xlators/mount/fuse/src/fuse-extra.h new file mode 100644 index 000000000..0e8052b5a --- /dev/null +++ b/xlators/mount/fuse/src/fuse-extra.h @@ -0,0 +1,42 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _FUSE_EXTRA_H +#define _FUSE_EXTRA_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif /* _CONFIG_H */ + +#include <stdlib.h> +#include <fuse/fuse_lowlevel.h> + +#define GLUSTERFS_XATTR_LEN_MAX 65536 + +uint64_t req_callid (fuse_req_t req); + +size_t fuse_dirent_size (size_t dname_len); + +int32_t +fuse_reply_vec (fuse_req_t req, + struct iovec *vector, + int32_t count); + +#endif /* _FUSE_EXTRA_H */ diff --git a/xlators/mount/fuse/utils/Makefile.am b/xlators/mount/fuse/utils/Makefile.am new file mode 100644 index 000000000..1217c30da --- /dev/null +++ b/xlators/mount/fuse/utils/Makefile.am @@ -0,0 +1,10 @@ +utildir = $(destdir)/sbin + +if GF_DARWIN_HOST_OS +util_SCRIPTS = mount_glusterfs +else +util_SCRIPTS = mount.glusterfs +endif + +CLEANFILES = + diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in new file mode 100755 index 000000000..481fd265f --- /dev/null +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -0,0 +1,152 @@ +#!/bin/sh +# (C) 2006, 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA + + + +_init () +{ + # log level definitions + LOG_NONE=NONE; + LOG_CRITICAL=CRITICAL; + LOG_ERROR=ERROR; + LOG_WARNING=WARNING; + LOG_DEBUG=DEBUG; + + # set default log level to ERROR + log_level=$LOG_WARNING; +} + +start_glusterfs () +{ + prefix="@prefix@"; + exec_prefix=@exec_prefix@; + cmd_line=$(echo "@sbindir@/glusterfs"); + + if [ -n "$log_level_str" ]; then + case "$log_level_str" in + "ERROR") + log_level=$LOG_ERROR; + ;; + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + echo "invalid log level $log_level_str, using ERROR"; + log_level=$LOG_ERROR; + ;; + esac + fi + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); + fi + + if [ -n "$direct_io_mode" ]; then + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + fi + + if [ -z "$volfile_loc" ]; then + if [ -n "$transport" ]; then + cmd_line=$(echo "$cmd_line \ +--volfile-server=$server_ip \ +--volfile-server-port=$server_port \ +--volfile-server-transport=$transport"); + else + cmd_line=$(echo "$cmd_line \ +--volfile-server=$server_ip \ +--volfile-server-port=$server_port"); + fi + else + cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + fi + + if [ -n "$volume_name" ]; then + cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); + fi + + if [ -n "$volume_id" ]; then + cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + fi + + cmd_line=$(echo "$cmd_line $mount_point"); + exec $cmd_line; +} + + +main () +{ + options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); + new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); + + [ -n 
"$new_log_level" ] && { + log_level_str="$new_log_level"; + } + log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); + + transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); + + direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); + + volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); + + volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p'); + + volfile_loc="$1"; + + [ -r "$volfile_loc" ] || { + server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p'); + server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$server_port" ] || { + server_port="6996"; + } + + volfile_loc=""; + } + new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ + -e 's/[,]*log-level=[^,]*//' \ + -e 's/[,]*volume-name=[^,]*//' \ + -e 's/[,]*direct-io-mode=[^,]*//' \ + -e 's/[,]*transport=[^,]*//' \ + -e 's/[,]*volume-id=[^,]*//'); + # following line is product of love towards sed + # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p'); + + mount_point="$2"; + + # Simple check to avoid multiple identical mounts + if grep -q "glusterfs $mount_point fuse" /etc/mtab; then + echo "$0: according to mtab, GlusterFS is already mounted on $mount_point" + exit 1 + fi + + fs_options=$(echo "$fs_options,$new_fs_options"); + + start_glusterfs; +} + +_init "$@" && main "$@"; diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in new file mode 100755 index 000000000..1376a8897 --- /dev/null +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -0,0 +1,181 @@ +#!/bin/sh +# (C) 2008 Z RESEARCH Inc. <http://www.zresearch.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA + + + +_init () +{ + # log level definitions + LOG_NONE=NONE; + LOG_CRITICAL=CRITICAL; + LOG_ERROR=ERROR; + LOG_WARNING=WARNING; + LOG_DEBUG=DEBUG; + + # set default log level to ERROR + log_level=$LOG_WARNING; +} + +start_glusterfs () +{ + prefix="@prefix@"; + exec_prefix=@exec_prefix@; + cmd_line=$(echo "@sbindir@/glusterfs"); + + if [ -n "$log_level_str" ]; then + case "$log_level_str" in + "ERROR") + log_level=$LOG_ERROR; + ;; + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + echo "invalid log level $log_level_str, using ERROR"; + log_level=$LOG_WARNING; + ;; + esac + fi + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); + fi + + if [ -n "$direct_io_mode" ]; then + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + fi + + if [ -z "$volfile_loc" ]; then + if [ -n "$transport" ]; then + cmd_line=$(echo "$cmd_line \ +--volfile-server=$server_ip \ +--volfile-server-port=$server_port \ +--volfile-server-transport=$transport"); + else + cmd_line=$(echo "$cmd_line \ +--volfile-server=$server_ip \ +--volfile-server-port=$server_port"); + fi + else + cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + fi + + if [ -n "$volume_name" ]; then + cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); + fi + + if [ -n "$volume_id" ]; then + cmd_line=$(echo "$cmd_line 
--volfile-id=$volume_id"); + fi + + cmd_line=$(echo "$cmd_line $mount_point"); + exec $cmd_line; +} + + +main () +{ + + new_log_level="" + log_file="" + transport="" + direct_io_mode="" + volume_name="" + new_fs_options="" + + while getopts o: opt; do + case "$opt" in + o) + options=$(echo $OPTARG | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); + [ -z $new_log_level ] && { + new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); + } + + [ -z $log_file ] && { + log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); + } + + [ -z $transport ] && { + transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); + } + + [ -z $direct_io_mode ] && { + direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); + } + + [ -z $volume_name ] && { + volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); + } + + [ -z $volume_id ] && { + volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p'); + } + + this_option=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ + -e 's/[,]*log-level=[^,]*//' \ + -e 's/[,]*volume-name=[^,]*//' \ + -e 's/[,]*direct-io-mode=[^,]*//' \ + -e 's/[,]*transport=[^,]*//' \ + -e 's/[,]*volume-id=[^,]*//'); + new_fs_options="$new_fs_options $this_option"; + ;; + esac + done + + [ -n "$new_log_level" ] && { + log_level_str="$new_log_level"; + } + + # TODO: use getopt. 
This is very much darwin specific + volfile_loc="$1"; + while [ "$volfile_loc" == "-o" ] ; do + shift ; + shift ; + volfile_loc="$1"; + done + + [ -r "$volfile_loc" ] || { + server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p'); + server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$server_port" ] || { + server_port="6996"; + } + + volfile_loc=""; + } + # following line is product of love towards sed + # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p'); + + mount_point="$2"; + + fs_options=$(echo "$fs_options,$new_fs_options"); + + start_glusterfs; +} + +_init "$@" && main "$@"; diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am new file mode 100644 index 000000000..f7504bbe8 --- /dev/null +++ b/xlators/performance/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache + +CLEANFILES = diff --git a/xlators/performance/io-cache/Makefile.am b/xlators/performance/io-cache/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/performance/io-cache/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am new file mode 100644 index 000000000..b1bf5bfbf --- /dev/null +++ b/xlators/performance/io-cache/src/Makefile.am @@ -0,0 +1,14 @@ +xlator_LTLIBRARIES = io-cache.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +io_cache_la_LDFLAGS = -module -avoidversion + +io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c +io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = io-cache.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c new file mode 100644 index 
000000000..f367cdb88 --- /dev/null +++ b/xlators/performance/io-cache/src/io-cache.c @@ -0,0 +1,1478 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "io-cache.h" +#include <assert.h> +#include <sys/time.h> + +static uint32_t +ioc_get_priority (ioc_table_t *table, + const char *path); + +static uint32_t +ioc_get_priority (ioc_table_t *table, + const char *path); + +static inline ioc_inode_t * +ioc_inode_reupdate (ioc_inode_t *ioc_inode) +{ + ioc_table_t *table = ioc_inode->table; + + list_add_tail (&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + + return ioc_inode; +} + +static inline ioc_inode_t * +ioc_get_inode (dict_t *dict, + char *name) +{ + ioc_inode_t *ioc_inode = NULL; + data_t *ioc_inode_data = dict_get (dict, name); + ioc_table_t *table = NULL; + + if (ioc_inode_data) { + ioc_inode = data_to_ptr (ioc_inode_data); + table = ioc_inode->table; + + ioc_table_lock (table); + { + if (list_empty (&ioc_inode->inode_lru)) { + ioc_inode = ioc_inode_reupdate (ioc_inode); + } + } + ioc_table_unlock (table); + } + + return ioc_inode; +} + +int32_t +ioc_inode_need_revalidate (ioc_inode_t 
*ioc_inode) +{ + int8_t need_revalidate = 0; + struct timeval tv = {0,}; + int32_t ret = -1; + ioc_table_t *table = ioc_inode->table; + + ret = gettimeofday (&tv, NULL); + + if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout) + need_revalidate = 1; + + return need_revalidate; +} + +/* + * __ioc_inode_flush - flush all the cached pages of the given inode + * + * @ioc_inode: + * + * assumes lock is held + */ +int32_t +__ioc_inode_flush (ioc_inode_t *ioc_inode) +{ + ioc_page_t *curr = NULL, *next = NULL; + int32_t destroy_size = 0; + int32_t ret = 0; + + list_for_each_entry_safe (curr, next, &ioc_inode->pages, pages) { + ret = ioc_page_destroy (curr); + + if (ret != -1) + destroy_size += ret; + } + + return destroy_size; +} + +void +ioc_inode_flush (ioc_inode_t *ioc_inode) +{ + int32_t destroy_size = 0; + + ioc_inode_lock (ioc_inode); + { + destroy_size = __ioc_inode_flush (ioc_inode); + } + ioc_inode_unlock (ioc_inode); + + if (destroy_size) { + ioc_table_lock (ioc_inode->table); + { + ioc_inode->table->cache_used -= destroy_size; + } + ioc_table_unlock (ioc_inode->table); + } + + return; +} + +/* + * ioc_utimens_cbk - + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + */ +int32_t +ioc_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +/* + * ioc_utimens - + * + * @frame: + * @this: + * @loc: + * @tv: + * + */ +int32_t +ioc_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec *tv) +{ + uint64_t ioc_inode = 0; + inode_ctx_get (loc->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND (frame, ioc_utimens_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->utimens, + loc, tv); + return 0; +} + +int32_t +ioc_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + 
int32_t op_errno, + inode_t *inode, + struct stat *stbuf, + dict_t *dict) +{ + ioc_inode_t *ioc_inode = NULL; + ioc_local_t *local = frame->local; + ioc_table_t *table = this->private; + ioc_page_t *page = NULL; + data_t *page_data = NULL; + data_t *content_data = NULL; + char *src = NULL; + char *dst = NULL; + char need_unref = 0; + uint8_t cache_still_valid = 0; + uint32_t weight = 0; + uint64_t tmp_ioc_inode = 0; + char *buf = NULL; + char *tmp = NULL; + int i; + + if (op_ret != 0) + goto out; + + inode_ctx_get (inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (ioc_inode) { + cache_still_valid = ioc_cache_still_valid (ioc_inode, + stbuf); + + if (!cache_still_valid) { + ioc_inode_flush (ioc_inode); + } + /* update the time-stamp of revalidation */ + ioc_inode_lock (ioc_inode); + { + gettimeofday (&ioc_inode->tv, NULL); + } + ioc_inode_unlock (ioc_inode); + + ioc_table_lock (ioc_inode->table); + { + list_move_tail (&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + } + ioc_table_unlock (ioc_inode->table); + } + + if (local && stbuf->st_size && + local->need_xattr >= stbuf->st_size) { + if (!ioc_inode) { + weight = ioc_get_priority (table, + local->file_loc.path); + ioc_inode = ioc_inode_update (table, + inode, weight); + inode_ctx_put (inode, this, + (uint64_t)(long)ioc_inode); + } + + ioc_inode_lock (ioc_inode); + { + content_data = dict_get (dict, "glusterfs.content"); + page = ioc_page_get (ioc_inode, 0); + + if (content_data) { + if (page) { + dict_unref (page->ref); + free (page->vector); + page->vector = NULL; + + ioc_table_lock (table); + { + table->cache_used -= + page->size; + } + ioc_table_unlock (table); + } else { + page = ioc_page_create (ioc_inode, 0); + } + + dst = CALLOC (1, stbuf->st_size); + page->ref = dict_ref (get_new_dict ()); + page_data = data_from_dynptr (dst, + stbuf->st_size); + dict_set (page->ref, NULL, page_data); + + src = data_to_ptr (content_data); + memcpy (dst, src, 
stbuf->st_size); + + page->vector = CALLOC (1, + sizeof (*page->vector)); + page->vector->iov_base = dst; + page->vector->iov_len = stbuf->st_size; + page->count = 1; + + page->waitq = NULL; + page->size = stbuf->st_size; + page->ready = 1; + + ioc_table_lock (table); + { + table->cache_used += page->size; + } + ioc_table_unlock (table); + + } else { + if (!(page && page->ready)) { + gf_log (this->name, GF_LOG_DEBUG, + "page not present"); + + ioc_inode_unlock (ioc_inode); + STACK_WIND (frame, + ioc_lookup_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, + &local->file_loc, + local->xattr_req); + return 0; + } + buf = CALLOC (1, stbuf->st_size); + tmp = buf; + + for (i = 0; i < page->count; i++) { + memcpy (tmp, page->vector[i].iov_base, + page->vector[i].iov_len); + tmp += page->vector[i].iov_len; + } + + gf_log (this->name, GF_LOG_DEBUG, + "serving file %s from cache", + local->file_loc.path); + + if (!dict) { + need_unref = 1; + dict = dict_ref ( + get_new_dict ()); + } + dict_set (dict, "glusterfs.content", + data_from_dynptr (buf, + stbuf->st_size)); + } + + ioc_inode->mtime = stbuf->st_mtime; + gettimeofday (&ioc_inode->tv, NULL); + } + ioc_inode_unlock (ioc_inode); + + if (content_data && + ioc_need_prune (ioc_inode->table)) { + ioc_prune (ioc_inode->table); + } + } + + out: + STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict); + + if (need_unref) { + dict_unref (dict); + } + + return 0; +} + +int32_t +ioc_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + uint64_t content_limit = 0; + + if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) { + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + ioc_page_t *page = NULL; + ioc_local_t *local = CALLOC (1, sizeof (*local)); + + local->need_xattr = content_limit; + local->file_loc.path = loc->path; + local->file_loc.inode = loc->inode; + frame->local = local; + + inode_ctx_get (loc->inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t 
*)(long)tmp_ioc_inode; + + if (ioc_inode) { + ioc_inode_lock (ioc_inode); + { + page = ioc_page_get (ioc_inode, 0); + if ((content_limit <= + ioc_inode->table->page_size) && + page && page->ready) { + local->need_xattr = -1; + } + } + ioc_inode_unlock (ioc_inode); + } + } + + STACK_WIND (frame, + ioc_lookup_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, + loc, + xattr_req); + return 0; +} + +/* + * ioc_forget - + * + * @frame: + * @this: + * @inode: + * + */ +int32_t +ioc_forget (xlator_t *this, + inode_t *inode) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode); + + return 0; +} + + +/* + * ioc_cache_validate_cbk - + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf + * + */ +int32_t +ioc_cache_validate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + ioc_local_t *local = frame->local; + ioc_inode_t *ioc_inode = NULL; + size_t destroy_size = 0; + struct stat *local_stbuf = stbuf; + + ioc_inode = local->inode; + + if ((op_ret == -1) || + ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) { + gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, + "cache for inode(%p) is invalid. flushing all pages", + ioc_inode); + /* NOTE: only pages with no waiting frames are flushed by + * ioc_inode_flush. 
page_fault will be generated for all + * the pages which have waiting frames by ioc_inode_wakeup() + */ + ioc_inode_lock (ioc_inode); + { + destroy_size = __ioc_inode_flush (ioc_inode); + if (op_ret >= 0) + ioc_inode->mtime = stbuf->st_mtime; + } + ioc_inode_unlock (ioc_inode); + local_stbuf = NULL; + } + + if (destroy_size) { + ioc_table_lock (ioc_inode->table); + { + ioc_inode->table->cache_used -= destroy_size; + } + ioc_table_unlock (ioc_inode->table); + } + + if (op_ret < 0) + local_stbuf = NULL; + + ioc_inode_lock (ioc_inode); + { + gettimeofday (&ioc_inode->tv, NULL); + } + ioc_inode_unlock (ioc_inode); + + ioc_inode_wakeup (frame, ioc_inode, local_stbuf); + + /* any page-fault initiated by ioc_inode_wakeup() will have its own + * fd_ref on fd, safe to unref validate frame's private copy + */ + fd_unref (local->fd); + + STACK_DESTROY (frame->root); + + return 0; +} + +static int32_t +ioc_wait_on_inode (ioc_inode_t *ioc_inode, + ioc_page_t *page) +{ + ioc_waitq_t *waiter = NULL, *trav = NULL; + uint32_t page_found = 0; + + trav = ioc_inode->waitq; + + while (trav) { + if (trav->data == page) { + page_found = 1; + break; + } + trav = trav->next; + } + + if (!page_found) { + waiter = CALLOC (1, sizeof (ioc_waitq_t)); + ERR_ABORT (waiter); + waiter->data = page; + waiter->next = ioc_inode->waitq; + ioc_inode->waitq = waiter; + } + + return 0; +} + +/* + * ioc_cache_validate - + * + * @frame: + * @ioc_inode: + * @fd: + * + */ +static int32_t +ioc_cache_validate (call_frame_t *frame, + ioc_inode_t *ioc_inode, + fd_t *fd, + ioc_page_t *page) +{ + call_frame_t *validate_frame = NULL; + ioc_local_t *validate_local = NULL; + + validate_local = CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (validate_local); + validate_frame = copy_frame (frame); + validate_local->fd = fd_ref (fd); + validate_local->inode = ioc_inode; + validate_frame->local = validate_local; + + STACK_WIND (validate_frame, + ioc_cache_validate_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD 
(frame->this)->fops->fstat, + fd); + + return 0; +} + +static inline uint32_t +is_match (const char *path, + const char *pattern) +{ + char *pathname = strdup (path); + int32_t ret = 0; + + ret = fnmatch (pattern, path, FNM_NOESCAPE); + + free (pathname); + + return (ret == 0); +} + +static uint32_t +ioc_get_priority (ioc_table_t *table, + const char *path) +{ + uint32_t priority = 0; + struct ioc_priority *curr = NULL; + + list_for_each_entry (curr, &table->priority_list, list) { + if (is_match (path, curr->pattern)) + priority = curr->priority; + } + + return priority; +} + +/* + * ioc_open_cbk - open callback for io cache + * + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * + */ +int32_t +ioc_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + uint64_t tmp_ioc_inode = 0; + ioc_local_t *local = frame->local; + ioc_table_t *table = this->private; + ioc_inode_t *ioc_inode = NULL; + inode_t *inode = local->file_loc.inode; + uint32_t weight = 0; + const char *path = local->file_loc.path; + + if (op_ret != -1) { + /* look for ioc_inode corresponding to this fd */ + LOCK (&fd->inode->lock); + //{ + + inode_ctx_get (fd->inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + + if (!ioc_inode) { + /* this is the first time someone is opening this + file, assign weight + */ + weight = ioc_get_priority (table, path); + + ioc_inode = ioc_inode_update (table, inode, weight); + inode_ctx_put (fd->inode, this, + (uint64_t)(long)ioc_inode); + } else { + ioc_table_lock (ioc_inode->table); + //{ + list_move_tail (&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + //} + ioc_table_unlock (ioc_inode->table); + } + + //} + UNLOCK (&fd->inode->lock); + + /* If mandatory locking has been enabled on this file, + we disable caching on it */ + if (((inode->st_mode & S_ISGID) && + !(inode->st_mode & S_IXGRP))) { + fd_ctx_set (fd, this, 1); + } 
+ + /* If O_DIRECT open, we disable caching on it */ + if ((local->flags & O_DIRECT)){ + /* O_DIRECT is only for one fd, not the inode + * as a whole + */ + fd_ctx_set (fd, this, 1); + } + } + + FREE (local); + frame->local = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + +/* + * ioc_create_cbk - create callback for io cache + * + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * @inode: + * @buf: + * + */ +int32_t +ioc_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + ioc_local_t *local = frame->local; + ioc_table_t *table = this->private; + ioc_inode_t *ioc_inode = NULL; + uint32_t weight = 0; + const char *path = local->file_loc.path; + + if (op_ret != -1) { + { + /* assign weight */ + weight = ioc_get_priority (table, path); + + ioc_inode = ioc_inode_update (table, inode, weight); + LOCK (&fd->inode->lock); + { + inode_ctx_put (fd->inode, this, + (uint64_t)(long)ioc_inode); + } + UNLOCK (&fd->inode->lock); + } + /* If mandatory locking has been enabled on this file, + we disable caching on it */ + if ((inode->st_mode & S_ISGID) && + !(inode->st_mode & S_IXGRP)) { + fd_ctx_set (fd, this, 1); + } + + /* If O_DIRECT open, we disable caching on it */ + if (local->flags & O_DIRECT){ + /* O_DIRECT is only for one fd, not the inode + * as a whole + */ + fd_ctx_set (fd, this, 1); + } + + } + + frame->local = NULL; + FREE (local); + + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + + return 0; +} + +/* + * ioc_open - open fop for io cache + * @frame: + * @this: + * @loc: + * @flags: + * + */ +int32_t +ioc_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + + ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (local); + + local->flags = flags; + local->file_loc.path = loc->path; + local->file_loc.inode = loc->inode; + + frame->local = 
local; + + STACK_WIND (frame, + ioc_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, + flags, + fd); + + return 0; +} + +/* + * ioc_create - create fop for io cache + * + * @frame: + * @this: + * @pathname: + * @flags: + * @mode: + * + */ +int32_t +ioc_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (local); + + local->flags = flags; + local->file_loc.path = loc->path; + frame->local = local; + + STACK_WIND (frame, ioc_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + return 0; +} + + + + +/* + * ioc_release - release fop for io cache + * + * @frame: + * @this: + * @fd: + * + */ +int32_t +ioc_release (xlator_t *this, + fd_t *fd) +{ + return 0; +} + +/* + * ioc_readv_disabled_cbk + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @vector: + * @count: + * + */ +int32_t +ioc_readv_disabled_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + + +int32_t +ioc_need_prune (ioc_table_t *table) +{ + int64_t cache_difference = 0; + + ioc_table_lock (table); + { + cache_difference = table->cache_used - table->cache_size; + } + ioc_table_unlock (table); + + if (cache_difference > 0) + return 1; + else + return 0; +} + +/* + * dispatch_requests - + * + * @frame: + * @inode: + * + * + */ +static void +dispatch_requests (call_frame_t *frame, + ioc_inode_t *ioc_inode, + fd_t *fd, + off_t offset, + size_t size) +{ + ioc_local_t *local = frame->local; + ioc_table_t *table = ioc_inode->table; + ioc_page_t *trav = NULL; + ioc_waitq_t *waitq = NULL; + off_t rounded_offset = 0; + off_t rounded_end = 0; + off_t trav_offset = 0; + int32_t fault = 0; + int8_t need_validate = 0; + int8_t 
might_need_validate = 0; /* if a page exists, do we need + to validate it? */ + + rounded_offset = floor (offset, table->page_size); + rounded_end = roof (offset + size, table->page_size); + trav_offset = rounded_offset; + + /* once a frame does read, it should be waiting on something */ + local->wait_count++; + + /* Requested region can fall in three different pages, + * 1. Ready - region is already in cache, we just have to serve it. + * 2. In-transit - page fault has been generated on this page, we need + * to wait till the page is ready + * 3. Fault - page is not in cache, we have to generate a page fault + */ + + might_need_validate = ioc_inode_need_revalidate (ioc_inode); + + while (trav_offset < rounded_end) { + size_t trav_size = 0; + off_t local_offset = 0; + + ioc_inode_lock (ioc_inode); + //{ + + /* look for requested region in the cache */ + trav = ioc_page_get (ioc_inode, trav_offset); + + local_offset = max (trav_offset, offset); + trav_size = min (((offset+size) - local_offset), + table->page_size); + + if (!trav) { + /* page not in cache, we need to generate page fault */ + trav = ioc_page_create (ioc_inode, trav_offset); + fault = 1; + if (!trav) { + gf_log (frame->this->name, GF_LOG_CRITICAL, + "ioc_page_create returned NULL"); + } + } + + ioc_wait_on_page (trav, frame, local_offset, trav_size); + + if (trav->ready) { + /* page found in cache */ + if (!might_need_validate) { + /* fresh enough */ + gf_log (frame->this->name, GF_LOG_DEBUG, + "cache hit for trav_offset=%"PRId64"" + "/local_offset=%"PRId64"", + trav_offset, local_offset); + waitq = ioc_page_wakeup (trav); + } else { + /* if waitq already exists, fstat revalidate is + already on the way */ + if (!ioc_inode->waitq) { + need_validate = 1; + } + ioc_wait_on_inode (ioc_inode, trav); + } + } + + //} + ioc_inode_unlock (ioc_inode); + + ioc_waitq_return (waitq); + waitq = NULL; + + if (fault) { + fault = 0; + /* new page created, increase the table->cache_used */ + ioc_page_fault (ioc_inode, 
frame, fd, trav_offset); + } + + if (need_validate) { + need_validate = 0; + gf_log (frame->this->name, GF_LOG_DEBUG, + "sending validate request for " + "inode(%"PRId64") at offset=%"PRId64"", + fd->inode->ino, trav_offset); + ioc_cache_validate (frame, ioc_inode, fd, trav); + } + + trav_offset += table->page_size; + } + + ioc_frame_return (frame); + + if (ioc_need_prune (ioc_inode->table)) { + ioc_prune (ioc_inode->table); + } + + return; +} + + +/* + * ioc_readv - + * + * @frame: + * @this: + * @fd: + * @size: + * @offset: + * + */ +int32_t +ioc_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + ioc_local_t *local = NULL; + uint32_t weight = 0; + + inode_ctx_get (fd->inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (!ioc_inode) { + /* caching disabled, go ahead with normal readv */ + STACK_WIND (frame, + ioc_readv_disabled_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->readv, + fd, + size, + offset); + return 0; + } + + if (!fd_ctx_get (fd, this, NULL)) { + /* disable caching for this fd, go ahead with normal readv */ + STACK_WIND (frame, + ioc_readv_disabled_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->readv, + fd, + size, + offset); + return 0; + } + + local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (local); + INIT_LIST_HEAD (&local->fill_list); + + frame->local = local; + local->pending_offset = offset; + local->pending_size = size; + local->offset = offset; + local->size = size; + local->inode = ioc_inode; + + gf_log (this->name, GF_LOG_DEBUG, + "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"", + frame, offset, size); + + weight = ioc_inode->weight; + + ioc_table_lock (ioc_inode->table); + { + list_move_tail (&ioc_inode->inode_lru, + &ioc_inode->table->inode_lru[weight]); + } + ioc_table_unlock (ioc_inode->table); + + dispatch_requests 
(frame, ioc_inode, fd, offset, size); + + return 0; +} + +/* + * ioc_writev_cbk - + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + */ +int32_t +ioc_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + ioc_local_t *local = frame->local; + uint64_t ioc_inode = 0; + + inode_ctx_get (local->fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +/* + * ioc_writev + * + * @frame: + * @this: + * @fd: + * @vector: + * @count: + * @offset: + * + */ +int32_t +ioc_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + ioc_local_t *local = NULL; + uint64_t ioc_inode = 0; + + local = CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (local); + + /* TODO: why is it not fd_ref'ed */ + local->fd = fd; + frame->local = local; + + inode_ctx_get (fd->inode, this, &ioc_inode); + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND (frame, + ioc_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + offset); + + return 0; +} + +/* + * ioc_truncate_cbk - + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf: + * + */ +int32_t +ioc_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +/* + * ioc_truncate - + * + * @frame: + * @this: + * @loc: + * @offset: + * + */ +int32_t +ioc_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + uint64_t ioc_inode = 0; + inode_ctx_get (loc->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND (frame, + ioc_truncate_cbk, + FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; +} + +/* + * ioc_ftruncate - + * + * @frame: + * @this: + * @fd: + * @offset: + * + */ +int32_t +ioc_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + uint64_t ioc_inode = 0; + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND (frame, + ioc_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} + +int32_t +ioc_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + STACK_UNWIND (frame, op_ret, op_errno, lock); + return 0; +} + +int32_t +ioc_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + ioc_inode_t *ioc_inode = NULL; + uint64_t tmp_inode = 0; + + inode_ctx_get (fd->inode, this, &tmp_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_inode; + if (!ioc_inode) { + gf_log (this->name, GF_LOG_ERROR, + "inode context is NULL: returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + ioc_inode_lock (ioc_inode); + { + gettimeofday (&ioc_inode->tv, NULL); + } + ioc_inode_unlock (ioc_inode); + + STACK_WIND (frame, ioc_lk_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lk, fd, cmd, lock); + return 0; +} + +int32_t +ioc_get_priority_list (const char *opt_str, struct list_head *first) +{ + int32_t max_pri = 0; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *tmp_str2 = NULL; + char *dup_str = NULL; + char *stripe_str = NULL; + char *pattern = NULL; + char *priority = NULL; + char *string = strdup (opt_str); + struct ioc_priority *curr = NULL; + + /* Get the pattern for cache priority. 
+ * "option priority *.jpg:1,abc*:2" etc + */ + /* TODO: inode_lru in table is statically hard-coded to 5, + * should be changed to run-time configuration + */ + stripe_str = strtok_r (string, ",", &tmp_str); + while (stripe_str) { + curr = CALLOC (1, sizeof (struct ioc_priority)); + ERR_ABORT (curr); + list_add_tail (&curr->list, first); + + dup_str = strdup (stripe_str); + pattern = strtok_r (dup_str, ":", &tmp_str1); + if (!pattern) + return -1; + priority = strtok_r (NULL, ":", &tmp_str1); + if (!priority) + return -1; + gf_log ("io-cache", + GF_LOG_DEBUG, + "ioc priority : pattern %s : priority %s", + pattern, + priority); + curr->pattern = strdup (pattern); + curr->priority = strtol (priority, &tmp_str2, 0); + if (tmp_str2 && (*tmp_str2)) + return -1; + else + max_pri = max (max_pri, curr->priority); + stripe_str = strtok_r (NULL, ",", &tmp_str); + } + + return max_pri; +} + +/* + * init - + * @this: + * + */ +int32_t +init (xlator_t *this) +{ + ioc_table_t *table; + dict_t *options = this->options; + uint32_t index = 0; + char *page_size_string = NULL; + char *cache_size_string = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: io-cache not configured with exactly " + "one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + table = (void *) CALLOC (1, sizeof (*table)); + ERR_ABORT (table); + + table->xl = this; + table->page_size = IOC_PAGE_SIZE; + table->cache_size = IOC_CACHE_SIZE; + + if (dict_get (options, "page-size")) + page_size_string = data_to_str (dict_get (options, + "page-size")); + + if (page_size_string) { + if (gf_string2bytesize (page_size_string, + &table->page_size) != 0) { + gf_log ("io-cache", GF_LOG_ERROR, + "invalid number format \"%s\" of " + "\"option page-size\"", + page_size_string); + return -1; + } + gf_log (this->name, GF_LOG_DEBUG, + "using page-size %"PRIu64"", table->page_size); + } + + if (dict_get (options, "cache-size")) + cache_size_string = data_to_str (dict_get (options, + "cache-size")); + if (cache_size_string) { + if (gf_string2bytesize (cache_size_string, + &table->cache_size) != 0) { + gf_log ("io-cache", GF_LOG_ERROR, + "invalid number format \"%s\" of " + "\"option cache-size\"", + cache_size_string); + return -1; + } + + gf_log (this->name, GF_LOG_DEBUG, + "using cache-size %"PRIu64"", table->cache_size); + } + + table->cache_timeout = 1; + + if (dict_get (options, "cache-timeout")) { + table->cache_timeout = + data_to_uint32 (dict_get (options, + "cache-timeout")); + gf_log (this->name, GF_LOG_DEBUG, + "Using %d seconds to revalidate cache", + table->cache_timeout); + } + + INIT_LIST_HEAD (&table->priority_list); + if (dict_get (options, "priority")) { + char *option_list = data_to_str (dict_get (options, + "priority")); + gf_log (this->name, GF_LOG_DEBUG, + "option path %s", option_list); + /* parse the list of pattern:priority */ + table->max_pri = ioc_get_priority_list (option_list, + &table->priority_list); + + if (table->max_pri == -1) + return -1; + } + table->max_pri ++; + INIT_LIST_HEAD (&table->inodes); + + table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head)); + ERR_ABORT (table->inode_lru); + for (index = 0; index < (table->max_pri); index++) + INIT_LIST_HEAD (&table->inode_lru[index]); 
+ + pthread_mutex_init (&table->table_lock, NULL); + this->private = table; + return 0; +} + +/* + * fini - + * + * @this: + * + */ +void +fini (xlator_t *this) +{ + ioc_table_t *table = this->private; + + pthread_mutex_destroy (&table->table_lock); + FREE (table); + + this->private = NULL; + return; +} + +struct xlator_fops fops = { + .open = ioc_open, + .create = ioc_create, + .readv = ioc_readv, + .writev = ioc_writev, + .truncate = ioc_truncate, + .ftruncate = ioc_ftruncate, + .utimens = ioc_utimens, + .lookup = ioc_lookup, + .lk = ioc_lk +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .forget = ioc_forget, + .release = ioc_release +}; + +struct volume_options options[] = { + { .key = {"priority"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"cache-timeout", "force-revalidate-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 60 + }, + { .key = {"page-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 16 * GF_UNIT_KB, + .max = 4 * GF_UNIT_MB + }, + { .key = {"cache-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 4 * GF_UNIT_MB, + .max = 6 * GF_UNIT_GB + }, + { .key = {NULL} }, +}; diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h new file mode 100644 index 000000000..e997f6e7c --- /dev/null +++ b/xlators/performance/io-cache/src/io-cache.h @@ -0,0 +1,330 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __IO_CACHE_H +#define __IO_CACHE_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include "compat-errno.h" + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "common-utils.h" +#include "call-stub.h" +#include <sys/time.h> +#include <fnmatch.h> + +#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */ +#define IOC_CACHE_SIZE (32 * 1024 * 1024) + +struct ioc_table; +struct ioc_local; +struct ioc_page; +struct ioc_inode; + +struct ioc_priority { + struct list_head list; + char *pattern; + uint32_t priority; +}; + +/* + * ioc_waitq - this structure is used to represents the waiting + * frames on a page + * + * @next: pointer to next object in waitq + * @data: pointer to the frame which is waiting + */ +struct ioc_waitq { + struct ioc_waitq *next; + void *data; + off_t pending_offset; + size_t pending_size; +}; + +/* + * ioc_fill - + * + */ +struct ioc_fill { + struct list_head list; /* list of ioc_fill structures of a frame */ + off_t offset; + size_t size; + struct iovec *vector; + int32_t count; + dict_t *refs; +}; + +struct ioc_local { + mode_t mode; + int32_t flags; + loc_t file_loc; + off_t offset; + size_t size; + int32_t op_ret; + int32_t op_errno; + struct list_head fill_list; /* list of ioc_fill structures */ + off_t pending_offset; /* offset from this frame should continue */ + size_t pending_size; /* size of data this frame is waiting on */ + struct ioc_inode *inode; + int32_t wait_count; + pthread_mutex_t local_lock; + struct ioc_waitq *waitq; + void *stub; + fd_t *fd; + int32_t need_xattr; + dict_t *xattr_req; +}; + +/* + * ioc_page - structure to store page of data from file + * + */ +struct ioc_page { + struct list_head pages; + struct list_head page_lru; + struct ioc_inode *inode; /* inode this page belongs to 
*/ + struct ioc_priority *priority; + char dirty; + char ready; + struct iovec *vector; + int32_t count; + off_t offset; + size_t size; + struct ioc_waitq *waitq; + dict_t *ref; + pthread_mutex_t page_lock; +}; + +struct ioc_inode { + struct ioc_table *table; + struct list_head pages; /* list of pages of this inode */ + struct list_head inode_list; /* list of inodes, maintained by io-cache translator */ + struct list_head inode_lru; + struct list_head page_lru; + struct ioc_waitq *waitq; + pthread_mutex_t inode_lock; + uint32_t weight; /* weight of the inode, increases on each read */ + time_t mtime; /* mtime of the server file when last cached */ + struct timeval tv; /* time-stamp at last re-validate */ +}; + +struct ioc_table { + uint64_t page_size; + uint64_t cache_size; + uint64_t cache_used; + struct list_head inodes; /* list of inodes cached */ + struct list_head active; + struct list_head *inode_lru; + struct list_head priority_list; + int32_t readv_count; + pthread_mutex_t table_lock; + xlator_t *xl; + uint32_t inode_count; + int32_t cache_timeout; + int32_t max_pri; +}; + +typedef struct ioc_table ioc_table_t; +typedef struct ioc_local ioc_local_t; +typedef struct ioc_page ioc_page_t; +typedef struct ioc_inode ioc_inode_t; +typedef struct ioc_waitq ioc_waitq_t; +typedef struct ioc_fill ioc_fill_t; + +void * +str_to_ptr (char *string); + +char * +ptr_to_str (void *ptr); + +int32_t +ioc_readv_disabled_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf); + +ioc_page_t * +ioc_page_get (ioc_inode_t *ioc_inode, + off_t offset); + +ioc_page_t * +ioc_page_create (ioc_inode_t *ioc_inode, + off_t offset); + +void +ioc_page_fault (ioc_inode_t *ioc_inode, + call_frame_t *frame, + fd_t *fd, + off_t offset); +void +ioc_wait_on_page (ioc_page_t *page, + call_frame_t *frame, + off_t offset, + size_t size); + +ioc_waitq_t * +ioc_page_wakeup (ioc_page_t *page); + 
+void +ioc_page_flush (ioc_page_t *page); + +ioc_waitq_t * +ioc_page_error (ioc_page_t *page, + int32_t op_ret, + int32_t op_errno); +void +ioc_page_purge (ioc_page_t *page); + +void +ioc_frame_return (call_frame_t *frame); + +void +ioc_waitq_return (ioc_waitq_t *waitq); + +void +ioc_frame_fill (ioc_page_t *page, + call_frame_t *frame, + off_t offset, + size_t size); + +#define ioc_inode_lock(ioc_inode) \ + do { \ + gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, \ + "locked inode(%p)", ioc_inode); \ + pthread_mutex_lock (&ioc_inode->inode_lock); \ + } while (0) + + +#define ioc_inode_unlock(ioc_inode) \ + do { \ + gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, \ + "unlocked inode(%p)", ioc_inode); \ + pthread_mutex_unlock (&ioc_inode->inode_lock); \ + } while (0) + + +#define ioc_table_lock(table) \ + do { \ + gf_log (table->xl->name, GF_LOG_DEBUG, \ + "locked table(%p)", table); \ + pthread_mutex_lock (&table->table_lock); \ + } while (0) + + +#define ioc_table_unlock(table) \ + do { \ + gf_log (table->xl->name, GF_LOG_DEBUG, \ + "unlocked table(%p)", table); \ + pthread_mutex_unlock (&table->table_lock); \ + } while (0) + + +#define ioc_local_lock(local) \ + do { \ + gf_log (local->inode->table->xl->name, GF_LOG_DEBUG, \ + "locked local(%p)", local); \ + pthread_mutex_lock (&local->local_lock); \ + } while (0) + + +#define ioc_local_unlock(local) \ + do { \ + gf_log (local->inode->table->xl->name, GF_LOG_DEBUG, \ + "unlocked local(%p)", local); \ + pthread_mutex_unlock (&local->local_lock); \ + } while (0) + + +#define ioc_page_lock(page) \ + do { \ + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, \ + "locked page(%p)", page); \ + pthread_mutex_lock (&page->page_lock); \ + } while (0) + + +#define ioc_page_unlock(page) \ + do { \ + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, \ + "unlocked page(%p)", page); \ + pthread_mutex_unlock (&page->page_lock); \ + } while (0) + + +static inline uint64_t +time_elapsed (struct timeval *now, + struct 
timeval *then) +{ + uint64_t sec = now->tv_sec - then->tv_sec; + + if (sec) + return sec; + + return 0; +} + +ioc_inode_t * +ioc_inode_search (ioc_table_t *table, + inode_t *inode); + +void +ioc_inode_destroy (ioc_inode_t *ioc_inode); + +ioc_inode_t * +ioc_inode_update (ioc_table_t *table, + inode_t *inode, + uint32_t weight); + +int64_t +ioc_page_destroy (ioc_page_t *page); + +int32_t +__ioc_inode_flush (ioc_inode_t *ioc_inode); + +void +ioc_inode_flush (ioc_inode_t *ioc_inode); + +void +ioc_inode_wakeup (call_frame_t *frame, + ioc_inode_t *ioc_inode, + struct stat *stbuf); + +int8_t +ioc_cache_still_valid (ioc_inode_t *ioc_inode, + struct stat *stbuf); + +int32_t +ioc_prune (ioc_table_t *table); + +int32_t +ioc_need_prune (ioc_table_t *table); + +#endif /* __READ_AHEAD_H */ diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c new file mode 100644 index 000000000..2e2e561dd --- /dev/null +++ b/xlators/performance/io-cache/src/ioc-inode.c @@ -0,0 +1,201 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "io-cache.h" + + +/* + * str_to_ptr - convert a string to pointer + * @string: string + * + */ +void * +str_to_ptr (char *string) +{ + void *ptr = (void *)strtoul (string, NULL, 16); + return ptr; +} + + +/* + * ptr_to_str - convert a pointer to string + * @ptr: pointer + * + */ +char * +ptr_to_str (void *ptr) +{ + char *str; + asprintf (&str, "%p", ptr); + return str; +} + +void +ioc_inode_wakeup (call_frame_t *frame, + ioc_inode_t *ioc_inode, + struct stat *stbuf) +{ + ioc_waitq_t *waiter = NULL, *waited = NULL; + ioc_waitq_t *page_waitq = NULL; + int8_t cache_still_valid = 1; + ioc_local_t *local = frame->local; + int8_t need_fault = 0; + ioc_page_t *waiter_page = NULL; + + ioc_inode_lock (ioc_inode); + { + waiter = ioc_inode->waitq; + ioc_inode->waitq = NULL; + } + ioc_inode_unlock (ioc_inode); + + if (stbuf) + cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf); + else + cache_still_valid = 0; + + if (!waiter) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "cache validate called without any " + "page waiting to be validated"); + } + + while (waiter) { + waiter_page = waiter->data; + page_waitq = NULL; + + if (waiter_page) { + if (cache_still_valid) { + /* cache valid, wake up page */ + ioc_inode_lock (ioc_inode); + { + page_waitq = + ioc_page_wakeup (waiter_page); + } + ioc_inode_unlock (ioc_inode); + if (page_waitq) + ioc_waitq_return (page_waitq); + } else { + /* cache invalid, generate page fault and set + * page->ready = 0, to avoid double faults + */ + ioc_inode_lock (ioc_inode); + + if (waiter_page->ready) { + waiter_page->ready = 0; + need_fault = 1; + } else { + gf_log (frame->this->name, + GF_LOG_DEBUG, + "validate frame(%p) is waiting" + "for in-transit page = %p", + frame, waiter_page); + } + + ioc_inode_unlock (ioc_inode); + + if (need_fault) { + need_fault = 0; + ioc_page_fault (ioc_inode, frame, + local->fd, + waiter_page->offset); + } + } + } + + waited 
= waiter; + waiter = waiter->next; + + waited->data = NULL; + free (waited); + } +} + +/* + * ioc_inode_update - create a new ioc_inode_t structure and add it to + * the table table. fill in the fields which are derived + * from inode_t corresponding to the file + * + * @table: io-table structure + * @inode: inode structure + * + * not for external reference + */ +ioc_inode_t * +ioc_inode_update (ioc_table_t *table, + inode_t *inode, + uint32_t weight) +{ + ioc_inode_t *ioc_inode = CALLOC (1, sizeof (ioc_inode_t)); + ERR_ABORT (ioc_inode); + + ioc_inode->table = table; + + /* initialize the list for pages */ + INIT_LIST_HEAD (&ioc_inode->pages); + INIT_LIST_HEAD (&ioc_inode->page_lru); + + ioc_table_lock (table); + + table->inode_count++; + list_add (&ioc_inode->inode_list, &table->inodes); + list_add_tail (&ioc_inode->inode_lru, &table->inode_lru[weight]); + + gf_log (table->xl->name, + GF_LOG_DEBUG, + "adding to inode_lru[%d]", weight); + + ioc_table_unlock (table); + + pthread_mutex_init (&ioc_inode->inode_lock, NULL); + ioc_inode->weight = weight; + + return ioc_inode; +} + + +/* + * ioc_inode_destroy - destroy an ioc_inode_t object. + * + * @inode: inode to destroy + * + * to be called only from ioc_forget. + */ +void +ioc_inode_destroy (ioc_inode_t *ioc_inode) +{ + ioc_table_t *table = ioc_inode->table; + + ioc_table_lock (table); + table->inode_count--; + list_del (&ioc_inode->inode_list); + list_del (&ioc_inode->inode_lru); + ioc_table_unlock (table); + + ioc_inode_flush (ioc_inode); + + pthread_mutex_destroy (&ioc_inode->inode_lock); + free (ioc_inode); +} + diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c new file mode 100644 index 000000000..e549f0bb5 --- /dev/null +++ b/xlators/performance/io-cache/src/page.c @@ -0,0 +1,778 @@ +/* + Copyright (c) 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "io-cache.h" +#include <assert.h> +#include <sys/time.h> + +ioc_page_t * +ioc_page_get (ioc_inode_t *ioc_inode, + off_t offset) +{ + int8_t found = 0; + ioc_page_t *page = NULL; + ioc_table_t *table = ioc_inode->table; + off_t rounded_offset = floor (offset, table->page_size); + + if (list_empty (&ioc_inode->pages)) { + return NULL; + } + + list_for_each_entry (page, &ioc_inode->pages, pages) { + if (page->offset == rounded_offset) { + found = 1; + break; + } + } + + /* was previously returning ioc_inode itself.., + * 1st of its type and found one more downstairs :O */ + if (!found){ + page = NULL; + } else { + /* push the page to the end of the lru list */ + list_move_tail (&page->page_lru, &ioc_inode->page_lru); + } + + return page; +} + + +/* + * ioc_page_destroy - + * + * @page: + * + */ +int64_t +ioc_page_destroy (ioc_page_t *page) +{ + int64_t page_size = 0; + + page_size = page->size; + + if (page->waitq) { + /* frames waiting on this page, do not destroy this page */ + page_size = -1; + } else { + + list_del (&page->pages); + list_del (&page->page_lru); + + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, + "destroying page = %p, offset = %"PRId64" " + "&& inode = %p", 
+ page, page->offset, page->inode); + + if (page->vector){ + dict_unref (page->ref); + free (page->vector); + page->vector = NULL; + } + + page->inode = NULL; + + } + + if (page_size != -1) { + pthread_mutex_destroy (&page->page_lock); + free (page); + } + + return page_size; +} + +/* + * ioc_prune - prune the cache. we have a limit to the number of pages we + * can have in-memory. + * + * @table: ioc_table_t of this translator + * + */ +int32_t +ioc_prune (ioc_table_t *table) +{ + ioc_inode_t *curr = NULL, *next_ioc_inode = NULL; + ioc_page_t *page = NULL, *next = NULL; + int32_t ret = -1; + int32_t index = 0; + uint64_t size_to_prune = 0; + uint64_t size_pruned = 0; + + ioc_table_lock (table); + { + size_to_prune = table->cache_used - table->cache_size; + /* take out the least recently used inode */ + for (index=0; index < table->max_pri; index++) { + list_for_each_entry_safe (curr, next_ioc_inode, + &table->inode_lru[index], + inode_lru) { + /* prune page-by-page for this inode, till + * we reach the equilibrium */ + ioc_inode_lock (curr); + /* { */ + + list_for_each_entry_safe (page, next, + &curr->page_lru, + page_lru) { + /* done with all pages, and not + * reached equilibrium yet?? + * continue with next inode in + * lru_list */ + size_pruned += page->size; + ret = ioc_page_destroy (page); + + if (ret != -1) + table->cache_used -= ret; + + gf_log (table->xl->name, + GF_LOG_DEBUG, + "index = %d && table->cache_" + "used = %"PRIu64" && table->" + "cache_size = %"PRIu64, + index, table->cache_used, + table->cache_size); + + if (size_pruned >= size_to_prune) + break; + } /* list_for_each_entry_safe(page...) */ + if (list_empty (&curr->pages)) { + list_del_init (&curr->inode_lru); + } + + /* } */ + ioc_inode_unlock (curr); + + if (size_pruned >= size_to_prune) + break; + } /* list_for_each_entry_safe (curr...) */ + + if (size_pruned >= size_to_prune) + break; + } /* for(index=0;...) 
*/ + + } /* ioc_inode_table locked region end */ + ioc_table_unlock (table); + + return 0; +} + +/* + * ioc_page_create - create a new page. + * + * @ioc_inode: + * @offset: + * + */ +ioc_page_t * +ioc_page_create (ioc_inode_t *ioc_inode, + off_t offset) +{ + ioc_table_t *table = ioc_inode->table; + ioc_page_t *page = NULL; + off_t rounded_offset = floor (offset, table->page_size); + ioc_page_t *newpage = CALLOC (1, sizeof (*newpage)); + ERR_ABORT (newpage); + + if (ioc_inode) + table = ioc_inode->table; + else { + return NULL; + } + + newpage->offset = rounded_offset; + newpage->inode = ioc_inode; + pthread_mutex_init (&newpage->page_lock, NULL); + + list_add_tail (&newpage->page_lru, &ioc_inode->page_lru); + list_add_tail (&newpage->pages, &ioc_inode->pages); + + page = newpage; + + gf_log ("io-cache", GF_LOG_DEBUG, + "returning new page %p", page); + return page; +} + +/* + * ioc_wait_on_page - pause a frame to wait till the arrival of a page. + * here we need to handle the case when the frame who calls wait_on_page + * himself has caused page_fault + * + * @page: page to wait on + * @frame: call frame who is waiting on page + * + */ +void +ioc_wait_on_page (ioc_page_t *page, + call_frame_t *frame, + off_t offset, + size_t size) +{ + ioc_waitq_t *waitq = NULL; + ioc_local_t *local = frame->local; + + waitq = CALLOC (1, sizeof (*waitq)); + ERR_ABORT (waitq); + + gf_log (frame->this->name, GF_LOG_DEBUG, + "frame(%p) waiting on page = %p, offset=%"PRId64", " + "size=%"GF_PRI_SIZET"", + frame, page, offset, size); + + waitq->data = frame; + waitq->next = page->waitq; + waitq->pending_offset = offset; + waitq->pending_size = size; + page->waitq = waitq; + /* one frame can wait only once on a given page, + * local->wait_count is number of pages a frame is waiting on */ + ioc_local_lock (local); + { + local->wait_count++; + } + ioc_local_unlock (local); +} + + +/* + * ioc_cache_still_valid - see if cached pages ioc_inode are still valid + * against given stbuf + * + * 
@ioc_inode: + * @stbuf: + * + * assumes ioc_inode is locked + */ +int8_t +ioc_cache_still_valid (ioc_inode_t *ioc_inode, + struct stat *stbuf) +{ + int8_t cache_still_valid = 1; + +#if 0 + if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime) || + (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec)) + cache_still_valid = 0; + +#else + if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime)) + cache_still_valid = 0; + +#endif + +#if 0 + /* talk with avati@zresearch.com to enable this section */ + if (!ioc_inode->mtime && stbuf) { + cache_still_valid = 1; + ioc_inode->mtime = stbuf->st_mtime; + } +#endif + + return cache_still_valid; +} + + +void +ioc_waitq_return (ioc_waitq_t *waitq) +{ + ioc_waitq_t *trav = NULL; + ioc_waitq_t *next = NULL; + call_frame_t *frame = NULL; + + for (trav = waitq; trav; trav = next) { + next = trav->next; + + frame = trav->data; + ioc_frame_return (frame); + free (trav); + } +} + + +int +ioc_fault_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + ioc_local_t *local = frame->local; + off_t offset = local->pending_offset; + ioc_inode_t *ioc_inode = local->inode; + ioc_table_t *table = ioc_inode->table; + ioc_page_t *page = NULL; + off_t trav_offset = 0; + size_t payload_size = 0; + int32_t destroy_size = 0; + size_t page_size = 0; + ioc_waitq_t *waitq = NULL; + + trav_offset = offset; + payload_size = op_ret; + + ioc_inode_lock (ioc_inode); + { + if (op_ret == -1 || + (op_ret >= 0 && + !ioc_cache_still_valid(ioc_inode, stbuf))) { + gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, + "cache for inode(%p) is invalid. 
flushing " + "all pages", ioc_inode); + destroy_size = __ioc_inode_flush (ioc_inode); + } + + if (op_ret >= 0) + ioc_inode->mtime = stbuf->st_mtime; + + gettimeofday (&ioc_inode->tv, NULL); + + if (op_ret < 0) { + /* error, readv returned -1 */ + page = ioc_page_get (ioc_inode, offset); + if (page) + waitq = ioc_page_error (page, op_ret, + op_errno); + } else { + gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, + "op_ret = %d", op_ret); + page = ioc_page_get (ioc_inode, offset); + if (!page) { + /* page was flushed */ + /* some serious bug ? */ + gf_log (this->name, GF_LOG_DEBUG, + "wasted copy: %"PRId64"[+%"PRId64"] " + "ioc_inode=%p", offset, + table->page_size, ioc_inode); + } else { + if (page->vector) { + dict_unref (page->ref); + free (page->vector); + page->vector = NULL; + } + + /* keep a copy of the page for our cache */ + page->vector = iov_dup (vector, count); + page->count = count; + if (frame->root->rsp_refs) { + dict_ref (frame->root->rsp_refs); + page->ref = frame->root->rsp_refs; + } else { + /* TODO: we have got a response to + * our request and no data */ + gf_log (this->name, GF_LOG_CRITICAL, + "frame>root>rsp_refs is null"); + } /* if(frame->root->rsp_refs) */ + + /* page->size should indicate exactly how + * much the readv call to the child + * translator returned. 
earlier op_ret + * from child translator was used, which + * gave rise to a bug where reads from + * io-cached volume were resulting in 0 + * byte replies */ + page_size = iov_length(vector, count); + + page->size = page_size; + + if (page->waitq) { + /* wake up all the frames waiting on + * this page, including + * the frame which triggered fault */ + waitq = ioc_page_wakeup (page); + } /* if(page->waitq) */ + } /* if(!page)...else */ + } /* if(op_ret < 0)...else */ + } /* ioc_inode locked region end */ + ioc_inode_unlock (ioc_inode); + + ioc_waitq_return (waitq); + + if (page_size) { + ioc_table_lock (table); + { + table->cache_used += page_size; + } + ioc_table_unlock (table); + } + + if (destroy_size) { + ioc_table_lock (table); + { + table->cache_used -= destroy_size; + } + ioc_table_unlock (table); + } + + if (ioc_need_prune (ioc_inode->table)) { + ioc_prune (ioc_inode->table); + } + + gf_log (this->name, GF_LOG_DEBUG, "fault frame %p returned", frame); + pthread_mutex_destroy (&local->local_lock); + + fd_unref (local->fd); + + STACK_DESTROY (frame->root); + return 0; +} + +/* + * ioc_page_fault - + * + * @ioc_inode: + * @frame: + * @fd: + * @offset: + * + */ +void +ioc_page_fault (ioc_inode_t *ioc_inode, + call_frame_t *frame, + fd_t *fd, + off_t offset) +{ + ioc_table_t *table = ioc_inode->table; + call_frame_t *fault_frame = copy_frame (frame); + ioc_local_t *fault_local = CALLOC (1, sizeof (ioc_local_t)); + ERR_ABORT (fault_local); + + /* NOTE: copy_frame() means, the frame the fop whose fd_ref we + * are using till now won't be valid till we get reply from server. 
+ * we unref this fd, in fault_cbk */ + fault_local->fd = fd_ref (fd); + + fault_frame->local = fault_local; + pthread_mutex_init (&fault_local->local_lock, NULL); + + INIT_LIST_HEAD (&fault_local->fill_list); + fault_local->pending_offset = offset; + fault_local->pending_size = table->page_size; + fault_local->inode = ioc_inode; + + gf_log (frame->this->name, GF_LOG_DEBUG, + "stack winding page fault for offset = %"PRId64" with " + "frame %p", offset, fault_frame); + + STACK_WIND (fault_frame, ioc_fault_cbk, + FIRST_CHILD(fault_frame->this), + FIRST_CHILD(fault_frame->this)->fops->readv, + fd, table->page_size, offset); + return; +} + +void +ioc_frame_fill (ioc_page_t *page, + call_frame_t *frame, + off_t offset, + size_t size) +{ + ioc_local_t *local = frame->local; + ioc_fill_t *fill = NULL; + off_t src_offset = 0; + off_t dst_offset = 0; + ssize_t copy_size = 0; + ioc_inode_t *ioc_inode = page->inode; + + gf_log (frame->this->name, GF_LOG_DEBUG, + "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" " + "&& page->size = %"GF_PRI_SIZET" && wait_count = %d", + frame, offset, size, page->size, local->wait_count); + + /* immediately move this page to the end of the page_lru list */ + list_move_tail (&page->page_lru, &ioc_inode->page_lru); + /* fill local->pending_size bytes from local->pending_offset */ + if (local->op_ret != -1 && page->size) { + if (offset > page->offset) + /* offset is offset in file, convert it to offset in + * page */ + src_offset = offset - page->offset; + /*FIXME: since offset is the offset within page is the + * else case valid? */ + else + /* local->pending_offset is in previous page. 
do not + * fill until we have filled all previous pages */ + dst_offset = page->offset - offset; + + /* we have to copy from offset to either end of this page + * or till the requested size */ + copy_size = min (page->size - src_offset, + size - dst_offset); + + if (copy_size < 0) { + /* if page contains fewer bytes and the required offset + is beyond the page size in the page */ + copy_size = src_offset = 0; + } + + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, + "copy_size = %"GF_PRI_SIZET" && src_offset = " + "%"PRId64" && dst_offset = %"PRId64"", + copy_size, src_offset, dst_offset); + + { + ioc_fill_t *new = CALLOC (1, sizeof (*new)); + ERR_ABORT (new); + new->offset = page->offset; + new->size = copy_size; + new->refs = dict_ref (page->ref); + new->count = iov_subset (page->vector, + page->count, + src_offset, + src_offset + copy_size, + NULL); + new->vector = CALLOC (new->count, + sizeof (struct iovec)); + ERR_ABORT (new->vector); + new->count = iov_subset (page->vector, + page->count, + src_offset, + src_offset + copy_size, + new->vector); + + + + /* add the ioc_fill to fill_list for this frame */ + if (list_empty (&local->fill_list)) { + /* if list is empty, then this is the first + * time we are filling frame, add the + * ioc_fill_t to the end of list */ + list_add_tail (&new->list, &local->fill_list); + } else { + int8_t found = 0; + /* list is not empty, we need to look for + * where this offset fits in list */ + list_for_each_entry (fill, &local->fill_list, + list) { + if (fill->offset > new->offset) { + found = 1; + break; + } + } + + if (found) { + found = 0; + list_add_tail (&new->list, + &fill->list); + } else { + list_add_tail (&new->list, + &local->fill_list); + } + } + } + local->op_ret += copy_size; + } +} + +/* + * ioc_frame_unwind - frame unwinds only from here + * + * @frame: call frame to unwind + * + * to be used only by ioc_frame_return(), when a frame has + * finished waiting on all pages, required + * + */ +static void 
+ioc_frame_unwind (call_frame_t *frame) +{ + ioc_local_t *local = frame->local; + ioc_fill_t *fill = NULL, *next = NULL; + int32_t count = 0; + struct iovec *vector = NULL; + int32_t copied = 0; + dict_t *refs = NULL; + struct stat stbuf = {0,}; + int32_t op_ret = 0; + + // ioc_local_lock (local); + refs = get_new_dict (); + + frame->local = NULL; + + if (list_empty (&local->fill_list)) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "frame(%p) has 0 entries in local->fill_list " + "(offset = %"PRId64" && size = %"GF_PRI_SIZET")", + frame, local->offset, local->size); + } + + list_for_each_entry (fill, &local->fill_list, list) { + count += fill->count; + } + + vector = CALLOC (count, sizeof (*vector)); + ERR_ABORT (vector); + + list_for_each_entry_safe (fill, next, &local->fill_list, list) { + memcpy (((char *)vector) + copied, + fill->vector, + fill->count * sizeof (*vector)); + + copied += (fill->count * sizeof (*vector)); + + dict_copy (fill->refs, refs); + + list_del (&fill->list); + dict_unref (fill->refs); + free (fill->vector); + free (fill); + } + + frame->root->rsp_refs = dict_ref (refs); + + op_ret = iov_length (vector, count); + gf_log (frame->this->name, GF_LOG_DEBUG, + "frame(%p) unwinding with op_ret=%d", frame, op_ret); + + // ioc_local_unlock (local); + + STACK_UNWIND (frame, + op_ret, + local->op_errno, + vector, + count, + &stbuf); + + dict_unref (refs); + + pthread_mutex_destroy (&local->local_lock); + free (local); + free (vector); + + return; +} + +/* + * ioc_frame_return - + * @frame: + * + * to be called only when a frame is waiting on an in-transit page + */ +void +ioc_frame_return (call_frame_t *frame) +{ + ioc_local_t *local = frame->local; + int32_t wait_count; + assert (local->wait_count > 0); + + ioc_local_lock (local); + { + wait_count = --local->wait_count; + } + ioc_local_unlock (local); + + if (!wait_count) { + ioc_frame_unwind (frame); + } + + return; +} + +/* + * ioc_page_wakeup - + * @page: + * + * to be called only when a frame 
is waiting on an in-transit page + */ +ioc_waitq_t * +ioc_page_wakeup (ioc_page_t *page) +{ + ioc_waitq_t *waitq = NULL, *trav = NULL; + call_frame_t *frame = NULL; + + waitq = page->waitq; + page->waitq = NULL; + + trav = waitq; + page->ready = 1; + + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, + "page is %p && waitq = %p", page, waitq); + + for (trav = waitq; trav; trav = trav->next) { + frame = trav->data; + ioc_frame_fill (page, frame, trav->pending_offset, + trav->pending_size); + } + + return waitq; +} + + +/* + * ioc_page_error - + * @page: + * @op_ret: + * @op_errno: + * + */ +ioc_waitq_t * +ioc_page_error (ioc_page_t *page, + int32_t op_ret, + int32_t op_errno) +{ + ioc_waitq_t *waitq = NULL, *trav = NULL; + call_frame_t *frame = NULL; + int64_t ret = 0; + ioc_table_t *table = NULL; + ioc_local_t *local = NULL; + + waitq = page->waitq; + page->waitq = NULL; + + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, + "page error for page = %p & waitq = %p", page, waitq); + + for (trav = waitq; trav; trav = trav->next) { + + frame = trav->data; + + local = frame->local; + ioc_local_lock (local); + { + if (local->op_ret != -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + } + ioc_local_unlock (local); + } + + table = page->inode->table; + ret = ioc_page_destroy (page); + + if (ret != -1) { + table->cache_used -= ret; + } + + return waitq; +} diff --git a/xlators/performance/io-threads/Makefile.am b/xlators/performance/io-threads/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/performance/io-threads/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am new file mode 100644 index 000000000..38dea3eb7 --- /dev/null +++ b/xlators/performance/io-threads/src/Makefile.am @@ -0,0 +1,14 @@ +xlator_LTLIBRARIES = io-threads.la +xlatordir = 
$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +io_threads_la_LDFLAGS = -module -avoidversion + +io_threads_la_SOURCES = io-threads.c +io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = io-threads.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c new file mode 100644 index 000000000..5acdd627d --- /dev/null +++ b/xlators/performance/io-threads/src/io-threads.c @@ -0,0 +1,1254 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "call-stub.h" +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "io-threads.h" + +static void +iot_queue (iot_worker_t *worker, + call_stub_t *stub); + +static call_stub_t * +iot_dequeue (iot_worker_t *worker); + +static iot_worker_t * +iot_schedule (iot_conf_t *conf, + iot_file_t *file, + ino_t ino) +{ + int32_t cnt = (ino % conf->thread_count); + iot_worker_t *trav = conf->workers.next; + + for (; cnt; cnt--) + trav = trav->next; + + if (file) + file->worker = trav; + trav->fd_count++; + return trav; +} + +int32_t +iot_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + iot_conf_t *conf = this->private; + + if (op_ret >= 0) { + iot_file_t *file = CALLOC (1, sizeof (*file)); + ERR_ABORT (file); + + iot_schedule (conf, file, fd->inode->ino); + file->fd = fd; + + fd_ctx_set (fd, this, (uint64_t)(long)file); + + pthread_mutex_lock (&conf->files_lock); + file->next = &conf->files; + file->prev = file->next->prev; + file->next->prev = file; + file->prev->next = file; + pthread_mutex_unlock (&conf->files_lock); + } + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + +int32_t +iot_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + STACK_WIND (frame, + iot_open_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, + loc, + flags, + fd); + return 0; +} + + +int32_t +iot_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *stbuf) +{ + iot_conf_t *conf = this->private; + + if (op_ret >= 0) { + iot_file_t *file = CALLOC (1, sizeof (*file)); + ERR_ABORT (file); + + iot_schedule (conf, file, fd->inode->ino); + file->fd = fd; + + fd_ctx_set (fd, this, (uint64_t)(long)file); + + pthread_mutex_lock (&conf->files_lock); + file->next = 
&conf->files; + file->prev = file->next->prev; + file->next->prev = file; + file->prev->next = file; + pthread_mutex_unlock (&conf->files_lock); + } + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf); + return 0; +} + +int32_t +iot_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + STACK_WIND (frame, + iot_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, + flags, + mode, + fd); + return 0; +} + + + +int32_t +iot_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + iot_local_t *local = frame->local; + + local->frame_size = 0; //iov_length (vector, count); + + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + +static int32_t +iot_readv_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + STACK_WIND (frame, + iot_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, + size, + offset); + return 0; +} + +int32_t +iot_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + + stub = fop_readv_stub (frame, + iot_readv_wrapper, + fd, + size, + offset); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, + "cannot get readv call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL, 0); + return 0; + } + + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_flush_cbk (call_frame_t *frame, 
+ void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +static int32_t +iot_flush_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + STACK_WIND (frame, + iot_flush_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + return 0; +} + +int32_t +iot_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + + frame->local = local; + + stub = fop_flush_stub (frame, + iot_flush_wrapper, + fd); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get flush_cbk call stub"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +static int32_t +iot_fsync_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync) +{ + STACK_WIND (frame, + iot_fsync_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsync, + fd, + datasync); + return 0; +} + +int32_t +iot_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker 
= file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + + frame->local = local; + + stub = fop_fsync_stub (frame, + iot_fsync_wrapper, + fd, + datasync); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fsync_cbk call stub"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + iot_local_t *local = frame->local; + + local->frame_size = 0; /* hehe, caught me! */ + + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +static int32_t +iot_writev_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + STACK_WIND (frame, + iot_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, + vector, + count, + offset); + return 0; +} + +int32_t +iot_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + + if (frame->root->req_refs) + local->frame_size = dict_serialized_length (frame->root->req_refs); + else + local->frame_size = iov_length (vector, count); + frame->local = local; + + stub = fop_writev_stub (frame, iot_writev_wrapper, + fd, vector, count, offset); + + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get writev call stub"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + + iot_queue (worker, stub); + + return 0; +} + + +int32_t 
+iot_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *flock) +{ + STACK_UNWIND (frame, op_ret, op_errno, flock); + return 0; +} + + +static int32_t +iot_lk_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *flock) +{ + STACK_WIND (frame, + iot_lk_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, + fd, + cmd, + flock); + return 0; +} + + +int32_t +iot_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *flock) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + + stub = fop_lk_stub (frame, iot_lk_wrapper, + fd, cmd, flock); + + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_lk call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + + iot_queue (worker, stub); + + return 0; +} + + +int32_t +iot_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +static int32_t +iot_stat_wrapper (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + STACK_WIND (frame, + iot_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; +} + +int32_t +iot_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_worker_t *worker = NULL; + iot_conf_t *conf; + fd_t *fd = NULL; + + conf = this->private; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = 
local; + + fd = fd_lookup (loc->inode, frame->root->pid); + + if (fd == NULL) { + STACK_WIND(frame, + iot_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; + } + + fd_unref (fd); + + worker = iot_schedule (conf, NULL, loc->inode->ino); + + stub = fop_stat_stub (frame, + iot_stat_wrapper, + loc); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_stat call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + + +int32_t +iot_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +static int32_t +iot_fstat_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + STACK_WIND (frame, + iot_fstat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd); + return 0; +} + +int32_t +iot_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + stub = fop_fstat_stub (frame, + iot_fstat_wrapper, + fd); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_fstat call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +static int32_t +iot_truncate_wrapper (call_frame_t *frame, + 
xlator_t *this, + loc_t *loc, + off_t offset) +{ + STACK_WIND (frame, + iot_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; +} + +int32_t +iot_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_worker_t *worker = NULL; + iot_conf_t *conf; + fd_t *fd = NULL; + + conf = this->private; + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + + fd = fd_lookup (loc->inode, frame->root->pid); + + if (fd == NULL) { + STACK_WIND(frame, + iot_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; + } + + fd_unref (fd); + + worker = iot_schedule (conf, NULL, loc->inode->ino); + + stub = fop_truncate_stub (frame, + iot_truncate_wrapper, + loc, + offset); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_stat call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +static int32_t +iot_ftruncate_wrapper (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + STACK_WIND (frame, + iot_ftruncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} + +int32_t +iot_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_file_t *file = NULL; + iot_worker_t *worker = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, + "fd context is NULL, returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (iot_file_t *)(long)tmp_file; + worker = file->worker; + + local = 
CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + + stub = fop_ftruncate_stub (frame, + iot_ftruncate_wrapper, + fd, + offset); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_ftruncate call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +static int32_t +iot_utimens_wrapper (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + STACK_WIND (frame, + iot_utimens_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + + return 0; +} + +int32_t +iot_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + call_stub_t *stub; + iot_local_t *local = NULL; + iot_worker_t *worker = NULL; + iot_conf_t *conf; + fd_t *fd = NULL; + + conf = this->private; + + local = CALLOC (1, sizeof (*local)); + ERR_ABORT (local); + frame->local = local; + + fd = fd_lookup (loc->inode, frame->root->pid); + + if (fd == NULL) { + STACK_WIND(frame, + iot_utimens_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimens, + loc, + tv); + return 0; + } + + fd_unref (fd); + + worker = iot_schedule (conf, NULL, loc->inode->ino); + + stub = fop_utimens_stub (frame, + iot_utimens_wrapper, + loc, + tv); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_utimens call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + + +int32_t +iot_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum); + return 0; +} + +static int32_t +iot_checksum_wrapper (call_frame_t 
*frame, + xlator_t *this, + loc_t *loc, + int32_t flags) +{ + STACK_WIND (frame, + iot_checksum_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->checksum, + loc, + flags); + + return 0; +} + +int32_t +iot_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags) +{ + call_stub_t *stub = NULL; + iot_local_t *local = NULL; + iot_worker_t *worker = NULL; + iot_conf_t *conf = NULL; + + conf = this->private; + + local = CALLOC (1, sizeof (*local)); + frame->local = local; + + worker = iot_schedule (conf, NULL, conf->misc_thread_index++); + + stub = fop_checksum_stub (frame, + iot_checksum_wrapper, + loc, + flags); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_checksum call stub"); + STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + + +int32_t +iot_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +static int32_t +iot_unlink_wrapper (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + STACK_WIND (frame, + iot_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + loc); + + return 0; +} + +int32_t +iot_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + call_stub_t *stub = NULL; + iot_local_t *local = NULL; + iot_worker_t *worker = NULL; + iot_conf_t *conf = NULL; + + conf = this->private; + + local = CALLOC (1, sizeof (*local)); + frame->local = local; + + worker = iot_schedule (conf, NULL, conf->misc_thread_index++); + + stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot get fop_unlink call stub"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + iot_queue (worker, stub); + + return 0; +} + +int32_t +iot_release (xlator_t *this, + fd_t *fd) +{ + iot_file_t *file = NULL; + iot_conf_t *conf = NULL; + uint64_t tmp_file = 0; + int ret = 0; + + conf = 
this->private; + ret = fd_ctx_del (fd, this, &tmp_file); + if (ret) + return 0; + + file = (iot_file_t *)(long)tmp_file; + + pthread_mutex_lock (&conf->files_lock); + { + (file->prev)->next = file->next; + (file->next)->prev = file->prev; + } + pthread_mutex_unlock (&conf->files_lock); + + FREE (file); + return 0; +} + + +static void +iot_queue (iot_worker_t *worker, + call_stub_t *stub) +{ + iot_queue_t *queue; + iot_conf_t *conf = worker->conf; + iot_local_t *local = stub->frame->local; + size_t frame_size = local->frame_size; + + queue = CALLOC (1, sizeof (*queue)); + ERR_ABORT (queue); + queue->stub = stub; + + pthread_mutex_lock (&conf->lock); + + /* + while (worker->queue_size >= worker->queue_limit) + pthread_cond_wait (&worker->q_cond, &worker->lock); + */ + if (conf->cache_size) { + while (frame_size && (conf->current_size >= conf->cache_size)) + pthread_cond_wait (&conf->q_cond, &conf->lock); + } + + queue->next = &worker->queue; + queue->prev = worker->queue.prev; + + queue->next->prev = queue; + queue->prev->next = queue; + + /* dq_cond */ + worker->queue_size++; + worker->q++; + + conf->current_size += local->frame_size; + + pthread_cond_broadcast (&worker->dq_cond); + + pthread_mutex_unlock (&conf->lock); +} + +static call_stub_t * +iot_dequeue (iot_worker_t *worker) +{ + call_stub_t *stub = NULL; + iot_queue_t *queue = NULL; + iot_conf_t *conf = worker->conf; + iot_local_t *local = NULL; + + + pthread_mutex_lock (&conf->lock); + + while (!worker->queue_size) + /* + pthread_cond_wait (&worker->dq_cond, &worker->lock); + */ + pthread_cond_wait (&worker->dq_cond, &conf->lock); + + queue = worker->queue.next; + + queue->next->prev = queue->prev; + queue->prev->next = queue->next; + + stub = queue->stub; + local = stub->frame->local; + + worker->queue_size--; + worker->dq++; + + /* q_cond */ + conf->current_size -= local->frame_size; + + pthread_cond_broadcast (&conf->q_cond); + + pthread_mutex_unlock (&conf->lock); + + FREE (queue); + + return stub; +} + 
+static void * +iot_worker (void *arg) +{ + iot_worker_t *worker = arg; + + while (1) { + call_stub_t *stub; + + stub = iot_dequeue (worker); + call_resume (stub); + } +} + +#if 0 +static void * +iot_reply (void *arg) +{ + iot_worker_t *reply = arg; + + while (1) { + call_stub_t *stub; + + stub = iot_dequeue (reply); + FREE (stub->frame->local); + stub->frame->local = NULL; + call_resume (stub); + } +} +#endif + +static void +workers_init (iot_conf_t *conf) +{ + int i; + + conf->workers.next = &conf->workers; + conf->workers.prev = &conf->workers; + + for (i=0; i<conf->thread_count; i++) { + + iot_worker_t *worker = CALLOC (1, sizeof (*worker)); + ERR_ABORT (worker); + + worker->next = &conf->workers; + worker->prev = conf->workers.prev; + worker->next->prev = worker; + worker->prev->next = worker; + + worker->queue.next = &worker->queue; + worker->queue.prev = &worker->queue; + + /* + pthread_mutex_init (&worker->lock, NULL); + pthread_cond_init (&worker->q_cond, NULL); + */ + pthread_cond_init (&worker->dq_cond, NULL); + + /* + worker->queue_limit = conf->queue_limit; + */ + + worker->conf = conf; + + pthread_create (&worker->thread, NULL, iot_worker, worker); + } +} + +int32_t +init (xlator_t *this) +{ + iot_conf_t *conf; + dict_t *options = this->options; + + if (!this->children || this->children->next) { + gf_log ("io-threads", + GF_LOG_ERROR, + "FATAL: iot not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + conf = (void *) CALLOC (1, sizeof (*conf)); + ERR_ABORT (conf); + + conf->thread_count = 1; + + if (dict_get (options, "thread-count")) { + conf->thread_count = data_to_int32 (dict_get (options, + "thread-count")); + gf_log ("io-threads", + GF_LOG_DEBUG, + "Using conf->thread_count = %d", + conf->thread_count); + } + + pthread_mutex_init (&conf->lock, NULL); + pthread_cond_init (&conf->q_cond, NULL); + + conf->files.next = &conf->files; + conf->files.prev = &conf->files; + pthread_mutex_init (&conf->files_lock, NULL); + + workers_init (conf); + + this->private = conf; + return 0; +} + +void +fini (xlator_t *this) +{ + iot_conf_t *conf = this->private; + + FREE (conf); + + this->private = NULL; + return; +} + +struct xlator_fops fops = { + .open = iot_open, + .create = iot_create, + .readv = iot_readv, + .writev = iot_writev, + .flush = iot_flush, + .fsync = iot_fsync, + .lk = iot_lk, + .stat = iot_stat, + .fstat = iot_fstat, + .truncate = iot_truncate, + .ftruncate = iot_ftruncate, + .utimens = iot_utimens, + .checksum = iot_checksum, + .unlink = iot_unlink, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .release = iot_release, +}; + +struct volume_options options[] = { + { .key = {"thread-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 32 + }, + { .key = {NULL} }, +}; diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h new file mode 100644 index 000000000..6595d3e27 --- /dev/null +++ b/xlators/performance/io-threads/src/io-threads.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __IOT_H +#define __IOT_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "compat-errno.h" +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "common-utils.h" + +#define min(a,b) ((a)<(b)?(a):(b)) +#define max(a,b) ((a)>(b)?(a):(b)) + +struct iot_conf; +struct iot_worker; +struct iot_queue; +struct iot_local; +struct iot_file; + +struct iot_local { + struct iot_file *file; + size_t frame_size; +}; + +struct iot_queue { + struct iot_queue *next, *prev; + call_stub_t *stub; +}; + +struct iot_worker { + struct iot_worker *next, *prev; + struct iot_queue queue; + struct iot_conf *conf; + int64_t q,dq; + pthread_cond_t dq_cond; + /* + pthread_cond_t q_cond; + pthread_mutex_t lock; + */ + int32_t fd_count; + int32_t queue_size; + /* + int32_t queue_limit; + */ + pthread_t thread; +}; + +struct iot_file { + struct iot_file *next, *prev; /* all open files via this xlator */ + struct iot_worker *worker; + fd_t *fd; + int32_t pending_ops; +}; + +struct iot_conf { + int32_t thread_count; + int32_t misc_thread_index; /* Used to schedule the miscellaneous calls like checksum */ + struct iot_worker workers; + struct iot_file files; + pthread_mutex_t files_lock; + + uint64_t cache_size; + off_t current_size; + pthread_cond_t q_cond; + pthread_mutex_t lock; +}; + +typedef struct iot_file iot_file_t; +typedef struct iot_conf iot_conf_t; +typedef struct iot_local iot_local_t; +typedef struct iot_worker iot_worker_t; +typedef struct iot_queue iot_queue_t; + +#endif /* __IOT_H */ diff --git 
a/xlators/performance/read-ahead/Makefile.am b/xlators/performance/read-ahead/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/performance/read-ahead/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am new file mode 100644 index 000000000..7bb902282 --- /dev/null +++ b/xlators/performance/read-ahead/src/Makefile.am @@ -0,0 +1,14 @@ +xlator_LTLIBRARIES = read-ahead.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +read_ahead_la_LDFLAGS = -module -avoidversion + +read_ahead_la_SOURCES = read-ahead.c page.c +read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = read-ahead.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c new file mode 100644 index 000000000..3b8d4d209 --- /dev/null +++ b/xlators/performance/read-ahead/src/page.c @@ -0,0 +1,487 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "read-ahead.h" +#include <assert.h> + + +ra_page_t * +ra_page_get (ra_file_t *file, + off_t offset) +{ + ra_page_t *page = NULL; + off_t rounded_offset = 0; + + page = file->pages.next; + rounded_offset = floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; + + if (page == &file->pages || page->offset != rounded_offset) + page = NULL; + + return page; +} + + +ra_page_t * +ra_page_create (ra_file_t *file, off_t offset) +{ + ra_page_t *page = NULL; + off_t rounded_offset = 0; + ra_page_t *newpage = NULL; + + page = file->pages.next; + rounded_offset = floor (offset, file->page_size); + + while (page != &file->pages && page->offset < rounded_offset) + page = page->next; + + if (page == &file->pages || page->offset != rounded_offset) { + newpage = CALLOC (1, sizeof (*newpage)); + if (!newpage) + return NULL; + + newpage->offset = rounded_offset; + newpage->prev = page->prev; + newpage->next = page; + newpage->file = file; + page->prev->next = newpage; + page->prev = newpage; + + page = newpage; + } + + return page; +} + + +void +ra_wait_on_page (ra_page_t *page, call_frame_t *frame) +{ + ra_waitq_t *waitq = NULL; + ra_local_t *local = NULL; + + + local = frame->local; + waitq = CALLOC (1, sizeof (*waitq)); + if (!waitq) { + gf_log (frame->this->name, GF_LOG_ERROR, + "out of memory :("); + return; + } + + waitq->data = frame; + waitq->next = page->waitq; + page->waitq = waitq; + + ra_local_lock (local); + { + local->wait_count++; + } + ra_local_unlock (local); +} + + +void +ra_waitq_return (ra_waitq_t *waitq) +{ + ra_waitq_t *trav = NULL; + ra_waitq_t *next = NULL; + call_frame_t *frame = NULL; + + for (trav = waitq; trav; trav = next) { + next = trav->next; + + frame = trav->data; + ra_frame_return (frame); + free (trav); + } +} + + 
+int +ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct stat *stbuf) +{ + ra_local_t *local = NULL; + off_t pending_offset = 0; + ra_file_t *file = NULL; + ra_page_t *page = NULL; + off_t trav_offset = 0; + size_t payload_size = 0; + ra_waitq_t *waitq = NULL; + fd_t *fd = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + local = frame->local; + fd = local->fd; + + ret = fd_ctx_get (fd, this, &tmp_file); + + file = (ra_file_t *)(long)tmp_file; + pending_offset = local->pending_offset; + trav_offset = pending_offset; + payload_size = op_ret; + + ra_file_lock (file); + { + if (op_ret >= 0) + file->stbuf = *stbuf; + + if (op_ret < 0) { + page = ra_page_get (file, pending_offset); + if (page) + waitq = ra_page_error (page, op_ret, op_errno); + goto unlock; + } + + page = ra_page_get (file, pending_offset); + if (!page) { + gf_log (this->name, GF_LOG_DEBUG, + "wasted copy: %"PRId64"[+%"PRId64"] file=%p", + pending_offset, file->page_size, file); + goto unlock; + } + + if (page->vector) { + dict_unref (page->ref); + free (page->vector); + } + + page->vector = iov_dup (vector, count); + page->count = count; + page->ref = dict_ref (frame->root->rsp_refs); + page->ready = 1; + + page->size = iov_length (vector, count); + + waitq = ra_page_wakeup (page); + } +unlock: + ra_file_unlock (file); + + ra_waitq_return (waitq); + + fd_unref (local->fd); + + free (frame->local); + frame->local = NULL; + + STACK_DESTROY (frame->root); + return 0; +} + + +void +ra_page_fault (ra_file_t *file, + call_frame_t *frame, + off_t offset) +{ + call_frame_t *fault_frame = NULL; + ra_local_t *fault_local = NULL; + + fault_frame = copy_frame (frame); + fault_local = CALLOC (1, sizeof (ra_local_t)); + + fault_frame->local = fault_local; + fault_local->pending_offset = offset; + fault_local->pending_size = file->page_size; + + fault_local->fd = fd_ref (file->fd); + + STACK_WIND (fault_frame, 
ra_fault_cbk, + FIRST_CHILD (fault_frame->this), + FIRST_CHILD (fault_frame->this)->fops->readv, + file->fd, file->page_size, offset); + return; +} + +void +ra_frame_fill (ra_page_t *page, call_frame_t *frame) +{ + ra_local_t *local = NULL; + ra_fill_t *fill = NULL; + off_t src_offset = 0; + off_t dst_offset = 0; + ssize_t copy_size = 0; + ra_fill_t *new = NULL; + + + local = frame->local; + fill = &local->fill; + + if (local->op_ret != -1 && page->size) { + if (local->offset > page->offset) + src_offset = local->offset - page->offset; + else + dst_offset = page->offset - local->offset; + + copy_size = min (page->size - src_offset, + local->size - dst_offset); + + if (copy_size < 0) { + /* if page contains fewer bytes and the required offset + is beyond the page size in the page */ + copy_size = src_offset = 0; + } + + fill = fill->next; + while (fill != &local->fill) { + if (fill->offset > page->offset) { + break; + } + fill = fill->next; + } + + new = CALLOC (1, sizeof (*new)); + + new->offset = page->offset; + new->size = copy_size; + new->refs = dict_ref (page->ref); + new->count = iov_subset (page->vector, page->count, + src_offset, src_offset+copy_size, + NULL); + new->vector = CALLOC (new->count, sizeof (struct iovec)); + + new->count = iov_subset (page->vector, page->count, + src_offset, src_offset+copy_size, + new->vector); + + new->next = fill; + new->prev = new->next->prev; + new->next->prev = new; + new->prev->next = new; + + local->op_ret += copy_size; + } +} + + +void +ra_frame_unwind (call_frame_t *frame) +{ + ra_local_t *local = NULL; + ra_fill_t *fill = NULL; + int32_t count = 0; + struct iovec *vector; + int32_t copied = 0; + dict_t *refs = NULL; + ra_fill_t *next = NULL; + fd_t *fd = NULL; + ra_file_t *file = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + local = frame->local; + fill = local->fill.next; + + refs = get_new_dict (); + + frame->local = NULL; + + while (fill != &local->fill) { + count += fill->count; + fill = fill->next; + } + + 
vector = CALLOC (count, sizeof (*vector)); + + fill = local->fill.next; + + while (fill != &local->fill) { + next = fill->next; + + memcpy (((char *)vector) + copied, fill->vector, + fill->count * sizeof (*vector)); + + copied += (fill->count * sizeof (*vector)); + dict_copy (fill->refs, refs); + + fill->next->prev = fill->prev; + fill->prev->next = fill->prev; + + dict_unref (fill->refs); + free (fill->vector); + free (fill); + + fill = next; + } + + frame->root->rsp_refs = dict_ref (refs); + + fd = local->fd; + ret = fd_ctx_get (fd, frame->this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + vector, count, &file->stbuf); + + dict_unref (refs); + pthread_mutex_destroy (&local->local_lock); + free (local); + free (vector); + + return; +} + +/* + * ra_frame_return - + * @frame: + * + */ +void +ra_frame_return (call_frame_t *frame) +{ + ra_local_t *local = NULL; + int32_t wait_count = 0; + + local = frame->local; + assert (local->wait_count > 0); + + ra_local_lock (local); + { + wait_count = --local->wait_count; + } + ra_local_unlock (local); + + if (!wait_count) + ra_frame_unwind (frame); + + return; +} + +/* + * ra_page_wakeup - + * @page: + * + */ +ra_waitq_t * +ra_page_wakeup (ra_page_t *page) +{ + ra_waitq_t *waitq = NULL, *trav = NULL; + call_frame_t *frame; + + waitq = page->waitq; + page->waitq = NULL; + + trav = waitq; + for (trav = waitq; trav; trav = trav->next) { + frame = trav->data; + ra_frame_fill (page, frame); + } + + return waitq; +} + +/* + * ra_page_purge - + * @page: + * + */ +void +ra_page_purge (ra_page_t *page) +{ + page->prev->next = page->next; + page->next->prev = page->prev; + + if (page->ref) { + dict_unref (page->ref); + } + free (page->vector); + free (page); +} + +/* + * ra_page_error - + * @page: + * @op_ret: + * @op_errno: + * + */ +ra_waitq_t * +ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno) +{ + + ra_waitq_t *waitq = NULL; + ra_waitq_t *trav = NULL; + 
call_frame_t *frame = NULL; + ra_local_t *local = NULL; + + waitq = page->waitq; + page->waitq = NULL; + + trav = waitq; + for (trav = waitq; trav; trav = trav->next) { + frame = trav->data; + + local = frame->local; + if (local->op_ret != -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + } + + ra_page_purge (page); + + return waitq; +} + +/* + * ra_file_destroy - + * @file: + * + */ +void +ra_file_destroy (ra_file_t *file) +{ + ra_conf_t *conf = NULL; + ra_page_t *trav = NULL; + + conf = file->conf; + + ra_conf_lock (conf); + { + file->prev->next = file->next; + file->next->prev = file->prev; + } + ra_conf_unlock (conf); + + trav = file->pages.next; + while (trav != &file->pages) { + ra_page_error (trav, -1, EINVAL); + trav = file->pages.next; + } + + pthread_mutex_destroy (&file->file_lock); + free (file); +} + diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c new file mode 100644 index 000000000..0060e00fd --- /dev/null +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -0,0 +1,890 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +/* + TODO: + - handle O_DIRECT + - maintain offset, flush on lseek + - ensure efficient memory managment in case of random seek +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "read-ahead.h" +#include <assert.h> +#include <sys/time.h> + + +static void +read_ahead (call_frame_t *frame, + ra_file_t *file); + + +int +ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + ra_conf_t *conf = NULL; + ra_file_t *file = NULL; + int ret = 0; + + conf = this->private; + + if (op_ret == -1) { + goto unwind; + } + + file = CALLOC (1, sizeof (*file)); + if (!file) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + ret = fd_ctx_set (fd, this, (uint64_t)(long)file); + + /* If mandatory locking has been enabled on this file, + we disable caching on it */ + + if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP)) + file->disabled = 1; + + /* If O_DIRECT open, we disable caching on it */ + + if ((fd->flags & O_DIRECT) || (fd->flags & O_WRONLY)) + file->disabled = 1; + + file->offset = (unsigned long long) 0; + file->conf = conf; + file->pages.next = &file->pages; + file->pages.prev = &file->pages; + file->pages.offset = (unsigned long long) 0; + file->pages.file = file; + + ra_conf_lock (conf); + { + file->next = conf->files.next; + conf->files.next = file; + file->next->prev = file; + file->prev = &conf->files; + } + ra_conf_unlock (conf); + + file->fd = fd; + file->page_count = conf->page_count; + file->page_size = conf->page_size; + pthread_mutex_init (&file->file_lock, NULL); + + if (!file->disabled) { + file->page_count = 1; + } + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + + +int +ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, 
struct stat *buf) +{ + ra_conf_t *conf = NULL; + ra_file_t *file = NULL; + int ret = 0; + + conf = this->private; + + if (op_ret == -1) { + goto unwind; + } + + file = CALLOC (1, sizeof (*file)); + if (!file) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unwind; + } + + ret = fd_ctx_set (fd, this, (uint64_t)(long)file); + + /* If mandatory locking has been enabled on this file, + we disable caching on it */ + + if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP)) + file->disabled = 1; + + /* If O_DIRECT open, we disable caching on it */ + + if ((fd->flags & O_DIRECT) || (fd->flags & O_WRONLY)) + file->disabled = 1; + + file->offset = (unsigned long long) 0; + //file->size = fd->inode->buf.st_size; + file->conf = conf; + file->pages.next = &file->pages; + file->pages.prev = &file->pages; + file->pages.offset = (unsigned long long) 0; + file->pages.file = file; + + ra_conf_lock (conf); + { + file->next = conf->files.next; + conf->files.next = file; + file->next->prev = file; + file->prev = &conf->files; + } + ra_conf_unlock (conf); + + file->fd = fd; + file->page_count = conf->page_count; + file->page_size = conf->page_size; + pthread_mutex_init (&file->file_lock, NULL); + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + + return 0; +} + + +int +ra_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + STACK_WIND (frame, ra_open_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->open, + loc, flags, fd); + + return 0; +} + +int +ra_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + STACK_WIND (frame, ra_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + + return 0; +} + +/* free cache pages between offset and offset+size, + does not touch pages with frames waiting on it +*/ + +static void +flush_region (call_frame_t *frame, + ra_file_t *file, + off_t offset, + off_t size) +{ + 
ra_page_t *trav = NULL; + ra_page_t *next = NULL; + + + ra_file_lock (file); + { + trav = file->pages.next; + while (trav != &file->pages + && trav->offset < (offset + size)) { + + next = trav->next; + if (trav->offset >= offset && !trav->waitq) { + ra_page_purge (trav); + } + trav = next; + } + } + ra_file_unlock (file); +} + + + +int +ra_release (xlator_t *this, + fd_t *fd) +{ + uint64_t tmp_file = 0; + int ret = 0; + + ret = fd_ctx_del (fd, this, &tmp_file); + + if (!ret) { + ra_file_destroy ((ra_file_t *)(long)tmp_file); + } + + return 0; +} + + +void +read_ahead (call_frame_t *frame, ra_file_t *file) +{ + off_t ra_offset = 0; + size_t ra_size = 0; + off_t trav_offset = 0; + ra_page_t *trav = NULL; + off_t cap = 0; + char fault = 0; + + if (!file->page_count) + return; + + ra_size = file->page_size * file->page_count; + ra_offset = floor (file->offset, file->page_size); + cap = file->size ? file->size : file->offset + ra_size; + + while (ra_offset < min (file->offset + ra_size, cap)) { + + ra_file_lock (file); + { + trav = ra_page_get (file, ra_offset); + } + ra_file_unlock (file); + + if (!trav) + break; + + ra_offset += file->page_size; + } + + if (trav) + /* comfortable enough */ + return; + + trav_offset = ra_offset; + + trav = file->pages.next; + cap = file->size ? 
file->size : ra_offset + ra_size; + + while (trav_offset < min(ra_offset + ra_size, cap)) { + fault = 0; + ra_file_lock (file); + { + trav = ra_page_get (file, trav_offset); + if (!trav) { + fault = 1; + trav = ra_page_create (file, trav_offset); + if (trav) + trav->dirty = 1; + } + } + ra_file_unlock (file); + + if (!trav) { + /* OUT OF MEMORY */ + break; + } + + if (fault) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "RA at offset=%"PRId64, trav_offset); + ra_page_fault (file, frame, trav_offset); + } + trav_offset += file->page_size; + } + + return; +} + + +int +ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct stat *stbuf) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +static void +dispatch_requests (call_frame_t *frame, + ra_file_t *file) +{ + ra_local_t *local = NULL; + ra_conf_t *conf = NULL; + off_t rounded_offset = 0; + off_t rounded_end = 0; + off_t trav_offset = 0; + ra_page_t *trav = NULL; + call_frame_t *ra_frame = NULL; + char need_atime_update = 1; + char fault = 0; + + + local = frame->local; + conf = file->conf; + + rounded_offset = floor (local->offset, file->page_size); + rounded_end = roof (local->offset + local->size, file->page_size); + + trav_offset = rounded_offset; + trav = file->pages.next; + + while (trav_offset < rounded_end) { + fault = 0; + + ra_file_lock (file); + { + trav = ra_page_get (file, trav_offset); + if (!trav) { + trav = ra_page_create (file, trav_offset); + fault = 1; + need_atime_update = 0; + } + + if (!trav) + goto unlock; + + if (trav->ready) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "HIT at offset=%"PRId64".", + trav_offset); + ra_frame_fill (trav, frame); + } else { + gf_log (frame->this->name, GF_LOG_DEBUG, + "IN-TRANSIT at offset=%"PRId64".", + trav_offset); + ra_wait_on_page (trav, frame); + need_atime_update = 0; + } + } + unlock: + ra_file_unlock (file); + + if (fault) { + gf_log (frame->this->name, 
GF_LOG_DEBUG, + "MISS at offset=%"PRId64".", + trav_offset); + ra_page_fault (file, frame, trav_offset); + } + + trav_offset += file->page_size; + } + + if (need_atime_update && conf->force_atime_update) { + /* TODO: use untimens() since readv() can confuse underlying + io-cache and others */ + ra_frame = copy_frame (frame); + STACK_WIND (ra_frame, ra_need_atime_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->readv, + file->fd, 1, 1); + } + + return ; +} + + +int +ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + + +int +ra_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset) +{ + ra_file_t *file = NULL; + ra_local_t *local = NULL; + ra_conf_t *conf = NULL; + int op_errno = 0; + int ret = 0; + char expected_offset = 1; + uint64_t tmp_file = 0; + + conf = this->private; + + gf_log (this->name, GF_LOG_DEBUG, + "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"", + offset, size); + + ret = fd_ctx_get (fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (file->offset != offset) { + gf_log (this->name, GF_LOG_DEBUG, + "unexpected offset (%"PRId64" != %"PRId64") resetting", + file->offset, offset); + + expected_offset = file->expected = file->page_count = 0; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "expected offset (%"PRId64") when page_count=%d", + offset, file->page_count); + + if (file->expected < (conf->page_size * conf->page_count)) { + file->expected += size; + file->page_count = min ((file->expected / file->page_size), + conf->page_count); + } + } + + if (!expected_offset) { + flush_region (frame, file, 0, file->pages.prev->offset + 1); + } + + if (file->disabled) { + STACK_WIND (frame, ra_readv_disabled_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->readv, + 
file->fd, size, offset); + return 0; + } + + local = (void *) CALLOC (1, sizeof (*local)); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + op_errno = ENOMEM; + goto unwind; + } + + local->fd = fd; + local->offset = offset; + local->size = size; + local->wait_count = 1; + + local->fill.next = &local->fill; + local->fill.prev = &local->fill; + + pthread_mutex_init (&local->local_lock, NULL); + + frame->local = local; + + dispatch_requests (frame, file); + + flush_region (frame, file, 0, floor (offset, file->page_size)); + + read_ahead (frame, file); + + ra_frame_return (frame); + + file->offset = offset + size; + + return 0; + +unwind: + STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; +} + + +int +ra_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int +ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + ra_file_t *file = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + ret = fd_ctx_get (fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1); + } + + STACK_WIND (frame, ra_flush_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->flush, + fd); + return 0; +} + + +int +ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync) +{ + ra_file_t *file = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + ret = fd_ctx_get (fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1); + } + + STACK_WIND (frame, ra_flush_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsync, + fd, datasync); + return 0; +} + + +int +ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + fd_t *fd = NULL; + ra_file_t *file = NULL; + int ret = 0; + uint64_t tmp_file = 0; + 
+ fd = frame->local; + + ret = fd_ctx_get (fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1); + } + + frame->local = NULL; + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + + +int +ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset) +{ + ra_file_t *file = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + ret = fd_ctx_get (fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1); + + /* reset the read-ahead counters too */ + file->expected = file->page_count = 0; + } + + frame->local = fd; + + STACK_WIND (frame, ra_writev_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset); + + return 0; +} + + +int +ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int +ra_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + inode = loc->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + ret = fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (!file) + continue; + flush_region (frame, file, 0, + file->pages.prev->offset + 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_attr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->truncate, + loc, offset); + return 0; +} + + +int +ra_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, 
&inode->fd_list, inode_list) { + ret = fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (!file) + continue; + flush_region (frame, file, 0, + file->pages.prev->offset + 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_attr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fstat, + fd); + return 0; +} + + +int +ra_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + ret = fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + + if (!file) + continue; + flush_region (frame, file, 0, + file->pages.prev->offset + 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_attr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fchown, + fd, uid, gid); + return 0; +} + + +int +ra_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + int ret = 0; + uint64_t tmp_file = 0; + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + ret = fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if (!file) + continue; + flush_region (frame, file, 0, + file->pages.prev->offset + 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_attr_cbk, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ftruncate, + fd, offset); + return 0; +} + + +int +init (xlator_t *this) +{ + ra_conf_t *conf; + dict_t *options = this->options; + char *page_size_string = NULL; + char *page_count_string = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: read-ahead not configured with exactly one child"); + return -1; + } + + 
if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + conf = (void *) CALLOC (1, sizeof (*conf)); + ERR_ABORT (conf); + conf->page_size = 256 * 1024; + conf->page_count = 2; + + if (dict_get (options, "page-size")) + page_size_string = data_to_str (dict_get (options, + "page-size")); + if (page_size_string) + { + if (gf_string2bytesize (page_size_string, &conf->page_size) != 0) + { + gf_log ("read-ahead", + GF_LOG_ERROR, + "invalid number format \"%s\" of \"option page-size\"", + page_size_string); + return -1; + } + + gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_size = %"PRIu64"", + conf->page_size); + } + + if (dict_get (options, "page-count")) + page_count_string = data_to_str (dict_get (options, + "page-count")); + if (page_count_string) + { + if (gf_string2uint_base10 (page_count_string, &conf->page_count) != 0) + { + gf_log ("read-ahead", + GF_LOG_ERROR, + "invalid number format \"%s\" of \"option page-count\"", + page_count_string); + return -1; + } + gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u", + conf->page_count); + } + + if (dict_get (options, "force-atime-update")) { + char *force_atime_update_str = data_to_str (dict_get (options, + "force-atime-update")); + if (gf_string2boolean (force_atime_update_str, &conf->force_atime_update) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'force-atime-update' takes only boolean options"); + return -1; + } + if (conf->force_atime_update) + gf_log (this->name, GF_LOG_DEBUG, "Forcing atime updates on cache hit"); + } + + conf->files.next = &conf->files; + conf->files.prev = &conf->files; + + pthread_mutex_init (&conf->conf_lock, NULL); + this->private = conf; + return 0; +} + +void +fini (xlator_t *this) +{ + ra_conf_t *conf = this->private; + + pthread_mutex_destroy (&conf->conf_lock); + FREE (conf); + + this->private = NULL; + return; +} + +struct xlator_fops fops = { + .open = ra_open, + .create = ra_create, + .readv = ra_readv, + 
.writev = ra_writev, + .flush = ra_flush, + .fsync = ra_fsync, + .truncate = ra_truncate, + .ftruncate = ra_ftruncate, + .fstat = ra_fstat, + .fchown = ra_fchown, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .release = ra_release, +}; + +struct volume_options options[] = { + { .key = {"force-atime-update"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"page-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 64 * GF_UNIT_KB, + .max = 2 * GF_UNIT_MB + }, + { .key = {"page-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 16 + }, + { .key = {NULL} }, +}; diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h new file mode 100644 index 000000000..d624ca8ab --- /dev/null +++ b/xlators/performance/read-ahead/src/read-ahead.h @@ -0,0 +1,194 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __READ_AHEAD_H +#define __READ_AHEAD_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "common-utils.h" + +struct ra_conf; +struct ra_local; +struct ra_page; +struct ra_file; +struct ra_waitq; + + +struct ra_waitq { + struct ra_waitq *next; + void *data; +}; + + +struct ra_fill { + struct ra_fill *next; + struct ra_fill *prev; + off_t offset; + size_t size; + struct iovec *vector; + int32_t count; + dict_t *refs; +}; + + +struct ra_local { + mode_t mode; + struct ra_fill fill; + off_t offset; + size_t size; + int32_t op_ret; + int32_t op_errno; + off_t pending_offset; + size_t pending_size; + fd_t *fd; + int32_t wait_count; + pthread_mutex_t local_lock; +}; + + +struct ra_page { + struct ra_page *next; + struct ra_page *prev; + struct ra_file *file; + char dirty; + char ready; + struct iovec *vector; + int32_t count; + off_t offset; + size_t size; + struct ra_waitq *waitq; + dict_t *ref; +}; + + +struct ra_file { + struct ra_file *next; + struct ra_file *prev; + struct ra_conf *conf; + fd_t *fd; + int disabled; + size_t expected; + struct ra_page pages; + off_t offset; + size_t size; + int32_t refcount; + pthread_mutex_t file_lock; + struct stat stbuf; + uint64_t page_size; + uint32_t page_count; +}; + + +struct ra_conf { + uint64_t page_size; + uint32_t page_count; + void *cache_block; + struct ra_file files; + gf_boolean_t force_atime_update; + pthread_mutex_t conf_lock; +}; + + +typedef struct ra_conf ra_conf_t; +typedef struct ra_local ra_local_t; +typedef struct ra_page ra_page_t; +typedef struct ra_file ra_file_t; +typedef struct ra_waitq ra_waitq_t; +typedef struct ra_fill ra_fill_t; + +ra_page_t * +ra_page_get (ra_file_t *file, + off_t offset); +ra_page_t * +ra_page_create (ra_file_t *file, + off_t offset); +void +ra_page_fault (ra_file_t *file, + call_frame_t *frame, + off_t offset); +void +ra_wait_on_page (ra_page_t 
*page, + call_frame_t *frame); +ra_waitq_t * +ra_page_wakeup (ra_page_t *page); + +void +ra_page_flush (ra_page_t *page); + +ra_waitq_t * +ra_page_error (ra_page_t *page, + int32_t op_ret, + int32_t op_errno); +void +ra_page_purge (ra_page_t *page); + +void +ra_frame_return (call_frame_t *frame); +void +ra_frame_fill (ra_page_t *page, + call_frame_t *frame); + +void +ra_file_destroy (ra_file_t *file); + +static inline void +ra_file_lock (ra_file_t *file) +{ + pthread_mutex_lock (&file->file_lock); +} + +static inline void +ra_file_unlock (ra_file_t *file) +{ + pthread_mutex_unlock (&file->file_lock); +} + +static inline void +ra_conf_lock (ra_conf_t *conf) +{ + pthread_mutex_lock (&conf->conf_lock); +} + +static inline void +ra_conf_unlock (ra_conf_t *conf) +{ + pthread_mutex_unlock (&conf->conf_lock); +} +static inline void +ra_local_lock (ra_local_t *local) +{ + pthread_mutex_lock (&local->local_lock); +} + +static inline void +ra_local_unlock (ra_local_t *local) +{ + pthread_mutex_unlock (&local->local_lock); +} + +#endif /* __READ_AHEAD_H */ diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/performance/stat-prefetch/Makefile.am new file mode 100644 index 000000000..af437a64d --- /dev/null +++ b/xlators/performance/stat-prefetch/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am new file mode 100644 index 000000000..e52f2df48 --- /dev/null +++ b/xlators/performance/stat-prefetch/src/Makefile.am @@ -0,0 +1,11 @@ +xlator_PROGRAMS = stat-prefetch.so +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +stat_prefetch_so_SOURCES = stat-prefetch.c +noinst_HEADERS = stat-prefetch.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles + +CLEANFILES = + diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c 
b/xlators/performance/stat-prefetch/src/stat-prefetch.c new file mode 100644 index 000000000..f2a78f676 --- /dev/null +++ b/xlators/performance/stat-prefetch/src/stat-prefetch.c @@ -0,0 +1,508 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "stat-prefetch.h" +#include "dict.h" +#include "xlator.h" +#include <sys/time.h> + +struct sp_cache { + struct sp_cache *next; + struct sp_cache *prev; + pid_t pid; + long long tv_time; + char *dirname; + dir_entry_t entries; + int32_t count; + pthread_mutex_t lock; +}; + +static void +stat_prefetch_cache_flush (struct sp_cache *cache, int32_t force) +{ + struct sp_cache *trav; + struct timeval tv; + long long tv_time; + + gettimeofday (&tv, NULL); + tv_time = (tv.tv_usec + (tv.tv_sec * 1000000)); + + pthread_mutex_lock (&cache->lock); + + trav = cache->next; + while (trav != cache) { + struct sp_cache *next = trav->next; + { + if (tv_time > trav->tv_time || force) { + gf_log ("stat-prefetch", + GF_LOG_DEBUG, + "flush on: %s", + trav->dirname); + dir_entry_t *entries; + + trav->prev->next = trav->next; + trav->next->prev = trav->prev; + + entries = trav->entries.next; + + while (entries) { + dir_entry_t *nextentry = entries->next; + { 
+                                                free (entries->name);
+                                                free (entries);
+                                        }
+                                        entries = nextentry;
+                                }
+                                free (trav->dirname);
+                                free (trav);
+                        }
+                }
+                trav = next;
+        }
+
+        pthread_mutex_unlock (&cache->lock);
+}
+
+/*
+ * Store the result of a readdir for `dirname` in the cache.
+ *
+ * cache   - list head; its lock guards the whole list and its tv_time is
+ *           added to the current time to compute the node's expiry.
+ * pid     - recorded on a newly created node (not used for matching).
+ * dirname - heap string; ownership passes to this function (it is either
+ *           stored in a new node or freed).
+ * entries - list head whose ->next chain is moved into the cache; on
+ *           return entries->next is NULL.
+ *
+ * Returns 0 on success, -1 if dirname or entries is NULL.
+ */
+static int32_t
+stat_prefetch_cache_fill (struct sp_cache *cache,
+                          pid_t pid,
+                          char *dirname,
+                          dir_entry_t *entries)
+{
+        struct sp_cache *trav;
+        struct timeval tv;
+
+        /* strdup() in the caller may have failed; nothing to cache then */
+        if (!dirname || !entries) {
+                free (dirname);
+                return -1;
+        }
+
+        /* was pthread_mutex_unlock(): the list was walked and modified
+           without the lock, and the mutex was unlocked twice */
+        pthread_mutex_lock (&cache->lock);
+        trav = cache->next;
+        while (trav != cache) {
+                // if (trav->pid == pid && !strcmp (trav->dirname, dirname)) {
+                if (!strcmp (trav->dirname, dirname)) {
+                        break;
+                }
+                trav = trav->next;
+        }
+
+        if (trav == cache) {
+                /* no node for this directory yet: append one at the tail */
+                trav = CALLOC (1, sizeof (*trav));
+                ERR_ABORT (trav);
+                trav->pid = pid;
+                trav->dirname = dirname;
+
+                trav->prev = cache->prev;
+                trav->next = cache;
+                trav->next->prev = trav;
+                trav->prev->next = trav;
+        } else {
+                free (dirname);
+        }
+
+        /* drop whatever entries the node held before */
+        while (trav->entries.next) {
+                dir_entry_t *tmp = trav->entries.next;
+
+                trav->entries.next = trav->entries.next->next;
+                free (tmp->name);
+                free (tmp);
+        }
+        /* steal the caller's chain */
+        trav->entries.next = entries->next;
+        entries->next = NULL;
+
+        gettimeofday (&tv, NULL);
+        trav->tv_time = (tv.tv_usec + (tv.tv_sec * 1000000)) + cache->tv_time;
+
+        pthread_mutex_unlock (&cache->lock);
+        return 0;
+}
+
+/*
+ * Look up the cached stat of `path`.
+ *
+ * The path is split at its last '/' into directory and file name; the
+ * directory's node is searched for the file name.  On a hit the stat is
+ * copied to *buf and the entry is removed from the cache.
+ *
+ * Returns 0 on a hit, -1 on a miss (including a path without '/' or an
+ * allocation failure).
+ */
+static int32_t
+stat_prefetch_cache_lookup (struct sp_cache *cache,
+                            pid_t pid,
+                            const char *path,
+                            struct stat *buf)
+{
+        struct sp_cache *trav;
+        char *dirname = NULL;
+        char *filename = NULL;
+        dir_entry_t *entries;
+        dir_entry_t *prev = NULL;
+
+        dirname = strdup (path);
+        if (!dirname)
+                return -1;
+
+        /* strrchr() returns NULL when the path has no '/'; treat as miss */
+        filename = strrchr (dirname, '/');
+        if (!filename) {
+                free (dirname);
+                return -1;
+        }
+
+        *filename = '\0';
+        filename ++;
+
+        pthread_mutex_lock (&cache->lock);
+        trav = cache->next;
+        while (trav != cache) {
+                // if ((trav->pid == pid) && !strcmp (dirname, trav->dirname))
+                if (!strcmp (dirname, trav->dirname))
+                        break;
+                trav = trav->next;
+        }
+        if (trav == cache) {
+                free (dirname);
+                pthread_mutex_unlock (&cache->lock);
+                return -1;
+        }
+
+        entries = trav->entries.next;
+        prev = &trav->entries;
+        while (entries) {
+                if (!strcmp (entries->name, filename)) 
+ break; + prev = entries; + entries = entries->next; + } + if (!entries) { + free (dirname); + pthread_mutex_unlock (&cache->lock); + return -1; + } + + *buf = entries->buf; + prev->next = entries->next; + free (entries->name); + free (entries); + free (dirname); + + pthread_mutex_unlock (&cache->lock); + + return 0; +} + + +int32_t +stat_prefetch_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + char *path = frame->local; + pid_t pid = frame->root->pid; + frame->local = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, entries, count); + + if (op_ret == 0) + stat_prefetch_cache_fill (this->private, + pid, + path, + entries); + else + free (path); + + return 0; +} + +int32_t +stat_prefetch_readdir (call_frame_t *frame, + xlator_t *this, + const char *path) +{ + stat_prefetch_cache_flush (this->private, 0); + + frame->local = strdup (path); + STACK_WIND (frame, + stat_prefetch_readdir_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, + path); + return 0; +} + + +int32_t +stat_prefetch_getattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +stat_prefetch_getattr (call_frame_t *frame, + struct xlator *this, + const char *path) +{ + struct stat buf; + pid_t pid = frame->root->pid; + stat_prefetch_cache_flush (this->private, 0); + + if (stat_prefetch_cache_lookup (this->private, + pid, + path, + &buf) == 0) { + STACK_UNWIND (frame, 0, 0, &buf); + return 0; + } + + STACK_WIND (frame, + stat_prefetch_getattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getattr, + path); + + return 0; +} + + +int32_t +stat_prefetch_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +stat_prefetch_unlink 
(call_frame_t *frame, + struct xlator *this, + const char *path) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_unlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + path); + + return 0; +} + + +int32_t +stat_prefetch_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +stat_prefetch_chmod (call_frame_t *frame, + struct xlator *this, + const char *path, + mode_t mode) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_chmod_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chmod, + path, + mode); + + return 0; +} + + +int32_t +stat_prefetch_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +stat_prefetch_chown (call_frame_t *frame, + struct xlator *this, + const char *path, + uid_t uid, + gid_t gid) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_chown_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->chown, + path, + uid, + gid); + + return 0; +} + + +int32_t +stat_prefetch_utimes_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +stat_prefetch_utimes (call_frame_t *frame, + struct xlator *this, + const char *path, + struct timespec *tvp) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_utimes_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->utimes, + path, + tvp); + + return 0; +} + + +int32_t +stat_prefetch_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + 
STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + +int32_t +stat_prefetch_truncate (call_frame_t *frame, + struct xlator *this, + const char *path, + off_t offset) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + path, + offset); + + return 0; +} + + +int32_t +stat_prefetch_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +stat_prefetch_rename (call_frame_t *frame, + struct xlator *this, + const char *oldpath, + const char *newpath) +{ + stat_prefetch_cache_flush (this->private, 1); + + STACK_WIND (frame, + stat_prefetch_rename_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, + oldpath, + newpath); + + return 0; +} + +int32_t +init (struct xlator *this) +{ + struct sp_cache *cache; + dict_t *options = this->options; + + if (!this->children || this->children->next) { + gf_log ("stat-prefetch", + GF_LOG_ERROR, + "FATAL: translator %s does not have exactly one child node", + this->name); + return -1; + } + + cache = (void *) CALLOC (1, sizeof (*cache)); + ERR_ABORT (cache); + cache->next = cache->prev = cache; + + cache->tv_time = 1 * 1000000; + + if (dict_get (options, "cache-seconds")) { + cache->tv_time = (data_to_int64 (dict_get (options, "cache-seconds")) * + 1000000); + } + + pthread_mutex_init (&cache->lock, NULL); + + this->private = cache; + return 0; +} + +void +fini (struct xlator *this) +{ + return; +} + + +struct xlator_fops fops = { + .getattr = stat_prefetch_getattr, + .readdir = stat_prefetch_readdir, + .unlink = stat_prefetch_unlink, + .chmod = stat_prefetch_chmod, + .chown = stat_prefetch_chown, + .rename = stat_prefetch_rename, + .utimes = stat_prefetch_utimes, + .truncate = stat_prefetch_truncate, +}; + +struct xlator_mops mops = { +}; diff --git 
a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h new file mode 100644 index 000000000..7d9645a2a --- /dev/null +++ b/xlators/performance/stat-prefetch/src/stat-prefetch.h @@ -0,0 +1,32 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _STAT_PREFETCH_H_ +#define _STAT_PREFETCH_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <sys/time.h> +#include "xlator.h" + +#endif /* _STAT_PREFETCH_H_ */ diff --git a/xlators/performance/symlink-cache/Makefile.am b/xlators/performance/symlink-cache/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/performance/symlink-cache/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am new file mode 100644 index 000000000..b8b257c18 --- /dev/null +++ b/xlators/performance/symlink-cache/src/Makefile.am @@ -0,0 +1,12 @@ +xlator_LTLIBRARIES = symlink-cache.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +symlink_cache_la_LDFLAGS = -module -avoidversion + +symlink_cache_la_SOURCES = symlink-cache.c +symlink_cache_la_LIBADD = 
$(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c new file mode 100644 index 000000000..fc207a627 --- /dev/null +++ b/xlators/performance/symlink-cache/src/symlink-cache.c @@ -0,0 +1,399 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "list.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "common-utils.h"
+
+/* Per-inode cache object stored in the inode context. */
+struct symlink_cache {
+        time_t ctime;     /* st_ctime the cached target was recorded for */
+        char *readlink;   /* heap copy of the link target, or NULL */
+};
+
+
+/*
+ * Fetch this translator's inode context as a pointer.
+ *
+ * *ctx is written only when inode_ctx_get() succeeds; callers initialise
+ * it to NULL beforehand.  Returns the inode_ctx_get() result (previously
+ * a hard-coded 0, which hid failures).
+ */
+static int
+symlink_inode_ctx_get (inode_t *inode, xlator_t *this, void **ctx)
+{
+        int ret = 0;
+        uint64_t tmp_ctx = 0;
+        ret = inode_ctx_get (inode, this, &tmp_ctx);
+        if (-1 == ret)
+                gf_log (this->name, GF_LOG_ERROR, "dict get failed");
+        else
+                *ctx = (void *)(long)tmp_ctx;
+
+        return ret;
+}
+
+
+/*
+ * Store `ctx` as this translator's inode context.
+ *
+ * Returns the inode_ctx_put() result.  It used to return 0 always, which
+ * made the `ret < 0` check in sc_cache_set() dead code and leaked the
+ * freshly allocated cache object when the put failed.
+ */
+static int
+symlink_inode_ctx_set (inode_t *inode, xlator_t *this, void *ctx)
+{
+        int ret = 0;
+        ret = inode_ctx_put (inode, this, (uint64_t)(long) ctx);
+        if (-1 == ret)
+                gf_log (this->name, GF_LOG_ERROR, "dict set failed");
+
+        return ret;
+}
+
+
+/*
+ * Record `link` as the target for an inode that already has a cache
+ * object but no target yet.  An existing target is left untouched.
+ * Always returns 0.
+ */
+int
+sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
+{
+        struct symlink_cache *sc = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+        if (!sc)
+                return 0;
+
+        if (!sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "updating cache: %s", link);
+
+                sc->readlink = strdup (link);
+        } else {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "not updating existing cache: %s with %s",
+                        sc->readlink, link);
+        }
+
+        return 0;
+}
+
+
+/*
+ * Create or refresh the inode's cache object: replace the stored target
+ * with a copy of `link` (which may be NULL) and record buf->st_ctime.
+ * A newly allocated object is attached to the inode context.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
+              const char *link)
+{
+        struct symlink_cache *sc = NULL;
+        int ret = -1;
+        int need_set = 0;
+
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+        if (!sc) {
+                need_set = 1;
+                sc = CALLOC (1, sizeof (*sc));
+                if (!sc) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        goto err;
+                }
+        }
+
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "replacing old cache: %s with new cache: %s",
+                        sc->readlink, link);
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        if (link) {
+                sc->readlink = strdup (link);
+                if (!sc->readlink) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of 
memory :(");
+                        goto err;
+                }
+        }
+
+        sc->ctime = buf->st_ctime;
+
+        /* link may be NULL here; passing NULL to %s is not portable */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "setting symlink cache: %s", link ? link : "(null)");
+
+        if (need_set) {
+                ret = symlink_inode_ctx_set (inode, this, sc);
+
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not set inode context (%s)",
+                                strerror (-ret));
+                        goto err;
+                }
+        }
+
+        return 0;
+err:
+
+        /* Free only an object allocated by this call.  When need_set is 0
+           the object is still referenced from the inode context and
+           freeing it would leave a dangling pointer there. */
+        if (sc && need_set) {
+                if (sc->readlink)
+                        FREE (sc->readlink);
+                sc->readlink = NULL;
+                FREE (sc);
+        }
+
+        return -1;
+}
+
+
+/*
+ * Destroy the inode's cache object, if any.
+ *
+ * The inode context is reset to NULL before the object is freed, so later
+ * lookups, and the forget callback, do not see a freed pointer.
+ * Always returns 0.
+ */
+int
+sc_cache_flush (xlator_t *this, inode_t *inode)
+{
+        struct symlink_cache *sc = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+        if (!sc)
+                return 0;
+
+        /* detach from the inode first */
+        symlink_inode_ctx_set (inode, this, NULL);
+
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "flushing cache: %s", sc->readlink);
+
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        FREE (sc);
+
+        return 0;
+}
+
+
+/*
+ * Reconcile the cache with a fresh stat.
+ *
+ * Non-symlinks lose their cache object.  Symlinks get one created on
+ * first sight; if the recorded ctime differs from buf->st_ctime the
+ * stored target is dropped and the ctime updated.  Always returns 0.
+ */
+int
+sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
+{
+        struct symlink_cache *sc = NULL;
+        uint64_t tmp_sc = 0;
+
+        if (!S_ISLNK (buf->st_mode)) {
+                sc_cache_flush (this, inode);
+                return 0;
+        }
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+
+        if (!sc) {
+                sc_cache_set (this, inode, buf, NULL);
+                inode_ctx_get (inode, this, &tmp_sc);
+
+                /* test the value just fetched; the old code re-tested
+                   `sc`, which is always NULL in this branch */
+                if (!tmp_sc) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "out of memory :(");
+                        return 0;
+                }
+                sc = (struct symlink_cache *)(long)tmp_sc;
+        }
+
+        if (sc->ctime == buf->st_ctime)
+                return 0;
+
+        /* STALE */
+        if (sc->readlink) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "flushing cache: %s", sc->readlink);
+
+                FREE (sc->readlink);
+                sc->readlink = NULL;
+        }
+
+        sc->ctime = buf->st_ctime;
+
+        return 0;
+}
+
+
+
+/*
+ * Hand out a heap copy of the cached target through *link.
+ * *link is left untouched when nothing is cached.  Always returns 0.
+ */
+int
+sc_cache_get (xlator_t *this, inode_t *inode, char **link)
+{
+        struct symlink_cache *sc = NULL;
+
+        symlink_inode_ctx_get (inode, this, VOID(&sc));
+
+        if (!sc)
+                return 0;
+
+        if (link && sc->readlink)
+                *link = strdup (sc->readlink);
+        return 0;
+}
+
+
+int
+sc_readlink_cbk (call_frame_t *frame, void *cookie,
+                 xlator_t *this, int op_ret, int op_errno,
+                 const char *link)
+{
+        if 
(op_ret > 0) + sc_cache_update (this, frame->local, link); + + inode_unref (frame->local); + frame->local = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, link); + return 0; +} + + +int +sc_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size) +{ + char *link = NULL; + + sc_cache_get (this, loc->inode, &link); + + if (link) { + /* cache hit */ + gf_log (this->name, GF_LOG_DEBUG, + "cache hit %s -> %s", + loc->path, link); + STACK_UNWIND (frame, strlen (link) + 1, 0, link); + FREE (link); + return 0; + } + + frame->local = inode_ref (loc->inode); + + STACK_WIND (frame, sc_readlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, + loc, size); + + return 0; +} + + +int +sc_symlink_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *buf) +{ + if (op_ret == 0) { + if (frame->local) { + sc_cache_set (this, inode, buf, frame->local); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; +} + + +int +sc_symlink (call_frame_t *frame, xlator_t *this, + const char *dst, loc_t *src) +{ + frame->local = strdup (dst); + + STACK_WIND (frame, sc_symlink_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, + dst, src); + + return 0; +} + + +int +sc_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + if (op_ret == 0) + sc_cache_validate (this, inode, buf); + else + sc_cache_flush (this, inode); + + STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr); + return 0; +} + + +int +sc_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + STACK_WIND (frame, sc_lookup_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + + return 0; +} + + +int +sc_forget (xlator_t *this, + inode_t *inode) +{ + sc_cache_flush (this, inode); + + return 0; +} + + +int32_t +init (xlator_t *this) +{ + + if (!this->children || 
this->children->next) + { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: volume (%s) not configured with exactly one " + "child", this->name); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + return 0; +} + + +void +fini (xlator_t *this) +{ + return; +} + + +struct xlator_fops fops = { + .lookup = sc_lookup, + .symlink = sc_symlink, + .readlink = sc_readlink, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .forget = sc_forget, +}; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/performance/write-behind/Makefile.am b/xlators/performance/write-behind/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/performance/write-behind/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am new file mode 100644 index 000000000..f800abad5 --- /dev/null +++ b/xlators/performance/write-behind/src/Makefile.am @@ -0,0 +1,12 @@ +xlator_LTLIBRARIES = write-behind.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +write_behind_la_LDFLAGS = -module -avoidversion + +write_behind_la_SOURCES = write-behind.c +write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c new file mode 100644 index 000000000..04a447d49 --- /dev/null +++ b/xlators/performance/write-behind/src/write-behind.c @@ -0,0 +1,1444 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/*TODO: check for non null wb_file_data before getting wb_file */ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "list.h" +#include "compat.h" +#include "compat-errno.h" +#include "common-utils.h" + +#define MAX_VECTOR_COUNT 8 + +typedef struct list_head list_head_t; +struct wb_conf; +struct wb_page; +struct wb_file; + + +struct wb_conf { + uint64_t aggregate_size; + uint64_t window_size; + uint64_t disable_till; + gf_boolean_t enable_O_SYNC; + gf_boolean_t flush_behind; +}; + + +typedef struct wb_local { + list_head_t winds; + struct wb_file *file; + list_head_t unwind_frames; + int op_ret; + int op_errno; + call_frame_t *frame; +} wb_local_t; + + +typedef struct write_request { + call_frame_t *frame; + off_t offset; + /* int32_t op_ret; + int32_t op_errno; */ + struct iovec *vector; + int32_t count; + dict_t *refs; + char write_behind; + char stack_wound; + char got_reply; + list_head_t list; + list_head_t winds; + /* list_head_t unwinds;*/ +} wb_write_request_t; + + +struct wb_file { + int disabled; + uint64_t disable_till; + off_t offset; + size_t window_size; + int32_t refcount; + int32_t op_ret; + int32_t op_errno; + list_head_t request; + fd_t *fd; + gf_lock_t lock; + xlator_t *this; +}; + + +typedef struct 
wb_conf wb_conf_t; +typedef struct wb_page wb_page_t; +typedef struct wb_file wb_file_t; + + +int32_t +wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all); + +int32_t +wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds); + +int32_t +wb_sync_all (call_frame_t *frame, wb_file_t *file); + +int32_t +__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size); + + +wb_file_t * +wb_file_create (xlator_t *this, + fd_t *fd) +{ + wb_file_t *file = NULL; + wb_conf_t *conf = this->private; + + file = CALLOC (1, sizeof (*file)); + INIT_LIST_HEAD (&file->request); + + /* fd_ref() not required, file should never decide the existance of + * an fd */ + file->fd= fd; + file->disable_till = conf->disable_till; + file->this = this; + file->refcount = 1; + + fd_ctx_set (fd, this, (uint64_t)(long)file); + + return file; +} + +void +wb_file_destroy (wb_file_t *file) +{ + int32_t refcount = 0; + + LOCK (&file->lock); + { + refcount = --file->refcount; + } + UNLOCK (&file->lock); + + if (!refcount){ + LOCK_DESTROY (&file->lock); + FREE (file); + } + + return; +} + + +int32_t +wb_sync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + wb_local_t *local = NULL; + list_head_t *winds = NULL; + wb_file_t *file = NULL; + wb_write_request_t *request = NULL, *dummy = NULL; + + local = frame->local; + winds = &local->winds; + file = local->file; + + LOCK (&file->lock); + { + list_for_each_entry_safe (request, dummy, winds, winds) { + request->got_reply = 1; + if (!request->write_behind && (op_ret == -1)) { + wb_local_t *per_request_local = request->frame->local; + per_request_local->op_ret = op_ret; + per_request_local->op_errno = op_errno; + } + + /* + request->op_ret = op_ret; + request->op_errno = op_errno; + */ + } + } + UNLOCK (&file->lock); + + if (op_ret == -1) + { + file->op_ret = op_ret; + file->op_errno = op_errno; + } + + wb_process_queue (frame, file, 0); + + 
/* safe place to do fd_unref */ + fd_unref (file->fd); + + STACK_DESTROY (frame->root); + + return 0; +} + +int32_t +wb_sync_all (call_frame_t *frame, wb_file_t *file) +{ + list_head_t winds; + int32_t bytes = 0; + + INIT_LIST_HEAD (&winds); + + LOCK (&file->lock); + { + bytes = __wb_mark_winds (&file->request, &winds, 0); + } + UNLOCK (&file->lock); + + wb_sync (frame, file, &winds); + + return bytes; +} + + +int32_t +wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) +{ + wb_write_request_t *dummy = NULL, *request = NULL, *first_request = NULL, *next = NULL; + size_t total_count = 0, count = 0; + size_t copied = 0; + call_frame_t *sync_frame = NULL; + dict_t *refs = NULL; + wb_local_t *local = NULL; + struct iovec *vector = NULL; + int32_t bytes = 0; + size_t bytecount = 0; + + list_for_each_entry (request, winds, winds) + { + total_count += request->count; + bytes += iov_length (request->vector, request->count); + } + + if (!total_count) { + return 0; + } + + list_for_each_entry_safe (request, dummy, winds, winds) { + if (!vector) { + vector = MALLOC (VECTORSIZE (MAX_VECTOR_COUNT)); + refs = get_new_dict (); + + local = CALLOC (1, sizeof (*local)); + INIT_LIST_HEAD (&local->winds); + + first_request = request; + } + + count += request->count; + bytecount = VECTORSIZE (request->count); + memcpy (((char *)vector)+copied, + request->vector, + bytecount); + copied += bytecount; + + if (request->refs) { + dict_copy (request->refs, refs); + } + + next = NULL; + if (request->winds.next != winds) { + next = list_entry (request->winds.next, struct write_request, winds); + } + + list_del_init (&request->winds); + list_add_tail (&request->winds, &local->winds); + + if (!next || ((count + next->count) > MAX_VECTOR_COUNT)) { + sync_frame = copy_frame (frame); + sync_frame->local = local; + local->file = file; + sync_frame->root->req_refs = dict_ref (refs); + fd_ref (file->fd); + STACK_WIND (sync_frame, + wb_sync_cbk, + FIRST_CHILD(sync_frame->this), + 
FIRST_CHILD(sync_frame->this)->fops->writev, + file->fd, vector, + count, first_request->offset); + + dict_unref (refs); + FREE (vector); + first_request = NULL; + refs = NULL; + vector = NULL; + copied = count = 0; + } + } + + return bytes; +} + + +int32_t +wb_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + wb_local_t *local = NULL; + + local = frame->local; + + if (local->file) + fd_unref (local->file->fd); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + + return 0; +} + + +int32_t +wb_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + wb_file_t *file = NULL; + fd_t *iter_fd = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (loc->inode) + { + iter_fd = fd_lookup (loc->inode, frame->root->pid); + if (iter_fd) { + if (!fd_ctx_get (iter_fd, this, &tmp_file)) { + file = (wb_file_t *)(long)tmp_file; + } else { + fd_unref (iter_fd); + } + } + if (file) { + wb_sync_all (frame, file); + } + } + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + frame->local = local; + + STACK_WIND (frame, wb_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, + loc); + return 0; +} + + +int32_t +wb_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + wb_file_t *file = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + file = (wb_file_t *)(long)tmp_file; + if (file) { + fd_ref (file->fd); + wb_sync_all (frame, file); + } + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + frame->local = local; + + STACK_WIND (frame, + wb_stat_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, + fd); + return 0; +} + + +int32_t +wb_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + 
wb_local_t *local = NULL; + + local = frame->local; + if (local->file) + fd_unref (local->file->fd); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int32_t +wb_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + wb_file_t *file = NULL; + fd_t *iter_fd = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (loc->inode) + { + iter_fd = fd_lookup (loc->inode, frame->root->pid); + if (iter_fd) { + if (!fd_ctx_get (iter_fd, this, &tmp_file)){ + file = (wb_file_t *)(long)tmp_file; + } else { + fd_unref (iter_fd); + } + } + + if (file) + { + wb_sync_all (frame, file); + } + } + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + frame->local = local; + + STACK_WIND (frame, + wb_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, + loc, + offset); + return 0; +} + + +int32_t +wb_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + wb_file_t *file = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + file = (wb_file_t *)(long)tmp_file; + if (file) + wb_sync_all (frame, file); + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + if (file) + fd_ref (file->fd); + + frame->local = local; + + STACK_WIND (frame, + wb_truncate_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, + fd, + offset); + return 0; +} + + +int32_t +wb_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + wb_local_t *local = NULL; + + local = frame->local; + if (local->file) + fd_unref (local->file->fd); + + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; +} + + +int32_t +wb_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + wb_file_t *file = NULL; + fd_t 
*iter_fd = NULL;
+        wb_local_t *local = NULL;
+        uint64_t tmp_file = 0;
+
+        if (loc->inode) {
+                iter_fd = fd_lookup (loc->inode, frame->root->pid);
+                if (iter_fd) {
+                        if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
+                                file = (wb_file_t *)(long)tmp_file;
+                        } else {
+                                fd_unref (iter_fd);
+                        }
+                }
+
+                if (file)
+                        wb_sync_all (frame, file);
+        }
+
+        local = CALLOC (1, sizeof (*local));
+        local->file = file;
+
+        frame->local = local;
+
+        STACK_WIND (frame,
+                    wb_utimens_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->utimens,
+                    loc,
+                    tv);
+        return 0;
+}
+
+/*
+ * open() callback: on success attach a wb_file to the fd and decide
+ * whether write-behind must be disabled for it.
+ *
+ * Write-behind is disabled when the inode has mandatory locking enabled
+ * (setgid set, group-execute clear), or when the open flags saved in
+ * frame->local request O_DIRECT, read-only access, or O_SYNC with the
+ * enable_O_SYNC option on.
+ */
+int32_t
+wb_open_cbk (call_frame_t *frame,
+             void *cookie,
+             xlator_t *this,
+             int32_t op_ret,
+             int32_t op_errno,
+             fd_t *fd)
+{
+        int32_t flags = 0;
+        wb_file_t *file = NULL;
+        wb_conf_t *conf = this->private;
+
+        if (op_ret != -1)
+        {
+                file = wb_file_create (this, fd);
+
+                /* If mandatory locking has been enabled on this file,
+                   we disable caching on it */
+
+                if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
+                        file->disabled = 1;
+
+                /* If O_DIRECT then, we disable caching */
+                if (frame->local)
+                {
+                        flags = *((int32_t *)frame->local);
+                        /* O_RDONLY is 0, so (flags & O_RDONLY) == O_RDONLY
+                           was true for every open and write-behind was
+                           never enabled.  The access mode has to be
+                           extracted with O_ACCMODE before comparing. */
+                        if (((flags & O_DIRECT) == O_DIRECT) ||
+                            ((flags & O_ACCMODE) == O_RDONLY) ||
+                            (((flags & O_SYNC) == O_SYNC) &&
+                             conf->enable_O_SYNC == _gf_true)) {
+                                file->disabled = 1;
+                        }
+                }
+
+                LOCK_INIT (&file->lock);
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fd);
+        return 0;
+}
+
+
+/*
+ * open() entry point: remember the open flags in frame->local for
+ * wb_open_cbk() and pass the call down to the child translator.
+ * Fails the call with ENOMEM if the flags cannot be saved.
+ */
+int32_t
+wb_open (call_frame_t *frame,
+         xlator_t *this,
+         loc_t *loc,
+         int32_t flags,
+         fd_t *fd)
+{
+        frame->local = CALLOC (1, sizeof(int32_t));
+        if (!frame->local) {
+                /* without the saved flags the callback could not honour
+                   O_DIRECT/O_SYNC, so fail instead of dereferencing NULL */
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                STACK_UNWIND (frame, -1, ENOMEM, fd);
+                return 0;
+        }
+        *((int32_t *)frame->local) = flags;
+
+        STACK_WIND (frame,
+                    wb_open_cbk,
+                    FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->open,
+                    loc, flags, fd);
+        return 0;
+}
+
+
+int32_t
+wb_create_cbk (call_frame_t *frame,
+               void *cookie,
+               xlator_t *this,
+               int32_t op_ret,
+               int32_t op_errno,
+               fd_t *fd,
+               inode_t *inode,
+               struct stat *buf)
+{
+        wb_file_t *file = NULL;
+
+        if (op_ret != -1)
+        {
+                file = wb_file_create (this, fd);
+                /*
+                 * If 
mandatory locking has been enabled on this file, + * we disable caching on it + */ + if ((fd->inode->st_mode & S_ISGID) && + !(fd->inode->st_mode & S_IXGRP)) + { + file->disabled = 1; + } + + LOCK_INIT (&file->lock); + } + + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; +} + + +int32_t +wb_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + STACK_WIND (frame, + wb_create_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, + loc, flags, mode, fd); + return 0; +} + + +int32_t +__wb_cleanup_queue (wb_file_t *file) +{ + wb_write_request_t *request = NULL, *dummy = NULL; + int32_t bytes = 0; + + list_for_each_entry_safe (request, dummy, &file->request, list) + { + if (request->got_reply && request->write_behind) + { + bytes += iov_length (request->vector, request->count); + list_del_init (&request->list); + + FREE (request->vector); + dict_unref (request->refs); + + FREE (request); + } + } + + return bytes; +} + + +int32_t +__wb_mark_wind_all (list_head_t *list, list_head_t *winds) +{ + wb_write_request_t *request = NULL; + size_t size = 0; + + list_for_each_entry (request, list, list) + { + if (!request->stack_wound) + { + size += iov_length (request->vector, request->count); + request->stack_wound = 1; + list_add_tail (&request->winds, winds); + } + } + + return size; +} + + +size_t +__wb_get_aggregate_size (list_head_t *list) +{ + wb_write_request_t *request = NULL; + size_t size = 0; + + list_for_each_entry (request, list, list) + { + if (!request->stack_wound) + { + size += iov_length (request->vector, request->count); + } + } + + return size; +} + +uint32_t +__wb_get_incomplete_writes (list_head_t *list) +{ + wb_write_request_t *request = NULL; + uint32_t count = 0; + + list_for_each_entry (request, list, list) + { + if (request->stack_wound && !request->got_reply) + { + count++; + } + } + + return count; +} + +int32_t +__wb_mark_winds (list_head_t *list, list_head_t *winds, 
size_t aggregate_conf) +{ + size_t aggregate_current = 0; + uint32_t incomplete_writes = 0; + + incomplete_writes = __wb_get_incomplete_writes (list); + + aggregate_current = __wb_get_aggregate_size (list); + + if ((incomplete_writes == 0) || (aggregate_current >= aggregate_conf)) + { + __wb_mark_wind_all (list, winds); + } + + return aggregate_current; +} + + +size_t +__wb_get_window_size (list_head_t *list) +{ + wb_write_request_t *request = NULL; + size_t size = 0; + + list_for_each_entry (request, list, list) + { + if (request->write_behind && !request->got_reply) + { + size += iov_length (request->vector, request->count); + } + } + + return size; +} + + +size_t +__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size) +{ + size_t written_behind = 0; + wb_write_request_t *request = NULL; + + list_for_each_entry (request, list, list) + { + if (written_behind <= size) + { + if (!request->write_behind) + { + wb_local_t *local = request->frame->local; + written_behind += iov_length (request->vector, request->count); + request->write_behind = 1; + list_add_tail (&local->unwind_frames, unwinds); + } + } + else + { + break; + } + } + + return written_behind; +} + + +int32_t +__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds, size_t window_conf) +{ + size_t window_current = 0; + + window_current = __wb_get_window_size (list); + if (window_current <= window_conf) + { + window_current += __wb_mark_unwind_till (list, unwinds, + window_conf - window_current); + } + + return window_current; +} + + +int32_t +wb_stack_unwind (list_head_t *unwinds) +{ + struct stat buf = {0,}; + wb_local_t *local = NULL, *dummy = NULL; + + list_for_each_entry_safe (local, dummy, unwinds, unwind_frames) + { + list_del_init (&local->unwind_frames); + STACK_UNWIND (local->frame, local->op_ret, local->op_errno, &buf); + } + + return 0; +} + + +int32_t +wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, list_head_t *unwinds) +{ + /* copy the frame 
before calling wb_stack_unwind, since this request containing current frame might get unwound */ + /* call_frame_t *sync_frame = copy_frame (frame); */ + + wb_stack_unwind (unwinds); + wb_sync (frame, file, winds); + + return 0; +} + + +int32_t +wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all) +{ + list_head_t winds, unwinds; + size_t size = 0; + wb_conf_t *conf = file->this->private; + + INIT_LIST_HEAD (&winds); + INIT_LIST_HEAD (&unwinds); + + if (!file) + { + return -1; + } + + size = flush_all ? 0 : conf->aggregate_size; + LOCK (&file->lock); + { + __wb_cleanup_queue (file); + __wb_mark_winds (&file->request, &winds, size); + __wb_mark_unwinds (&file->request, &unwinds, conf->window_size); + } + UNLOCK (&file->lock); + + wb_do_ops (frame, file, &winds, &unwinds); + return 0; +} + + +wb_write_request_t * +wb_enqueue (wb_file_t *file, + call_frame_t *frame, + struct iovec *vector, + int32_t count, + off_t offset) +{ + wb_write_request_t *request = NULL; + wb_local_t *local = CALLOC (1, sizeof (*local)); + + request = CALLOC (1, sizeof (*request)); + + INIT_LIST_HEAD (&request->list); + INIT_LIST_HEAD (&request->winds); + + request->frame = frame; + request->vector = iov_dup (vector, count); + request->count = count; + request->offset = offset; + request->refs = dict_ref (frame->root->req_refs); + + frame->local = local; + local->frame = frame; + local->op_ret = iov_length (vector, count); + local->op_errno = 0; + INIT_LIST_HEAD (&local->unwind_frames); + + LOCK (&file->lock); + { + list_add_tail (&request->list, &file->request); + file->offset = offset + iov_length (vector, count); + } + UNLOCK (&file->lock); + + return request; +} + + +int32_t +wb_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + + +int32_t +wb_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + 
int32_t count, + off_t offset) +{ + wb_file_t *file = NULL; + char offset_expected = 1, wb_disabled = 0; + call_frame_t *process_frame = NULL; + size_t size = 0; + uint64_t tmp_file = 0; + + if (vector != NULL) + size = iov_length (vector, count); + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + file = (wb_file_t *)(long)tmp_file; + if (!file) { + gf_log (this->name, GF_LOG_ERROR, + "wb_file not found for fd %p", fd); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + LOCK (&file->lock); + { + if (file->disabled || file->disable_till) { + if (size > file->disable_till) { + file->disable_till = 0; + } else { + file->disable_till -= size; + } + wb_disabled = 1; + } + + if (file->offset != offset) + offset_expected = 0; + } + UNLOCK (&file->lock); + + if (wb_disabled) { + STACK_WIND (frame, + wb_writev_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->writev, + file->fd, + vector, + count, + offset); + return 0; + } + + process_frame = copy_frame (frame); + + if (!offset_expected) + wb_process_queue (process_frame, file, 1); + + wb_enqueue (file, frame, vector, count, offset); + wb_process_queue (process_frame, file, 0); + + STACK_DESTROY (process_frame->root); + + return 0; +} + + +int32_t +wb_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + wb_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + + +int32_t +wb_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + wb_file_t *file = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 
0; + } + + file = (wb_file_t *)(long)tmp_file; + if (file) + wb_sync_all (frame, file); + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + frame->local = local; + + STACK_WIND (frame, + wb_readv_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, + fd, size, offset); + + return 0; +} + + +int32_t +wb_ffr_bg_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + wb_local_t *local = NULL; + wb_file_t *file = NULL; + + local = frame->local; + file = local->file; + + if (file) { + fd_unref (file->fd); + } + + if (file->op_ret == -1) + { + op_ret = file->op_ret; + op_errno = file->op_errno; + + file->op_ret = 0; + } + + STACK_DESTROY (frame->root); + return 0; +} + + +int32_t +wb_ffr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + wb_local_t *local = NULL; + wb_file_t *file = NULL; + + local = frame->local; + file = local->file; + if (file) { + /* corresponds to the fd_ref() done during wb_file_create() */ + fd_unref (file->fd); + } + + if (file->op_ret == -1) + { + op_ret = file->op_ret; + op_errno = file->op_errno; + + file->op_ret = 0; + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +wb_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + wb_conf_t *conf = NULL; + wb_file_t *file = NULL; + call_frame_t *flush_frame = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + conf = this->private; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (wb_file_t *)(long)tmp_file; + + local = CALLOC (1, sizeof (*local)); + local->file = file; + if (file) + fd_ref (file->fd); + + if (&file->request != file->request.next) { + gf_log (this->name, GF_LOG_DEBUG, + "request queue is not empty, it has to be synced"); + } + + if (conf->flush_behind && + (!file->disabled) && (file->disable_till == 0)) { + 
flush_frame = copy_frame (frame); + STACK_UNWIND (frame, file->op_ret, + file->op_errno); // liar! liar! :O + + flush_frame->local = local; + wb_sync_all (flush_frame, file); + + STACK_WIND (flush_frame, + wb_ffr_bg_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } else { + wb_sync_all (frame, file); + + frame->local = local; + STACK_WIND (frame, + wb_ffr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } + + return 0; +} + + +int32_t +wb_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + wb_local_t *local = NULL; + wb_file_t *file = NULL; + + local = frame->local; + file = local->file; + + if (file->op_ret == -1) + { + op_ret = file->op_ret; + op_errno = file->op_errno; + + file->op_ret = 0; + } + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +wb_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync) +{ + wb_file_t *file = NULL; + wb_local_t *local = NULL; + uint64_t tmp_file = 0; + + if (fd_ctx_get (fd, this, &tmp_file)) { + gf_log (this->name, GF_LOG_ERROR, "returning EBADFD"); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + file = (wb_file_t *)(long)tmp_file; + if (file) + wb_sync_all (frame, file); + + local = CALLOC (1, sizeof (*local)); + local->file = file; + + frame->local = local; + + STACK_WIND (frame, + wb_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, datasync); + return 0; +} + + +int32_t +wb_release (xlator_t *this, + fd_t *fd) +{ + uint64_t file = 0; + + fd_ctx_get (fd, this, &file); + wb_file_destroy ((wb_file_t *)(long)file); + + return 0; +} + + +int32_t +init (xlator_t *this) +{ + dict_t *options = NULL; + wb_conf_t *conf = NULL; + char *aggregate_size_string = NULL; + char *window_size_string = NULL; + char *flush_behind_string = NULL; + char *disable_till_string = NULL; + char *enable_O_SYNC_string = NULL; + int32_t ret = -1; + + if ((this->children == NULL) + || 
this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: write-behind (%s) not configured with exactly one child", + this->name); + return -1; + } + + if (this->parents == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile"); + } + + options = this->options; + + conf = CALLOC (1, sizeof (*conf)); + + conf->enable_O_SYNC = _gf_false; + ret = dict_get_str (options, "enable-O_SYNC", + &enable_O_SYNC_string); + if (ret == 0) { + ret = gf_string2boolean (enable_O_SYNC_string, + &conf->enable_O_SYNC); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'enable-O_SYNC' takes only boolean arguments"); + return -1; + } + } + + /* configure 'options aggregate-size <size>' */ + conf->aggregate_size = 0; + ret = dict_get_str (options, "block-size", + &aggregate_size_string); + if (ret == 0) { + ret = gf_string2bytesize (aggregate_size_string, + &conf->aggregate_size); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\" of \"option aggregate-size\"", + aggregate_size_string); + return -1; + } + } + + gf_log (this->name, GF_LOG_DEBUG, + "using aggregate-size = %"PRIu64"", + conf->aggregate_size); + + conf->disable_till = 1; + ret = dict_get_str (options, "disable-for-first-nbytes", + &disable_till_string); + if (ret == 0) { + ret = gf_string2bytesize (disable_till_string, + &conf->disable_till); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\" of \"option disable-for-first-nbytes\"", + disable_till_string); + return -1; + } + } + + gf_log (this->name, GF_LOG_DEBUG, + "disabling write-behind for first %"PRIu64" bytes", + conf->disable_till); + + /* configure 'option window-size <size>' */ + conf->window_size = 0; + ret = dict_get_str (options, "cache-size", + &window_size_string); + if (ret == 0) { + ret = gf_string2bytesize (window_size_string, + &conf->window_size); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "invalid number format \"%s\" of 
\"option window-size\"", + window_size_string); + FREE (conf); + return -1; + } + } + + if (!conf->window_size && conf->aggregate_size) { + gf_log (this->name, GF_LOG_WARNING, + "setting window-size to be equal to aggregate-size(%"PRIu64")", + conf->aggregate_size); + conf->window_size = conf->aggregate_size; + } + + if (conf->window_size < conf->aggregate_size) { + gf_log (this->name, GF_LOG_ERROR, + "aggregate-size(%"PRIu64") cannot be more than window-size" + "(%"PRIu64")", conf->window_size, conf->aggregate_size); + FREE (conf); + return -1; + } + + /* configure 'option flush-behind <on/off>' */ + conf->flush_behind = 0; + ret = dict_get_str (options, "flush-behind", + &flush_behind_string); + if (ret == 0) { + ret = gf_string2boolean (flush_behind_string, + &conf->flush_behind); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'flush-behind' takes only boolean arguments"); + return -1; + } + + if (conf->flush_behind) { + gf_log (this->name, GF_LOG_DEBUG, + "enabling flush-behind"); + } + } + this->private = conf; + return 0; +} + + +void +fini (xlator_t *this) +{ + wb_conf_t *conf = this->private; + + FREE (conf); + return; +} + + +struct xlator_fops fops = { + .writev = wb_writev, + .open = wb_open, + .create = wb_create, + .readv = wb_readv, + .flush = wb_flush, + .fsync = wb_fsync, + .stat = wb_stat, + .fstat = wb_fstat, + .truncate = wb_truncate, + .ftruncate = wb_ftruncate, + .utimens = wb_utimens, +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { + .release = wb_release +}; + +struct volume_options options[] = { + { .key = {"flush-behind"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"block-size", "aggregate-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 128 * GF_UNIT_KB, + .max = 4 * GF_UNIT_MB + }, + { .key = {"cache-size", "window-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 512 * GF_UNIT_KB, + .max = 1 * GF_UNIT_GB + }, + { .key = {"disable-for-first-nbytes"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 1, + .max = 
1 * GF_UNIT_MB, + }, + { .key = {"enable-O_SYNC"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {NULL} }, +}; diff --git a/xlators/protocol/Makefile.am b/xlators/protocol/Makefile.am new file mode 100644 index 000000000..745e277c2 --- /dev/null +++ b/xlators/protocol/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = client server + +CLEANFILES = diff --git a/xlators/protocol/client/Makefile.am b/xlators/protocol/client/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/protocol/client/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am new file mode 100644 index 000000000..fb720942c --- /dev/null +++ b/xlators/protocol/client/src/Makefile.am @@ -0,0 +1,16 @@ + +xlator_LTLIBRARIES = client.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol + +client_la_LDFLAGS = -module -avoidversion + +client_la_SOURCES = client-protocol.c saved-frames.c +client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = client-protocol.h saved-frames.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/protocol/client/src/client-protocol.c b/xlators/protocol/client/src/client-protocol.c new file mode 100644 index 000000000..5c93bd6f1 --- /dev/null +++ b/xlators/protocol/client/src/client-protocol.c @@ -0,0 +1,6671 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <inttypes.h> + + +#include "glusterfs.h" +#include "client-protocol.h" +#include "compat.h" +#include "dict.h" +#include "protocol.h" +#include "transport.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "defaults.h" +#include "compat.h" +#include "compat-errno.h" + +#include <sys/resource.h> +#include <inttypes.h> + +/* for default_*_cbk functions */ +#include "defaults.c" +#include "saved-frames.h" + + +int protocol_client_cleanup (transport_t *trans); +int protocol_client_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, + char *buf_p, size_t buflen); +int +protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans, + int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + dict_t *refs); + +static gf_op_t gf_fops[]; +static gf_op_t gf_mops[]; +static gf_op_t gf_cbks[]; + + +static ino_t +this_ino_get_from_inode (inode_t *inode, xlator_t *this) +{ + ino_t ino = 0; + int32_t ret = 0; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + if (inode->ino == 1) { + ino = 1; + goto out; + } + + ret = inode_ctx_get (inode, this, &ino); + + if (inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "(%"PRId64"): failed to get remote inode number", + inode->ino); + } + +out: + return ino; +} + + +static ino_t +this_ino_get (loc_t *loc, xlator_t *this, int32_t which) +{ + ino_t ino = 0; + int32_t ret = 0; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO 
("client", this, out); + + if (which == GF_CLIENT_INODE_SELF) { + inode = loc->inode; + } else if (which == GF_CLIENT_INODE_PARENT) { + inode = loc->parent; + } + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + if (inode->ino == 1) { + ino = 1; + goto out; + } + + ret = inode_ctx_get (inode, this, &ino); + + if (inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s(%s - %"PRId64") failed to get remote inode number", + loc->path, + (which == GF_CLIENT_INODE_SELF? "self" : "parent"), + inode->ino); + } + +out: + return ino; +} + + +static void +this_ino_set (loc_t *loc, xlator_t *this, ino_t ino) +{ + ino_t old_ino = 0; + int32_t ret = -1; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO ("client", this, out); + + inode = loc->inode; + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + ret = inode_ctx_get (inode, this, &old_ino); + + if (old_ino != ino) { + if (old_ino) + gf_log (this->name, GF_LOG_DEBUG, + "%s: inode number changed from %"PRId64" " + "to %"PRId64, + loc->path, old_ino, ino); + + ret = inode_ctx_put (inode, this, ino); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to set remote " + "inode number to inode ctx", + loc->path, ino); + } + } +out: + return; +} + + +static int +this_fd_get (fd_t *file, xlator_t *this, int64_t *remote_fd) +{ + int ret = 0; + int dict_ret = -1; + uint64_t tmp_fd = 0; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, file, out); + GF_VALIDATE_OR_GOTO (this->name, remote_fd, out); + + dict_ret = fd_ctx_get (file, this, &tmp_fd); + + if (dict_ret < 0) { + ret = -1; + } + *remote_fd = (int64_t)tmp_fd; +out: + return ret; +} + + +static void +this_fd_set (fd_t *file, xlator_t *this, loc_t *loc, int64_t fd) +{ + uint64_t old_fd = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, file, out); + + ret = fd_ctx_get (file, this, &old_fd); + if (ret >= 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s 
(%"PRId64"): trying duplicate remote fd set. " + "%"PRId64" over-rides %"PRId64, + loc->path, loc->inode->ino, fd, old_fd); + } + + ret = fd_ctx_set (file, this, (uint64_t)fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to set remote fd", + loc->path, loc->inode->ino); + } +out: + return; +} + + +static int +client_local_wipe (client_local_t *local) +{ + if (local) { + loc_wipe (&local->loc); + + if (local->fd) + fd_unref (local->fd); + + free (local); + } + + return 0; +} + +/* + * lookup_frame - lookup call frame corresponding to a given callid + * @trans: transport object + * @callid: call id of the frame + * + * not for external reference + */ + +static call_frame_t * +lookup_frame (transport_t *trans, int32_t op, int8_t type, int64_t callid) +{ + client_connection_t *conn = NULL; + call_frame_t *frame = NULL; + + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + frame = saved_frames_get (conn->saved_frames, + op, type, callid); + } + pthread_mutex_unlock (&conn->lock); + + return frame; +} + + +static void +call_bail (void *data) +{ + client_connection_t *conn = NULL; + struct timeval current; + int32_t bail_out = 0; + transport_t *trans = NULL; + + GF_VALIDATE_OR_GOTO("client", data, out); + trans = data; + + conn = trans->xl_private; + + gettimeofday (&current, NULL); + pthread_mutex_lock (&conn->lock); + { + /* Chaining to get call-always functionality from + call-once timer */ + if (conn->timer) { + struct timeval timeout = {0,}; + gf_timer_cbk_t timer_cbk = conn->timer->cbk; + + timeout.tv_sec = 10; + timeout.tv_usec = 0; + + gf_timer_call_cancel (trans->xl->ctx, conn->timer); + conn->timer = gf_timer_call_after (trans->xl->ctx, + timeout, + timer_cbk, + trans); + if (conn->timer == NULL) { + gf_log (trans->xl->name, GF_LOG_DEBUG, + "Cannot create bailout timer"); + } + } + + if (((conn->saved_frames->count > 0) && + (RECEIVE_TIMEOUT(conn, current)) && + (SEND_TIMEOUT(conn, current)))) { + + struct tm 
last_sent_tm, last_received_tm; + char last_sent[32] = {0,}, last_received[32] = {0,}; + + bail_out = 1; + + localtime_r (&conn->last_sent.tv_sec, + &last_sent_tm); + localtime_r (&conn->last_received.tv_sec, + &last_received_tm); + + strftime (last_sent, 32, + "%Y-%m-%d %H:%M:%S", &last_sent_tm); + strftime (last_received, 32, + "%Y-%m-%d %H:%M:%S", &last_received_tm); + + gf_log (trans->xl->name, GF_LOG_ERROR, + "activating bail-out. pending frames = %d. " + "last sent = %s. last received = %s. " + "transport-timeout = %d", + (int32_t) conn->saved_frames->count, + last_sent, last_received, + conn->transport_timeout); + } + } + + if (bail_out) { + conn->ping_started = 0; + } + + pthread_mutex_unlock (&conn->lock); + + if (bail_out) { + gf_log (trans->xl->name, GF_LOG_CRITICAL, + "bailing transport"); + transport_disconnect (trans); + } +out: + return; +} + + +void +save_frame (transport_t *trans, call_frame_t *frame, + int32_t op, int8_t type, uint64_t callid) +{ + client_connection_t *conn = NULL; + struct timeval timeout = {0, }; + + + conn = trans->xl_private; + + saved_frames_put (conn->saved_frames, frame, op, type, callid); + + if (conn->timer == NULL) { + timeout.tv_sec = 10; + timeout.tv_usec = 0; + conn->timer = gf_timer_call_after (trans->xl->ctx, timeout, + call_bail, (void *) trans); + } +} + + +int +client_get_forgets (xlator_t *this, client_forget_t *forget) +{ + call_frame_t *fr = NULL; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_cbk_forget_req_t *req = NULL; + int ret = -1; + client_conf_t *conf = NULL; + int count = 0; + int index = 0; + + conf = this->private; + + if (conf->forget.count > 0) { + count = conf->forget.count; + + hdrlen = gf_hdr_len (req, (count * sizeof (int64_t))); + hdr = gf_hdr_new (req, (count * sizeof (int64_t))); + GF_VALIDATE_OR_GOTO (this->name, hdr, out); + + req = gf_param (hdr); + + req->count = hton32 (count); + for (index = 0; index < count; index++) { + req->ino_array[index] = + hton64 
(conf->forget.ino_array[index]); + } + + fr = create_frame (this, this->ctx->pool); + GF_VALIDATE_OR_GOTO (this->name, fr, out); + + conf->forget.frames_in_transit++; + + forget->frame = fr; + forget->hdr = hdr; + forget->hdrlen = hdrlen; + + ret = count; + + conf->forget.count = 0; + } + out: + return ret; +} + + +void +client_ping_timer_expired (void *data) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + + trans = data; + this = trans->xl; + conf = this->private; + conn = trans->xl_private; + + gf_log (this->name, GF_LOG_ERROR, + "ping timer expired! bailing transport"); + + pthread_mutex_lock (&conn->lock); + { + if (conn->ping_timer) + gf_timer_call_cancel (trans->xl->ctx, + conn->ping_timer); + + conn->ping_started = 0; + conn->ping_timer = NULL; + } + pthread_mutex_unlock (&conn->lock); + transport_disconnect (trans); +} + + +void +client_start_ping (void *data) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + int32_t ret = -1; + gf_hdr_common_t *hdr = NULL; + struct timeval timeout = {0, }; + call_frame_t *dummy_frame = NULL; + size_t hdrlen = -1; + gf_mop_ping_req_t *req = NULL; + + + trans = data; + this = trans->xl; + conf = this->private; + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + if ((conn->saved_frames->count == 0) || + !conn->connected) { + /* using goto looked ugly here, + * hence getting out this way */ + if (conn->ping_timer) + gf_timer_call_cancel (trans->xl->ctx, + conn->ping_timer); + conn->ping_timer = NULL; + conn->ping_started = 0; + /* unlock */ + pthread_mutex_unlock (&conn->lock); + return; + } + + if (conn->saved_frames->count < 0) { + gf_log (this->name, GF_LOG_ERROR, + "saved_frames->count is %"PRId64, + conn->saved_frames->count); + conn->saved_frames->count = 0; + } + timeout.tv_sec = conn->ping_timeout; + timeout.tv_usec = 0; + + conn->ping_timer = + 
gf_timer_call_after (trans->xl->ctx, timeout, + client_ping_timer_expired, + (void *) trans); + + if (conn->ping_timer == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "unable to setup timer"); + } else + conn->ping_started = 1; + } + pthread_mutex_unlock (&conn->lock); + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + + dummy_frame = create_frame (this, this->ctx->pool); + dummy_frame->local = trans; + + ret = protocol_client_xfer (dummy_frame, this, trans, + GF_OP_TYPE_MOP_REQUEST, GF_MOP_PING, + hdr, hdrlen, NULL, 0, NULL); +} + + +int +client_ping_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + struct timeval timeout = {0, }; + int op_ret = 0; + + trans = frame->local; frame->local = NULL; + this = trans->xl; + conf = this->private; + conn = trans->xl_private; + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret == -1) { + /* timer expired and transport bailed out */ + gf_log (this->name, GF_LOG_ERROR, "timer must have expired"); + goto out; + } + + pthread_mutex_lock (&conn->lock); + { + timeout.tv_sec = conn->ping_timeout; + timeout.tv_usec = 0; + + gf_timer_call_cancel (trans->xl->ctx, + conn->ping_timer); + + conn->ping_timer = + gf_timer_call_after (trans->xl->ctx, timeout, + client_start_ping, (void *)trans); + if (conn->ping_timer == NULL) + gf_log (this->name, GF_LOG_ERROR, + "gf_timer_call_after() returned NULL"); + } + pthread_mutex_unlock (&conn->lock); +out: + STACK_DESTROY (frame->root); + return 0; +} + + +int +protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans, + int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + dict_t *refs) +{ + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + uint64_t callid = 0; + int32_t ret = -1; + int start_ping = 0; + gf_hdr_common_t rsphdr = {0, }; + 
client_forget_t forget = {0, }; + uint8_t send_forget = 0; + + + conf = this->private; + + if (!trans) { + /* default to bulk op since it is 'safer' */ + trans = conf->transport[CHANNEL_BULK]; + } + conn = trans->xl_private; + + if (!((type == GF_OP_TYPE_CBK_REQUEST) && + (op == GF_CBK_FORGET))) + { + LOCK (&conf->forget.lock); + { + ret = client_get_forgets (this, &forget); + if (ret <= 0) + send_forget = 0; + else + send_forget = 1; + } + UNLOCK (&conf->forget.lock); + + if (send_forget) { + ret = protocol_client_xfer (forget.frame, this, NULL, + GF_OP_TYPE_CBK_REQUEST, + GF_CBK_FORGET, + forget.hdr, forget.hdrlen, + NULL, 0, NULL); + } + } + + pthread_mutex_lock (&conn->lock); + { + callid = ++conn->callid; + + hdr->callid = hton64 (callid); + hdr->op = hton32 (op); + hdr->type = hton32 (type); + + if (frame) { + hdr->req.uid = hton32 (frame->root->uid); + hdr->req.gid = hton32 (frame->root->gid); + hdr->req.pid = hton32 (frame->root->pid); + } + + if (conn->connected == 0) + transport_connect (trans); + + ret = -1; + + if (conn->connected || + ((type == GF_OP_TYPE_MOP_REQUEST) && + (op == GF_MOP_SETVOLUME))) { + ret = transport_submit (trans, (char *)hdr, hdrlen, + vector, count, refs); + } + + if ((ret >= 0) && frame) { + /* TODO: check this logic */ + gettimeofday (&conn->last_sent, NULL); + save_frame (trans, frame, op, type, callid); + } + + if (!conn->ping_started && (ret >= 0)) { + start_ping = 1; + } + } + pthread_mutex_unlock (&conn->lock); + + if (start_ping) + client_start_ping ((void *) trans); + + if (frame && (ret < 0)) { + rsphdr.op = op; + rsphdr.rsp.op_ret = hton32 (-1); + rsphdr.rsp.op_errno = hton32 (ENOTCONN); + + if (type == GF_OP_TYPE_FOP_REQUEST) { + rsphdr.type = GF_OP_TYPE_FOP_REPLY; + gf_fops[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0); + } else if (type == GF_OP_TYPE_MOP_REQUEST) { + rsphdr.type = GF_OP_TYPE_MOP_REPLY; + gf_mops[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0); + } else { + rsphdr.type = GF_OP_TYPE_CBK_REPLY; + 
gf_cbks[op] (frame, &rsphdr, sizeof (rsphdr), NULL, 0); + } + } + + return ret; +} + + + +/** + * client_create - create function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: complete path to file + * @flags: create flags + * @mode: create mode + * + * external reference through client_protocol_xlator->fops->create + */ + +int +client_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, + mode_t mode, fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_create_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t baselen = 0; + int32_t ret = -1; + ino_t par = 0; + client_conf_t *conf = NULL; + client_local_t *local = NULL; + + + conf = this->private; + + if (conf->child) { + STACK_WIND (frame, default_create_cbk, + conf->child, + conf->child->fops->create, + loc, flags, mode, fd); + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + local->fd = fd_ref (fd); + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0(loc->path); + baselen = STRLEN_0(loc->name); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + req->mode = hton32 (mode); + req->par = hton64 (par); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_CREATE, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, fd, NULL, NULL); + return 0; + +} + +/** + * client_open - open function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location of file + * @flags: open flags + * @mode: open modes + * 
+ * external reference through client_protocol_xlator->fops->open + */ +int +client_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_fop_open_req_t *req = NULL; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = NULL; + client_local_t *local = NULL; + + conf = this->private; + if (conf->child) { + /* */ + STACK_WIND (frame, default_open_cbk, + conf->child, + conf->child->fops->open, + loc, flags, fd); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + local->fd = fd_ref (fd); + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->flags = hton32 (flags); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPEN, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, fd); + return 0; + +} + + +/** + * client_stat - stat function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->stat + */ +int32_t +client_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_stat_req_t *req = NULL; + size_t hdrlen = -1; + int32_t ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_stat_cbk, + conf->child, + conf->child->fops->stat, + loc); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, 
this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_STAT, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_readlink - readlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @size: + * + * external reference through client_protocol_xlator->fops->readlink + */ +int32_t +client_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readlink_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_readlink_cbk, + conf->child, + conf->child->fops->readlink, + loc, + size); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->size = hton32 (size); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_READLINK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_mknod - mknod function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: pathname of node + * @mode: + * @dev: + * + * external reference through 
client_protocol_xlator->fops->mknod + */ +int32_t +client_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t dev) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mknod_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_mknod_cbk, + conf->child, + conf->child->fops->mknod, + loc, mode, dev); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0(loc->path); + baselen = STRLEN_0(loc->name); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->mode = hton32 (mode); + req->dev = hton64 (dev); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKNOD, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL); + return 0; + +} + + +/** + * client_mkdir - mkdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: pathname of directory + * @mode: + * + * external reference through client_protocol_xlator->fops->mkdir + */ +int32_t +client_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mkdir_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + 
if (conf->child) { + /* */ + STACK_WIND (frame, + default_mkdir_cbk, + conf->child, + conf->child->fops->mkdir, + loc, mode); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0(loc->path); + baselen = STRLEN_0(loc->name); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->mode = hton32 (mode); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL); + return 0; + +} + + + +/** + * client_unlink - unlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location of file + * + * external reference through client_protocol_xlator->fops->unlink + */ +int32_t +client_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_unlink_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_unlink_cbk, + conf->child, + conf->child->fops->unlink, + loc); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + baselen = STRLEN_0(loc->name); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + strcpy 
(req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_UNLINK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + +/** + * client_rmdir - rmdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->rmdir + */ +int32_t +client_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rmdir_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_rmdir_cbk, + conf->child, + conf->child->fops->rmdir, + loc); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + baselen = STRLEN_0(loc->name); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_RMDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + + + +/** + * client_symlink - symlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldpath: pathname of target + * @newpath: pathname of symlink + * + * external reference through client_protocol_xlator->fops->symlink + */ +int32_t +client_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkname, + loc_t 
*loc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_symlink_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t newlen = 0; + size_t baselen = 0; + ino_t par = 0; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_symlink_cbk, + conf->child, + conf->child->fops->symlink, + linkname, loc); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + newlen = STRLEN_0 (linkname); + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, pathlen + baselen + newlen); + hdr = gf_hdr_new (req, pathlen + baselen + newlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + strcpy (req->linkname + pathlen + baselen, linkname); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_SYMLINK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL); + return 0; + +} + + +/** + * client_rename - rename function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldloc: location of old pathname + * @newloc: location of new pathname + * + * external reference through client_protocol_xlator->fops->rename + */ +int32_t +client_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_rename_req_t *req = NULL; + size_t hdrlen = 0; + size_t oldpathlen = 0; + size_t oldbaselen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + ino_t oldpar = 0; + ino_t newpar = 0; + client_conf_t 
*conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_rename_cbk, + conf->child, + conf->child->fops->rename, + oldloc, newloc); + + return 0; + } + + oldpathlen = STRLEN_0(oldloc->path); + oldbaselen = STRLEN_0(oldloc->name); + newpathlen = STRLEN_0(newloc->path); + newbaselen = STRLEN_0(newloc->name); + oldpar = this_ino_get (oldloc, this, GF_CLIENT_INODE_PARENT); + newpar = this_ino_get (newloc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, (oldpathlen + oldbaselen + + newpathlen + newbaselen)); + hdr = gf_hdr_new (req, (oldpathlen + oldbaselen + + newpathlen + newbaselen)); + + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->oldpar = hton64 (oldpar); + req->newpar = hton64 (newpar); + + strcpy (req->oldpath, oldloc->path); + strcpy (req->oldbname + oldpathlen, oldloc->name); + strcpy (req->newpath + oldpathlen + oldbaselen, newloc->path); + strcpy (req->newbname + oldpathlen + oldbaselen + newpathlen, + newloc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_RENAME, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + + +/** + * client_link - link function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldloc: location of old pathname + * @newpath: new pathname + * + * external reference through client_protocol_xlator->fops->link + */ + +int32_t +client_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_link_req_t *req = NULL; + size_t hdrlen = 0; + size_t oldpathlen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + ino_t oldino = 0; + ino_t newpar = 0; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + if (conf->child) { + /* */ + STACK_WIND (frame, + 
default_link_cbk, + conf->child, + conf->child->fops->link, + oldloc, newloc); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, oldloc); + + frame->local = local; + + oldpathlen = STRLEN_0(oldloc->path); + newpathlen = STRLEN_0(newloc->path); + newbaselen = STRLEN_0(newloc->name); + oldino = this_ino_get (oldloc, this, GF_CLIENT_INODE_SELF); + newpar = this_ino_get (newloc, this, GF_CLIENT_INODE_PARENT); + + hdrlen = gf_hdr_len (req, oldpathlen + newpathlen + newbaselen); + hdr = gf_hdr_new (req, oldpathlen + newpathlen + newbaselen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + strcpy (req->oldpath, oldloc->path); + strcpy (req->newpath + oldpathlen, newloc->path); + strcpy (req->newbname + oldpathlen + newpathlen, newloc->name); + + req->oldino = hton64 (oldino); + req->newpar = hton64 (newpar); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_LINK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, oldloc->inode, NULL); + return 0; +} + + + +/** + * client_chmod - chmod function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @mode: + * + * external reference through client_protocol_xlator->fops->chmod + */ +int32_t +client_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_chmod_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_chmod_cbk, + conf->child, + conf->child->fops->chmod, + loc, + mode); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = 
gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->mode = hton32 (mode); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHMOD, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_chown - chown function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @uid: uid of new owner + * @gid: gid of new owner group + * + * external reference through client_protocol_xlator->fops->chown + */ +int32_t +client_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_chown_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_chown_cbk, + conf->child, + conf->child->fops->chown, + loc, + uid, + gid); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->uid = hton32 (uid); + req->gid = hton32 (gid); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHOWN, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + +/** + * client_truncate - truncate function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @offset: + * + * external reference through 
client_protocol_xlator->fops->truncate + */ +int32_t +client_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_truncate_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_truncate_cbk, + conf->child, + conf->child->fops->truncate, + loc, + offset); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->offset = hton64 (offset); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_TRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + + +/** + * client_utimes - utimes function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @tvp: + * + * external reference through client_protocol_xlator->fops->utimes + */ +int32_t +client_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec *tvp) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_utimens_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_utimens_cbk, + conf->child, + conf->child->fops->utimens, + loc, + tvp); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param 
(hdr); + + req->ino = hton64 (ino); + gf_timespec_from_timespec (req->tv, tvp); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_UTIMENS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + + +/** + * client_readv - readv function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @size: + * @offset: + * + * external reference through client_protocol_xlator->fops->readv + */ +int32_t +client_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_read_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_readv_cbk, + conf->child, + conf->child->fops->readv, + fd, + size, + offset); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd, returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_READ, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL, 0, NULL); + return 0; + +} + + +/** + * client_writev - writev function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @vector: + * 
@count: + * @offset: + * + * external reference through client_protocol_xlator->fops->writev + */ +int32_t +client_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_write_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_writev_cbk, + conf->child, + conf->child->fops->writev, + fd, + vector, + count, + offset); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->size = hton32 (iov_length (vector, count)); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_WRITE, + hdr, hdrlen, vector, count, + frame->root->req_refs); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_statfs - statfs function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->statfs + */ +int32_t +client_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_statfs_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_statfs_cbk, + conf->child, + conf->child->fops->statfs, + loc); + + return 0; + } + + pathlen = 
STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_STATFS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_flush - flush function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->fops->flush + */ + +int32_t +client_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_flush_cbk, + conf->child, + conf->child->fops->flush, + fd); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FLUSH, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + + + + +/** + * client_fsync - fsync function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @flags: + * + * external reference through client_protocol_xlator->fops->fsync + */ + +int32_t +client_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsync_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fsync_cbk, + conf->child, + conf->child->fops->fsync, + fd, + flags); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + STACK_UNWIND(frame, -1, EBADFD); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->data = hton32 (flags); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNC, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + +int32_t +client_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int32_t ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO("client", this, unwind); + + conf = this->private; + if (conf->child) { + /* */ + STACK_WIND (frame, + default_xattrop_cbk, + conf->child, + conf->child->fops->xattrop, + loc, + flags, + dict); + + return 0; + } + + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + + if (dict) { + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, dict_len + pathlen); + hdr = gf_hdr_new (req, dict_len + pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + if (dict) { + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + } + req->ino = hton64 (ino); + strcpy (req->path + dict_len, loc->path); + + ret = protocol_client_xfer (frame, this, + 
CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_XATTROP, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; +} + + +int32_t +client_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fxattrop_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fxattrop_cbk, + conf->child, + conf->child->fops->fxattrop, + fd, + flags, + dict); + + return 0; + } + + if (dict) { + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + } + + if (fd) { + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + goto unwind; + } + ino = fd->inode->ino; + } + + hdrlen = gf_hdr_len (req, dict_len); + hdr = gf_hdr_new (req, dict_len); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + if (dict) { + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + } + req->fd = hton64 (remote_fd); + req->ino = hton64 (ino); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FXATTROP, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + +} + + +/** + * client_setxattr - setxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @dict: dictionary which contains key:value to be set. 
+ * @flags: + * + * external reference through client_protocol_xlator->fops->setxattr + */ +int32_t +client_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setxattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_setxattr_cbk, + conf->child, + conf->child->fops->setxattr, + loc, + dict, + flags); + + return 0; + } + + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, dict_len + pathlen); + hdr = gf_hdr_new (req, dict_len + pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + + strcpy (req->path + dict_len, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; +} + +/** + * client_getxattr - getxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * + * external reference through client_protocol_xlator->fops->getxattr + */ +int32_t +client_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_getxattr_req_t 
*req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t namelen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_getxattr_cbk, + conf->child, + conf->child->fops->getxattr, + loc, + name); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + if (name) + namelen = STRLEN_0(name); + + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen + namelen); + hdr = gf_hdr_new (req, pathlen + namelen); + GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->namelen = hton32 (namelen); + strcpy (req->path, loc->path); + if (name) + strcpy (req->name + pathlen, name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; +} + +/** + * client_removexattr - removexattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * @name: + * + * external reference through client_protocol_xlator->fops->removexattr + */ +int32_t +client_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_removexattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t namelen = 0; + size_t pathlen = 0; + ino_t ino = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_removexattr_cbk, + conf->child, + conf->child->fops->removexattr, + loc, + name); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + namelen = STRLEN_0(name); + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen + namelen); + hdr = gf_hdr_new (req, pathlen + namelen); + GF_VALIDATE_OR_GOTO(frame->this->name, 
hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + strcpy (req->path, loc->path); + strcpy (req->name + pathlen, name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_REMOVEXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL); + return 0; +} + + +/** + * client_opendir - opendir function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * + * external reference through client_protocol_xlator->fops->opendir + */ +int32_t +client_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + gf_fop_opendir_req_t *req = NULL; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + int ret = -1; + ino_t ino = 0; + size_t pathlen = 0; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_opendir_cbk, + conf->child, + conf->child->fops->opendir, + loc, fd); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, loc); + local->fd = fd_ref (fd); + + frame->local = local; + + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + pathlen = STRLEN_0(loc->path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPENDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND(frame, -1, EINVAL, fd); + return 0; + +} + + +/** + * client_readdir - readdir function for client protocol + * @frame: call frame + * @this: this translator structure + * + * external reference 
through client_protocol_xlator->fops->readdir + */ + +int32_t +client_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_getdents_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_getdents_cbk, + conf->child, + conf->child->fops->getdents, + fd, + size, + offset, + flag); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind); + + req = gf_param (hdr); + GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + req->flags = hton32 (flag); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETDENTS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + STACK_UNWIND(frame, -1, EINVAL, NULL, 0); + return 0; +} + +/** + * client_readdir - readdir function for client protocol + * @frame: call frame + * @this: this translator structure + * + * external reference through client_protocol_xlator->fops->readdir + */ + +int32_t +client_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readdir_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_readdir_cbk, + conf->child, + conf->child->fops->readdir, + fd, size, offset); + + return 0; + } + + ret = this_fd_get (fd, 
this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. returning EBADFD", + fd->inode->ino); + goto unwind; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_READDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + free (hdr); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + +} + + + +/** + * client_fsyncdir - fsyncdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @flags: + * + * external reference through client_protocol_xlator->fops->fsyncdir + */ + +int32_t +client_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsyncdir_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fsyncdir_cbk, + conf->child, + conf->child->fops->fsyncdir, + fd, + flags); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + goto unwind; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->data = hton32 (flags); + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNCDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + STACK_UNWIND (frame, -1, EBADFD); + return 0; +} + + +/** + * client_access - access function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * @mode: + * + * external reference through client_protocol_xlator->fops->access + */ +int32_t +client_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_access_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + size_t pathlen = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_access_cbk, + conf->child, + conf->child->fops->access, + loc, + mask); + + return 0; + } + + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + pathlen = STRLEN_0(loc->path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->mask = hton32 (mask); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_ACCESS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + + +/** + * client_ftrucate - ftruncate function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @offset: offset to truncate to + * + * external reference through 
client_protocol_xlator->fops->ftruncate + */ + +int32_t +client_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_ftruncate_req_t *req = NULL; + int64_t remote_fd = -1; + size_t hdrlen = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_ftruncate_cbk, + conf->child, + conf->child->fops->ftruncate, + fd, + offset); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FTRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_fstat - fstat function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->fops->fstat + */ + +int32_t +client_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fstat_req_t *req = NULL; + int64_t remote_fd = -1; + size_t hdrlen = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fstat_cbk, + conf->child, + conf->child->fops->fstat, + fd); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSTAT, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_lk - lk function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @cmd: lock command + * @lock: + * + * external reference through client_protocol_xlator->fops->lk + */ +int32_t +client_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_lk_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_lk_cbk, + conf->child, + conf->child->fops->lk, + fd, + cmd, + flock); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + STACK_UNWIND(frame, -1, EBADFD, NULL); + return 0; + } + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_ERROR, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_LK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL, NULL); + return 0; +} + + +/** + * client_inodelk - inodelk function for client protocol + * @frame: call frame + * @this: this translator structure + * @inode: inode structure + * @cmd: lock command + * @lock: flock struct + * + * external reference through client_protocol_xlator->fops->inodelk + */ +int32_t +client_inodelk (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t cmd, + struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_inodelk_req_t *req = NULL; + size_t hdrlen = 0; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + ino_t ino = 0; + size_t pathlen = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_inodelk_cbk, + conf->child, + conf->child->fops->inodelk, + loc, cmd, flock); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + ino = this_ino_get (loc, this, 
GF_CLIENT_INODE_SELF); + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_ERROR, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + strcpy (req->path, loc->path); + + req->ino = hton64 (ino); + + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, + GF_FOP_INODELK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + + +/** + * client_finodelk - finodelk function for client protocol + * @frame: call frame + * @this: this translator structure + * @inode: inode structure + * @cmd: lock command + * @lock: flock struct + * + * external reference through client_protocol_xlator->fops->finodelk + */ +int32_t +client_finodelk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_finodelk_req_t *req = NULL; + size_t hdrlen = 0; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + int64_t remote_fd = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_finodelk_cbk, + conf->child, + conf->child->fops->finodelk, + fd, cmd, flock); + + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): 
failed to get remote fd. returning EBADFD", + fd->inode->ino); + STACK_UNWIND(frame, -1, EBADFD); + return 0; + } + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_ERROR, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, + GF_FOP_FINODELK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; +} + + +int32_t +client_entrylk (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name, + entrylk_cmd cmd, + entrylk_type type) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_entrylk_req_t *req = NULL; + size_t pathlen = 0; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + size_t namelen = 0; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, default_entrylk_cbk, + conf->child, + conf->child->fops->entrylk, + loc, name, cmd, type); + + return 0; + } + + pathlen = STRLEN_0(loc->path); + if (name) + namelen = STRLEN_0(name); + + ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF); + + hdrlen = gf_hdr_len (req, pathlen + namelen); + hdr = gf_hdr_new (req, pathlen + namelen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + 
req->namelen = hton64 (namelen); + + strcpy (req->path, loc->path); + if (name) + strcpy (req->name + pathlen, name); + + req->cmd = hton32 (cmd); + req->type = hton32 (type); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_ENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; + +} + + +int32_t +client_fentrylk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + const char *name, + entrylk_cmd cmd, + entrylk_type type) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fentrylk_req_t *req = NULL; + int64_t remote_fd = -1; + size_t namelen = 0; + size_t hdrlen = -1; + int ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, default_fentrylk_cbk, + conf->child, + conf->child->fops->fentrylk, + fd, name, cmd, type); + + return 0; + } + + if (name) + namelen = STRLEN_0(name); + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + STACK_UNWIND(frame, -1, EBADFD); + return 0; + } + + hdrlen = gf_hdr_len (req, namelen); + hdr = gf_hdr_new (req, namelen); + GF_VALIDATE_OR_GOTO(this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->namelen = hton64 (namelen); + + if (name) + strcpy (req->name, name); + + req->cmd = hton32 (cmd); + req->type = hton32 (type); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + free (hdr); + + STACK_UNWIND(frame, -1, EINVAL); + return 0; +} + + +/* + * client_lookup - lookup function for client protocol + * @frame: call frame + * @this: + * @loc: location + * + * not for external reference + */ +int32_t +client_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lookup_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + ino_t par = 0; + size_t dictlen = 0; + size_t pathlen = 0; + size_t baselen = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + client_conf_t *conf = this->private; + client_local_t *local = NULL; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_lookup_cbk, + conf->child, + conf->child->fops->lookup, + loc, + xattr_req); + + return 0; + } + + local = calloc (1, sizeof (*local)); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + GF_VALIDATE_OR_GOTO (this->name, loc, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind); + + if (loc->ino != 1) { + par = this_ino_get (loc, this, GF_CLIENT_INODE_PARENT); + GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind); + baselen = STRLEN_0(loc->name); + } else { + ino = 1; + } + + pathlen = STRLEN_0(loc->path); + + if (xattr_req) { + dictlen = dict_serialized_length (xattr_req); + if (dictlen < 0) { + gf_log 
(this->name, GF_LOG_ERROR, + "failed to get serialized length of dict(%p)", + xattr_req); + ret = dictlen; + goto unwind; + } + } + + hdrlen = gf_hdr_len (req, pathlen + baselen + dictlen); + hdr = gf_hdr_new (req, pathlen + baselen + dictlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->par = hton64 (par); + strcpy (req->path, loc->path); + if (baselen) + strcpy (req->path + pathlen, loc->name); + + if (dictlen) { + ret = dict_serialize (xattr_req, req->dict + baselen + pathlen); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + xattr_req); + goto unwind; + } + } + + req->dictlen = hton32 (dictlen); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_LOOKUP, + hdr, hdrlen, NULL, 0, NULL); + return ret; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, NULL, NULL); + return ret; +} + + + +/* + * client_fchmod + * + */ +int32_t +client_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fchmod_req_t *req = NULL; + int64_t remote_fd = -1; + size_t hdrlen = -1; + int ret = -1; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fchmod_cbk, + conf->child, + conf->child->fops->fchmod, + fd, + mode); + + return 0; + } + + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + op_errno = EBADFD; + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + goto unwind; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->mode = hton32 (mode); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHMOD, + hdr, hdrlen, NULL, 0, NULL); + + return 0; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, NULL); + return 0; +} + + +/* + * client_fchown - + * + * @frame: + * @this: + * @fd: + * @uid: + * @gid: + * + */ +int32_t +client_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fchown_req_t *req = NULL; + int64_t remote_fd = 0; + size_t hdrlen = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t ret = -1; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_fchown_cbk, + conf->child, + conf->child->fops->fchown, + fd, + uid, + gid); + + return 0; + } + + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + op_errno = EBADFD; + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + goto unwind; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->uid = hton32 (uid); + req->gid = hton32 (gid); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHOWN, + hdr, hdrlen, NULL, 0, NULL); + + return 0; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, NULL); + return 0; + +} + +/** + * client_setdents - + */ +int32_t +client_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setdents_req_t *req = NULL; + int64_t remote_fd = 0; + char *buffer = NULL; + char *ptr = NULL; + data_t *buf_data = NULL; + dict_t *reply_dict = NULL; + dir_entry_t *trav = NULL; + uint32_t len = 0; + int32_t buf_len = 0; + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t vec_count = 0; + size_t hdrlen = -1; + struct iovec vector[1]; + client_conf_t *conf = this->private; + + if (conf->child) { + /* */ + STACK_WIND (frame, + default_setdents_cbk, + conf->child, + conf->child->fops->setdents, + fd, + flags, + entries, + count); + + return 0; + } + + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd. 
returning EBADFD", + fd->inode->ino); + op_errno = EBADFD; + goto unwind; + } + + GF_VALIDATE_OR_GOTO (this->name, entries, unwind); + GF_VALIDATE_OR_GOTO (this->name, count, unwind); + + trav = entries->next; + while (trav) { + len += strlen (trav->name); + len += 1; + len += strlen (trav->link); + len += 1; + len += 256; // max possible for statbuf; + trav = trav->next; + } + buffer = CALLOC (1, len); + GF_VALIDATE_OR_GOTO (this->name, buffer, unwind); + + ptr = buffer; + + trav = entries->next; + while (trav) { + int32_t this_len = 0; + char *tmp_buf = NULL; + struct stat *stbuf = &trav->buf; + { + /* Convert the stat buf to string */ + uint64_t dev = stbuf->st_dev; + uint64_t ino = stbuf->st_ino; + uint32_t mode = stbuf->st_mode; + uint32_t nlink = stbuf->st_nlink; + uint32_t uid = stbuf->st_uid; + uint32_t gid = stbuf->st_gid; + uint64_t rdev = stbuf->st_rdev; + uint64_t size = stbuf->st_size; + uint32_t blksize = stbuf->st_blksize; + uint64_t blocks = stbuf->st_blocks; + + uint32_t atime = stbuf->st_atime; + uint32_t mtime = stbuf->st_mtime; + uint32_t ctime = stbuf->st_ctime; + + uint32_t atime_nsec = ST_ATIM_NSEC(stbuf); + uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf); + uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf); + + asprintf (&tmp_buf, + GF_STAT_PRINT_FMT_STR, + dev, + ino, + mode, + nlink, + uid, + gid, + rdev, + size, + blksize, + blocks, + atime, + atime_nsec, + mtime, + mtime_nsec, + ctime, + ctime_nsec); + } + this_len = sprintf (ptr, "%s/%s%s\n", + trav->name, + tmp_buf, + trav->link); + + FREE (tmp_buf); + trav = trav->next; + ptr += this_len; + } + buf_len = strlen (buffer); + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->flags = hton32 (flags); + req->count = hton32 (count); + + { + buf_data = get_new_data (); + GF_VALIDATE_OR_GOTO (this->name, buf_data, unwind); + reply_dict = get_new_dict(); + GF_VALIDATE_OR_GOTO 
(this->name, reply_dict, unwind); + + buf_data->data = buffer; + buf_data->len = buf_len; + dict_set (reply_dict, NULL, buf_data); + frame->root->rsp_refs = dict_ref (reply_dict); + vector[0].iov_base = buffer; + vector[0].iov_len = buf_len; + vec_count = 1; + } + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETDENTS, + hdr, hdrlen, vector, vec_count, + frame->root->rsp_refs); + + return ret; +unwind: + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/* + * CBKs + */ +/* + * client_forget - forget function for client protocol + * @this: + * @inode: + * + * not for external reference + */ +int32_t +client_forget (xlator_t *this, + inode_t *inode) +{ + ino_t ino = 0; + client_conf_t *conf = NULL; + client_forget_t forget = {0,}; + uint8_t send_forget = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + conf = this->private; + + if (conf->child) { + /* */ + /* Yenu beda */ + return 0; + } + + GF_VALIDATE_OR_GOTO (this->name, inode, out); + ino = this_ino_get_from_inode (inode, this); + + LOCK (&conf->forget.lock); + { + conf->forget.ino_array[conf->forget.count++] = ino; + + if ((!conf->forget.frames_in_transit) || + (conf->forget.count >= CLIENT_PROTO_FORGET_LIMIT)) { + ret = client_get_forgets (this, &forget); + if (ret <= 0) + send_forget = 0; + else + send_forget = 1; + } + } + UNLOCK (&conf->forget.lock); + + if (send_forget) { + ret = protocol_client_xfer (forget.frame, this, + CLIENT_CHANNEL (this,CHANNEL_BULK), + GF_OP_TYPE_CBK_REQUEST, + GF_CBK_FORGET, + forget.hdr, forget.hdrlen, + NULL, 0, NULL); + } +out: + return 0; +} + +/** + * client_releasedir - releasedir function for client protocol + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->cbks->releasedir + */ + +int32_t +client_releasedir (xlator_t *this, fd_t *fd) +{ + call_frame_t *fr = NULL; + int32_t ret = -1; + int64_t 
remote_fd = 0; + char key[32] = {0,}; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_cbk_releasedir_req_t *req = NULL; + client_conf_t *conf = NULL; + + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + conf = this->private; + if (conf->child) { + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1){ + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd.", + fd->inode->ino); + goto out; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, out); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + { + sprintf (key, "%p", fd); + + pthread_mutex_lock (&conf->mutex); + { + dict_del (conf->saved_fds, key); + } + pthread_mutex_unlock (&conf->mutex); + } + + fr = create_frame (this, this->ctx->pool); + GF_VALIDATE_OR_GOTO (this->name, fr, out); + + ret = protocol_client_xfer (fr, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASEDIR, + hdr, hdrlen, NULL, 0, NULL); +out: + return ret; +} + + +/** + * client_release - release function for client protocol + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->cbks->release + * + */ +int +client_release (xlator_t *this, fd_t *fd) +{ + call_frame_t *fr = NULL; + int32_t ret = -1; + int64_t remote_fd = 0; + char key[32] = {0,}; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_cbk_release_req_t *req = NULL; + client_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + conf = this->private; + + if (conf->child) { + return 0; + } + + ret = this_fd_get (fd, this, &remote_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get remote fd.", + fd->inode->ino); + goto out; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO 
(this->name, hdr, out); + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + { + sprintf (key, "%p", fd); + + pthread_mutex_lock (&conf->mutex); + { + dict_del (conf->saved_fds, key); + } + pthread_mutex_unlock (&conf->mutex); + } + + fr = create_frame (this, this->ctx->pool); + GF_VALIDATE_OR_GOTO (this->name, fr, out); + + ret = protocol_client_xfer (fr, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASE, + hdr, hdrlen, NULL, 0, NULL); +out: + return ret; +} + +/* + * MGMT_OPS + */ + +/** + * client_stats - stats function for client protocol + * @frame: call frame + * @this: this translator structure + * @flags: + * + * external reference through client_protocol_xlator->mops->stats + */ + +int32_t +client_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_mop_stats_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + client_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO ("client", this, unwind); + + conf = this->private; + if (conf->child) { + /* */ + STACK_WIND (frame, + default_stats_cbk, + conf->child, + conf->child->mops->stats, + flags); + + return 0; + } + + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_MOP_REQUEST, GF_MOP_STATS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + + +/* Callbacks */ + +int32_t +client_fxattrop_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_xattrop_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t gf_errno = 0; + int32_t op_errno = 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO(frame->this->name, 
rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret >= 0) { + op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail); + + dict = dict_new(); + GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + dict); + op_errno = -ret; + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dictbuf) + free (dictbuf); + + if (dict) + dict_unref (dict); + + return 0; +} + +int32_t +client_xattrop_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_xattrop_rsp_t *rsp = NULL; + int32_t op_ret = -1; + int32_t gf_errno = EINVAL; + int32_t op_errno = 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + if (op_ret >= 0) { + op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail); + + dict = get_new_dict(); + GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail); + dict_ref (dict); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + dict); + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + + +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dictbuf) + 
free (dictbuf); + if (dict) + dict_unref (dict); + + return 0; +} + +/* + * client_chown_cbk - + * + * @frame: + * @args: + * + * not for external reference + */ +int32_t +client_fchown_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_fchown_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +/* + * client_fchmod_cbk + * + * @frame: + * @args: + * + * not for external reference + */ +int32_t +client_fchmod_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_fchmod_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +/* + * client_create_cbk - create callback function for client protocol + * @frame: call frame + * @args: arguments in dictionary + * + * not for external reference + */ + +int +client_create_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_create_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + fd_t *fd = NULL; + inode_t *inode = NULL; + struct stat stbuf = {0, }; + int64_t remote_fd = 0; + char key[32] = {0, }; + int32_t ret = -1; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 
(hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + if (op_ret >= 0) { + this_ino_set (&local->loc, frame->this, stbuf.st_ino); + this_fd_set (fd, frame->this, &local->loc, remote_fd); + + sprintf (key, "%p", fd); + + pthread_mutex_lock (&conf->mutex); + { + ret = dict_set_str (conf->saved_fds, key, ""); + } + pthread_mutex_unlock (&conf->mutex); + + if (ret < 0) { + free (key); + gf_log (frame->this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to save remote fd", + local->loc.path, stbuf.st_ino); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, &stbuf); + + client_local_wipe (local); + + return 0; +} + + +/* + * client_open_cbk - open callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_open_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + fd_t *fd = NULL; + int64_t remote_fd = 0; + gf_fop_open_rsp_t *rsp = NULL; + char key[32] = {0,}; + int32_t ret = -1; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + } + + if (op_ret >= 0) { + this_fd_set (fd, frame->this, &local->loc, remote_fd); + + sprintf (key, "%p", fd); + + pthread_mutex_lock (&conf->mutex); + { + ret = dict_set_str (conf->saved_fds, key, ""); + } + pthread_mutex_unlock (&conf->mutex); + + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to save remote fd", + local->loc.path, local->loc.inode->ino); + free (key); + } + + } + + STACK_UNWIND (frame, op_ret, op_errno, fd); + + client_local_wipe 
(local); + + return 0; +} + +/* + * client_stat_cbk - stat callback for client protocol + * @frame: call frame + * @args: arguments dictionary + * + * not for external reference + */ +int +client_stat_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_stat_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_utimens_cbk - utimens callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int32_t +client_utimens_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_utimens_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_chmod_cbk - chmod for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_chmod_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_chmod_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_chown_cbk - chown 
for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_chown_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_chown_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_mknod_cbk - mknod callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_mknod_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_mknod_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct stat stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + this_ino_set (&local->loc, frame->this, stbuf.st_ino); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf); + + client_local_wipe (local); + + return 0; +} + +/* + * client_symlink_cbk - symlink callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_symlink_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_symlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct stat stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + inode = 
local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + this_ino_set (&local->loc, frame->this, stbuf.st_ino); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf); + + client_local_wipe (local); + + return 0; +} + +/* + * client_link_cbk - link callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_link_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_link_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct stat stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf); + + client_local_wipe (local); + + return 0; +} + +/* + * client_truncate_cbk - truncate callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int32_t +client_truncate_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_truncate_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* client_fstat_cbk - fstat callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external 
reference + */ + +int32_t +client_fstat_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_fstat_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_ftruncate_cbk - ftruncate callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_ftruncate_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_ftruncate_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* client_readv_cbk - readv callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external referece + */ + +int32_t +client_readv_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_read_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iovec vector = {0, }; + struct stat stbuf = {0, }; + dict_t *refs = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret != -1) { + gf_stat_to_stat (&rsp->stat, &stbuf); + vector.iov_base = buf; + vector.iov_len = buflen; + + refs = get_new_dict (); + dict_set (refs, NULL, data_from_dynptr (buf, 0)); + frame->root->rsp_refs = dict_ref (refs); + } + + STACK_UNWIND 
(frame, op_ret, op_errno, &vector, 1, &stbuf); + + if (refs) + dict_unref (refs); + + return 0; +} + +/* + * client_write_cbk - write callback for client protocol + * @frame: cal frame + * @args: argument dictionary + * + * not for external reference + */ + +int32_t +client_write_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_write_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct stat stbuf = {0, }; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) + gf_stat_to_stat (&rsp->stat, &stbuf); + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +int32_t +client_readdir_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_readdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + uint32_t buf_size = 0; + gf_dirent_t entries; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + INIT_LIST_HEAD (&entries.list); + if (op_ret > 0) { + buf_size = ntoh32 (rsp->size); + gf_dirent_unserialize (&entries, rsp->buf, buf_size); + } + + STACK_UNWIND (frame, op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + +/* + * client_fsync_cbk - fsync callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_fsync_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_fsync_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +/* + * client_unlink_cbk - unlink callback for client 
protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_unlink_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_unlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_rename_cbk - rename callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_rename_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + gf_fop_rename_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +/* + * client_readlink_cbk - readlink callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_readlink_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_readlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + char *link = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret > 0) { + link = rsp->path; + } + + STACK_UNWIND (frame, op_ret, op_errno, link); + return 0; +} + +/* + * client_mkdir_cbk - mkdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_mkdir_cbk (call_frame_t *frame, + 
gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_mkdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct stat stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + + local = frame->local; + inode = local->loc.inode; + frame->local = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_stat (&rsp->stat, &stbuf); + this_ino_set (&local->loc, frame->this, stbuf.st_ino); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf); + + client_local_wipe (local); + + return 0; +} + +/* + * client_flush_cbk - flush callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int32_t +client_flush_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + +/* + * client_opendir_cbk - opendir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int +client_opendir_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + fd_t *fd = NULL; + int64_t remote_fd = 0; + gf_fop_opendir_rsp_t *rsp = NULL; + char key[32] = {0,}; + int32_t ret = -1; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + } + + if (op_ret >= 0) { + this_fd_set (fd, frame->this, 
&local->loc, remote_fd); + + sprintf (key, "%p", fd); + + pthread_mutex_lock (&conf->mutex); + { + ret = dict_set_str (conf->saved_fds, key, ""); + } + pthread_mutex_unlock (&conf->mutex); + + if (ret < 0) { + free (key); + gf_log (frame->this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to save remote fd", + local->loc.path, local->loc.inode->ino); + } + } + + STACK_UNWIND (frame, op_ret, op_errno, fd); + + client_local_wipe (local); + + return 0; +} + + +/* + * client_rmdir_cbk - rmdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_rmdir_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_rmdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_access_cbk - access callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_access_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_access_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + + +/* + * client_lookup_cbk - lookup callback for client protocol + * + * @frame: call frame + * @args: arguments dictionary + * + * not for external reference + */ +int32_t +client_lookup_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct stat stbuf = {0, }; + inode_t *inode = NULL; + dict_t *xattr = NULL; + gf_fop_lookup_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + 
size_t dict_len = 0; + char *dictbuf = NULL; + int32_t ret = -1; + int32_t gf_errno = 0; + client_local_t *local = NULL; + + local = frame->local; + inode = local->loc.inode; + frame->local = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret == 0) { + op_ret = -1; + gf_stat_to_stat (&rsp->stat, &stbuf); + this_ino_set (&local->loc, frame->this, stbuf.st_ino); + + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail); + + xattr = dict_new(); + GF_VALIDATE_OR_GOTO(frame->this->name, xattr, fail); + + ret = dict_unserialize (dictbuf, dict_len, &xattr); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to unserialize dictionary", + local->loc.path, inode->ino); + goto fail; + } else { + xattr->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + +fail: + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, xattr); + + client_local_wipe (local); + + if (dictbuf) + free (dictbuf); + + if (xattr) + dict_unref (xattr); + + return 0; +} + +static dir_entry_t * +gf_bin_to_direntry (char *buf, size_t count) +{ + int32_t idx = 0, bread = 0; + size_t rcount = 0; + char *ender = NULL, *buffer = NULL; + char tmp_buf[512] = {0,}; + dir_entry_t *trav = NULL, *prev = NULL; + dir_entry_t *thead = NULL, *head = NULL; + + thead = CALLOC (1, sizeof (dir_entry_t)); + GF_VALIDATE_OR_GOTO("client-protocol", thead, fail); + + buffer = buf; + prev = thead; + + for (idx = 0; idx < count ; idx++) { + bread = 0; + trav = CALLOC (1, sizeof (dir_entry_t)); + GF_VALIDATE_OR_GOTO("client-protocol", trav, fail); + + ender = strchr (buffer, '/'); + if (!ender) + break; + rcount = ender - buffer; + trav->name = CALLOC (1, rcount + 2); + GF_VALIDATE_OR_GOTO("client-protocol", trav->name, fail); + + strncpy (trav->name, buffer, 
rcount); + bread = rcount + 1; + buffer += bread; + + ender = strchr (buffer, '\n'); + if (!ender) + break; + rcount = ender - buffer; + strncpy (tmp_buf, buffer, rcount); + bread = rcount + 1; + buffer += bread; + + gf_string_to_stat (tmp_buf, &trav->buf); + + ender = strchr (buffer, '\n'); + if (!ender) + break; + rcount = ender - buffer; + *ender = '\0'; + if (S_ISLNK (trav->buf.st_mode)) + trav->link = strdup (buffer); + else + trav->link = ""; + + bread = rcount + 1; + buffer += bread; + + prev->next = trav; + prev = trav; + } + + head = thead; +fail: + return head; +} + +int32_t +gf_free_direntry(dir_entry_t *head) +{ + dir_entry_t *prev = NULL, *trav = NULL; + + prev = head; + GF_VALIDATE_OR_GOTO("client-protocol", prev, fail); + + trav = head->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (head); +fail: + return 0; +} +/* + * client_getdents_cbk - readdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_getdents_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_getdents_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int32_t gf_errno = 0; + int32_t nr_count = 0; + dir_entry_t *entry = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + + if (op_ret >= 0) { + nr_count = ntoh32 (rsp->count); + entry = gf_bin_to_direntry(buf, nr_count); + if (entry == NULL) { + op_ret = -1; + op_errno = EINVAL; + } + } + + STACK_UNWIND (frame, op_ret, op_errno, entry, nr_count); + + if (op_ret >= 0) { + /* Free the buffer */ + FREE (buf); + gf_free_direntry(entry); + } + + return 0; +} + +/* + * client_statfs_cbk - statfs callback for client protocol + * @frame: call frame + * @args: 
argument dictionary + * + * not for external reference + */ +int32_t +client_statfs_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct statvfs stbuf = {0, }; + gf_fop_statfs_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) + { + gf_statfs_to_statfs (&rsp->statfs, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_fsyncdir_cbk - fsyncdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_fsyncdir_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_setxattr_cbk - setxattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_setxattr_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_setxattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_getxattr_cbk - getxattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_getxattr_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_getxattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t gf_errno = 0; + int32_t op_errno 
= 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + client_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret >= 0) { + op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail); + + dict = dict_new(); + GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to " + "unserialize xattr dictionary", + local->loc.path, local->loc.inode->ino); + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + client_local_wipe (local); + + if (dictbuf) + free (dictbuf); + + if (dict) + dict_unref (dict); + + return 0; +} + +/* + * client_removexattr_cbk - removexattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_removexattr_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_lk_cbk - lk callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_lk_common_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct flock lock = {0,}; + gf_fop_lk_rsp_t *rsp = NULL; + int32_t 
op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_flock_to_flock (&rsp->flock, &lock); + } + + STACK_UNWIND (frame, op_ret, op_errno, &lock); + return 0; +} + + +/* + * client_gf_file_lk_cbk - gf_file_lk callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_inodelk_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_inodelk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +client_finodelk_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_finodelk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/* + * client_entrylk_cbk - entrylk callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int32_t +client_entrylk_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_entrylk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +client_fentrylk_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_fentrylk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t 
op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/** + * client_writedir_cbk - + * + * @frame: + * @args: + * + * not for external reference + */ +int32_t +client_setdents_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + + +/* + * client_stats_cbk - stats callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int32_t +client_stats_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct xlator_stats stats = {0,}; + gf_mop_stats_rsp_t *rsp = NULL; + char *buffer = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) + { + buffer = rsp->buf; + + sscanf (buffer, "%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64 + ",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64"\n", + &stats.nr_files, + &stats.disk_usage, + &stats.free_disk, + &stats.total_disk_size, + &stats.read_usage, + &stats.write_usage, + &stats.disk_speed, + &stats.nr_clients); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stats); + return 0; +} + +/* + * client_getspec - getspec function for client protocol + * @frame: call frame + * @this: client protocol xlator structure + * @flag: + * + * external reference through client_protocol_xlator->fops->getspec + */ +int32_t +client_getspec (call_frame_t *frame, + xlator_t *this, + const char *key, + int32_t flag) +{ + gf_hdr_common_t *hdr = NULL; + gf_mop_getspec_req_t *req = NULL; + size_t hdrlen = -1; + int 
keylen = 0;
+        int                   ret = -1;
+
+        if (key)
+                keylen = STRLEN_0(key);
+
+        hdrlen = gf_hdr_len (req, keylen);
+        hdr    = gf_hdr_new (req, keylen);
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req         = gf_param (hdr);
+        req->flags  = hton32 (flag);
+        req->keylen = hton32 (keylen);
+        if (keylen)
+                strcpy (req->key, key);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_MOP_REQUEST, GF_MOP_GETSPEC,
+                                    hdr, hdrlen, NULL, 0, NULL);
+
+        return ret;
+unwind:
+        if (hdr)
+                free (hdr);
+        STACK_UNWIND(frame, -1, EINVAL, NULL);
+        return 0;
+}
+
+
+/*
+ * client_getspec_cbk - getspec callback for client protocol
+ *
+ * @frame: call frame
+ * @args: argument dictionary
+ *
+ * not for external reference
+ */
+
+int32_t
+client_getspec_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        gf_mop_getspec_rsp_t *rsp = NULL;
+        char                 *spec_data = NULL;
+        int32_t               op_ret = 0;
+        int32_t               op_errno = 0;
+        int32_t               gf_errno = 0;
+
+        op_ret   = ntoh32 (hdr->rsp.op_ret);
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+        rsp      = gf_param (hdr);
+
+        if (op_ret >= 0) {
+                spec_data = rsp->spec;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, spec_data);
+        return 0;
+}
+
+/*
+ * client_checksum - checksum function for client protocol
+ * @frame: call frame
+ * @this: client protocol xlator structure
+ * @loc: location whose path and inode number are sent in the request
+ * @flag: passed through to the request unchanged
+ *
+ * When conf->child is set the call is wound to that xlator instead of
+ * being sent over the transport.  If the request header cannot be
+ * allocated the frame is unwound with (-1, EINVAL), the same way
+ * client_getspec () reports that failure.
+ */
+int32_t
+client_checksum (call_frame_t *frame,
+                 xlator_t *this,
+                 loc_t *loc,
+                 int32_t flag)
+{
+        gf_hdr_common_t       *hdr = NULL;
+        gf_fop_checksum_req_t *req = NULL;
+        size_t                 hdrlen = -1;
+        size_t                 pathlen = 0;
+        int                    ret = -1;
+        client_conf_t         *conf = this->private;
+        ino_t                  ino = 0;
+
+        if (conf->child) {
+                STACK_WIND (frame,
+                            default_checksum_cbk,
+                            conf->child,
+                            conf->child->fops->checksum,
+                            loc,
+                            flag);
+
+                return 0;
+        }
+
+        /* path is sent with its terminating NUL */
+        pathlen = strlen (loc->path) + 1;
+
+        hdrlen = gf_hdr_len (req, pathlen);
+        hdr    = gf_hdr_new (req, pathlen);
+        /* previously dereferenced unchecked through gf_param () below */
+        GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
+
+        req    = gf_param (hdr);
+
+        ino = this_ino_get (loc, this, GF_CLIENT_INODE_SELF);
+        req->ino  = hton64 (ino);
+        req->flag = hton32 (flag);
+        strcpy (req->path, loc->path);
+
+        ret = protocol_client_xfer (frame, this,
+                                    CLIENT_CHANNEL (this, CHANNEL_BULK),
+                                    GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHECKSUM,
+                                    hdr, hdrlen, NULL, 0, NULL);
+
+        return ret;
+unwind:
+        /* same argument shape as the unwind in client_checksum_cbk () */
+        STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
+        return 0;
+}
+
+int32_t
+client_checksum_cbk (call_frame_t *frame,
+                     gf_hdr_common_t *hdr, size_t hdrlen,
+                     char *buf, size_t buflen)
+{
+        gf_fop_checksum_rsp_t *rsp = NULL;
+        int32_t                op_ret = 0;
+        int32_t                op_errno = 0;
+        int32_t                gf_errno = 0;
+        unsigned char         *fchecksum = NULL;
+        unsigned char         *dchecksum = NULL;
+
+        rsp = gf_param (hdr);
+
+        op_ret   = ntoh32 (hdr->rsp.op_ret);
+        gf_errno = ntoh32 (hdr->rsp.op_errno);
+        op_errno = gf_error_to_errno (gf_errno);
+
+        if (op_ret >= 0) {
+                fchecksum = rsp->fchecksum;
+                dchecksum = rsp->dchecksum + ZR_FILENAME_MAX;
+        }
+
+        STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+        return 0;
+}
+
+
+/*
+ * client_setspec_cbk - setspec callback for client protocol
+ * @frame: call frame
+ * @args: argument dictionary
+ *
+ * not for external reference
+ */
+
+int32_t
+client_setspec_cbk (call_frame_t *frame,
+                    gf_hdr_common_t *hdr, size_t hdrlen,
+                    char *buf, size_t buflen)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+
+        op_ret   = ntoh32 (hdr->rsp.op_ret);
+        op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
+
+        STACK_UNWIND (frame, op_ret, op_errno);
+
+        return 0;
+}
+
+/*
+ * client_setvolume_cbk - setvolume callback for client protocol
+ * @frame: call frame
+ * @args: argument dictionary
+ *
+ * not for external reference
+ */
+int
+client_setvolume_cbk (call_frame_t *frame,
+                      gf_hdr_common_t *hdr, size_t hdrlen,
+                      char *buf, size_t buflen)
+{
+        gf_mop_setvolume_rsp_t *rsp = NULL;
+        client_connection_t    *conn = NULL;
+        client_conf_t          *conf = NULL;
+        glusterfs_ctx_t        *ctx = NULL;
+        xlator_t               *this = NULL;
+        xlator_list_t          *parent = NULL;
+        transport_t            *trans = NULL;
+        dict_t                 *reply = NULL;
+        char                   *remote_subvol = NULL;
+        char                   *remote_error = NULL;
+        char                   *process_uuid = NULL;
+        int32_t                 ret = -1;
+        int32_t                 op_ret   = -1;
+        int32_t                 op_errno = EINVAL;
+        int32_t                 dict_len = 0;
+
+
+        trans =
frame->local; frame->local = NULL; + this = frame->this; + conf = this->private; + conn = trans->xl_private; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret < 0 && op_errno == ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "setvolume failed (%s)", + strerror (op_errno)); + goto out; + } + + reply = dict_new (); + GF_VALIDATE_OR_GOTO(this->name, reply, out); + + dict_len = ntoh32 (rsp->dict_len); + ret = dict_unserialize (rsp->buf, dict_len, &reply); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "failed to unserialize buffer(%p) to dictionary", + rsp->buf); + goto out; + } + + ret = dict_get_str (reply, "ERROR", &remote_error); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get ERROR string from reply dictionary"); + } + + ret = dict_get_str (reply, "process-uuid", &process_uuid); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get 'process-uuid' from reply dictionary"); + } + + if (op_ret < 0) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "SETVOLUME on remote-host failed: %s", + remote_error ? 
remote_error : strerror (op_errno)); + errno = op_errno; + if (op_errno == ENOTCONN) + goto out; + } else { + ctx = get_global_ctx_ptr (); + if (process_uuid && !strcmp (ctx->process_uuid,process_uuid)) { + ret = dict_get_str (this->options, "remote-subvolume", + &remote_subvol); + if (!remote_subvol) + goto out; + + gf_log (this->name, GF_LOG_WARNING, + "attaching to the local volume '%s'", + remote_subvol); + + /* TODO: */ + conf->child = xlator_search_by_name (this, + remote_subvol); + } + gf_log (trans->xl->name, GF_LOG_INFO, + "connection and handshake succeeded"); + + pthread_mutex_lock (&(conn->lock)); + { + conn->connected = 1; + } + pthread_mutex_unlock (&(conn->lock)); + + parent = trans->xl->parents; + while (parent) { + parent->xlator->notify (parent->xlator, + GF_EVENT_CHILD_UP, + trans->xl); + parent = parent->next; + } + } + +out: + STACK_DESTROY (frame->root); + + if (reply) + dict_unref (reply); + + return op_ret; +} + +/* + * client_enosys_cbk - + * @frame: call frame + * + * not for external reference + */ +int +client_enosys_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +void +client_protocol_reconnect (void *trans_ptr) +{ + transport_t *trans = NULL; + client_connection_t *conn = NULL; + struct timeval tv = {0, 0}; + + trans = trans_ptr; + conn = trans->xl_private; + pthread_mutex_lock (&conn->lock); + { + if (conn->reconnect) + gf_timer_call_cancel (trans->xl->ctx, + conn->reconnect); + conn->reconnect = 0; + + if (conn->connected == 0) { + tv.tv_sec = 10; + + gf_log (trans->xl->name, GF_LOG_DEBUG, + "attempting reconnect"); + transport_connect (trans); + + conn->reconnect = + gf_timer_call_after (trans->xl->ctx, tv, + client_protocol_reconnect, + trans); + } else { + gf_log (trans->xl->name, GF_LOG_DEBUG, + "breaking reconnect chain"); + } + } + pthread_mutex_unlock (&conn->lock); +} + +/* + * client_protocol_cleanup - cleanup function + * 
@trans: transport object + * + */ +int +protocol_client_cleanup (transport_t *trans) +{ + client_connection_t *conn = NULL; + struct saved_frames *saved_frames = NULL; + + conn = trans->xl_private; + + gf_log (trans->xl->name, GF_LOG_DEBUG, + "cleaning up state in transport object %p", trans); + + pthread_mutex_lock (&conn->lock); + { + saved_frames = conn->saved_frames; + conn->saved_frames = saved_frames_new (); + +/* + trav = conn->saved_fds->members_list; + this = trans->xl; + + while (trav) { + fd_t *fd_tmp = (fd_t *)(long) strtoul (trav->key, + NULL, 0); + fd_ctx_del (fd_tmp, this, NULL); + trav = trav->next; + } + + dict_destroy (conn->saved_fds); + + conn->saved_fds = get_new_dict_full (64); +*/ + /* bailout logic cleanup */ + memset (&(conn->last_sent), 0, + sizeof (conn->last_sent)); + + memset (&(conn->last_received), 0, + sizeof (conn->last_received)); + + if (conn->timer) { + gf_timer_call_cancel (trans->xl->ctx, conn->timer); + conn->timer = NULL; + } + + if (conn->reconnect == NULL) { + /* :O This part is empty.. any thing missing? 
*/ + } + } + pthread_mutex_unlock (&conn->lock); + + saved_frames_destroy (trans->xl, saved_frames, + gf_fops, gf_mops, gf_cbks); + + return 0; +} + + +/* cbk callbacks */ +int +client_releasedir_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +int +client_release_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +int +client_forget_cbk (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + client_conf_t *conf = NULL; + client_forget_t forget = {0, }; + uint8_t send_forget = 0; + int32_t ret = -1; + + + conf = frame->this->private; + LOCK (&conf->forget.lock); + { + conf->forget.frames_in_transit--; + + ret = client_get_forgets (frame->this, &forget); + if (ret <= 0) + send_forget = 0; + else + send_forget = 1; + } + UNLOCK (&conf->forget.lock); + + if (send_forget) { + ret = protocol_client_xfer (forget.frame, frame->this, + CLIENT_CHANNEL (frame->this, + CHANNEL_BULK), + GF_OP_TYPE_CBK_REQUEST, + GF_CBK_FORGET, + forget.hdr, forget.hdrlen, + NULL, 0, NULL); + } + + STACK_DESTROY (frame->root); + return 0; +} + + +static gf_op_t gf_fops[] = { + [GF_FOP_STAT] = client_stat_cbk, + [GF_FOP_READLINK] = client_readlink_cbk, + [GF_FOP_MKNOD] = client_mknod_cbk, + [GF_FOP_MKDIR] = client_mkdir_cbk, + [GF_FOP_UNLINK] = client_unlink_cbk, + [GF_FOP_RMDIR] = client_rmdir_cbk, + [GF_FOP_SYMLINK] = client_symlink_cbk, + [GF_FOP_RENAME] = client_rename_cbk, + [GF_FOP_LINK] = client_link_cbk, + [GF_FOP_CHMOD] = client_chmod_cbk, + [GF_FOP_CHOWN] = client_chown_cbk, + [GF_FOP_TRUNCATE] = client_truncate_cbk, + [GF_FOP_OPEN] = client_open_cbk, + [GF_FOP_READ] = client_readv_cbk, + [GF_FOP_WRITE] = client_write_cbk, + [GF_FOP_STATFS] = client_statfs_cbk, + [GF_FOP_FLUSH] = client_flush_cbk, + [GF_FOP_FSYNC] = client_fsync_cbk, + [GF_FOP_SETXATTR] = 
client_setxattr_cbk, + [GF_FOP_GETXATTR] = client_getxattr_cbk, + [GF_FOP_REMOVEXATTR] = client_removexattr_cbk, + [GF_FOP_OPENDIR] = client_opendir_cbk, + [GF_FOP_GETDENTS] = client_getdents_cbk, + [GF_FOP_FSYNCDIR] = client_fsyncdir_cbk, + [GF_FOP_ACCESS] = client_access_cbk, + [GF_FOP_CREATE] = client_create_cbk, + [GF_FOP_FTRUNCATE] = client_ftruncate_cbk, + [GF_FOP_FSTAT] = client_fstat_cbk, + [GF_FOP_LK] = client_lk_common_cbk, + [GF_FOP_UTIMENS] = client_utimens_cbk, + [GF_FOP_FCHMOD] = client_fchmod_cbk, + [GF_FOP_FCHOWN] = client_fchown_cbk, + [GF_FOP_LOOKUP] = client_lookup_cbk, + [GF_FOP_SETDENTS] = client_setdents_cbk, + [GF_FOP_READDIR] = client_readdir_cbk, + [GF_FOP_INODELK] = client_inodelk_cbk, + [GF_FOP_FINODELK] = client_finodelk_cbk, + [GF_FOP_ENTRYLK] = client_entrylk_cbk, + [GF_FOP_FENTRYLK] = client_fentrylk_cbk, + [GF_FOP_CHECKSUM] = client_checksum_cbk, + [GF_FOP_XATTROP] = client_xattrop_cbk, + [GF_FOP_FXATTROP] = client_fxattrop_cbk, +}; + +static gf_op_t gf_mops[] = { + [GF_MOP_SETVOLUME] = client_setvolume_cbk, + [GF_MOP_GETVOLUME] = client_enosys_cbk, + [GF_MOP_STATS] = client_stats_cbk, + [GF_MOP_SETSPEC] = client_setspec_cbk, + [GF_MOP_GETSPEC] = client_getspec_cbk, + [GF_MOP_PING] = client_ping_cbk, +}; + +static gf_op_t gf_cbks[] = { + [GF_CBK_FORGET] = client_forget_cbk, + [GF_CBK_RELEASE] = client_release_cbk, + [GF_CBK_RELEASEDIR] = client_releasedir_cbk +}; + +/* + * client_protocol_interpret - protocol interpreter + * @trans: transport object + * @blk: data block + * + */ +int +protocol_client_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, + char *buf_p, size_t buflen) +{ + int ret = -1; + call_frame_t *frame = NULL; + gf_hdr_common_t *hdr = NULL; + uint64_t callid = 0; + int type = -1; + int op = -1; + + + hdr = (gf_hdr_common_t *)hdr_p; + + type = ntoh32 (hdr->type); + op = ntoh32 (hdr->op); + callid = ntoh64 (hdr->callid); + + frame = lookup_frame (trans, op, type, callid); + if (frame == NULL) 
{ + gf_log (this->name, GF_LOG_ERROR, + "no frame for callid=%"PRId64" type=%d op=%d", + callid, type, op); + return 0; + } + + switch (type) { + case GF_OP_TYPE_FOP_REPLY: + if ((op > GF_FOP_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid fop '%d'", op); + } else { + ret = gf_fops[op] (frame, hdr, hdrlen, buf_p, buflen); + } + break; + case GF_OP_TYPE_MOP_REPLY: + if ((op > GF_MOP_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid fop '%d'", op); + } else { + ret = gf_mops[op] (frame, hdr, hdrlen, buf_p, buflen); + } + break; + case GF_OP_TYPE_CBK_REPLY: + if ((op > GF_CBK_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid cbk '%d'", op); + } else { + ret = gf_cbks[op] (frame, hdr, hdrlen, buf_p, buflen); + } + break; + default: + gf_log (trans->xl->name, GF_LOG_ERROR, + "invalid packet type: %d", type); + break; + } + + return ret; +} + +/* + * init - initiliazation function. called during loading of client protocol + * @this: + * + */ +int32_t +init (xlator_t *this) +{ + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + int32_t transport_timeout = 0; + int32_t ping_timeout = 0; + data_t *remote_subvolume = NULL; + int32_t ret = -1; + int i = 0; + + if (this->children) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: client protocol translator cannot have " + "subvolumes"); + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + remote_subvolume = dict_get (this->options, "remote-subvolume"); + if (remote_subvolume == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "missing 'option remote-subvolume'."); + goto out; + } + + ret = dict_get_int32 (this->options, "transport-timeout", + &transport_timeout); + if (ret >= 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setting transport-timeout to %d", transport_timeout); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "defaulting transport-timeout to 42"); + transport_timeout = 42; + } + + ret = dict_get_int32 (this->options, "ping-timeout", + &ping_timeout); + if (ret >= 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setting ping-timeout to %d", ping_timeout); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "defaulting ping-timeout to 10"); + ping_timeout = 10; + } + + conf = CALLOC (1, sizeof (client_conf_t)); + + LOCK_INIT (&conf->forget.lock); + pthread_mutex_init (&conf->mutex, NULL); + conf->saved_fds = get_new_dict_full (64); + + this->private = conf; + + for (i = 0; i < CHANNEL_MAX; i++) { + trans = transport_load (this->options, this); + if (trans == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to load transport"); + ret = -1; + goto out; + } + + conn = CALLOC (1, sizeof (*conn)); + + conn->saved_frames = saved_frames_new (); + + conn->callid = 1; + + memset (&(conn->last_sent), 0, sizeof (conn->last_sent)); + memset (&(conn->last_received), 0, + sizeof (conn->last_received)); + + conn->transport_timeout = transport_timeout; + conn->ping_timeout = ping_timeout; + + pthread_mutex_init (&conn->lock, NULL); + + trans->xl_private = conn; + conf->transport[i] = transport_ref (trans); + } + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + ret = setrlimit (RLIMIT_NOFILE, &lim); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to set 'ulimit -n 1M': %s", + strerror(errno)); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + ret = 
setrlimit (RLIMIT_NOFILE, &lim); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set max open fd to 64k: %s", + strerror(errno)); + } else { + gf_log (this->name, GF_LOG_ERROR, + "max open fd set to 64k"); + } + + } + } +#endif + ret = 0; +out: + return ret; +} + +/* + * fini - finish function called during unloading of client protocol + * @this: + * + */ +void +fini (xlator_t *this) +{ + /* TODO: Check if its enough.. how to call transport's fini () */ + client_conf_t *conf = NULL; + + conf = this->private; + this->private = NULL; + + if (conf) { + LOCK_DESTROY (&conf->forget.lock); + FREE (conf); + } + return; +} + + +int +protocol_client_handshake (xlator_t *this, transport_t *trans) +{ + gf_hdr_common_t *hdr = NULL; + gf_mop_setvolume_req_t *req = NULL; + dict_t *options = NULL; + int32_t ret = -1; + int hdrlen = 0; + int dict_len = 0; + call_frame_t *fr = NULL; + char *process_uuid_xl; + + options = this->options; + ret = dict_set_str (options, "version", PACKAGE_VERSION); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set version(%s) in options dictionary", + PACKAGE_VERSION); + } + + asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid, + this->name); + ret = dict_set_dynstr (options, "process-uuid", + process_uuid_xl); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set process-uuid(%s) in options dictionary", + PACKAGE_VERSION); + } + + dict_len = dict_serialized_length (options); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict(%p)", + options); + ret = dict_len; + goto fail; + } + + hdrlen = gf_hdr_len (req, dict_len); + hdr = gf_hdr_new (req, dict_len); + GF_VALIDATE_OR_GOTO(this->name, hdr, fail); + + req = gf_param (hdr); + + ret = dict_serialize (options, req->buf); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to serialize dictionary(%p)", + options); + goto fail; + } + + req->dict_len = hton32 (dict_len); + fr = 
create_frame (this, this->ctx->pool); + GF_VALIDATE_OR_GOTO(this->name, fr, fail); + + fr->local = trans; + ret = protocol_client_xfer (fr, this, trans, + GF_OP_TYPE_MOP_REQUEST, GF_MOP_SETVOLUME, + hdr, hdrlen, NULL, 0, NULL); + return ret; +fail: + if (hdr) + free (hdr); + return ret; +} + + +int +protocol_client_pollout (xlator_t *this, transport_t *trans) +{ + client_connection_t *conn = NULL; + + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + gettimeofday (&conn->last_sent, NULL); + } + pthread_mutex_unlock (&conn->lock); + + return 0; +} + + +int +protocol_client_pollin (xlator_t *this, transport_t *trans) +{ + client_connection_t *conn = NULL; + int ret = -1; + char *buf = NULL; + size_t buflen = 0; + char *hdr = NULL; + size_t hdrlen = 0; + int connected = 0; + + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + gettimeofday (&conn->last_received, NULL); + connected = conn->connected; + } + pthread_mutex_unlock (&conn->lock); + + ret = transport_receive (trans, &hdr, &hdrlen, &buf, &buflen); + + if (ret == 0) + { + ret = protocol_client_interpret (this, trans, hdr, hdrlen, + buf, buflen); + } + + /* TODO: use mem-pool */ + FREE (hdr); + + return ret; +} + + +/* + * client_protocol_notify - notify function for client protocol + * @this: + * @trans: transport object + * @event + * + */ + +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + int ret = -1; + transport_t *trans = NULL; + client_connection_t *conn = NULL; + + trans = data; + + switch (event) { + case GF_EVENT_POLLOUT: + { + ret = protocol_client_pollout (this, trans); + + break; + } + case GF_EVENT_POLLIN: + { + ret = protocol_client_pollin (this, trans); + + break; + } + /* no break for ret check to happen below */ + case GF_EVENT_POLLERR: + { + ret = -1; + protocol_client_cleanup (trans); + } + + conn = trans->xl_private; + if (conn->connected) { + xlator_list_t *parent = NULL; + + gf_log (this->name, GF_LOG_INFO, "disconnected"); + + parent = this->parents; + while (parent) { + parent->xlator->notify (parent->xlator, + GF_EVENT_CHILD_DOWN, + this); + parent = parent->next; + } + + conn->connected = 0; + if (conn->reconnect == 0) + client_protocol_reconnect (trans); + } + break; + + case GF_EVENT_PARENT_UP: + { + xlator_list_t *parent = NULL; + client_conf_t *conf = NULL; + int i = 0; + transport_t *trans = NULL; + + conf = this->private; + for (i = 0; i < CHANNEL_MAX; i++) { + trans = conf->transport[i]; + if (!trans) { + gf_log (this->name, GF_LOG_DEBUG, + "transport init failed"); + return -1; + } + + conn = trans->xl_private; + + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_PARENT_UP, attempting connect " + "on transport"); + + client_protocol_reconnect (trans); + } + + /* Let the connection/re-connection happen in + * background, for now, don't hang here, + * tell the parents that i am all ok.. 
+ */ + parent = trans->xl->parents; + while (parent) { + parent->xlator->notify (parent->xlator, + GF_EVENT_CHILD_CONNECTING, + trans->xl); + parent = parent->next; + } + } + break; + + case GF_EVENT_CHILD_UP: + { + char *handshake = NULL; + + ret = dict_get_str (this->options, "disable-handshake", + &handshake); + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_UP"); + if ((ret < 0) || + (strcasecmp (handshake, "on"))) { + ret = protocol_client_handshake (this, trans); + } else { + conn = trans->xl_private; + conn->connected = 1; + ret = default_notify (this, event, trans); + } + + if (ret) + transport_disconnect (trans); + + } + break; + + default: + gf_log (this->name, GF_LOG_DEBUG, + "got %d, calling default_notify ()", event); + + default_notify (this, event, data); + break; + } + + return ret; +} + + +struct xlator_fops fops = { + .stat = client_stat, + .readlink = client_readlink, + .mknod = client_mknod, + .mkdir = client_mkdir, + .unlink = client_unlink, + .rmdir = client_rmdir, + .symlink = client_symlink, + .rename = client_rename, + .link = client_link, + .chmod = client_chmod, + .chown = client_chown, + .truncate = client_truncate, + .utimens = client_utimens, + .open = client_open, + .readv = client_readv, + .writev = client_writev, + .statfs = client_statfs, + .flush = client_flush, + .fsync = client_fsync, + .setxattr = client_setxattr, + .getxattr = client_getxattr, + .removexattr = client_removexattr, + .opendir = client_opendir, + .readdir = client_readdir, + .fsyncdir = client_fsyncdir, + .access = client_access, + .ftruncate = client_ftruncate, + .fstat = client_fstat, + .create = client_create, + .lk = client_lk, + .inodelk = client_inodelk, + .finodelk = client_finodelk, + .entrylk = client_entrylk, + .fentrylk = client_fentrylk, + .lookup = client_lookup, + .fchmod = client_fchmod, + .fchown = client_fchown, + .setdents = client_setdents, + .getdents = client_getdents, + .checksum = client_checksum, + .xattrop = client_xattrop, + 
.fxattrop = client_fxattrop, +}; + +struct xlator_mops mops = { + .stats = client_stats, + .getspec = client_getspec, +}; + +struct xlator_cbks cbks = { + .forget = client_forget, + .release = client_release, + .releasedir = client_releasedir +}; + + +struct volume_options options[] = { + { .key = {"username"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"password"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport-type"}, + .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp", + "tcp/client", "ib-verbs/client"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"remote-host"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"remote-subvolume"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 5, + .max = 1013, + }, + { .key = {"ping-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 5, + .max = 1013, + }, + { .key = {NULL} }, +}; diff --git a/xlators/protocol/client/src/client-protocol.h b/xlators/protocol/client/src/client-protocol.h new file mode 100644 index 000000000..c90cc980d --- /dev/null +++ b/xlators/protocol/client/src/client-protocol.h @@ -0,0 +1,173 @@ +/* + Copyright (c) 2006, 2007 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CLIENT_PROTOCOL_H +#define _CLIENT_PROTOCOL_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <arpa/inet.h> +#include "inode.h" +#include "timer.h" +#include "byte-order.h" + +#define CLIENT_PROTO_FORGET_LIMIT 128 +#define CLIENT_PORT_CIELING 1023 + +#define GF_CLIENT_INODE_SELF 0 +#define GF_CLIENT_INODE_PARENT 1 + +#define CLIENT_CONF(this) ((client_conf_t *)(this->private)) + +#define RECEIVE_TIMEOUT(_cprivate,_current) \ + ((_cprivate->last_received.tv_sec + \ + _cprivate->transport_timeout) < \ + _current.tv_sec) + +#define SEND_TIMEOUT(_cprivate,_current) \ + ((_cprivate->last_sent.tv_sec + \ + _cprivate->transport_timeout) < \ + _current.tv_sec) + +enum { + CHANNEL_BULK = 0, + CHANNEL_LOWLAT = 1, + CHANNEL_MAX +}; +#define CLIENT_CHANNEL(xl,id) \ + (((client_conf_t *)(xl->private))->transport[id]) + +struct client_connection; +typedef struct client_connection client_connection_t; + +#include "stack.h" +#include "xlator.h" +#include "transport.h" +#include "protocol.h" + +struct _client_conf { + transport_t *transport[CHANNEL_MAX]; + xlator_t *child; + + /* enhancement for 'forget', a must required where lot + of stats happening */ + struct { + uint64_t ino_array[CLIENT_PROTO_FORGET_LIMIT + 4]; + uint32_t count; + uint32_t frames_in_transit; + gf_lock_t lock; + } forget; + dict_t *saved_fds; + pthread_mutex_t mutex; +}; +typedef struct _client_conf client_conf_t; + +/* This will be stored in transport_t->xl_private */ +struct client_connection { + pthread_mutex_t lock; + uint64_t callid; + struct saved_frames *saved_frames; + int32_t transport_timeout; + int32_t ping_started; + int32_t ping_timeout; + gf_timer_t *reconnect; + char connected; + uint64_t max_block_size; + struct timeval last_sent; + struct timeval last_received; + gf_timer_t *timer; + gf_timer_t *ping_timer; +}; + +typedef struct { + loc_t loc; + loc_t loc2; + fd_t *fd; +} client_local_t; + +typedef struct { + gf_hdr_common_t 
*hdr; + size_t hdrlen; + call_frame_t *frame; +} client_forget_t; + +static inline void +gf_string_to_stat(char *string, struct stat *stbuf) +{ + uint64_t dev = 0; + uint64_t ino = 0; + uint32_t mode = 0; + uint32_t nlink = 0; + uint32_t uid = 0; + uint32_t gid = 0; + uint64_t rdev = 0; + uint64_t size = 0; + uint32_t blksize = 0; + uint64_t blocks = 0; + uint32_t atime = 0; + uint32_t atime_nsec = 0; + uint32_t mtime = 0; + uint32_t mtime_nsec = 0; + uint32_t ctime = 0; + uint32_t ctime_nsec = 0; + + sscanf (string, GF_STAT_PRINT_FMT_STR, + &dev, + &ino, + &mode, + &nlink, + &uid, + &gid, + &rdev, + &size, + &blksize, + &blocks, + &atime, + &atime_nsec, + &mtime, + &mtime_nsec, + &ctime, + &ctime_nsec); + + stbuf->st_dev = dev; + stbuf->st_ino = ino; + stbuf->st_mode = mode; + stbuf->st_nlink = nlink; + stbuf->st_uid = uid; + stbuf->st_gid = gid; + stbuf->st_rdev = rdev; + stbuf->st_size = size; + stbuf->st_blksize = blksize; + stbuf->st_blocks = blocks; + + stbuf->st_atime = atime; + stbuf->st_mtime = mtime; + stbuf->st_ctime = ctime; + + ST_ATIM_NSEC_SET(stbuf, atime_nsec); + ST_MTIM_NSEC_SET(stbuf, mtime_nsec); + ST_CTIM_NSEC_SET(stbuf, ctime_nsec); + +} + +#endif diff --git a/xlators/protocol/client/src/saved-frames.c b/xlators/protocol/client/src/saved-frames.c new file mode 100644 index 000000000..0d1366d82 --- /dev/null +++ b/xlators/protocol/client/src/saved-frames.c @@ -0,0 +1,178 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#include "saved-frames.h" +#include "common-utils.h" +#include "protocol.h" +#include "xlator.h" + + + +struct saved_frames * +saved_frames_new (void) +{ + struct saved_frames *saved_frames = NULL; + + saved_frames = CALLOC (sizeof (*saved_frames), 1); + if (!saved_frames) { + return NULL; + } + + INIT_LIST_HEAD (&saved_frames->fops.list); + INIT_LIST_HEAD (&saved_frames->mops.list); + INIT_LIST_HEAD (&saved_frames->cbks.list); + + return saved_frames; +} + + +struct saved_frame * +get_head_frame_for_type (struct saved_frames *frames, int8_t type) +{ + struct saved_frame *head_frame = NULL; + + switch (type) { + case GF_OP_TYPE_FOP_REQUEST: + case GF_OP_TYPE_FOP_REPLY: + head_frame = &frames->fops; + break; + case GF_OP_TYPE_MOP_REQUEST: + case GF_OP_TYPE_MOP_REPLY: + head_frame = &frames->mops; + break; + case GF_OP_TYPE_CBK_REQUEST: + case GF_OP_TYPE_CBK_REPLY: + head_frame = &frames->cbks; + break; + } + + return head_frame; +} + + +int +saved_frames_put (struct saved_frames *frames, call_frame_t *frame, + int32_t op, int8_t type, int64_t callid) +{ + struct saved_frame *saved_frame = NULL; + struct saved_frame *head_frame = NULL; + + head_frame = get_head_frame_for_type (frames, type); + + saved_frame = CALLOC (sizeof (*saved_frame), 1); + if (!saved_frame) { + return -ENOMEM; + } + + INIT_LIST_HEAD (&saved_frame->list); + saved_frame->frame = frame; + saved_frame->op = op; + saved_frame->type = type; + saved_frame->callid = callid; + +// gettimeofday (&saved_frame->saved_at, NULL); + + list_add (&saved_frame->list, &head_frame->list); + frames->count++; + + return 0; +} + + +call_frame_t * +saved_frames_get (struct saved_frames *frames, int32_t op, + int8_t type, int64_t callid) +{ + struct saved_frame *saved_frame = NULL; + struct saved_frame *tmp = NULL; 
+ struct saved_frame *head_frame = NULL; + call_frame_t *frame = NULL; + + head_frame = get_head_frame_for_type (frames, type); + + list_for_each_entry (tmp, &head_frame->list, list) { + if (tmp->callid == callid) { + list_del_init (&tmp->list); + frames->count--; + saved_frame = tmp; + break; + } + } + + if (saved_frame) + frame = saved_frame->frame; + + FREE (saved_frame); + + return frame; +} + + +void +saved_frames_unwind (xlator_t *this, struct saved_frames *saved_frames, + struct saved_frame *head, + gf_op_t gf_ops[], char *gf_op_list[]) +{ + struct saved_frame *trav = NULL; + struct saved_frame *tmp = NULL; + + gf_hdr_common_t hdr = {0, }; + call_frame_t *frame = NULL; + dict_t *reply = NULL; + + reply = get_new_dict(); + dict_ref (reply); + + hdr.rsp.op_ret = hton32 (-1); + hdr.rsp.op_errno = hton32 (ENOTCONN); + + list_for_each_entry_safe (trav, tmp, &head->list, list) { + gf_log (this->name, GF_LOG_ERROR, + "forced unwinding frame type(%d) op(%s)", + trav->type, gf_op_list[trav->op]); + + hdr.type = hton32 (trav->type); + hdr.op = hton32 (trav->op); + + frame = trav->frame; + frame->root->rsp_refs = reply; + + saved_frames->count--; + + gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL, 0); + + list_del_init (&trav->list); + FREE (trav); + } + + dict_unref (reply); +} + + +void +saved_frames_destroy (xlator_t *this, struct saved_frames *frames, + gf_op_t gf_fops[], gf_op_t gf_mops[], gf_op_t gf_cbks[]) +{ + saved_frames_unwind (this, frames, &frames->fops, gf_fops, gf_fop_list); + saved_frames_unwind (this, frames, &frames->mops, gf_mops, gf_mop_list); + saved_frames_unwind (this, frames, &frames->cbks, gf_cbks, gf_cbk_list); + + FREE (frames); +} diff --git a/xlators/protocol/client/src/saved-frames.h b/xlators/protocol/client/src/saved-frames.h new file mode 100644 index 000000000..e402feba3 --- /dev/null +++ b/xlators/protocol/client/src/saved-frames.h @@ -0,0 +1,74 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. 
<http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _SAVED_FRAMES_H +#define _SAVED_FRAMES_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <sys/time.h> +#include "stack.h" +#include "list.h" +#include "protocol.h" + +/* UGLY: have common typedef b/w saved-frames.c and protocol-client.c */ +typedef int32_t (*gf_op_t) (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen); + + +struct saved_frame { + union { + struct list_head list; + struct { + struct saved_frame *frame_next; + struct saved_frame *frame_prev; + }; + }; + + struct timeval saved_at; + call_frame_t *frame; + int32_t op; + int8_t type; + uint64_t callid; +}; + + +struct saved_frames { + int64_t count; + struct saved_frame fops; + struct saved_frame mops; + struct saved_frame cbks; +}; + + +struct saved_frames *saved_frames_new (); +int saved_frames_put (struct saved_frames *frames, call_frame_t *frame, + int32_t op, int8_t type, int64_t callid); +call_frame_t *saved_frames_get (struct saved_frames *frames, int32_t op, + int8_t type, int64_t callid); +void saved_frames_destroy (xlator_t *this, struct saved_frames *frames, + gf_op_t gf_fops[], gf_op_t gf_mops[], + gf_op_t gf_cbks[]); + +#endif /* _SAVED_FRAMES_H */ diff --git a/xlators/protocol/server/Makefile.am 
b/xlators/protocol/server/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/protocol/server/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am new file mode 100644 index 000000000..dcd92aeed --- /dev/null +++ b/xlators/protocol/server/src/Makefile.am @@ -0,0 +1,18 @@ + +xlator_LTLIBRARIES = server.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol + +server_la_LDFLAGS = -module -avoidversion + +server_la_SOURCES = server-protocol.c server-dentry.c server-helpers.c +server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = server-protocol.h server-helpers.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \ + -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" \ + $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/protocol/server/src/server-dentry.c b/xlators/protocol/server/src/server-dentry.c new file mode 100644 index 000000000..d3a69a393 --- /dev/null +++ b/xlators/protocol/server/src/server-dentry.c @@ -0,0 +1,413 @@ +#include "glusterfs.h" +#include "xlator.h" +#include "server-protocol.h" +#include "server-helpers.h" +#include <libgen.h> + +/* SERVER_DENTRY_STATE_PREPARE - prepare a fresh state for use + * + * @state - an empty state + * @loc - loc_t which needs to resolved + * @parent - most immediate parent of @loc available in dentry cache + * @resolved - component of @loc->path which has been resolved + * through dentry cache + */ +#define SERVER_DENTRY_STATE_PREPARE(_state,_loc,_parent,_resolved) do { \ + size_t pathlen = 0; \ + size_t resolvedlen = 0; \ + char *path = NULL; \ + int pad = 0; \ + pathlen = strlen (_loc->path) + 1; \ + path = CALLOC (1, pathlen); \ + _state->loc.parent = inode_ref (_parent); \ + _state->loc.inode = inode_new (_state->itable); \ + 
if (_resolved) { \ + resolvedlen = strlen (_resolved); \ + strncpy (path, _resolved, resolvedlen); \ + _state->resolved = memdup (path, pathlen); \ + if (resolvedlen == 1) /* only root resolved */ \ + pad = 0; \ + else { \ + pad = 1; \ + path[resolvedlen] = '/'; \ + } \ + strcpy_till (path + resolvedlen + pad, loc->path + resolvedlen + pad, '/'); \ + } else { \ + strncpy (path, _loc->path, pathlen); \ + } \ + _state->loc.path = path; \ + _state->loc.name = strrchr (path, '/'); \ + if (_state->loc.name) \ + _state->loc.name++; \ + _state->path = strdup (_loc->path); \ + }while (0); + +/* SERVER_DENTRY_UPDATE_STATE - update a server_state_t, to prepare state + * for new lookup + * + * @state - state to be updated. + */ +#define SERVER_DENTRY_UPDATE_STATE(_state) do { \ + char *path = NULL; \ + size_t pathlen = 0; \ + strcpy (_state->resolved, _state->loc.path); \ + pathlen = strlen (_state->loc.path); \ + if (!strcmp (_state->resolved, _state->path)) { \ + free (_state->resolved); \ + _state->resolved = NULL; \ + goto resume; \ + } \ + \ + path = (char *)(_state->loc.path + pathlen); \ + path[0] = '/'; \ + strcpy_till (path + 1, \ + _state->path + pathlen + 1, '/'); \ + _state->loc.name = strrchr (_state->loc.path, '/'); \ + if (_state->loc.name) \ + _state->loc.name++; \ + inode_unref (_state->loc.parent); \ + _state->loc.parent = inode_ref (_state->loc.inode); \ + inode_unref (_state->loc.inode); \ + _state->loc.inode = inode_new (_state->itable); \ + }while (0); + +/* NOTE: should be used only for a state which was created by __do_path_resolve + * using any other state will result in double free corruption. 
+ */ +#define SERVER_STATE_CLEANUP(_state) do { \ + if (_state->resolved) \ + free (_state->resolved); \ + if (_state->path) \ + free (_state->path); \ + server_loc_wipe (&_state->loc); \ + free_state (_state); \ + } while (0); + +/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest + * @dest - destination string + * @dname - source string + * @delim - delimiter character + * + * return - NULL is returned if '0' is encountered in @dname, otherwise returns + * a pointer to remaining string begining in @dest. + */ +static char * +strcpy_till (char *dest, const char *dname, char delim) +{ + char *src = NULL; + int idx = 0; + char *ret = NULL; + + src = (char *)dname; + while (src[idx] && (src[idx] != delim)) { + dest[idx] = src[idx]; + idx++; + } + + dest[idx] = 0; + + if (src[idx] == 0) + ret = NULL; + else + ret = &(src[idx]); + + return ret; +} + +/* __server_path_to_parenti - derive parent inode for @path. if immediate parent is + * not available in the dentry cache, return nearest + * available parent inode and set @reslv to the path of + * the returned directory. + * + * @itable - inode table + * @path - path whose parent has to be looked up. + * @reslv - if immediate parent is not available, reslv will be set to path of the + * resolved parent. + * + * return - should never return NULL. should at least return '/' inode. 
+ */ +static inode_t * +__server_path_to_parenti (inode_table_t *itable, + const char *path, + char **reslv) +{ + char *resolved_till = NULL; + char *strtokptr = NULL; + char *component = NULL; + char *next_component = NULL; + char *pathdup = NULL; + inode_t *curr = NULL; + inode_t *parent = NULL; + size_t pathlen = 0; + + + pathlen = STRLEN_0 (path); + resolved_till = CALLOC (1, pathlen); + + GF_VALIDATE_OR_GOTO("server-dentry", resolved_till, out); + pathdup = strdup (path); + GF_VALIDATE_OR_GOTO("server-dentry", pathdup, out); + + parent = inode_ref (itable->root); + curr = NULL; + + component = strtok_r (pathdup, "/", &strtokptr); + + while (component) { + curr = inode_search (itable, parent->ino, component); + if (!curr) { + /* if current component was the last component + set it to NULL + */ + component = strtok_r (NULL, "/", &strtokptr); + break; + } + + /* It is OK to append the component even if it is the + last component in the path, because, if 'next_component' + returns NULL, @parent will remain the same and + @resolved_till will not be sent back + */ + + strcat (resolved_till, "/"); + strcat (resolved_till, component); + + next_component = strtok_r (NULL, "/", &strtokptr); + + if (next_component) { + inode_unref (parent); + parent = curr; + curr = NULL; + } else { + /* will break */ + inode_unref (curr); + } + + component = next_component; + } + + free (pathdup); + + if (component) { + *reslv = resolved_till; + } else { + free (resolved_till); + } +out: + return parent; +} + + +/* __do_path_resolve_cbk - + * + * @frame - + * @cookie - + * @this - + * @op_ret - + * @op_errno - + * @inode - + * @stbuf - + * @dict - + * + */ +static int32_t +__do_path_resolve_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf, + dict_t *dict) +{ + server_state_t *state = NULL; + call_stub_t *stub = NULL; + inode_t *parent = NULL; + + stub = frame->local; + state = CALL_STATE(frame); + + 
parent = state->loc.parent; + + if (op_ret == -1) { + if (strcmp (state->path, state->loc.path)) + parent = NULL; + + server_stub_resume (stub, op_ret, op_errno, NULL, parent); + goto cleanup; + } else { + if (inode->ino == 0) { + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "looked up for %s (%"PRId64"/%s)", + state->loc.path, state->loc.parent->ino, state->loc.name); + inode_link (inode, state->loc.parent, state->loc.name, stbuf); + inode_lookup (inode); + } + + if (state->resolved) { + SERVER_DENTRY_UPDATE_STATE(state); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "looking up for %s (%"PRId64"/%s)", + state->loc.path, state->loc.parent->ino, state->loc.name); + + STACK_WIND (frame, + __do_path_resolve_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->lookup, + &(state->loc), + 0); + + goto out; + } + resume: + /* we are done, call stub_resume() to do rest of the job */ + server_stub_resume (stub, op_ret, op_errno, inode, parent); + cleanup: + SERVER_STATE_CLEANUP(state); + /* stub will be freed by stub_resume, leave no traces */ + frame->local = NULL; + STACK_DESTROY (frame->root); + } +out: + return 0; +} + +/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also + * update the dentry cache + * + * @stub - call stub to resume after resolving @loc->path + * @loc - loc to resolve before resuming @stub. + * + * return - return value of __do_path_resolve doesn't matter to the caller, if @stub + * is not NULL. + */ +static int32_t +__do_path_resolve (call_stub_t *stub, + const loc_t *loc) +{ + int32_t ret = -1; + char *resolved = NULL; + call_frame_t *new_frame = NULL; + server_state_t *state = NULL, *new_state = NULL; + inode_t *parent = NULL; + + state = CALL_STATE(stub->frame); + parent = loc->parent; + if (parent) { + inode_ref (parent); + gf_log (BOUND_XL(stub->frame)->name, GF_LOG_DEBUG, + "loc->parent(%"PRId64") already present. 
sending lookup " + "for %"PRId64"/%s", parent->ino, parent->ino, loc->name); + resolved = strdup (loc->path); + resolved = dirname (resolved); + } else { + parent = __server_path_to_parenti (state->itable, loc->path, &resolved); + } + + if (parent == NULL) { + /* fire in the bush.. run! run!! run!!! */ + gf_log ("server", + GF_LOG_CRITICAL, + "failed to get parent inode number"); + goto panic; + } + + if (resolved) { + gf_log (BOUND_XL(stub->frame)->name, + GF_LOG_DEBUG, + "resolved path(%s) till %"PRId64"(%s). " + "sending lookup for remaining path", + loc->path, parent->ino, resolved); + } + + { + new_frame = server_copy_frame (stub->frame); + new_state = CALL_STATE(new_frame); + + SERVER_DENTRY_STATE_PREPARE(new_state, loc, parent, resolved); + + if (parent) + inode_unref (parent); /* __server_path_to_parenti()'s inode_ref */ + free (resolved); + /* now interpret state as: + * state->path - compelete pathname to resolve + * state->resolved - pathname resolved from dentry cache + */ + new_frame->local = stub; + STACK_WIND (new_frame, + __do_path_resolve_cbk, + BOUND_XL(new_frame), + BOUND_XL(new_frame)->fops->lookup, + &(new_state->loc), + 0); + goto out; + } +panic: + server_stub_resume (stub, -1, ENOENT, NULL, NULL); +out: + return ret; +} + + +/* + * do_path_lookup - transform a pathname into inode, with the compelete + * dentry tree upto inode built. + * + * @stub - call stub to resume after completing pathname to inode transform + * @loc - location. valid fields that do_path_lookup() uses in @loc are + * @loc->path - pathname + * @loc->ino - inode number + * + * return - do_path_lookup returns only after complete dentry tree is built + * upto @loc->path. 
+ */ +int32_t +do_path_lookup (call_stub_t *stub, + const loc_t *loc) +{ + char *pathname = NULL; + char *directory = NULL; + inode_t *inode = NULL; + inode_t *parent = NULL; + server_state_t *state = NULL; + + state = CALL_STATE(stub->frame); + + inode = inode_from_path (state->itable, loc->path); + pathname = strdup (loc->path); + directory = dirname (pathname); + parent = inode_from_path (state->itable, directory); + + if (inode && parent) { + gf_log (BOUND_XL(stub->frame)->name, + GF_LOG_DEBUG, + "resolved path(%s) to %"PRId64"/%"PRId64"(%s)", + loc->path, parent->ino, inode->ino, loc->name); + server_stub_resume (stub, 0, 0, inode, parent); + inode_unref (inode); + inode_unref (parent); + } else { + gf_log (BOUND_XL(stub->frame)->name, + GF_LOG_DEBUG, + "resolved path(%s) to %p(%"PRId64")/%p(%"PRId64")", + loc->path, parent, (parent ? parent->ino : 0), + inode, (inode ? inode->ino : 0)); + if (parent) { + inode_unref (parent); + } else if (inode) { + inode_unref (inode); + gf_log (BOUND_XL(stub->frame)->name, + GF_LOG_ERROR, + "undesired behaviour. inode(%"PRId64") for %s " + "exists without parent (%s)", + inode->ino, loc->path, directory); + } + __do_path_resolve (stub, loc); + } + + if (pathname) + free (pathname); + + return 0; +} diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c new file mode 100644 index 000000000..b51c11aa9 --- /dev/null +++ b/xlators/protocol/server/src/server-helpers.c @@ -0,0 +1,586 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "server-protocol.h" +#include "server-helpers.h" + + +/* server_loc_fill - derive a loc_t for a given inode number + * + * NOTE: make sure that @loc is empty, because any pointers it holds with reference will + * be leaked after returning from here. + */ +int +server_loc_fill (loc_t *loc, server_state_t *state, + ino_t ino, ino_t par, + const char *name, const char *path) +{ + inode_t *inode = NULL; + inode_t *parent = NULL; + int32_t ret = -1; + char *dentry_path = NULL; + + + GF_VALIDATE_OR_GOTO ("server", loc, out); + GF_VALIDATE_OR_GOTO ("server", state, out); + GF_VALIDATE_OR_GOTO ("server", path, out); + + /* anything beyond this point is success */ + ret = 0; + loc->ino = ino; + inode = loc->inode; + if (inode == NULL) { + if (ino) + inode = inode_search (state->itable, ino, NULL); + + if ((inode == NULL) && + (par && name)) + inode = inode_search (state->itable, par, name); + + loc->inode = inode; + if (inode) + loc->ino = inode->ino; + } + + parent = loc->parent; + if (parent == NULL) { + if (inode) + parent = inode_parent (inode, par, name); + else + parent = inode_search (state->itable, par, NULL); + loc->parent = parent; + } + + if (name && parent) { + ret = inode_path (parent, name, &dentry_path); + if (ret < 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "failed to build path for %"PRId64"/%s: %s", + parent->ino, name, strerror (-ret)); + } + } else if (inode) { + ret = inode_path (inode, NULL, &dentry_path); + if (ret < 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "failed to 
build path for %"PRId64": %s", + inode->ino, strerror (-ret)); + + inode_unref (loc->inode); + loc->inode = NULL; + } + } + + if (dentry_path) { + if (strcmp (dentry_path, path)) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "paths differ for inode(%"PRId64"): " + "client path = %s. dentry path = %s", + ino, path, dentry_path); + } + + loc->path = dentry_path; + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + } else { + loc->path = strdup (path); + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + } + +out: + return ret; +} + +/* + * stat_to_str - convert struct stat to a ASCII string + * @stbuf: struct stat pointer + * + * not for external reference + */ +char * +stat_to_str (struct stat *stbuf) +{ + char *tmp_buf = NULL; + + uint64_t dev = stbuf->st_dev; + uint64_t ino = stbuf->st_ino; + uint32_t mode = stbuf->st_mode; + uint32_t nlink = stbuf->st_nlink; + uint32_t uid = stbuf->st_uid; + uint32_t gid = stbuf->st_gid; + uint64_t rdev = stbuf->st_rdev; + uint64_t size = stbuf->st_size; + uint32_t blksize = stbuf->st_blksize; + uint64_t blocks = stbuf->st_blocks; + uint32_t atime = stbuf->st_atime; + uint32_t mtime = stbuf->st_mtime; + uint32_t ctime = stbuf->st_ctime; + + uint32_t atime_nsec = ST_ATIM_NSEC(stbuf); + uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf); + uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf); + + + asprintf (&tmp_buf, + GF_STAT_PRINT_FMT_STR, + dev, + ino, + mode, + nlink, + uid, + gid, + rdev, + size, + blksize, + blocks, + atime, + atime_nsec, + mtime, + mtime_nsec, + ctime, + ctime_nsec); + + return tmp_buf; +} + + +void +server_loc_wipe (loc_t *loc) +{ + if (loc->parent) + inode_unref (loc->parent); + if (loc->inode) + inode_unref (loc->inode); + if (loc->path) + free ((char *)loc->path); +} + +void +free_state (server_state_t *state) +{ + transport_t *trans = NULL; + + trans = state->trans; + + if (state->fd) + fd_unref (state->fd); + + transport_unref (trans); + + if (state->xattr_req) + dict_unref 
(state->xattr_req); + + FREE (state); +} + + +call_frame_t * +server_copy_frame (call_frame_t *frame) +{ + call_frame_t *new_frame = NULL; + server_state_t *state = NULL, *new_state = NULL; + + state = frame->root->state; + + new_frame = copy_frame (frame); + + new_state = CALLOC (1, sizeof (server_state_t)); + + new_frame->root->op = frame->root->op; + new_frame->root->type = frame->root->type; + new_frame->root->trans = state->trans; + new_frame->root->state = new_state; + + new_state->bound_xl = state->bound_xl; + new_state->trans = transport_ref (state->trans); + new_state->itable = state->itable; + + return new_frame; +} + +int32_t +gf_add_locker (struct _lock_table *table, + loc_t *loc, + fd_t *fd, + pid_t pid) +{ + int32_t ret = -1; + struct _locker *new = NULL; + uint8_t dir = 0; + + new = CALLOC (1, sizeof (struct _locker)); + if (new == NULL) { + gf_log ("server", GF_LOG_ERROR, + "failed to allocate memory for \'struct _locker\'"); + goto out; + } + INIT_LIST_HEAD (&new->lockers); + + if (fd == NULL) { + loc_copy (&new->loc, loc); + dir = S_ISDIR (new->loc.inode->st_mode); + } else { + new->fd = fd_ref (fd); + dir = S_ISDIR (fd->inode->st_mode); + } + + new->pid = pid; + + LOCK (&table->lock); + { + if (dir) + list_add_tail (&new->lockers, &table->dir_lockers); + else + list_add_tail (&new->lockers, &table->file_lockers); + } + UNLOCK (&table->lock); +out: + return ret; +} + +int32_t +gf_del_locker (struct _lock_table *table, + loc_t *loc, + fd_t *fd, + pid_t pid) +{ + struct _locker *locker = NULL, *tmp = NULL; + int32_t ret = 0; + uint8_t dir = 0; + struct list_head *head = NULL; + struct list_head del; + + INIT_LIST_HEAD (&del); + + if (fd) { + dir = S_ISDIR (fd->inode->st_mode); + } else { + dir = S_ISDIR (loc->inode->st_mode); + } + + LOCK (&table->lock); + { + if (dir) { + head = &table->dir_lockers; + } else { + head = &table->file_lockers; + } + + list_for_each_entry_safe (locker, tmp, head, lockers) { + if (locker->fd && + fd && + (locker->fd == 
fd) && (locker->pid == pid)) { + list_move_tail (&locker->lockers, &del); + } else if (locker->loc.inode && + loc && + (locker->loc.inode == loc->inode) && + (locker->pid == pid)) { + list_move_tail (&locker->lockers, &del); + } + } + } + UNLOCK (&table->lock); + + tmp = NULL; + locker = NULL; + + list_for_each_entry_safe (locker, tmp, &del, lockers) { + list_del_init (&locker->lockers); + if (locker->fd) + fd_unref (locker->fd); + else + loc_wipe (&locker->loc); + + free (locker); + } + + return ret; +} + +int32_t +gf_direntry_to_bin (dir_entry_t *head, + char **bufferp) +{ + dir_entry_t *trav = NULL; + uint32_t len = 0; + uint32_t this_len = 0; + char *buffer = NULL; + size_t buflen = -1; + char *ptr = NULL; + char *tmp_buf = NULL; + + trav = head->next; + while (trav) { + len += strlen (trav->name); + len += 1; + len += strlen (trav->link); + len += 1; /* for '\n' */ + len += 256; // max possible for statbuf; + trav = trav->next; + } + + buffer = CALLOC (1, len); + if (buffer == NULL) { + gf_log ("server", GF_LOG_ERROR, + "failed to allocate memory for buffer"); + goto out; + } + + ptr = buffer; + trav = head->next; + while (trav) { + tmp_buf = stat_to_str (&trav->buf); + /* tmp_buf will have \n before \0 */ + + this_len = sprintf (ptr, "%s/%s%s\n", + trav->name, tmp_buf, + trav->link); + + FREE (tmp_buf); + trav = trav->next; + ptr += this_len; + } + if (bufferp) + *bufferp = buffer; + buflen = strlen (buffer); + +out: + return buflen; +} + + +static struct _lock_table * +gf_lock_table_new (void) +{ + struct _lock_table *new = NULL; + + new = CALLOC (1, sizeof (struct _lock_table)); + if (new == NULL) { + gf_log ("server-protocol", GF_LOG_CRITICAL, + "failed to allocate memory for new lock table"); + goto out; + } + INIT_LIST_HEAD (&new->dir_lockers); + INIT_LIST_HEAD (&new->file_lockers); + LOCK_INIT (&new->lock); +out: + return new; +} + + +int +server_connection_destroy (xlator_t *this, server_connection_t *conn) +{ + + call_frame_t *frame = NULL, *tmp_frame 
= NULL; + xlator_t *bound_xl = NULL; + int32_t ret = -1; + server_state_t *state = NULL; + struct list_head file_lockers; + struct list_head dir_lockers; + struct _lock_table *ltable = NULL; + struct _locker *locker = NULL, *tmp = NULL; + struct flock flock = {0,}; + + + bound_xl = (xlator_t *) (conn->bound_xl); + + if (bound_xl) { + /* trans will have ref_count = 1 after this call, but its + ok since this function is called in + GF_EVENT_TRANSPORT_CLEANUP */ + frame = create_frame (this, this->ctx->pool); + + pthread_mutex_lock (&(conn->lock)); + { + if (conn->ltable) { + ltable = conn->ltable; + conn->ltable = NULL; + } + } + pthread_mutex_unlock (&conn->lock); + + INIT_LIST_HEAD (&file_lockers); + INIT_LIST_HEAD (&dir_lockers); + + LOCK (<able->lock); + { + list_splice_init (<able->file_lockers, + &file_lockers); + + list_splice_init (<able->dir_lockers, &dir_lockers); + } + UNLOCK (<able->lock); + free (ltable); + + flock.l_type = F_UNLCK; + flock.l_start = 0; + flock.l_len = 0; + list_for_each_entry_safe (locker, + tmp, &file_lockers, lockers) { + tmp_frame = copy_frame (frame); + /* + pid = 0 is a special case that tells posix-locks + to release all locks from this transport + */ + tmp_frame->root->pid = 0; + tmp_frame->root->trans = conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->finodelk, + locker->fd, F_SETLK, &flock); + fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->inodelk, + &(locker->loc), F_SETLK, &flock); + loc_wipe (&locker->loc); + } + + list_del_init (&locker->lockers); + free (locker); + } + + tmp = NULL; + locker = NULL; + list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) { + tmp_frame = copy_frame (frame); + + tmp_frame->root->pid = 0; + tmp_frame->root->trans = conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->fentrylk, + locker->fd, NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + 
fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->entrylk, + &(locker->loc), NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + loc_wipe (&locker->loc); + } + + list_del_init (&locker->lockers); + free (locker); + } + + state = CALL_STATE (frame); + if (state) + free (state); + STACK_DESTROY (frame->root); + + pthread_mutex_lock (&(conn->lock)); + { + if (conn->fdtable) { + gf_fd_fdtable_destroy (conn->fdtable); + conn->fdtable = NULL; + } + } + pthread_mutex_unlock (&conn->lock); + + } + + gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s", + conn->id); + + FREE (conn->id); + FREE (conn); + + return ret; +} + + +server_connection_t * +server_connection_get (xlator_t *this, const char *id) +{ + server_connection_t *conn = NULL; + server_connection_t *trav = NULL; + server_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + list_for_each_entry (trav, &conf->conns, list) { + if (!strcmp (id, trav->id)) { + conn = trav; + break; + } + } + + if (!conn) { + conn = (void *) CALLOC (1, sizeof (*conn)); + + conn->id = strdup (id); + conn->fdtable = gf_fd_fdtable_alloc (); + conn->ltable = gf_lock_table_new (); + + pthread_mutex_init (&conn->lock, NULL); + + list_add (&conn->list, &conf->conns); + } + + conn->ref++; + } + pthread_mutex_unlock (&conf->mutex); + + return conn; +} + + +void +server_connection_put (xlator_t *this, server_connection_t *conn) +{ + server_conf_t *conf = NULL; + server_connection_t *todel = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + conn->ref--; + + if (!conn->ref) { + list_del_init (&conn->list); + todel = conn; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (todel) { + server_connection_destroy (this, todel); + } + + return; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h new file mode 100644 index 000000000..36c0ce98e --- /dev/null +++ 
b/xlators/protocol/server/src/server-helpers.h @@ -0,0 +1,77 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __SERVER_HELPERS_H__ +#define __SERVER_HELPERS_H__ + +#define CALL_STATE(frame) ((server_state_t *)frame->root->state) + +#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->bound_xl) + +#define TRANSPORT_FROM_FRAME(frame) ((transport_t *) CALL_STATE(frame)->trans) + +#define SERVER_CONNECTION(frame) \ + ((server_connection_t *) TRANSPORT_FROM_FRAME(frame)->xl_private) + +#define SERVER_CONF(frame) \ + ((server_conf_t *)TRANSPORT_FROM_FRAME(frame)->xl->private) + +#define TRANSPORT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->trans) + +#define INODE_LRU_LIMIT(this) \ + (((server_conf_t *)(this->private))->inode_lru_limit) + +#define IS_ROOT_INODE(inode) (inode == inode->table->root) + +#define IS_NOT_ROOT(pathlen) ((pathlen > 2)? 
1 : 0) + +int32_t +server_loc_fill (loc_t *loc, + server_state_t *state, + ino_t ino, + ino_t par, + const char *name, + const char *path); + +char * +stat_to_str (struct stat *stbuf); + +call_frame_t * +server_copy_frame (call_frame_t *frame); + +void free_state (server_state_t *state); + +void server_loc_wipe (loc_t *loc); + +int32_t +gf_add_locker (struct _lock_table *table, + loc_t *loc, + fd_t *fd, + pid_t pid); + +int32_t +gf_del_locker (struct _lock_table *table, + loc_t *loc, + fd_t *fd, + pid_t pid); + +int32_t +gf_direntry_to_bin (dir_entry_t *head, + char **bufferp); +#endif /* __SERVER_HELPERS_H__ */ diff --git a/xlators/protocol/server/src/server-protocol.c b/xlators/protocol/server/src/server-protocol.c new file mode 100644 index 000000000..a5198c1ed --- /dev/null +++ b/xlators/protocol/server/src/server-protocol.c @@ -0,0 +1,7984 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <time.h> +#include <sys/uio.h> +#include <sys/resource.h> + +#include <libgen.h> + +#include "transport.h" +#include "fnmatch.h" +#include "xlator.h" +#include "protocol.h" +#include "server-protocol.h" +#include "server-helpers.h" +#include "call-stub.h" +#include "defaults.h" +#include "list.h" +#include "dict.h" +#include "compat.h" +#include "compat-errno.h" + + +static void +protocol_server_reply (call_frame_t *frame, + int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + dict_t *refs) +{ + server_state_t *state = NULL; + xlator_t *bound_xl = NULL; + transport_t *trans = NULL; + + bound_xl = BOUND_XL(frame); + state = CALL_STATE(frame); + trans = state->trans; + + hdr->callid = hton64 (frame->root->unique); + hdr->type = hton32 (type); + hdr->op = hton32 (op); + + transport_submit (trans, (char *)hdr, hdrlen, vector, count, refs); + /* TODO: If transport submit fails, there is no reply sent to client, + * its bailed out as of now.. loggically, only this frame should fail. + */ + + STACK_DESTROY (frame->root); + + if (state) + free_state (state); + +} + + +/* + * server_fchmod_cbk + */ +int32_t +server_fchmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fchmod_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FCHMOD %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHMOD, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fchmod + * + */ +int32_t +server_fchmod (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_connection_t *conn = NULL; + gf_fop_fchmod_req_t *req = NULL; + server_state_t *state = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->mode = ntoh32 (req->mode); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + STACK_WIND (frame, + server_fchmod_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->fchmod, + state->fd, + state->mode); + + return 0; +fail: + server_fchmod_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + return 0; +} + + +/* + * server_fchown_cbk + */ +int32_t +server_fchown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fchown_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FCHOWN %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHOWN, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fchown + * + */ +int32_t +server_fchown (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_connection_t *conn = NULL; + gf_fop_fchown_req_t *req = NULL; + server_state_t *state = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->uid = ntoh32 (req->uid); + state->gid = ntoh32 (req->gid); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + STACK_WIND (frame, + server_fchown_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->fchown, + state->fd, + state->uid, + state->gid); + + return 0; +fail: + server_fchown_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + return 0; +} + +/* + * server_setdents_cbk - writedir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setdents_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETDENTS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_lk_cbk - lk callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @lock: + * + * not for external 
reference + */ +int32_t +server_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lk_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_flock_from_flock (&rsp->flock, lock); + } else if (op_errno != ENOSYS) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int32_t +server_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_inodelk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + if (state->flock.l_type == F_UNLCK) + gf_del_locker (conn->ltable, + &state->loc, NULL, frame->root->pid); + else + gf_add_locker (conn->ltable, + &state->loc, NULL, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, op_ret, + strerror (op_errno)); + } + + server_loc_wipe (&state->loc); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_INODELK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int32_t +server_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_finodelk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + if (state->flock.l_type == F_UNLCK) + gf_del_locker (conn->ltable, + NULL, state->fd, frame->root->pid); + else + gf_add_locker (conn->ltable, + NULL, state->fd, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FINODELK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_entrylk_cbk - + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @lock: + * + * not for external reference + */ +int32_t +server_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_entrylk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + if (state->cmd == ENTRYLK_UNLOCK) + gf_del_locker (conn->ltable, + &state->loc, NULL, frame->root->pid); + else + gf_add_locker (conn->ltable, + &state->loc, NULL, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, op_ret, + strerror (op_errno)); + } + + server_loc_wipe (&state->loc); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int32_t +server_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_fentrylk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + state = CALL_STATE(frame); + if (state->cmd == ENTRYLK_UNLOCK) + gf_del_locker (conn->ltable, + NULL, state->fd, frame->root->pid); + else + gf_add_locker (conn->ltable, + NULL, state->fd, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_access_cbk - access callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_access_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ACCESS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_utimens_cbk - utimens callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_utimens_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_utimens_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) + gf_stat_from_stat (&rsp->stat, stbuf); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UTIMENS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * 
server_chmod_cbk - chmod callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_chmod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_chmod_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) + gf_stat_from_stat (&rsp->stat, stbuf); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHMOD, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_chown_cbk - chown callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_chown_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_chown_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) + gf_stat_from_stat (&rsp->stat, stbuf); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHOWN, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_rmdir_cbk - rmdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * 
@op_errno: + * + * not for external reference + */ +int32_t +server_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rmdir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + + state = CALL_STATE(frame); + + if (op_ret == 0) { + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RMDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_mkdir_cbk - mkdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mkdir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + inode_link (inode, state->loc.parent, state->loc.name, stbuf); + inode_lookup (inode); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": MKDIR %s 
==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_mknod_cbk - mknod callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mknod_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + inode_link (inode, state->loc.parent, state->loc.name, stbuf); + inode_lookup (inode); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": MKNOD %s ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKNOD, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fsyncdir_cbk - fsyncdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsyncdir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = 
gf_param (hdr); + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSYNCDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNCDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_getdents_cbk - readdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @entries: + * @count: + * + * not for external reference + */ +int32_t +server_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entries, + int32_t count) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_getdents_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t vec_count = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + dict_t *reply_dict = NULL; + char *buffer = NULL; + size_t buflen = 0; + struct iovec vector[1]; + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + buflen = gf_direntry_to_bin (entries, &buffer); + if (buflen < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to convert " + "entries list to string buffer", + state->fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + reply_dict = dict_new (); + if (reply_dict == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to get new dict", + state->fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + ret = dict_set_dynptr (reply_dict, NULL, + buffer, buflen); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to set read buffer " + "to reply dictionary", + 
state->fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = -ret; + goto out; + } + frame->root->rsp_refs = reply_dict; + vector[0].iov_base = buffer; + vector[0].iov_len = buflen; + vec_count = 1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": GETDENTS %"PRId64" (%"PRId64"): %"PRId32" (%s)", + frame->root->unique, + state->fd_no, + state->fd ? state->fd->inode->ino : 0, + op_ret, strerror (op_errno)); + vector[0].iov_base = NULL; + vector[0].iov_len = 0; + } + +out: + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + rsp->count = hton32 (count); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETDENTS, + hdr, hdrlen, vector, vec_count, + frame->root->rsp_refs); + + if (reply_dict) + dict_unref (reply_dict); + + return 0; +} + + +/* + * server_readdir_cbk - getdents callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *entries) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readdir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + size_t buf_size = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret > 0) + buf_size = gf_dirent_serialize (entries, NULL, 0); + + hdrlen = gf_hdr_len (rsp, buf_size); + hdr = gf_hdr_new (rsp, buf_size); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret > 0) { + rsp->size = hton32 (buf_size); + gf_dirent_serialize (entries, rsp->buf, buf_size); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + 
frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_releasedir_cbk - releasedir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_releasedir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_cbk_releasedir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASEDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_opendir_cbk - opendir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @fd: file descriptor structure of opened directory + * + * not for external reference + */ +int32_t +server_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_opendir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + fd_bind (fd); + + state->fd_no = gf_fd_unused_get (conn->fdtable, fd); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + + /* NOTE: corresponding to fd_create()'s ref */ + if (state->fd) + fd_unref (state->fd); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (state->fd_no); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPENDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_statfs_cbk - statfs callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @buf: + * + * not for external reference + */ +int32_t +server_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *buf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_statfs_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_statfs_from_statfs (&rsp->statfs, buf); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STATFS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_removexattr_cbk - removexattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_removexattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + 
int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_REMOVEXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_getxattr_cbk - getxattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @value: + * + * not for external reference + */ +int32_t +server_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_getxattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized length of " + "reply dict", + state->loc.path, state->ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if (op_ret >= 0) { + ret = dict_serialize (dict, rsp->dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->ino); + op_ret = -1; + op_errno = -ret; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_setxattr_cbk - setxattr callback for server protocol 
+ * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setxattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_rename_cbk - rename callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rename_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + stbuf->st_ino = state->loc.inode->ino; + stbuf->st_mode = state->loc.inode->st_mode; + + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s " + "==> %"PRId64"/%s", + frame->root->unique, state->loc.inode->ino, + state->loc.parent->ino, state->loc.name, + state->loc2.parent->ino, state->loc2.name); + + inode_rename (state->itable, + state->loc.parent, state->loc.name, + state->loc2.parent, 
state->loc2.name, + state->loc.inode, stbuf); + gf_stat_from_stat (&rsp->stat, stbuf); + } + + server_loc_wipe (&(state->loc)); + server_loc_wipe (&(state->loc2)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RENAME, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_unlink_cbk - unlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_unlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + if (op_ret == 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")", + frame->root->unique, state->loc.parent->ino, + state->loc.name, state->loc.inode->ino); + + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UNLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_symlink_cbk - symlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int32_t +server_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_symlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + inode_link (inode, state->loc.parent, state->loc.name, stbuf); + inode_lookup (inode); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SYMLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_link_cbk - link callback for server protocol + * @frame: call frame + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_link_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + stbuf->st_ino = state->loc.inode->ino; + gf_stat_from_stat (&rsp->stat, stbuf); + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s", + frame->root->unique, inode->ino, state->loc2.parent->ino, + state->loc2.name, state->loc.parent->ino, state->loc.name); + + inode_link (inode, state->loc2.parent, + state->loc2.name, stbuf); + } else { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s " + " ==> %"PRId32" (%s)", + frame->root->unique, inode->ino, state->loc2.parent->ino, + state->loc2.name, state->loc.parent->ino, state->loc.name, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + server_loc_wipe (&(state->loc2)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_truncate_cbk - truncate callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * 
@stbuf: + * + * not for external reference + */ +int32_t +server_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_truncate_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_TRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fstat_cbk - fstat callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_fstat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fstat_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSTAT, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_ftruncate_cbk - ftruncate callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_ftruncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_ftruncate_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FTRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_flush_cbk - flush callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FLUSH, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fsync_cbk - fsync callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsync_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNC, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_release_cbk - rleease callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_release_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_cbk_release_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_writev_cbk - writev callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ + +int32_t +server_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_write_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" 
(%s)", + frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, + GF_OP_TYPE_FOP_REPLY, GF_FOP_WRITE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_readv_cbk - readv callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @vector: + * @count: + * + * not for external reference + */ +int32_t +server_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_read_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READ, + hdr, hdrlen, vector, count, + frame->root->rsp_refs); + + return 0; +} + + +/* + * server_open_cbk - open callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * + * not for external reference + */ +int32_t +server_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_open_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + fd_bind (fd); + + state->fd_no = gf_fd_unused_get (conn->fdtable, fd); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + + /* NOTE: corresponding to fd_create()'s ref */ + if (state->fd) + fd_unref (state->fd); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (state->fd_no); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPEN, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_create_cbk - create callback for server + * @frame: call frame + * @cookie: + * @this: translator structure + * @op_ret: + * @op_errno: + * @fd: file descriptor + * @inode: inode structure + * @stbuf: struct stat of created file + * + * not for external reference + */ +int32_t +server_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *stbuf) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_create_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")", + frame->root->unique, state->loc.parent->ino, + state->loc.name, stbuf->st_ino); + + inode_link (inode, state->loc.parent, state->loc.name, stbuf); + inode_lookup (inode); + + fd_bind (fd); + + state->fd_no = gf_fd_unused_get (conn->fdtable, fd); + + if ((state->fd_no < 0) || (fd == 0)) { + op_ret = state->fd_no; + op_errno = errno; + } + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + + /* NOTE: corresponding to fd_create()'s ref */ + if (state->fd) + fd_unref (state->fd); + + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (state->fd_no); + + if (op_ret >= 0) + gf_stat_from_stat (&rsp->stat, stbuf); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CREATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_readlink_cbk - readlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf: + * + * not for external reference + */ +int32_t +server_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *buf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + size_t linklen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + linklen = strlen (buf) + 1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, linklen); + hdr = gf_hdr_new (rsp, linklen); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) + strcpy (rsp->path, buf); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_stat_cbk - stat callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int32_t +server_stat_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_stat_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret == 0) { + gf_stat_from_stat (&rsp->stat, stbuf); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STAT, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_forget_cbk - forget callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int32_t +server_forget_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_cbk_forget_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_FORGET, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_lookup_cbk - lookup callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @inode: + * @stbuf: + * + * not for external reference + */ +int32_t +server_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *stbuf, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lookup_rsp_t *rsp = NULL; + server_state_t *state = NULL; + inode_t *root_inode = NULL; + int32_t dict_len = 0; + size_t hdrlen = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + + state = CALL_STATE(frame); + if ((op_errno == ESTALE) && (op_ret == -1)) { + /* Send lookup again with new ctx dictionary */ + loc_t loc = {0,}; + + root_inode = BOUND_XL(frame)->itable->root; + if (state->loc.inode != root_inode) { + if (state->loc.inode) + inode_unref (state->loc.inode); + state->loc.inode = inode_new (BOUND_XL(frame)->itable); + } + loc.inode = state->loc.inode; + loc.path = state->path; + 
state->is_revalidate = 2; + STACK_WIND (frame, server_lookup_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->lookup, + &loc, + state->xattr_req); + return 0; + } + + if (dict) { + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized " + "length of reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = EINVAL; + dict_len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, dict_len); + hdr = gf_hdr_new (rsp, dict_len); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = -ret; + dict_len = 0; + } + } + rsp->dict_len = hton32 (dict_len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + root_inode = BOUND_XL(frame)->itable->root; + if (inode == root_inode) { + /* we just looked up root ("/") */ + stbuf->st_ino = 1; + if (inode->st_mode == 0) + inode->st_mode = stbuf->st_mode; + } + + gf_stat_from_stat (&rsp->stat, stbuf); + + if (inode->ino == 0) { + inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + inode_lookup (inode); + } + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + server_loc_wipe (&state->loc); + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LOOKUP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int32_t +server_xattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + + state = CALL_STATE(frame); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + if ((op_ret >= 0) && dict) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized length" + " for reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = -ret; + len = 0; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + server_loc_wipe (&(state->loc)); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_XATTROP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int32_t +server_fxattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_rsp_t *rsp 
= NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + if ((op_ret >= 0) && dict) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to get " + "serialized length for reply dict", + state->fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to " + "serialize reply dict", + state->fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = -ret; + len = 0; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FXATTROP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_stub_resume - this is callback function used whenever an fop does + * STACK_WIND to fops->lookup in order to lookup the inode + * for a pathname. this case of doing fops->lookup arises + * when fop searches in inode table for pathname and search + * fails. 
+ * + * @stub: call stub + * @op_ret: + * @op_errno: + * @inode: + * @parent: + * + * not for external reference + */ +int32_t +server_stub_resume (call_stub_t *stub, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + inode_t *parent) +{ + inode_t *server_inode = inode; + + if (!stub) { + return 0; + } + switch (stub->fop) + { + case GF_FOP_RENAME: + if (stub->args.rename.old.inode == NULL) { + loc_t *newloc = NULL; + /* now we are called by lookup of oldpath. */ + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": RENAME (%s -> %s) on %s " + "returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.rename.old.path, + stub->args.rename.new.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + /* lookup of oldpath failed, UNWIND to + * server_rename_cbk with ret=-1 and + * errno=ENOENT + */ + server_rename_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT, + NULL); + server_loc_wipe (&stub->args.rename.old); + server_loc_wipe (&stub->args.rename.new); + FREE (stub); + return 0; + } + + if (stub->args.rename.old.parent == NULL) + stub->args.rename.old.parent = + inode_ref (parent); + + /* store inode information of oldpath in our stub + * and search for newpath in inode table. 
+ */ + if (server_inode) { + stub->args.rename.old.inode = + inode_ref (server_inode); + + stub->args.rename.old.ino = + server_inode->ino; + } + + /* now lookup for newpath */ + newloc = &stub->args.rename.new; + + if (newloc->parent == NULL) { + /* lookup for newpath */ + do_path_lookup (stub, newloc); + break; + } else { + /* found newpath in inode cache */ + call_resume (stub); + break; + } + } else { + /* we are called by the lookup of newpath */ + if (stub->args.rename.new.parent == NULL) + stub->args.rename.new.parent = + inode_ref (parent); + } + + /* after looking up for oldpath as well as newpath, + * we are ready to resume */ + { + call_resume (stub); + } + break; + + case GF_FOP_OPEN: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": OPEN (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.open.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_open_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT, + NULL); + FREE (stub->args.open.loc.path); + FREE (stub); + return 0; + } + if (stub->args.open.loc.parent == NULL) + stub->args.open.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.open.loc.inode == NULL)) { + stub->args.open.loc.inode = inode_ref (server_inode); + stub->args.open.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_LOOKUP: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, + GF_LOG_DEBUG, + "%"PRId64": LOOKUP (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.lookup.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_lookup_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL, + NULL); + server_loc_wipe (&stub->args.lookup.loc); + FREE (stub); + return 0; + } + + if (stub->args.lookup.loc.parent == NULL) + stub->args.lookup.loc.parent = inode_ref (parent); + + if 
(server_inode && (stub->args.lookup.loc.inode == NULL)) { + stub->args.lookup.loc.inode = inode_ref (server_inode); + stub->args.lookup.loc.ino = server_inode->ino; + } + + call_resume (stub); + + break; + } + + case GF_FOP_STAT: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": STAT (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.stat.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_stat_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.stat.loc); + FREE (stub); + return 0; + } + + /* TODO:reply from here only, we already have stat structure */ + if (stub->args.stat.loc.parent == NULL) + stub->args.stat.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.stat.loc.inode == NULL)) { + stub->args.stat.loc.inode = inode_ref (server_inode); + stub->args.stat.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_XATTROP: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": XATTROP (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.xattrop.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_xattrop_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.xattrop.loc); + FREE (stub); + return 0; + } + + if (stub->args.xattrop.loc.parent == NULL) + stub->args.xattrop.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.xattrop.loc.inode == NULL)) { + stub->args.xattrop.loc.inode = + inode_ref (server_inode); + + stub->args.xattrop.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_UNLINK: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": UNLINK (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + 
stub->args.unlink.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_unlink_cbk (stub->frame, NULL, + stub->frame->this, + -1, ENOENT); + server_loc_wipe (&stub->args.unlink.loc); + FREE (stub); + return 0; + } + + if (stub->args.unlink.loc.parent == NULL) + stub->args.unlink.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.unlink.loc.inode == NULL)) { + stub->args.unlink.loc.inode = inode_ref (server_inode); + stub->args.unlink.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_SYMLINK: + { + if ((op_ret < 0) && (parent == NULL)) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": SYMLINK (%s -> %s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.symlink.loc.path, + stub->args.symlink.linkname, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_symlink_cbk (stub->frame, NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL); + server_loc_wipe (&stub->args.symlink.loc); + FREE (stub); + return 0; + } + + if (stub->args.symlink.loc.parent == NULL) + stub->args.symlink.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.symlink.loc.inode == NULL)) { + stub->args.symlink.loc.inode = + inode_ref (server_inode); + stub->args.symlink.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_RMDIR: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": RMDIR (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.rmdir.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_rmdir_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT); + server_loc_wipe (&stub->args.rmdir.loc); + FREE (stub); + return 0; + } + + if (stub->args.rmdir.loc.parent == NULL) + stub->args.rmdir.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.rmdir.loc.inode == NULL)) { + stub->args.rmdir.loc.inode 
= inode_ref (server_inode); + stub->args.rmdir.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_CHMOD: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": CHMOD (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.chmod.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_chmod_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT, + NULL); + server_loc_wipe (&stub->args.chmod.loc); + FREE (stub); + return 0; + } + + if (stub->args.chmod.loc.parent == NULL) + stub->args.chmod.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.chmod.loc.inode == NULL)) { + stub->args.chmod.loc.inode = inode_ref (server_inode); + stub->args.chmod.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_CHOWN: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": CHOWN (%s) on %s returning ENOENT: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.chown.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + server_chown_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT, + NULL); + server_loc_wipe (&stub->args.chown.loc); + FREE (stub); + return 0; + } + + if (stub->args.chown.loc.parent == NULL) + stub->args.chown.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.chown.loc.inode == NULL)) { + stub->args.chown.loc.inode = inode_ref (server_inode); + stub->args.chown.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_LINK: + { + if (stub->args.link.oldloc.inode == NULL) { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": LINK (%s -> %s) on %s returning " + "error for oldloc: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.link.oldloc.path, + stub->args.link.newloc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + 
server_link_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL); + server_loc_wipe (&stub->args.link.oldloc); + server_loc_wipe (&stub->args.link.newloc); + FREE (stub); + return 0; + } + + if (stub->args.link.oldloc.parent == NULL) + stub->args.link.oldloc.parent = + inode_ref (parent); + + if (server_inode && + (stub->args.link.oldloc.inode == NULL)) { + stub->args.link.oldloc.inode = + inode_ref (server_inode); + stub->args.link.oldloc.ino = server_inode->ino; + } + + if (stub->args.link.newloc.parent == NULL) { + do_path_lookup (stub, + &(stub->args.link.newloc)); + break; + } + } else { + /* we are called by the lookup of newpath */ + if ((op_ret < 0) && (parent == NULL)) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": LINK (%s -> %s) on %s returning " + "error for newloc: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.link.oldloc.path, + stub->args.link.newloc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_link_cbk (stub->frame, NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL); + + server_loc_wipe (&stub->args.link.oldloc); + server_loc_wipe (&stub->args.link.newloc); + FREE (stub); + break; + } + + if (stub->args.link.newloc.parent == NULL) { + stub->args.link.newloc.parent = + inode_ref (parent); + } + + if (server_inode && + (stub->args.link.newloc.inode == NULL)) { + /* as new.inode doesn't get forget, it + * needs to be unref'd here */ + stub->args.link.newloc.inode = + inode_ref (server_inode); + stub->args.link.newloc.ino = server_inode->ino; + } + } + call_resume (stub); + break; + } + + case GF_FOP_TRUNCATE: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": TRUNCATE (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.truncate.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_truncate_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + 
server_loc_wipe (&stub->args.truncate.loc); + FREE (stub); + return 0; + } + + if (stub->args.truncate.loc.parent == NULL) + stub->args.truncate.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.truncate.loc.inode == NULL)) { + stub->args.truncate.loc.inode = + inode_ref (server_inode); + stub->args.truncate.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_STATFS: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": STATFS (%s) on %s returning ENOENT: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.statfs.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_statfs_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.statfs.loc); + FREE (stub); + return 0; + } + + if (stub->args.statfs.loc.parent == NULL) + stub->args.statfs.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.statfs.loc.inode == NULL)) { + stub->args.statfs.loc.inode = inode_ref (server_inode); + stub->args.statfs.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_SETXATTR: + { + dict_t *dict = stub->args.setxattr.dict; + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": SETXATTR (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.setxattr.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_setxattr_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT); + + server_loc_wipe (&stub->args.setxattr.loc); + dict_unref (dict); + FREE (stub); + return 0; + } + + if (stub->args.setxattr.loc.parent == NULL) + stub->args.setxattr.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.setxattr.loc.inode == NULL)) { + stub->args.setxattr.loc.inode = + inode_ref (server_inode); + stub->args.setxattr.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + 
case GF_FOP_GETXATTR: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": GETXATTR (%s) on %s for key %s " + "returning error: %"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.getxattr.loc.path, + BOUND_XL(stub->frame)->name, + stub->args.getxattr.name ? + stub->args.getxattr.name : "<nul>", + op_ret, op_errno); + + server_getxattr_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.getxattr.loc); + FREE (stub); + return 0; + } + + if (stub->args.getxattr.loc.parent == NULL) + stub->args.getxattr.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.getxattr.loc.inode == NULL)) { + stub->args.getxattr.loc.inode = + inode_ref (server_inode); + stub->args.getxattr.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_REMOVEXATTR: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": REMOVEXATTR (%s) on %s for key %s " + "returning error: %"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.removexattr.loc.path, + BOUND_XL(stub->frame)->name, + stub->args.removexattr.name, + op_ret, op_errno); + + server_removexattr_cbk (stub->frame, + NULL, + stub->frame->this, + -1, + ENOENT); + server_loc_wipe (&stub->args.removexattr.loc); + FREE (stub); + return 0; + } + + if (stub->args.removexattr.loc.parent == NULL) + stub->args.removexattr.loc.parent = inode_ref (parent); + + if (server_inode && + (stub->args.removexattr.loc.inode == NULL)) { + stub->args.removexattr.loc.inode = + inode_ref (server_inode); + stub->args.removexattr.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_OPENDIR: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": OPENDIR (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.opendir.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + 
server_opendir_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.opendir.loc); + FREE (stub); + return 0; + } + + if (stub->args.opendir.loc.parent == NULL) + stub->args.opendir.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.opendir.loc.inode == NULL)) { + stub->args.opendir.loc.inode = + inode_ref (server_inode); + stub->args.opendir.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + case GF_FOP_ACCESS: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": ACCESS (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.access.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_access_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT); + server_loc_wipe (&stub->args.access.loc); + FREE (stub); + return 0; + } + + if (stub->args.access.loc.parent == NULL) + stub->args.access.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.access.loc.inode == NULL)) { + stub->args.access.loc.inode = inode_ref (server_inode); + stub->args.access.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + + + case GF_FOP_UTIMENS: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": UTIMENS (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.utimens.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_utimens_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.utimens.loc); + FREE (stub); + return 0; + } + + if (stub->args.utimens.loc.parent == NULL) + stub->args.utimens.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.utimens.loc.inode == NULL)) { + stub->args.utimens.loc.inode = + inode_ref (server_inode); + stub->args.utimens.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + 
} + + case GF_FOP_READLINK: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": READLINK (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.readlink.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_readlink_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL); + server_loc_wipe (&stub->args.readlink.loc); + FREE (stub); + return 0; + } + + if (stub->args.readlink.loc.parent == NULL) + stub->args.readlink.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.readlink.loc.inode == NULL)) { + stub->args.readlink.loc.inode = + inode_ref (server_inode); + stub->args.readlink.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + case GF_FOP_MKDIR: + { + if ((op_ret < 0) && (parent == NULL)) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": MKDIR (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.mkdir.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_mkdir_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL); + server_loc_wipe (&stub->args.mkdir.loc); + FREE (stub); + break; + } + + if (stub->args.mkdir.loc.parent == NULL) + stub->args.mkdir.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.mkdir.loc.inode == NULL)) { + stub->args.mkdir.loc.inode = inode_ref (server_inode); + stub->args.mkdir.loc.ino = server_inode->ino; + } + + call_resume (stub); + break; + } + + case GF_FOP_CREATE: + { + if ((op_ret < 0) && (parent == NULL)) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": CREATE (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.create.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_create_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL, + NULL); + if (stub->args.create.fd) + 
fd_unref (stub->args.create.fd); + server_loc_wipe (&stub->args.create.loc); + FREE (stub); + break; + } + + if (stub->args.create.loc.parent == NULL) + stub->args.create.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.create.loc.inode == NULL)) { + stub->args.create.loc.inode = inode_ref (server_inode); + stub->args.create.loc.ino = server_inode->ino; + } + + call_resume (stub); + break; + } + + case GF_FOP_MKNOD: + { + if ((op_ret < 0) && (parent == NULL)) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": MKNOD (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.mknod.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_mknod_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT, + NULL, NULL); + server_loc_wipe (&stub->args.mknod.loc); + FREE (stub); + break; + } + + if (stub->args.mknod.loc.parent == NULL) + stub->args.mknod.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.mknod.loc.inode == NULL)) { + stub->args.mknod.loc.inode = inode_ref (server_inode); + stub->args.mknod.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + case GF_FOP_ENTRYLK: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": ENTRYLK (%s) on %s for key %s returning " + "error: %"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.entrylk.loc.path, + BOUND_XL(stub->frame)->name, + stub->args.entrylk.name ? 
+ stub->args.entrylk.name : "<nul>", + op_ret, op_errno); + + server_entrylk_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT); + server_loc_wipe (&stub->args.entrylk.loc); + FREE (stub); + break; + } + + if (stub->args.entrylk.loc.parent == NULL) + stub->args.entrylk.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.entrylk.loc.inode == NULL)) { + stub->args.entrylk.loc.inode = inode_ref (server_inode); + stub->args.entrylk.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + case GF_FOP_INODELK: + { + if (op_ret < 0) { + gf_log (stub->frame->this->name, GF_LOG_ERROR, + "%"PRId64": INODELK (%s) on %s returning error: " + "%"PRId32" (%"PRId32")", + stub->frame->root->unique, + stub->args.inodelk.loc.path, + BOUND_XL(stub->frame)->name, + op_ret, op_errno); + + server_inodelk_cbk (stub->frame, + NULL, + stub->frame->this, + -1, ENOENT); + server_loc_wipe (&stub->args.inodelk.loc); + FREE (stub); + break; + } + + if (stub->args.inodelk.loc.parent == NULL) + stub->args.inodelk.loc.parent = inode_ref (parent); + + if (server_inode && (stub->args.inodelk.loc.inode == NULL)) { + stub->args.inodelk.loc.inode = + inode_ref (server_inode); + stub->args.inodelk.loc.ino = server_inode->ino; + } + call_resume (stub); + break; + } + default: + call_resume (stub); + } + + return 0; +} + +static int +server_lookup_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if ((state->loc.parent == NULL) && + (loc->parent)) + state->loc.parent = inode_ref (loc->parent); + + if (state->loc.inode == NULL) { + if (loc->inode == NULL) + state->loc.inode = inode_new (state->itable); + else + /* FIXME: why another lookup? 
*/ + state->loc.inode = inode_ref (loc->inode); + } else { + if (loc->inode && (state->loc.inode != loc->inode)) { + if (state->loc.inode) + inode_unref (state->loc.inode); + state->loc.inode = inode_ref (loc->inode); + } + } + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": LOOKUP \'%"PRId64"/%s\'", + frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_lookup_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->lookup, + &(state->loc), + xattr_req); + return 0; +} + +/* + * server_lookup - lookup function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int +server_lookup (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_lookup_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *lookup_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0, baselen = 0; + size_t dictlen = 0; + dict_t *xattr_req = NULL; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + + pathlen = STRLEN_0 (req->path); + dictlen = ntoh32 (req->dictlen); + + /* NOTE: lookup() uses req->ino only to identify if a lookup() + * is requested for 'root' or not + */ + state->ino = ntoh64 (req->ino); + if (state->ino != 1) + state->ino = 0; + + state->par = ntoh64 (req->par); + state->path = req->path; + if (IS_NOT_ROOT(pathlen)) { + state->bname = req->bname + pathlen; + baselen = STRLEN_0 (state->bname); + } + + if (dictlen) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict + pathlen + baselen, dictlen); + GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail); + + xattr_req = dict_new (); + GF_VALIDATE_OR_GOTO(bound_xl->name, xattr_req, fail); + + ret = dict_unserialize (req_dictbuf, dictlen, &xattr_req); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%"PRId64": %s (%"PRId64"): failed to " + "unserialize request buffer to 
dictionary", + frame->root->unique, state->loc.path, + state->ino); + free (req_dictbuf); + goto fail; + } else{ + xattr_req->extra_free = req_dictbuf; + state->xattr_req = xattr_req; + xattr_req = NULL; + } + } + } + + ret = server_loc_fill (&state->loc, state, + state->ino, state->par, state->bname, + state->path); + + if (state->loc.inode) { + /* revalidate */ + state->is_revalidate = 1; + } else { + /* fresh lookup or inode was previously pruned out */ + state->is_revalidate = -1; + } + + lookup_stub = fop_lookup_stub (frame, server_lookup_resume, + &(state->loc), state->xattr_req); + GF_VALIDATE_OR_GOTO(bound_xl->name, lookup_stub, fail); + + if ((state->loc.parent == NULL) && + IS_NOT_ROOT(pathlen)) + do_path_lookup (lookup_stub, &(state->loc)); + else + call_resume (lookup_stub); + + return 0; +fail: + server_lookup_cbk (frame, NULL, frame->this, + -1,EINVAL, + NULL, NULL, NULL); + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + + +/* + * server_forget - forget function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_forget (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int index = 0; + ino_t ino = 0; + int32_t count = 0; + inode_t *inode = NULL; + gf_cbk_forget_req_t *req = NULL; + + req = gf_param (hdr); + count = ntoh32 (req->count); + + for (index = 0; index < count; index++) { + + ino = ntoh64 (req->ino_array[index]); + + if (!ino) + continue; + + inode = inode_search (bound_xl->itable, ino, NULL); + + if (inode) { + inode_forget (inode, 0); + inode_unref (inode); + } else { + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FORGET %"PRId64" not found " + "in inode table", + frame->root->unique, ino); + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FORGET \'%"PRId64"\'", + frame->root->unique, ino); + } + + server_forget_cbk (frame, NULL, bound_xl, 0, 0); + + 
return 0; +} + + + +int32_t +server_stat_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": STAT \'%s (%"PRId64")\'", + frame->root->unique, state->loc.path, state->loc.ino); + + STACK_WIND (frame, + server_stat_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->stat, + loc); + return 0; +} + +/* + * server_stat - stat function for server + * @frame: call frame + * @bound_xl: translator this server is bound to + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_stat (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *stat_stub = NULL; + gf_fop_stat_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = STRLEN_0(state->path); + + ret = server_loc_fill (&(state->loc), state, + state->ino, state->par, state->bname, + state->path); + + stat_stub = fop_stat_stub (frame, + server_stat_resume, + &(state->loc)); + GF_VALIDATE_OR_GOTO(bound_xl->name, stat_stub, fail); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (stat_stub, &(state->loc)); + } else { + call_resume (stat_stub); + } + return 0; +fail: + server_stat_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL); + return 0; +} + + +int32_t +server_readlink_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": READLINK \'%s (%"PRId64")\'", + frame->root->unique, state->loc.path, state->loc.ino); + + STACK_WIND (frame, + server_readlink_cbk, + BOUND_XL(frame), + 
BOUND_XL(frame)->fops->readlink, + loc, + size); + return 0; +} + +/* + * server_readlink - readlink function for server + * @frame: call frame + * @bound_xl: translator this server is bound to + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_readlink (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *readlink_stub = NULL; + gf_fop_readlink_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->size = ntoh32 (req->size); + + state->ino = ntoh64 (req->ino); + state->path = req->path; + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + readlink_stub = fop_readlink_stub (frame, + server_readlink_resume, + &(state->loc), + state->size); + GF_VALIDATE_OR_GOTO(bound_xl->name, readlink_stub, fail); + + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (readlink_stub, &(state->loc)); + } else { + call_resume (readlink_stub); + } + return 0; +fail: + server_readlink_cbk (frame, NULL,frame->this, + -1, EINVAL, + NULL); + return 0; +} + +int32_t +server_create_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + server_state_t *state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + state->loc.inode = inode_new (state->itable); + GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->loc.inode, fail); + + state->fd = fd_create (state->loc.inode, frame->root->pid); + GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->fd, fail); + + state->fd->flags = flags; + state->fd = fd_ref (state->fd); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": CREATE \'%"PRId64"/%s\'", + frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_create_cbk, + BOUND_XL(frame), + 
BOUND_XL(frame)->fops->create, + &(state->loc), + flags, + mode, + state->fd); + + return 0; +fail: + server_create_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL, NULL, NULL); + return 0; +} + + +/* + * server_create - create function for server + * @frame: call frame + * @bound_xl: translator this server is bound to + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_create (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_create_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *create_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + + state->par = ntoh64 (req->par); + state->path = req->path; + if (IS_NOT_ROOT(pathlen)) + state->bname = req->bname + pathlen; + + state->mode = ntoh32 (req->mode); + state->flags = ntoh32 (req->flags); + } + + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + + create_stub = fop_create_stub (frame, server_create_resume, + &(state->loc), state->flags, + state->mode, state->fd); + GF_VALIDATE_OR_GOTO(bound_xl->name, create_stub, fail); + + if (state->loc.parent == NULL) { + do_path_lookup (create_stub, &state->loc); + } else { + call_resume (create_stub); + } + return 0; +fail: + server_create_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL, NULL, NULL); + return 0; +} + + +int32_t +server_open_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + server_state_t *state = CALL_STATE(frame); + fd_t *new_fd = NULL; + + new_fd = fd_create (loc->inode, frame->root->pid); + GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, new_fd, fail); + + new_fd->flags = flags; + + state->fd = fd_ref (new_fd); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": OPEN \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); 
+ + STACK_WIND (frame, + server_open_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->open, + loc, + flags, + state->fd); + + return 0; +fail: + server_open_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL); + return 0; +} + +/* + * server_open - open function for server protocol + * @frame: call frame + * @bound_xl: translator this server protocol is bound to + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_open (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *open_stub = NULL; + gf_fop_open_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = STRLEN_0(state->path); + state->flags = ntoh32 (req->flags); + } + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + open_stub = fop_open_stub (frame, + server_open_resume, + &(state->loc), state->flags, NULL); + GF_VALIDATE_OR_GOTO(bound_xl->name, open_stub, fail); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (open_stub, &state->loc); + } else { + call_resume (open_stub); + } + return 0; +fail: + server_open_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL); + return 0; +} + + +/* + * server_readv - readv function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_readv (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_read_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 
0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": READV \'fd=%"PRId64" (%"PRId64"); " + "offset=%"PRId64"; size=%"PRId64, + frame->root->unique, state->fd_no, state->fd->inode->ino, + state->offset, (int64_t)state->size); + + STACK_WIND (frame, + server_readv_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->readv, + state->fd, state->size, state->offset); + return 0; +fail: + server_readv_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL, 0, NULL); + return 0; +} + + +/* + * server_writev - writev function for server + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_writev (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_connection_t *conn = NULL; + gf_fop_write_req_t *req = NULL; + struct iovec iov = {0, }; + dict_t *refs = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->offset = ntoh64 (req->offset); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + iov.iov_base = buf; + iov.iov_len = buflen; + + refs = dict_new (); + GF_VALIDATE_OR_GOTO(bound_xl->name, refs, fail); + + ret = dict_set_dynptr (refs, NULL, buf, buflen); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to set buffer entry " + "to req_refs", + state->fd_no, state->fd->inode->ino); + goto fail; + } else { + buf = NULL; + } + + frame->root->req_refs = refs; + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": WRITEV \'fd=%"PRId64" (%"PRId64"); 
" + "offset=%"PRId64"; size=%"PRId64, + frame->root->unique, state->fd_no, state->fd->inode->ino, + state->offset, (int64_t)buflen); + + STACK_WIND (frame, + server_writev_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->writev, + state->fd, &iov, 1, state->offset); + + if (refs) + dict_unref (refs); + return 0; +fail: + server_writev_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + + if (buf) + free (buf); + + if (refs) + dict_unref (refs); + + return 0; +} + + + +/* + * server_release - release function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_release (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_cbk_release_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->fd_no = ntoh64 (req->fd); + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + gf_fd_put (conn->fdtable, + state->fd_no); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": RELEASE \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, + server_release_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->flush, + state->fd); + return 0; +fail: + server_release_cbk (frame, NULL, frame->this, + -1, EINVAL); + return 0; +} + + +/* + * server_fsync - fsync function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_fsync (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_fsync_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); 
+ + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->flags = ntoh32 (req->data); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FSYNC \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, + server_fsync_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->fsync, + state->fd, state->flags); + return 0; +fail: + server_fsync_cbk (frame, NULL, frame->this, + -1, EINVAL); + + return 0; +} + + +/* + * server_flush - flush function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_flush (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_flush_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FLUSH \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, + server_flush_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->flush, + state->fd); + return 0; + +fail: + server_flush_cbk (frame, NULL, frame->this, + -1, EINVAL); + + return 0; +} + + +/* + * server_ftruncate - ftruncate function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameters dictionary + * + * not for external reference + */ +int32_t +server_ftruncate (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t 
hdrlen, + char *buf, size_t buflen) +{ + gf_fop_ftruncate_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->offset = ntoh64 (req->offset); + } + + GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FTRUNCATE \'fd=%"PRId64" (%"PRId64"); " + "offset=%"PRId64"\'", + frame->root->unique, state->fd_no, state->fd->inode->ino, + state->offset); + + STACK_WIND (frame, + server_ftruncate_cbk, + bound_xl, + bound_xl->fops->ftruncate, + state->fd, + state->offset); + return 0; +fail: + server_ftruncate_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + + return 0; +} + + +/* + * server_fstat - fstat function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_fstat (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_fstat_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + } + + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_fstat_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + + goto out; + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FSTAT \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, + server_fstat_cbk, + bound_xl, + bound_xl->fops->fstat, + state->fd); +out: + 
return 0; +} + + +int32_t +server_truncate_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": TRUNCATE \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_truncate_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->truncate, + loc, + offset); + return 0; +} + + +/* + * server_truncate - truncate function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + * not for external reference + */ +int32_t +server_truncate (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *truncate_stub = NULL; + gf_fop_truncate_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + state->offset = ntoh64 (req->offset); + + state->path = req->path; + state->ino = ntoh64 (req->ino); + } + + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + truncate_stub = fop_truncate_stub (frame, + server_truncate_resume, + &(state->loc), + state->offset); + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (truncate_stub, &(state->loc)); + } else { + call_resume (truncate_stub); + } + + return 0; +} + + + + + +int32_t +server_unlink_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + if (state->loc.inode == NULL) + state->loc.inode = inode_ref (loc->inode); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": UNLINK \'%"PRId64"/%s (%"PRId64")\'", + frame->root->unique, state->par, state->path, + state->loc.inode->ino); + + STACK_WIND 
(frame, + server_unlink_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->unlink, + loc); + return 0; +} + +/* + * server_unlink - unlink function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_unlink (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *unlink_stub = NULL; + gf_fop_unlink_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + pathlen = STRLEN_0(req->path); + + state->par = ntoh64 (req->par); + state->path = req->path; + if (IS_NOT_ROOT(pathlen)) + state->bname = req->bname + pathlen; + + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + + unlink_stub = fop_unlink_stub (frame, + server_unlink_resume, + &(state->loc)); + + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (unlink_stub, &state->loc); + } else { + call_resume (unlink_stub); + } + + return 0; +} + + + + + +int32_t +server_setxattr_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": SETXATTR \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_setxattr_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->setxattr, + loc, + dict, + flags); + return 0; +} + +/* + * server_setxattr - setxattr function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ + +int32_t +server_setxattr (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *setxattr_stub = NULL; + gf_fop_setxattr_req_t 
*req = NULL; + dict_t *dict = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + size_t dict_len = 0; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + dict_len = ntoh32 (req->dict_len); + + state->path = req->path + dict_len; + + pathlen = STRLEN_0(state->path); + state->ino = ntoh64 (req->ino); + + state->flags = ntoh32 (req->flags); + } + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict, dict_len); + GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail); + + dict = dict_new (); + GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%"PRId64": %s (%"PRId64"): failed to " + "unserialize request buffer to dictionary", + frame->root->unique, state->loc.path, + state->ino); + free (req_dictbuf); + goto fail; + } else{ + dict->extra_free = req_dictbuf; + } + } + + setxattr_stub = fop_setxattr_stub (frame, + server_setxattr_resume, + &(state->loc), + dict, + state->flags); + GF_VALIDATE_OR_GOTO(bound_xl->name, setxattr_stub, fail); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (setxattr_stub, &(state->loc)); + } else { + call_resume (setxattr_stub); + } + + if (dict) + dict_unref (dict); + + return 0; +fail: + if (dict) + dict_unref (dict); + + server_setxattr_cbk (frame, NULL, frame->this, + -1, ENOENT); + return 0; + +} + + + +int32_t +server_fxattrop (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_connection_t *conn = NULL; + gf_fop_fxattrop_req_t *req = NULL; + dict_t *dict = NULL; + server_state_t *state = NULL; + size_t dict_len = 0; + char *req_dictbuf = NULL; + int32_t ret = -1; + + conn = SERVER_CONNECTION(frame); + + req = 
gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + dict_len = ntoh32 (req->dict_len); + state->ino = ntoh64 (req->ino); + state->flags = ntoh32 (req->flags); + } + + if (dict_len) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict, dict_len); + GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail); + + dict = dict_new (); + GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to unserialize " + "request buffer to dictionary", + state->fd_no, state->fd->inode->ino); + free (req_dictbuf); + goto fail; + } else { + dict->extra_free = req_dictbuf; + } + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FXATTROP \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, + server_fxattrop_cbk, + bound_xl, + bound_xl->fops->fxattrop, + state->fd, + state->flags, + dict); + if (dict) + dict_unref (dict); + return 0; +fail: + if (dict) + dict_unref (dict); + + server_fxattrop_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + return 0; +} + +int32_t +server_xattrop_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": XATTROP \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_xattrop_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->xattrop, + loc, + flags, + dict); + return 0; +} + +int32_t +server_xattrop (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_xattrop_req_t *req = NULL; + dict_t *dict = NULL; + 
server_state_t *state = NULL; + call_stub_t *xattrop_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0; + size_t dict_len = 0; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + dict_len = ntoh32 (req->dict_len); + state->ino = ntoh64 (req->ino); + state->path = req->path + dict_len; + pathlen = STRLEN_0(state->path); + state->flags = ntoh32 (req->flags); + } + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + if (dict_len) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict, dict_len); + GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail); + + dict = dict_new (); + GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to unserialize " + "request buffer to dictionary", + state->loc.path, state->ino); + goto fail; + } else { + dict->extra_free = req_dictbuf; + } + } + xattrop_stub = fop_xattrop_stub (frame, + server_xattrop_resume, + &(state->loc), + state->flags, + dict); + GF_VALIDATE_OR_GOTO(bound_xl->name, xattrop_stub, fail); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (xattrop_stub, &(state->loc)); + } else { + call_resume (xattrop_stub); + } + + if (dict) + dict_unref (dict); + return 0; +fail: + if (dict) + dict_unref (dict); + + server_xattrop_cbk (frame, NULL, frame->this, + -1, EINVAL, + NULL); + return 0; +} + + +int32_t +server_getxattr_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": GETXATTR \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_getxattr_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->getxattr, + loc, + name); + return 0; +} + 
+/* + * server_getxattr - getxattr function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_getxattr (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_getxattr_req_t *req = NULL; + call_stub_t *getxattr_stub = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t namelen = 0; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + + state->path = req->path; + state->ino = ntoh64 (req->ino); + + namelen = ntoh32 (req->namelen); + if (namelen) + state->name = (req->name + pathlen); + } + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + getxattr_stub = fop_getxattr_stub (frame, + server_getxattr_resume, + &(state->loc), + state->name); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (getxattr_stub, &(state->loc)); + } else { + call_resume (getxattr_stub); + } + + return 0; +} + + + +int32_t +server_removexattr_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": REMOVEXATTR \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_removexattr_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->removexattr, + loc, + name); + return 0; +} + +/* + * server_removexattr - removexattr function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_removexattr (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_removexattr_req_t *req = NULL; + call_stub_t *removexattr_stub 
= NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + + state->path = req->path; + state->ino = ntoh64 (req->ino); + + state->name = (req->name + pathlen); + } + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + removexattr_stub = fop_removexattr_stub (frame, + server_removexattr_resume, + &(state->loc), + state->name); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (removexattr_stub, &(state->loc)); + } else { + call_resume (removexattr_stub); + } + + return 0; +} + + +/* + * server_statfs - statfs function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_statfs (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_statfs_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + + req = gf_param (hdr); + + state = CALL_STATE(frame); + state->ino = ntoh64 (req->ino); + state->path = req->path; + + ret = server_loc_fill (&state->loc, state, + state->ino, 0, NULL, state->path); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": STATFS \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_statfs_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->statfs, + &(state->loc)); + + return 0; +} + + + +int32_t +server_opendir_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + server_state_t *state = CALL_STATE(frame); + fd_t *new_fd = NULL; + + new_fd = fd_create (loc->inode, frame->root->pid); + state->fd = fd_ref (new_fd); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": OPENDIR \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + 
server_opendir_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->opendir, + loc, + state->fd); + return 0; +} + + +/* + * server_opendir - opendir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_opendir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *opendir_stub = NULL; + gf_fop_opendir_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->path = req->path; + pathlen = STRLEN_0(state->path); + state->ino = ntoh64 (req->ino); + } + + ret = server_loc_fill (&state->loc, state, + state->ino, 0, NULL, state->path); + + opendir_stub = fop_opendir_stub (frame, + server_opendir_resume, + &(state->loc), + NULL); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (opendir_stub, &(state->loc)); + } else { + call_resume (opendir_stub); + } + + return 0; +} + + +/* + * server_releasedir - releasedir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_releasedir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_cbk_releasedir_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->fd_no = ntoh64 (req->fd); + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_releasedir_cbk (frame, NULL, frame->this, + -1, EINVAL); + goto out; + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + 
"%"PRId64": RELEASEDIR \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + gf_fd_put (conn->fdtable, state->fd_no); + + server_releasedir_cbk (frame, NULL, frame->this, + 0, 0); +out: + return 0; +} + + +/* + * server_readdir - readdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_getdents (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_getdents_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + state->flags = ntoh32 (req->flags); + } + + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_getdents_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL, 0); + + goto out; + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": GETDENTS \'fd=%"PRId64" (%"PRId64"); " + "offset=%"PRId64"; size=%"PRId64, + frame->root->unique, state->fd_no, state->fd->inode->ino, + state->offset, (int64_t)state->size); + + STACK_WIND (frame, + server_getdents_cbk, + bound_xl, + bound_xl->fops->getdents, + state->fd, + state->size, + state->offset, + state->flags); +out: + return 0; +} + + +/* + * server_readdir - readdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_readdir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_readdir_req_t *req = NULL; + server_state_t *state 
= NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + } + + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_readdir_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + + goto out; + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": READDIR \'fd=%"PRId64" (%"PRId64"); " + "offset=%"PRId64"; size=%"PRId64, + frame->root->unique, state->fd_no, state->fd->inode->ino, + state->offset, (int64_t)state->size); + + STACK_WIND (frame, + server_readdir_cbk, + bound_xl, + bound_xl->fops->readdir, + state->fd, state->size, state->offset); +out: + return 0; +} + + + +/* + * server_fsyncdir - fsyncdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_fsyncdir (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_fsyncdir_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->flags = ntoh32 (req->data); + } + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_fsyncdir_cbk (frame, NULL, frame->this, + -1, EINVAL); + goto out; + } + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": FSYNCDIR \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + 
STACK_WIND (frame, + server_fsyncdir_cbk, + bound_xl, + bound_xl->fops->fsyncdir, + state->fd, state->flags); +out: + return 0; +} + + +int32_t +server_mknod_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t dev) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + state->loc.inode = inode_new (state->itable); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": MKNOD \'%"PRId64"/%s\'", + frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_mknod_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->mknod, + &(state->loc), mode, dev); + + return 0; +} +/* + * server_mknod - mknod function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_mknod (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_mknod_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *mknod_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + + state->par = ntoh64 (req->par); + state->path = req->path; + if (IS_NOT_ROOT(pathlen)) + state->bname = req->bname + pathlen; + + state->mode = ntoh32 (req->mode); + state->dev = ntoh64 (req->dev); + } + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + + mknod_stub = fop_mknod_stub (frame, server_mknod_resume, + &(state->loc), state->mode, state->dev); + + if (state->loc.parent == NULL) { + do_path_lookup (mknod_stub, &(state->loc)); + } else { + call_resume (mknod_stub); + } + + return 0; +} + +int32_t +server_mkdir_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) + +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if 
(state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + state->loc.inode = inode_new (state->itable); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": MKDIR \'%"PRId64"/%s\'", + frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_mkdir_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->mkdir, + &(state->loc), + state->mode); + + return 0; +} + +/* + * server_mkdir - mkdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + * not for external reference + */ +int32_t +server_mkdir (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_mkdir_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *mkdir_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + state->mode = ntoh32 (req->mode); + + state->path = req->path; + state->bname = req->bname + pathlen; + state->par = ntoh64 (req->par); + } + + + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + + mkdir_stub = fop_mkdir_stub (frame, server_mkdir_resume, + &(state->loc), state->mode); + + if (state->loc.parent == NULL) { + do_path_lookup (mkdir_stub, &(state->loc)); + } else { + call_resume (mkdir_stub); + } + + return 0; +} + + +int32_t +server_rmdir_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + if (state->loc.inode == NULL) + state->loc.inode = inode_ref (loc->inode); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": RMDIR \'%"PRId64"/%s\'", + frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_rmdir_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->rmdir, + loc); + return 0; +} + +/* + * server_rmdir - 
rmdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + * not for external reference + */ +int32_t +server_rmdir (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *rmdir_stub = NULL; + gf_fop_rmdir_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + state->path = req->path; + state->par = ntoh64 (req->par); + state->bname = req->bname + pathlen; + } + + + ret = server_loc_fill (&(state->loc), state, + state->ino, state->par, state->bname, + state->path); + + rmdir_stub = fop_rmdir_stub (frame, + server_rmdir_resume, + &(state->loc)); + + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (rmdir_stub, &(state->loc)); + } else { + call_resume (rmdir_stub); + } + + return 0; +} + + + +int32_t +server_chown_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": CHOWN \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, server_chown_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->chown, + loc, uid, gid); + return 0; +} + + +/* + * server_chown - chown function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_chown (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *chown_stub = NULL; + gf_fop_chown_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = 
STRLEN_0(state->path); + state->uid = ntoh32 (req->uid); + state->gid = ntoh32 (req->gid); + } + + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + chown_stub = fop_chown_stub (frame, + server_chown_resume, + &(state->loc), + state->uid, + state->gid); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (chown_stub, &(state->loc)); + } else { + call_resume (chown_stub); + } + + return 0; +} + + +int32_t +server_chmod_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": CHMOD \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_chmod_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->chmod, + loc, + mode); + return 0; + +} + +/* + * server_chmod - chmod function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_chmod (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *chmod_stub = NULL; + gf_fop_chmod_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = STRLEN_0(state->path); + + state->mode = ntoh32 (req->mode); + } + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + chmod_stub = fop_chmod_stub (frame, + server_chmod_resume, + &(state->loc), + state->mode); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (chmod_stub, &(state->loc)); + } else { + call_resume (chmod_stub); + } + + return 0; +} + + +int32_t 
+server_utimens_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec *tv) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": UTIMENS \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_utimens_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->utimens, + loc, + tv); + return 0; +} + +/* + * server_utimens - utimens function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_utimens (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *utimens_stub = NULL; + gf_fop_utimens_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = STRLEN_0(state->path); + + gf_timespec_to_timespec (req->tv, state->tv); + } + + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + utimens_stub = fop_utimens_stub (frame, + server_utimens_resume, + &(state->loc), + state->tv); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (utimens_stub, &(state->loc)); + } else { + call_resume (utimens_stub); + } + + return 0; +} + + + +int32_t +server_inodelk_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, int32_t cmd, + struct flock *flock) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + if (state->loc.inode == NULL) { + state->loc.inode = inode_ref (loc->inode); + } + + if (state->loc.parent == NULL) { + state->loc.parent = inode_ref (loc->parent); + } + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": INODELK \'%s (%"PRId64")\'", + frame->root->unique, state->path, 
state->ino); + + STACK_WIND (frame, + server_inodelk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->inodelk, + loc, cmd, flock); + return 0; + +} + + +int32_t +server_inodelk (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *inodelk_stub = NULL; + gf_fop_inodelk_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->cmd = ntoh32 (req->cmd); + switch (state->cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = F_SETLKW; + break; + } + + state->type = ntoh32 (req->type); + + pathlen = STRLEN_0(req->path); + + state->path = req->path; + state->ino = ntoh64 (req->ino); + + gf_flock_to_flock (&req->flock, &state->flock); + + switch (state->type) { + case GF_LK_F_RDLCK: + state->flock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + state->flock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + state->flock.l_type = F_UNLCK; + break; + } + + } + + server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + inodelk_stub = fop_inodelk_stub (frame, + server_inodelk_resume, + &state->loc, state->cmd, &state->flock); + + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (inodelk_stub, &(state->loc)); + } else { + call_resume (inodelk_stub); + } + + return 0; +} + + +int32_t +server_finodelk (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_finodelk_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->cmd = ntoh32 (req->cmd); + 
switch (state->cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = F_SETLKW; + break; + } + + state->type = ntoh32 (req->type); + + gf_flock_to_flock (&req->flock, &state->flock); + + switch (state->type) { + case GF_LK_F_RDLCK: + state->flock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + state->flock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + state->flock.l_type = F_UNLCK; + break; + } + + } + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_finodelk_cbk (frame, NULL, frame->this, + -1, EINVAL); + return -1; + } + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": FINODELK \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, server_finodelk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->finodelk, + state->fd, state->cmd, &state->flock); + return 0; +} + + +int32_t +server_entrylk_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, const char *name, + entrylk_cmd cmd, entrylk_type type) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.inode == NULL) + state->loc.inode = inode_ref (loc->inode); + + if ((state->loc.parent == NULL) && + (loc->parent)) + state->loc.parent = inode_ref (loc->parent); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": ENTRYLK \'%s (%"PRId64") \'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_entrylk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->entrylk, + loc, name, cmd, type); + return 0; + +} + +/* + * server_entrylk - entrylk function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_entrylk (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char 
*buf, size_t buflen) +{ + gf_fop_entrylk_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *entrylk_stub = NULL; + size_t pathlen = 0; + size_t namelen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + + state->path = req->path; + state->ino = ntoh64 (req->ino); + namelen = ntoh64 (req->namelen); + if (namelen) + state->name = req->name + pathlen; + + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + } + + + server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + entrylk_stub = fop_entrylk_stub (frame, + server_entrylk_resume, + &state->loc, state->name, state->cmd, + state->type); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (entrylk_stub, &(state->loc)); + } else { + call_resume (entrylk_stub); + } + + return 0; +} + + +int32_t +server_fentrylk (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_fentrylk_req_t *req = NULL; + server_state_t *state = NULL; + size_t namelen = 0; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + namelen = ntoh64 (req->namelen); + + if (namelen) + state->name = req->name; + } + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_fentrylk_cbk (frame, NULL, frame->this, + -1, EINVAL); + return -1; + } + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": FENTRYLK \'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, server_fentrylk_cbk, + BOUND_XL(frame), + 
BOUND_XL(frame)->fops->fentrylk, + state->fd, state->name, state->cmd, state->type); + return 0; +} + + +int32_t +server_access_resume (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": ACCESS \'%s (%"PRId64")\'", + frame->root->unique, state->path, state->ino); + + STACK_WIND (frame, + server_access_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->access, + loc, + mask); + return 0; +} + +/* + * server_access - access function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_access (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + call_stub_t *access_stub = NULL; + gf_fop_access_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->mask = ntoh32 (req->mask); + + state->ino = ntoh64 (req->ino); + state->path = req->path; + pathlen = STRLEN_0(state->path); + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, state->path); + + access_stub = fop_access_stub (frame, + server_access_resume, + &(state->loc), + state->mask); + + if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) || + (state->loc.inode == NULL)) { + do_path_lookup (access_stub, &(state->loc)); + } else { + call_resume (access_stub); + } + + return 0; +} + + +int32_t +server_symlink_resume (call_frame_t *frame, + xlator_t *this, + const char *linkname, + loc_t *loc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (loc->parent); + + state->loc.inode = inode_new (BOUND_XL(frame)->itable); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": SYMLINK \'%"PRId64"/%s \'", + 
frame->root->unique, state->par, state->bname); + + STACK_WIND (frame, + server_symlink_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->symlink, + linkname, + &(state->loc)); + + return 0; +} + +/* + * server_symlink- symlink function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ + +int32_t +server_symlink (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_state_t *state = NULL; + gf_fop_symlink_req_t *req = NULL; + call_stub_t *symlink_stub = NULL; + int32_t ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + pathlen = STRLEN_0(req->path); + baselen = STRLEN_0(req->bname + pathlen); + + state->par = ntoh64 (req->par); + state->path = req->path; + state->bname = req->bname + pathlen; + + state->name = (req->linkname + pathlen + baselen); + } + + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + + symlink_stub = fop_symlink_stub (frame, server_symlink_resume, + state->name, &(state->loc)); + + if (state->loc.parent == NULL) { + do_path_lookup (symlink_stub, &(state->loc)); + } else { + call_resume (symlink_stub); + } + + return 0; +} + +int32_t +server_link_resume (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref (oldloc->parent); + + if (state->loc.inode == NULL) { + state->loc.inode = inode_ref (oldloc->inode); + } else if (state->loc.inode != oldloc->inode) { + if (state->loc.inode) + inode_unref (state->loc.inode); + state->loc.inode = inode_ref (oldloc->inode); + } + + if (state->loc2.parent == NULL) + state->loc2.parent = inode_ref (newloc->parent); + + state->loc2.inode = inode_ref (state->loc.inode); + + gf_log (BOUND_XL(frame)->name, 
GF_LOG_DEBUG, + "%"PRId64": LINK \'%"PRId64"/%s ==> %s (%"PRId64")\'", + frame->root->unique, state->par2, state->bname2, + state->path, state->ino); + + STACK_WIND (frame, + server_link_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->link, + &(state->loc), + &(state->loc2)); + return 0; +} + +/* + * server_link - link function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + * not for external reference + */ +int32_t +server_link (call_frame_t *frame, + xlator_t *this, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_link_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *link_stub = NULL; + int32_t ret = -1; + size_t oldpathlen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + oldpathlen = STRLEN_0(req->oldpath); + newpathlen = STRLEN_0(req->newpath + oldpathlen); + newbaselen = STRLEN_0(req->newbname + oldpathlen + newpathlen); + + state->path = req->oldpath; + state->path2 = req->newpath + oldpathlen; + state->bname2 = req->newbname + oldpathlen + newpathlen; + state->ino = ntoh64 (req->oldino); + state->par2 = ntoh64 (req->newpar); + } + + ret = server_loc_fill (&(state->loc), state, + state->ino, 0, NULL, + state->path); + ret = server_loc_fill (&(state->loc2), state, + 0, state->par2, state->bname2, + state->path2); + + link_stub = fop_link_stub (frame, server_link_resume, + &(state->loc), &(state->loc2)); + + if ((state->loc.parent == NULL) || + (state->loc.inode == NULL)) { + do_path_lookup (link_stub, &(state->loc)); + } else if (state->loc2.parent == NULL) { + do_path_lookup (link_stub, &(state->loc2)); + } else { + call_resume (link_stub); + } + + return 0; +} + + +int32_t +server_rename_resume (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->loc.parent == NULL) + state->loc.parent = inode_ref 
(oldloc->parent); + + if (state->loc.inode == NULL) { + state->loc.inode = inode_ref (oldloc->inode); + } + + if (state->loc2.parent == NULL) + state->loc2.parent = inode_ref (newloc->parent); + + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": RENAME %s (%"PRId64"/%s) ==> %s (%"PRId64"/%s)", + frame->root->unique, state->path, state->par, state->bname, + state->path2, state->par2, state->bname2); + + STACK_WIND (frame, + server_rename_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->rename, + &(state->loc), + &(state->loc2)); + return 0; +} + +/* + * server_rename - rename function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int32_t +server_rename (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_fop_rename_req_t *req = NULL; + server_state_t *state = NULL; + call_stub_t *rename_stub = NULL; + int32_t ret = -1; + size_t oldpathlen = 0; + size_t oldbaselen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + + req = gf_param (hdr); + + state = CALL_STATE(frame); + { + oldpathlen = STRLEN_0(req->oldpath); + oldbaselen = STRLEN_0(req->oldbname + oldpathlen); + newpathlen = STRLEN_0(req->newpath + oldpathlen + oldbaselen); + newbaselen = STRLEN_0(req->newbname + oldpathlen + + oldbaselen + newpathlen); + + state->path = req->oldpath; + state->bname = req->oldbname + oldpathlen; + state->path2 = req->newpath + oldpathlen + oldbaselen; + state->bname2 = (req->newbname + oldpathlen + oldbaselen + + newpathlen); + + state->par = ntoh64 (req->oldpar); + state->par2 = ntoh64 (req->newpar); + } + + ret = server_loc_fill (&(state->loc), state, + 0, state->par, state->bname, + state->path); + ret = server_loc_fill (&(state->loc2), state, + 0, state->par2, state->bname2, + state->path2); + + rename_stub = fop_rename_stub (frame, + server_rename_resume, + &(state->loc), + &(state->loc2)); + + if 
((state->loc.parent == NULL) || + (state->loc.inode == NULL)){ + do_path_lookup (rename_stub, &(state->loc)); + } else if ((state->loc2.parent == NULL)){ + do_path_lookup (rename_stub, &(state->loc2)); + } else { + /* we have found inode for both oldpath and newpath in + * inode cache. lets continue with fops->rename() */ + call_resume (rename_stub); + } + + return 0; +} + + +/* + * server_lk - lk function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ + +int32_t +server_lk (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + struct flock lock = {0, }; + gf_fop_lk_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + { + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + } + + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_lk_cbk (frame, NULL, frame->this, + -1, EINVAL, NULL); + + goto out; + } + + switch (state->cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = F_SETLKW; + break; + } + + switch (state->type) { + case GF_LK_F_RDLCK: + lock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + lock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + lock.l_type = F_UNLCK; + break; + default: + gf_log (bound_xl->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!", + state->fd_no, state->fd->inode->ino, state->type); + break; + } + + gf_flock_to_flock (&req->flock, &lock); + + gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG, + "%"PRId64": LK 
\'fd=%"PRId64" (%"PRId64")\'", + frame->root->unique, state->fd_no, state->fd->inode->ino); + + STACK_WIND (frame, server_lk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->lk, + state->fd, state->cmd, &lock); + +out: + return 0; +} + + +/* + * server_writedir - + * + * @frame: + * @bound_xl: + * @params: + * + */ +int32_t +server_setdents (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + server_connection_t *conn = NULL; + gf_fop_setdents_req_t *req = NULL; + server_state_t *state = NULL; + dir_entry_t *entry = NULL; + dir_entry_t *trav = NULL; + dir_entry_t *prev = NULL; + int32_t count = 0; + int32_t i = 0; + int32_t bread = 0; + char *ender = NULL; + char *buffer_ptr = NULL; + char tmp_buf[512] = {0,}; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->fd_no = ntoh64 (req->fd); + if (state->fd_no >= 0) + state->fd = gf_fd_fdptr_get (conn->fdtable, + state->fd_no); + + state->nr_count = ntoh32 (req->count); + + if (state->fd == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64": unresolved fd", + state->fd_no); + + server_setdents_cbk (frame, NULL, frame->this, + -1, EINVAL); + + goto out; + } + + if (buf == NULL) { + gf_log (frame->this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): received a null buffer, " + "returning EINVAL", + state->fd_no, state->fd->inode->ino); + + server_setdents_cbk (frame, NULL, frame->this, + -1, EINVAL); + + goto out; + } + + entry = CALLOC (1, sizeof (dir_entry_t)); + ERR_ABORT (entry); + prev = entry; + buffer_ptr = buf; + + for (i = 0; i < state->nr_count ; i++) { + bread = 0; + trav = CALLOC (1, sizeof (dir_entry_t)); + ERR_ABORT (trav); + + ender = strchr (buffer_ptr, '/'); + if (!ender) + break; + count = ender - buffer_ptr; + trav->name = CALLOC (1, count + 2); + ERR_ABORT (trav->name); + + strncpy (trav->name, buffer_ptr, count); + bread = count + 1; + buffer_ptr += bread; + + ender = 
strchr (buffer_ptr, '\n'); + if (!ender) + break; + count = ender - buffer_ptr; + strncpy (tmp_buf, buffer_ptr, count); + bread = count + 1; + buffer_ptr += bread; + + /* TODO: use str_to_stat instead */ + { + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint64_t rdev; + uint64_t size; + uint32_t blksize; + uint64_t blocks; + uint32_t atime; + uint32_t atime_nsec; + uint32_t mtime; + uint32_t mtime_nsec; + uint32_t ctime; + uint32_t ctime_nsec; + + sscanf (tmp_buf, GF_STAT_PRINT_FMT_STR, + &dev, + &ino, + &mode, + &nlink, + &uid, + &gid, + &rdev, + &size, + &blksize, + &blocks, + &atime, + &atime_nsec, + &mtime, + &mtime_nsec, + &ctime, + &ctime_nsec); + + trav->buf.st_dev = dev; + trav->buf.st_ino = ino; + trav->buf.st_mode = mode; + trav->buf.st_nlink = nlink; + trav->buf.st_uid = uid; + trav->buf.st_gid = gid; + trav->buf.st_rdev = rdev; + trav->buf.st_size = size; + trav->buf.st_blksize = blksize; + trav->buf.st_blocks = blocks; + + trav->buf.st_atime = atime; + trav->buf.st_mtime = mtime; + trav->buf.st_ctime = ctime; + + ST_ATIM_NSEC_SET(&trav->buf, atime_nsec); + ST_MTIM_NSEC_SET(&trav->buf, mtime_nsec); + ST_CTIM_NSEC_SET(&trav->buf, ctime_nsec); + + } + + ender = strchr (buffer_ptr, '\n'); + if (!ender) + break; + count = ender - buffer_ptr; + *ender = '\0'; + if (S_ISLNK (trav->buf.st_mode)) { + trav->link = strdup (buffer_ptr); + } else + trav->link = ""; + bread = count + 1; + buffer_ptr += bread; + + prev->next = trav; + prev = trav; + } + + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": SETDENTS \'fd=%"PRId64" (%"PRId64"); count=%"PRId64, + frame->root->unique, state->fd_no, state->fd->inode->ino, + (int64_t)state->nr_count); + + STACK_WIND (frame, + server_setdents_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->setdents, + state->fd, + state->flags, + entry, + state->nr_count); + + + /* Free the variables allocated in this fop here */ + trav = entry->next; + prev = entry; + while (trav) { 
+ prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + +out: + return 0; +} + + + +/* xxx_MOPS */ + +/* Management Calls */ +/* + * mop_getspec - getspec function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + */ +int32_t +mop_getspec (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_hdr_common_t *_hdr = NULL; + gf_mop_getspec_rsp_t *rsp = NULL; + int32_t ret = -1; + int32_t op_errno = ENOENT; + int32_t gf_errno = 0; + int32_t spec_fd = -1; + size_t file_len = 0; + size_t _hdrlen = 0; + char tmp_filename[ZR_FILENAME_MAX] = {0,}; + char data_key[256] = {0,}; + char *filename = NULL; + struct stat stbuf = {0,}; + peer_info_t *peerinfo = NULL; + transport_t *trans = NULL; + + gf_mop_getspec_req_t *req = NULL; + uint32_t flags = 0; + uint32_t keylen = 0; + char *key = NULL; + + req = gf_param (hdr); + flags = ntoh32 (req->flags); + keylen = ntoh32 (req->keylen); + if (keylen) { + key = req->key; + } + + trans = TRANSPORT_FROM_FRAME(frame); + + peerinfo = &(trans->peerinfo); + /* Inform users that this option is changed now */ + ret = dict_get_str (frame->this->options, "client-volume-filename", + &filename); + if (ret == 0) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "option 'client-volume-specfile' is changed to " + "'volume-filename.<key>' which now takes 'key' as an " + "option to choose/fetch different files from server. " + "Refer documentation or contact developers for more " + "info. Currently defaulting to given file '%s'", + filename); + } + + if (key && !filename) { + sprintf (data_key, "volume-filename.%s", key); + ret = dict_get_str (frame->this->options, data_key, &filename); + if (ret < 0) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to get corresponding volume file " + "for the key '%s'. 
using default file %s", + key, GLUSTERFSD_SPEC_PATH); + } + } + if (!filename) { + filename = GLUSTERFSD_SPEC_PATH; + if (!key) + gf_log (trans->xl->name, GF_LOG_WARNING, + "using default volume file %s", + GLUSTERFSD_SPEC_PATH); + } + + { + sprintf (tmp_filename, "%s.%s", + filename, peerinfo->identifier); + + /* Try for ip specific client volfile. + * If not found, then go for, regular client file. + */ + ret = open (tmp_filename, O_RDONLY); + spec_fd = ret; + if (spec_fd < 0) { + gf_log (trans->xl->name, GF_LOG_DEBUG, + "Unable to open %s (%s)", + tmp_filename, strerror (errno)); + /* fall back */ + ret = open (filename, O_RDONLY); + spec_fd = ret; + if (spec_fd < 0) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "Unable to open %s (%s)", + filename, strerror (errno)); + goto fail; + } + } else { + /* Successful */ + filename = tmp_filename; + } + } + + /* to allocate the proper buffer to hold the file data */ + { + ret = stat (filename, &stbuf); + if (ret < 0){ + gf_log (trans->xl->name, GF_LOG_ERROR, + "Unable to stat %s (%s)", + filename, strerror (errno)); + goto fail; + } + + file_len = stbuf.st_size; + } + +fail: + op_errno = errno; + + _hdrlen = gf_hdr_len (rsp, file_len + 1); + _hdr = gf_hdr_new (rsp, file_len + 1); + rsp = gf_param (_hdr); + + _hdr->rsp.op_ret = hton32 (ret); + gf_errno = gf_errno_to_error (op_errno); + _hdr->rsp.op_errno = hton32 (gf_errno); + + if (file_len) { + read (spec_fd, rsp->spec, file_len); + close (spec_fd); + } + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_GETSPEC, + _hdr, _hdrlen, NULL, 0, NULL); + + return 0; +} + +int32_t +server_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *fchecksum, + uint8_t *dchecksum) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_checksum_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, ZR_FILENAME_MAX + 1 + ZR_FILENAME_MAX + 1); + hdr = gf_hdr_new (rsp, ZR_FILENAME_MAX + 1 
+ ZR_FILENAME_MAX + 1); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + memcpy (rsp->fchecksum, fchecksum, ZR_FILENAME_MAX); + rsp->fchecksum[ZR_FILENAME_MAX] = '\0'; + memcpy (rsp->dchecksum + ZR_FILENAME_MAX, + dchecksum, ZR_FILENAME_MAX); + rsp->dchecksum[ZR_FILENAME_MAX + ZR_FILENAME_MAX] = '\0'; + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHECKSUM, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int32_t +server_checksum (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + loc_t loc = {0,}; + int32_t flag = 0; + gf_fop_checksum_req_t *req = NULL; + + req = gf_param (hdr); + + loc.path = req->path; + loc.ino = ntoh64 (req->ino); + loc.inode = NULL; + flag = ntoh32 (req->flag); + + gf_log (bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": CHECKSUM \'%s (%"PRId64")\'", + frame->root->unique, loc.path, loc.ino); + + STACK_WIND (frame, + server_checksum_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->checksum, + &loc, + flag); + + return 0; +} + + +/* + * mop_unlock - unlock management function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + */ +int32_t +mop_getvolume (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + return 0; +} + +struct __get_xl_struct { + const char *name; + xlator_t *reply; +}; + +void __check_and_set (xlator_t *each, + void *data) +{ + if (!strcmp (each->name, + ((struct __get_xl_struct *) data)->name)) + ((struct __get_xl_struct *) data)->reply = each; +} + +static xlator_t * +get_xlator_by_name (xlator_t *some_xl, + const char *name) +{ + struct __get_xl_struct get = { + .name = name, + .reply = NULL + }; + + xlator_foreach (some_xl, __check_and_set, &get); + + return get.reply; +} + + +/* + * mop_setvolume - setvolume 
management function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + */ +int +mop_setvolume (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *req_hdr, size_t req_hdrlen, + char *req_buf, size_t req_buflen) +{ + server_connection_t *conn = NULL; + server_conf_t *conf = NULL; + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_setvolume_req_t *req = NULL; + gf_mop_setvolume_rsp_t *rsp = NULL; + peer_info_t *peerinfo = NULL; + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t gf_errno = 0; + dict_t *reply = NULL; + dict_t *config_params = NULL; + dict_t *params = NULL; + char *name = NULL; + char *version = NULL; + char *process_uuid = NULL; + xlator_t *xl = NULL; + transport_t *trans = NULL; + size_t rsp_hdrlen = -1; + size_t dict_len = -1; + size_t req_dictlen = -1; + + params = dict_new (); + reply = dict_new (); + + req = gf_param (req_hdr); + req_dictlen = ntoh32 (req->dict_len); + ret = dict_unserialize (req->buf, req_dictlen, ¶ms); + + config_params = dict_copy_with_ref (frame->this->options, NULL); + trans = TRANSPORT_FROM_FRAME(frame); + conf = SERVER_CONF(frame); + + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "Internal error: failed to unserialize " + "request dictionary"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_ERROR, + "failed to set error msg \"%s\"", + "Internal error: failed to unserialize " + "request dictionary"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + ret = dict_get_str (params, "process-uuid", &process_uuid); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "UUID not specified"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + + conn = server_connection_get (frame->this, process_uuid); + if (trans->xl_private != conn) + trans->xl_private = conn; + + ret = dict_get_str (params, "version", &version); + if (ret < 0) { + ret = 
dict_set_str (reply, "ERROR", + "No version number specified"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + ret = strcmp (version, PACKAGE_VERSION); + if (ret != 0) { + char *msg = NULL; + asprintf (&msg, + "Version mismatch: client(%s) Vs server (%s)", + version, PACKAGE_VERSION); + ret = dict_set_dynstr (reply, "ERROR", msg); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + + ret = dict_get_str (params, + "remote-subvolume", &name); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "No remote-subvolume option specified"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + xl = get_xlator_by_name (frame->this, name); + if (xl == NULL) { + char *msg = NULL; + asprintf (&msg, "remote-subvolume \"%s\" is not found", name); + ret = dict_set_dynstr (reply, "ERROR", msg); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = ENOENT; + goto fail; + } + + peerinfo = &trans->peerinfo; + ret = dict_set_static_ptr (params, "peer-info", peerinfo); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set peer-info"); + + if (conf->auth_modules == NULL) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "Authentication module not initialized"); + } + + ret = gf_authenticate (params, config_params, + conf->auth_modules); + if (ret == AUTH_ACCEPT) { + gf_log (trans->xl->name, GF_LOG_INFO, + "accepted client from %s", + peerinfo->identifier); + op_ret = 0; + conn->bound_xl = xl; + ret = dict_set_str (reply, "ERROR", "Success"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + } else { + gf_log (trans->xl->name, GF_LOG_ERROR, + "Cannot authenticate client from %s", + 
peerinfo->identifier); + op_ret = -1; + op_errno = EACCES; + ret = dict_set_str (reply, "ERROR", "Authentication failed"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + goto fail; + } + + if (conn->bound_xl == NULL) { + ret = dict_set_str (reply, "ERROR", + "Check volfile and handshake " + "options in protocol/client"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_ERROR, + "failed to set error msg"); + + op_ret = -1; + op_errno = EACCES; + goto fail; + } + + if ((conn->bound_xl != NULL) && + (ret >= 0) && + (conn->bound_xl->itable == NULL)) { + /* create inode table for this bound_xl, if one doesn't + already exist */ + int32_t lru_limit = 1024; + + lru_limit = INODE_LRU_LIMIT (frame->this); + + gf_log (trans->xl->name, GF_LOG_DEBUG, + "creating inode table with lru_limit=%"PRId32", " + "xlator=%s", lru_limit, conn->bound_xl->name); + + conn->bound_xl->itable = + inode_table_new (lru_limit, + conn->bound_xl); + } + + ret = dict_set_str (reply, "process-uuid", + xl->ctx->process_uuid); + +fail: + dict_len = dict_serialized_length (reply); + if (dict_len < 0) { + gf_log (xl->name, GF_LOG_ERROR, + "failed to get serialized length of reply dict"); + op_ret = -1; + op_errno = EINVAL; + dict_len = 0; + } + + rsp_hdr = gf_hdr_new (rsp, dict_len); + rsp_hdrlen = gf_hdr_len (rsp, dict_len); + rsp = gf_param (rsp_hdr); + + if (dict_len) { + ret = dict_serialize (reply, rsp->buf); + if (ret < 0) { + gf_log (xl->name, GF_LOG_ERROR, + "failed to serialize reply dict"); + op_ret = -1; + op_errno = -ret; + } + } + rsp->dict_len = hton32 (dict_len); + + rsp_hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + rsp_hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_SETVOLUME, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + dict_unref (params); + dict_unref (reply); + dict_unref (config_params); + + return 0; +} + +/* + * server_mop_stats_cbk - stats callback for 
server management operation + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @stats:err + * + * not for external reference + */ + +int32_t +server_mop_stats_cbk (call_frame_t *frame, + void *cookie, + xlator_t *xl, + int32_t ret, + int32_t op_errno, + struct xlator_stats *stats) +{ + /* TODO: get this information from somewhere else, not extern */ + gf_hdr_common_t *hdr = NULL; + gf_mop_stats_rsp_t *rsp = NULL; + char buffer[256] = {0,}; + int64_t glusterfsd_stats_nr_clients = 0; + size_t hdrlen = 0; + size_t buf_len = 0; + int32_t gf_errno = 0; + + if (ret >= 0) { + sprintf (buffer, + "%"PRIx64",%"PRIx64",%"PRIx64 + ",%"PRIx64",%"PRIx64",%"PRIx64 + ",%"PRIx64",%"PRIx64"\n", + stats->nr_files, + stats->disk_usage, + stats->free_disk, + stats->total_disk_size, + stats->read_usage, + stats->write_usage, + stats->disk_speed, + glusterfsd_stats_nr_clients); + + buf_len = strlen (buffer); + } + + hdrlen = gf_hdr_len (rsp, buf_len + 1); + hdr = gf_hdr_new (rsp, buf_len + 1); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + strcpy (rsp->buf, buffer); + + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_STATS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * mop_unlock - unlock management function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + */ +static int32_t +mop_stats (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + int32_t flag = 0; + gf_mop_stats_req_t *req = NULL; + + req = gf_param (hdr); + + flag = ntoh32 (req->flags); + + STACK_WIND (frame, + server_mop_stats_cbk, + bound_xl, + bound_xl->mops->stats, + flag); + + return 0; +} + +int32_t +mop_ping (call_frame_t *frame, + xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen) +{ + gf_hdr_common_t 
*rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; + size_t rsp_hdrlen = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = 0; + + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_PING, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} +/* + * unknown_op_cbk - This function is called when a opcode for unknown + * type is called. Helps to keep the backward/forward + * compatiblity + * @frame: call frame + * @type: + * @opcode: + * + */ + +int32_t +unknown_op_cbk (call_frame_t *frame, + int32_t type, + int32_t opcode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (-1); + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, type, opcode, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * get_frame_for_transport - get call frame for specified transport object + * + * @trans: transport object + * + */ +static call_frame_t * +get_frame_for_transport (transport_t *trans) +{ + call_frame_t *frame = NULL; + call_pool_t *pool = NULL; + server_connection_t *conn = NULL; + server_state_t *state = NULL;; + + GF_VALIDATE_OR_GOTO("server", trans, out); + + if (trans->xl && trans->xl->ctx) + pool = trans->xl->ctx->pool; + GF_VALIDATE_OR_GOTO("server", pool, out); + + frame = create_frame (trans->xl, pool); + GF_VALIDATE_OR_GOTO("server", frame, out); + + state = CALLOC (1, sizeof (*state)); + GF_VALIDATE_OR_GOTO("server", state, out); + + conn = trans->xl_private; + if (conn) { + if (conn->bound_xl) + state->itable = conn->bound_xl->itable; + state->bound_xl = conn->bound_xl; + } + + state->trans = transport_ref (trans); + + frame->root->trans = conn; + frame->root->state = state; /* which socket */ + frame->root->unique = 0; /* which call */ + +out: + return frame; +} + +/* + 
* get_frame_for_call - create a frame into the capable of + * generating and replying the reply packet by itself. + * By making a call with this frame, the last UNWIND + * function will have all needed state from its + * frame_t->root to send reply. + * @trans: + * @blk: + * @params: + * + * not for external reference + */ +static call_frame_t * +get_frame_for_call (transport_t *trans, gf_hdr_common_t *hdr) +{ + call_frame_t *frame = NULL; + + frame = get_frame_for_transport (trans); + + frame->root->op = ntoh32 (hdr->op); + frame->root->type = ntoh32 (hdr->type); + + frame->root->uid = ntoh32 (hdr->req.uid); + frame->root->unique = ntoh64 (hdr->callid); /* which call */ + frame->root->gid = ntoh32 (hdr->req.gid); + frame->root->pid = ntoh32 (hdr->req.pid); + + return frame; +} + +/* + * prototype of operations function for each of mop and + * fop at server protocol level + * + * @frame: call frame pointer + * @bound_xl: the xlator that this frame is bound to + * @params: parameters dictionary + * + * to be used by protocol interpret, _not_ for exterenal reference + */ +typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bould_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + char *buf, size_t buflen); + + +static gf_op_t gf_fops[] = { + [GF_FOP_STAT] = server_stat, + [GF_FOP_READLINK] = server_readlink, + [GF_FOP_MKNOD] = server_mknod, + [GF_FOP_MKDIR] = server_mkdir, + [GF_FOP_UNLINK] = server_unlink, + [GF_FOP_RMDIR] = server_rmdir, + [GF_FOP_SYMLINK] = server_symlink, + [GF_FOP_RENAME] = server_rename, + [GF_FOP_LINK] = server_link, + [GF_FOP_CHMOD] = server_chmod, + [GF_FOP_CHOWN] = server_chown, + [GF_FOP_TRUNCATE] = server_truncate, + [GF_FOP_OPEN] = server_open, + [GF_FOP_READ] = server_readv, + [GF_FOP_WRITE] = server_writev, + [GF_FOP_STATFS] = server_statfs, + [GF_FOP_FLUSH] = server_flush, + [GF_FOP_FSYNC] = server_fsync, + [GF_FOP_SETXATTR] = server_setxattr, + [GF_FOP_GETXATTR] = server_getxattr, + [GF_FOP_REMOVEXATTR] = server_removexattr, + 
[GF_FOP_OPENDIR] = server_opendir, + [GF_FOP_GETDENTS] = server_getdents, + [GF_FOP_FSYNCDIR] = server_fsyncdir, + [GF_FOP_ACCESS] = server_access, + [GF_FOP_CREATE] = server_create, + [GF_FOP_FTRUNCATE] = server_ftruncate, + [GF_FOP_FSTAT] = server_fstat, + [GF_FOP_LK] = server_lk, + [GF_FOP_UTIMENS] = server_utimens, + [GF_FOP_FCHMOD] = server_fchmod, + [GF_FOP_FCHOWN] = server_fchown, + [GF_FOP_LOOKUP] = server_lookup, + [GF_FOP_SETDENTS] = server_setdents, + [GF_FOP_READDIR] = server_readdir, + [GF_FOP_INODELK] = server_inodelk, + [GF_FOP_FINODELK] = server_finodelk, + [GF_FOP_ENTRYLK] = server_entrylk, + [GF_FOP_FENTRYLK] = server_fentrylk, + [GF_FOP_CHECKSUM] = server_checksum, + [GF_FOP_XATTROP] = server_xattrop, + [GF_FOP_FXATTROP] = server_fxattrop, +}; + + + +static gf_op_t gf_mops[] = { + [GF_MOP_SETVOLUME] = mop_setvolume, + [GF_MOP_GETVOLUME] = mop_getvolume, + [GF_MOP_STATS] = mop_stats, + [GF_MOP_GETSPEC] = mop_getspec, + [GF_MOP_PING] = mop_ping, +}; + +static gf_op_t gf_cbks[] = { + [GF_CBK_FORGET] = server_forget, + [GF_CBK_RELEASE] = server_release, + [GF_CBK_RELEASEDIR] = server_releasedir +}; + +int +protocol_server_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, char *buf, + size_t buflen) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + xlator_t *bound_xl = NULL; + call_frame_t *frame = NULL; + peer_info_t *peerinfo = NULL; + int32_t type = -1; + int32_t op = -1; + int32_t ret = -1; + + hdr = (gf_hdr_common_t *)hdr_p; + type = ntoh32 (hdr->type); + op = ntoh32 (hdr->op); + + conn = trans->xl_private; + if (conn) + bound_xl = conn->bound_xl; + + peerinfo = &trans->peerinfo; + switch (type) { + case GF_OP_TYPE_FOP_REQUEST: + if ((op < 0) || + (op > GF_FOP_MAXVALUE)) { + gf_log (this->name, GF_LOG_ERROR, + "invalid fop %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + if (bound_xl == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Received fop %"PRId32" before " + 
"authentication.", op); + break; + } + frame = get_frame_for_call (trans, hdr); + ret = gf_fops[op] (frame, bound_xl, hdr, hdrlen, buf, buflen); + break; + + case GF_OP_TYPE_MOP_REQUEST: + if (op < 0 || op > GF_MOP_MAXVALUE) { + gf_log (this->name, GF_LOG_ERROR, + "invalid mop %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + frame = get_frame_for_call (trans, hdr); + ret = gf_mops[op] (frame, bound_xl, hdr, hdrlen, buf, buflen); + break; + + case GF_OP_TYPE_CBK_REQUEST: + if (op < 0 || op > GF_CBK_MAXVALUE) { + gf_log (this->name, GF_LOG_ERROR, + "invalid cbk %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + if (bound_xl == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Received cbk %d before authentication.", op); + break; + } + + frame = get_frame_for_call (trans, hdr); + ret = gf_cbks[op] (frame, bound_xl, hdr, hdrlen, buf, buflen); + break; + + default: + break; + } + + return ret; +} + + +/* + * server_nop_cbk - nop callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_nop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state) + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + + +static void +get_auth_types (dict_t *this, + char *key, + data_t *value, + void *data) +{ + dict_t *auth_dict = data; + char *saveptr = NULL, *tmp = NULL; + char *key_cpy = NULL; + int32_t ret = -1; + + key_cpy = strdup (key); + GF_VALIDATE_OR_GOTO("server", key_cpy, out); + + tmp = strtok_r (key_cpy, ".", &saveptr); + ret = strcmp (tmp, "auth"); + if (ret == 0) { + tmp = strtok_r (NULL, ".", &saveptr); + if (strcmp (tmp, "ip") == 0) { + /* TODO: backward compatibility, remove when + newer versions are available */ + tmp = "addr"; + gf_log ("server", GF_LOG_WARNING, + "assuming 'auth.ip' 
to be 'auth.addr'"); + } + ret = dict_set_dynptr (auth_dict, tmp, NULL, 0); + if (ret < 0) { + gf_log ("server", GF_LOG_ERROR, + "failed to dict_set_dynptr"); + } + } + + FREE (key_cpy); +out: + return; +} + + +static int +validate_auth_options (xlator_t *this, dict_t *dict) +{ + int ret = -1; + int error = 0; + xlator_list_t *trav = NULL; + data_pair_t *pair = NULL; + char *saveptr = NULL, *tmp = NULL; + char *key_cpy = NULL; + + trav = this->children; + while (trav) { + error = -1; + for (pair = dict->members_list; pair; pair = pair->next) { + key_cpy = strdup (pair->key); + tmp = strtok_r (key_cpy, ".", &saveptr); + ret = strcmp (tmp, "auth"); + if (ret == 0) { + /* for module type */ + tmp = strtok_r (NULL, ".", &saveptr); + /* for volume name */ + tmp = strtok_r (NULL, ".", &saveptr); + } + + if (strcmp (tmp, trav->xlator->name) == 0) { + error = 0; + free (key_cpy); + break; + } + free (key_cpy); + } + if (-1 == error) { + gf_log (this->name, GF_LOG_ERROR, + "volume '%s' defined as subvolume, but no " + "authentication defined for the same", + trav->xlator->name); + break; + } + trav = trav->next; + } + + return error; +} + + +/* + * init - called during server protocol initialization + * + * @this: + * + */ +int +init (xlator_t *this) +{ + int32_t ret = -1; + transport_t *trans = NULL; + server_conf_t *conf = NULL; + + if (this->children == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "protocol/server should have subvolume"); + goto out; + } + + trans = transport_load (this->options, this); + if (trans == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "failed to load transport"); + goto out; + } + + ret = transport_listen (trans); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to bind/listen on socket"); + goto out; + } + + conf = CALLOC (1, sizeof (server_conf_t)); + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + INIT_LIST_HEAD (&conf->conns); + pthread_mutex_init (&conf->mutex, NULL); + + conf->trans = trans; + + conf->auth_modules = 
dict_new (); + GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out); + + dict_foreach (this->options, get_auth_types, + conf->auth_modules); + ret = validate_auth_options (this, this->options); + if (ret == -1) { + /* logging already done in validate_auth_options function. */ + goto out; + } + + ret = gf_auth_init (this, conf->auth_modules); + if (ret) { + dict_unref (conf->auth_modules); + goto out; + } + + this->private = conf; + + ret = dict_get_int32 (this->options, "inode-lru-limit", + &conf->inode_lru_limit); + if (ret < 0) { + conf->inode_lru_limit = 1024; + } + + ret = dict_get_int32 (this->options, "limits.transaction-size", + &conf->max_block_size); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "defaulting limits.transaction-size to %d", + DEFAULT_BLOCK_SIZE); + conf->max_block_size = DEFAULT_BLOCK_SIZE; + } + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to set 'ulimit -n 1M': %s", + strerror(errno)); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set max open fd to 64k: %s", + strerror(errno)); + } else { + gf_log (this->name, GF_LOG_ERROR, + "max open fd set to 64k"); + } + } + } +#endif + this->ctx->top = this; + + ret = 0; +out: + return ret; +} + + + +int +protocol_server_pollin (xlator_t *this, transport_t *trans) +{ + char *hdr = NULL; + size_t hdrlen = 0; + char *buf = NULL; + size_t buflen = 0; + int ret = -1; + + + ret = transport_receive (trans, &hdr, &hdrlen, &buf, &buflen); + + if (ret == 0) + ret = protocol_server_interpret (this, trans, hdr, + hdrlen, buf, buflen); + + /* TODO: use mem-pool */ + FREE (hdr); + + return ret; +} + + +/* + * fini - finish function for server protocol, called before + * unloading server protocol. 
+ * + * @this: + * + */ +void +fini (xlator_t *this) +{ + server_conf_t *conf = this->private; + + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->auth_modules) { + dict_unref (conf->auth_modules); + } + + FREE (conf); + this->private = NULL; +out: + return; +} + +/* + * server_protocol_notify - notify function for server protocol + * @this: + * @trans: + * @event: + * + */ +int +notify (xlator_t *this, int32_t event, void *data, ...) +{ + int ret = 0; + transport_t *trans = data; + + switch (event) { + case GF_EVENT_POLLIN: + ret = protocol_server_pollin (this, trans); + break; + case GF_EVENT_POLLERR: + { + peer_info_t *peerinfo = NULL; + + peerinfo = &(trans->peerinfo); + gf_log (trans->xl->name, GF_LOG_INFO, "%s disconnected", + peerinfo->identifier); + + ret = -1; + transport_disconnect (trans); + } + break; + + case GF_EVENT_TRANSPORT_CLEANUP: + { + if (trans->xl_private) + server_connection_put (this, trans->xl_private); + } + break; + + default: + default_notify (this, event, data); + break; + } + + return ret; +} + + +struct xlator_mops mops = { +}; + +struct xlator_fops fops = { +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"transport-type"}, + .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp", + "tcp/server", "ib-verbs/server"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"volume-filename.*"}, + .type = GF_OPTION_TYPE_PATH, + }, + { .key = {"inode-lru-limit"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = (1 * GF_UNIT_MB) + }, + { .key = {"client-volume-filename"}, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {NULL} }, +}; diff --git a/xlators/protocol/server/src/server-protocol.h b/xlators/protocol/server/src/server-protocol.h new file mode 100644 index 000000000..cc5f6f951 --- /dev/null +++ b/xlators/protocol/server/src/server-protocol.h @@ -0,0 +1,143 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _SERVER_PROTOCOL_H_ +#define _SERVER_PROTOCOL_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <pthread.h> + +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" +#include "call-stub.h" +#include "authenticate.h" +#include "fd.h" +#include "byte-order.h" + +#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */ +#define GLUSTERFSD_SPEC_PATH CONFDIR "/glusterfs-client.vol" + +typedef struct _server_state server_state_t; + +struct _locker { + struct list_head lockers; + loc_t loc; + fd_t *fd; + pid_t pid; +}; + +struct _lock_table { + struct list_head file_lockers; + struct list_head dir_lockers; + gf_lock_t lock; + size_t count; +}; + + +/* private structure per connection (transport object) + * used as transport_t->xl_private + */ +struct _server_connection { + struct list_head list; + char *id; + int ref; + pthread_mutex_t lock; + char disconnected; + fdtable_t *fdtable; + struct _lock_table *ltable; + xlator_t *bound_xl; +}; + +typedef struct _server_connection server_connection_t; + + +server_connection_t * +server_connection_get (xlator_t *this, const char *id); + +void +server_connection_put (xlator_t *this, server_connection_t *conn); + +int +server_connection_destroy (xlator_t *this, server_connection_t *conn); + +int +server_nop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, 
int32_t op_ret, int32_t op_errno); + + +typedef struct { + dict_t *auth_modules; + transport_t *trans; + int32_t max_block_size; + int32_t inode_lru_limit; + pthread_mutex_t mutex; + struct list_head conns; +} server_conf_t; + + +struct _server_state { + transport_t *trans; + xlator_t *bound_xl; + loc_t loc; + loc_t loc2; + int flags; + fd_t *fd; + size_t size; + off_t offset; + mode_t mode; + dev_t dev; + uid_t uid; + gid_t gid; + size_t nr_count; + int cmd; + int type; + char *name; + int name_len; + inode_table_t *itable; + int64_t fd_no; + ino_t ino; + ino_t par; + ino_t ino2; + ino_t par2; + char *path; + char *path2; + char *bname; + char *bname2; + int mask; + char is_revalidate; + dict_t *xattr_req; + struct flock flock; + struct timespec tv[2]; + char *resolved; +}; + + +int +server_stub_resume (call_stub_t *stub, int32_t op_ret, int32_t op_errno, + inode_t *inode, inode_t *parent); + +int +do_path_lookup (call_stub_t *stub, const loc_t *loc); + +#endif diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am new file mode 100644 index 000000000..59b968969 --- /dev/null +++ b/xlators/storage/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = posix $(BDB_SUBDIR) + +CLEANFILES = diff --git a/xlators/storage/bdb/Makefile.am b/xlators/storage/bdb/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/storage/bdb/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/storage/bdb/src/Makefile.am b/xlators/storage/bdb/src/Makefile.am new file mode 100644 index 000000000..c0ab394bc --- /dev/null +++ b/xlators/storage/bdb/src/Makefile.am @@ -0,0 +1,18 @@ + +xlator_LTLIBRARIES = bdb.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage + +bdb_la_LDFLAGS = -module -avoidversion + +bdb_la_SOURCES = bctx.c bdb-ll.c bdb.c +bdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = bdb.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE 
-D$(GF_HOST_OS) -Wall \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +AM_LDFLAGS = -ldb + +CLEANFILES = + diff --git a/xlators/storage/bdb/src/bctx.c b/xlators/storage/bdb/src/bctx.c new file mode 100644 index 000000000..2bfa3ea87 --- /dev/null +++ b/xlators/storage/bdb/src/bctx.c @@ -0,0 +1,394 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <list.h> +#include <bdb.h> +#include <libgen.h> /* for dirname */ + +static void +__destroy_bctx (bctx_t *bctx) +{ + if (bctx->directory) + FREE (bctx->directory); + + if (bctx->db_path) + FREE (bctx->db_path); + + FREE (bctx); +} + +static void +__unhash_bctx (bctx_t *bctx) +{ + list_del_init (&bctx->b_hash); +} + +static int32_t +bctx_table_prune (bctx_table_t *table) +{ + int32_t ret = 0; + struct list_head purge = {0,}; + struct list_head *next = NULL; + bctx_t *entry = NULL; + bctx_t *del = NULL, *tmp = NULL; + + if (!table) + return 0; + + INIT_LIST_HEAD (&purge); + + LOCK (&table->lock); + { + if ((table->lru_limit) && + (table->lru_size > table->lru_limit)) { + while (table->lru_size > table->lru_limit) { + next = table->b_lru.next; + entry = list_entry (next, bctx_t, list); + + list_move_tail (next, &table->purge); + __unhash_bctx (entry); + + table->lru_size--; + ret++; + } + } + list_move_tail (&purge, &table->purge); + list_del_init (&table->purge); + } + UNLOCK (&table->lock); + + { + list_for_each_entry_safe (del, tmp, &purge, list) { + list_del_init (&del->list); + if (del->dbp) { + ret = del->dbp->close (del->dbp, 0); + if (ret != 0) { + gf_log (table->this->name, GF_LOG_ERROR, + "failed to close db on path (%s): %s", + del->directory, db_strerror (ret)); + } else { + gf_log (table->this->name, GF_LOG_WARNING, + "close db for path %s; table->lru_count = %d", + del->directory, table->lru_size); + } + } + __destroy_bctx (del); + } + } + + return ret; +} + + +/* struct bdb_ctx related */ +static inline uint32_t +bdb_key_hash (char *key, uint32_t hash_size) +{ + uint32_t hash = 0; + + hash = *key; + + if (hash) { + for (key += 1; *key != '\0'; key++) { + hash = (hash << 5) - hash + *key; + } + } + + return (hash + *key) % hash_size; +} + +static void +__hash_bctx (bctx_t *bctx) +{ + bctx_table_t *table = NULL; + char *key = NULL; + + table = bctx->table; + + MAKE_KEY_FROM_PATH (key, bctx->directory); + bctx->key_hash = bdb_key_hash (key, 
table->hash_size); + + list_del_init (&bctx->b_hash); + list_add (&bctx->b_hash, &table->b_hash[bctx->key_hash]); +} + +static inline bctx_t * +__bctx_passivate (bctx_t *bctx) +{ + if (bctx->dbp) { + list_move_tail (&bctx->list, &(bctx->table->b_lru)); + bctx->table->lru_size++; + } else { + list_move_tail (&bctx->list, &bctx->table->purge); + __unhash_bctx (bctx); + } + return bctx; +} + +static inline bctx_t * +__bctx_activate (bctx_t *bctx) +{ + list_move (&bctx->list, &bctx->table->active); + bctx->table->lru_size--; + + return bctx; +} + +static bctx_t * +__bdb_ctx_unref (bctx_t *bctx) +{ + assert (bctx->ref); + + --bctx->ref; + + if (!bctx->ref) + bctx = __bctx_passivate (bctx); + + return bctx; +} + + +bctx_t * +bctx_unref (bctx_t *bctx) +{ + bctx_table_t *table = NULL; + + if (!bctx && !bctx->table) + return NULL; + + table = bctx->table; + + LOCK (&table->lock); + { + bctx = __bdb_ctx_unref (bctx); + } + UNLOCK (&table->lock); + + bctx_table_prune (table); + + return bctx; +} + +/* + * NOTE: __bdb_ctx_ref() is called only after holding table->lock and bctx->lock, in that order + */ +static inline bctx_t * +__bctx_ref (bctx_t *bctx) +{ + if (!bctx->ref) + __bctx_activate (bctx); + + bctx->ref++; + + return bctx; +} + +bctx_t * +bctx_ref (bctx_t *bctx) +{ + LOCK (&(bctx->table->lock)); + { + __bctx_ref (bctx); + } + UNLOCK (&(bctx->table->lock)); + + return bctx; +} + + +#define BDB_THIS(table) (table->this) + +static inline bctx_t * +__create_bctx (bctx_table_t *table, + const char *path) +{ + bctx_t *bctx = NULL; + char *db_path = NULL; + + bctx = CALLOC (1, sizeof (*bctx)); + GF_VALIDATE_OR_GOTO ("bctx", bctx, out); + + bctx->table = table; + bctx->directory = strdup (path); + GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, BDB_THIS (table), path); + + bctx->db_path = strdup (db_path); + GF_VALIDATE_OR_GOTO ("bctx", bctx->directory, out); + + INIT_LIST_HEAD (&bctx->c_list); + INIT_LIST_HEAD (&bctx->list); + 
INIT_LIST_HEAD (&bctx->b_hash); + + LOCK_INIT (&bctx->lock); + + __hash_bctx (bctx); + + list_add (&bctx->list, &table->b_lru); + table->lru_size++; + +out: + return bctx; +} + +/* bctx_lookup - lookup bctx_t for the directory @directory. (see description of bctx_t in bdb.h) + * + * @table: bctx_table_t for this instance of bdb. + * @directory: directory for which bctx_t is being looked up. + */ +bctx_t * +bctx_lookup (bctx_table_t *table, + const char *directory) +{ + char *key = NULL; + uint32_t key_hash = 0; + bctx_t *trav = NULL, *bctx = NULL, *tmp = NULL; + int32_t need_break = 0; + + GF_VALIDATE_OR_GOTO ("bctx", table, out); + GF_VALIDATE_OR_GOTO ("bctx", directory, out); + + MAKE_KEY_FROM_PATH (key, directory); + key_hash = bdb_key_hash (key, table->hash_size); + + LOCK (&table->lock); + { + if (!list_empty (&table->b_hash[key_hash])) { + list_for_each_entry_safe (trav, tmp, &table->b_hash[key_hash], b_hash) { + LOCK(&trav->lock); + if (!strcmp(trav->directory, directory)) { + bctx = __bctx_ref (trav); + need_break = 1; + } + UNLOCK(&trav->lock); + if (need_break) + break; + } + } + + if (!bctx) { + bctx = __create_bctx (table, directory); + bctx = __bctx_ref (bctx); + } + } + UNLOCK (&table->lock); +out: + return bctx; +} + + +bctx_t * +bctx_parent (bctx_table_t *table, + const char *path) +{ + char *pathname = NULL, *directory = NULL; + bctx_t *bctx = NULL; + + GF_VALIDATE_OR_GOTO ("bctx", table, out); + GF_VALIDATE_OR_GOTO ("bctx", path, out); + + pathname = strdup (path); + GF_VALIDATE_OR_GOTO ("bctx", pathname, out); + directory = dirname (pathname); + + bctx = bctx_lookup (table, directory); + GF_VALIDATE_OR_GOTO ("bctx", bctx, out); + +out: + if (pathname) + free (pathname); + return bctx; +} + +inline int32_t +bdb_db_rename (bctx_table_t *table, + const char *oldpath, + const char *newpath) +{ + DB_ENV *dbenv = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("bctx", table, out); + GF_VALIDATE_OR_GOTO ("bctx", oldpath, out); + GF_VALIDATE_OR_GOTO 
("bctx", newpath, out); + + dbenv = table->dbenv; + GF_VALIDATE_OR_GOTO ("bctx", dbenv, out); + + LOCK (&table->lock); + { + ret = dbenv->dbrename (dbenv, NULL, oldpath, NULL, newpath, 0); + + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to rename %s to %s: %s", + oldpath, newpath, db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "successfully renamed %s to %s: %s", + oldpath, newpath, db_strerror (ret)); + } + } + UNLOCK (&table->lock); + +out: + return ret; +} + +bctx_t * +bctx_rename (bctx_t *bctx, + const char *db_newpath) +{ + bctx_table_t *table = NULL; + int32_t ret = -1; + + table = bctx->table; + + LOCK (&table->lock); + { + __unhash_bctx (bctx); + list_del_init (&bctx->list); + if (bctx->dbp) { + ret = bctx->dbp->close (bctx->dbp, 0); + if (ret != 0) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to close db for directory %s (%s)", + bctx->directory, db_strerror (ret)); + } + bctx->dbp = NULL; + } + } + UNLOCK (&table->lock); + + ret = bdb_db_rename (table, bctx->db_path, db_newpath); + + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "bdb_db_rename failed for directory %s", + bctx->directory); + bctx = NULL; + } + + return bctx; +} diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c new file mode 100644 index 000000000..40e7d1877 --- /dev/null +++ b/xlators/storage/bdb/src/bdb-ll.c @@ -0,0 +1,1455 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <libgen.h> +#include "bdb.h" +#include <list.h> +/* + * implement the procedures to interact with bdb */ + +/**************************************************************** + * + * General wrappers and utility procedures for bdb xlator + * + ****************************************************************/ +#define BDB_LL_PAGE_SIZE_DEFAULT 4096 +#define BDB_LL_PAGE_SIZE_MIN 4096 +#define BDB_LL_PAGE_SIZE_MAX 65536 + +ino_t +bdb_inode_transform (ino_t parent, + bctx_t *bctx) +{ + struct bdb_private *private = NULL; + ino_t ino = -1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + + private = bctx->table->this->private; + + LOCK (&private->ino_lock); + ino = ++private->next_ino; + UNLOCK (&private->ino_lock); +out: + return ino; +} + + +/*********************************************************** + * + * bdb storage database utilities + * + **********************************************************/ + +/* + * bdb_db_open - opens a storage db. + * + * @ctx: context specific to the directory for which we are supposed to open db + * + * see, if we have empty slots to open a db. 
+ * if (no-empty-slots), then prune open dbs and close as many as possible + * if (empty-slot-available), tika muchkonDu db open maaDu + * + * NOTE: illi baro munche lock hiDkobEku + */ +static DB * +bdb_db_open (bctx_t *bctx) +{ + DB *storage_dbp = NULL; + int32_t op_ret = -1; + bctx_table_t *table = NULL; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + + table = bctx->table; + GF_VALIDATE_OR_GOTO ("bdb-ll", table, out); + + /* we have to do the following, we can't deny someone of db_open ;) */ + op_ret = db_create (&storage_dbp, table->dbenv, 0); + if (op_ret != 0) { + gf_log ("bdb-ll", GF_LOG_ERROR, + "failed to do db_create for directory %s (%s)", + bctx->directory, db_strerror (op_ret)); + storage_dbp = NULL; + goto out; + } + + if (table->page_size) { + op_ret = storage_dbp->set_pagesize (storage_dbp, + table->page_size); + if (op_ret != 0) { + gf_log ("bdb-ll", GF_LOG_ERROR, + "failed to set the page_size (%"PRIu64") for directory %s (%s)", + table->page_size, bctx->directory, db_strerror (op_ret)); + } else { + gf_log ("bdb-ll", GF_LOG_DEBUG, + "page-size (%"PRIu64") set on DB", + table->page_size); + } + } + + op_ret = storage_dbp->open (storage_dbp, + NULL, + bctx->db_path, + NULL, + table->access_mode, + table->dbflags, + 0); + if (op_ret != 0 ) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to open storage-db for directory %s (%s)", + bctx->db_path, db_strerror (op_ret)); + storage_dbp = NULL; + } + +out: + return storage_dbp; +} + + + +int32_t +bdb_cursor_close (bctx_t *bctx, + DBC *cursorp) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); + + LOCK (&bctx->lock); + { +#ifdef HAVE_BDB_CURSOR_GET + ret = cursorp->close (cursorp); +#else + ret = cursorp->c_close (cursorp); +#endif + if ((ret != 0)) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to close db cursor for directory %s (%s)", + bctx->directory, db_strerror (ret)); + } + } + UNLOCK (&bctx->lock); + +out: + return ret; +} + + 
+int32_t +bdb_cursor_open (bctx_t *bctx, + DBC **cursorpp) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", cursorpp, out); + + LOCK (&bctx->lock); + { + if (bctx->dbp) { + /* do nothing, just continue */ + ret = 0; + } else { + bctx->dbp = bdb_db_open (bctx); + if (!bctx->dbp) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to open storage db for %s", + bctx->directory); + ret = -1; + } else { + ret = 0; + } + } + + if (ret == 0) { + /* all set, lets open cursor */ + ret = bctx->dbp->cursor (bctx->dbp, NULL, cursorpp, 0); + if (ret != 0) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to create a cursor for %s (%s)", + bctx->directory, db_strerror (ret)); + } + } + } + UNLOCK (&bctx->lock); + +out: + return ret; +} + + +/* cache related */ +static bdb_cache_t * +bdb_cache_lookup (bctx_t *bctx, + char *path) +{ + bdb_cache_t *bcache = NULL; + bdb_cache_t *trav = NULL; + char *key = NULL; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); + + MAKE_KEY_FROM_PATH (key, path); + + LOCK (&bctx->lock); + { + list_for_each_entry (trav, &bctx->c_list, c_list) { + if (!strcmp (trav->key, key)){ + bcache = trav; + break; + } + } + } + UNLOCK (&bctx->lock); + +out: + return bcache; +} + +static int32_t +bdb_cache_insert (bctx_t *bctx, + DBT *key, + DBT *data) +{ + bdb_cache_t *bcache = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", data, out); + + LOCK (&bctx->lock); + { + if (bctx->c_count > 5) { + /* most of the times, we enter here */ + /* FIXME: ugly, not supposed to disect any of the + * 'struct list_head' directly */ + if (!list_empty (&bctx->c_list)) { + bcache = list_entry (bctx->c_list.prev, bdb_cache_t, c_list); + list_del_init (&bcache->c_list); + } + if (bcache->key) { + free (bcache->key); + bcache->key = strdup ((char *)key->data); + GF_VALIDATE_OR_GOTO ("bdb-ll", 
bcache->key, unlock); + } else { + /* should never come here */ + gf_log ("bdb-ll", + GF_LOG_CRITICAL, + "bcache->key (null)"); + } /* if(bcache->key)...else */ + if (bcache->data) { + free (bcache->data); + bcache->data = memdup (data->data, data->size); + GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock); + bcache->size = data->size; + } else { + /* should never come here */ + gf_log ("bdb-ll", + GF_LOG_CRITICAL, + "bcache->data (null)"); + } /* if(bcache->data)...else */ + list_add (&bcache->c_list, &bctx->c_list); + ret = 0; + } else { + /* we will be entering here very rarely */ + bcache = CALLOC (1, sizeof (*bcache)); + GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock); + bcache->key = strdup ((char *)(key->data)); + GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock); + bcache->data = memdup (data->data, data->size); + GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock); + bcache->size = data->size; + list_add (&bcache->c_list, &bctx->c_list); + bctx->c_count++; + ret = 0; + } /* if(private->c_count < 5)...else */ + } +unlock: + UNLOCK (&bctx->lock); +out: + return ret; +} + +static int32_t +bdb_cache_delete (bctx_t *bctx, + char *key) +{ + bdb_cache_t *bcache = NULL; + bdb_cache_t *trav = NULL; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); + + LOCK (&bctx->lock); + { + list_for_each_entry (trav, &bctx->c_list, c_list) { + if (!strcmp (trav->key, key)){ + bctx->c_count--; + bcache = trav; + break; + } + } + + if (bcache) { + list_del_init (&bcache->c_list); + free (bcache->key); + free (bcache->data); + free (bcache); + } + } + UNLOCK (&bctx->lock); + +out: + return 0; +} + +void * +bdb_db_stat (bctx_t *bctx, + DB_TXN *txnid, + uint32_t flags) +{ + DB *storage = NULL; + void *stat = NULL; + int32_t ret = -1; + + LOCK (&bctx->lock); + { + if (bctx->dbp == NULL) { + bctx->dbp = bdb_db_open (bctx); + storage = bctx->dbp; + } else { + /* we are just fine, lets continue */ + storage = bctx->dbp; + } /* 
if(bctx->dbp==NULL)...else */ + } + UNLOCK (&bctx->lock); + + GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); + + ret = storage->stat (storage, txnid, &stat, flags); + + if (ret != 0) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to do DB->stat() on db file %s: %s", + bctx->db_path, db_strerror (ret)); + } else { + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "successfully called DB->stat() on db file %s", + bctx->db_path); + } +out: + return stat; + +} + +/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the corresponding + * db file. + * + * @bctx: bctx_t * corresponding to the parent directory of @path. (should always be a valid + * bctx). bdb_storage_get should never be called if @bctx = NULL. + * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction or a valid + * DB_TXN *, when embedded in an explicit transaction. + * @path: path of the file to read from (translated to a database key using MAKE_KEY_FROM_PATH) + * @buf: char ** - pointer to a pointer to char. a read buffer is created in this procedure + * and pointer to the buffer is passed through @buf to the caller. + * @size: size of the file content to be read. + * @offset: offset from which the file content to be read. + * + * NOTE: bdb_storage_get tries to open DB, if @bctx->dbp == NULL (@bctx->dbp == NULL, + * nobody has opened DB till now or DB was closed by bdb_table_prune()). + * + * NOTE: if private->cache is set (bdb xlator's internal caching enabled), then bdb_storage_get + * first looks up the cache for key/value pair. if bdb_lookup_cache fails, then only + * DB->get() is called. also, inserts a newly read key/value pair to cache through + * bdb_insert_to_cache. + * + * return: 'number of bytes read' on success or -1 on error. + * + * also see: bdb_lookup_cache, bdb_insert_to_cache for details about bdb xlator's internal cache. 
+ */ +int32_t +bdb_db_get (bctx_t *bctx, + DB_TXN *txnid, + const char *path, + char **buf, + size_t size, + off_t offset) +{ + DB *storage = NULL; + DBT key = {0,}; + DBT value = {0,}; + int32_t ret = -1; + char *key_string = NULL; + bdb_cache_t *bcache = NULL; + int32_t db_flags = 0; + uint8_t need_break = 0; + int32_t retries = 1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); + + MAKE_KEY_FROM_PATH (key_string, path); + + if (bctx->cache && + ((bcache = bdb_cache_lookup (bctx, key_string)) != NULL)) { + if (buf) { + *buf = CALLOC (1, bcache->size); + GF_VALIDATE_OR_GOTO ("bdb-ll", buf, out); + memcpy (*buf, (bcache->data + offset), bcache->size); + } + ret = bcache->size; + } else { + LOCK (&bctx->lock); + { + if (bctx->dbp == NULL) { + bctx->dbp = bdb_db_open (bctx); + storage = bctx->dbp; + } else { + /* we are just fine, lets continue */ + storage = bctx->dbp; + } /* if(bctx->dbp==NULL)...else */ + } + UNLOCK (&bctx->lock); + + GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); + + key.data = (char *)key_string; + key.size = strlen (key_string); + key.flags = DB_DBT_USERMEM; + + if (bctx->cache){ + /* we are called to return the size of the file */ + value.flags = DB_DBT_MALLOC; + } else { + if (size) { + value.flags = DB_DBT_MALLOC | DB_DBT_PARTIAL; + } else { + value.flags = DB_DBT_MALLOC; + } + value.dlen = size; + value.doff = offset; + } + + do { + /* TODO: we prefer to give our own buffer to value.data + * and ask bdb to fill in it */ + ret = storage->get (storage, txnid, &key, &value, db_flags); + + if (ret == DB_NOTFOUND) { + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "failed to do DB->get() for key: %s." + " key not found in storage DB", key_string); + ret = -1; + need_break = 1; + } else if (ret == DB_LOCK_DEADLOCK) { + retries++; + gf_log ("bdb-ll", + GF_LOG_ERROR, + "deadlock detected in DB->put. 
retrying DB->put (%d)", + retries); + }else if (ret == 0) { + /* successfully read data, lets set everything in place + * and return */ + if (buf) { + *buf = CALLOC (1, value.size); + ERR_ABORT (*buf); + memcpy (*buf, value.data, value.size); + } + ret = value.size; + if (bctx->cache) + bdb_cache_insert (bctx, &key, &value); + free (value.data); + need_break = 1; + } else { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to do DB->get() for key %s: %s", + key_string, db_strerror (ret)); + ret = -1; + need_break = 1; + } + } while (!need_break); + } +out: + return ret; +}/* bdb_db_get */ + +/* bdb_storage_put - insert a key/value specified to the corresponding DB. + * + * @bctx: bctx_t * corresponding to the parent directory of @path. + * (should always be a valid bctx). bdb_storage_put should never be called if @bctx = NULL. + * @txnid: NULL if bdb_storage_put is not embedded in an explicit transaction or a valid + * DB_TXN *, when embedded in an explicit transaction. + * @key_string: key of the database entry. + * @buf: pointer to the buffer data to be written as data for @key_string. + * @size: size of @buf. + * @offset: offset in the key's data to be modified with provided data. + * @flags: valid flags are BDB_TRUNCATE_RECORD (to reduce the data of @key_string to 0 size). + * + * NOTE: bdb_storage_put tries to open DB, if @bctx->dbp == NULL (@bctx->dbp == NULL, + * nobody has opened DB till now or DB was closed by bdb_table_prune()). + * + * NOTE: bdb_storage_put deletes the key/value from bdb xlator's internal cache. + * + * return: 0 on success or -1 on error. + * + * also see: bdb_cache_delete for details on how a cached key/value pair is removed. 
+ */ +int32_t +bdb_db_put (bctx_t *bctx, + DB_TXN *txnid, + const char *key_string, + const char *buf, + size_t size, + off_t offset, + int32_t flags) +{ + DB *storage = NULL; + DBT key = {0,}, value = {0,}; + int32_t ret = -1; + int32_t db_flags = DB_AUTO_COMMIT; + uint8_t need_break = 0; + int32_t retries = 1; + + LOCK (&bctx->lock); + { + if (bctx->dbp == NULL) { + bctx->dbp = bdb_db_open (bctx); + storage = bctx->dbp; + } else { + /* we are just fine, lets continue */ + storage = bctx->dbp; + } + } + UNLOCK (&bctx->lock); + + GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); + + if (bctx->cache) { + ret = bdb_cache_delete (bctx, (char *)key_string); + GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); + } + + key.data = (void *)key_string; + key.size = strlen (key_string); + + /* NOTE: bdb lets us expand the file, suppose value.size > value.len, then value.len bytes + * from value.doff offset and value.size bytes will be written from value.doff and + * data from value.doff + value.dlen will be pushed value.doff + value.size + */ + value.data = (void *)buf; + + if (flags & BDB_TRUNCATE_RECORD) { + value.size = size; + value.doff = 0; + value.dlen = offset; + } else { + value.size = size; + value.dlen = size; + value.doff = offset; + } + value.flags = DB_DBT_PARTIAL; + if (buf == NULL && size == 0) + /* truncate called us */ + value.flags = 0; + + do { + ret = storage->put (storage, txnid, &key, &value, db_flags); + if (ret == DB_LOCK_DEADLOCK) { + retries++; + gf_log ("bdb-ll", + GF_LOG_ERROR, + "deadlock detected in DB->put. retrying DB->put (%d)", + retries); + } else if (ret) { + /* write failed */ + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to do DB->put() for key %s: %s", + key_string, db_strerror (ret)); + need_break = 1; + } else { + /* successfully wrote */ + ret = 0; + need_break = 1; + } + } while (!need_break); +out: + return ret; +}/* bdb_db_put */ + + +/* bdb_storage_del - delete a key/value pair corresponding to @path from corresponding db file. 
+ * + * @bctx: bctx_t * corresponding to the parent directory of @path. + * (should always be a valid bctx). bdb_storage_del should never be called + * if @bctx = NULL. + * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction or a + * valid DB_TXN *, when embedded in an explicit transaction. + * @path: path to the file, whose key/value pair has to be deleted. + * + * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL (@bctx->dbp == NULL, + * nobody has opened DB till now or DB was closed by bdb_table_prune()). + * + * return: 0 on success or -1 on error. + */ +int32_t +bdb_db_del (bctx_t *bctx, + DB_TXN *txnid, + const char *path) +{ + DB *storage = NULL; + DBT key = {0,}; + char *key_string = NULL; + int32_t ret = -1; + int32_t db_flags = 0; + uint8_t need_break = 0; + int32_t retries = 1; + + MAKE_KEY_FROM_PATH (key_string, path); + + LOCK (&bctx->lock); + { + if (bctx->dbp == NULL) { + bctx->dbp = bdb_db_open (bctx); + storage = bctx->dbp; + } else { + /* we are just fine, lets continue */ + storage = bctx->dbp; + } + } + UNLOCK (&bctx->lock); + + GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); + + ret = bdb_cache_delete (bctx, key_string); + GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); + + key.data = key_string; + key.size = strlen (key_string); + key.flags = DB_DBT_USERMEM; + + do { + ret = storage->del (storage, txnid, &key, db_flags); + + if (ret == DB_NOTFOUND) { + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "failed to delete %s from storage db, doesn't exist in storage DB", + path); + need_break = 1; + } else if (ret == DB_LOCK_DEADLOCK) { + retries++; + gf_log ("bdb-ll", + GF_LOG_ERROR, + "deadlock detected in DB->put. 
retrying DB->put (%d)", + retries); + }else if (ret == 0) { + /* successfully deleted the entry */ + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "deleted %s from storage db", path); + ret = 0; + need_break = 1; + } else { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to delete %s from storage db: %s", + path, db_strerror (ret)); + ret = -1; + need_break = 1; + } + } while (!need_break); +out: + return ret; +} + +/* NOTE: bdb version compatibility wrapper */ +int32_t +bdb_cursor_get (DBC *cursorp, + DBT *key, + DBT *value, + int32_t flags) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); + +#ifdef HAVE_BDB_CURSOR_GET + ret = cursorp->get (cursorp, key, value, flags); +#else + ret = cursorp->c_get (cursorp, key, value, flags); +#endif + if ((ret != 0) && (ret != DB_NOTFOUND)) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "failed to CURSOR->get() for key %s (%s)", + (char *)key->data, db_strerror (ret)); + } + +out: + return ret; +}/* bdb_cursor_get */ + + +int32_t +bdb_dirent_size (DBT *key) +{ + return ALIGN (24 /* FIX MEEEE!!! */ + key->size); +} + + +/* bdb_extract_bfd - translate a fd_t to a bfd (either a 'struct bdb_bfd' or 'struct bdb_dir') + * + * @fd->ctx is with bdb specific file handle during a successful bdb_open (also bdb_create) + * or bdb_opendir. + * + * return: 'struct bdb_bfd *' or 'struct bdb_dir *' on success, or NULL on failure. + */ +inline void * +bdb_extract_bfd (fd_t *fd, + xlator_t *this) +{ + uint64_t tmp_bfd = 0; + void *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb-ll", fd, out); + GF_VALIDATE_OR_GOTO ("bdb-ll", this, out); + + fd_ctx_get (fd, this, &tmp_bfd); + bfd = (void *)(long)bfd; + +out: + return bfd; +} + +/* bdb_dbenv_init - initialize DB_ENV + * + * initialization includes: + * 1. opening DB_ENV (db_env_create(), DB_ENV->open()). + * NOTE: see private->envflags for flags used. + * 2. 
DB_ENV->set_lg_dir - set log directory to be used for storing log files + * (log files are the files in which transaction logs are written by db). + * 3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically clear + * the unwanted log files (flushed at each checkpoint). + * 4. DB_ENV->set_errfile - set errfile to be used by db to report detailed error logs. + * used only for debbuging purpose. + * + * return: returns a valid DB_ENV * on success or NULL on error. + * + */ +static DB_ENV * +bdb_dbenv_init (xlator_t *this, + char *directory) +{ + /* Create a DB environment */ + DB_ENV *dbenv = NULL; + int32_t ret = 0; + bdb_private_t *private = NULL; + int32_t fatal_flags = 0; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (directory, out); + + private = this->private; + VALIDATE_OR_GOTO (private, out); + + ret = db_env_create (&dbenv, 0); + VALIDATE_OR_GOTO ((ret == 0), out); + + /* NOTE: set_errpfx returns 'void' */ + dbenv->set_errpfx(dbenv, this->name); + + ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT); + VALIDATE_OR_GOTO ((ret == 0), out); + + ret = dbenv->open(dbenv, directory, + private->envflags, + S_IRUSR | S_IWUSR); + if ((ret != 0) && (ret != DB_RUNRECOVERY)) { + gf_log (this->name, + GF_LOG_CRITICAL, + "failed to open DB environment (%s)", + db_strerror (ret)); + dbenv = NULL; + goto out; + } else if (ret == DB_RUNRECOVERY) { + fatal_flags = ((private->envflags & (~DB_RECOVER)) | DB_RECOVER_FATAL); + ret = dbenv->open(dbenv, directory, + fatal_flags, + S_IRUSR | S_IWUSR); + if (ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to open DB environment (%s) with DB_REOVER_FATAL", + db_strerror (ret)); + dbenv = NULL; + goto out; + } else { + gf_log (this->name, + GF_LOG_WARNING, + "opened DB environment after DB_RECOVER_FATAL: %s", + db_strerror (ret)); + } + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "DB environment successfull opened: %s", + db_strerror (ret)); + } + + + +#if (DB_VERSION_MAJOR == 4 && \ + 
DB_VERSION_MINOR == 7) + if (private->log_auto_remove) { + ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 1); + } else { + ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 0); + } +#else + if (private->log_auto_remove) { + ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 1); + } else { + ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0); + } +#endif + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to set DB_LOG_AUTOREMOVE on dbenv: %s", db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "DB_LOG_AUTOREMOVE set on dbenv"); + } + + if (private->transaction) { + ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1); + + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to set DB_AUTO_COMMIT on dbenv: %s", + db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "DB_AUTO_COMMIT set on dbenv"); + } + + if (private->txn_timeout) { + ret = dbenv->set_timeout (dbenv, + private->txn_timeout, + DB_SET_TXN_TIMEOUT); + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to set TXN_TIMEOUT to %d milliseconds " + "on dbenv: %s", + private->txn_timeout, db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "TXN_TIMEOUT set to %d milliseconds", + private->txn_timeout); + } + } + + if (private->lock_timeout) { + ret = dbenv->set_timeout(dbenv, + private->txn_timeout, + DB_SET_LOCK_TIMEOUT); + + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to set LOCK_TIMEOUT to %d milliseconds " + "on dbenv: %s", + private->lock_timeout, db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "LOCK_TIMEOUT set to %d milliseconds", + private->lock_timeout); + } + } + + ret = dbenv->set_lg_dir (dbenv, private->logdir); + + if (ret != 0) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to set log directory for dbenv: %s", db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "set dbenv log dir to %s", private->logdir); + } + + } + + if (private->errfile) { + private->errfp = fopen 
(private->errfile, "a+"); + if (private->errfp) { + dbenv->set_errfile (dbenv, private->errfp); + } else { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to open errfile: %s", strerror (errno)); + } + } + +out: + return dbenv; +} + +#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv) + +/* bdb_checkpoint - during transactional usage, db does not directly write the data to db + * files, instead db writes a 'log' (similar to a journal entry) into a + * log file. db normally clears the log files during opening of an + * environment. since we expect a filesystem server to run for a pretty + * long duration and flushing 'log's during dbenv->open would prove very + * costly, if we accumulate the log entries for one complete run of + * glusterfs server. to flush the logs frequently, db provides a mechanism + * called 'checkpointing'. when we do a checkpoint, db flushes the logs to + * disk (writes changes to db files) and we can also clear the accumulated + * log files after checkpointing. NOTE: removing unwanted log files is not + * part of dbenv->txn_checkpoint() call. + * + * @data: xlator_t of the current instance of bdb xlator. + * + * bdb_checkpoint is called in a different thread from the main glusterfs thread. bdb + * xlator creates the checkpoint thread after successfully opening the db environment. + * NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem thread. + * + * db environment checkpointing frequency is controlled by + * 'option checkpoint-timeout <time-in-seconds>' in volfile. + * + * NOTE: checkpointing thread is started only if 'option transaction on' specified in + * volfile. checkpointing is not valid for non-transactional environments. 
+ * + */ +static void * +bdb_checkpoint (void *data) +{ + xlator_t *this = NULL; + struct bdb_private *private = NULL; + DB_ENV *dbenv = NULL; + int32_t ret = 0; + uint32_t active = 0; + + this = (xlator_t *) data; + dbenv = BDB_ENV(this); + private = this->private; + + for (;;sleep (private->checkpoint_timeout)) { + LOCK (&private->active_lock); + active = private->active; + UNLOCK (&private->active_lock); + + if (active) { + ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); + if (ret) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to checkpoint environment: %s", db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "checkpointing successful"); + } + } else { + ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); + if (ret) { + gf_log ("bctx", + GF_LOG_ERROR, + "failed to do final checkpoint environment: %s", + db_strerror (ret)); + } else { + gf_log ("bctx", + GF_LOG_DEBUG, + "final checkpointing successful"); + } + break; + } + } + + return NULL; +} + +static inline void +BDB_CACHE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + /* cache is always on */ + private->cache = ON; +} + +static inline void +BDB_LOG_REMOVE_INIT(xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + private->log_auto_remove = 1; + gf_log (this->name, + GF_LOG_DEBUG, + "DB_ENV will use DB_LOG_AUTO_REMOVE"); +} + +static inline void +BDB_ERRFILE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *errfile = NULL; + + errfile = dict_get (options, "errfile"); + if (errfile) { + private->errfile = strdup (errfile->data); + gf_log (this->name, + GF_LOG_DEBUG, + "using errfile: %s", private->errfile); + } +} + +static inline void +BDB_TABLE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + bctx_table_t *table = NULL; + int32_t idx = 0; + + data_t *lru_limit = NULL; + data_t *page_size = NULL; + + table = CALLOC (1, sizeof (*table)); + if (table) { + INIT_LIST_HEAD(&(table->b_lru)); + 
INIT_LIST_HEAD(&(table->active)); + INIT_LIST_HEAD(&(table->purge)); + + LOCK_INIT (&table->lock); + LOCK_INIT (&table->checkpoint_lock); + + table->transaction = private->transaction; + table->access_mode = private->access_mode; + table->dbflags = private->dbflags; + table->this = this; + + { + lru_limit = dict_get (options, "lru-limit"); + + /* TODO: set max lockers and max txns to accomodate + * for more than lru_limit */ + if (lru_limit) { + table->lru_limit = strtol (lru_limit->data, NULL, 0); + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "setting bctx lru limit to %d", table->lru_limit); + } else { + table->lru_limit = BDB_DEFAULT_LRU_LIMIT; + } + } + + { + page_size = dict_get (options, "page-size"); + + if (page_size) + { + if (gf_string2bytesize (page_size->data, + &table->page_size) != 0) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "invalid number format \"%s\"" + " of \"option page-size\"", + page_size->data); + } + + if (!(table->page_size >= BDB_LL_PAGE_SIZE_MIN && + table->page_size <= BDB_LL_PAGE_SIZE_MAX)) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "pagesize %s is out of range." 
+ "Allowed pagesize is between %d and %d", + page_size->data, + BDB_LL_PAGE_SIZE_MIN, + BDB_LL_PAGE_SIZE_MAX); + } + } + else { + table->page_size = BDB_LL_PAGE_SIZE_DEFAULT; + } + gf_log ("bdb-ll", + GF_LOG_DEBUG, "using page-size %"PRIu64, + table->page_size); + } + + table->hash_size = BDB_DEFAULT_HASH_SIZE; + table->b_hash = CALLOC (BDB_DEFAULT_HASH_SIZE, sizeof (struct list_head)); + + for (idx = 0; idx < table->hash_size; idx++) + INIT_LIST_HEAD(&(table->b_hash[idx])); + + private->b_table = table; + } else { + gf_log ("bdb-ll", + GF_LOG_CRITICAL, + "failed to allocate bctx table: out of memory"); + } +} + +static inline void +BDB_DIRECTORY_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *directory = NULL; + data_t *logdir = NULL; + int32_t op_ret = -1; + struct stat stbuf = {0}; + + directory = dict_get (options, "directory"); + + if (directory) { + logdir = dict_get (options, "logdir"); + + if (logdir == NULL) { + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "using default logdir as database home"); + private->logdir = strdup (directory->data); + + } else { + private->logdir = strdup (logdir->data); + gf_log ("bdb-ll", + GF_LOG_DEBUG, + "using logdir: %s", private->logdir); + umask (000); + if (mkdir (private->logdir, 0777) == 0) { + gf_log ("bdb-ll", GF_LOG_WARNING, + "logdir specified (%s) not exists, created", + private->logdir); + } + + op_ret = stat (private->logdir, &stbuf); + if ((op_ret != 0) || !S_ISDIR (stbuf.st_mode)) { + gf_log ("bdb-ll", + GF_LOG_ERROR, + "specified logdir doesn't exist, " + "using default (environment home directory: %s)", + directory->data); + private->logdir = strdup (directory->data); + } + } + + private->b_table->dbenv = bdb_dbenv_init (this, directory->data); + + if (!private->b_table->dbenv) { + gf_log ("bdb-ll", GF_LOG_ERROR, + "failed to initialize db environment"); + FREE (private); + op_ret = -1; + } else { + if (private->transaction) { + /* all well, start the checkpointing thread */ + 
LOCK_INIT (&private->active_lock); + + LOCK (&private->active_lock); + private->active = 1; + UNLOCK (&private->active_lock); + pthread_create (&private->checkpoint_thread, NULL, + bdb_checkpoint, this); + } + } + } +} + +static inline void +BDB_DIR_MODE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *dir_mode = NULL; + char *endptr = NULL; + + dir_mode = dict_get (options, "dir-mode"); + + if (dir_mode) { + private->dir_mode = strtol (dir_mode->data, &endptr, 8); + if ((*endptr) || + (!IS_VALID_FILE_MODE(private->dir_mode))) { + gf_log (this->name, + GF_LOG_DEBUG, + "invalid dir-mode %o. setting to default %o", + private->dir_mode, + DEFAULT_DIR_MODE); + private->dir_mode = DEFAULT_DIR_MODE; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting dir-mode to %o", private->dir_mode); + private->dir_mode = private->dir_mode; + } + } else { + private->dir_mode = DEFAULT_DIR_MODE; + } + + private->dir_mode = private->dir_mode | S_IFDIR; +} + +static inline void +BDB_FILE_MODE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *file_mode = NULL; + char *endptr = NULL; + + file_mode = dict_get (options, "file-mode"); + + if (file_mode) { + private->file_mode = strtol (file_mode->data, &endptr, 8); + + if ((*endptr) || + (!IS_VALID_FILE_MODE(private->file_mode))) { + gf_log (this->name, + GF_LOG_DEBUG, + "invalid file-mode %o. 
setting to default %o", + private->file_mode, + DEFAULT_FILE_MODE); + private->file_mode = DEFAULT_FILE_MODE; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting file-mode to %o", private->file_mode); + private->file_mode = private->file_mode; + } + } else { + private->file_mode = DEFAULT_FILE_MODE; + } + + private->symlink_mode = private->file_mode | S_IFLNK; + private->file_mode = private->file_mode | S_IFREG; +} + +static inline void +BDB_CHECKPOINT_TIMEOUT_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *checkpoint_timeout = NULL; + + checkpoint_timeout = dict_get (options, "checkpoint-timeout"); + + private->checkpoint_timeout = BDB_DEFAULT_CHECKPOINT_TIMEOUT; + + if (checkpoint_timeout) { + private->checkpoint_timeout = strtol (checkpoint_timeout->data, NULL, 0); + + if (private->checkpoint_timeout < 5 || private->checkpoint_timeout > 60) { + gf_log (this->name, + GF_LOG_WARNING, + "checkpoint-timeout %d seconds too %s", + private->checkpoint_timeout, + (private->checkpoint_timeout < 5)?"low":"high"); + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting checkpoint-timeout to %d seconds", + private->checkpoint_timeout); + } + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting checkpoint-timeout to default: %d seconds", + private->checkpoint_timeout); + } +} + +static inline void +BDB_LOCK_TIMEOUT_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *lock_timeout = NULL; + + lock_timeout = dict_get (options, "lock-timeout"); + + if (lock_timeout) { + private->lock_timeout = strtol (lock_timeout->data, NULL, 0); + + if (private->lock_timeout > 4260000) { + /* db allows us to DB_SET_LOCK_TIMEOUT to be set to a + * maximum of 71 mins (4260000 milliseconds) */ + gf_log (this->name, + GF_LOG_DEBUG, + "lock-timeout %d, out of range", + private->lock_timeout); + private->lock_timeout = 0; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting lock-timeout to %d milliseconds", 
+ private->lock_timeout); + } + } +} + +static inline void +BDB_TRANSACTION_TIMEOUT_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *txn_timeout = NULL; + txn_timeout = dict_get (options, "transaction-timeout"); + + if (txn_timeout) { + private->txn_timeout = strtol (txn_timeout->data, NULL, 0); + + if (private->txn_timeout > 4260000) { + /* db allows us to DB_SET_TXN_TIMEOUT to be set to a maximum + * of 71 mins (4260000 milliseconds) */ + gf_log (this->name, + GF_LOG_DEBUG, + "transaction-timeout %d, out of range", + private->txn_timeout); + private->txn_timeout = 0; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "setting transaction-timeout to %d milliseconds", + private->txn_timeout); + } + } +} + +static inline void +BDB_TRANSACTION_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *mode = NULL; + + mode = dict_get (options, "mode"); + + if (mode && !strcmp (mode->data, "off")) { + gf_log (this->name, + GF_LOG_DEBUG, + "cache mode selected"); + private->envflags = DB_CREATE | DB_INIT_LOG | + DB_INIT_MPOOL | DB_THREAD; + private->dbflags = DB_CREATE | DB_THREAD; + private->transaction = OFF; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "persistant mode selected"); + private->transaction = ON; + private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD; + private->dbflags = DB_CREATE | DB_THREAD; + } +} + +static inline void +BDB_ACCESS_MODE_INIT (xlator_t *this, + dict_t *options, + struct bdb_private *private) +{ + data_t *access_mode = NULL; + + access_mode = dict_get (options, "access-mode"); + + if (access_mode && !strcmp (access_mode->data, "btree")) { + gf_log (this->name, + GF_LOG_DEBUG, + "using access mode BTREE"); + private->access_mode = DB_BTREE; + } else { + gf_log (this->name, + GF_LOG_DEBUG, + "using access mode HASH"); + private->access_mode = DB_HASH; + } +} + + +/* bdb_db_init - initialize bdb xlator + * + * 
reads the options from @options dictionary and sets appropriate values in @this->private. + * also initializes DB_ENV. + * + * return: 0 on success or -1 on error (with logging the error through gf_log()). + */ +int +bdb_db_init (xlator_t *this, + dict_t *options) +{ + /* create a db entry for root */ + int32_t op_ret = 0; + bdb_private_t *private = NULL; + + private = this->private; + + BDB_CACHE_INIT (this, options, private); + + BDB_ACCESS_MODE_INIT (this, options, private); + + BDB_TRANSACTION_INIT (this, options, private); + + BDB_TRANSACTION_TIMEOUT_INIT (this, options, private); + + BDB_LOCK_TIMEOUT_INIT (this, options, private); + + { + LOCK_INIT (&private->ino_lock); + private->next_ino = 2; + } + + BDB_CHECKPOINT_TIMEOUT_INIT (this, options, private); + + BDB_FILE_MODE_INIT (this, options, private); + + BDB_DIR_MODE_INIT (this, options, private); + + BDB_TABLE_INIT (this, options, private); + + BDB_ERRFILE_INIT (this, options, private); + + BDB_LOG_REMOVE_INIT (this, options, private); + + BDB_DIRECTORY_INIT (this, options, private); + + return op_ret; +} diff --git a/xlators/storage/bdb/src/bdb.c b/xlators/storage/bdb/src/bdb.c new file mode 100644 index 000000000..e820e867a --- /dev/null +++ b/xlators/storage/bdb/src/bdb.c @@ -0,0 +1,3371 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* bdb based storage translator - named as 'bdb' translator + * + * + * There can be only two modes for files existing on bdb translator: + * 1. DIRECTORY - directories are stored by bdb as regular directories on background + * file-system. directories also have an entry in the ns_db.db of their parent directory. + * 2. REGULAR FILE - regular files are stored as records in the storage_db.db present in + * the directory. regular files also have an entry in ns_db.db + * + * Internally bdb has a maximum of three different types of logical files associated with + * each directory: + * 1. storage_db.db - storage database, used to store the data corresponding to regular + * files in the form of key/value pair. file-name is the 'key' and data + * is 'value'. + * 2. directory (all subdirectories) - any subdirectory will have a regular directory entry. + */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#define __XOPEN_SOURCE 500 + +#include <stdint.h> +#include <sys/time.h> +#include <errno.h> +#include <ftw.h> +#include <libgen.h> + +#include "glusterfs.h" +#include "dict.h" +#include "logging.h" +#include "bdb.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" + +/* to be used only by fops, nobody else */ +#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv) +#define B_TABLE(this) (((struct bdb_private *)this->private)->b_table) + + +int32_t +bdb_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t dev) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *key_string = NULL; /* after translating loc->path to DB key */ + char *db_path = NULL; + bctx_t *bctx = NULL; + struct stat stbuf = {0,}; + + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + if (!S_ISREG(mode)) { + gf_log (this->name, + GF_LOG_DEBUG, + "mknod for 
non-regular file"); + op_ret = -1; + op_errno = EPERM; + goto out; + } /* if(!S_ISREG(mode)) */ + + bctx = bctx_parent (B_TABLE(this), loc->path); + + if (bctx == NULL) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to get bctx for path: %s", loc->path); + op_ret = -1; + op_errno = ENOENT; + goto out; + } /* if(bctx == NULL) */ + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + MAKE_KEY_FROM_PATH (key_string, loc->path); + op_ret = bdb_db_put (bctx, NULL, key_string, NULL, 0, 0, 0); + if (op_ret > 0) { + /* create successful */ + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + stbuf.st_mode = mode; + stbuf.st_size = 0; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + } else { + gf_log (this->name, + GF_LOG_ERROR, + "bdb_db_get() failed for path: %s", loc->path); + op_ret = -1; + op_errno = ENOENT; + }/* if (!op_ret)...else */ + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + return 0; +} + +static inline int32_t +is_dir_empty (xlator_t *this, + loc_t *loc) +{ + int32_t ret = 1; + bctx_t *bctx = NULL; + DIR *dir = NULL; + char *real_path = NULL; + void *dbstat = NULL; + struct dirent *entry = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + bctx = bctx_lookup (B_TABLE(this), loc->path); + if (bctx == NULL) { + gf_log (this->name, + GF_LOG_DEBUG, + "failed to get bctx from inode for dir: %s," + "assuming empty directory", + loc->path); + ret = 1; + goto out; + } + + dbstat = bdb_db_stat (bctx, NULL, 0); + if (dbstat) { + switch (bctx->table->access_mode) + { + case DB_HASH: 
+ ret = (((DB_HASH_STAT *)dbstat)->hash_nkeys == 0); + break; + case DB_BTREE: + case DB_RECNO: + ret = (((DB_BTREE_STAT *)dbstat)->bt_nkeys == 0); + break; + case DB_QUEUE: + ret = (((DB_QUEUE_STAT *)dbstat)->qs_nkeys == 0); + break; + case DB_UNKNOWN: + gf_log (this->name, + GF_LOG_CRITICAL, + "unknown access-mode set for db"); + ret = 0; + } + } else { + gf_log (this->name, + GF_LOG_ERROR, + "failed to get db stat for db at path: %s", loc->path); + ret = 1; + goto out; + } + + MAKE_REAL_PATH (real_path, this, loc->path); + dir = opendir (real_path); + if (dir == NULL) { + gf_log (this->name, + GF_LOG_DEBUG, + "failed to opendir(%s)", loc->path); + ret = 0; + goto out; + } + + while ((entry = readdir (dir))) { + if ((!IS_BDB_PRIVATE_FILE(entry->d_name)) && + (!IS_DOT_DOTDOT(entry->d_name))) { + gf_log (this->name, + GF_LOG_DEBUG, + "directory (%s) not empty, has a non-db entry", + loc->path); + ret = 0; + break; + }/* if(!IS_BDB_PRIVATE_FILE()) */ + } /* while(true) */ + closedir (dir); +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + return ret; +} + +int32_t +bdb_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + struct bdb_private *private = NULL; + bctx_table_t *table = NULL; + bctx_t *oldbctx = NULL; + bctx_t *newbctx = NULL; + bctx_t *tmpbctx = NULL; + int32_t op_ret = -1; + int32_t op_errno = ENOENT; + int32_t read_size = 0; + struct stat stbuf = {0,}; + struct stat old_stbuf = {0,}; + DB_TXN *txnid = NULL; + char *real_newpath = NULL; + char *real_oldpath = NULL; + char *oldkey = NULL; + char *newkey = NULL; + char *buf = NULL; /* pointer to temporary buffer, where + * the contents of a file are read, if + * file being renamed is a regular file */ + char *real_db_newpath = NULL; + char *tmp_db_newpath = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO 
(this->name, newloc, out); + GF_VALIDATE_OR_GOTO (this->name, oldloc, out); + + private = this->private; + table = private->b_table; + + MAKE_REAL_PATH (real_oldpath, this, oldloc->path); + + if (S_ISREG (oldloc->inode->st_mode)) { + oldbctx = bctx_parent (B_TABLE(this), oldloc->path); + MAKE_REAL_PATH (real_newpath, this, newloc->path); + + op_ret = lstat (real_newpath, &stbuf); + + if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))) { + op_ret = -1; + op_errno = EISDIR; + goto out; + } + if (op_ret == 0) { + /* destination is a symlink */ + MAKE_KEY_FROM_PATH (oldkey, oldloc->path); + MAKE_KEY_FROM_PATH (newkey, newloc->path); + + op_ret = unlink (real_newpath); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to unlink %s (%s)", + newloc->path, strerror (op_errno)); + goto out; + } + newbctx = bctx_parent (B_TABLE (this), newloc->path); + GF_VALIDATE_OR_GOTO (this->name, newbctx, out); + + op_ret = bdb_txn_begin (BDB_ENV(this), &txnid); + + if ((read_size = + bdb_db_get (oldbctx, txnid, oldkey, &buf, 0, 0)) < 0) { + bdb_txn_abort (txnid); + } else if ((op_ret = + bdb_db_del (oldbctx, txnid, oldkey)) != 0) { + bdb_txn_abort (txnid); + } else if ((op_ret = bdb_db_put (newbctx, txnid, + newkey, buf, + read_size, 0, 0)) != 0) { + bdb_txn_abort (txnid); + } else { + bdb_txn_commit (txnid); + } + + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (newbctx); + } else { + /* destination doesn't exist or a regular file */ + MAKE_KEY_FROM_PATH (oldkey, oldloc->path); + MAKE_KEY_FROM_PATH (newkey, newloc->path); + + newbctx = bctx_parent (B_TABLE (this), newloc->path); + GF_VALIDATE_OR_GOTO (this->name, newbctx, out); + + op_ret = bdb_txn_begin (BDB_ENV(this), &txnid); + + if ((read_size = bdb_db_get (oldbctx, txnid, + oldkey, &buf, + 0, 0)) < 0) { + bdb_txn_abort (txnid); + } else if ((op_ret = bdb_db_del (oldbctx, + txnid, oldkey)) != 0) { + bdb_txn_abort (txnid); + } else 
if ((op_ret = bdb_db_put (newbctx, txnid, + newkey, buf, + read_size, 0, 0)) != 0) { + bdb_txn_abort (txnid); + } else { + bdb_txn_commit (txnid); + } + + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (newbctx); + } + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (oldbctx); + } else if (S_ISLNK (oldloc->inode->st_mode)) { + MAKE_REAL_PATH (real_newpath, this, newloc->path); + op_ret = lstat (real_newpath, &stbuf); + if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))) { + op_ret = -1; + op_errno = EISDIR; + goto out; + } + + if (op_ret == 0){ + /* destination exists and is also a symlink */ + MAKE_REAL_PATH (real_oldpath, this, oldloc->path); + op_ret = rename (real_oldpath, real_newpath); + op_errno = errno; + + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to rename symlink %s (%s)", + oldloc->path, strerror (op_errno)); + } + goto out; + } + + /* destination doesn't exist */ + MAKE_REAL_PATH (real_oldpath, this, oldloc->path); + MAKE_KEY_FROM_PATH (newkey, newloc->path); + newbctx = bctx_parent (B_TABLE (this), newloc->path); + GF_VALIDATE_OR_GOTO (this->name, newbctx, out); + + op_ret = bdb_db_del (newbctx, txnid, newkey); + if (op_ret != 0) { + /* no problem */ + } + op_ret = rename (real_oldpath, real_newpath); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to rename %s to %s (%s)", + oldloc->path, newloc->path, strerror (op_errno)); + goto out; + } + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (newbctx); + } else if (S_ISDIR (oldloc->inode->st_mode) && + (old_stbuf.st_nlink == 2)) { + + tmp_db_newpath = tempnam (private->export_path, "rename_temp"); + GF_VALIDATE_OR_GOTO (this->name, tmp_db_newpath, out); + + MAKE_REAL_PATH (real_newpath, this, newloc->path); + + MAKE_REAL_PATH_TO_STORAGE_DB 
(real_db_newpath, this, newloc->path); + + oldbctx = bctx_lookup (B_TABLE(this), oldloc->path); + op_ret = -1; + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, oldbctx, out); + + op_ret = lstat (real_newpath, &stbuf); + if ((op_ret == 0) && + S_ISDIR (stbuf.st_mode) && + is_dir_empty (this, newloc)) { + + tmpbctx = bctx_rename (oldbctx, tmp_db_newpath); + op_ret = -1; + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, tmpbctx, out); + + op_ret = rename (real_oldpath, real_newpath); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "rename directory %s to %s failed: %s", + oldloc->path, newloc->path, + strerror (errno)); + op_ret = bdb_db_rename (table, + tmp_db_newpath, + oldbctx->db_path); + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "renaming temp database back to old db failed" + " for directory %s", oldloc->path); + goto out; + } else { + /* this is a error case, set op_errno & op_ret */ + op_ret = -1; + op_errno = ENOENT; /* TODO: errno */ + } + } + op_ret = bdb_db_rename (table, tmp_db_newpath, real_db_newpath); + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "renaming temp database to new db failed" + " for directory %s", oldloc->path); + goto out; + } + } else if ((op_ret != 0) && (errno == ENOENT)) { + tmp_db_newpath = tempnam (private->export_path, "rename_temp"); + GF_VALIDATE_OR_GOTO (this->name, tmp_db_newpath, out); + + tmpbctx = bctx_rename (oldbctx, tmp_db_newpath); + op_ret = -1; + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, tmpbctx, out); + + op_ret = rename (real_oldpath, real_newpath); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "rename directory %s to %s failed: %s", + oldloc->path, newloc->path, + strerror (errno)); + op_ret = bdb_db_rename (table, + tmp_db_newpath, + oldbctx->db_path); + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "renaming temp database back to old db failed" + " for directory %s", 
oldloc->path); + goto out; + } else { + /* this is a error case, set op_errno & op_ret */ + op_ret = -1; + op_errno = ENOENT; /* TODO: errno */ + } + } else { + op_ret = bdb_db_rename (table, + tmp_db_newpath, + real_db_newpath); + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "renaming temp database to new db failed" + " for directory %s", oldloc->path); + goto out; + } else { + /* this is a error case, set op_errno & op_ret */ + op_ret = -1; + op_errno = ENOENT; /* TODO: errno */ + } + } + } + } else { + gf_log (this->name, + GF_LOG_CRITICAL, + "rename called on non-existent file type"); + op_ret = -1; + op_errno = EPERM; + } + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + return 0; +} + +int32_t +bdb_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, -1, EPERM, NULL, NULL); + return 0; +} + +int32_t +is_space_left (xlator_t *this, + size_t size) +{ + struct bdb_private *private = this->private; + struct statvfs stbuf = {0,}; + int32_t ret = -1; + fsblkcnt_t req_blocks = 0; + fsblkcnt_t usable_blocks = 0; + + ret = statvfs (private->export_path, &stbuf); + if (ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to do statvfs on %s", private->export_path); + return 0; + } else { + req_blocks = (size / stbuf.f_frsize) + 1; + + usable_blocks = (stbuf.f_bfree - BDB_ENOSPC_THRESHOLD); + + gf_log (this->name, GF_LOG_DEBUG, + "requested size: %"GF_PRI_SIZET"\nfree blocks: %"PRIu64"\nblock size: %lu\nfrag size: %lu", + size, stbuf.f_bfree, stbuf.f_bsize, stbuf.f_frsize); + + if (req_blocks < usable_blocks) + return 1; + else + return 0; + } +} + +int32_t +bdb_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + char *db_path = NULL; + struct stat stbuf = {0,}; + bctx_t *bctx = NULL; + struct bdb_private *private = 
NULL; + char *key_string = NULL; + struct bdb_fd *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + private = this->private; + + bctx = bctx_parent (B_TABLE(this), loc->path); + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + MAKE_KEY_FROM_PATH (key_string, loc->path); + op_ret = bdb_db_put (bctx, NULL, key_string, NULL, 0, 0, 0); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out); + + /* create successful */ + bfd = CALLOC (1, sizeof (*bfd)); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + /* NOTE: bdb_get_bctx_from () returns bctx with a ref */ + bfd->ctx = bctx; + bfd->key = strdup (key_string); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd->key, out); + + BDB_SET_BFD (this, fd, bfd); + + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + stbuf.st_mode = private->file_mode; + stbuf.st_size = 0; + stbuf.st_nlink = 1; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + op_ret = 0; + op_errno = 0; +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf); + + return 0; +} + + +/* bdb_open + * + * as input parameters bdb_open gets the file name, i.e key. bdb_open should effectively + * do: store key, open storage db, store storage-db pointer. 
+ * + */ +int32_t +bdb_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + bctx_t *bctx = NULL; + char *key_string = NULL; + struct bdb_fd *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + bctx = bctx_parent (B_TABLE(this), loc->path); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + bfd = CALLOC (1, sizeof (*bfd)); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + /* NOTE: bctx_parent () returns bctx with a ref */ + bfd->ctx = bctx; + + MAKE_KEY_FROM_PATH (key_string, loc->path); + bfd->key = strdup (key_string); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd->key, out); + + BDB_SET_BFD (this, fd, bfd); + op_ret = 0; +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + +int32_t +bdb_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct iovec vec = {0,}; + struct stat stbuf = {0,}; + struct bdb_fd *bfd = NULL; + dict_t *reply_dict = NULL; + char *buf = NULL; + data_t *buf_data = NULL; + char *db_path = NULL; + int32_t read_size = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + /* we are ready to go */ + op_ret = bdb_db_get (bfd->ctx, NULL, + bfd->key, &buf, + 
size, offset); + read_size = op_ret; + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to do db_storage_get()"); + op_ret = -1; + op_errno = ENOENT; + goto out; + } else if (op_ret == 0) { + goto out; + } + + buf_data = get_new_data (); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, buf_data, out); + + reply_dict = get_new_dict (); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, reply_dict, out); + + buf_data->data = buf; + + if (size < read_size) { + op_ret = size; + read_size = size; + } + + buf_data->len = op_ret; + + dict_set (reply_dict, NULL, buf_data); + + frame->root->rsp_refs = dict_ref (reply_dict); + + vec.iov_base = buf; + vec.iov_len = read_size; + + stbuf.st_ino = fd->inode->ino; + stbuf.st_size = op_ret ; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + op_ret = size; +out: + STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf); + + if (reply_dict) + dict_unref (reply_dict); + + return 0; +} + + +int32_t +bdb_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct stat stbuf = {0,}; + struct bdb_fd *bfd = NULL; + int32_t idx = 0; + off_t c_off = offset; + int32_t c_ret = -1; + char *db_path = NULL; + size_t total_size = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO (this->name, vector, out); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + + for (idx = 0; idx < count; idx++) + total_size += 
vector[idx].iov_len; + + if (!is_space_left (this, total_size)) { + gf_log (this->name, + GF_LOG_ERROR, + "requested storage for %"GF_PRI_SIZET", ENOSPC", total_size); + op_ret = -1; + op_errno = ENOSPC; + goto out; + } + + + /* we are ready to go */ + for (idx = 0; idx < count; idx++) { + c_ret = bdb_db_put (bfd->ctx, NULL, + bfd->key, vector[idx].iov_base, + vector[idx].iov_len, c_off, 0); + if (c_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to do bdb_db_put at offset: %"PRIu64" for file: %s", + c_off, bfd->key); + break; + } else { + c_off += vector[idx].iov_len; + } + op_ret += vector[idx].iov_len; + } /* for(idx=0;...)... */ + + if (c_ret) { + /* write failed */ + gf_log (this->name, + GF_LOG_ERROR, + "failed to do bdb_db_put(): %s", + db_strerror (op_ret)); + op_ret = -1; + op_errno = EBADFD; /* TODO: search for a more meaningful errno */ + goto out; + } + /* NOTE: we want to increment stbuf->st_size, as stored in db */ + stbuf.st_size = op_ret; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + op_errno = 0; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + return 0; +} + +int32_t +bdb_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + struct bdb_fd *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + /* do nothing */ + op_ret = 0; + op_errno = 0; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +bdb_release (xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = EBADFD; + struct bdb_fd *bfd = NULL; + + if ((bfd = bdb_extract_bfd (fd, this)) == NULL){ + gf_log (this->name, + GF_LOG_ERROR, + "failed to extract %s specific information from 
fd:%p", this->name, fd); + op_ret = -1; + op_errno = EBADFD; + } else { + bctx_unref (bfd->ctx); + bfd->ctx = NULL; + + if (bfd->key) + free (bfd->key); /* we did strdup() in bdb_open() */ + free (bfd); + op_ret = 0; + op_errno = 0; + } /* if((fd->ctx == NULL)...)...else */ + + return 0; +}/* bdb_release */ + + +int32_t +bdb_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync) +{ + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, 0, 0); + return 0; +}/* bdb_fsync */ + +static int gf_bdb_lk_log; + +int32_t +bdb_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + struct flock nullock = {0, }; + + gf_bdb_lk_log++; + if (!(gf_bdb_lk_log % GF_UNIVERSAL_ANSWER)) { + gf_log (this->name, GF_LOG_ERROR, + "\"features/posix-locks\" translator is not loaded, you need to use it"); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, -1, ENOSYS, &nullock); + return 0; +}/* bdb_lk */ + +/* bdb_lookup + * + * there are four possibilities for a file being looked up: + * 1. file exists and is a directory. + * 2. file exists and is a symlink. + * 3. file exists and is a regular file. + * 4. file does not exist. + * case 1 and 2 are handled by doing lstat() on the @loc. if the file is a directory or symlink, + * lstat() succeeds. lookup continues to check if the @loc belongs to case-3 only if lstat() fails. + * to check for case 3, bdb_lookup does a bdb_db_get() for the given @loc. (see description of + * bdb_db_get() for more details on how @loc is transformed into db handle and key). if check + * for case 1, 2 and 3 fail, we proceed to conclude that file doesn't exist (case 4). + * + * @frame: call frame. + * @this: xlator_t of this instance of bdb xlator. + * @loc: loc_t specifying the file to operate upon. + * @need_xattr: if need_xattr != 0, we are asked to return all the extended attributed of @loc, + * if any exist, in a dictionary. 
if @loc is a regular file and need_xattr is set, then + * we look for value of need_xattr. if need_xattr > sizo-of-the-file @loc, then the file + * content of @loc is returned in dictionary of xattr with 'glusterfs.content' as + * dictionary key. + * + * NOTE: bdb currently supports only directories, symlinks and regular files. + * + * NOTE: bdb_lookup returns the 'struct stat' of underlying file itself, in case of directory and + * symlink (st_ino is modified as bdb allocates its own set of inodes of all files). for + * regular files, bdb uses 'struct stat' of the database file in which the @loc is stored + * as templete and modifies st_ino (see bdb_inode_transform for more details), st_mode (can + * be set in volfile 'option file-mode <mode>'), st_size (exact size of the @loc + * contents), st_blocks (block count on the underlying filesystem to accomodate st_size, + * see BDB_COUNT_BLOCKS in bdb.h for more details). + */ +int32_t +bdb_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + struct stat stbuf = {0, }; + int32_t op_ret = -1; + int32_t op_errno = ENOENT; + dict_t *xattr = NULL; + char *pathname = NULL; + char *directory = NULL; + char *real_path = NULL; + bctx_t *bctx = NULL; + char *db_path = NULL; + struct bdb_private *private = NULL; + char *key_string = NULL; + int32_t entry_size = 0; + char *file_content = NULL; + data_t *file_content_data = NULL; + uint64_t need_xattr = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + private = this->private; + + MAKE_REAL_PATH (real_path, this, loc->path); + + pathname = strdup (loc->path); + GF_VALIDATE_OR_GOTO (this->name, pathname, out); + + directory = dirname (pathname); + GF_VALIDATE_OR_GOTO (this->name, directory, out); + + if (!strcmp (directory, loc->path)) { + /* SPECIAL CASE: looking up root */ + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + 
gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + /* bctx_lookup() returns NULL only when its time to wind up, + * we should shutdown functioning */ + bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); + op_ret = -1; + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + stbuf.st_ino = 1; + stbuf.st_mode = private->dir_mode; + } else { + MAKE_KEY_FROM_PATH (key_string, loc->path); + op_ret = lstat (real_path, &stbuf); + if ((op_ret == 0) && (S_ISDIR (stbuf.st_mode))){ + bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + if (loc->ino) { + /* revalidating directory inode */ + gf_log (this->name, + GF_LOG_DEBUG, + "revalidating directory %s", (char *)loc->path); + stbuf.st_ino = loc->ino; + } else { + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + } + stbuf.st_mode = private->dir_mode; + op_ret = 0; + op_errno = 0; + goto out; + } else if (op_ret == 0) { + /* a symlink */ + gf_log (this->name, + GF_LOG_DEBUG, + "lookup called for symlink: %s", loc->path); + bctx = bctx_parent (B_TABLE(this), loc->path); + op_ret = -1; + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + if (loc->ino) { + stbuf.st_ino = loc->ino; + } else { + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + } + stbuf.st_mode = private->symlink_mode; + op_ret = 0; + op_errno = 0; + goto out; + } + + /* for regular files */ + bctx = bctx_parent (B_TABLE(this), loc->path); + op_ret = -1; + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + if (GF_FILE_CONTENT_REQUESTED(xattr_req, &need_xattr)) { + entry_size = bdb_db_get (bctx, + NULL, + loc->path, + &file_content, + 0, 0); + } else { + entry_size = bdb_db_get (bctx, + NULL, + loc->path, + NULL, + 0, 0); + } + + op_ret = entry_size; + op_errno = ENOENT; + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_DEBUG, + 
"returning ENOENT for %s", loc->path); + goto out; + } + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + if ((need_xattr >= entry_size) + && (entry_size) && (file_content)) { + file_content_data = data_from_dynptr (file_content, + entry_size); + xattr = get_new_dict (); + dict_set (xattr, "glusterfs.content", + file_content_data); + } else { + if (file_content) + free (file_content); + } + + if (loc->ino) { + /* revalidate */ + stbuf.st_ino = loc->ino; + stbuf.st_size = entry_size; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + } else { + /* fresh lookup, create an inode number */ + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + stbuf.st_size = entry_size; + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + }/* if(inode->ino)...else */ + stbuf.st_nlink = 1; + stbuf.st_mode = private->file_mode; + } + op_ret = 0; +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + if (pathname) + free (pathname); + + if (xattr) + dict_ref (xattr); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf, xattr); + + if (xattr) + dict_unref (xattr); + + return 0; + +}/* bdb_lookup */ + +int32_t +bdb_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + + struct stat stbuf = {0,}; + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct bdb_private *private = NULL; + char *db_path = NULL; + bctx_t *bctx = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + private = this->private; + GF_VALIDATE_OR_GOTO (this->name, private, out); + + MAKE_REAL_PATH 
(real_path, this, loc->path); + + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret == 0) { + /* directory or symlink */ + stbuf.st_ino = loc->inode->ino; + if (S_ISDIR(stbuf.st_mode)) + stbuf.st_mode = private->dir_mode; + else + stbuf.st_mode = private->symlink_mode; + /* we are done, lets unwind the stack */ + goto out; + } + + bctx = bctx_parent (B_TABLE(this), loc->path); + op_ret = -1; + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + stbuf.st_size = bdb_db_get (bctx, NULL, loc->path, NULL, 0, 0); + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + stbuf.st_ino = loc->inode->ino; + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +}/* bdb_stat */ + + + +/* bdb_opendir - in the world of bdb, open/opendir is all about opening correspondind databases. + * opendir in particular, opens the database for the directory which is + * to be opened. after opening the database, a cursor to the database is also created. + * cursor helps us get the dentries one after the other, and cursor maintains the state + * about current positions in directory. pack 'pointer to db', 'pointer to the + * cursor' into struct bdb_dir and store it in fd->ctx, we get from our parent xlator. + * + * @frame: call frame + * @this: our information, as we filled during init() + * @loc: location information + * @fd: file descriptor structure (glusterfs internal) + * + * return value - immaterial, async call. 
+ * + */ +int32_t +bdb_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + bctx_t *bctx = NULL; + struct bdb_dir *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + bfd = CALLOC (1, sizeof (*bfd)); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + bfd->dir = opendir (real_path); + op_errno = errno; + GF_VALIDATE_OR_GOTO (this->name, bfd->dir, out); + + /* NOTE: bctx_lookup() return bctx with ref */ + bfd->ctx = bctx; + + bfd->path = strdup (real_path); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bfd->path, out); + + BDB_SET_BFD (this, fd, bfd); + op_ret = 0; +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +}/* bdb_opendir */ + + +int32_t +bdb_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off, + int32_t flag) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t ret = -1; + int32_t real_path_len = 0; + int32_t entry_path_len = 0; + int32_t count = 0; + char *real_path = NULL; + char *entry_path = NULL; + char *db_path = NULL; + dir_entry_t entries = {0, }; + dir_entry_t *tmp = NULL; + DIR *dir = NULL; + struct dirent *dirent = NULL; + struct bdb_dir *bfd = NULL; + struct stat db_stbuf = {0,}; + struct stat buf = {0,}; + DBC *cursorp = NULL; + size_t tmp_name_len = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + MAKE_REAL_PATH 
(real_path, this, bfd->path); + dir = bfd->dir; + + while ((dirent = readdir (dir))) { + if (!dirent) + break; + + if (IS_BDB_PRIVATE_FILE(dirent->d_name)) { + continue; + } + + tmp_name_len = strlen (dirent->d_name); + if (entry_path_len < (real_path_len + 1 + (tmp_name_len) + 1)) { + entry_path_len = real_path_len + tmp_name_len + 1024; + entry_path = realloc (entry_path, entry_path_len); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, entry_path, out); + } + + strncpy (&entry_path[real_path_len+1], dirent->d_name, tmp_name_len); + op_ret = stat (entry_path, &buf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + entry_path, strerror (op_errno)); + goto out; + } + + if ((flag == GF_GET_DIR_ONLY) && + (ret != -1 && !S_ISDIR(buf.st_mode))) { + continue; + } + + tmp = CALLOC (1, sizeof (*tmp)); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, tmp, out); + + tmp->name = strdup (dirent->d_name); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, dirent->d_name, out); + + memcpy (&tmp->buf, &buf, sizeof (buf)); + + tmp->buf.st_ino = -1; + if (S_ISLNK(tmp->buf.st_mode)) { + char linkpath[ZR_PATH_MAX] = {0,}; + ret = readlink (entry_path, linkpath, ZR_PATH_MAX); + if (ret != -1) { + linkpath[ret] = '\0'; + tmp->link = strdup (linkpath); + } + } else { + tmp->link = ""; + } + + count++; + + tmp->next = entries.next; + entries.next = tmp; + /* if size is 0, count can never be = size, so entire dir is read */ + + if (count == size) + break; + } + + if ((flag != GF_GET_DIR_ONLY) && (count < size)) { + /* read from db */ + op_ret = bdb_cursor_open (bfd->ctx, &cursorp); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out); + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bfd->ctx->directory); + op_ret = lstat (db_path, &db_stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + 
goto out; + } + + /* read all the entries in database, one after the other and put into dictionary */ + while (1) { + DBT key = {0,}, value = {0,}; + + key.flags = DB_DBT_MALLOC; + value.flags = DB_DBT_MALLOC; + op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT); + + if (op_ret == DB_NOTFOUND) { + gf_log (this->name, + GF_LOG_DEBUG, + "end of list of key/value pair in db for directory: %s", + bfd->ctx->directory); + op_ret = 0; + op_errno = 0; + break; + } else if (op_ret != 0){ + gf_log (this->name, + GF_LOG_ERROR, + "failed to do cursor get for directory %s: %s", + bfd->ctx->directory, db_strerror (op_ret)); + op_ret = -1; + op_errno = ENOENT; + break; + } + /* successfully read */ + tmp = CALLOC (1, sizeof (*tmp)); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, tmp, out); + + tmp->name = CALLOC (1, key.size + 1); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, tmp->name, out); + + memcpy (tmp->name, key.data, key.size); + tmp->buf = db_stbuf; + tmp->buf.st_size = bdb_db_get (bfd->ctx, NULL, + tmp->name, NULL, + 0, 0); + tmp->buf.st_blocks = BDB_COUNT_BLOCKS (tmp->buf.st_size, \ + tmp->buf.st_blksize); + /* FIXME: wat will be the effect of this? 
*/ + tmp->buf.st_ino = -1; + count++; + + tmp->next = entries.next; + tmp->link = ""; + entries.next = tmp; + /* if size is 0, count can never be = size, so entire dir is read */ + if (count == size) + break; + + free (key.data); + } /* while(1){ } */ + bdb_cursor_close (bfd->ctx, cursorp); + } else { + /* do nothing */ + } + FREE (entry_path); + op_ret = 0; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &entries, count); + + while (entries.next) { + tmp = entries.next; + entries.next = entries.next->next; + FREE (tmp->name); + FREE (tmp); + } + return 0; +}/* bdb_getdents */ + + +int32_t +bdb_releasedir (xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + struct bdb_dir *bfd = NULL; + + if ((bfd = bdb_extract_bfd (fd, this)) == NULL) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to extract fd data from fd=%p", fd); + op_ret = -1; + op_errno = EBADF; + } else { + if (bfd->path) { + free (bfd->path); + } else { + gf_log (this->name, GF_LOG_ERROR, "bfd->path was NULL. 
fd=%p bfd=%p", + fd, bfd); + } + + if (bfd->dir) { + closedir (bfd->dir); + } else { + gf_log (this->name, + GF_LOG_ERROR, + "bfd->dir is NULL."); + } + if (bfd->ctx) { + bctx_unref (bfd->ctx); + } else { + gf_log (this->name, + GF_LOG_ERROR, + "bfd->ctx is NULL"); + } + free (bfd); + } + + return 0; +}/* bdb_releasedir */ + + +int32_t +bdb_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + char *dest = NULL; + int32_t op_ret = -1; + int32_t op_errno = EPERM; + char *real_path = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + dest = alloca (size + 1); + GF_VALIDATE_OR_GOTO (this->name, dest, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = readlink (real_path, dest, size); + + if (op_ret > 0) + dest[op_ret] = 0; + + op_errno = errno; + + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_DEBUG, + "readlink failed on %s: %s", + loc->path, strerror (op_errno)); + } +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, dest); + + return 0; +}/* bdb_readlink */ + + +int32_t +bdb_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int32_t op_ret = -1; + int32_t ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct stat stbuf = {0, }; + bctx_t *bctx = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = mkdir (real_path, mode); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to mkdir %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + op_ret = chown (real_path, frame->root->uid, frame->root->gid); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to chmod on %s (%s)", + real_path, strerror (op_errno)); + goto err; + } + + 
op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto err; + } + + bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, bctx, err); + + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + + goto out; + +err: + ret = rmdir (real_path); + if (ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to rmdir the directory created (%s)", + strerror (errno)); + } + + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + + return 0; +}/* bdb_mkdir */ + + +int32_t +bdb_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + bctx_t *bctx = NULL; + char *real_path = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + bctx = bctx_parent (B_TABLE(this), loc->path); + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + op_ret = bdb_db_del (bctx, NULL, loc->path); + if (op_ret == DB_NOTFOUND) { + MAKE_REAL_PATH (real_path, this, loc->path); + op_ret = unlink (real_path); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to unlink on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + } else if (op_ret == 0) { + op_errno = 0; + } +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +}/* bdb_unlink */ + + + +int32_t +bdb_do_rmdir (xlator_t *this, + loc_t *loc) +{ + char *real_path = NULL; + int32_t ret 
= -1; + bctx_t *bctx = NULL; + DB_ENV *dbenv = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + dbenv = BDB_ENV(this); + GF_VALIDATE_OR_GOTO (this->name, dbenv, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + bctx = bctx_lookup (B_TABLE(this), loc->path); + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + LOCK(&bctx->lock); + { + if (bctx->dbp == NULL) { + goto unlock; + } + + ret = bctx->dbp->close (bctx->dbp, 0); + GF_VALIDATE_OR_GOTO (this->name, (ret == 0), unlock); + + bctx->dbp = NULL; + + ret = dbenv->dbremove (dbenv, NULL, bctx->db_path, NULL, 0); + if (ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to DB_ENV->dbremove() on path %s: %s", + loc->path, db_strerror (ret)); + } + } +unlock: + UNLOCK(&bctx->lock); + + if (ret) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to remove db %s: %s", bctx->db_path, db_strerror (ret)); + ret = -1; + goto out; + } + gf_log (this->name, + GF_LOG_DEBUG, + "removed db %s", bctx->db_path); + ret = rmdir (real_path); + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + return ret; +} + +int32_t +bdb_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTEMPTY; + + if (!is_dir_empty (this, loc)) { + gf_log (this->name, + GF_LOG_DEBUG, + "rmdir: directory %s not empty", loc->path); + op_errno = ENOTEMPTY; + op_ret = -1; + goto out; + } + + op_ret = bdb_do_rmdir (this, loc); + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to bdb_do_rmdir on %s", + loc->path); + goto out; + } + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} /* bdb_rmdir */ + +int32_t +bdb_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkname, + loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct stat 
stbuf = {0,}; + struct bdb_private *private = NULL; + bctx_t *bctx = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, linkname, out); + + private = this->private; + GF_VALIDATE_OR_GOTO (this->name, private, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + op_ret = symlink (linkname, real_path); + op_errno = errno; + if (op_ret == 0) { + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto err; + } + + bctx = bctx_parent (B_TABLE(this), loc->path); + GF_VALIDATE_OR_GOTO (this->name, bctx, err); + + stbuf.st_ino = bdb_inode_transform (stbuf.st_ino, bctx); + stbuf.st_mode = private->symlink_mode; + + goto out; + } +err: + op_ret = unlink (real_path); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to unlink the previously created symlink (%s)", + strerror (op_errno)); + } + op_ret = -1; + op_errno = ENOENT; +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + + return 0; +} /* bdb_symlink */ + +int32_t +bdb_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct stat stbuf = {0,}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + /* directory or 
symlink */ + op_ret = chmod (real_path, mode); + op_errno = errno; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +}/* bdb_chmod */ + + +int32_t +bdb_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct stat stbuf = {0,}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + /* directory or symlink */ + op_ret = lchown (real_path, uid, gid); + op_errno = errno; +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +}/* bdb_chown */ + + +int32_t +bdb_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct stat stbuf = {0,}; + char *db_path = NULL; + bctx_t *bctx = NULL; + char *key_string = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + bctx = bctx_parent (B_TABLE(this), loc->path); + op_errno = ENOENT; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_KEY_FROM_PATH (key_string, loc->path); + + /* now truncate */ + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + op_ret = lstat (db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + if (loc->inode->ino) { + stbuf.st_ino = loc->inode->ino; + }else { + stbuf.st_ino = 
bdb_inode_transform (stbuf.st_ino, bctx); + } + + op_ret = bdb_db_put (bctx, NULL, key_string, NULL, 0, 1, 0); + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_DEBUG, + "failed to do bdb_db_put: %s", + db_strerror (op_ret)); + op_ret = -1; + op_errno = EINVAL; /* TODO: better errno */ + } + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +}/* bdb_truncate */ + + +int32_t +bdb_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec ts[2]) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + char *real_path = NULL; + struct stat stbuf = {0,}; + struct timeval tv[2] = {{0,},}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + op_errno = EPERM; + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + /* directory or symlink */ + tv[0].tv_sec = ts[0].tv_sec; + tv[0].tv_usec = ts[0].tv_nsec / 1000; + tv[1].tv_sec = ts[1].tv_sec; + tv[1].tv_usec = ts[1].tv_nsec / 1000; + + op_ret = lutimes (real_path, tv); + if (op_ret == -1 && errno == ENOSYS) { + op_ret = utimes (real_path, tv); + } + op_errno = errno; + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_WARNING, + "utimes on %s failed: %s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + real_path, strerror (op_errno)); + goto out; + } + + stbuf.st_ino = loc->inode->ino; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; 
+}/* bdb_utimens */ + +int32_t +bdb_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) + +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct statvfs buf = {0, }; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = statvfs (real_path, &buf); + op_errno = errno; +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + return 0; +}/* bdb_statfs */ + +static int gf_bdb_xattr_log; + +/* bdb_setxattr - set extended attributes. + * + * bdb allows setxattr operation only on directories. + * bdb reservers 'glusterfs.file.<attribute-name>' to operate on the content of the files + * under the specified directory. 'glusterfs.file.<attribute-name>' transforms to contents of + * file of name '<attribute-name>' under specified directory. + * + * @frame: call frame. + * @this: xlator_t of this instance of bdb xlator. + * @loc: loc_t specifying the file to operate upon. + * @dict: list of extended attributes to set on @loc. + * @flags: can be XATTR_REPLACE (replace an existing extended attribute only if it exists) or + * XATTR_CREATE (create an extended attribute only if it doesn't already exist). 
+ * + * + */ +int32_t +bdb_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int flags) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + data_pair_t *trav = dict->members_list; + bctx_t *bctx = NULL; + char *real_path = NULL; + char *key = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + if (!S_ISDIR (loc->inode->st_mode)) { + op_ret = -1; + op_errno = EPERM; + goto out; + } + + while (trav) { + if (ZR_FILE_CONTENT_REQUEST(trav->key) ) { + bctx = bctx_lookup (B_TABLE(this), loc->path); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + key = &(trav->key[15]); + + if (flags & XATTR_REPLACE) { + /* replace only if previously exists, otherwise error out */ + op_ret = bdb_db_get (bctx, NULL, key, + NULL, 0, 0); + if (op_ret == -1) { + /* key doesn't exist in database */ + gf_log (this->name, + GF_LOG_DEBUG, + "cannot XATTR_REPLACE, xattr %s doesn't exist " + "on path %s", key, loc->path); + op_ret = -1; + op_errno = ENOENT; + break; + } + op_ret = bdb_db_put (bctx, NULL, + key, trav->value->data, + trav->value->len, + op_ret, BDB_TRUNCATE_RECORD); + if (op_ret != 0) { + op_ret = -1; + op_errno = EINVAL; + break; + } + } else { + /* fresh create */ + op_ret = bdb_db_put (bctx, NULL, key, + trav->value->data, + trav->value->len, + 0, 0); + if (op_ret != 0) { + op_ret = -1; + op_errno = EINVAL; + break; + } else { + op_ret = 0; + op_errno = 0; + } /* if(op_ret!=0)...else */ + } /* if(flags&XATTR_REPLACE)...else */ + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + } else { + /* do plain setxattr */ + op_ret = lsetxattr (real_path, + trav->key, + trav->value->data, + trav->value->len, + flags); + op_errno = errno; + if ((op_ret == -1) && 
(op_errno != ENOENT)) { + if (op_errno == ENOTSUP) { + gf_bdb_xattr_log++; + if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER)) { + gf_log (this->name, GF_LOG_WARNING, + "Extended Attributes support not present."\ + "Please check"); + } + } else { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr failed on %s (%s)", + loc->path, strerror (op_errno)); + } + break; + } + } /* if(ZR_FILE_CONTENT_REQUEST())...else */ + trav = trav->next; + }/* while(trav) */ +out: + frame->root->rsp_refs = NULL; + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +}/* bdb_setxattr */ + + +/* bdb_gettxattr - get extended attributes. + * + * bdb allows getxattr operation only on directories. + * bdb_getxattr retrieves the whole content of the file, when glusterfs.file.<attribute-name> + * is specified. + * + * @frame: call frame. + * @this: xlator_t of this instance of bdb xlator. + * @loc: loc_t specifying the file to operate upon. + * @name: name of extended attributes to get for @loc. + * + * NOTE: see description of bdb_setxattr for details on how + * 'glusterfs.file.<attribute-name>' is handles by bdb. 
+ */ +int32_t +bdb_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + dict_t *dict = NULL; + bctx_t *bctx = NULL; + char *buf = NULL; + char *key_string = NULL; + int32_t list_offset = 0; + size_t size = 0; + size_t remaining_size = 0; + char *real_path = NULL; + char key[1024] = {0,}; + char *value = NULL; + char *list = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, name, out); + + dict = get_new_dict (); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + if (!S_ISDIR (loc->inode->st_mode)) { + gf_log (this->name, + GF_LOG_DEBUG, + "operation not permitted on a non-directory file: %s", loc->path); + op_ret = -1; + op_errno = ENODATA; + goto out; + } + + if (name && ZR_FILE_CONTENT_REQUEST(name)) { + bctx = bctx_lookup (B_TABLE(this), loc->path); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + key_string = (char *)&(name[15]); + + op_ret = bdb_db_get (bctx, NULL, key_string, &buf, 0, 0); + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_DEBUG, + "failed to db get on directory: %s for key: %s", + bctx->directory, name); + op_ret = -1; + op_errno = ENODATA; + goto out; + } + + dict_set (dict, (char *)name, data_from_dynptr (buf, op_ret)); + } else { + MAKE_REAL_PATH (real_path, this, loc->path); + size = llistxattr (real_path, NULL, 0); + op_errno = errno; + if (size <= 0) { + /* There are no extended attributes, send an empty dictionary */ + if (size == -1 && op_errno != ENODATA) { + if (op_errno == ENOTSUP) { + gf_bdb_xattr_log++; + if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER)) + gf_log (this->name, + GF_LOG_WARNING, + "Extended Attributes support not present."\ + "Please check"); + } else { + gf_log (this->name, + GF_LOG_WARNING, + "llistxattr failed on %s (%s)", + loc->path, strerror (op_errno)); + } + } + op_ret = -1; + 
op_errno = ENODATA; + } else { + list = alloca (size + 1); + op_errno = ENOMEM; + GF_VALIDATE_OR_GOTO (this->name, list, out); + + size = llistxattr (real_path, list, size); + op_ret = size; + op_errno = errno; + if (size == -1) { + gf_log (this->name, + GF_LOG_ERROR, + "llistxattr failed on %s (%s)", + loc->path, strerror (errno)); + goto out; + } + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + if(*(list+list_offset) == '\0') + break; + strcpy (key, list + list_offset); + op_ret = lgetxattr (real_path, key, NULL, 0); + if (op_ret == -1) + break; + value = CALLOC (op_ret + 1, sizeof(char)); + GF_VALIDATE_OR_GOTO (this->name, value, out); + + op_ret = lgetxattr (real_path, key, value, op_ret); + if (op_ret == -1) + break; + value [op_ret] = '\0'; + dict_set (dict, key, data_from_dynptr (value, op_ret)); + remaining_size -= strlen (key) + 1; + list_offset += strlen (key) + 1; + } /* while(remaining_size>0) */ + } /* if(size <= 0)...else */ + } /* if(name...)...else */ + +out: + if(bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + if (dict) + dict_ref (dict); + + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dict) + dict_unref (dict); + + return 0; +}/* bdb_getxattr */ + + +int32_t +bdb_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + bctx_t *bctx = NULL; + char *real_path = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + GF_VALIDATE_OR_GOTO (this->name, name, out); + + if (!S_ISDIR(loc->inode->st_mode)) { + gf_log (this->name, + GF_LOG_WARNING, + "operation not permitted on non-directory files"); + op_ret = -1; + op_errno = EPERM; + goto out; + } + + if (ZR_FILE_CONTENT_REQUEST(name)) { + bctx = bctx_lookup (B_TABLE(this), loc->path); + op_errno = EINVAL; + 
GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + op_ret = bdb_db_del (bctx, NULL, name); + if (op_ret == -1) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to delete %s from db of %s directory", + name, loc->path); + op_errno = EINVAL; /* TODO: errno */ + goto out; + } + } else { + MAKE_REAL_PATH(real_path, this, loc->path); + op_ret = lremovexattr (real_path, name); + op_errno = errno; + if (op_ret == -1) { + if (op_errno == ENOTSUP) { + gf_bdb_xattr_log++; + if (!(gf_bdb_xattr_log % GF_UNIVERSAL_ANSWER)) + gf_log (this->name, GF_LOG_WARNING, + "Extended Attributes support not present." + "Please check"); + } else { + gf_log (this->name, + GF_LOG_WARNING, + "%s: %s", + loc->path, strerror (op_errno)); + } + } /* if(op_ret == -1) */ + } /* if (ZR_FILE_CONTENT_REQUEST(name))...else */ + +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +}/* bdb_removexattr */ + + +int32_t +bdb_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int datasync) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct bdb_fd *bfd = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + frame->root->rsp_refs = NULL; + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + +out: + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +}/* bdb_fsycndir */ + + +int32_t +bdb_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = access (real_path, mask); + op_errno 
= errno; + /* TODO: implement for db entries */ +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +}/* bdb_access */ + + +int32_t +bdb_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + struct stat buf = {0,}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + /* TODO: impelement */ +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + +int32_t +bdb_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + struct stat buf = {0,}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + /* TODO: implement */ +out: + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + + +int32_t +bdb_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + int32_t op_ret = -1; + int32_t op_errno = EPERM; + struct stat buf = {0,}; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + /* TODO: impelement */ +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + +int32_t +bdb_setdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags, + dir_entry_t *entries, + int32_t count) +{ + int32_t op_ret = -1, op_errno = EINVAL; + char *entry_path = NULL; + int32_t real_path_len = 0; + int32_t entry_path_len = 0; + int32_t ret = 0; + struct bdb_dir *bfd = NULL; + dir_entry_t *trav = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO (this->name, entries, out); + + frame->root->rsp_refs = NULL; + 
+ bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + real_path_len = strlen (bfd->path); + entry_path_len = real_path_len + 256; + entry_path = CALLOC (1, entry_path_len); + GF_VALIDATE_OR_GOTO (this->name, entry_path, out); + + strcpy (entry_path, bfd->path); + entry_path[real_path_len] = '/'; + + trav = entries->next; + while (trav) { + char pathname[ZR_PATH_MAX] = {0,}; + strcpy (pathname, entry_path); + strcat (pathname, trav->name); + + if (S_ISDIR(trav->buf.st_mode)) { + /* If the entry is directory, create it by calling 'mkdir'. If + * directory is not present, it will be created, if its present, + * no worries even if it fails. + */ + ret = mkdir (pathname, trav->buf.st_mode); + if ((ret == -1) && (errno != EEXIST)) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to created directory %s: %s", + pathname, strerror(errno)); + goto loop; + } + + gf_log (this->name, + GF_LOG_DEBUG, + "Creating directory %s with mode (0%o)", + pathname, + trav->buf.st_mode); + /* Change the mode + * NOTE: setdents tries its best to restore the state + * of storage. 
if chmod and chown fail, they can be + * ignored now */ + ret = chmod (pathname, trav->buf.st_mode); + if (ret != 0) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, + GF_LOG_ERROR, + "chmod failed on %s (%s)", + pathname, strerror (errno)); + goto loop; + } + /* change the ownership */ + ret = chown (pathname, trav->buf.st_uid, trav->buf.st_gid); + if (ret != 0) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, + GF_LOG_ERROR, + "chown failed on %s (%s)", + pathname, strerror (errno)); + goto loop; + } + } else if ((flags == GF_SET_IF_NOT_PRESENT) || + (flags != GF_SET_DIR_ONLY)) { + /* Create a 0 byte file here */ + if (S_ISREG (trav->buf.st_mode)) { + op_ret = bdb_db_put (bfd->ctx, NULL, + trav->name, NULL, 0, 0, 0); + if (op_ret != 0) { + /* create successful */ + gf_log (this->name, + GF_LOG_ERROR, + "failed to create file %s", + pathname); + } /* if (!op_ret)...else */ + } else if (S_ISLNK (trav->buf.st_mode)) { + /* TODO: impelement */; + } else { + gf_log (this->name, + GF_LOG_ERROR, + "storage/bdb allows to create regular files only" + "file %s (mode = %d) cannot be created", + pathname, trav->buf.st_mode); + } /* if(S_ISREG())...else */ + } /* if(S_ISDIR())...else if */ + loop: + /* consider the next entry */ + trav = trav->next; + } /* while(trav) */ + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + FREE (entry_path); + return 0; +} + +int32_t +bdb_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct stat stbuf = {0,}; + struct bdb_fd *bfd = NULL; + bctx_t *bctx = NULL; + char *db_path = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + bctx = bfd->ctx; + + MAKE_REAL_PATH_TO_STORAGE_DB (db_path, this, bctx->directory); + op_ret = lstat 
(db_path, &stbuf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to lstat on %s (%s)", + db_path, strerror (op_errno)); + goto out; + } + + stbuf.st_ino = fd->inode->ino; + stbuf.st_size = bdb_db_get (bctx, NULL, bfd->key, NULL, 0, 0); + stbuf.st_blocks = BDB_COUNT_BLOCKS (stbuf.st_size, stbuf.st_blksize); + +out: + frame->root->rsp_refs = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + return 0; +} + + +int32_t +bdb_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t off) +{ + struct bdb_dir *bfd = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + size_t filled = 0; + gf_dirent_t *this_entry = NULL; + gf_dirent_t entries; + struct dirent *entry = NULL; + off_t in_case = 0; + int32_t this_size = 0; + DBC *cursorp = NULL; + int32_t count = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + INIT_LIST_HEAD (&entries.list); + + bfd = bdb_extract_bfd (fd, this); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, bfd, out); + + op_errno = ENOMEM; + + while (filled <= size) { + this_entry = NULL; + entry = NULL; + in_case = 0; + this_size = 0; + + in_case = telldir (bfd->dir); + entry = readdir (bfd->dir); + if (!entry) + break; + + if (IS_BDB_PRIVATE_FILE(entry->d_name)) + continue; + + this_size = dirent_size (entry); + + if (this_size + filled > size) { + seekdir (bfd->dir, in_case); + break; + } + + count++; + + this_entry = gf_dirent_for_name (entry->d_name); + this_entry->d_ino = entry->d_ino; + + this_entry->d_off = -1; + + this_entry->d_type = entry->d_type; + this_entry->d_len = entry->d_reclen; + + + list_add (&this_entry->list, &entries.list); + + filled += this_size; + } + op_ret = filled; + op_errno = 0; + if (filled >= size) { + goto out; + } + + /* hungry kyaa? 
*/ + op_ret = bdb_cursor_open (bfd->ctx, &cursorp); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out); + + /* TODO: fix d_off, don't use bfd->offset. wrong method */ + if (strlen (bfd->offset)) { + DBT key = {0,}, value = {0,}; + key.data = bfd->offset; + key.size = strlen (bfd->offset); + key.flags = DB_DBT_USERMEM; + value.dlen = 0; + value.doff = 0; + value.flags = DB_DBT_PARTIAL; + + op_ret = bdb_cursor_get (cursorp, &key, &value, DB_SET); + op_errno = EBADFD; + GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out); + + } else { + /* first time or last time, do nothing */ + } + + while (filled <= size) { + DBT key = {0,}, value = {0,}; + this_entry = NULL; + + key.flags = DB_DBT_MALLOC; + value.dlen = 0; + value.doff = 0; + value.flags = DB_DBT_PARTIAL; + op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT); + + if (op_ret == DB_NOTFOUND) { + /* we reached end of the directory */ + op_ret = 0; + op_errno = 0; + break; + } else if (op_ret != 0) { + gf_log (this->name, + GF_LOG_DEBUG, + "database error during readdir"); + op_ret = -1; + op_errno = ENOENT; + break; + } /* if (op_ret == DB_NOTFOUND)...else if...else */ + + if (key.data == NULL) { + /* NOTE: currently ignore when we get key.data == NULL. 
+ * TODO: we should not get key.data = NULL */ + gf_log (this->name, + GF_LOG_DEBUG, + "null key read from db"); + continue; + }/* if(key.data)...else */ + count++; + this_size = bdb_dirent_size (&key); + if (this_size + filled > size) + break; + /* TODO - consider endianness here */ + this_entry = gf_dirent_for_name ((const char *)key.data); + /* FIXME: bug, if someone is going to use ->d_ino */ + this_entry->d_ino = -1; + this_entry->d_off = 0; + this_entry->d_type = 0; + this_entry->d_len = key.size; + + if (key.data) { + strncpy (bfd->offset, key.data, key.size); + bfd->offset [key.size] = '\0'; + free (key.data); + } + + list_add (&this_entry->list, &entries.list); + + filled += this_size; + }/* while */ + bdb_cursor_close (bfd->ctx, cursorp); + op_ret = filled; + op_errno = 0; +out: + frame->root->rsp_refs = NULL; + gf_log (this->name, + GF_LOG_DEBUG, + "read %"GF_PRI_SIZET" bytes for %d entries", filled, count); + STACK_UNWIND (frame, count, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + + +int32_t +bdb_stats (call_frame_t *frame, + xlator_t *this, + int32_t flags) + +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + struct xlator_stats xlstats = {0, }, *stats = NULL; + struct statvfs buf; + struct timeval tv; + struct bdb_private *private = NULL; + int64_t avg_read = 0; + int64_t avg_write = 0; + int64_t _time_ms = 0; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + + private = (struct bdb_private *)(this->private); + stats = &xlstats; + + op_ret = statvfs (private->export_path, &buf); + op_errno = errno; + if (op_ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to statvfs on %s (%s)", + private->export_path, strerror (op_errno)); + goto out; + } + + stats->nr_files = private->stats.nr_files; + stats->nr_clients = private->stats.nr_clients; /* client info is maintained at FSd */ + stats->free_disk = buf.f_bfree * buf.f_bsize; /* Number of Free block in the filesystem. 
*/ + stats->total_disk_size = buf.f_blocks * buf.f_bsize; /* */ + stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize; + + /* Calculate read and write usage */ + gettimeofday (&tv, NULL); + + /* Read */ + _time_ms = (tv.tv_sec - private->init_time.tv_sec) * 1000 + + ((tv.tv_usec - private->init_time.tv_usec) / 1000); + + avg_read = (_time_ms) ? (private->read_value / _time_ms) : 0; /* KBps */ + avg_write = (_time_ms) ? (private->write_value / _time_ms) : 0; /* KBps */ + + _time_ms = (tv.tv_sec - private->prev_fetch_time.tv_sec) * 1000 + + ((tv.tv_usec - private->prev_fetch_time.tv_usec) / 1000); + if (_time_ms && ((private->interval_read / _time_ms) > private->max_read)) { + private->max_read = (private->interval_read / _time_ms); + } + if (_time_ms && ((private->interval_write / _time_ms) > private->max_write)) { + private->max_write = private->interval_write / _time_ms; + } + + stats->read_usage = avg_read / private->max_read; + stats->write_usage = avg_write / private->max_write; + + gettimeofday (&(private->prev_fetch_time), NULL); + private->interval_read = 0; + private->interval_write = 0; + +out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, stats); + return 0; +} + + +int32_t +bdb_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +bdb_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. 
You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +bdb_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +bdb_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +bdb_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + char *real_path = NULL; + DIR *dir = NULL; + struct dirent *dirent = NULL; + uint8_t file_checksum[ZR_FILENAME_MAX] = {0,}; + uint8_t dir_checksum[ZR_FILENAME_MAX] = {0,}; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t i = 0, length = 0; + bctx_t *bctx = NULL; + DBC *cursorp = NULL; + char *data = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", frame, out); + GF_VALIDATE_OR_GOTO ("bdb", this, out); + GF_VALIDATE_OR_GOTO (this->name, loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + { + dir = opendir (real_path); + op_errno = errno; + GF_VALIDATE_OR_GOTO (this->name, dir, out); + while ((dirent = readdir (dir))) { + if (!dirent) + break; + + if (IS_BDB_PRIVATE_FILE(dirent->d_name)) + continue; + + length = strlen (dirent->d_name); + for (i = 0; i < length; i++) + dir_checksum[i] ^= dirent->d_name[i]; + } /* while((dirent...)) */ + closedir (dir); + } + + { + bctx = bctx_lookup (B_TABLE(this), (char *)loc->path); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, bctx, out); + + op_ret = 
bdb_cursor_open (bctx, &cursorp); + op_errno = EINVAL; + GF_VALIDATE_OR_GOTO (this->name, (op_ret == 0), out); + + while (1) { + DBT key = {0,}, value = {0,}; + + key.flags = DB_DBT_MALLOC; + value.doff = 0; + value.dlen = 0; + op_ret = bdb_cursor_get (cursorp, &key, &value, DB_NEXT); + + if (op_ret == DB_NOTFOUND) { + gf_log (this->name, + GF_LOG_DEBUG, + "end of list of key/value pair in db for " + "directory: %s", bctx->directory); + op_ret = 0; + op_errno = 0; + break; + } else if (op_ret == 0){ + /* successfully read */ + data = key.data; + length = key.size; + for (i = 0; i < length; i++) + file_checksum[i] ^= data[i]; + + free (key.data); + } else { + gf_log (this->name, + GF_LOG_ERROR, + "failed to do cursor get for directory %s: %s", + bctx->directory, db_strerror (op_ret)); + op_ret = -1; + op_errno = ENOENT; + break; + }/* if(op_ret == DB_NOTFOUND)...else if...else */ + } /* while(1) */ + bdb_cursor_close (bctx, cursorp); + } +out: + if (bctx) { + /* NOTE: bctx_unref always returns success, + * see description of bctx_unref for more details */ + bctx_unref (bctx); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum); + + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that bdb xlator is up */ + assert ((this->private != NULL) && + (BDB_ENV(this) != NULL)); + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + default: + /* */ + break; + } + return 0; +} + + + +/** + * init - + */ +int32_t +init (xlator_t *this) +{ + int32_t ret = -1; + struct stat buf = {0,}; + struct bdb_private *_private = NULL; + data_t *directory = NULL; + bctx_t *bctx = NULL; + + GF_VALIDATE_OR_GOTO ("bdb", this, out); + + _private = CALLOC (1, sizeof (*_private)); + GF_VALIDATE_OR_GOTO (this->name, _private, out); + + if (this->children) { + gf_log (this->name, + GF_LOG_ERROR, + "FATAL: storage/bdb cannot have subvolumes"); + FREE (_private); + goto out;; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + directory = dict_get (this->options, "directory"); + if (!directory) { + gf_log (this->name, GF_LOG_ERROR, + "export directory not specified in volfile"); + FREE (_private); + goto out; + } + umask (000); // umask `masking' is done at the client side + /* // * No need to create directory, sys admin should do it himself + if (mkdir (directory->data, 0777) == 0) { + gf_log (this->name, GF_LOG_WARNING, + "directory specified not exists, created"); + } + */ + + /* Check whether the specified directory exists, if not create it. 
*/ + ret = stat (directory->data, &buf); + if ((ret != 0) || !S_ISDIR (buf.st_mode)) { + gf_log (this->name, GF_LOG_ERROR, + "specified directory '%s' doesn't exists, Exiting", directory->data); + FREE (_private); + goto out; + } else { + ret = 0; + } + + + _private->export_path = strdup (directory->data); + _private->export_path_length = strlen (_private->export_path); + + { + /* Stats related variables */ + gettimeofday (&_private->init_time, NULL); + gettimeofday (&_private->prev_fetch_time, NULL); + _private->max_read = 1; + _private->max_write = 1; + } + + this->private = (void *)_private; + { + ret = bdb_db_init (this, this->options); + + if (ret == -1){ + gf_log (this->name, + GF_LOG_DEBUG, + "failed to initialize database"); + goto out; + } else { + bctx = bctx_lookup (_private->b_table, "/"); + /* NOTE: we are not doing bctx_unref() for root bctx, + * let it remain in active list forever */ + if (!bctx) { + gf_log (this->name, + GF_LOG_ERROR, + "failed to allocate memory for root (/) bctx: out of memory"); + goto out; + } else { + ret = 0; + } + } + } +out: + return ret; +} + +void +bctx_cleanup (struct list_head *head) +{ + bctx_t *trav = NULL; + bctx_t *tmp = NULL; + DB *storage = NULL; + + list_for_each_entry_safe (trav, tmp, head, list) { + LOCK (&trav->lock); + storage = trav->dbp; + trav->dbp = NULL; + list_del_init (&trav->list); + UNLOCK (&trav->lock); + + if (storage) { + storage->close (storage, 0); + storage = NULL; + } + } + return; +} + +void +fini (xlator_t *this) +{ + struct bdb_private *private = NULL; + int32_t idx = 0; + int32_t ret = 0; + private = this->private; + + if (B_TABLE(this)) { + /* close all the dbs from lru list */ + bctx_cleanup (&(B_TABLE(this)->b_lru)); + for (idx = 0; idx < B_TABLE(this)->hash_size; idx++) + bctx_cleanup (&(B_TABLE(this)->b_hash[idx])); + + if (BDB_ENV(this)) { + LOCK (&private->active_lock); + private->active = 0; + UNLOCK (&private->active_lock); + + ret = pthread_join (private->checkpoint_thread, 
/* Management-operation table exported by the bdb xlator.
 * Only the 'stats' operation is populated here. */
struct xlator_mops mops = {
        .stats = bdb_stats,
};

/* File-operation table exported by the bdb xlator: each member names the
 * bdb_* handler in this file that implements that operation.
 * NOTE(review): presumably picked up by the xlator loader through the
 * symbol name 'fops' - confirm against the loader. */
struct xlator_fops fops = {
        .lookup      = bdb_lookup,
        .stat        = bdb_stat,
        .opendir     = bdb_opendir,
        .readdir     = bdb_readdir,
        .readlink    = bdb_readlink,
        .mknod       = bdb_mknod,
        .mkdir       = bdb_mkdir,
        .unlink      = bdb_unlink,
        .rmdir       = bdb_rmdir,
        .symlink     = bdb_symlink,
        .rename      = bdb_rename,
        .link        = bdb_link,
        .chmod       = bdb_chmod,
        .chown       = bdb_chown,
        .truncate    = bdb_truncate,
        .utimens     = bdb_utimens,
        .create      = bdb_create,
        .open        = bdb_open,
        .readv       = bdb_readv,
        .writev      = bdb_writev,
        .statfs      = bdb_statfs,
        .flush       = bdb_flush,
        .fsync       = bdb_fsync,
        .setxattr    = bdb_setxattr,
        .getxattr    = bdb_getxattr,
        .removexattr = bdb_removexattr,
        .fsyncdir    = bdb_fsyncdir,
        .access      = bdb_access,
        .ftruncate   = bdb_ftruncate,
        .fstat       = bdb_fstat,
        .lk          = bdb_lk,
        .inodelk     = bdb_inodelk,
        .finodelk    = bdb_finodelk,
        .entrylk     = bdb_entrylk,
        .fentrylk    = bdb_fentrylk,
        .fchown      = bdb_fchown,
        .fchmod      = bdb_fchmod,
        .setdents    = bdb_setdents,
        .getdents    = bdb_getdents,
        .checksum    = bdb_checksum,
};

/* Callback table: handlers invoked when a file fd (release) or a
 * directory fd (releasedir) is released. */
struct xlator_cbks cbks = {
        .release    = bdb_release,
        .releasedir = bdb_releasedir
};
"checkpoint-timeout", GF_OPTION_TYPE_TIME, 0, }, + { "transaction-timeout", GF_OPTION_TYPE_TIME, 0, }, + { "mode", GF_OPTION_TYPE_BOOL, 0, }, // Should be 'cache' ?? + { "access-mode", GF_OPTION_TYPE_STR, 0, 0, 0, "btree"}, + { NULL, 0, } +}; + +#endif /* #if 0 */ diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h new file mode 100644 index 000000000..f2d962680 --- /dev/null +++ b/xlators/storage/bdb/src/bdb.h @@ -0,0 +1,439 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
/* MAKE_REAL_PATH(var,this,path)
 * make the real path on the underlying file-system
 *
 * @var:  destination to hold the real path (stack memory from alloca(),
 *        valid only until the calling function returns)
 * @this: pointer to xlator_t corresponding to bdb xlator
 * @path: path, as seen from mount-point
 */
#define MAKE_REAL_PATH(var, this, path) do {                              \
                int base_len = ((struct bdb_private *)this->private)->export_path_length; \
                var = alloca (strlen (path) + base_len + 2);              \
                strcpy (var, ((struct bdb_private *)this->private)->export_path); \
                strcpy (&var[base_len], path);                            \
        } while (0)

/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path)
 * make the real path to the storage-database file on file-system
 *
 * @var:  destination to hold the real path (stack memory from alloca())
 * @this: pointer to xlator_t corresponding to bdb xlator
 * @path: path of the directory, as seen from mount-point
 *
 * The buffer holds export_path + path + GLFS_BDB_STORAGE + the
 * terminating NUL; the "+ 1" for the NUL was previously missing, so the
 * final strcat() wrote one byte past the allocation.
 */
#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do {                \
                int base_len = ((struct bdb_private *)this->private)->export_path_length; \
                var = alloca (strlen (path) + base_len +                  \
                              strlen (GLFS_BDB_STORAGE) + 1);             \
                strcpy (var, ((struct bdb_private *)this->private)->export_path); \
                strcpy (&var[base_len], path);                            \
                strcat (var, GLFS_BDB_STORAGE);                           \
        } while (0)

/* MAKE_KEY_FROM_PATH(key,path)
 * make a 'key', which we use as key in the underlying database by using
 * the path
 *
 * @key:  destination to hold the key; points into an alloca()'d copy of
 *        @path, so it is valid only until the calling function returns
 * @path: path to file as seen from mount-point
 *
 * The copy needs strlen (path) + 1 bytes; the NUL byte was previously
 * not accounted for.  The trailing ';' after 'while (0)' is kept because
 * existing call sites may rely on it.
 */
#define MAKE_KEY_FROM_PATH(key, path) do {                                \
                char *tmp = alloca (strlen (path) + 1);                   \
                strcpy (tmp, path);                                       \
                key = basename (tmp);                                     \
        }while (0);

/* BDB_DO_LSTAT(path,stbuf,dirent)
 * construct real-path to a dirent and do lstat on the real-path
 *
 * @path:   path to the directory whose readdir is currently in progress
 * @stbuf:  a 'struct stat *'
 * @dirent: a 'struct dirent *'
 *
 * The lstat() result is stored in a variable named 'ret' that must exist
 * in the calling scope.  The path is assembled with snprintf() so that an
 * over-long name is truncated instead of overflowing the
 * GF_PATH_MAX-sized buffer.
 */
#define BDB_DO_LSTAT(path, stbuf, dirent) do {                            \
                char tmp_real_path[GF_PATH_MAX];                          \
                snprintf (tmp_real_path, sizeof (tmp_real_path),          \
                          "%s/%s", (path), (dirent)->d_name);             \
                ret = lstat (tmp_real_path, stbuf);                       \
        } while(0);

/* IS_BDB_PRIVATE_FILE(name)
 * check if a given 'name' is bdb xlator's internal file name
 *
 * @name: basename of a file.
 *
 * bdb xlator reserves file names 'glusterfs_storage.db',
 * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*'
 * (used by libdb).  Note that only names starting with "log.0000" are
 * matched for the 'log.*' family.
 */
#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) ||        \
                                   (!strcmp(name, "glusterfs_storage.db")) || \
                                   (!strcmp(name, "glusterfs_ns.db")) ||  \
                                   (!strncmp(name, "log.0000", 8)))

/* check if 'name' is '.' or '..' entry.
 * Whole-string comparison: the former strncmp() prefix tests matched
 * every name beginning with '.', i.e. all hidden files. */
#define IS_DOT_DOTDOT(name) ((!strcmp(name, ".")) || (!strcmp(name, "..")))
/* BDB_SET_BCTX(this,inode,bctx)
 * store the pointer to a 'struct bdb_ctx' in the inode's ctx.
 * done for directories, either in lookup() or mkdir().
 *
 * @this:  pointer xlator_t of bdb xlator.
 * @inode: inode where 'struct bdb_ctx *' has to be stored.
 * @bctx:  a 'struct bdb_ctx *'
 */
#define BDB_SET_BCTX(this,inode,bctx) do{                                 \
                inode_ctx_put(inode, this, (uint64_t)(long)bctx);         \
        }while (0);

/* MAKE_BCTX_FROM_INODE(this,bctx,inode)
 * extract bdb xlator's 'struct bdb_ctx *' from an inode's ctx.
 *
 * @this:  pointer xlator_t of bdb xlator.
 * @bctx:  a 'struct bdb_ctx *'; assigned only when the inode carries a
 *         ctx for this xlator, otherwise left untouched (so callers that
 *         want NULL on failure must initialise it to NULL).
 * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
 *
 * The result of inode_ctx_get() is now tested directly.  The earlier form
 * tested a variable 'ret' from the calling scope that this macro never
 * assigned, so the outcome depended on whatever the caller last stored
 * there.
 * NOTE(review): assumes inode_ctx_get() returns 0 on success, like the
 * other *_ctx_* helpers used in this tree - confirm.
 */
#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{                         \
                uint64_t tmp_bctx = 0;                                    \
                if (inode_ctx_get (inode, this, &tmp_bctx) == 0)          \
                        bctx = (void *)(long)tmp_bctx;                    \
        }while (0);

/* BDB_SET_BFD(this,fd,bfd)
 * store the pointer @bfd in the ctx of @fd for xlator @this. */
#define BDB_SET_BFD(this,fd,bfd) do{                                      \
                fd_ctx_set (fd, this, (uint64_t)(long)bfd);               \
        }while (0);

/* maximum number of open dbs that bdb xlator will ever have */
#define BDB_MAX_OPEN_DBS 100

/* convert file size to block-count.
 * arguments are parenthesised so that expressions can be passed safely */
#define BDB_COUNT_BLOCKS(size,blksize) \
        ((((size) + (blksize) - 1)/(blksize)) - 1)

/* file permissions, again macros are more readable */
#define RWXRWXRWX         0777
#define DEFAULT_FILE_MODE 0644
#define DEFAULT_DIR_MODE  0755

/* see, if have a valid file permissions specification in @mode:
 * true when no bit outside the rwxrwxrwx range is set */
#define IS_VALID_FILE_MODE(mode) (!((mode) & (~RWXRWXRWX)))
#define IS_VALID_DIR_MODE(mode)  (!((mode) & (~(RWXRWXRWX))))

/* maximum retries for a failed transactional operation */
#define BDB_MAX_RETRIES 10
lru list of inactive 'struct bdb_ctx' */ + struct list_head purge; + uint32_t lru_limit; + uint32_t lru_size; + uint32_t hash_size; + DBTYPE access_mode; /* access mode for accessing the databases, + * can be DB_HASH, DB_BTREE */ + DB_ENV *dbenv; /* DB_ENV under which every db operation + * is carried over */ + int32_t transaction; + xlator_t *this; + + uint64_t page_size; /* page-size of DB, + * DB->set_pagesize(), should be set before DB->open */ +}; + +struct bdb_ctx { + /* controller members */ + struct list_head list; /* lru list of 'struct bdb_ctx's, + * a bdb_ctx can exist in one of b_hash or lru lists */ + struct list_head b_hash; /* directory 'name' hashed list of 'struct bdb_ctx's */ + + struct bctx_table *table; + int32_t ref; /* reference count */ + gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */ + + char *directory; /* directory path */ + DB *dbp; /* pointer to open database, that resides inside this directory */ + uint32_t cache; /* cache ON or OFF */ + + /* per directory cache, bdb xlator's internal cache */ + struct list_head c_list; /* linked list of cached records */ + int32_t c_count; /* number of cached records */ + + int32_t key_hash; /* index to hash table list, to which this ctx belongs */ + char *db_path; /* absolute path to db file */ +}; + +struct bdb_fd { + struct bdb_ctx *ctx; /* pointer to bdb_ctx of the parent directory */ + char *key; /* name of the file. NOTE: basename, not the complete path */ + int32_t flags; /* open flags */ +}; + +struct bdb_dir { + struct bdb_ctx *ctx; /* pointer to bdb_ctx of this directory */ + DIR *dir; /* open directory pointer, as returned by opendir() */ + char offset[NAME_MAX]; /* FIXME: readdir offset, too crude. must go */ + char *path; /* path to this directory */ +}; + +/* cache */ +struct bdb_cache { + struct list_head c_list; /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */ + char *key; /* name of the file this cache holds. 
NOTE: basename of file */ + char *data; /* file content */ + size_t size; /* size of the file content that this cache holds */ +}; + + +struct bdb_private { + inode_table_t *itable; /* pointer to inode table that we use */ + int32_t temp; /**/ + char is_stateless; /**/ + char *export_path; /* path to the export directory + * (option directory <export-path>) */ + int32_t export_path_length; /* length of 'export_path' string */ + + /* statistics */ + struct xlator_stats stats; /* Statistics, provides activity of the server */ + + struct timeval prev_fetch_time; + struct timeval init_time; + int32_t max_read; /* */ + int32_t max_write; /* */ + int64_t interval_read; /* Used to calculate the max_read value */ + int64_t interval_write; /* Used to calculate the max_write value */ + int64_t read_value; /* Total read, from init */ + int64_t write_value; /* Total write, from init */ + + /* bdb xlator specific private data */ + uint64_t envflags; /* flags used for opening DB_ENV for this xlator */ + uint64_t dbflags; /* flags to be used for opening each database */ + uint64_t cache; /* cache: can be either ON or OFF */ + uint32_t transaction; /* transaction: can be either ON or OFF */ + uint32_t active; + gf_lock_t active_lock; + struct bctx_table *b_table; + DBTYPE access_mode; /* access mode for accessing the databases, + * can be DB_HASH, DB_BTREE + * (option access-mode <mode>) */ + mode_t file_mode; /* mode for each and every file stored on bdb + * (option file-mode <mode>) */ + mode_t dir_mode; /* mode for each and every directory stored on bdb + * (option dir-mode <mode>) */ + mode_t symlink_mode; /* mode for each and every symlink stored on bdb */ + pthread_t checkpoint_thread; /* pthread_t object used for creating checkpoint + * thread */ + int32_t checkpoint_timeout; /* time duration between two consecutive checkpoint + * operations. 
/* bdb_txn_begin - start a transaction in @dbenv.
 *
 * @dbenv:  environment in which the transaction is created
 * @ptxnid: receives the new transaction handle
 *
 * Passes NULL as the second argument (no parent transaction) and 0 as
 * flags.  Returns whatever DB_ENV->txn_begin() returns.
 */
static inline int32_t
bdb_txn_begin (DB_ENV *dbenv,
               DB_TXN **ptxnid)
{
        return dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
}

/* bdb_txn_abort - abort transaction @txnid.
 * Returns whatever DB_TXN->abort() returns. */
static inline int32_t
bdb_txn_abort (DB_TXN *txnid)
{
        return txnid->abort (txnid);
}

/* bdb_txn_commit - commit transaction @txnid with flags 0.
 * Returns whatever DB_TXN->commit() returns. */
static inline int32_t
bdb_txn_commit (DB_TXN *txnid)
{
        return txnid->commit (txnid, 0);
}
(struct bdb_ctx *ctx, + DBC *cursorp); + + +int32_t +bdb_dirent_size (DBT *key); + +int32_t +dirent_size (struct dirent *entry); + +int +bdb_db_init (xlator_t *this, + dict_t *options); + +void +bdb_dbs_from_dict_close (dict_t *this, + char *key, + data_t *value, + void *data); + +bctx_t * +bctx_lookup (struct bctx_table *table, + const char *path); + +bctx_t * +bctx_parent +(struct bctx_table *table, + const char *path); + +bctx_t * +bctx_unref (bctx_t *ctx); + +bctx_t * +bctx_ref (bctx_t *ctx); + +bctx_t * +bctx_rename (bctx_t *bctx, + const char *db_newpath); + +int32_t +bdb_db_rename (bctx_table_t *table, + const char *tmp_db_newpath, + const char *real_db_newpath); +#endif /* _BDB_H */ diff --git a/xlators/storage/posix/Makefile.am b/xlators/storage/posix/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/storage/posix/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am new file mode 100644 index 000000000..2859e09aa --- /dev/null +++ b/xlators/storage/posix/src/Makefile.am @@ -0,0 +1,17 @@ + +xlator_LTLIBRARIES = posix.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage + +posix_la_LDFLAGS = -module -avoidversion + +posix_la_SOURCES = posix.c xattr-cache.c +posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = posix.h xattr-cache.h + +AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \ + $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c new file mode 100644 index 000000000..159f02dde --- /dev/null +++ b/xlators/storage/posix/src/posix.c @@ -0,0 +1,3715 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#define __XOPEN_SOURCE 500 + +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#include "glusterfs.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +typedef struct { + xlator_t *this; + const char *real_path; + dict_t *xattr; + struct stat *stbuf; + loc_t *loc; +} posix_xattr_filler_t; + +int +posix_forget (xlator_t *this, inode_t *inode) +{ + uint64_t tmp_cache = 0; + if (!inode_ctx_del (inode, this, &tmp_cache)) + dict_destroy ((dict_t *)(long)tmp_cache); + + return 0; +} + +static void +_posix_xattr_get_set (dict_t *xattr_req, + char *key, + 
data_t *data, + void *xattrargs) +{ + posix_xattr_filler_t *filler = xattrargs; + char *value = NULL; + ssize_t xattr_size = -1; + int ret = -1; + char *databuf = NULL; + int _fd = -1; + loc_t *loc = NULL; + ssize_t req_size = 0; + + + /* should size be put into the data_t ? */ + if (!strcmp (key, "glusterfs.content")) { + /* file content request */ + req_size = data_to_uint64 (data); + if (req_size >= filler->stbuf->st_size) { + _fd = open (filler->real_path, O_RDONLY); + + if (_fd == -1) { + gf_log (filler->this->name, GF_LOG_ERROR, + "opening file %s failed: %s", + filler->real_path, strerror (errno)); + goto err; + } + + databuf = calloc (1, filler->stbuf->st_size); + + if (!databuf) { + gf_log (filler->this->name, GF_LOG_ERROR, + "out of memory :("); + goto err; + } + + ret = read (_fd, databuf, filler->stbuf->st_size); + if (ret == -1) { + gf_log (filler->this->name, GF_LOG_ERROR, + "read on file %s failed: %s", + filler->real_path, strerror (errno)); + goto err; + } + + ret = close (_fd); + _fd = -1; + if (ret == -1) { + gf_log (filler->this->name, GF_LOG_ERROR, + "close on file %s failed: %s", + filler->real_path, strerror (errno)); + goto err; + } + + ret = dict_set_bin (filler->xattr, key, + databuf, filler->stbuf->st_size); + if (ret < 0) { + goto err; + } + + /* To avoid double free in cleanup below */ + databuf = NULL; + err: + if (_fd != -1) + close (_fd); + if (databuf) + FREE (databuf); + } + } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { + loc = filler->loc; + if (!list_empty (&loc->inode->fd_list)) { + ret = dict_set_uint32 (filler->xattr, key, 1); + } else { + ret = dict_set_uint32 (filler->xattr, key, 0); + } + } else { + xattr_size = lgetxattr (filler->real_path, key, NULL, 0); + + if (xattr_size > 0) { + value = calloc (1, xattr_size + 1); + + lgetxattr (filler->real_path, key, value, xattr_size); + + value[xattr_size] = '\0'; + ret = dict_set_bin (filler->xattr, key, + value, xattr_size); + if (ret < 0) + gf_log (filler->this->name, 
GF_LOG_ERROR, + "dict set failed. path: %s, key: %s", + filler->real_path, key); + } + } +} + + +dict_t * +posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, + dict_t *xattr_req, struct stat *buf) +{ + dict_t *xattr = NULL; + posix_xattr_filler_t filler = {0, }; + + xattr = get_new_dict(); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + filler.this = this; + filler.real_path = real_path; + filler.xattr = xattr; + filler.stbuf = buf; + filler.loc = loc; + + dict_foreach (xattr_req, _posix_xattr_get_set, &filler); +out: + return xattr; +} + + +int32_t +posix_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + struct stat buf = {0, }; + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + dict_t * xattr = NULL; + + struct posix_private *priv = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (loc->path, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + priv = this->private; + + op_ret = lstat (real_path, &buf); + op_errno = errno; + + if (op_ret == -1) { + if (op_errno != ENOENT) { + gf_log (this->name, GF_LOG_WARNING, + "lstat on %s failed: %s", + loc->path, strerror (op_errno)); + } + goto out; + } + + /* Make sure we don't access another mountpoint inside export dir. + * It may cause inode number to repeat from single export point, + * which leads to severe problems.. 
+ */ + if (priv->base_stdev != buf.st_dev) { + op_errno = ENOENT; + gf_log (this->name, GF_LOG_WARNING, + "%s: different mountpoint/device, returning " + "ENOENT", loc->path); + goto out; + } + + if (xattr_req && (op_ret == 0)) { + xattr = posix_lookup_xattr_fill (this, real_path, loc, + xattr_req, &buf); + } + + op_ret = 0; +out: + frame->root->rsp_refs = NULL; + + if (xattr) + dict_ref (xattr); + + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &buf, xattr); + + if (xattr) + dict_unref (xattr); + + return 0; +} + + +int32_t +posix_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + struct stat buf = {0,}; + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = lstat (real_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lstat on %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID(); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + +int32_t +posix_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + DIR * dir = NULL; + struct posix_fd * pfd = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (loc->path, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + dir = opendir (real_path); + + if (dir == NULL) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s (%s)", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = dirfd (dir); + if 
(op_ret < 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "dirfd() failed on %s (%s)", + loc->path, strerror (op_errno)); + goto out; + } + + pfd = CALLOC (1, sizeof (*fd)); + if (!pfd) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + pfd->dir = dir; + pfd->fd = dirfd (dir); + pfd->path = strdup (real_path); + if (!pfd->path) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + fd_ctx_set (fd, this, (uint64_t)(long)pfd); + + frame->root->rsp_refs = NULL; + + op_ret = 0; + + out: + if (op_ret == -1) { + if (dir) { + closedir (dir); + dir = NULL; + } + if (pfd) { + if (pfd->path) + FREE (pfd->path); + FREE (pfd); + pfd = NULL; + } + } + + SET_TO_OLD_FS_ID (); + STACK_UNWIND (frame, op_ret, op_errno, fd); + return 0; +} + + +int32_t +posix_getdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, int32_t flag) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + dir_entry_t entries = {0, }; + dir_entry_t * tmp = NULL; + DIR * dir = NULL; + struct dirent * dirent = NULL; + int real_path_len = -1; + int entry_path_len = -1; + char * entry_path = NULL; + int count = 0; + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + struct stat buf = {0,}; + int ret = -1; + char tmp_real_path[ZR_PATH_MAX]; + char linkpath[ZR_PATH_MAX]; + + DECLARE_OLD_FS_ID_VAR ; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "fd %p does not have context in %s", + fd, this->name); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + if (!pfd->path) { + op_errno = EBADFD; + gf_log (this->name, GF_LOG_ERROR, + "pfd does not have path set (possibly file " + "fd, fd=%p)", fd); + goto out; + } + + real_path = pfd->path; + 
real_path_len = strlen (real_path); + + entry_path_len = real_path_len + NAME_MAX; + entry_path = CALLOC (1, entry_path_len); + + if (!entry_path) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + strncpy (entry_path, real_path, entry_path_len); + entry_path[real_path_len] = '/'; + + dir = pfd->dir; + + if (!dir) { + op_errno = EBADFD; + gf_log (this->name, GF_LOG_ERROR, + "pfd does not have dir set (possibly file fd, " + "fd=%p, path=`%s'", + fd, real_path); + goto out; + } + + /* TODO: check for all the type of flag, and behave appropriately */ + + while ((dirent = readdir (dir))) { + if (!dirent) + break; + + /* This helps in self-heal, when only directories + needs to be replicated */ + + /* This is to reduce the network traffic, in case only + directory is needed from posix */ + + strncpy (tmp_real_path, real_path, ZR_PATH_MAX); + strncat (tmp_real_path, "/", + ZR_PATH_MAX - strlen (tmp_real_path)); + + strncat (tmp_real_path, dirent->d_name, + ZR_PATH_MAX - strlen (tmp_real_path)); + ret = lstat (tmp_real_path, &buf); + + if ((flag == GF_GET_DIR_ONLY) + && (ret != -1 && !S_ISDIR(buf.st_mode))) { + continue; + } + + tmp = CALLOC (1, sizeof (*tmp)); + + if (!tmp) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + tmp->name = strdup (dirent->d_name); + if (!tmp->name) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + if (entry_path_len < + (real_path_len + 1 + strlen (tmp->name) + 1)) { + entry_path_len = (real_path_len + + strlen (tmp->name) + 1024); + + entry_path = realloc (entry_path, entry_path_len); + } + + strcpy (&entry_path[real_path_len+1], tmp->name); + + ret = lstat (entry_path, &tmp->buf); + + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + entry_path, strerror (op_errno)); + goto out; + } + + if (S_ISLNK(tmp->buf.st_mode)) { + + ret = readlink 
(entry_path, linkpath, ZR_PATH_MAX); + if (ret != -1) { + linkpath[ret] = '\0'; + tmp->link = strdup (linkpath); + } + } else { + tmp->link = ""; + } + + count++; + + tmp->next = entries.next; + entries.next = tmp; + + /* if size is 0, count can never be = size, so entire + dir is read */ + if (count == size) + break; + } + + FREE (entry_path); + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + if (op_ret == -1) { + if (entry_path) + FREE (entry_path); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &entries, count); + + if (op_ret == 0) { + while (entries.next) { + tmp = entries.next; + entries.next = entries.next->next; + FREE (tmp->name); + FREE (tmp); + } + } + + return 0; +} + + +int32_t +posix_releasedir (xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_del (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "pfd from fd=%p is NULL", fd); + goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (!pfd->dir) { + op_errno = EINVAL; + gf_log (this->name, GF_LOG_ERROR, + "pfd->dir is NULL for fd=%p path=%s", + fd, pfd->path ? pfd->path : "<NULL>"); + goto out; + } + + ret = closedir (pfd->dir); + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "closedir on %p failed", pfd->dir); + goto out; + } + pfd->dir = NULL; + + if (!pfd->path) { + op_errno = EBADFD; + gf_log (this->name, GF_LOG_ERROR, + "pfd->path was NULL. 
fd=%p pfd=%p", + fd, pfd); + goto out; + } + + op_ret = 0; + + out: + if (pfd) { + if (pfd->path) + FREE (pfd->path); + FREE (pfd); + } + + return 0; +} + + +int32_t +posix_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size) +{ + char * dest = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + dest = alloca (size + 1); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = readlink (real_path, dest, size); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "readlink on %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + dest[op_ret] = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + + STACK_UNWIND (frame, op_ret, op_errno, dest); + + return 0; +} + +int32_t +posix_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t dev) +{ + int tmp_fd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = { 0, }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = mknod (real_path, mode, dev); + + if (op_ret == -1) { + op_errno = errno; + if ((op_errno == EINVAL) && S_ISREG (mode)) { + /* Over Darwin, mknod with (S_IFREG|mode) + doesn't work */ + tmp_fd = creat (real_path, mode); + if (tmp_fd == -1) + goto out; + close (tmp_fd); + } else { + + gf_log (this->name, GF_LOG_ERROR, + "mknod on %s: %s", loc->path, + strerror (op_errno)); + goto out; + } + } + +#ifndef HAVE_SET_FSID + op_ret = lchown (real_path, frame->root->uid, frame->root->gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lchown on %s: %s", loc->path, strerror (op_errno)); + goto out; + } +#endif + + 
op_ret = lstat (real_path, &stbuf); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "mknod on %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + + return 0; +} + +int32_t +posix_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + struct stat stbuf = {0, }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = mkdir (real_path, mode); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "mkdir of %s: %s", loc->path, strerror (op_errno)); + goto out; + } + +#ifndef HAVE_SET_FSID + op_ret = chown (real_path, frame->root->uid, frame->root->gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "chown on %s: %s", loc->path, strerror (op_errno)); + goto out; + } +#endif + + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + + return 0; +} + + +int32_t +posix_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + + xattr_cache_handle_t handle = {{0,}, 0}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + loc_copy 
(&handle.loc, loc); + { + posix_xattr_cache_flush (this, &handle); + } + loc_wipe (&handle.loc); + + op_ret = unlink (real_path); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "unlink of %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +int32_t +posix_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + + xattr_cache_handle_t handle = {{0,}, 0}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + loc_copy (&handle.loc, loc); + { + posix_xattr_cache_flush (this, &handle); + } + loc_wipe (&handle.loc); + + op_ret = rmdir (real_path); + op_errno = errno; + + if (op_errno == EEXIST) + /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ + op_errno = ENOTEMPTY; + + if (op_ret == -1 && op_errno != ENOTEMPTY) { + gf_log (this->name, GF_LOG_WARNING, + "rmdir of %s: %s", loc->path, strerror (op_errno)); + goto out; + } + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +int32_t +posix_symlink (call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = { 0, }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (linkname, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = symlink (linkname, real_path); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "symlink of %s 
--> %s: %s", + loc->path, linkname, strerror (op_errno)); + goto out; + } + +#ifndef HAVE_SET_FSID + op_ret = lchown (real_path, frame->root->uid, frame->root->gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lchown failed on %s: %s", + loc->path, strerror (op_errno)); + goto out; + } +#endif + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lstat failed on %s: %s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, &stbuf); + + return 0; +} + + +int +posix_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_oldpath = NULL; + char * real_newpath = NULL; + struct stat stbuf = {0, }; + + xattr_cache_handle_t handle = {{0,}, 0}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (oldloc, out); + VALIDATE_OR_GOTO (newloc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_oldpath, this, oldloc->path); + MAKE_REAL_PATH (real_newpath, this, newloc->path); + + loc_copy (&handle.loc, oldloc); + { + posix_xattr_cache_flush (this, &handle); + } + loc_wipe (&handle.loc); + + op_ret = rename (real_oldpath, real_newpath); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, + (op_errno == ENOTEMPTY ? 
GF_LOG_DEBUG : GF_LOG_ERROR), + "rename of %s to %s failed: %s", + oldloc->path, newloc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_newpath, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lstat on %s failed: %s", + real_newpath, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +int +posix_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_oldpath = 0; + char * real_newpath = 0; + struct stat stbuf = {0, }; + + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (oldloc, out); + VALIDATE_OR_GOTO (newloc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_oldpath, this, oldloc->path); + MAKE_REAL_PATH (real_newpath, this, newloc->path); + + op_ret = link (real_oldpath, real_newpath); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "link %s to %s failed: %s", + oldloc->path, newloc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_newpath, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lstat on %s failed: %s", + real_newpath, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, oldloc->inode, &stbuf); + + return 0; +} + + +int +posix_chmod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + 
MAKE_REAL_PATH (real_path, this, loc->path); + + if (S_ISLNK (loc->inode->st_mode)) { + /* chmod on a link should always succeed */ + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lstat on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + op_ret = 0; + goto out; + } + + op_ret = lchmod (real_path, mode); + if ((op_ret == -1) && (errno == ENOSYS)) { + gf_log (this->name, GF_LOG_DEBUG, + "lchmod not implemented, falling back to chmod"); + op_ret = chmod (real_path, mode); + } + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "chmod on %s failed: %s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +int +posix_chown (call_frame_t *frame, xlator_t *this, + loc_t *loc, uid_t uid, gid_t gid) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = lchown (real_path, uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lchown on %s failed: %s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "lstat on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, 
op_ret, op_errno, &stbuf); + + return 0; +} + + +int32_t +posix_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = truncate (real_path, offset); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "truncate on %s failed: %s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "lstat on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +int +posix_utimens (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct timespec ts[2]) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct stat stbuf = {0,}; + struct timeval tv[2] = {{0,},{0,}}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + tv[0].tv_sec = ts[0].tv_sec; + tv[0].tv_usec = ts[0].tv_nsec / 1000; + tv[1].tv_sec = ts[1].tv_sec; + tv[1].tv_usec = ts[1].tv_nsec / 1000; + + op_ret = lutimes (real_path, tv); + if ((op_ret == -1) && (errno == ENOSYS)) { + op_ret = utimes (real_path, tv); + } + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "utimes on %s: %s", real_path, strerror (op_errno)); + goto out; + } + + op_ret = lstat (real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log 
(this->name, GF_LOG_WARNING, + "lstat on %s: %s", real_path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +int32_t +posix_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t _fd = -1; + int _flags = 0; + char * real_path = NULL; + struct stat stbuf = {0, }; + struct posix_fd * pfd = NULL; + struct posix_private * priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + if (!flags) { + _flags = O_CREAT | O_RDWR | O_EXCL; + } + else { + _flags = flags | O_CREAT; + } + + if (priv->o_direct) + flags |= O_DIRECT; + + _fd = open (real_path, _flags, mode); + + if (_fd == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "open on %s: %s", loc->path, strerror (op_errno)); + goto out; + } + +#ifndef HAVE_SET_FSID + op_ret = chown (real_path, frame->root->uid, frame->root->gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "chown on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } +#endif + + op_ret = fstat (_fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "fstat on %d failed: %s", _fd, strerror (op_errno)); + goto out; + } + + op_ret = -1; + pfd = CALLOC (1, sizeof (*pfd)); + + if (!pfd) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + close (_fd); + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + fd_ctx_set (fd, this, (uint64_t)(long)pfd); + + ((struct posix_private 
*)this->private)->stats.nr_files++; + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, fd, loc->inode, &stbuf); + + return 0; +} + +int32_t +posix_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + int32_t _fd = -1; + struct posix_fd * pfd = NULL; + struct posix_private * priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + if (priv->o_direct) + flags |= O_DIRECT; + + _fd = open (real_path, flags, 0); + if (_fd == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "open on %s: %s", real_path, strerror (op_errno)); + goto out; + } + + pfd = CALLOC (1, sizeof (*pfd)); + + if (!pfd) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + fd_ctx_set (fd, this, (uint64_t)(long)pfd); + + ((struct posix_private *)this->private)->stats.nr_files++; + +#ifndef HAVE_SET_FSID + if (flags & O_CREAT) { + op_ret = chown (real_path, frame->root->uid, frame->root->gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "chown on %s failed: %s", + real_path, strerror (op_errno)); + goto out; + } + } +#endif + + op_ret = 0; + + out: + if (op_ret == -1) { + if (_fd != -1) { + close (_fd); + _fd = -1; + } + } + + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + +#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \ + (unsigned long)(~(bound - 1)))) + +int +posix_readv (call_frame_t *frame, xlator_t *this, 
+ fd_t *fd, size_t size, off_t offset) +{ + uint64_t tmp_pfd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + char * buf = NULL; + char * alloc_buf = NULL; + int _fd = -1; + struct posix_private * priv = NULL; + dict_t * reply_dict = NULL; + struct iovec vec = {0,}; + struct posix_fd * pfd = NULL; + struct stat stbuf = {0,}; + int align = 1; + int ret = -1; + int dict_ret = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL from fd=%p", fd); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + if (!size) { + op_errno = EINVAL; + gf_log (this->name, GF_LOG_ERROR, "size == 0"); + goto out; + } + + if (pfd->flags & O_DIRECT) { + align = 4096; /* align to page boundary */ + } + + alloc_buf = MALLOC (1 * (size + align)); + if (!alloc_buf) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + /* page aligned buffer */ + buf = ALIGN_BUF (alloc_buf, align); + + _fd = pfd->fd; + + op_ret = lseek (_fd, offset, SEEK_SET); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lseek(%"PRId64") failed: %s", + offset, strerror (op_errno)); + goto out; + } + + op_ret = read (_fd, buf, size); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "read failed: %s", strerror (op_errno)); + goto out; + } + + priv->read_value += size; + priv->interval_read += size; + + vec.iov_base = buf; + vec.iov_len = op_ret; + + op_ret = -1; + reply_dict = get_new_dict (); + if (!reply_dict) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + dict_ref (reply_dict); + + dict_ret = dict_set_ptr (reply_dict, NULL, alloc_buf); + if (dict_ret < 0) { + op_errno = -dict_ret; + gf_log (this->name, GF_LOG_ERROR, "could not 
dict_set: (%s)", + strerror (op_errno)); + goto out; + } + + /* + * readv successful, and we need to get the stat of the file + * we read from + */ + + op_ret = fstat (_fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "fstat failed: %s", strerror (op_errno)); + goto out; + } + + op_ret = 0; + out: + if (op_ret == -1) { + frame->root->rsp_refs = NULL; + + if (reply_dict) { + dict_unref (reply_dict); + reply_dict = NULL; + } + + if ((alloc_buf != NULL) && (dict_ret != -1)) + FREE (alloc_buf); + } + + if (reply_dict) + frame->root->rsp_refs = reply_dict; + + STACK_UNWIND (frame, op_ret, op_errno, &vec, 1, &stbuf); + + if (reply_dict) + dict_unref (reply_dict); + + return 0; +} + + +int32_t +posix_writev (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iovec *vector, int32_t count, off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private * priv = NULL; + struct posix_fd * pfd = NULL; + struct stat stbuf = {0,}; + int ret = -1; + + int idx = 0; + int align = 4096; + int max_buf_size = 0; + int retval = 0; + char * buf = NULL; + char * alloc_buf = NULL; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (vector, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO (priv, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL from fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = lseek (_fd, offset, SEEK_SET); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "lseek(%"PRId64") failed: %s", + offset, strerror (op_errno)); + goto out; + } + + /* Check for the O_DIRECT flag during open() */ + if (pfd->flags & O_DIRECT) { + /* This is O_DIRECT'd file */ + op_ret = -1; + for (idx = 0; idx 
< count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; + } + + alloc_buf = MALLOC (1 * (max_buf_size + align)); + if (!alloc_buf) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto out; + } + + for (idx = 0; idx < count; idx++) { + /* page aligned buffer */ + buf = ALIGN_BUF (alloc_buf, align); + + memcpy (buf, vector[idx].iov_base, + vector[idx].iov_len); + + /* not sure whether writev works on O_DIRECT'd fd */ + retval = write (_fd, buf, vector[idx].iov_len); + + if (retval == -1) { + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "O_DIRECT enabled: %s", + strerror (op_errno)); + goto out; + } + + break; + } + if (op_ret == -1) + op_ret = 0; + op_ret += retval; + } + + } else /* if (O_DIRECT) */ { + + /* This is not O_DIRECT'd fd */ + op_ret = writev (_fd, vector, count); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "writev failed: %s", + strerror (op_errno)); + goto out; + } + } + + priv->write_value += op_ret; + priv->interval_write += op_ret; + + if (op_ret >= 0) { + /* wiretv successful, we also need to get the stat of + * the file we wrote to + */ + ret = fstat (_fd, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fstat failed: %s", + strerror (op_errno)); + goto out; + } + } + + out: + if (alloc_buf) { + FREE (alloc_buf); + } + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +int32_t +posix_statfs (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct statvfs buf = {0, }; + struct posix_private * priv = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (this->private, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + priv = this->private; + + 
op_ret = statvfs (real_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s", + strerror (op_errno)); + goto out; + } + + if (!priv->export_statfs) { + buf.f_blocks = 0; + buf.f_bfree = 0; + buf.f_bavail = 0; + buf.f_files = 0; + buf.f_ffree = 0; + buf.f_favail = 0; + } + + op_ret = 0; + + out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + return 0; +} + + +int32_t +posix_flush (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL on fd=%p", fd); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + /* do nothing */ + posix_xattr_cache_flush_all (this); + + op_ret = 0; + + out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + +int32_t +posix_release (xlator_t *this, + fd_t *fd) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private * priv = NULL; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + xattr_cache_handle_t handle = {{0,},0}; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + + priv->stats.nr_files--; + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL from fd=%p", fd); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + handle.fd = fd; + posix_xattr_cache_flush (this, &handle); + + _fd = pfd->fd; + + op_ret = close (_fd); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "close(): %s", strerror (op_errno)); + goto out; + 
} + + if (pfd->dir) { + op_ret = -1; + op_errno = EBADF; + gf_log (this->name, GF_LOG_ERROR, + "pfd->dir is %p (not NULL) for file fd=%p", + pfd->dir, fd); + goto out; + } + + op_ret = 0; + + out: + if (pfd) + FREE (pfd); + + return 0; +} + + +int32_t +posix_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t datasync) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + +#ifdef GF_DARWIN_HOST_OS + /* Always return success in case of fsync in MAC OS X */ + op_ret = 0; + goto out; +#endif + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, "pfd not found in fd's ctx"); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + if (datasync) { + ; +#ifdef HAVE_FDATASYNC + op_ret = fdatasync (_fd); +#endif + } else { + op_ret = fsync (_fd); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "fsync: %s", + strerror (op_errno)); + } + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +static int gf_posix_xattr_enotsup_log; + +int +set_file_contents (xlator_t *this, char *real_path, + data_pair_t *trav, int flags) +{ + char * key = NULL; + char real_filepath[ZR_PATH_MAX] = {0,}; + int32_t file_fd = -1; + int op_ret = 0; + int ret = -1; + + key = &(trav->key[15]); + sprintf (real_filepath, "%s/%s", real_path, key); + + if (flags & XATTR_REPLACE) { + /* if file exists, replace it + * else, error out */ + file_fd = open (real_filepath, O_TRUNC|O_WRONLY); + + if (file_fd == -1) { + goto create; + } + + if (trav->value->len) { + ret = write (file_fd, trav->value->data, + trav->value->len); + if (ret 
== -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "write failed while doing setxattr " + "for key %s on path %s: %s", + key, real_filepath, strerror (errno)); + goto out; + } + + ret = close (file_fd); + if (ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "close failed on %s: %s", + real_filepath, strerror (errno)); + goto out; + } + } + + create: /* we know file doesn't exist, create it */ + + file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644); + + if (file_fd == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "failed to open file %s with O_CREAT: %s", + key, strerror (errno)); + goto out; + } + + ret = write (file_fd, trav->value->data, trav->value->len); + if (ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "write failed on %s while setxattr with " + "key %s: %s", + real_filepath, key, strerror (errno)); + goto out; + } + + ret = close (file_fd); + if (ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "close failed on %s while setxattr with " + "key %s: %s", + real_filepath, key, strerror (errno)); + goto out; + } + } + + out: + return op_ret; +} + +int +handle_pair (xlator_t *this, char *real_path, + data_pair_t *trav, int flags) +{ + int sys_ret = -1; + int ret = 0; + + if (ZR_FILE_CONTENT_REQUEST(trav->key)) { + ret = set_file_contents (this, real_path, trav, flags); + } else { + sys_ret = lsetxattr (real_path, trav->key, trav->value->data, + trav->value->len, flags); + + if (sys_ret < 0) { + if (errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name,GF_LOG_WARNING, + "Extended attributes not " + "supported"); + } else if (errno == ENOENT) { + gf_log (this->name, GF_LOG_DEBUG, + "setxattr on %s failed: %s", real_path, + strerror (errno)); + } else { + +#ifdef GF_DARWIN_HOST_OS + gf_log (this->name, + ((errno == EINVAL) ? + GF_LOG_DEBUG : GF_LOG_WARNING), + "%s: key:%s error:%s", + real_path, trav->key, + strerror (errno)); +#else /* ! 
DARWIN */ + gf_log (this->name, GF_LOG_WARNING, + "%s: key:%s error:%s", + real_path, trav->key, + strerror (errno)); +#endif /* DARWIN */ + } + + ret = -errno; + goto out; + } + } + out: + return ret; +} + +int32_t +posix_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + data_pair_t * trav = NULL; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (dict, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + trav = dict->members_list; + + while (trav) { + ret = handle_pair (this, real_path, trav, flags); + if (ret < 0) { + op_errno = -ret; + goto out; + } + trav = trav->next; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +int +get_file_contents (xlator_t *this, char *real_path, + const char *name, char **contents) +{ + char real_filepath[ZR_PATH_MAX] = {0,}; + char * key = NULL; + int32_t file_fd = -1; + struct stat stbuf = {0,}; + int op_ret = 0; + int ret = -1; + + key = (char *) &(name[15]); + sprintf (real_filepath, "%s/%s", real_path, key); + + op_ret = lstat (real_filepath, &stbuf); + if (op_ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", + real_filepath, strerror (errno)); + goto out; + } + + file_fd = open (real_filepath, O_RDONLY); + + if (file_fd == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s", + real_filepath, strerror (errno)); + goto out; + } + + *contents = CALLOC (stbuf.st_size + 1, sizeof(char)); + + if (! 
*contents) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + goto out; + } + + ret = read (file_fd, *contents, stbuf.st_size); + if (ret <= 0) { + op_ret = -1; + gf_log (this->name, GF_LOG_ERROR, "read on %s failed", + real_filepath); + goto out; + } + + *contents[stbuf.st_size] = '\0'; + + op_ret = close (file_fd); + file_fd = -1; + if (op_ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s", + real_filepath, strerror (errno)); + goto out; + } + + out: + if (op_ret < 0) { + if (*contents) + FREE (*contents); + if (file_fd != -1) + close (file_fd); + } + + return op_ret; +} + +/** + * posix_getxattr - this function returns a dictionary with all the + * key:value pair present as xattr. used for + * both 'listxattr' and 'getxattr'. + */ +int32_t +posix_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOENT; + int32_t list_offset = 0; + size_t size = 0; + size_t remaining_size = 0; + char key[1024] = {0,}; + char * value = NULL; + char * list = NULL; + char * real_path = NULL; + dict_t * dict = NULL; + char * file_contents = NULL; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_REAL_PATH (real_path, this, loc->path); + + if (loc->inode && S_ISDIR(loc->inode->st_mode) && name && + ZR_FILE_CONTENT_REQUEST(name)) { + ret = get_file_contents (this, real_path, name, + &file_contents); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "getting file contents failed: %s", + strerror (op_errno)); + goto out; + } + } + + /* Get the total size */ + dict = get_new_dict (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + goto out; + } + + size = llistxattr (real_path, NULL, 0); + if (size == -1) { + op_errno = errno; + if ((errno == 
ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported."); + } + else { + gf_log (this->name, GF_LOG_ERROR, + "listxattr failed on %s: %s", + real_path, strerror (op_errno)); + } + goto out; + } + + if (size == 0) + goto done; + + list = alloca (size + 1); + if (!list) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + goto out; + } + + size = llistxattr (real_path, list, size); + + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + if(*(list + list_offset) == '\0') + break; + + strcpy (key, list + list_offset); + op_ret = lgetxattr (real_path, key, NULL, 0); + if (op_ret == -1) + break; + + value = CALLOC (op_ret + 1, sizeof(char)); + if (!value) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "out of memory :("); + goto out; + } + + op_ret = lgetxattr (real_path, key, value, op_ret); + if (op_ret == -1) + break; + + value [op_ret] = '\0'; + dict_set (dict, key, data_from_dynptr (value, op_ret)); + remaining_size -= strlen (key) + 1; + list_offset += strlen (key) + 1; + + } /* while (remaining_size > 0) */ + + done: + op_ret = size; + + if (dict) { + dict_ref (dict); + } + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dict) + dict_unref (dict); + + return 0; +} + +int32_t +posix_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + + MAKE_REAL_PATH (real_path, this, loc->path); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + op_ret = lremovexattr (real_path, name); + + if (op_ret == -1) { + op_errno = errno; + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_WARNING, + "removexattr on %s: %s", loc->path, + strerror (op_errno)); + goto out; + } + + op_ret = 0; + + 
out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +posix_fsyncdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = NULL; + int _fd = -1; + int ret = -1; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL, fd=%p", fd); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = 0; + + out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + +void +posix_print_xattr (dict_t *this, + char *key, + data_t *value, + void *data) +{ + gf_log ("posix", GF_LOG_TRACE, + "(key/val) = (%s/%d)", key, data_to_int32 (value)); +} + + +/** + * add_array - add two arrays of 32-bit numbers (stored in network byte order) + * dest = dest + src + * @count: number of 32-bit numbers + * FIXME: handle overflow + */ + +static void +__add_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i])); + } +} + + +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ + + +int +posix_xattrop_common (call_frame_t *frame, xlator_t *this, + xattr_cache_handle_t *handle, + gf_xattrop_flags_t optype, dict_t *xattr) +{ + int32_t *array = NULL; + + int ret = 0; + int count = 0; + + int op_ret = 0; + int op_errno = 0; + + data_pair_t *trav = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (xattr, out); + VALIDATE_OR_GOTO (this, out); + + trav = xattr->members_list; + + while (trav) { + count = trav->value->len / sizeof (int32_t); + array = CALLOC (count, sizeof 
(int32_t)); + + ret = posix_xattr_cache_read (this, handle, trav->key, + array, trav->value->len); + + switch (optype) { + + case GF_XATTROP_ADD_ARRAY: + __add_array (array, (int32_t *) trav->value->data, + trav->value->len / 4); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "unknown xattrop type %d", + optype); + + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ret = posix_xattr_cache_write (this, handle, trav->key, + array, trav->value->len); + + ret = dict_set_bin (xattr, trav->key, array, + trav->value->len); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "key=%s (%s)", + trav->key, strerror (-ret)); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + trav = trav->next; + array = NULL; + } + +out: + if (array) + FREE (array); + + STACK_UNWIND (frame, op_ret, op_errno, xattr); + return 0; +} + + +int +posix_xattrop (call_frame_t *frame, xlator_t *this, + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) +{ + xattr_cache_handle_t handle = {{0,}, 0}; + int ret = -1; + + loc_copy (&handle.loc, loc); + { + ret = posix_xattrop_common (frame, this, &handle, optype, xattr); + } + loc_wipe (&handle.loc); + + return ret; +} + + +int +posix_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +{ + int ret = -1; + xattr_cache_handle_t handle = {{0,}, 0}; + + handle.fd = fd; + + ret = posix_xattrop_common (frame, this, &handle, optype, xattr); + + return ret; +} + + +int +posix_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + MAKE_REAL_PATH (real_path, this, loc->path); + + op_ret = access (real_path, mask & 07); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "access failed on %s: 
%s", + loc->path, strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +posix_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct stat buf = {0,}; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = ftruncate (_fd, offset); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "ftruncate failed: %s", + strerror (errno)); + goto out; + } + + op_ret = fstat (_fd, &buf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", + strerror (errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + +int32_t +posix_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct stat buf = {0,}; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = 
pfd->fd; + + op_ret = fchown (_fd, uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "fchown failed: %s", + strerror (op_errno)); + goto out; + } + + op_ret = fstat (_fd, &buf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", + strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + + +int32_t +posix_fchmod (call_frame_t *frame, xlator_t *this, + fd_t *fd, mode_t mode) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct stat buf = {0,}; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = fchmod (_fd, mode); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "fchmod failed: %s", strerror (errno)); + goto out; + } + + op_ret = fstat (_fd, &buf); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, + "fstat failed: %s", strerror (errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + + return 0; +} + + +static int +same_file_type (mode_t m1, mode_t m2) +{ + return ((S_IFMT & (m1 ^ m2)) == 0); +} + + +static int +ensure_file_type (xlator_t *this, char *pathname, mode_t mode) +{ + struct stat stbuf = {0,}; + int op_ret = 0; + int ret = -1; + + ret = lstat (pathname, &stbuf); + if (ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_CRITICAL, 
+ "stat failed while trying to make sure entry %s " + "is a directory: %s", pathname, strerror (errno)); + goto out; + } + + if (!same_file_type (mode, stbuf.st_mode)) { + op_ret = -EEXIST; + gf_log (this->name, GF_LOG_CRITICAL, + "entry %s is a different type of file " + "than expected", pathname); + goto out; + } + out: + return op_ret; +} + +static int +create_entry (xlator_t *this, int32_t flags, + dir_entry_t *entry, char *pathname) +{ + int op_ret = 0; + int ret = -1; + struct timeval tv[2] = {{0,0},{0,0}}; + + if (S_ISDIR (entry->buf.st_mode)) { + /* + * If the entry is directory, create it by + * calling 'mkdir'. If the entry is already + * present, check if it is a directory, + * and issue a warning if otherwise. + */ + + ret = mkdir (pathname, entry->buf.st_mode); + if (ret == -1) { + if (errno == EEXIST) { + op_ret = ensure_file_type (this, pathname, + entry->buf.st_mode); + } + else { + op_ret = -errno; + gf_log (this->name, GF_LOG_DEBUG, + "mkdir %s with mode (0%o) failed: %s", + pathname, entry->buf.st_mode, + strerror (errno)); + goto out; + } + } + + } else if ((flags & GF_SET_IF_NOT_PRESENT) + || !(flags & GF_SET_DIR_ONLY)) { + + /* create a 0-byte file here */ + + if (S_ISREG (entry->buf.st_mode)) { + ret = open (pathname, O_CREAT|O_EXCL, + entry->buf.st_mode); + + if (ret == -1) { + if (errno == EEXIST) { + op_ret = ensure_file_type (this, + pathname, + entry->buf.st_mode); + } + else { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "Error creating file %s with " + "mode (0%o): %s", + pathname, entry->buf.st_mode, + strerror (errno)); + goto out; + } + } + + close (ret); + + } else if (S_ISLNK (entry->buf.st_mode)) { + ret = symlink (entry->link, pathname); + + if (ret == -1) { + if (errno == EEXIST) { + op_ret = ensure_file_type (this, + pathname, + entry->buf.st_mode); + } + else { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "error creating symlink %s: %s" + , pathname, strerror (errno)); + goto out; + } + } + + } else if 
(S_ISBLK (entry->buf.st_mode) || + S_ISCHR (entry->buf.st_mode) || + S_ISFIFO (entry->buf.st_mode) || + S_ISSOCK (entry->buf.st_mode)) { + + ret = mknod (pathname, entry->buf.st_mode, + entry->buf.st_dev); + + if (ret == -1) { + if (errno == EEXIST) { + op_ret = ensure_file_type (this, + pathname, + entry->buf.st_mode); + } else { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "error creating device file " + "%s: %s", + pathname, strerror (errno)); + goto out; + } + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "invalid mode 0%o for %s", entry->buf.st_mode, + pathname); + op_ret = -EINVAL; + goto out; + } + } + + /* + * Preserve atime and mtime + */ + + if (!S_ISLNK (entry->buf.st_mode)) { + tv[0].tv_sec = entry->buf.st_atime; + tv[1].tv_sec = entry->buf.st_mtime; + ret = utimes (pathname, tv); + if (ret == -1) { + op_ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "utimes %s failed: %s", + pathname, strerror (errno)); + goto out; + } + } + +out: + return op_ret; + +} + + +int +posix_setdents (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t flags, dir_entry_t *entries, + int32_t count) +{ + char * real_path = NULL; + char * entry_path = NULL; + int32_t real_path_len = -1; + int32_t entry_path_len = -1; + int32_t ret = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = {0, }; + struct timeval tv[2] = {{0, }, {0, }}; + uint64_t tmp_pfd = 0; + char pathname[ZR_PATH_MAX] = {0,}; + dir_entry_t * trav = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (entries, out); + + tv[0].tv_sec = tv[0].tv_usec = 0; + tv[1].tv_sec = tv[1].tv_usec = 0; + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_ERROR, + "fd's ctx not found on fd=%p for %s", + fd, this->name); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + real_path = pfd->path; + + if (!real_path) { + op_errno = EINVAL; + gf_log 
(this->name, GF_LOG_ERROR,
+                        "path is NULL on pfd=%p fd=%p", pfd, fd);
+                goto out;
+        }
+
+        /* entry_path is "<real_path>/", used as the prefix for every entry */
+        real_path_len = strlen (real_path);
+        entry_path_len = real_path_len + 256;
+        entry_path = CALLOC (1, entry_path_len);
+
+        if (!entry_path) {
+                op_errno = errno;
+                gf_log (this->name, GF_LOG_ERROR, "out of memory :(");
+                goto out;
+        }
+
+        strcpy (entry_path, real_path);
+        entry_path[real_path_len] = '/';
+
+        posix_xattr_cache_flush_all (this);
+
+        /* fd exists, and everything looks fine */
+        /**
+         * create an entry for each one present in '@entries'
+         *  - if flag is set (ie, if its namespace), create both directories
+         *    and files
+         *  - if not set, create only directories.
+         *
+         *  after the entry is created, change the mode and ownership of the
+         *  entry according to the stat present in entries->buf.
+         */
+
+        trav = entries->next;
+        while (trav) {
+                /* Build "<dir>/<name>" with a bounds check: 'pathname' is a
+                 * fixed-size buffer and the entry name is supplied by the
+                 * caller, so an unchecked strcpy/strcat could overflow it. */
+                ret = snprintf (pathname, sizeof (pathname), "%s%s",
+                                entry_path, trav->name);
+                if ((ret < 0) || (ret >= (int32_t) sizeof (pathname))) {
+                        op_errno = ENAMETOOLONG;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "path too long for entry %s under %s",
+                                trav->name, real_path);
+                        goto out;
+                }
+
+                ret = create_entry (this, flags, trav, pathname);
+                if (ret < 0) {
+                        /* create_entry returns a negated errno */
+                        op_errno = -ret;
+                        goto out;
+                }
+
+                /* TODO: handle another flag, GF_SET_OVERWRITE */
+
+                /* Change the mode */
+                if (!S_ISLNK (trav->buf.st_mode)) {
+                        ret = chmod (pathname, trav->buf.st_mode);
+                        if (ret == -1) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "chmod on %s failed: %s", pathname,
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                }
+
+                /* change the ownership */
+                ret = lchown (pathname, trav->buf.st_uid, trav->buf.st_gid);
+                if (ret == -1) {
+                        op_errno = errno;
+                        /* report the call that actually failed */
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "lchown on %s failed: %s", pathname,
+                                strerror (op_errno));
+                        goto out;
+                }
+
+                /* 'tv' is all zeroes, i.e. reset atime/mtime to the epoch */
+                if (flags & GF_SET_EPOCH_TIME) {
+                        ret = utimes (pathname, tv);
+                        if (ret == -1) {
+                                op_errno = errno;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "utimes on %s failed: %s", pathname,
+                                        strerror (op_errno));
+                                goto out;
+                        }
+                }
+
+                /* consider the next entry */
+                trav = trav->next;
+        }
+
+        op_ret = 0;
+ out:
+        frame->root->rsp_refs = NULL;
+        STACK_UNWIND (frame, op_ret, op_errno);
+        if (entry_path)
+                FREE 
(entry_path); + + return 0; +} + +int32_t +posix_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + int _fd = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct stat buf = {0,}; + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = fstat (_fd, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_WARNING, "fstat failed: %s", + strerror (op_errno)); + goto out; + } + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &buf); + return 0; +} + +static int gf_posix_lk_log; + +int32_t +posix_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + struct flock nullock = {0, }; + frame->root->rsp_refs = NULL; + + gf_posix_lk_log++; + + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR, + "\"features/posix-locks\" translator is " + "not loaded, you need to use it"); + + STACK_UNWIND (frame, -1, ENOSYS, &nullock); + return 0; +} + +int32_t +posix_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. 
" + "You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + +int32_t +posix_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. " + "You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +posix_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. " + "You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + +int32_t +posix_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type) +{ + frame->root->rsp_refs = NULL; + + gf_log (this->name, GF_LOG_CRITICAL, + "\"features/posix-locks\" translator is not loaded. 
" + " You need to use it for proper functioning of GlusterFS"); + + STACK_UNWIND (frame, -1, ENOSYS); + return 0; +} + + +int32_t +posix_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off) +{ + uint64_t tmp_pfd = 0; + struct posix_fd * pfd = NULL; + DIR * dir = NULL; + int ret = -1; + size_t filled = 0; + int count = 0; + + int32_t op_ret = -1; + int32_t op_errno = 0; + + gf_dirent_t * this_entry = NULL; + gf_dirent_t entries; + struct dirent * entry = NULL; + off_t in_case = -1; + int32_t this_size = -1; + + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + INIT_LIST_HEAD (&entries.list); + + ret = fd_ctx_get (fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + dir = pfd->dir; + + if (!dir) { + gf_log (this->name, GF_LOG_ERROR, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto out; + } + + + if (!off) { + rewinddir (dir); + } else { + seekdir (dir, off); + } + + while (filled <= size) { + in_case = telldir (dir); + + if (in_case == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "telldir failed: %s", + strerror (errno)); + goto out; + } + + errno = 0; + entry = readdir (dir); + + if (!entry) { + if (errno == EBADF) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "readdir failed: %s", + strerror (op_errno)); + goto out; + } + break; + } + + this_size = dirent_size (entry); + + if (this_size + filled > size) { + seekdir (dir, in_case); + break; + } + + + this_entry = gf_dirent_for_name (entry->d_name); + + if (!this_entry) { + gf_log (this->name, GF_LOG_ERROR, + "could not create gf_dirent for entry %s (%s)", + entry->d_name, strerror (errno)); + goto out; + } + this_entry->d_off = telldir (dir); + this_entry->d_ino = entry->d_ino; + + list_add_tail (&this_entry->list, &entries.list); + + filled += this_size; + count ++; 
+ } + + op_ret = count; + + out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + + +int32_t +posix_stats (call_frame_t *frame, xlator_t *this, + int32_t flags) + +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + + struct xlator_stats xlstats = {0, }; + struct xlator_stats * stats = NULL; + struct statvfs buf = {0,}; + struct timeval tv = {0,}; + struct posix_private * priv = (struct posix_private *)this->private; + + int64_t avg_read = 0; + int64_t avg_write = 0; + int64_t _time_ms = 0; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + + stats = &xlstats; + + op_ret = statvfs (priv->base_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "statvfs failed: %s", + strerror (op_errno)); + goto out; + } + + /* client info is maintained at FSd */ + stats->nr_clients = priv->stats.nr_clients; + stats->nr_files = priv->stats.nr_files; + + /* number of free block in the filesystem. */ + stats->free_disk = buf.f_bfree * buf.f_bsize; + + stats->total_disk_size = buf.f_blocks * buf.f_bsize; + stats->disk_usage = (buf.f_blocks - buf.f_bavail) * buf.f_bsize; + + /* Calculate read and write usage */ + op_ret = gettimeofday (&tv, NULL); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "gettimeofday failed: %s", strerror (errno)); + goto out; + } + + /* Read */ + _time_ms = (tv.tv_sec - priv->init_time.tv_sec) * 1000 + + ((tv.tv_usec - priv->init_time.tv_usec) / 1000); + + avg_read = (_time_ms) ? (priv->read_value / _time_ms) : 0; /* KBps */ + avg_write = (_time_ms) ? 
(priv->write_value / _time_ms) : 0; /* KBps */ + + _time_ms = (tv.tv_sec - priv->prev_fetch_time.tv_sec) * 1000 + + ((tv.tv_usec - priv->prev_fetch_time.tv_usec) / 1000); + + if (_time_ms && ((priv->interval_read / _time_ms) > priv->max_read)) { + priv->max_read = (priv->interval_read / _time_ms); + } + + if (_time_ms && + ((priv->interval_write / _time_ms) > priv->max_write)) { + priv->max_write = priv->interval_write / _time_ms; + } + + stats->read_usage = avg_read / priv->max_read; + stats->write_usage = avg_write / priv->max_write; + + op_ret = gettimeofday (&(priv->prev_fetch_time), NULL); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "gettimeofday failed: %s", + strerror (op_errno)); + goto out; + } + + priv->interval_read = 0; + priv->interval_write = 0; + + op_ret = 0; + + out: + SET_TO_OLD_FS_ID (); + + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, stats); + return 0; +} + +int32_t +posix_checksum (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flag) +{ + char * real_path = NULL; + DIR * dir = NULL; + struct dirent * dirent = NULL; + uint8_t file_checksum[ZR_FILENAME_MAX] = {0,}; + uint8_t dir_checksum[ZR_FILENAME_MAX] = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + int i = 0; + int length = 0; + + struct stat buf = {0,}; + char tmp_real_path[ZR_PATH_MAX] = {0,}; + int ret = -1; + + MAKE_REAL_PATH (real_path, this, loc->path); + + dir = opendir (real_path); + + if (!dir){ + op_errno = errno; + gf_log (this->name, GF_LOG_DEBUG, + "opendir() failed on `%s': %s", + real_path, strerror (op_errno)); + goto out; + } + + while ((dirent = readdir (dir))) { + errno = 0; + if (!dirent) { + if (errno != 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_DEBUG, + "readdir() failed: %s", + strerror (errno)); + goto out; + } + break; + } + + length = strlen (dirent->d_name); + + strcpy (tmp_real_path, real_path); + strcat (tmp_real_path, "/"); + strcat (tmp_real_path, dirent->d_name); + ret 
= lstat (tmp_real_path, &buf); + + if (ret == -1) + continue; + + if (S_ISDIR (buf.st_mode)) { + for (i = 0; i < length; i++) + dir_checksum[i] ^= dirent->d_name[i]; + } else { + for (i = 0; i < length; i++) + file_checksum[i] ^= dirent->d_name[i]; + } + } + closedir (dir); + + op_ret = 0; + + out: + frame->root->rsp_refs = NULL; + STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum); + + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) +{ + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + default: + /* */ + break; + } + return 0; +} + +/** + * init - + */ +int +init (xlator_t *this) +{ + int ret = 0; + int op_ret = -1; + gf_boolean_t tmp_bool = 0; + struct stat buf = {0,}; + struct posix_private * _private = NULL; + data_t * dir_data = NULL; + data_t * tmp_data = NULL; + + dir_data = dict_get (this->options, "directory"); + + if (this->children) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: storage/posix cannot have subvolumes"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + if (!dir_data) { + gf_log (this->name, GF_LOG_ERROR, + "export directory not specified in volfile"); + ret = -1; + goto out; + } + + umask (000); // umask `masking' is done at the client side + + /* Check whether the specified directory exists, if not create it. 
*/
+        /* The lstat() result lives in op_ret; 'ret' is still 0 at this
+         * point and must not be the variable tested here. */
+        op_ret = lstat (dir_data->data, &buf);
+        if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "directory '%s' doesn't exists, Exiting",
+                        dir_data->data);
+                ret = -1;
+                goto out;
+        }
+
+
+        /* Check for Extended attribute support, if not present, log it */
+        op_ret = lsetxattr (dir_data->data,
+                            "trusted.glusterfs.test", "working", 8, 0);
+        if (op_ret < 0) {
+                /* xattrs are mandatory unless "mandate-attribute" is
+                 * explicitly set to a false value */
+                tmp_data = dict_get (this->options,
+                                     "mandate-attribute");
+                if (tmp_data) {
+                        if (gf_string2boolean (tmp_data->data,
+                                               &tmp_bool) == -1) {
+                                /* name the option the way the volfile does */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "wrong option provided for key "
+                                        "\"mandate-attribute\"");
+                                ret = -1;
+                                goto out;
+                        }
+                        if (!tmp_bool) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "Extended attribute not supported, "
+                                        "starting as per option");
+                        } else {
+                                gf_log (this->name, GF_LOG_CRITICAL,
+                                        "Extended attribute not supported, "
+                                        "exiting");
+                                ret = -1;
+                                goto out;
+                        }
+                } else {
+                        gf_log (this->name, GF_LOG_CRITICAL,
+                                "Extended attribute not supported, exiting");
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        _private = CALLOC (1, sizeof (*_private));
+        if (!_private) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                ret = -1;
+                goto out;
+        }
+
+        _private->base_path = strdup (dir_data->data);
+        if (!_private->base_path) {
+                /* strlen() below would dereference NULL */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                FREE (_private);
+                ret = -1;
+                goto out;
+        }
+        _private->base_path_length = strlen (_private->base_path);
+        _private->base_stdev = buf.st_dev;
+
+        _private->xattr_cache = posix_xattr_cache_init (16);
+        if (!_private->xattr_cache) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "out of memory :(");
+                /* nothing else references the private data yet, so
+                 * release it instead of leaking it */
+                FREE (_private->base_path);
+                FREE (_private);
+                ret = -1;
+                goto out;
+        }
+
+        {
+                /* Stats related variables */
+                gettimeofday (&_private->init_time, NULL);
+                gettimeofday (&_private->prev_fetch_time, NULL);
+                /* start at 1 so posix_stats never divides by zero */
+                _private->max_read = 1;
+                _private->max_write = 1;
+        }
+
+        _private->export_statfs = 1;
+        tmp_data = dict_get (this->options, "export-statfs-size");
+        if (tmp_data) {
+                if (gf_string2boolean (tmp_data->data,
+                                       &_private->export_statfs) == -1) {
+                        ret = -1;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'export-statfs-size' takes only boolean "
+ 
"options"); + goto out; + } + if (!_private->export_statfs) + gf_log (this->name, GF_LOG_DEBUG, + "'statfs()' returns dummy size"); + } + + tmp_data = dict_get (this->options, "o-direct"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) + gf_log (this->name, GF_LOG_DEBUG, + "o-direct mode is enabled (O_DIRECT " + "for every open)"); + } + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to set 'ulimit -n " + " 1048576': %s", strerror(errno)); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set max open fd to " + "64k: %s", strerror(errno)); + } + else { + gf_log (this->name, GF_LOG_ERROR, + "max open fd set to 64k"); + } + } + } +#endif + + this->private = (void *)_private; + + out: + return ret; +} + +void +fini (xlator_t *this) +{ + struct posix_private *priv = this->private; + lremovexattr (priv->base_path, "trusted.glusterfs.test"); + FREE (priv); + return; +} + +struct xlator_mops mops = { + .stats = posix_stats, +}; + +struct xlator_fops fops = { + .lookup = posix_lookup, + .stat = posix_stat, + .opendir = posix_opendir, + .readdir = posix_readdir, + .readlink = posix_readlink, + .mknod = posix_mknod, + .mkdir = posix_mkdir, + .unlink = posix_unlink, + .rmdir = posix_rmdir, + .symlink = posix_symlink, + .rename = posix_rename, + .link = posix_link, + .chmod = posix_chmod, + .chown = posix_chown, + .truncate = posix_truncate, + .utimens = posix_utimens, + .create = posix_create, + .open = posix_open, + .readv = posix_readv, + .writev = posix_writev, + .statfs = posix_statfs, + .flush = posix_flush, + .fsync = posix_fsync, + .setxattr 
= posix_setxattr, + .getxattr = posix_getxattr, + .removexattr = posix_removexattr, + .fsyncdir = posix_fsyncdir, + .access = posix_access, + .ftruncate = posix_ftruncate, + .fstat = posix_fstat, + .lk = posix_lk, + .inodelk = posix_inodelk, + .finodelk = posix_finodelk, + .entrylk = posix_entrylk, + .fentrylk = posix_fentrylk, + .fchown = posix_fchown, + .fchmod = posix_fchmod, + .setdents = posix_setdents, + .getdents = posix_getdents, + .checksum = posix_checksum, + .xattrop = posix_xattrop, + .fxattrop = posix_fxattrop, +}; + +struct xlator_cbks cbks = { + .release = posix_release, + .releasedir = posix_releasedir, + .forget = posix_forget +}; + +struct volume_options options[] = { + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {NULL} } +}; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h new file mode 100644 index 000000000..b162139c9 --- /dev/null +++ b/xlators/storage/posix/src/posix.h @@ -0,0 +1,110 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _POSIX_H +#define _POSIX_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <dirent.h> + +#ifdef linux +#ifdef __GLIBC__ +#include <sys/fsuid.h> +#else +#include <unistd.h> +#endif +#endif + +#ifdef HAVE_SYS_XATTR_H +#include <sys/xattr.h> +#endif + +#ifdef HAVE_SYS_EXTATTR_H +#include <sys/extattr.h> +#endif + +#include "xlator.h" +#include "inode.h" +#include "compat.h" + +#include "xattr-cache.h" + +/** + * posix_fd - internal structure common to file and directory fd's + */ + +struct posix_fd { + int fd; /* fd returned by the kernel */ + int32_t flags; /* flags for open/creat */ + char * path; /* used by setdents/getdents */ + DIR * dir; /* handle returned by the kernel */ +}; + +struct posix_private { + char *base_path; + int32_t base_path_length; + dev_t base_stdev; + + xattr_cache_t *xattr_cache; + + /* Statistics, provides activity of the server */ + struct xlator_stats stats; + + struct timeval prev_fetch_time; + struct timeval init_time; + + int32_t max_read; /* */ + int32_t max_write; /* */ + int64_t interval_read; /* Used to calculate the max_read value */ + int64_t interval_write; /* Used to calculate the max_write value */ + int64_t read_value; /* Total read, from init */ + int64_t write_value; /* Total write, from init */ + +/* + In some cases, two exported volumes may reside on the same + partition on the server. Sending statvfs info for both + the volumes will lead to erroneous df output at the client, + since free space on the partition will be counted twice. + + In such cases, user can disable exporting statvfs info + on one of the volumes by setting this option. 
+*/ + gf_boolean_t export_statfs; + + gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ +}; + +#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path) + +#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)this->private)->base_path_length) + +#define MAKE_REAL_PATH(var, this, path) do { \ + var = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2); \ + strcpy (var, POSIX_BASE_PATH(this)); \ + strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \ + } while (0) + +#endif /* _POSIX_H */ diff --git a/xlators/storage/posix/src/xattr-cache.c b/xlators/storage/posix/src/xattr-cache.c new file mode 100644 index 000000000..a39c35ae2 --- /dev/null +++ b/xlators/storage/posix/src/xattr-cache.c @@ -0,0 +1,521 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include "byte-order.h" + +#include "xattr-cache.h" +#include "posix.h" +#include "compat-errno.h" + +static int +__hgetxattr (xattr_cache_handle_t *handle, xlator_t *this, + const char *key, void *value, size_t len) +{ + char * real_path = NULL; + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + int op_ret = -1; + int ret = -1; + int _fd = -1; + + if (handle->loc.path) { + MAKE_REAL_PATH (real_path, this, handle->loc.path); + op_ret = lgetxattr (real_path, key, value, len); + + if (op_ret == -1) + op_ret = -errno; + } else { + ret = fd_ctx_get (handle->fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get pfd from fd=%p", + handle->fd); + op_ret = -EBADFD; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + _fd = pfd->fd; + + op_ret = fgetxattr (_fd, key, value, len); + if (op_ret == -1) + op_ret = -errno; + } + +out: + return op_ret; +} + + +static int +__hsetxattr (xattr_cache_handle_t *handle, xlator_t *this, + const char *key, void *value, size_t len, int flags) +{ + char * real_path = NULL; + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + int op_ret = -1; + int ret = -1; + int _fd = -1; + + if (handle->loc.path) { + MAKE_REAL_PATH (real_path, this, handle->loc.path); + + op_ret = lsetxattr (real_path, key, value, len, flags); + if (op_ret == -1) + op_ret = -errno; + } else { + ret = fd_ctx_get (handle->fd, this, &tmp_pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get pfd from fd=%p", + handle->fd); + + op_ret = -EBADFD; + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + _fd = pfd->fd; + + op_ret = fsetxattr (_fd, key, value, len, flags); + if (op_ret == -1) + op_ret = -errno; + } + +out: + return op_ret; +} + + +static xattr_cache_entry_t * +__cache_lookup (xattr_cache_t *cache, inode_t *inode, char *key) +{ + int i = 0; + + for (i = 0; i < cache->size; i++) { + if ((cache->entries[i]->inode == inode) + && (!strcmp (cache->entries[i]->key, key))) { + 
cache->entries[i]->nraccess++; + return cache->entries[i]; + } + } + + return NULL; +} + + +static xattr_cache_entry_t * +__cache_least_used_entry (xattr_cache_t *cache) +{ + xattr_cache_entry_t *lue = cache->entries[0]; + int i; + + for (i = 0; i < cache->size; i++) { + if (cache->entries[i]->nraccess < lue->nraccess) + lue = cache->entries[i]; + } + + lue->nraccess++; + return lue; +} + + +static inode_t * +__inode_for_handle (xattr_cache_handle_t *handle) +{ + inode_t *inode = NULL; + + if (handle->loc.path) + inode = handle->loc.inode; + else if (handle->fd) + inode = handle->fd->inode; + + return inode; +} + + +static void +__free_handle (xattr_cache_handle_t *handle) +{ + if (handle->loc.path) + loc_wipe (&handle->loc); + + FREE (handle); +} + + +static xattr_cache_handle_t * +__copy_handle (xattr_cache_handle_t *handle) +{ + xattr_cache_handle_t *hnew = calloc (1, sizeof (xattr_cache_handle_t)); + + if (handle->loc.path) + loc_copy (&hnew->loc, &handle->loc); + else + hnew->fd = handle->fd; + + return hnew; +} + + +static int +__cache_populate_entry (xattr_cache_entry_t *entry, xlator_t *this, + xattr_cache_handle_t *handle, char *key, size_t len) +{ + int op_ret = -1; + + entry->array = calloc (1, len); + if (!entry->array) { + op_ret = -ENOMEM; + goto out; + } + + op_ret = __hgetxattr (handle, this, key, entry->array, len); + + entry->key = strdup (key); + entry->inode = __inode_for_handle (handle); + entry->handle = __copy_handle (handle); + entry->len = len; + entry->nraccess = 1; + +out: + return op_ret; +} + + +static int +__cache_flush_entry (xattr_cache_entry_t *entry, xlator_t *this) +{ + int ret = -1; + + if (entry->dirty) { + ret = __hsetxattr (entry->handle, this, + entry->key, entry->array, entry->len, 0); + } + + entry->len = 0; + entry->nraccess = 0; + entry->dirty = 0; + entry->inode = NULL; + + if (entry->key) { + FREE (entry->key); + entry->key = NULL; + } + + if (entry->array) { + FREE (entry->array); + entry->array = NULL; + } + + if 
(entry->handle) { + __free_handle (entry->handle); + entry->handle = NULL; + } + + return 0; +} + + +static void +__print_array (char *str, xlator_t *this, int32_t *array, size_t len) +{ + char *ptr = NULL; + char *buf = NULL; + + int i, count = -1; + + count = len / sizeof (int32_t); + + /* 10 digits per entry + 1 space + '[' and ']' */ + buf = malloc (count * 11 + 8); + + ptr = buf; + ptr += sprintf (ptr, "[ "); + for (i = 0; i < count; i++) + ptr += sprintf (ptr, "%d ", ntoh32 (array[i])); + ptr += sprintf (ptr, "]"); + + gf_log (this->name, GF_LOG_DEBUG, + "%s%s", str, buf); + + FREE (buf); +} + + +int +posix_xattr_cache_read (xlator_t *this, xattr_cache_handle_t *handle, + char *key, int32_t *array, size_t len) +{ + xattr_cache_entry_t *entry = NULL; + xattr_cache_entry_t *purgee = NULL; + + xattr_cache_t *cache = NULL; + inode_t *inode = NULL; + + int op_ret = -1; + + inode = __inode_for_handle (handle); + + if (!inode) { + gf_log (this->name, GF_LOG_DEBUG, + "handle has no inode!"); + goto out; + } + + cache = ((struct posix_private *) (this->private))->xattr_cache; + + pthread_mutex_lock (&cache->lock); + { + entry = __cache_lookup (cache, inode, key); + + if (entry) { + if (handle->loc.path) + gf_log (this->name, GF_LOG_DEBUG, + "cache hit for %s", handle->loc.path); + else if (handle->fd) + gf_log (this->name, GF_LOG_DEBUG, + "cache hit for fd=%p", handle->fd); + } + + if (!entry) { + purgee = __cache_least_used_entry (cache); + + if (purgee->handle && purgee->handle->loc.path) + gf_log (this->name, GF_LOG_DEBUG, + "flushing and purging entry for %s", + purgee->handle->loc.path); + else if (purgee->handle && purgee->handle->fd) + gf_log (this->name, GF_LOG_DEBUG, + "flushing and purging entry for fd=%p", + purgee->handle->fd); + __cache_flush_entry (purgee, this); + + if (handle->loc.path) + gf_log (this->name, GF_LOG_DEBUG, + "populating entry for %s", + handle->loc.path); + else if (handle->fd) + gf_log (this->name, GF_LOG_DEBUG, + "populating entry for 
fd=%p", + handle->fd); + __cache_populate_entry (purgee, this, handle, key, len); + + entry = purgee; + } + + memcpy (array, entry->array, len); + + __print_array ("read array: ", this, array, len); + } + pthread_mutex_unlock (&cache->lock); + + op_ret = 0; +out: + return op_ret; +} + + +int posix_xattr_cache_write (xlator_t *this, xattr_cache_handle_t *handle, + char *key, int32_t *array, size_t len) +{ + xattr_cache_t * cache = NULL; + xattr_cache_entry_t * entry = NULL; + + inode_t *inode = NULL; + + int op_ret = -1; + + inode = __inode_for_handle (handle); + + if (!inode) { + gf_log (this->name, GF_LOG_DEBUG, + "handle has no inode!"); + goto out; + } + + cache = ((struct posix_private *) (this->private))->xattr_cache; + + pthread_mutex_lock (&cache->lock); + { + entry = __cache_lookup (cache, inode, key); + + if (entry) { + entry->dirty = 1; + memcpy (entry->array, array, len); + } else { + /* + * This case shouldn't usually happen, since the + * entry should have been brought into the cache + * by the previous read (xattrop always does a read & + * write). + * + * If we've reached here, it means things are happening + * very quickly and the entry was flushed after read + * but before this write. 
In that case, let's just + * write this to disk + */ + + op_ret = __hsetxattr (handle, this, key, array, + len, 0); + } + + __print_array ("wrote array: ", this, array, len); + } + pthread_mutex_unlock (&cache->lock); + + op_ret = 0; +out: + return op_ret; +} + + +int posix_xattr_cache_flush (xlator_t *this, xattr_cache_handle_t *handle) +{ + xattr_cache_t *cache = NULL; + xattr_cache_entry_t *entry = NULL; + + int i; + inode_t *inode = NULL; + + int op_ret = -1; + + inode = __inode_for_handle (handle); + if (!inode) { + gf_log (this->name, GF_LOG_DEBUG, + "handle has no inode!"); + op_ret = -EINVAL; + goto out; + } + + cache = ((struct posix_private *) (this->private))->xattr_cache; + + pthread_mutex_lock (&cache->lock); + { + for (i = 0; i < cache->size; i++) { + entry = cache->entries[i]; + + if (entry->inode == inode) { + if (entry->handle->loc.path) + gf_log (this->name, GF_LOG_DEBUG, + "force flushing entry for %s", + entry->handle->loc.path); + + else if (cache->entries[i]->handle->fd) + gf_log (this->name, GF_LOG_DEBUG, + "force flushing entry for fd=%p", + entry->handle->fd); + + __cache_flush_entry (entry, this); + } + } + } + pthread_mutex_unlock (&cache->lock); + + op_ret = 0; +out: + return op_ret; +} + + +int +posix_xattr_cache_flush_all (xlator_t *this) +{ + xattr_cache_t *cache = NULL; + xattr_cache_entry_t *entry = NULL; + + int i; + int op_ret = 0; + + cache = ((struct posix_private *) (this->private))->xattr_cache; + + pthread_mutex_lock (&cache->lock); + { + gf_log (this->name, GF_LOG_DEBUG, + "flushing entire xattr cache: "); + + for (i = 0; i < cache->size; i++) { + entry = cache->entries[i]; + + if (!entry || !entry->handle) + continue; + + if (entry->handle->loc.path) + gf_log (this->name, GF_LOG_DEBUG, + " force flushing entry for %s", + entry->handle->loc.path); + + else if (cache->entries[i]->handle->fd) + gf_log (this->name, GF_LOG_DEBUG, + " force flushing entry for fd=%p", + entry->handle->fd); + + __cache_flush_entry (entry, this); + 
} + } + pthread_mutex_unlock (&cache->lock); + + return op_ret; +} + + +xattr_cache_t * +posix_xattr_cache_init (size_t size) +{ + int i = 0; + xattr_cache_t * cache = NULL; + int op_ret = -1; + + cache = CALLOC (1, sizeof (xattr_cache_t)); + if (!cache) { + goto out; + } + + cache->entries = CALLOC (size, sizeof (xattr_cache_entry_t *)); + if (!cache->entries) + goto out; + + cache->size = size; + + for (i = 0; i < size; i++) { + cache->entries[i] = calloc (1, sizeof (xattr_cache_entry_t)); + if (!cache->entries[i]) + goto out; + } + + pthread_mutex_init (&cache->lock, NULL); + + op_ret = 0; +out: + if (op_ret == -1) { + if (cache) { + if (cache->entries) { + for (i = 0; i < size; i++) + if (cache->entries[i]) + FREE (cache->entries[i]); + + FREE (cache->entries); + } + + FREE (cache); + } + } + + return cache; +} diff --git a/xlators/storage/posix/src/xattr-cache.h b/xlators/storage/posix/src/xattr-cache.h new file mode 100644 index 000000000..3e12742a9 --- /dev/null +++ b/xlators/storage/posix/src/xattr-cache.h @@ -0,0 +1,65 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __XATTR_CACHE_H__ +#define __XATTR_CACHE_H__ + + +#include "glusterfs.h" +#include "inode.h" + +typedef struct __xattr_cache_handle { + loc_t loc; + fd_t *fd; +} xattr_cache_handle_t; + + +typedef struct __xattr_cache_entry { + char *key; /* name of the xattr */ + int32_t *array; /* value */ + size_t len; /* length of array in bytes */ + inode_t *inode; /* inode for which the entry is for */ + + xattr_cache_handle_t *handle; + unsigned char dirty; + unsigned long nraccess; /* number of times accessed */ +} xattr_cache_entry_t; + + +typedef struct __xattr_cache { + size_t size; + pthread_mutex_t lock; + xattr_cache_entry_t **entries; +} xattr_cache_t; + + +xattr_cache_t * posix_xattr_cache_init (size_t size); + +int posix_xattr_cache_read (xlator_t *this, xattr_cache_handle_t *handle, + char *key, int32_t *array, size_t len); + +int posix_xattr_cache_write (xlator_t *this, xattr_cache_handle_t *handle, + char *key, int32_t *array, size_t len); + +int posix_xattr_cache_flush (xlator_t *this, xattr_cache_handle_t *handle); + +int posix_xattr_cache_flush_all (xlator_t *this); + + +#endif /* __XATTR_CACHE_H__ */ |