diff options
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- | xlators/cluster/dht/Makefile.am | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 30 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 3470 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 212 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-hashfn-tea.c | 146 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-hashfn.c | 88 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 326 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 543 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-linkfile.c | 224 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 562 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 460 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 222 | ||||
-rw-r--r-- | xlators/cluster/dht/src/nufa.c | 684 |
13 files changed, 6968 insertions, 0 deletions
diff --git a/xlators/cluster/dht/Makefile.am b/xlators/cluster/dht/Makefile.am new file mode 100644 index 00000000000..f963effea22 --- /dev/null +++ b/xlators/cluster/dht/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src
\ No newline at end of file diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am new file mode 100644 index 00000000000..b7d07d137a6 --- /dev/null +++ b/xlators/cluster/dht/src/Makefile.am @@ -0,0 +1,30 @@ + +xlator_LTLIBRARIES = dht.la nufa.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + + +dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \ + dht-selfheal.c dht-rename.c dht-hashfn.c dht-hashfn-tea.c + +dht_la_SOURCES = $(dht_common_source) dht.c + +nufa_la_SOURCES = $(dht_common_source) nufa.c + +dht_la_LDFLAGS = -module -avoidversion +dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +nufa_la_LDFLAGS = -module -avoidversion +nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = dht-common.h dht-common.c + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/distribute.so + +install-data-hook: + ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
\ No newline at end of file diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c new file mode 100644 index 00000000000..5e4979e31b0 --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.c @@ -0,0 +1,3470 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +/* TODO: + - use volumename in xattr instead of "dht" + - use NS locks + - handle all cases in self heal layout reconstruction + - complete linkfile selfheal +*/ + +int +dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int ret = 0; + + local = frame->local; + ret = op_ret; + + if (ret == 0) { + layout = local->selfheal.layout; + ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + + if (ret == 0) + local->selfheal.layout = NULL; + + if (local->st_ino) { + local->stbuf.st_ino = local->st_ino; + } else { + gf_log (this->name, GF_LOG_WARNING, + "could not find hashed subvolume for %s", + local->loc.path); + } + } + + DHT_STACK_UNWIND (frame, ret, local->op_errno, 
local->inode, + &local->stbuf, local->xattr); + + return 0; +} + + +int +dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = 0; + int is_dir = 0; + + conf = this->private; + local = frame->local; + prev = cookie; + + layout = local->layout; + + LOCK (&frame->lock); + { + /* TODO: assert equal mode on stbuf->st_mode and + local->stbuf->st_mode + + else mkdir/chmod/chown and fix + */ + /* TODO: assert equal hash type in xattr, local->xattr */ + + /* TODO: always ensure same subvolume is in layout->list[0] */ + + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, xattr); + + if (op_ret == -1) { + local->op_errno = ENOENT; + gf_log (this->name, GF_LOG_WARNING, + "lookup of %s on %s returned error (%s)", + local->loc.path, prev->this->name, + strerror (op_errno)); + + goto unlock; + } + + is_dir = check_is_dir (inode, stbuf, xattr); + if (!is_dir) + goto unlock; + + local->op_ret = 0; + if (local->xattr == NULL) + local->xattr = dict_ref (xattr); + if (local->inode == NULL) + local->inode = inode_ref (inode); + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + if (prev->this == local->hashed_subvol) + local->st_ino = local->stbuf.st_ino; + + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + if (local->op_ret == 0) { + ret = dht_layout_normalize (this, &local->loc, layout); + + local->layout = NULL; + + if (ret != 0) { + layout->gen = conf->gen; + + gf_log (this->name, GF_LOG_WARNING, + "fixing assignment on %s", + local->loc.path); + goto selfheal; + } + + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + + if (local->st_ino) { + local->stbuf.st_ino = local->st_ino; + } else { + gf_log (this->name, 
GF_LOG_WARNING, + "could not find hashed subvolume for %s", + local->loc.path); + } + } + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + } + + return 0; + +selfheal: + ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, + &local->loc, layout); + + return 0; +} + +int +dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + int is_dir = 0; + int is_linkfile = 0; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + + if (op_errno != ENOTCONN && op_errno != ENOENT) { + gf_log (this->name, GF_LOG_WARNING, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + } + + goto unlock; + } + + if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) { + gf_log (this->name, GF_LOG_WARNING, + "mismatching filetypes 0%o v/s 0%o for %s", + (stbuf->st_mode & S_IFMT), + (local->inode->st_mode & S_IFMT), + local->loc.path); + + local->op_ret = -1; + local->op_errno = EINVAL; + + goto unlock; + } + + layout = dht_layout_get (this, inode); + + is_dir = check_is_dir (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + + if (is_linkfile) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile found in revalidate for %s", + local->loc.path); + local->layout_mismatch = 1; + + goto unlock; + } + + if (is_dir) { + ret = dht_layout_dir_mismatch (this, layout, + prev->this, &local->loc, + xattr); + if (ret != 0) { + gf_log (this->name, GF_LOG_WARNING, + "mismatching layouts for %s", + local->loc.path); + + local->layout_mismatch = 1; + + goto unlock; + } + } + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + local->op_ret = 0; + local->stbuf.st_ino = local->st_ino; + 
+ if (!local->xattr) + local->xattr = dict_ref (xattr); + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + if (!S_ISDIR (local->stbuf.st_mode) + && (local->hashed_subvol != local->cached_subvol) + && (local->stbuf.st_nlink == 1)) + local->stbuf.st_mode |= S_ISVTX; + + if (local->layout_mismatch) { + local->op_ret = -1; + local->op_errno = ESTALE; + } + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + } + + return 0; +} + + +int +dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + xlator_t *cached_subvol = NULL; + + local = frame->local; + cached_subvol = local->cached_subvol; + + layout = dht_layout_for_subvol (this, local->cached_subvol); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + cached_subvol ? 
cached_subvol->name : "<nil>"); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + local->op_ret = 0; + if (local->stbuf.st_nlink == 1) + local->stbuf.st_mode |= S_ISVTX; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr); + return 0; +} + + +int +dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *buf, dict_t *xattr) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + int is_linkfile = 0; + int is_dir = 0; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + xlator_t *link_subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + + conf = this->private; + + local = frame->local; + loc = &local->loc; + + prev = cookie; + subvol = prev->this; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + if (op_errno != ENOENT) + local->op_errno = op_errno; + goto unlock; + } + + is_linkfile = check_is_linkfile (inode, buf, xattr); + is_dir = check_is_dir (inode, buf, xattr); + + if (is_linkfile) { + link_subvol = dht_linkfile_subvol (this, inode, buf, + xattr); + gf_log (this->name, GF_LOG_DEBUG, + "found on %s linkfile %s (-> %s)", + subvol->name, loc->path, + link_subvol ? 
link_subvol->name : "''"); + goto unlock; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "found on %s file %s", + subvol->name, loc->path); + } + + if (!local->cached_subvol) { + /* found one file */ + dht_stat_merge (this, &local->stbuf, buf, subvol); + local->xattr = dict_ref (xattr); + local->cached_subvol = subvol; + } else { + gf_log (this->name, GF_LOG_WARNING, + "multiple subvolumes (%s and %s atleast) have " + "file %s", local->cached_subvol->name, + subvol->name, local->loc.path); + } + } +unlock: + UNLOCK (&frame->lock); + + if (is_linkfile) { + gf_log (this->name, GF_LOG_WARNING, + "deleting stale linkfile %s on %s", + loc->path, subvol->name); + dht_linkfile_unlink (frame, this, subvol, loc); + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + hashed_subvol = local->hashed_subvol; + cached_subvol = local->cached_subvol; + + if (!cached_subvol) { + DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL); + return 0; + } + + gf_log (this->name, GF_LOG_WARNING, + "linking file %s existing on %s to %s (hash)", + loc->path, cached_subvol->name, hashed_subvol->name); + + dht_linkfile_create (frame, dht_lookup_linkfile_create_cbk, + cached_subvol, hashed_subvol, loc); + } + + return 0; +} + + +int +dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int call_cnt = 0; + + conf = this->private; + local = frame->local; + + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + if (!local->inode) + local->inode = inode_ref (loc->inode); + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_everywhere_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + loc, local->xattr_req); + } + + return 0; +} + + +int +dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + call_frame_t *prev = NULL; + 
dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + loc_t *loc = NULL; + + prev = cookie; + subvol = prev->this; + + local = frame->local; + loc = &local->loc; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "lookup of %s on %s (following linkfile) failed (%s)", + local->loc.path, subvol->name, strerror (op_errno)); + + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + /* TODO: assert type is non-dir and non-linkfile */ + + if (stbuf->st_nlink == 1) + stbuf->st_mode |= S_ISVTX; + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + + return 0; +} + + +int +dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_layout_t *layout = NULL; + char is_linkfile = 0; + char is_dir = 0; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + call_frame_t *prev = NULL; + int call_cnt = 0; + + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING (op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere (frame, this, loc); + return 0; + } + } + + if (op_ret == 0) { + is_dir = check_is_dir (inode, stbuf, xattr); + if (is_dir) { + local->inode = inode_ref (inode); + local->xattr = dict_ref (xattr); + } + } + + if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + 
if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_dir = check_is_dir (inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + + dht_itransform (this, prev->this, stbuf->st_ino, + &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + goto out; + } + + if (is_linkfile) { + subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + + if (!subvol) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile not having link subvolume. 
path=%s", + loc->path); + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + STACK_WIND (frame, dht_lookup_linkfile_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); + } + + return 0; + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + return 0; +} + + +int +dht_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + xlator_t *subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "copying location failed for path=%s", + loc->path); + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref (xattr_req); + } else { + local->xattr_req = dict_new (); + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + cached_subvol = dht_subvol_get_cached (this, loc->inode); + + local->cached_subvol = cached_subvol; + local->hashed_subvol = hashed_subvol; + + if (is_revalidate (loc)) { + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "revalidate without cache. 
path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + if (layout->gen && (layout->gen < conf->gen)) { + gf_log (this->name, GF_LOG_WARNING, + "incomplete layout failure for path=%s", + loc->path); + op_errno = EAGAIN; + goto err; + } + + local->inode = inode_ref (loc->inode); + local->st_ino = loc->inode->ino; + + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; + + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. + */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_revalidate_cbk, + subvol, subvol->fops->lookup, + loc, local->xattr_req); + + if (!--call_cnt) + break; + } + } else { + /* TODO: remove the hard-coding */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht.linkto", 256); + + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s, " + "checking on all the subvols to see if " + "it is a directory", loc->path); + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + return 0; + } + + STACK_WIND (frame, dht_lookup_cbk, + hashed_subvol, hashed_subvol->fops->lookup, + loc, local->xattr_req); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + +int +dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + if (local->inode) + local->stbuf.st_ino = local->inode->ino; + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_stat (call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->stat, + loc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "local allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt;; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->fstat, + fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_chmod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + 
for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->chmod, + loc, mode); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_chown (call_frame_t *frame, xlator_t *this, + loc_t *loc, uid_t uid, gid_t gid) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->chown, + loc, uid, gid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fchmod (call_frame_t *frame, xlator_t *this, + fd_t *fd, mode_t mode) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fchmod, + fd, mode); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fchown (call_frame_t *frame, xlator_t *this, + fd_t *fd, uid_t uid, gid_t gid) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + layout = dht_layout_get (this, fd->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fchown, + fd, uid, gid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_utimens (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct timespec tv[2]) +{ + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + layout = dht_layout_get (this, loc->inode); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_ERROR, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_attr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->utimens, + loc, tv); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_truncate (call_frame_t *frame, xlator_t *this, + loc_t *loc, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->truncate, + loc, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (fd->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->ftruncate, + fd, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->access, + loc, mask); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, const char *path) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, path); + + return 0; +} + + +int +dht_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_readlink_cbk, + subvol, subvol->fops->readlink, + loc, size); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr); + + return 0; +} + + +int +dht_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_getxattr_cbk, + subvol, subvol->fops->getxattr, + loc, key); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr, int flags) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->setxattr, + loc, xattr, flags); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->removexattr, + loc, key); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + local->fd); + + return 0; +} + + +int +dht_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, fd_t *fd) +{ + xlator_t *subvol = NULL; + int ret = -1; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_fd_cbk, + subvol, subvol->fops->open, + loc, flags, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + struct iovec *vector, int count, struct stat *stbuf) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + + return 0; +} + + +int +dht_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_readv_cbk, + subvol, subvol->fops->readv, + fd, size, off); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + + return 0; +} + + +int +dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct stat *stbuf) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf); + + return 0; +} + + +int +dht_writev (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iovec *vector, int count, off_t off) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_writev_cbk, + subvol, subvol->fops->writev, + fd, vector, count, off); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0); + + return 0; +} + + +int +dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->flush, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocatoin failed :("); + goto err; + } + local->call_cnt = 1; + + STACK_WIND (frame, dht_err_cbk, + subvol, subvol->fops->fsync, + fd, datasync); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct flock *flock) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + + return 0; +} + + +int +dht_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int cmd, struct flock *flock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_lk_cbk, + subvol, subvol->fops->lk, + fd, cmd, flock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + +/* gf_lk no longer exists +int +dht_gf_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct flock *flock) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + + return 0; +} + + +int +dht_gf_lk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int cmd, struct flock *flock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_gf_lk_cbk, + subvol, subvol->fops->gf_lk, + fd, cmd, flock); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} +*/ + +int +dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct statvfs *statvfs) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + local->op_ret = 0; + + /* TODO: normalize sizes */ + local->statvfs.f_bsize = statvfs->f_bsize; + local->statvfs.f_frsize = statvfs->f_frsize; + + local->statvfs.f_blocks += statvfs->f_blocks; + local->statvfs.f_bfree += statvfs->f_bfree; + local->statvfs.f_bavail += statvfs->f_bavail; + local->statvfs.f_files += statvfs->f_files; + local->statvfs.f_ffree += statvfs->f_ffree; + local->statvfs.f_favail += statvfs->f_favail; + local->statvfs.f_fsid = statvfs->f_fsid; + local->statvfs.f_flag = statvfs->f_flag; + local->statvfs.f_namemax = statvfs->f_namemax; + + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->statvfs); + + return 0; +} + + +int +dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_statfs_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, loc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fd_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *orig_entries) +{ + dht_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *orig_entry = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + xlator_t *next = NULL; + dht_layout_t *layout = NULL; + int count = 0; + + + INIT_LIST_HEAD (&entries.list); + prev = cookie; + local = frame->local; + + if (op_ret < 0) + goto done; + + layout = dht_layout_get (this, local->fd->inode); + + list_for_each_entry (orig_entry, &orig_entries->list, list) { + subvol = dht_layout_search (this, layout, orig_entry->d_name); + + if (!subvol || subvol == prev->this) { + entry = gf_dirent_for_name (orig_entry->d_name); + if (!entry) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto unwind; + } + + dht_itransform (this, subvol, orig_entry->d_ino, + &entry->d_ino); + dht_itransform (this, subvol, orig_entry->d_off, + &entry->d_off); + + entry->d_type = orig_entry->d_type; + entry->d_len = orig_entry->d_len; + + list_add_tail (&entry->list, &entries.list); + count++; + } + } + op_ret = count; + +done: + if (count == 0) { + next = dht_subvol_next (this, prev->this); + if (!next) { + goto unwind; + } + + STACK_WIND (frame, dht_readdir_cbk, + next, next->fops->readdir, + local->fd, local->size, 0); + return 0; + } + +unwind: + if (op_ret < 0) + op_ret = 0; + + DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + + +int +dht_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t yoff) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + xlator_t *xvol = NULL; + off_t xoff = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = 
dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_ref (fd); + local->size = size; + + dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + + /* TODO: do proper readdir */ + STACK_WIND (frame, dht_readdir_cbk, + xvol, xvol->fops->readdir, + fd, size, xoff); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + + + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret == -1) + local->op_errno = op_errno; + + if (op_ret == 0) + local->op_ret = 0; + } + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->fd = fd_ref (fd); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_fsyncdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->fsyncdir, + fd, datasync); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + + + if (op_ret == -1) + goto out; + + prev = cookie; + + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + layout = dht_layout_for_subvol (this, prev->this); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not set inode context"); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +dht_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->mknod, + loc, mode, rdev); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_symlink (call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->symlink, + linkname, loc); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + cached_subvol = dht_subvol_get_cached (this, loc->inode); + if (!cached_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->call_cnt = 1; + if (hashed_subvol != cached_subvol) + local->call_cnt++; + + STACK_WIND (frame, dht_err_cbk, + cached_subvol, cached_subvol->fops->unlink, loc); + + if (hashed_subvol != cached_subvol) + STACK_WIND (frame, dht_err_cbk, + hashed_subvol, 
hashed_subvol->fops->unlink, loc); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + + prev = cookie; + local = frame->local; + + if (op_ret == -1) + goto out; + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + stbuf->st_ino = local->loc.inode->ino; + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + + return 0; +} + + +int +dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + xlator_t *srcvol = NULL; + + + if (op_ret == -1) + goto err; + + local = frame->local; + srcvol = local->linkfile.srcvol; + + STACK_WIND (frame, dht_link_cbk, + srcvol, srcvol->fops->link, + &local->loc, &local->loc2); + + return 0; + +err: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + + return 0; +} + + +int +dht_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc) +{ + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + int op_errno = -1; + int ret = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (oldloc, err); + VALIDATE_OR_GOTO (newloc, err); + + cached_subvol = dht_subvol_get_cached (this, oldloc->inode); + if (!cached_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", oldloc->path); + op_errno = EINVAL; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, newloc); + if (!hashed_subvol) { + gf_log 
(this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + newloc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc, oldloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_copy (&local->loc2, newloc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + if (hashed_subvol != cached_subvol) { + dht_linkfile_create (frame, dht_link_linkfile_cbk, + cached_subvol, hashed_subvol, newloc); + } else { + STACK_WIND (frame, dht_link_cbk, + cached_subvol, cached_subvol->fops->link, + oldloc, newloc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct stat *stbuf) +{ + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + int ret = -1; + + + if (op_ret == -1) + goto out; + + prev = cookie; + + dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + layout = dht_layout_for_subvol (this, prev->this); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not set inode context"); + op_ret = -1; + op_errno = EINVAL; + goto out; + } + +out: + DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf); + return 0; +} + + +int +dht_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + 
xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + + +int +dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + + + local = frame->local; + layout = local->selfheal.layout; + + if (op_ret == 0) { + inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + local->selfheal.layout = NULL; + local->stbuf.st_ino = local->st_ino; + } + + DHT_STACK_UNWIND (frame, op_ret, op_errno, + local->inode, &local->stbuf); + + return 0; +} + + +int +dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + int ret = -1; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + + local = frame->local; + prev = cookie; + layout = local->layout; + + LOCK (&frame->lock); + { + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + + if (op_ret == -1) { + local->op_errno = op_errno; + goto unlock; + } + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + local->layout = NULL; + dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, + &local->loc, layout); + } + + return 
0; +} + +int +dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int ret = -1; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *hashed_subvol = NULL; + + local = frame->local; + prev = cookie; + layout = local->layout; + conf = this->private; + hashed_subvol = local->hashed_subvol; + + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + + if (op_ret == -1) { + local->op_errno = op_errno; + goto err; + } + local->op_ret = 0; + + dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + + local->st_ino = local->stbuf.st_ino; + + local->call_cnt = conf->subvolume_cnt - 1; + + if (local->call_cnt == 0) { + local->layout = NULL; + dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, + &local->loc, layout); + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == hashed_subvol) + continue; + STACK_WIND (frame, dht_mkdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->mkdir, + &local->loc, local->mode); + } + return 0; +err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; +} + +int +dht_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int ret = -1; + xlator_t *hashed_subvol = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + hashed_subvol = dht_subvol_get_hashed (this, loc); + + if (hashed_subvol == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "hashed subvol not found"); + op_errno = EINVAL; + 
goto err; + } + + local->hashed_subvol = hashed_subvol; + local->inode = inode_ref (loc->inode); + ret = loc_copy (&local->loc, loc); + local->mode = mode; + + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + STACK_WIND (frame, dht_mkdir_hashed_cbk, + hashed_subvol, + hashed_subvol->fops->mkdir, + loc, mode); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + + local = frame->local; + local->layout = NULL; + + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + + return 0; +} + + +int +dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + uint64_t tmp_layout = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + dht_layout_t *layout = NULL; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + + if (op_errno != ENOENT) + local->need_selfheal = 1; + + gf_log (this->name, GF_LOG_ERROR, + "rmdir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto unlock; + } + } +unlock: + UNLOCK (&frame->lock); + + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->need_selfheal) { + inode_ctx_get (local->loc.inode, this, + &tmp_layout); + layout = (dht_layout_t *)(long)tmp_layout; + + /* TODO: neater interface needed below */ + local->stbuf.st_mode = local->loc.inode->st_mode; + + dht_selfheal_restore (frame, 
dht_rmdir_selfheal_cbk, + &local->loc, layout); + } else { + DHT_STACK_UNWIND (frame, local->op_ret, + local->op_errno); + } + } + + return 0; +} + + +int +dht_rmdir_do (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + + conf = this->private; + local = frame->local; + + if (local->op_ret == -1) + goto err; + + local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rmdir, + &local->loc); + } + + return 0; + +err: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + return 0; +} + + +int +dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if (op_ret > 2) { + gf_log (this->name, GF_LOG_DEBUG, + "readdir on %s for %s returned %d entries", + prev->this->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno = ENOTEMPTY; + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } + + return 0; +} + + +int +dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "opendir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto err; + } + + STACK_WIND (frame, dht_rmdir_readdir_cbk, + prev->this, prev->this->fops->readdir, + local->fd, 4096, 0); + + return 0; + +err: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rmdir_do (frame, this); + } + + return 0; +} + + +int +dht_rmdir 
(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int op_errno = -1; + int i = -1; + int ret = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_create (local->loc.inode, frame->root->pid); + if (!local->fd) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_opendir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + loc, local->fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_xattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +dht_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, + dht_xattrop_cbk, + subvol, subvol->fops->xattrop, + loc, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +dht_fxattrop_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *dict) +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); + return 0; +} + +int32_t +dht_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + dht_fxattrop_cbk, + subvol, subvol->fops->fxattrop, + fd, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + + return 0; +} + + +static int32_t +dht_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +dht_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t cmd, struct flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, + 
dht_inodelk_cbk, + subvol, subvol->fops->inodelk, + loc, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int32_t +dht_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + + STACK_WIND (frame, + dht_finodelk_cbk, + subvol, subvol->fops->finodelk, + fd, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +static int32_t +dht_entrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +dht_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + subvol = dht_subvol_get_cached (this, loc->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + 
local->inode = inode_ref (loc->inode); + local->call_cnt = 1; + + STACK_WIND (frame, dht_entrylk_cbk, + subvol, subvol->fops->entrylk, + loc, basename, cmd, type); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + +static int32_t +dht_fentrylk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int32_t +dht_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_fentrylk_cbk, + subvol, subvol->fops->fentrylk, + fd, basename, cmd, type); + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno); + + return 0; +} + + +int +dht_forget (xlator_t *this, inode_t *inode) +{ + uint64_t tmp_layout = 0; + dht_layout_t *layout = NULL; + + inode_ctx_get (inode, this, &tmp_layout); + + if (!layout) + return 0; + layout = (dht_layout_t *)(long)tmp_layout; + if (!layout->preset) + FREE (layout); + + return 0; +} + + + +static int +dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) +{ + xlator_list_t *subvols = NULL; + int cnt = 0; + + + for (subvols = this->children; subvols; subvols = subvols->next) + cnt++; + + conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *)); + if (!conf->subvolumes) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + return -1; + } + conf->subvolume_cnt = cnt; + + cnt = 0; + for (subvols = this->children; subvols; subvols = subvols->next) + conf->subvolumes[cnt++] = subvols->xlator; + + conf->subvolume_status = CALLOC (cnt, sizeof (char)); + if (!conf->subvolume_status) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + return -1; + } + + return 0; +} + + +int +dht_notify (xlator_t *this, int event, void *data, ...) 
+{ + xlator_t *subvol = NULL; + int cnt = -1; + int i = -1; + dht_conf_t *conf = NULL; + int ret = -1; + + + conf = this->private; + + switch (event) { + case GF_EVENT_CHILD_UP: + subvol = data; + + conf->gen++; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } + + if (cnt == -1) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_UP bad subvolume %s", + subvol->name); + break; + } + + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 1; + } + UNLOCK (&conf->subvolume_lock); + + break; + + case GF_EVENT_CHILD_DOWN: + subvol = data; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + cnt = i; + break; + } + } + + if (cnt == -1) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + subvol->name); + break; + } + + LOCK (&conf->subvolume_lock); + { + conf->subvolume_status[cnt] = 0; + } + UNLOCK (&conf->subvolume_lock); + + break; + } + + ret = default_notify (this, event, data); + + return ret; +} + diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h new file mode 100644 index 00000000000..17017381b08 --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.h @@ -0,0 +1,212 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef _DHT_H +#define _DHT_H + + +typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno); + + +struct dht_layout { + int cnt; + int preset; + int gen; + int type; + struct { + int err; /* 0 = normal + -1 = dir exists and no xattr + >0 = dir lookup failed with errno + */ + uint32_t start; + uint32_t stop; + xlator_t *xlator; + } list[0]; +}; +typedef struct dht_layout dht_layout_t; + + +struct dht_local { + int call_cnt; + loc_t loc; + loc_t loc2; + int op_ret; + int op_errno; + int layout_mismatch; + struct stat stbuf; + struct statvfs statvfs; + fd_t *fd; + inode_t *inode; + dict_t *xattr; + dict_t *xattr_req; + dht_layout_t *layout; + size_t size; + ino_t st_ino; + xlator_t *src_hashed, *src_cached; + xlator_t *dst_hashed, *dst_cached; + xlator_t *cached_subvol; + xlator_t *hashed_subvol; + char need_selfheal; + struct { + fop_mknod_cbk_t linkfile_cbk; + struct stat stbuf; + loc_t loc; + inode_t *inode; + dict_t *xattr; + xlator_t *srcvol; + } linkfile; + struct { + uint32_t hole_cnt; + uint32_t overlaps_cnt; + uint32_t missing; + uint32_t down; + uint32_t misc; + dht_selfheal_dir_cbk_t dir_cbk; + dht_layout_t *layout; + } selfheal; + + /* needed by nufa */ + int32_t flags; + mode_t mode; + dev_t rdev; +}; +typedef struct dht_local dht_local_t; + + +struct dht_conf { + gf_lock_t subvolume_lock; + int subvolume_cnt; + xlator_t **subvolumes; + xlator_t *local_volume; /* Needed by NUFA */ + char *subvolume_status; + dht_layout_t **file_layouts; + dht_layout_t **dir_layouts; + dht_layout_t *default_dir_layout; + gf_boolean_t search_unhashed; + int gen; +}; +typedef struct dht_conf dht_conf_t; + + +struct dht_disk_layout { + uint32_t cnt; + uint32_t type; + struct { + uint32_t start; + uint32_t stop; + } list[1]; +}; +typedef struct dht_disk_layout dht_disk_layout_t; + 
+#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT) + +#define is_fs_root(loc) (strcmp (loc->path, "/") == 0) + +#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0) + +#define is_last_call(cnt) (cnt == 0) + +#define DHT_LINKFILE_MODE (S_ISVTX) +#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE) + +#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode)) + +#define layout_is_sane(layout) ((layout) && (layout->cnt > 0)) + +#define DHT_STACK_UNWIND(frame, params ...) do { \ + dht_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_UNWIND (frame, params); \ + dht_local_wipe (__local); \ + } while (0) + +#define DHT_STACK_DESTROY(frame) do { \ + dht_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + dht_local_wipe (__local); \ + } while (0) + +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, + const char *name); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, + uint32_t *misc_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, dict_t *xattr); + +xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, + struct stat *buf, dict_t *xattr); +int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc); + +int dht_layouts_init (xlator_t *this, dht_conf_t *conf); +int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + int op_ret, int op_errno, dict_t *xattr); + +int dht_disk_layout_extract (xlator_t 
*this, dht_layout_t *layout, + int pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, int32_t *disk_layout); + + +int dht_frame_return (call_frame_t *frame); + +int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol, + uint64_t *x); + +void dht_local_wipe (dht_local_t *local); +dht_local_t *dht_local_init (call_frame_t *frame); +int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from, + xlator_t *subvol); + +xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); +xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); +xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); +int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); + +int dht_hash_compute (int type, const char *name, uint32_t *hash_p); + +int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc); +int +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); +int +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); + +int dht_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc); +#endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-hashfn-tea.c b/xlators/cluster/dht/src/dht-hashfn-tea.c new file mode 100644 index 00000000000..8437b495541 --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn-tea.c @@ -0,0 +1,146 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6 /* 6 gets complete mixing */ + + +static int +tearound (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1) +{ + uint32_t sum = 0; + int n = 0; + uint32_t b0 = 0; + uint32_t b1 = 0; + + b0 = *h0; + b1 = *h1; + + n = rounds; + + do { + sum += DELTA; + b0 += ((b1 << 4) + array[0]) + ^ (b1 + sum) + ^ ((b1 >> 5) + array[1]); + b1 += ((b0 << 4) + array[2]) + ^ (b0 + sum) + ^ ((b0 >> 5) + array[3]); + } while (--n); + + *h0 += b0; + *h1 += b1; + + return 0; +} + + +uint32_t +__pad (int len) +{ + uint32_t pad = 0; + + pad = (uint32_t) len | ((uint32_t) len << 8); + pad |= pad << 16; + + return pad; +} + + +uint32_t +dht_hashfn_tea (const char *msg, int len) +{ + uint32_t h0 = 0x9464a485; + uint32_t h1 = 0x542e1a94; + uint32_t array[4]; + uint32_t pad = 0; + int i = 0; + int j = 0; + int full_quads = 0; + int full_words = 0; + int full_bytes = 0; + uint32_t *intmsg = NULL; + int word = 0; + + + intmsg = (uint32_t *) msg; + pad = __pad (len); + + full_bytes = len; + full_words = len / 4; + full_quads = len / 16; + + for (i = 0; i < full_quads; i++) { + for (j = 0; j < 4; j++) { + word = *intmsg; + array[j] = word; + intmsg++; + full_words--; + full_bytes -= 4; + } + tearound (PARTROUNDS, &array[0], &h0, &h1); + } + + if ((len % 16) == 0) { + goto done; + } + + for (j = 0; j < 4; j++) { + if (full_words) { + word = *intmsg; + array[j] = word; + intmsg++; + full_words--; + full_bytes -= 4; + } else { + array[j] 
= pad; + while (full_bytes) { + array[j] <<= 8; + array[j] |= msg[len - full_bytes]; + full_bytes--; + } + } + } + tearound (FULLROUNDS, &array[0], &h0, &h1); + +done: + return h0 ^ h1; +} + + +#if 0 +int +main (int argc, char *argv[]) +{ + int i = 0; + int hashval = 0; + + for (i = 1; i < argc; i++) { + hashval = tea (argv[i], strlen (argv[i])); + printf ("%s: %x\n", argv[i], hashval); + } +} +#endif diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c new file mode 100644 index 00000000000..9e321a43cec --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn.c @@ -0,0 +1,88 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +uint32_t dht_hashfn_tea (const char *name, int len); + + +typedef enum { + DHT_HASH_TYPE_TEA, +} dht_hashfn_type_t; + + +int +dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p) +{ + int ret = 0; + uint32_t hash = 0; + + switch (type) { + case DHT_HASH_TYPE_TEA: + hash = dht_hashfn_tea (name, strlen (name)); + break; + default: + ret = -1; + break; + } + + if (ret == 0) { + *hash_p = hash; + } + + return ret; +} + + +#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \ + rsync_frndly_name = (char *) name; \ + if (name[0] == '.') { \ + char *dot = 0; \ + int namelen = 0; \ + \ + dot = strrchr (name, '.'); \ + if (dot && dot > (name + 1) && *(dot + 1)) { \ + namelen = (dot - name); \ + rsync_frndly_name = alloca (namelen); \ + strncpy (rsync_frndly_name, name + 1, \ + namelen); \ + rsync_frndly_name[namelen - 1] = 0; \ + } \ + } \ + } while (0); + + +int +dht_hash_compute (int type, const char *name, uint32_t *hash_p) +{ + char *rsync_friendly_name = NULL; + + MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name); + + return dht_hash_compute_internal (type, rsync_friendly_name, hash_p); +} diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c new file mode 100644 index 00000000000..52d0720025f --- /dev/null +++ b/xlators/cluster/dht/src/dht-helper.c @@ -0,0 +1,326 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_frame_return (call_frame_t *frame) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + + if (!frame) + return -1; + + local = frame->local; + + LOCK (&frame->lock); + { + this_call_cnt = --local->call_cnt; + } + UNLOCK (&frame->lock); + + return this_call_cnt; +} + + +int +dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) +{ + dht_conf_t *conf = NULL; + int cnt = 0; + int max = 0; + uint64_t y = 0; + + + if (x == ((uint64_t) -1)) { + y = (uint64_t) -1; + goto out; + } + + conf = this->private; + + max = conf->subvolume_cnt; + cnt = dht_subvol_cnt (this, subvol); + + y = ((x * max) + cnt); + +out: + if (y_p) + *y_p = y; + + return 0; +} + + +int +dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, + uint64_t *x_p) +{ + dht_conf_t *conf = NULL; + int cnt = 0; + int max = 0; + uint64_t x = 0; + xlator_t *subvol = 0; + + + conf = this->private; + max = conf->subvolume_cnt; + + cnt = y % max; + x = y / max; + + subvol = conf->subvolumes[cnt]; + + if (subvol_p) + *subvol_p = subvol; + + if (x_p) + *x_p = x; + + return 0; +} + + +void +dht_local_wipe (dht_local_t *local) +{ + if (!local) + return; + + loc_wipe (&local->loc); + loc_wipe (&local->loc2); + + if (local->xattr) + dict_unref (local->xattr); + + if (local->inode) + inode_unref (local->inode); + + if (local->layout) + FREE (local->layout); + + loc_wipe (&local->linkfile.loc); + + if (local->linkfile.xattr) + dict_unref 
(local->linkfile.xattr); + + if (local->linkfile.inode) + inode_unref (local->linkfile.inode); + + if (local->fd) { + fd_unref (local->fd); + local->fd = NULL; + } + + if (local->xattr_req) + dict_unref (local->xattr_req); + + FREE (local); +} + + +dht_local_t * +dht_local_init (call_frame_t *frame) +{ + dht_local_t *local = NULL; + + /* TODO: use mem-pool */ + local = CALLOC (1, sizeof (*local)); + + if (!local) + return NULL; + + local->op_ret = -1; + local->op_errno = EUCLEAN; + + frame->local = local; + + return local; +} + + +char * +basestr (const char *str) +{ + char *basestr = NULL; + + basestr = strrchr (str, '/'); + if (basestr) + basestr ++; + + return basestr; +} + +xlator_t * +dht_first_up_child (xlator_t *this) +{ + dht_conf_t *conf = NULL; + xlator_t *child = NULL; + int i = 0; + + conf = this->private; + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolume_status[i]) { + child = conf->subvolumes[i]; + break; + } + } + } + UNLOCK (&conf->subvolume_lock); + + return child; +} + +xlator_t * +dht_subvol_get_hashed (xlator_t *this, loc_t *loc) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + + if (is_fs_root (loc)) { + subvol = dht_first_up_child (this); + goto out; + } + + layout = dht_layout_get (this, loc->parent); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "layout missing path=%s parent=%"PRId64, + loc->path, loc->parent->ino); + goto out; + } + + subvol = dht_layout_search (this, layout, loc->name); + + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "could not find subvolume for path=%s", + loc->path); + goto out; + } + +out: + return subvol; +} + + +xlator_t * +dht_subvol_get_cached (xlator_t *this, inode_t *inode) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + + + layout = dht_layout_get (this, inode); + + if (!layout) { + goto out; + } + + subvol = layout->list[0].xlator; + +out: + return subvol; +} + + +xlator_t * +dht_subvol_next (xlator_t 
*this, xlator_t *prev) +{ + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *next = NULL; + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) { + if ((i + 1) < conf->subvolume_cnt) + next = conf->subvolumes[i + 1]; + break; + } + } + + return next; +} + + +int +dht_subvol_cnt (xlator_t *this, xlator_t *subvol) +{ + int i = 0; + int ret = -1; + dht_conf_t *conf = NULL; + + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + ret = i; + break; + } + } + + return ret; +} + + +#define set_if_greater(a, b) do { \ + if ((a) < (b)) \ + (a) = (b); \ + } while (0) + +int +dht_stat_merge (xlator_t *this, struct stat *to, + struct stat *from, xlator_t *subvol) +{ + to->st_dev = from->st_dev; + + dht_itransform (this, subvol, from->st_ino, &to->st_ino); + + to->st_mode = from->st_mode; + to->st_nlink = from->st_nlink; + to->st_uid = from->st_uid; + to->st_gid = from->st_gid; + to->st_rdev = from->st_rdev; + to->st_size += from->st_size; + to->st_blksize = from->st_blksize; + to->st_blocks += from->st_blocks; + + set_if_greater (to->st_atime, from->st_atime); + set_if_greater (to->st_mtime, from->st_mtime); + set_if_greater (to->st_ctime, from->st_ctime); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c new file mode 100644 index 00000000000..08b4a2746f8 --- /dev/null +++ b/xlators/cluster/dht/src/dht-layout.c @@ -0,0 +1,543 @@ +/* + Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "byte-order.h" + +#define layout_base_size (sizeof (dht_layout_t)) + +#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0]) + +#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size)) + + +dht_layout_t * +dht_layout_new (xlator_t *this, int cnt) +{ + dht_layout_t *layout = NULL; + + + layout = CALLOC (1, layout_size (cnt)); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + layout->cnt = cnt; + +out: + return layout; +} + + +dht_layout_t * +dht_layout_get (xlator_t *this, inode_t *inode) +{ + uint64_t layout = 0; + int ret = -1; + + ret = inode_ctx_get (inode, this, &layout); + + return (dht_layout_t *)(long)layout; +} + + +xlator_t * +dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) +{ + uint32_t hash = 0; + xlator_t *subvol = NULL; + int i = 0; + int ret = 0; + + + ret = dht_hash_compute (layout->type, name, &hash); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "hash computation failed for type=%d name=%s", + layout->type, name); + goto out; + } + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].start <= hash + && layout->list[i].stop >= hash) { + subvol = layout->list[i].xlator; + break; + } + } + + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume for hash (value) = %u", hash); + } + +out: + return subvol; +} + + +dht_layout_t * +dht_layout_for_subvol (xlator_t *this, xlator_t *subvol) 
+{ + dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + conf = this->private; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == subvol) { + layout = conf->file_layouts[i]; + break; + } + } + + return layout; +} + + +int +dht_layouts_init (xlator_t *this, dht_conf_t *conf) +{ + dht_layout_t *layout = NULL; + int i = 0; + int ret = -1; + + + conf->file_layouts = CALLOC (conf->subvolume_cnt, + sizeof (dht_layout_t *)); + if (!conf->file_layouts) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + layout = dht_layout_new (this, 1); + + if (!layout) { + goto out; + } + + layout->preset = 1; + + layout->list[0].xlator = conf->subvolumes[i]; + + conf->file_layouts[i] = layout; + } + + ret = 0; +out: + return ret; +} + + +int +dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, + int pos, int32_t **disk_layout_p) +{ + int ret = -1; + int32_t *disk_layout = NULL; + + disk_layout = CALLOC (5, sizeof (int)); + if (!disk_layout) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto out; + } + + disk_layout[0] = hton32 (1); + disk_layout[1] = hton32 (layout->type); + disk_layout[2] = hton32 (layout->list[pos].start); + disk_layout[3] = hton32 (layout->list[pos].stop); + + if (disk_layout_p) + *disk_layout_p = disk_layout; + ret = 0; + +out: + return ret; +} + + +int +dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, int32_t *disk_layout) +{ + int cnt = 0; + int type = 0; + int start_off = 0; + int stop_off = 0; + + + /* TODO: assert disk_layout_ptr is of required length */ + + cnt = ntoh32 (disk_layout[0]); + if (cnt != 1) { + gf_log (this->name, GF_LOG_ERROR, + "disk layout has invalid count %d", cnt); + return -1; + } + + /* TODO: assert type is compatible */ + type = ntoh32 (disk_layout[1]); + start_off = ntoh32 (disk_layout[2]); + stop_off = ntoh32 (disk_layout[3]); + + 
layout->list[pos].start = start_off; + layout->list[pos].stop = stop_off; + + gf_log (this->name, GF_LOG_DEBUG, + "merged to layout: %u - %u (type %d) from %s", + start_off, stop_off, type, + layout->list[pos].xlator->name); + + return 0; +} + + +int +dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + int op_ret, int op_errno, dict_t *xattr) +{ + int i = 0; + int ret = -1; + int err = -1; + int32_t *disk_layout = NULL; + + + if (op_ret != 0) { + err = op_errno; + } + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == NULL) { + layout->list[i].err = err; + layout->list[i].xlator = subvol; + break; + } + } + + if (op_ret != 0) { + ret = 0; + goto out; + } + + if (xattr) { + /* during lookup and not mkdir */ + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", + VOID(&disk_layout)); + } + + if (ret != 0) { + layout->list[i].err = -1; + gf_log (this->name, GF_LOG_DEBUG, + "missing disk layout on %s. err = %d", + subvol->name, err); + ret = 0; + goto out; + } + + ret = dht_disk_layout_merge (this, layout, i, disk_layout); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "layout merge from subvolume %s failed", + subvol->name); + goto out; + } + layout->list[i].err = 0; + +out: + return ret; +} + + +void +dht_layout_entry_swap (dht_layout_t *layout, int i, int j) +{ + uint32_t start_swap = 0; + uint32_t stop_swap = 0; + xlator_t *xlator_swap = 0; + int err_swap = 0; + + + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; + xlator_swap = layout->list[i].xlator; + err_swap = layout->list[i].err; + + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; + layout->list[i].xlator = layout->list[j].xlator; + layout->list[i].err = layout->list[j].err; + + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; + layout->list[j].xlator = xlator_swap; + layout->list[j].err = err_swap; +} + + +int64_t +dht_layout_entry_cmp (dht_layout_t *layout, int i, 
int j) +{ + int64_t diff = 0; + + if (layout->list[i].err || layout->list[j].err) + diff = layout->list[i].err - layout->list[j].err; + else + diff = (int64_t) layout->list[i].start + - (int64_t) layout->list[j].start; + + return diff; +} + + +int +dht_layout_sort (dht_layout_t *layout) +{ + int i = 0; + int j = 0; + int64_t ret = 0; + + /* TODO: O(n^2) -- bad bad */ + + for (i = 0; i < layout->cnt - 1; i++) { + for (j = i + 1; j < layout->cnt; j++) { + ret = dht_layout_entry_cmp (layout, i, j); + if (ret > 0) + dht_layout_entry_swap (layout, i, j); + } + } + + return 0; +} + + +int +dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) +{ + dht_conf_t *conf = NULL; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + uint32_t hole_cnt = 0; + uint32_t overlap_cnt = 0; + int i = 0; + int ret = 0; + uint32_t prev_stop = 0; + uint32_t last_stop = 0; + char is_virgin = 1; + + + conf = this->private; + + /* TODO: explain WTF is happening */ + + last_stop = layout->list[0].start - 1; + prev_stop = last_stop; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err) { + switch (layout->list[i].err) { + case -1: + case ENOENT: + missing++; + break; + case ENOTCONN: + down++; + break; + default: + misc++; + } + continue; + } + + is_virgin = 0; + + if ((prev_stop + 1) < layout->list[i].start) { + hole_cnt++; + holes += (layout->list[i].start - (prev_stop + 1)); + } + + if ((prev_stop + 1) > layout->list[i].start) { + overlap_cnt++; + overlaps += ((prev_stop + 1) - layout->list[i].start); + } + prev_stop = layout->list[i].stop; + } + + if ((last_stop - prev_stop) || is_virgin) + hole_cnt++; + holes += (last_stop - prev_stop); + + if (holes_p) + *holes_p = hole_cnt; + + if (overlaps_p) + *overlaps_p = overlap_cnt; + + if (missing_p) + *missing_p = missing; + + if (down_p) + *down_p = down; 
+ + if (misc_p) + *misc_p = misc; + + return ret; +} + + +int +dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) +{ + int ret = 0; + uint32_t holes = 0; + uint32_t overlaps = 0; + uint32_t missing = 0; + uint32_t down = 0; + uint32_t misc = 0; + + + ret = dht_layout_sort (layout); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "sort failed?! how the ...."); + goto out; + } + + ret = dht_layout_anomalies (this, loc, layout, + &holes, &overlaps, + &missing, &down, &misc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "error while finding anomalies in %s -- not good news", + loc->path); + goto out; + } + + if (holes || overlaps) { + if (missing == layout->cnt) { + gf_log (this->name, GF_LOG_WARNING, + "directory %s looked up first time", + loc->path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "found anomalies in %s. holes=%d overlaps=%d", + loc->path, holes, overlaps); + } + ret = 1; + } + +out: + return ret; +} + + +int +dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, + loc_t *loc, dict_t *xattr) +{ + int idx = 0; + int pos = -1; + int ret = -1; + int32_t *disk_layout = NULL; + int32_t count = -1; + uint32_t start_off = -1; + uint32_t stop_off = -1; + + + for (idx = 0; idx < layout->cnt; idx++) { + if (layout->list[idx].xlator == subvol) { + pos = idx; + break; + } + } + + if (pos == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s - no layout info for subvolume %s", + loc->path, subvol->name); + ret = 1; + goto out; + } + + if (xattr == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "%s - xattr dictionary is NULL", + loc->path); + ret = -1; + goto out; + } + + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", + VOID(&disk_layout)); + + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s - disk layout missing", loc->path); + ret = -1; + goto out; + } + + count = ntoh32 (disk_layout[0]); + if (count != 1) { + gf_log (this->name, GF_LOG_ERROR, + "%s - disk layout has invalid count 
%d", + loc->path, count); + ret = -1; + goto out; + } + + start_off = ntoh32 (disk_layout[2]); + stop_off = ntoh32 (disk_layout[3]); + + if ((layout->list[pos].start != start_off) + || (layout->list[pos].stop != stop_off)) { + gf_log (this->name, GF_LOG_DEBUG, + "subvol: %s; inode layout - %"PRId32" - %"PRId32"; " + "disk layout - %"PRId32" - %"PRId32, + layout->list[pos].xlator->name, + layout->list[pos].start, layout->list[pos].stop, + start_off, stop_off); + ret = 1; + } else { + ret = 0; + } +out: + return ret; +} + diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c new file mode 100644 index 00000000000..9cc24ccf6b3 --- /dev/null +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -0,0 +1,224 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "compat.h" +#include "dht-common.h" + + + +int +dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + + + local = frame->local; + local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno, + local->linkfile.inode, + &local->linkfile.stbuf); + + return 0; +} + + +int +dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dict_t *xattr = NULL; + data_t *str_data = NULL; + int ret = -1; + + local = frame->local; + prev = cookie; + + if (op_ret == -1) + goto err; + + xattr = get_new_dict (); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->linkfile.xattr = dict_ref (xattr); + local->linkfile.inode = inode_ref (inode); + + str_data = str_to_data (local->linkfile.srcvol->name); + if (!str_data) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize linkfile data"); + op_errno = EINVAL; + } + str_data = NULL; + + local->linkfile.stbuf = *stbuf; + + STACK_WIND (frame, dht_linkfile_xattr_cbk, + prev->this, prev->this->fops->setxattr, + &local->linkfile.loc, local->linkfile.xattr, 0); + + return 0; + +err: + if (str_data) { + data_destroy (str_data); + str_data = NULL; + } + + local->linkfile.linkfile_cbk (frame, cookie, this, + op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc) +{ + dht_local_t 
*local = NULL; + + + local = frame->local; + local->linkfile.linkfile_cbk = linkfile_cbk; + local->linkfile.srcvol = tovol; + loc_copy (&local->linkfile.loc, loc); + + STACK_WIND (frame, dht_linkfile_create_cbk, + fromvol, fromvol->fops->mknod, loc, + S_IFREG | DHT_LINKFILE_MODE, 0); + + return 0; +} + + +int +dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + + local = frame->local; + prev = cookie; + subvol = prev->this; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "unlinking linkfile %s on %s failed (%s)", + local->loc.path, subvol->name, strerror (op_errno)); + } + + DHT_STACK_DESTROY (frame); + + return 0; +} + + +int +dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc) +{ + call_frame_t *unlink_frame = NULL; + dht_local_t *unlink_local = NULL; + + unlink_frame = copy_frame (frame); + if (!unlink_frame) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + unlink_local = dht_local_init (unlink_frame); + if (!unlink_local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + loc_copy (&unlink_local->loc, loc); + + STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk, + subvol, subvol->fops->unlink, + &unlink_local->loc); + + return 0; +err: + if (unlink_frame) + DHT_STACK_DESTROY (unlink_frame); + + return -1; +} + + +xlator_t * +dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf, + dict_t *xattr) +{ + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + void *volname = NULL; + int i = 0, ret = 0; + + + conf = this->private; + + if (!xattr) + goto out; + + ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname); + + if ((-1 == ret) || !volname) + goto out; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (strcmp (conf->subvolumes[i]->name, (char 
*)volname) == 0) { + subvol = conf->subvolumes[i]; + break; + } + } + +out: + return subvol; +} + + diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c new file mode 100644 index 00000000000..e5532f1bc87 --- /dev/null +++ b/xlators/cluster/dht/src/dht-rename.c @@ -0,0 +1,562 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should + * delete the newpath if it gets EEXISTS from link() call. 
+ */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +int +dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + /* TODO: undo the damage */ + + gf_log (this->name, GF_LOG_ERROR, + "rename %s -> %s on %s failed (%s)", + local->loc.path, local->loc2.path, + prev->this->name, strerror (op_errno)); + + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + /* TODO: construct proper stbuf for dir */ + local->stbuf = *stbuf; + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + + + +int +dht_rename_dir_do (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int i = 0; + + conf = this->private; + local = frame->local; + + if (local->op_ret == -1) + goto err; + + local->call_cnt = conf->subvolume_cnt; + local->op_ret = 0; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rename_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, + &local->loc, &local->loc2); + } + + return 0; + +err: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + return 0; +} + + +int +dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + if (op_ret > 2) { + gf_log (this->name, GF_LOG_DEBUG, + "readdir on %s for %s returned %d entries", + prev->this->name, local->loc.path, op_ret); + local->op_ret = -1; + local->op_errno 
= ENOTEMPTY; + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rename_dir_do (frame, this); + } + + return 0; +} + + +int +dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + int this_call_cnt = -1; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "opendir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto err; + } + + STACK_WIND (frame, dht_rename_readdir_cbk, + prev->this, prev->this->fops->readdir, + local->fd, 4096, 0); + + return 0; + +err: + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_rename_dir_do (frame, this); + } + + return 0; +} + + +int +dht_rename_dir (call_frame_t *frame, xlator_t *this) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int i = 0; + int op_errno = -1; + + + conf = frame->this->private; + local = frame->local; + + local->call_cnt = conf->subvolume_cnt; + + local->fd = fd_create (local->loc.inode, frame->root->pid); + if (!local->fd) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->op_ret = 0; + + if (!local->dst_cached) { + dht_rename_dir_do (frame, this); + return 0; + } + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rename_opendir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->opendir, + &local->loc2, local->fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + return 0; +} + + +int +dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + + local = frame->local; + prev = cookie; + + this_call_cnt = dht_frame_return (frame); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "unlink on %s failed (%s)", + prev->this->name, strerror (op_errno)); + } + + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + xlator_t *rename_subvol = NULL; + + local = frame->local; + prev = cookie; + + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "rename on %s failed (%s)", prev->this->name, + strerror (op_errno)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk + * is called. since rename has already happened on rename_subvol, + * unlink should not be sent for oldpath (either linkfile or cached-file) + * on rename_subvol. 
*/ + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + /* TODO: delete files in background */ + + if (src_cached != dst_hashed && src_cached != dst_cached) + local->call_cnt++; + + if (src_hashed != rename_subvol && src_hashed != src_cached) + local->call_cnt++; + + if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) + local->call_cnt++; + + if (local->call_cnt == 0) + goto unwind; + + if (src_cached != dst_hashed && src_cached != dst_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old src datafile %s @ %s", + local->loc.path, src_cached->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + src_cached, src_cached->fops->unlink, + &local->loc); + } + + if (src_hashed != rename_subvol && src_hashed != src_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old src linkfile %s @ %s", + local->loc.path, src_hashed->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + src_hashed, src_hashed->fops->unlink, + &local->loc); + } + + if (dst_cached + && (dst_cached != dst_hashed) + && (dst_cached != src_cached)) { + gf_log (this->name, GF_LOG_DEBUG, + "deleting old dst datafile %s @ %s", + local->loc2.path, dst_cached->name); + + STACK_WIND (frame, dht_rename_unlink_cbk, + dst_cached, dst_cached->fops->unlink, + &local->loc2); + } + return 0; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_do_rename (call_frame_t *frame) +{ + dht_local_t *local = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_cached = NULL; + xlator_t *this = NULL; + xlator_t *rename_subvol = NULL; + + + local = frame->local; + this = frame->this; + + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + src_cached = local->src_cached; + + if (src_cached == dst_cached) + rename_subvol = src_cached; + else + rename_subvol = dst_hashed; + + gf_log (this->name, GF_LOG_DEBUG, + "renaming %s => %s (%s)", 
+ local->loc.path, local->loc2.path, rename_subvol->name); + + STACK_WIND (frame, dht_rename_cbk, + rename_subvol, rename_subvol->fops->rename, + &local->loc, &local->loc2); + + return 0; +} + + +int +dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + + + local = frame->local; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "link/file on %s failed (%s)", + prev->this->name, strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->op_ret == -1) + goto unwind; + + dht_do_rename (frame); + } + + return 0; + +unwind: + DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + + return 0; +} + + +int +dht_rename_create_links (call_frame_t *frame) +{ + dht_local_t *local = NULL; + xlator_t *this = NULL; + xlator_t *src_hashed = NULL; + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; + int call_cnt = 0; + + + local = frame->local; + this = frame->this; + + src_hashed = local->src_hashed; + src_cached = local->src_cached; + dst_hashed = local->dst_hashed; + dst_cached = local->dst_cached; + + if (src_cached == dst_cached) + goto nolinks; + + if (dst_hashed != src_hashed && dst_hashed != src_cached) + call_cnt++; + + if (src_cached != dst_hashed) + call_cnt++; + + local->call_cnt = call_cnt; + + if (dst_hashed != src_hashed && dst_hashed != src_cached) { + gf_log (this->name, GF_LOG_DEBUG, + "linkfile %s @ %s => %s", + local->loc.path, dst_hashed->name, src_cached->name); + dht_linkfile_create (frame, dht_rename_links_cbk, + src_cached, dst_hashed, &local->loc); + } + + if (src_cached != dst_hashed) { + gf_log (this->name, GF_LOG_DEBUG, + "link %s => %s (%s)", local->loc.path, + 
/*
 * rename fop entry point of the translator.
 *
 * Resolves the hashed and cached subvolumes of both paths, stores them
 * together with copies of @oldloc and @newloc in a fresh local, then
 * hands over to dht_rename_dir() when the source inode is a directory
 * and to dht_rename_create_links() otherwise.
 *
 * Any failure before that point unwinds the frame with op_ret -1; an
 * op_errno still at its initial -1 is replaced by the current errno.
 * Always returns 0.
 */
int
dht_rename (call_frame_t *frame, xlator_t *this,
            loc_t *oldloc, loc_t *newloc)
{
        xlator_t    *src_cached = NULL;
        xlator_t    *src_hashed = NULL;
        xlator_t    *dst_cached = NULL;
        xlator_t    *dst_hashed = NULL;
        int          op_errno = -1;
        int          ret = -1;
        dht_local_t *local = NULL;


        VALIDATE_OR_GOTO (frame, err);
        VALIDATE_OR_GOTO (this, err);
        VALIDATE_OR_GOTO (oldloc, err);
        VALIDATE_OR_GOTO (newloc, err);

        src_hashed = dht_subvol_get_hashed (this, oldloc);
        if (!src_hashed) {
                gf_log (this->name, GF_LOG_ERROR,
                        "no subvolume in layout for path=%s",
                        oldloc->path);
                op_errno = EINVAL;
                goto err;
        }

        src_cached = dht_subvol_get_cached (this, oldloc->inode);
        if (!src_cached) {
                gf_log (this->name, GF_LOG_ERROR,
                        "no cached subvolume for path=%s", oldloc->path);
                op_errno = EINVAL;
                goto err;
        }

        dst_hashed = dht_subvol_get_hashed (this, newloc);
        if (!dst_hashed) {
                gf_log (this->name, GF_LOG_ERROR,
                        "no subvolume in layout for path=%s",
                        newloc->path);
                op_errno = EINVAL;
                goto err;
        }

        /* the destination need not exist: dst_cached stays NULL then */
        if (newloc->inode)
                dst_cached = dht_subvol_get_cached (this, newloc->inode);

        local = dht_local_init (frame);
        if (!local) {
                op_errno = ENOMEM;
                gf_log (this->name, GF_LOG_ERROR,
                        "memory allocation failed :(");
                goto err;
        }

        ret = loc_copy (&local->loc, oldloc);
        if (ret == -1) {
                op_errno = ENOMEM;
                gf_log (this->name, GF_LOG_ERROR,
                        "memory allocation failed :(");
                goto err;
        }

        ret = loc_copy (&local->loc2, newloc);
        if (ret == -1) {
                op_errno = ENOMEM;
                gf_log (this->name, GF_LOG_ERROR,
                        "memory allocation failed :(");
                goto err;
        }

        local->src_hashed = src_hashed;
        local->src_cached = src_cached;
        local->dst_hashed = dst_hashed;
        local->dst_cached = dst_cached;

        gf_log (this->name, GF_LOG_DEBUG,
                "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
                oldloc->path, src_hashed->name, src_cached->name,
                newloc->path, dst_hashed->name,
                dst_cached ? dst_cached->name : "<nul>");

        if (S_ISDIR (oldloc->inode->st_mode)) {
                dht_rename_dir (frame, this);
        } else {
                local->op_ret = 0;
                dht_rename_create_links (frame);
        }

        return 0;

err:
        /* NOTE(review): this path is also reached when frame itself failed
         * validation, and it passes two trailing NULLs where the other
         * rename unwinds in this file pass one -- confirm both. */
        op_errno = (op_errno == -1) ? errno : op_errno;
        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);

        return 0;
}
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret) +{ + dht_local_t *local = NULL; + + + local = frame->local; + local->selfheal.dir_cbk (frame, NULL, frame->this, ret, + local->op_errno); + + return 0; +} + + +int +dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int err = 0; + int this_call_cnt = 0; + + local = frame->local; + layout = local->selfheal.layout; + prev = cookie; + subvol = prev->this; + + if (op_ret == 0) + err = 0; + else + err = op_errno; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = err; + break; + } + } + + this_call_cnt = dht_frame_return (frame); + + if (is_last_call (this_call_cnt)) { + dht_selfheal_dir_finish (frame, this, 0); + } + + return 0; +} + + +int +dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout, int i) +{ + xlator_t *subvol = NULL; + dict_t *xattr = NULL; + int ret = 0; + xlator_t *this = NULL; + int32_t *disk_layout = NULL; + + + subvol = layout->list[i].xlator; + this = frame->this; + + xattr = get_new_dict (); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = dht_disk_layout_extract (this, layout, i, &disk_layout); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to extract disk layout"); + goto err; + } + + ret = dict_set_bin (xattr, "trusted.glusterfs.dht", + disk_layout, 4 * 4); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set xattr dictionary"); + goto err; + } + disk_layout = NULL; + + gf_log (this->name, GF_LOG_DEBUG, + "setting hash range %u - %u (type %d) on 
subvolume %s for %s", + layout->list[i].start, layout->list[i].stop, + layout->type, subvol->name, loc->path); + + dict_ref (xattr); + + STACK_WIND (frame, dht_selfheal_dir_xattr_cbk, + subvol, subvol->fops->setxattr, + loc, xattr, 0); + + dict_unref (xattr); + + return 0; + +err: + if (xattr) + dict_destroy (xattr); + + if (disk_layout) + FREE (disk_layout); + + dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this, + -1, ENOMEM); + return 0; +} + + +int +dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +{ + dht_local_t *local = NULL; + int missing_xattr = 0; + int i = 0; + int ret = 0; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + /* attr missing and layout present */ + missing_xattr++; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%d subvolumes missing xattr for %s", + missing_xattr, loc->path); + + if (missing_xattr == 0) { + dht_selfheal_dir_finish (frame, this, 0); + return 0; + } + + local->call_cnt = missing_xattr; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + + ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i); + + if (--missing_xattr == 0) + break; + } + return 0; +} + + +int +dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + call_frame_t *prev = NULL; + xlator_t *subvol = NULL; + int i = 0; + int this_call_cnt = 0; + + + local = frame->local; + layout = local->selfheal.layout; + prev = cookie; + subvol = prev->this; + + if ((op_ret == 0) || (op_errno == EEXIST)) { + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].xlator == subvol) { + layout->list[i].err = -1; + break; + } + } + } + + this_call_cnt = dht_frame_return (frame); + + if 
(is_last_call (this_call_cnt)) { + dht_selfheal_dir_xattr (frame, &local->loc, layout); + } + + return 0; +} + + +int +dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout, int force) +{ + int missing_dirs = 0; + int i = 0; + dht_local_t *local = NULL; + xlator_t *this = NULL; + + + local = frame->local; + this = frame->this; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || force) + missing_dirs++; + } + + if (missing_dirs == 0) { + dht_selfheal_dir_xattr (frame, loc, layout); + return 0; + } + + local->call_cnt = missing_dirs; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == ENOENT || force) { + gf_log (this->name, GF_LOG_DEBUG, + "creating directory %s on subvol %s", + loc->path, layout->list[i].xlator->name); + + STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->mkdir, + loc, local->stbuf.st_mode); + } + } + + return 0; +} + +void +dht_selfheal_fix_this_virgin (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_conf_t *conf = NULL; + xlator_t *this = NULL; + uint32_t chunk = 0; + int i = 0; + uint32_t start = 0; + int cnt = 0; + int err = 0; + + this = frame->this; + conf = this->private; + + for (i = 0; i < layout->cnt; i++) { + err = layout->list[i].err; + if (err == -1) { + cnt++; + } + } + + chunk = ((unsigned long) 0xffffffff) / cnt; + + start = 0; + for (i = 0; i < layout->cnt; i++) { + err = layout->list[i].err; + if (err == -1) { + layout->list[i].start = start; + layout->list[i].stop = start + chunk - 1; + + start = start + chunk; + + gf_log (this->name, GF_LOG_DEBUG, + "gave fix: %u - %u on %s for %s", + layout->list[i].start, layout->list[i].stop, + layout->list[i].xlator->name, loc->path); + if (--cnt == 0) { + layout->list[i].stop = 0xffffffff; + break; + } + } + } +} + + +int +dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_conf_t *conf = NULL; + xlator_t *this = 
/*
 * Entry point of directory self-heal.
 *
 * Stores the anomaly counts of @layout (from dht_layout_anomalies()),
 * @dir_cbk and @layout in local->selfheal, then asks
 * dht_selfheal_dir_getafix() whether the layout can be repaired.  If it
 * can, dht_selfheal_dir_mkdir() starts the repair and @dir_cbk is reached
 * through the rest of the chain.  If it returns -1,
 * dht_selfheal_dir_finish() reports that value to @dir_cbk at once.
 *
 * Always returns 0.
 */
int
dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
                        loc_t *loc, dht_layout_t *layout)
{
        dht_local_t *local    = NULL;
        uint32_t     holes    = 0;
        uint32_t     overlaps = 0;
        uint32_t     missing  = 0;
        uint32_t     down     = 0;
        uint32_t     misc     = 0;
        int          ret      = 0;
        xlator_t    *this     = NULL;


        local = frame->local;
        this = frame->this;

        ret = dht_layout_anomalies (this, loc, layout,
                                    &local->selfheal.hole_cnt,
                                    &local->selfheal.overlaps_cnt,
                                    &local->selfheal.missing,
                                    &local->selfheal.down,
                                    &local->selfheal.misc);

        /* these copies are only read by the disabled checks below */
        holes    = local->selfheal.hole_cnt;
        overlaps = local->selfheal.overlaps_cnt;
        missing  = local->selfheal.missing;
        down     = local->selfheal.down;
        misc     = local->selfheal.misc;

        local->selfheal.dir_cbk = dir_cbk;
        local->selfheal.layout = layout;

/*
        if (down) {
                gf_log (this->name, GF_LOG_ERROR,
                        "%d subvolumes down -- not fixing", down);
                ret = 0;
                goto sorry_no_fix;
        }

        if (overlaps) {
                gf_log (this->name, GF_LOG_ERROR,
                        "not fixing overlaps in %s", loc->path);
                local->op_errno = EINVAL;
                ret = -1;
                goto sorry_no_fix;
        }

        if (misc) {
                gf_log (this->name, GF_LOG_ERROR,
                        "%d subvolumes have unrecoverable errors", misc);
                ret = 0;
                goto sorry_no_fix;
        }

        if (holes > missing) {
                gf_log (this->name, GF_LOG_ERROR,
                        "%d holes and %d pigeons -- not fixing",
                        holes, missing);
                ret = 0;
                goto sorry_no_fix;
        }
*/
        /* the result of dht_layout_anomalies() in ret is overwritten here */
        ret = dht_selfheal_dir_getafix (frame, loc, layout);

        if (ret == -1) {
                gf_log (this->name, GF_LOG_ERROR,
                        "the directory is not a virgin");
                goto sorry_no_fix;
        }

        /* force = 0: only entries with err == ENOENT get a mkdir */
        dht_selfheal_dir_mkdir (frame, loc, layout, 0);

        return 0;

sorry_no_fix:
        /* TODO: need to put appropriate local->op_errno */
        dht_selfheal_dir_finish (frame, this, ret);

        return 0;
}
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "dht-common.c" + +/* TODO: + - use volumename in xattr instead of "dht" + - use NS locks + - handle all cases in self heal layout reconstruction + - complete linkfile selfheal +*/ + + + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = -1; + + ret = dht_notify (this, event, data); + + return ret; +} + +void +fini (xlator_t *this) +{ + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return; +} + +int +init (xlator_t *this) +{ + dht_conf_t *conf = NULL; + char *lookup_unhashed_str = NULL; + int ret = -1; + int i = 0; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "DHT needs more than one child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + conf = CALLOC (1, sizeof (*conf)); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + conf->search_unhashed = 0; + + if (dict_get_str (this->options, "lookup-unhashed", + &lookup_unhashed_str) == 0) { + gf_string2boolean (lookup_unhashed_str, + &conf->search_unhashed); + } + + ret = dht_init_subvolumes (this, conf); + if (ret == -1) { + goto err; + } + + ret = dht_layouts_init (this, conf); + if (ret == -1) { + goto err; + } + + LOCK_INIT (&conf->subvolume_lock); + + conf->gen = 1; + + this->private = conf; + + return 0; + +err: + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return -1; +} + + +struct xlator_fops fops = { + .lookup = dht_lookup, + .mknod = dht_mknod, + .create = dht_create, + + .stat = dht_stat, + .chmod = dht_chmod, + .chown = dht_chown, + .fchown = dht_fchown, + .fchmod = dht_fchmod, + .fstat = dht_fstat, + .utimens = dht_utimens, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, +#if 0 
+ .setdents = dht_setdents, + .getdents = dht_getdents, + .checksum = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +// .release = dht_release, +// .releasedir = dht_releasedir, + .forget = dht_forget +}; + + +struct volume_options options[] = { + { .key = {"lookup-unhashed"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c new file mode 100644 index 00000000000..6333e002fbc --- /dev/null +++ b/xlators/cluster/dht/src/nufa.c @@ -0,0 +1,684 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.c" + +/* TODO: all 'TODO's in dht.c holds good */ + +int +nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ + dht_layout_t *layout = NULL; + xlator_t *subvol = NULL; + char is_linkfile = 0; + char is_dir = 0; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + loc_t *loc = NULL; + int i = 0; + call_frame_t *prev = NULL; + int call_cnt = 0; + + + conf = this->private; + + prev = cookie; + local = frame->local; + loc = &local->loc; + + if (ENTRY_MISSING (op_ret, op_errno)) { + if (conf->search_unhashed) { + local->op_errno = ENOENT; + dht_lookup_everywhere (frame, this, loc); + return 0; + } + } + + if (op_ret == -1) + goto out; + + is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_dir = check_is_dir (inode, stbuf, xattr); + + if (!is_dir && !is_linkfile) { + /* non-directory and not a linkfile */ + + dht_itransform (this, prev->this, stbuf->st_ino, + &stbuf->st_ino); + + layout = dht_layout_for_subvol (this, prev->this); + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "no pre-set layout for subvolume %s", + prev->this->name); + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + inode_ctx_put (inode, this, (uint64_t)(long)layout); + goto out; + } + + if (is_dir) { + call_cnt = conf->subvolume_cnt; + local->call_cnt = call_cnt; + + local->inode = inode_ref (inode); + local->xattr = dict_ref (xattr); + + local->op_ret = 0; + local->op_errno = 0; + + local->layout = dht_layout_new (this, conf->subvolume_cnt); + if (!local->layout) { + op_ret = -1; + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + for (i = 0; i < call_cnt; i++) { + STACK_WIND (frame, dht_lookup_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->lookup, + &local->loc, local->xattr_req); + } + } + + if 
(is_linkfile) { + subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + + if (!subvol) { + gf_log (this->name, GF_LOG_WARNING, + "linkfile not having link subvolume. path=%s", + loc->path); + dht_lookup_everywhere (frame, this, loc); + return 0; + } + + STACK_WIND (frame, dht_lookup_linkfile_cbk, + subvol, subvol->fops->lookup, + &local->loc, local->xattr_req); + } + + return 0; + +out: + if (!local->hashed_subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + local->loc.path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_lookup_cbk, + local->hashed_subvol, local->hashed_subvol->fops->lookup, + &local->loc, local->xattr_req); + + return 0; + + err: + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + return 0; +} + +int +nufa_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xattr_req) +{ + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + xlator_t *subvol = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + int ret = -1; + int op_errno = -1; + dht_layout_t *layout = NULL; + int i = 0; + int call_cnt = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "copying location failed for path=%s", + loc->path); + goto err; + } + + if (xattr_req) { + local->xattr_req = dict_ref (xattr_req); + } else { + local->xattr_req = dict_new (); + } + + hashed_subvol = dht_subvol_get_hashed (this, &local->loc); + cached_subvol = dht_subvol_get_cached (this, local->loc.inode); + + local->cached_subvol = cached_subvol; + local->hashed_subvol = 
hashed_subvol; + + if (is_revalidate (loc)) { + layout = dht_layout_get (this, loc->inode); + + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "revalidate without cache. path=%s", + loc->path); + op_errno = EINVAL; + goto err; + } + + if (layout->gen && (layout->gen < conf->gen)) { + gf_log (this->name, GF_LOG_WARNING, + "incomplete layout failure for path=%s", + loc->path); + op_errno = EAGAIN; + goto err; + } + + local->inode = inode_ref (loc->inode); + local->st_ino = loc->inode->ino; + + local->call_cnt = layout->cnt; + call_cnt = local->call_cnt; + + /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, + * revalidates directly go to the cached-subvolume. + */ + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_revalidate_cbk, + subvol, subvol->fops->lookup, + loc, local->xattr_req); + + if (!--call_cnt) + break; + } + } else { + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht", 4 * 4); + + ret = dict_set_uint32 (local->xattr_req, + "trusted.glusterfs.dht.linkto", 256); + + /* Send it to only local volume */ + STACK_WIND (frame, nufa_local_lookup_cbk, + conf->local_volume, + conf->local_volume->fops->lookup, + loc, local->xattr_req); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? 
errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret == -1) + goto err; + + STACK_WIND (frame, dht_create_cbk, + conf->local_volume, conf->local_volume->fops->create, + &local->loc, local->flags, local->mode, local->fd); + + return 0; + + err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +nufa_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + int op_errno = -1; + int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + conf = this->private; + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + if (subvol != conf->local_volume) { + /* create a link file instead of actual file */ + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->fd = fd_ref (fd); + local->mode = mode; + local->flags = flags; + + dht_linkfile_create (frame, nufa_create_linkfile_create_cbk, + conf->local_volume, subvol, loc); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + 
loc, flags, mode, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + +int +nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + dht_conf_t *conf = NULL; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret >= 0) { + STACK_WIND (frame, dht_newfile_cbk, + conf->local_volume, + conf->local_volume->fops->mknod, + &local->loc, local->mode, local->rdev); + + return 0; + } + + DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + return 0; +} + + +int +nufa_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev) +{ + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + xlator_t *subvol = NULL; + int op_errno = -1; + int ret = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + + conf = this->private; + + + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + subvol = dht_subvol_get_hashed (this, loc); + if (!subvol) { + gf_log (this->name, GF_LOG_ERROR, + "no subvolume in layout for path=%s", + loc->path); + op_errno = ENOENT; + goto err; + } + + + if (conf->local_volume != subvol) { + /* Create linkfile first */ + ret = loc_copy (&local->loc, loc); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + + local->mode = mode; + local->rdev = rdev; + + dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, + conf->local_volume, subvol, loc); + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->mknod, + loc, mode, rdev); + 
+ return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +notify (xlator_t *this, int event, void *data, ...) +{ + int ret = -1; + + ret = dht_notify (this, event, data); + + return ret; +} + +void +fini (xlator_t *this) +{ + int i = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return; +} + +int +init (xlator_t *this) +{ + dht_conf_t *conf = NULL; + xlator_list_t *trav = NULL; + data_t *data = NULL; + char *local_volname = NULL; + char *lookup_unhashed_str = NULL; + int ret = -1; + int i = 0; + char my_hostname[256]; + + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "DHT needs more than one child defined"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + conf = CALLOC (1, sizeof (*conf)); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + conf->search_unhashed = 0; + + if (dict_get_str (this->options, "lookup-unhashed", + &lookup_unhashed_str) == 0) { + gf_string2boolean (lookup_unhashed_str, + &conf->search_unhashed); + } + + ret = dht_init_subvolumes (this, conf); + if (ret == -1) { + goto err; + } + + ret = dht_layouts_init (this, conf); + if (ret == -1) { + goto err; + } + + LOCK_INIT (&conf->subvolume_lock); + + conf->gen = 1; + + local_volname = "localhost"; + ret = gethostname (my_hostname, 256); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not find hostname (%s)", + strerror (errno)); + } + + if (ret == 0) + local_volname = my_hostname; + + data = dict_get (this->options, "local-volume-name"); + if (data) { + local_volname = data->data; + } + + trav = this->children; + while (trav) { + if (strcmp (trav->xlator->name, local_volname) == 0) + break; + trav = trav->next; + } + + if (!trav) { + gf_log (this->name, GF_LOG_ERROR, + "Could not find subvolume named '%s'. 
" + "Please define volume with the name as the hostname " + "or override it with 'option local-volume-name'", + local_volname); + goto err; + } + /* The volume specified exists */ + conf->local_volume = trav->xlator; + + this->private = conf; + + return 0; + +err: + if (conf) { + if (conf->file_layouts) { + for (i = 0; i < conf->subvolume_cnt; i++) { + FREE (conf->file_layouts[i]); + } + FREE (conf->file_layouts); + } + + if (conf->default_dir_layout) + FREE (conf->default_dir_layout); + + if (conf->subvolumes) + FREE (conf->subvolumes); + + if (conf->subvolume_status) + FREE (conf->subvolume_status); + + FREE (conf); + } + + return -1; +} + + +struct xlator_fops fops = { + .lookup = nufa_lookup, + .create = nufa_create, + .mknod = nufa_mknod, + + .stat = dht_stat, + .chmod = dht_chmod, + .chown = dht_chown, + .fchown = dht_fchown, + .fchmod = dht_fchmod, + .fstat = dht_fstat, + .utimens = dht_utimens, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .access = dht_access, + .readlink = dht_readlink, + .setxattr = dht_setxattr, + .getxattr = dht_getxattr, + .removexattr = dht_removexattr, + .open = dht_open, + .readv = dht_readv, + .writev = dht_writev, + .flush = dht_flush, + .fsync = dht_fsync, + .statfs = dht_statfs, + .lk = dht_lk, + .opendir = dht_opendir, + .readdir = dht_readdir, + .fsyncdir = dht_fsyncdir, + .symlink = dht_symlink, + .unlink = dht_unlink, + .link = dht_link, + .mkdir = dht_mkdir, + .rmdir = dht_rmdir, + .rename = dht_rename, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .entrylk = dht_entrylk, + .fentrylk = dht_fentrylk, + .xattrop = dht_xattrop, + .fxattrop = dht_fxattrop, +#if 0 + .setdents = dht_setdents, + .getdents = dht_getdents, + .checksum = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +// .release = dht_release, +// .releasedir = dht_releasedir, + .forget = dht_forget +}; + + +struct volume_options options[] = { + { .key = {"local-volume-name"}, + .type = 
GF_OPTION_TYPE_XLATOR + }, + { .key = {"lookup-unhashed"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} }, +}; |