diff options
Diffstat (limited to 'xlators/cluster/dht/src')
| -rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 30 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 3470 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 212 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-hashfn-tea.c | 146 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-hashfn.c | 88 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 326 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 543 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-linkfile.c | 224 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 562 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 460 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 222 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/nufa.c | 684 | 
12 files changed, 6967 insertions, 0 deletions
| diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am new file mode 100644 index 00000000000..b7d07d137a6 --- /dev/null +++ b/xlators/cluster/dht/src/Makefile.am @@ -0,0 +1,30 @@ + +xlator_LTLIBRARIES = dht.la nufa.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + + +dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \ +		dht-selfheal.c dht-rename.c dht-hashfn.c dht-hashfn-tea.c + +dht_la_SOURCES = $(dht_common_source) dht.c  + +nufa_la_SOURCES = $(dht_common_source) nufa.c + +dht_la_LDFLAGS = -module -avoidversion +dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +nufa_la_LDFLAGS = -module -avoidversion +nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = dht-common.h dht-common.c + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ +	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES =  + +uninstall-local: +	rm -f $(DESTDIR)$(xlatordir)/distribute.so + +install-data-hook: +	ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
\ No newline at end of file diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c new file mode 100644 index 00000000000..5e4979e31b0 --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.c @@ -0,0 +1,3470 @@ +/* +   Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +/* TODO: +   - use volumename in xattr instead of "dht" +   - use NS locks +   - handle all cases in self heal layout reconstruction +   - complete linkfile selfheal +*/ + +int +dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, +			 xlator_t *this, +			 int op_ret, int op_errno) +{ +	dht_local_t  *local = NULL; +	dht_layout_t *layout = NULL; +	int           ret = 0; + +	local = frame->local; +	ret = op_ret; + +	if (ret == 0) { +		layout = local->selfheal.layout; +		ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + +		if (ret == 0) +			local->selfheal.layout = NULL; +		 +		if (local->st_ino) { +			local->stbuf.st_ino = local->st_ino; +		} else { +			gf_log (this->name, GF_LOG_WARNING, +				"could not find hashed subvolume for %s", +				local->loc.path); +		} +	} + +	DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode, +			  &local->stbuf, local->xattr); + +	return 0; +} + + +int +dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int op_ret, int op_errno, +                    inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ +	dht_conf_t   *conf          = NULL; +        dht_local_t  *local         = NULL; +        int           this_call_cnt = 0; +        call_frame_t *prev          = NULL; +	dht_layout_t *layout        = NULL; +	int           ret           = 0; +	int           is_dir        = 0; + +	conf  = this->private; +        local = frame->local; +        prev  = cookie; + +	layout = local->layout; + +        LOCK (&frame->lock); +        { +                /* TODO: assert equal mode on stbuf->st_mode and +		   local->stbuf->st_mode + +		   else mkdir/chmod/chown and fix +		*/ +		/* TODO: assert equal hash type in xattr, local->xattr */ + +		/* TODO: always ensure same 
subvolume is in layout->list[0] */ + +		ret = dht_layout_merge (this, layout, prev->this, +					op_ret, op_errno, xattr); + +		if (op_ret == -1) { +			local->op_errno = ENOENT; +			gf_log (this->name, GF_LOG_WARNING, +				"lookup of %s on %s returned error (%s)", +				local->loc.path, prev->this->name, +				strerror (op_errno)); + +			goto unlock; +		} + + 		is_dir = check_is_dir (inode, stbuf, xattr); + 		if (!is_dir)  + 			goto unlock; + + 		local->op_ret = 0; + 		if (local->xattr == NULL) + 			local->xattr = dict_ref (xattr); + 		if (local->inode == NULL) + 			local->inode = inode_ref (inode); + +		dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + +		if (prev->this == local->hashed_subvol) +			local->st_ino = local->stbuf.st_ino; + +        } +unlock: +        UNLOCK (&frame->lock); + + +        this_call_cnt = dht_frame_return (frame); + +        if (is_last_call (this_call_cnt)) { +		if (local->op_ret == 0) { +			ret = dht_layout_normalize (this, &local->loc, layout); + +			local->layout = NULL; + +			if (ret != 0) { +				layout->gen = conf->gen; + +				gf_log (this->name, GF_LOG_WARNING, +					"fixing assignment on %s", +					local->loc.path); +				goto selfheal; +			} +			 +			inode_ctx_put (local->inode, this, (uint64_t)(long)layout); +			 +			if (local->st_ino) { +				local->stbuf.st_ino = local->st_ino; +			} else { +				gf_log (this->name, GF_LOG_WARNING, +					"could not find hashed subvolume for %s", +					local->loc.path); +			} +		} + +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  local->inode, &local->stbuf, local->xattr); +        } + +	return 0; + +selfheal: +	ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, +				      &local->loc, layout); + +	return 0; +} + +int +dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int op_ret, int op_errno, +                    inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ +        dht_local_t  *local         = NULL; +       
 int           this_call_cnt = 0; +        call_frame_t *prev          = NULL; +	dht_layout_t *layout        = NULL; +	int           ret  = -1; +	int           is_dir = 0; +	int           is_linkfile = 0; + +        local = frame->local; +        prev  = cookie; + +        LOCK (&frame->lock); +        { +		if (op_ret == -1) { +			local->op_errno = op_errno; + +			if (op_errno != ENOTCONN && op_errno != ENOENT) { +				gf_log (this->name, GF_LOG_WARNING, +					"subvolume %s returned -1 (%s)", +					prev->this->name, strerror (op_errno)); +			} + +			goto unlock; +		} + +		if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) { +			gf_log (this->name, GF_LOG_WARNING, +				"mismatching filetypes 0%o v/s 0%o for %s", +				(stbuf->st_mode & S_IFMT), +				(local->inode->st_mode & S_IFMT), +				local->loc.path); + +			local->op_ret = -1; +			local->op_errno = EINVAL; + +			goto unlock; +		} + +		layout = dht_layout_get (this, inode); +		 +		is_dir = check_is_dir (inode, stbuf, xattr); +		is_linkfile = check_is_linkfile (inode, stbuf, xattr); +		 +		if (is_linkfile) { +			gf_log (this->name, GF_LOG_WARNING, +				"linkfile found in revalidate for %s", +				local->loc.path); +			local->layout_mismatch = 1; + +			goto unlock; +		} + +		if (is_dir) { +			ret = dht_layout_dir_mismatch (this, layout, +						       prev->this, &local->loc, +						       xattr); +			if (ret != 0) { +				gf_log (this->name, GF_LOG_WARNING, +					"mismatching layouts for %s",  +					local->loc.path); +			 +				local->layout_mismatch = 1; + +				goto unlock; +			} +		}  +		 +		dht_stat_merge (this, &local->stbuf, stbuf, prev->this); +		 +		local->op_ret = 0; +		local->stbuf.st_ino = local->st_ino; + +		if (!local->xattr) +			local->xattr = dict_ref (xattr); +	} +unlock: +	UNLOCK (&frame->lock); + +        this_call_cnt = dht_frame_return (frame); + +        if (is_last_call (this_call_cnt)) { +		if (!S_ISDIR (local->stbuf.st_mode) +		    && (local->hashed_subvol != local->cached_subvol) +		    && 
(local->stbuf.st_nlink == 1)) +			local->stbuf.st_mode |= S_ISVTX; +		 +		if (local->layout_mismatch) { +			local->op_ret = -1; +			local->op_errno = ESTALE; +		} +			 +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  local->inode, &local->stbuf, local->xattr); +	} + +        return 0; +} + + +int +dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, +				xlator_t *this, +				int32_t op_ret, int32_t op_errno, +				inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	dht_layout_t *layout = NULL; +	xlator_t     *cached_subvol = NULL; + +	local = frame->local; +	cached_subvol = local->cached_subvol; + +	layout = dht_layout_for_subvol (this, local->cached_subvol); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no pre-set layout for subvolume %s", +			cached_subvol ? cached_subvol->name : "<nil>"); +		local->op_ret = -1; +		local->op_errno = EINVAL; +		goto unwind; +	} + +	inode_ctx_put (local->inode, this, (uint64_t)(long)layout); +	local->op_ret = 0; +	if (local->stbuf.st_nlink == 1) +		local->stbuf.st_mode |= S_ISVTX; + +unwind: +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +			  local->inode, &local->stbuf, local->xattr); +	return 0; +} + + +int +dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			   int32_t op_ret, int32_t op_errno, +			   inode_t *inode, struct stat *buf, dict_t *xattr) +{ +	dht_conf_t   *conf          = NULL; +        dht_local_t  *local         = NULL; +        int           this_call_cnt = 0; +        call_frame_t *prev          = NULL; +	int           is_linkfile   = 0; +	int           is_dir        = 0; +	xlator_t     *subvol        = NULL; +	loc_t        *loc           = NULL; +	xlator_t     *link_subvol   = NULL; +	xlator_t     *hashed_subvol = NULL; +	xlator_t     *cached_subvol = NULL; + +	conf   = this->private; + +	local  = frame->local; +	loc    = &local->loc; + +	prev   = cookie; +	subvol = prev->this; + +	LOCK (&frame->lock); +	{ +	
	if (op_ret == -1) { +			if (op_errno != ENOENT) +				local->op_errno = op_errno; +			goto unlock; +		} + +		is_linkfile = check_is_linkfile (inode, buf, xattr); +		is_dir = check_is_dir (inode, buf, xattr); + +		if (is_linkfile) { +			link_subvol = dht_linkfile_subvol (this, inode, buf, +							   xattr); +			gf_log (this->name, GF_LOG_DEBUG, +				"found on %s linkfile %s (-> %s)", +				subvol->name, loc->path, +				link_subvol ? link_subvol->name : "''"); +			goto unlock; +		} else { +			gf_log (this->name, GF_LOG_DEBUG, +				"found on %s file %s", +				subvol->name, loc->path); +		} + +		if (!local->cached_subvol) { +			/* found one file */ +			dht_stat_merge (this, &local->stbuf, buf, subvol); +			local->xattr = dict_ref (xattr); +			local->cached_subvol = subvol; +		} else { +			gf_log (this->name, GF_LOG_WARNING, +				"multiple subvolumes (%s and %s atleast) have " +				"file %s", local->cached_subvol->name, +				subvol->name, local->loc.path); +		} +	} +unlock: +	UNLOCK (&frame->lock); + +	if (is_linkfile) { +		gf_log (this->name, GF_LOG_WARNING, +			"deleting stale linkfile %s on %s", +			loc->path, subvol->name); +		dht_linkfile_unlink (frame, this, subvol, loc); +	} + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) { +		hashed_subvol = local->hashed_subvol; +		cached_subvol = local->cached_subvol; + +		if (!cached_subvol) { +			DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL); +			return 0; +		} + +		gf_log (this->name, GF_LOG_WARNING, +			"linking file %s existing on %s to %s (hash)", +			loc->path, cached_subvol->name, hashed_subvol->name); + +		dht_linkfile_create (frame, dht_lookup_linkfile_create_cbk, +				     cached_subvol, hashed_subvol, loc); +	} + +	return 0; +} + + +int +dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +	dht_conf_t     *conf = NULL; +	dht_local_t    *local = NULL; +	int             i = 0; +	int             call_cnt = 0; + +	conf = this->private; +	local = 
frame->local; + +	call_cnt = conf->subvolume_cnt; +	local->call_cnt = call_cnt; + +	if (!local->inode) +		local->inode = inode_ref (loc->inode); + +	for (i = 0; i < call_cnt; i++) { +		STACK_WIND (frame, dht_lookup_everywhere_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->lookup, +			    loc, local->xattr_req); +	} + +	return 0; +} + + +int +dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, +                         xlator_t *this, int op_ret, int op_errno, +                         inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ +        call_frame_t *prev = NULL; +	dht_local_t  *local = NULL; +	dht_layout_t *layout = NULL; +	xlator_t     *subvol = NULL; +	loc_t        *loc = NULL; + +        prev   = cookie; +	subvol = prev->this; + +	local  = frame->local; +	loc    = &local->loc; + +        if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"lookup of %s on %s (following linkfile) failed (%s)", +			local->loc.path, subvol->name, strerror (op_errno)); + +		dht_lookup_everywhere (frame, this, loc); +		return 0; +	} + +        /* TODO: assert type is non-dir and non-linkfile */ + +	if (stbuf->st_nlink == 1) +		stbuf->st_mode |= S_ISVTX; +        dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); + +	layout = dht_layout_for_subvol (this, prev->this); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no pre-set layout for subvolume %s", +			prev->this->name); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +	inode_ctx_put (inode, this, (uint64_t)(long)layout); + +out: +        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); + +        return 0; +} + + +int +dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                int op_ret, int op_errno, +                inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ +	dht_layout_t *layout      = NULL; +        char          is_linkfile = 0; +        char          is_dir      = 0; +        xlator_t  
   *subvol      = NULL; +        dht_conf_t   *conf        = NULL; +        dht_local_t  *local       = NULL; +        loc_t        *loc         = NULL; +        int           i           = 0; +        call_frame_t *prev        = NULL; +	int           call_cnt    = 0; + + +        conf  = this->private; + +        prev  = cookie; +        local = frame->local; +        loc   = &local->loc; + +	if (ENTRY_MISSING (op_ret, op_errno)) { +		if (conf->search_unhashed) { +			local->op_errno = ENOENT; +			dht_lookup_everywhere (frame, this, loc); +			return 0; +		} +	} + + 	if (op_ret == 0) { + 		is_dir      = check_is_dir (inode, stbuf, xattr); + 		if (is_dir) { + 			local->inode = inode_ref (inode); + 			local->xattr = dict_ref (xattr); + 		} + 	} + + 	if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) { +		call_cnt        = conf->subvolume_cnt; + 		local->call_cnt = call_cnt; +		 + 		local->layout = dht_layout_new (this, conf->subvolume_cnt); + 		if (!local->layout) { + 			op_ret   = -1; + 			op_errno = ENOMEM; + 			gf_log (this->name, GF_LOG_ERROR, + 				"memory allocation failed :("); + 			goto out; + 		} +		 +		for (i = 0; i < call_cnt; i++) { +			STACK_WIND (frame, dht_lookup_dir_cbk, +				    conf->subvolumes[i], +				    conf->subvolumes[i]->fops->lookup, +				    &local->loc, local->xattr_req); +		} + 		return 0; + 	} +  +        if (op_ret == -1) +                goto out; + +        is_linkfile = check_is_linkfile (inode, stbuf, xattr); +        is_dir      = check_is_dir (inode, stbuf, xattr); + +        if (!is_dir && !is_linkfile) { +                /* non-directory and not a linkfile */ + +		dht_itransform (this, prev->this, stbuf->st_ino, +				&stbuf->st_ino); + +		layout = dht_layout_for_subvol (this, prev->this); +		if (!layout) { +			gf_log (this->name, GF_LOG_ERROR, +				"no pre-set layout for subvolume %s", +				prev->this->name); +			op_ret   = -1; +			op_errno = EINVAL; +			goto out; +		} + +                inode_ctx_put (inode, this, 
(uint64_t)(long)layout); +                goto out; +        } + +        if (is_linkfile) { +                subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + +                if (!subvol) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "linkfile not having link subvolume. path=%s", +                                loc->path); +			dht_lookup_everywhere (frame, this, loc); +			return 0; +                } + +		STACK_WIND (frame, dht_lookup_linkfile_cbk, +			    subvol, subvol->fops->lookup, +			    &local->loc, local->xattr_req); +        } + +        return 0; + +out: +        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); +        return 0; +} + + +int +dht_lookup (call_frame_t *frame, xlator_t *this, +            loc_t *loc, dict_t *xattr_req) +{ +        xlator_t     *subvol = NULL; +        xlator_t     *hashed_subvol = NULL; +        xlator_t     *cached_subvol = NULL; +        dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           ret    = -1; +        int           op_errno = -1; +	dht_layout_t *layout = NULL; +	int           i = 0; +	int           call_cnt = 0; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	conf = this->private; + +        local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +        ret = loc_dup (loc, &local->loc); +        if (ret == -1) { +                op_errno = errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "copying location failed for path=%s", +                        loc->path); +                goto err; +        } +	 +	if (xattr_req) { +		local->xattr_req = dict_ref (xattr_req); +	} else { +		local->xattr_req = dict_new 
(); +	} + +	hashed_subvol = dht_subvol_get_hashed (this, loc); +	cached_subvol = dht_subvol_get_cached (this, loc->inode); + +	local->cached_subvol = cached_subvol; +	local->hashed_subvol = hashed_subvol; + +        if (is_revalidate (loc)) { +		layout = dht_layout_get (this, loc->inode); + +                if (!layout) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "revalidate without cache. path=%s", +                                loc->path); +                        op_errno = EINVAL; +                        goto err; +                } + +		if (layout->gen && (layout->gen < conf->gen)) { +			gf_log (this->name, GF_LOG_WARNING, +				"incomplete layout failure for path=%s", +				loc->path); +			op_errno = EAGAIN; +			goto err; +		} + +		local->inode    = inode_ref (loc->inode); +		local->st_ino   = loc->inode->ino; +		 +		local->call_cnt = layout->cnt; +		call_cnt = local->call_cnt; +		 +		/* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, +		 *       revalidates directly go to the cached-subvolume. 
+		 */ +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht", 4 * 4); + +		for (i = 0; i < layout->cnt; i++) { +			subvol = layout->list[i].xlator; +			 +			STACK_WIND (frame, dht_revalidate_cbk, +				    subvol, subvol->fops->lookup, +				    loc, local->xattr_req); + +			if (!--call_cnt) +				break; +		} +        } else { +		/* TODO: remove the hard-coding */ +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht", 4 * 4); + +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht.linkto", 256); + +                if (!hashed_subvol) { +			gf_log (this->name, GF_LOG_ERROR, +				"no subvolume in layout for path=%s, " +				"checking on all the subvols to see if " +				"it is a directory", loc->path); + 			call_cnt        = conf->subvolume_cnt; + 			local->call_cnt = call_cnt; + 			 + 			local->layout = dht_layout_new (this, conf->subvolume_cnt); + 			if (!local->layout) { + 				op_errno = ENOMEM; + 				gf_log (this->name, GF_LOG_ERROR, + 					"memory allocation failed :("); + 				goto err; + 			} + +			for (i = 0; i < call_cnt; i++) { + 				STACK_WIND (frame, dht_lookup_dir_cbk, + 					    conf->subvolumes[i], + 					    conf->subvolumes[i]->fops->lookup, + 					    &local->loc, local->xattr_req); + 			} + 			return 0; +                } + +                STACK_WIND (frame, dht_lookup_cbk, +                            hashed_subvol, hashed_subvol->fops->lookup, +                            loc, local->xattr_req); +        } + +        return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); +        return 0; +} + + +int +dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	      int op_ret, int op_errno, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev = cookie; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) { +			local->op_errno = op_errno; +			gf_log (this->name, GF_LOG_ERROR, +				"subvolume %s returned -1 (%s)", +				prev->this->name, strerror (op_errno)); +			goto unlock; +		} + +		dht_stat_merge (this, &local->stbuf, stbuf, prev->this); +		 +		if (local->inode) +			local->stbuf.st_ino = local->inode->ino; +		local->op_ret = 0; +	} +unlock: +	UNLOCK (&frame->lock); + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  &local->stbuf); + +        return 0; +} + + +int +dht_stat (call_frame_t *frame, xlator_t *this, +	  loc_t *loc) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; +	dht_layout_t *layout = NULL; +	int           i = 0; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	layout = dht_layout_get (this, loc->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		subvol = layout->list[i].xlator; + +		STACK_WIND (frame, dht_attr_cbk, +			    subvol, subvol->fops->stat, +		
	    loc); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_fstat (call_frame_t *frame, xlator_t *this, +	   fd_t *fd) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; +	dht_layout_t *layout = NULL; +	int           i = 0; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	layout = dht_layout_get (this, fd->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"local allocation failed :("); +		goto err; +	} + +	local->inode    = inode_ref (fd->inode); +	local->call_cnt = layout->cnt;; + +	for (i = 0; i < layout->cnt; i++) { +		subvol = layout->list[i].xlator; +		STACK_WIND (frame, dht_attr_cbk, +			    subvol, subvol->fops->fstat, +			    fd); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_chmod (call_frame_t *frame, xlator_t *this, +	   loc_t *loc, mode_t mode) +{ +	dht_layout_t *layout = NULL; +	dht_local_t  *local  = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	layout = dht_layout_get (this, loc->inode); + +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	if (!layout_is_sane (layout)) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout is not sane for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		STACK_WIND (frame, dht_attr_cbk, +			    layout->list[i].xlator, +			    layout->list[i].xlator->fops->chmod, +			    loc, mode); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_chown (call_frame_t *frame, xlator_t *this, +	   loc_t *loc, uid_t uid, gid_t gid) +{ +	dht_layout_t *layout = NULL; +	dht_local_t  *local  = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	layout = dht_layout_get (this, loc->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	if (!layout_is_sane (layout)) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout is not sane for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		STACK_WIND (frame, dht_attr_cbk, +			    layout->list[i].xlator, +			    layout->list[i].xlator->fops->chown, +			    loc, uid, gid); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_fchmod (call_frame_t *frame, xlator_t *this, +	    fd_t *fd, mode_t mode) +{ +	dht_layout_t *layout = NULL; +	dht_local_t  *local  = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + + +	layout = dht_layout_get (this, fd->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	if (!layout_is_sane (layout)) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout is not sane for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (fd->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		STACK_WIND (frame, dht_attr_cbk, +			    layout->list[i].xlator, +			    layout->list[i].xlator->fops->fchmod, +			    fd, mode); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_fchown (call_frame_t *frame, xlator_t *this, +	    fd_t *fd, uid_t uid, gid_t gid) +{ +	dht_layout_t *layout = NULL; +	dht_local_t  *local  = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	layout = dht_layout_get (this, fd->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	if (!layout_is_sane (layout)) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout is not sane for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (fd->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		STACK_WIND (frame, dht_attr_cbk, +			    layout->list[i].xlator, +			    layout->list[i].xlator->fops->fchown, +			    fd, uid, gid); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_utimens (call_frame_t *frame, xlator_t *this, +	     loc_t *loc, struct timespec tv[2]) +{ +	dht_layout_t *layout = NULL; +	dht_local_t  *local  = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	layout = dht_layout_get (this, loc->inode); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no layout for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	if (!layout_is_sane (layout)) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout is not sane for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = layout->cnt; + +	for (i = 0; i < layout->cnt; i++) { +		STACK_WIND (frame, dht_attr_cbk, +			    layout->list[i].xlator, +			    layout->list[i].xlator->fops->utimens, +			    loc, tv); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_truncate (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, off_t offset) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_attr_cbk, +		    subvol, subvol->fops->truncate, +		    loc, offset); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_ftruncate (call_frame_t *frame, xlator_t *this, +	       fd_t *fd, off_t offset) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (fd->inode); +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_attr_cbk, +		    subvol, subvol->fops->ftruncate, +		    fd, offset); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	     int op_ret, int op_errno) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev = cookie; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) { +			local->op_errno = op_errno; +			gf_log (this->name, GF_LOG_ERROR, +				"subvolume %s returned -1 (%s)", +				prev->this->name, strerror (op_errno)); +			goto unlock; +		} + +		local->op_ret = 0; +	} +unlock: +	UNLOCK (&frame->lock); + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + +        return 0; +} + + +int +dht_access (call_frame_t *frame, xlator_t *this, +	    loc_t *loc, int32_t mask) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +     
   VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_err_cbk, +		    subvol, subvol->fops->access, +		    loc, mask); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +int +dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		  int op_ret, int op_errno, const char *path) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, path); + +        return 0; +} + + +int +dht_readlink (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, size_t size) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_readlink_cbk, +		    subvol, subvol->fops->readlink, +		    loc, size); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		  int op_ret, int op_errno, dict_t *xattr) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr); + +        return 0; +} + + +int +dht_getxattr (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, const char *key) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_getxattr_cbk, +		    subvol, subvol->fops->getxattr, +		    loc, key); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_setxattr (call_frame_t *frame, xlator_t *this, +	      loc_t *loc, dict_t *xattr, int flags) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_err_cbk, +		    subvol, subvol->fops->setxattr, +		    loc, xattr, flags); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_removexattr (call_frame_t *frame, xlator_t *this, +		 loc_t *loc, const char *key) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_err_cbk, +		    subvol, subvol->fops->removexattr, +		    loc, key); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	      int op_ret, int op_errno, fd_t *fd) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev = cookie; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) { +			local->op_errno = op_errno; +			gf_log (this->name, GF_LOG_ERROR, +				"subvolume %s returned -1 (%s)", +				prev->this->name, strerror (op_errno)); +			goto unlock; +		} + +		local->op_ret = 0; +	} +unlock: +	UNLOCK (&frame->lock); + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  local->fd); + +        return 0; +} + + +int +dht_open (call_frame_t *frame, xlator_t *this, +	  loc_t *loc, int flags, fd_t *fd) +{ +	xlator_t     *subvol = NULL; +	int           ret = -1; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->fd = fd_ref (fd); +	ret = loc_dup (loc, &local->loc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_fd_cbk, +		    subvol, subvol->fops->open, +		    loc, flags, fd); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	       int op_ret, int op_errno, +	       struct iovec *vector, int count, struct stat *stbuf) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + +        return 0; +} + + +int +dht_readv (call_frame_t *frame, xlator_t *this, +	   fd_t *fd, size_t size, off_t off) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_readv_cbk, +		    subvol, subvol->fops->readv, +		    fd, size, off); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL); + +	return 0; +} + + +int +dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		int op_ret, int op_errno, struct stat *stbuf) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf); + +        return 0; +} + + +int +dht_writev (call_frame_t *frame, xlator_t *this, +	    fd_t *fd, struct iovec *vector, int count, off_t off) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_writev_cbk, +		    subvol, subvol->fops->writev, +		    fd, vector, count, off); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0); + +	return 0; +} + + +int +dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->fd = fd_ref (fd); +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_err_cbk, +		    subvol, subvol->fops->flush, fd); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +int +dht_fsync (call_frame_t *frame, xlator_t *this, +	   fd_t *fd, int datasync) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocatoin failed :("); +		goto err; +	} +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_err_cbk, +		    subvol, subvol->fops->fsync, +		    fd, datasync); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +int +dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	    int op_ret, int op_errno, struct flock *flock) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + +        return 0; +} + + +int +dht_lk (call_frame_t *frame, xlator_t *this, +	fd_t *fd, int cmd, struct flock *flock) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_lk_cbk, +		    subvol, subvol->fops->lk, +		    fd, cmd, flock); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + +/* gf_lk no longer exists  +int +dht_gf_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	    int op_ret, int op_errno, struct flock *flock) +{ +        DHT_STACK_UNWIND (frame, op_ret, op_errno, flock); + +        return 0; +} + + +int +dht_gf_lk (call_frame_t *frame, xlator_t *this, +	   loc_t *loc, int cmd, struct flock *flock) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_gf_lk_cbk, +		    subvol, subvol->fops->gf_lk, +		    fd, cmd, flock); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} +*/ + +int +dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		int op_ret, int op_errno, struct statvfs *statvfs) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; + + +	local = frame->local; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) { +			local->op_errno = op_errno; +			goto unlock; +		} +		local->op_ret = 0; + +		/* TODO: normalize sizes */ +		local->statvfs.f_bsize    = statvfs->f_bsize; +		local->statvfs.f_frsize   = statvfs->f_frsize; + +		local->statvfs.f_blocks  += statvfs->f_blocks; +		local->statvfs.f_bfree   += statvfs->f_bfree; +		local->statvfs.f_bavail  += statvfs->f_bavail; +		local->statvfs.f_files   += statvfs->f_files; +		local->statvfs.f_ffree   += statvfs->f_ffree; +		local->statvfs.f_favail  += statvfs->f_favail; +		local->statvfs.f_fsid     = statvfs->f_fsid; +		local->statvfs.f_flag     = statvfs->f_flag; +		local->statvfs.f_namemax  = statvfs->f_namemax; + +	} +unlock: +	UNLOCK (&frame->lock); + + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  &local->statvfs); + +        return 0; +} + + +int +dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	local->call_cnt = conf->subvolume_cnt; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_statfs_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->statfs, loc); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +	int           ret = -1; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->fd = fd_ref (fd); +	ret = loc_dup (loc, &local->loc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = conf->subvolume_cnt; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_fd_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->opendir, +			    loc, fd); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		 int op_ret, int op_errno, gf_dirent_t *orig_entries) +{ +	dht_local_t  *local = NULL; +	gf_dirent_t   entries; +	gf_dirent_t  *orig_entry = NULL; +	gf_dirent_t  *entry = NULL; +	call_frame_t *prev = NULL; +	xlator_t     *subvol = NULL; +	xlator_t     *next = NULL; +	dht_layout_t *layout = NULL; +	int           count = 0; + + +	INIT_LIST_HEAD (&entries.list); +	prev = cookie; +	local = frame->local; + +	if (op_ret < 0) +		goto done; + +	layout = dht_layout_get (this, local->fd->inode); + +	list_for_each_entry (orig_entry, &orig_entries->list, list) { +		subvol = dht_layout_search (this, layout, orig_entry->d_name); + +		if (!subvol || subvol == prev->this) { +			entry = gf_dirent_for_name (orig_entry->d_name); +			if (!entry) { +				gf_log (this->name, GF_LOG_ERROR, +					"memory allocation failed :("); +				goto unwind; +			} + +			dht_itransform (this, subvol, orig_entry->d_ino, +					&entry->d_ino); +			dht_itransform (this, subvol, orig_entry->d_off, +					&entry->d_off); + +			entry->d_type = orig_entry->d_type; +			entry->d_len  = orig_entry->d_len; + +			list_add_tail (&entry->list, &entries.list); +			count++; +		} +	} +	op_ret = count; + +done: +	if (count == 0) { +		next = dht_subvol_next (this, prev->this); +		if (!next) { +			goto unwind; +		} + +		STACK_WIND (frame, dht_readdir_cbk, +			    next, next->fops->readdir, +			    local->fd, local->size, 0); +		return 0; +	} + +unwind: +	if (op_ret < 0) +		op_ret = 0; + +	DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries); + +	gf_dirent_free (&entries); + +        return 0; +} + + +int +dht_readdir (call_frame_t *frame, xlator_t *this, +	     fd_t *fd, size_t size, off_t yoff) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           op_errno = -1; +	xlator_t     *xvol = NULL; +	off_t         xoff = 0; + + +        
VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	if (!local) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->fd = fd_ref (fd); +	local->size = size; + +	dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + +	/* TODO: do proper readdir */ +	STACK_WIND (frame, dht_readdir_cbk, +		    xvol, xvol->fops->readdir, +		    fd, size, xoff); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +int +dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		  int op_ret, int op_errno) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; + + +	local = frame->local; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) +			local->op_errno = op_errno; + +		if (op_ret == 0) +			local->op_ret = 0; +	} +	UNLOCK (&frame->lock); + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + +        return 0; +} + + +int +dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           op_errno = -1; +	int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->fd = fd_ref (fd); +	local->call_cnt = conf->subvolume_cnt; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_fsyncdir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->fsyncdir, +			    fd, datasync); +	} + +	return 0; + 
+err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +int +dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		 int op_ret, int op_errno, +		 inode_t *inode, struct stat *stbuf) +{ +	call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; +	int           ret = -1; + + +	if (op_ret == -1) +		goto out; + +	prev = cookie; + +	dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); +	layout = dht_layout_for_subvol (this, prev->this); + +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no pre-set layout for subvolume %s", +			prev->this->name); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +	ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"could not set inode context"); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +out: +	DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); +	return 0; +} + + +int +dht_mknod (call_frame_t *frame, xlator_t *this, +	   loc_t *loc, mode_t mode, dev_t rdev) +{ +	xlator_t  *subvol = NULL; +	int        op_errno = -1; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + +	subvol = dht_subvol_get_hashed (this, loc); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = ENOENT; +		goto err; +	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"creating %s on %s", loc->path, subvol->name); + +	STACK_WIND (frame, dht_newfile_cbk, +		    subvol, subvol->fops->mknod, +		    loc, mode, rdev); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} + + +int +dht_symlink (call_frame_t *frame, xlator_t *this, +	     const char *linkname, loc_t *loc) +{ +	xlator_t  *subvol = NULL; +	int        op_errno = -1; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + +	subvol = dht_subvol_get_hashed (this, loc); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = ENOENT; +		goto err; +	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"creating %s on %s", loc->path, subvol->name); + +	STACK_WIND (frame, dht_newfile_cbk, +		    subvol, subvol->fops->symlink, +		    linkname, loc); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} + + +int +dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +	xlator_t    *cached_subvol = NULL; +	xlator_t    *hashed_subvol = NULL; +	int          op_errno = -1; +	dht_local_t *local = NULL; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + +	cached_subvol = dht_subvol_get_cached (this, loc->inode); +	if (!cached_subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	hashed_subvol = dht_subvol_get_hashed (this, loc); +	if (!hashed_subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->call_cnt = 1; +	if (hashed_subvol != cached_subvol) +		local->call_cnt++; + +	STACK_WIND (frame, dht_err_cbk, +		    cached_subvol, cached_subvol->fops->unlink, loc); + +	if (hashed_subvol != cached_subvol) +		
STACK_WIND (frame, dht_err_cbk, +			    hashed_subvol, hashed_subvol->fops->unlink, loc); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +int +dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	      int op_ret, int op_errno, +	      inode_t *inode, struct stat *stbuf) +{ +        call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; +	dht_local_t  *local = NULL; + +        prev = cookie; +	local = frame->local; + +        if (op_ret == -1) +                goto out; + +	layout = dht_layout_for_subvol (this, prev->this); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no pre-set layout for subvolume %s", +			prev->this->name); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +	stbuf->st_ino = local->loc.inode->ino; + +out: +        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + +	return 0; +} + + +int +dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int op_ret, int op_errno, +		       inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	xlator_t     *srcvol = NULL; + + +	if (op_ret == -1) +		goto err; + +	local = frame->local; +	srcvol = local->linkfile.srcvol; + +	STACK_WIND (frame, dht_link_cbk, +		    srcvol, srcvol->fops->link, +		    &local->loc, &local->loc2); + +	return 0; + +err: +	DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + +	return 0; +} + + +int +dht_link (call_frame_t *frame, xlator_t *this, +	  loc_t *oldloc, loc_t *newloc) +{ +	xlator_t    *cached_subvol = NULL; +	xlator_t    *hashed_subvol = NULL; +	int          op_errno = -1; +	int          ret = -1; +	dht_local_t *local = NULL; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (oldloc, err); +	VALIDATE_OR_GOTO (newloc, err); + +	cached_subvol = dht_subvol_get_cached (this, oldloc->inode); +	if (!cached_subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			
"no cached subvolume for path=%s", oldloc->path); +		op_errno = EINVAL; +		goto err; +	} + +	hashed_subvol = dht_subvol_get_hashed (this, newloc); +	if (!hashed_subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			newloc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	ret = loc_copy (&local->loc, oldloc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	ret = loc_copy (&local->loc2, newloc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	if (hashed_subvol != cached_subvol) { +		dht_linkfile_create (frame, dht_link_linkfile_cbk, +				     cached_subvol, hashed_subvol, newloc); +	} else { +		STACK_WIND (frame, dht_link_cbk, +			    cached_subvol, cached_subvol->fops->link, +			    oldloc, newloc); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} + + +int +dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		 int op_ret, int op_errno, +		 fd_t *fd, inode_t *inode, struct stat *stbuf) +{ +	call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; +	int           ret = -1; + + +	if (op_ret == -1) +		goto out; + +	prev = cookie; + +	dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino); +	layout = dht_layout_for_subvol (this, prev->this); + +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"no pre-set layout for subvolume %s", +			prev->this->name); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +	ret = inode_ctx_put (inode, this, (uint64_t)(long)layout); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"could not set inode context"); +		op_ret   = -1; +		op_errno = EINVAL; +		goto out; +	} + +out: +	DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf); +	return 0; +} + + +int +dht_create (call_frame_t *frame, xlator_t *this, +	    loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ +	xlator_t  *subvol = NULL; +	int        op_errno = -1; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + +	subvol = dht_subvol_get_hashed (this, loc); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = ENOENT; +		goto err; +	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"creating %s on %s", loc->path, subvol->name); + +	STACK_WIND (frame, dht_create_cbk, +		    subvol, subvol->fops->create, +		    loc, flags, mode, fd); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + +	return 0; +} + + +int +dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, +			xlator_t *this, +			int32_t op_ret, int32_t op_errno) +{ +	dht_local_t   *local = NULL; +	dht_layout_t  *layout = NULL; + + +	local = frame->local; +	layout = local->selfheal.layout; + +	if (op_ret == 0) { +		inode_ctx_put (local->inode, this, (uint64_t)(long)layout); +		local->selfheal.layout = NULL; +		local->stbuf.st_ino = local->st_ino; +	} + +	DHT_STACK_UNWIND (frame, op_ret, op_errno, +			  local->inode, &local->stbuf); + +	return 0; +} + + +int +dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	       int op_ret, int op_errno, inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	int           ret = -1; +	call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; + +	local = frame->local; +	prev  = cookie; +	layout = local->layout; + +	LOCK (&frame->lock); +	{ +		ret = dht_layout_merge (this, layout, prev->this, +					op_ret, op_errno, NULL); + +		if (op_ret == -1) { +			local->op_errno = op_errno; +			goto unlock; +		} +		dht_stat_merge (this, &local->stbuf, stbuf, prev->this); +	} +unlock: +	UNLOCK (&frame->lock); + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) { +		local->layout = NULL; +		dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, +					&local->loc, layout); +	} + +        return 0; +} + +int +dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,  +		      xlator_t *this, int op_ret, int op_errno,  +		      inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	int           ret = -1; +	call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; +	dht_conf_t   *conf = NULL; +	int           i = 0; +	xlator_t     *hashed_subvol = NULL; + +	local = frame->local; +	prev  = cookie; +	layout = local->layout; +	conf = this->private; +	hashed_subvol = 
local->hashed_subvol; + +	ret = dht_layout_merge (this, layout, prev->this, +				op_ret, op_errno, NULL); + +	if (op_ret == -1) { +		local->op_errno = op_errno; +		goto err; +	} +	local->op_ret = 0; + +	dht_stat_merge (this, &local->stbuf, stbuf, prev->this); + +	local->st_ino = local->stbuf.st_ino; + +	local->call_cnt = conf->subvolume_cnt - 1; +	 +	if (local->call_cnt == 0) { +		local->layout = NULL; +		dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk, +					&local->loc, layout); +	} +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (conf->subvolumes[i] == hashed_subvol) +			continue; +		STACK_WIND (frame, dht_mkdir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->mkdir, +			    &local->loc, local->mode); +	} +	return 0; +err: +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); +        return 0; +} + +int +dht_mkdir (call_frame_t *frame, xlator_t *this, +	   loc_t *loc, mode_t mode) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           op_errno = -1; +	int           ret = -1; +	xlator_t     *hashed_subvol = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	if (!local) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	hashed_subvol = dht_subvol_get_hashed (this, loc); + +	if (hashed_subvol == NULL) { +		gf_log (this->name, GF_LOG_ERROR, +			"hashed subvol not found"); +		op_errno = EINVAL; +		goto err; +	} + +	local->hashed_subvol = hashed_subvol; +	local->inode = inode_ref (loc->inode); +	ret = loc_copy (&local->loc, loc); +	local->mode = mode; + +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->layout = dht_layout_new 
(this, conf->subvolume_cnt); +	if (!local->layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	STACK_WIND (frame, dht_mkdir_hashed_cbk, +		    hashed_subvol, +		    hashed_subvol->fops->mkdir, +		    loc, mode); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} + + +int +dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			int op_ret, int op_errno) +{ +	dht_local_t  *local = NULL; + +	local = frame->local; +	local->layout = NULL; + +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); + +	return 0; +} + + +int +dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +	       int op_ret, int op_errno) +{ +	uint64_t      tmp_layout = 0; +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	call_frame_t *prev = NULL; +	dht_layout_t *layout = NULL; + +	local = frame->local; +	prev  = cookie; + +	LOCK (&frame->lock); +	{ +		if (op_ret == -1) { +			local->op_errno = op_errno; +			local->op_ret   = -1; + +			if (op_errno != ENOENT) +				local->need_selfheal = 1; + +			gf_log (this->name, GF_LOG_ERROR, +				"rmdir on %s for %s failed (%s)", +				prev->this->name, local->loc.path, +				strerror (op_errno)); +			goto unlock; +		} +	} +unlock: +	UNLOCK (&frame->lock); + + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) { +		if (local->need_selfheal) { +			inode_ctx_get (local->loc.inode, this,  +				       &tmp_layout); +			layout = (dht_layout_t *)(long)tmp_layout; + +			/* TODO: neater interface needed below */ +			local->stbuf.st_mode = local->loc.inode->st_mode; + +			dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, +					      &local->loc, layout); +		} else { +			DHT_STACK_UNWIND (frame, local->op_ret, +					  local->op_errno); +		} +	} + +        return 0; +} + + +int +dht_rmdir_do (call_frame_t *frame, xlator_t 
*this) +{ +	dht_local_t  *local = NULL; +	dht_conf_t   *conf = NULL; +	int           i = 0; + +	conf = this->private; +	local = frame->local; + +	if (local->op_ret == -1) +		goto err; + +	local->call_cnt = conf->subvolume_cnt; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_rmdir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->rmdir, +			    &local->loc); +	} + +	return 0; + +err: +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); +	return 0; +} + + +int +dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int op_ret, int op_errno, gf_dirent_t *entries) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = -1; +	call_frame_t *prev = NULL; + +	local = frame->local; +	prev  = cookie; + +	if (op_ret > 2) { +		gf_log (this->name, GF_LOG_DEBUG, +			"readdir on %s for %s returned %d entries", +			prev->this->name, local->loc.path, op_ret); +		local->op_ret = -1; +		local->op_errno = ENOTEMPTY; +	} + +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_rmdir_do (frame, this); +	} + +	return 0; +} + + +int +dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int op_ret, int op_errno, fd_t *fd) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = -1; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev  = cookie; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"opendir on %s for %s failed (%s)", +			prev->this->name, local->loc.path, +			strerror (op_errno)); +		goto err; +	} + +	STACK_WIND (frame, dht_rmdir_readdir_cbk, +		    prev->this, prev->this->fops->readdir, +		    local->fd, 4096, 0); + +	return 0; + +err: +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_rmdir_do (frame, this); +	} + +	return 0; +} + + +int +dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +	dht_local_t  *local  = NULL; +	dht_conf_t   
*conf = NULL; +        int           op_errno = -1; +	int           i = -1; +	int           ret = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	conf = this->private; + +	local = dht_local_init (frame); +	if (!local) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->call_cnt = conf->subvolume_cnt; +	local->op_ret   = 0; + +	ret = loc_copy (&local->loc, loc); +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->fd = fd_create (local->loc.inode, frame->root->pid); +	if (!local->fd) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_rmdir_opendir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->opendir, +			    loc, local->fd); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +static int32_t +dht_xattrop_cbk (call_frame_t *frame, +		 void *cookie, +		 xlator_t *this, +		 int32_t op_ret, +		 int32_t op_errno, +		 dict_t *dict) +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); +	return 0; +} + +int32_t +dht_xattrop (call_frame_t *frame, +	     xlator_t *this, +	     loc_t *loc, +	     gf_xattrop_flags_t flags, +	     dict_t *dict) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = 1; + +	STACK_WIND (frame, +		    dht_xattrop_cbk, +		    subvol, subvol->fops->xattrop, +		    loc, flags, dict); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + +static int32_t +dht_fxattrop_cbk (call_frame_t *frame, +		  void *cookie, +		  xlator_t *this, +		  int32_t op_ret, +		  int32_t op_errno, +		  dict_t *dict) +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno, dict); +	return 0; +} + +int32_t +dht_fxattrop (call_frame_t *frame, +	      xlator_t *this, +	      fd_t *fd, +	      gf_xattrop_flags_t flags, +	      dict_t *dict) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, +		    dht_fxattrop_cbk, +		    subvol, subvol->fops->fxattrop, +		    fd, flags, dict); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); + +	return 0; +} + + +static int32_t +dht_inodelk_cbk (call_frame_t *frame, void *cookie, +		 xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno); +	return 0; +} + + +int32_t +dht_inodelk (call_frame_t *frame, xlator_t *this, +	     loc_t *loc, int32_t cmd, struct flock *lock) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = 1; + +	STACK_WIND (frame, +		    dht_inodelk_cbk, +		    subvol, subvol->fops->inodelk, +		    loc, cmd, lock); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +static int32_t +dht_finodelk_cbk (call_frame_t *frame, void *cookie, +		  xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno); +	return 0; +} + + +int32_t +dht_finodelk (call_frame_t *frame, xlator_t *this, +	      fd_t *fd, int32_t cmd, struct flock *lock) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + + +	STACK_WIND (frame, +		    dht_finodelk_cbk, +		    subvol, subvol->fops->finodelk, +		    fd, cmd, lock); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +static int32_t +dht_entrylk_cbk (call_frame_t *frame, void *cookie, +		 xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno); +	return 0; +} + +int32_t +dht_entrylk (call_frame_t *frame, xlator_t *this, +	     loc_t *loc, const char *basename, +	     entrylk_cmd cmd, entrylk_type type) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; +	dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	subvol = dht_subvol_get_cached (this, loc->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", loc->path); +		op_errno = EINVAL; +		goto err; +	} + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed 
:("); +		goto err; +	} + +	local->inode = inode_ref (loc->inode); +	local->call_cnt = 1; + +	STACK_WIND (frame, dht_entrylk_cbk, +		    subvol, subvol->fops->entrylk, +		    loc, basename, cmd, type); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + +static int32_t +dht_fentrylk_cbk (call_frame_t *frame, void *cookie, +		  xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +	DHT_STACK_UNWIND (frame, op_ret, op_errno); +	return 0; +} + +int32_t +dht_fentrylk (call_frame_t *frame, xlator_t *this, +	      fd_t *fd, const char *basename, +	      entrylk_cmd cmd, entrylk_type type) +{ +	xlator_t     *subvol = NULL; +        int           op_errno = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +	subvol = dht_subvol_get_cached (this, fd->inode); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for fd=%p", fd); +		op_errno = EINVAL; +		goto err; +	} + +	STACK_WIND (frame, dht_fentrylk_cbk, +		    subvol, subvol->fops->fentrylk, +		    fd, basename, cmd, type); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno); + +	return 0; +} + + +/* Forget hook: frees the layout stored in the inode context unless it is marked preset. */ +int +dht_forget (xlator_t *this, inode_t *inode) +{ +	uint64_t      tmp_layout = 0; +	dht_layout_t *layout = NULL; + +	inode_ctx_get (inode, this, &tmp_layout); + +	/* Test the fetched context value; 'layout' itself is still NULL here. */ +	if (!tmp_layout) +		return 0; + +	layout = (dht_layout_t *)(long)tmp_layout; +	if (!layout->preset) +		FREE (layout); + +	return 0; +} + + + +static int +dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) +{ +        xlator_list_t *subvols = NULL; +        int            cnt = 0; + + +        for (subvols = this->children; subvols; subvols = subvols->next) +                cnt++; + +        conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *)); +        if (!conf->subvolumes) { +                gf_log (this->name, GF_LOG_ERROR, +                        "memory allocation failed :("); +                return -1; +        } +        conf->subvolume_cnt = cnt; + +        cnt = 0; +        for (subvols = this->children; subvols; subvols = subvols->next) +                conf->subvolumes[cnt++] = subvols->xlator; + +	conf->subvolume_status = CALLOC (cnt, sizeof (char)); +	if (!conf->subvolume_status) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		return -1; +	} + +        return 0; +} + + +int +dht_notify (xlator_t *this, int event, void *data, ...) 
+{ +	xlator_t   *subvol = NULL; +	int         cnt    = -1; +	int         i      = -1; +	dht_conf_t *conf   = NULL; +	int         ret    = -1; + + +	conf = this->private; + +	switch (event) { +	case GF_EVENT_CHILD_UP: +		subvol = data; + +		conf->gen++; + +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (subvol == conf->subvolumes[i]) { +				cnt = i; +				break; +			} +		} + +		if (cnt == -1) { +			gf_log (this->name, GF_LOG_ERROR, +				"got GF_EVENT_CHILD_UP bad subvolume %s", +				subvol->name); +			break; +		} + +		LOCK (&conf->subvolume_lock); +		{ +			conf->subvolume_status[cnt] = 1; +		} +		UNLOCK (&conf->subvolume_lock); + +		break; + +	case GF_EVENT_CHILD_DOWN: +		subvol = data; + +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (subvol == conf->subvolumes[i]) { +				cnt = i; +				break; +			} +		} + +		if (cnt == -1) { +			gf_log (this->name, GF_LOG_ERROR, +				"got GF_EVENT_CHILD_DOWN bad subvolume %s", +				subvol->name); +			break; +		} + +		LOCK (&conf->subvolume_lock); +		{ +			conf->subvolume_status[cnt] = 0; +		} +		UNLOCK (&conf->subvolume_lock); + +		break; +	} + +	ret = default_notify (this, event, data); + +	return ret; +} + diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h new file mode 100644 index 00000000000..17017381b08 --- /dev/null +++ b/xlators/cluster/dht/src/dht-common.h @@ -0,0 +1,212 @@ +/* +   Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef _DHT_H +#define _DHT_H + + +typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, +				       xlator_t *this, +				       int32_t op_ret, int32_t op_errno); + + +struct dht_layout { +        int               cnt; +	int               preset; +        int               gen; +	int               type; +        struct { +		int       err;   /* 0 = normal +				   -1 = dir exists and no xattr +				   >0 = dir lookup failed with errno +				 */ +                uint32_t  start; +                uint32_t  stop; +                xlator_t *xlator; +        } list[0]; +}; +typedef struct dht_layout dht_layout_t; + + +struct dht_local { +	int                      call_cnt; +	loc_t                    loc; +	loc_t                    loc2; +	int                      op_ret; +	int                      op_errno; +	int                      layout_mismatch; +	struct stat              stbuf; +	struct statvfs           statvfs; +	fd_t                    *fd; +	inode_t                 *inode; +	dict_t                  *xattr; +	dict_t                  *xattr_req; +	dht_layout_t            *layout; +	size_t                   size; +	ino_t                    st_ino; +	xlator_t                *src_hashed, *src_cached; +	xlator_t                *dst_hashed, *dst_cached; +	xlator_t                *cached_subvol; +	xlator_t                *hashed_subvol; +	char                     need_selfheal; +	struct { +		fop_mknod_cbk_t  linkfile_cbk; +		struct stat      stbuf; +		loc_t            loc; +		inode_t         *inode; +		dict_t          *xattr; +		xlator_t        *srcvol; +	} linkfile; +	struct { +		uint32_t         hole_cnt; +		uint32_t         overlaps_cnt; +		uint32_t         missing; +		
uint32_t         down; +		uint32_t         misc; +		dht_selfheal_dir_cbk_t   dir_cbk; +		dht_layout_t    *layout; +	} selfheal; + +	/* needed by nufa */ +	int32_t flags; +	mode_t  mode; +	dev_t   rdev; +}; +typedef struct dht_local dht_local_t; + + +struct dht_conf { +	gf_lock_t      subvolume_lock; +        int            subvolume_cnt; +        xlator_t     **subvolumes; +	xlator_t      *local_volume;     /* Needed by NUFA */ +	char          *subvolume_status; +	dht_layout_t **file_layouts; +	dht_layout_t **dir_layouts; +	dht_layout_t  *default_dir_layout; +	gf_boolean_t   search_unhashed; +	int            gen; +}; +typedef struct dht_conf dht_conf_t; + + +struct dht_disk_layout { +	uint32_t           cnt; +	uint32_t           type; +	struct { +		uint32_t   start; +		uint32_t   stop; +	} list[1]; +}; +typedef struct dht_disk_layout dht_disk_layout_t; +  +#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT) + +#define is_fs_root(loc) (strcmp (loc->path, "/") == 0) + +#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0) + +#define is_last_call(cnt) (cnt == 0) + +#define DHT_LINKFILE_MODE (S_ISVTX) +#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE) + +#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode)) + +#define layout_is_sane(layout) ((layout) && (layout->cnt > 0)) + +#define DHT_STACK_UNWIND(frame, params ...) 
do {       \ +		dht_local_t *__local = NULL;           \ +		__local = frame->local;                \ +		frame->local = NULL;		       \ +		STACK_UNWIND (frame, params);          \ +		dht_local_wipe (__local);	       \ +	} while (0) + +#define DHT_STACK_DESTROY(frame) do {		       \ +		dht_local_t *__local = NULL;           \ +		__local = frame->local;                \ +		frame->local = NULL;		       \ +		STACK_DESTROY (frame->root);	       \ +		dht_local_wipe (__local);	       \ +	} while (0) + +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, +			     const char *name); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, +			  uint32_t *holes_p, uint32_t *overlaps_p, +			  uint32_t *missing_p, uint32_t *down_p, +			  uint32_t *misc_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, +			     xlator_t *subvol, loc_t *loc, dict_t *xattr); + +xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, +			       struct stat *buf, dict_t *xattr); +int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, +			 xlator_t *subvol, loc_t *loc); + +int dht_layouts_init (xlator_t *this, dht_conf_t *conf); +int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, +		      int op_ret, int op_errno, dict_t *xattr); + +int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, +			     int pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, +			   int pos, int32_t *disk_layout); + + +int dht_frame_return (call_frame_t *frame); + +int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int dht_deitransform (xlator_t *this, uint64_t y, xlator_t 
**subvol, +		      uint64_t *x); + +void dht_local_wipe (dht_local_t *local); +dht_local_t *dht_local_init (call_frame_t *frame); +int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from, +		    xlator_t *subvol); + +xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); +xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); +xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); +int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); + +int dht_hash_compute (int type, const char *name, uint32_t *hash_p); + +int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, +			 xlator_t *tovol, xlator_t *fromvol, loc_t *loc); +int +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, +			loc_t *loc, dht_layout_t *layout); +int +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, +		      loc_t *loc, dht_layout_t *layout); + +int dht_rename (call_frame_t *frame, xlator_t *this, +		loc_t *oldloc, loc_t *newloc); +#endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-hashfn-tea.c b/xlators/cluster/dht/src/dht-hashfn-tea.c new file mode 100644 index 00000000000..8437b495541 --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn-tea.c @@ -0,0 +1,146 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  
If not, see +   <http://www.gnu.org/licenses/>. +*/ + + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10		/* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6		/* 6 gets complete mixing */ + + +/* Mix the four words of 'array' into the running state (*h0, *h1) using +   'rounds' iterations. 'rounds' must be at least 1: the loop tests its +   counter only after the first pass. Always returns 0. */ +static int +tearound (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1) +{ +	uint32_t sum = 0; +	int      n = 0; +	uint32_t b0  = 0; +	uint32_t b1  = 0; + +	b0 = *h0; +	b1 = *h1; + +	n = rounds; + +	do { +		sum += DELTA; +		b0  += ((b1 << 4) + array[0]) +			^ (b1 + sum) +			^ ((b1 >> 5) + array[1]); +		b1  += ((b0 << 4) + array[2]) +			^ (b0 + sum) +			^ ((b0 >> 5) + array[3]); +	} while (--n); + +	*h0 += b0; +	*h1 += b1; + +	return 0; +} + + +/* Build the padding word: len | (len << 8), then that 16-bit pattern copied into the upper half. */ +uint32_t +__pad (int len) +{ +	uint32_t pad = 0; + +	pad = (uint32_t) len | ((uint32_t) len << 8); +	pad |= pad << 16; + +	return pad; +} + + +/* Hash the first 'len' bytes of 'msg' and return h0 ^ h1. +   Complete 16-byte groups are mixed with PARTROUNDS; a trailing partial +   group is filled up with the pad word and mixed with FULLROUNDS. +   A length that is a multiple of 16 (including 0) gets no final padded round. */ +uint32_t +dht_hashfn_tea (const char *msg, int len) +{ +	uint32_t    h0 = 0x9464a485; +	uint32_t    h1 = 0x542e1a94; +	uint32_t    array[4]; +	uint32_t    pad = 0; +	int         i = 0; +	int         j = 0; +	int         full_quads = 0; +	int         full_words = 0; +	int         full_bytes = 0; +	const char *cursor = NULL; + + +	cursor = msg; +	pad = __pad (len); + +	full_bytes   = len; +	full_words   = len / 4; +	full_quads   = len / 16; + +	for (i = 0; i < full_quads; i++) { +		for (j = 0; j < 4; j++) { +			/* memcpy, not a uint32_t* dereference: msg need not be 4-byte aligned */ +			memcpy (&array[j], cursor, sizeof (array[j])); +			cursor += sizeof (array[j]); +			full_words--; +			full_bytes -= 4; +		} +		tearound (PARTROUNDS, &array[0], &h0, &h1); +	} + +	if ((len % 16) == 0) { +		goto done; +	} + +	for (j = 0; j < 4; j++) { +		if (full_words) { +			memcpy (&array[j], cursor, sizeof (array[j])); +			cursor += sizeof (array[j]); +			full_words--; +			full_bytes -= 4; +		} else { +			array[j] = pad; +			while (full_bytes) { +				array[j] <<= 8; +				/* NOTE(review): msg[] is plain char, so bytes >= 0x80 are +				   sign-extended where char is signed; deliberately left +				   as is because changing it would change hash values. */ +				array[j] |= msg[len - full_bytes]; +				full_bytes--; +			} +		} +	} +	tearound (FULLROUNDS, &array[0], &h0, &h1); + +done: +	return h0 ^ h1; +} + + +#if 0 +int +main (int 
argc, char *argv[]) +{ +	int i = 0; +	int hashval = 0; + +	for (i = 1; i < argc; i++) { +		hashval = tea (argv[i], strlen (argv[i])); +		printf ("%s: %x\n", argv[i], hashval); +	} +} +#endif diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c new file mode 100644 index 00000000000..9e321a43cec --- /dev/null +++ b/xlators/cluster/dht/src/dht-hashfn.c @@ -0,0 +1,88 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +uint32_t dht_hashfn_tea (const char *name, int len); + + +typedef enum { +	DHT_HASH_TYPE_TEA, +} dht_hashfn_type_t; + + +/* Hash 'name' with the function selected by 'type'. +   Returns 0 and stores the hash in *hash_p, or returns -1 for an unknown +   type, in which case *hash_p is left untouched. */ +int +dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p) +{ +	int      ret = 0; +	uint32_t hash = 0; + +	switch (type) { +	case DHT_HASH_TYPE_TEA: +		hash = dht_hashfn_tea (name, strlen (name)); +		break; +	default: +		ret = -1; +		break; +	} + +	if (ret == 0) { +		*hash_p = hash; +	} + +	return ret; +} + + +/* Point rsync_frndly_name at the string that should be hashed for 'name'. +   By default that is 'name' itself. When 'name' starts with '.' and its last +   '.' is neither at index 1 nor the final character, an alloca'd copy of the +   text between the leading dot and the last dot is used instead +   (".foo.XXXX" -> "foo"); presumably this matches rsync temporary names. +   The expansion deliberately has no trailing ';' so that the macro behaves +   as a single statement, including inside if/else. */ +#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do {          \ +                rsync_frndly_name = (char *) (name);			\ +                if ((name)[0] == '.') {                                 \ +                        char *dot   = 0;                                \ +                        int namelen = 0;                                \ +                                                                        \ +                        dot = strrchr ((name), '.');                    \ +                        if (dot && dot > ((name) + 1) && *(dot + 1)) {  \ +                                namelen = (dot - (name));               \ +                                rsync_frndly_name = alloca (namelen);   \ +                                strncpy (rsync_frndly_name, (name) + 1, \ +                                         namelen);                      \ +                                rsync_frndly_name[namelen - 1] = 0;     \ +                        }                                               \ +                }                                                       \ +        } while (0) + + +/* Public entry point: hash 'name' after the rsync-friendly rewrite above. +   Return value and *hash_p are those of dht_hash_compute_internal. */ +int +dht_hash_compute (int type, const char *name, uint32_t *hash_p) +{ +	char     *rsync_friendly_name = NULL; + +	MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name); + +	return dht_hash_compute_internal (type, rsync_friendly_name, hash_p); +} diff --git 
a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c new file mode 100644 index 00000000000..52d0720025f --- /dev/null +++ b/xlators/cluster/dht/src/dht-helper.c @@ -0,0 +1,326 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_frame_return (call_frame_t *frame) +{ +	dht_local_t *local = NULL; +	int          this_call_cnt = -1; + +	if (!frame) +		return -1; + +	local = frame->local; + +	LOCK (&frame->lock); +	{ +		this_call_cnt = --local->call_cnt; +	} +	UNLOCK (&frame->lock); + +	return this_call_cnt; +} + + +int +dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) +{ +	dht_conf_t *conf = NULL; +	int         cnt = 0; +	int         max = 0; +	uint64_t    y = 0; + + +	if (x == ((uint64_t) -1)) { +		y = (uint64_t) -1; +		goto out; +	} + +	conf = this->private; + +	max = conf->subvolume_cnt; +	cnt = dht_subvol_cnt (this, subvol); + +	y = ((x * max) + cnt); + +out: +	if (y_p) +		*y_p = y; + +	return 0; +} + + +int +dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, +		  uint64_t *x_p) +{ +	dht_conf_t *conf = NULL; +	int         cnt = 0; +	int      
   max = 0; +	uint64_t    x = 0; +	xlator_t   *subvol = 0; + + +	conf = this->private; +	max = conf->subvolume_cnt; + +	cnt = y % max; +	x   = y / max; + +	subvol = conf->subvolumes[cnt]; + +	if (subvol_p) +		*subvol_p = subvol; + +	if (x_p) +		*x_p = x; + +	return 0; +} + + +void +dht_local_wipe (dht_local_t *local) +{ +	if (!local) +		return; + +	loc_wipe (&local->loc); +	loc_wipe (&local->loc2); + +	if (local->xattr) +		dict_unref (local->xattr); + +	if (local->inode) +		inode_unref (local->inode); + +	if (local->layout) +		FREE (local->layout); + +	loc_wipe (&local->linkfile.loc); + +	if (local->linkfile.xattr) +		dict_unref (local->linkfile.xattr); + +	if (local->linkfile.inode) +		inode_unref (local->linkfile.inode); + +	if (local->fd) { +		fd_unref (local->fd); +		local->fd = NULL; +	} +	 +	if (local->xattr_req) +		dict_unref (local->xattr_req); + +	FREE (local); +} + + +dht_local_t * +dht_local_init (call_frame_t *frame) +{ +	dht_local_t *local = NULL; + +	/* TODO: use mem-pool */ +	local = CALLOC (1, sizeof (*local)); + +	if (!local) +		return NULL; + +	local->op_ret = -1; +	local->op_errno = EUCLEAN; + +	frame->local = local; + +	return local; +} + + +char * +basestr (const char *str) +{ +        char *basestr = NULL; + +        basestr = strrchr (str, '/'); +        if (basestr) +                basestr ++; + +        return basestr; +} + +xlator_t * +dht_first_up_child (xlator_t *this) +{ +	dht_conf_t *conf = NULL; +	xlator_t   *child = NULL; +	int         i = 0; + +	conf = this->private; +	 +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (conf->subvolume_status[i]) { +				child = conf->subvolumes[i]; +				break; +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); +	 +	return child; +} + +xlator_t * +dht_subvol_get_hashed (xlator_t *this, loc_t *loc) +{ +        dht_layout_t *layout = NULL; +        xlator_t     *subvol = NULL; + +        if (is_fs_root (loc)) { +                subvol = dht_first_up_child (this); 
+                goto out; +        } + +        layout = dht_layout_get (this, loc->parent); + +        if (!layout) { +                gf_log (this->name, GF_LOG_ERROR, +                        "layout missing path=%s parent=%"PRId64, +                        loc->path, loc->parent->ino); +                goto out; +        } + +        subvol = dht_layout_search (this, layout, loc->name); + +        if (!subvol) { +                gf_log (this->name, GF_LOG_ERROR, +                        "could not find subvolume for path=%s", +                        loc->path); +                goto out; +        } + +out: +        return subvol; +} + + +xlator_t * +dht_subvol_get_cached (xlator_t *this, inode_t *inode) +{ +        dht_layout_t *layout = NULL; +        xlator_t     *subvol = NULL; + + +        layout = dht_layout_get (this, inode); + +        if (!layout) { +                goto out; +        } + +	subvol = layout->list[0].xlator; + +out: +        return subvol; +} + + +xlator_t * +dht_subvol_next (xlator_t *this, xlator_t *prev) +{ +	dht_conf_t *conf = NULL; +	int         i = 0; +	xlator_t   *next = NULL; + +	conf = this->private; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (conf->subvolumes[i] == prev) { +			if ((i + 1) < conf->subvolume_cnt) +				next = conf->subvolumes[i + 1]; +			break; +		} +	} + +	return next; +} + + +int +dht_subvol_cnt (xlator_t *this, xlator_t *subvol) +{ +	int i = 0; +	int ret = -1; +	dht_conf_t *conf = NULL; + + +	conf = this->private; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (subvol == conf->subvolumes[i]) { +			ret = i; +			break; +		} +	} + +	return ret; +} + + +#define set_if_greater(a, b) do {		\ +		if ((a) < (b))			\ +			(a) = (b);		\ +	} while (0) + +int +dht_stat_merge (xlator_t *this, struct stat *to, +		struct stat *from, xlator_t *subvol) +{ +	to->st_dev      = from->st_dev; + +	dht_itransform (this, subvol, from->st_ino, &to->st_ino); + +	to->st_mode     = from->st_mode; +	to->st_nlink    = 
from->st_nlink; +	to->st_uid      = from->st_uid; +	to->st_gid      = from->st_gid; +	to->st_rdev     = from->st_rdev; +	to->st_size    += from->st_size; +	to->st_blksize  = from->st_blksize; +	to->st_blocks  += from->st_blocks; + +	set_if_greater (to->st_atime, from->st_atime); +	set_if_greater (to->st_mtime, from->st_mtime); +	set_if_greater (to->st_ctime, from->st_ctime); + +	return 0; +} diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c new file mode 100644 index 00000000000..08b4a2746f8 --- /dev/null +++ b/xlators/cluster/dht/src/dht-layout.c @@ -0,0 +1,543 @@ +/* +   Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "byte-order.h" + +#define layout_base_size (sizeof (dht_layout_t)) + +#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0]) + +#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size)) + + +dht_layout_t * +dht_layout_new (xlator_t *this, int cnt) +{ +	dht_layout_t *layout = NULL; + + +	layout = CALLOC (1, layout_size (cnt)); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	layout->cnt = cnt; + +out: +	return layout; +} + + +dht_layout_t * +dht_layout_get (xlator_t *this, inode_t *inode) +{ +        uint64_t layout = 0; +        int      ret    = -1; + +        ret = inode_ctx_get (inode, this, &layout); + +        return (dht_layout_t *)(long)layout; +} + + +xlator_t * +dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) +{ +	uint32_t   hash = 0; +        xlator_t  *subvol = NULL; +	int        i = 0; +	int        ret = 0; + + +	ret = dht_hash_compute (layout->type, name, &hash); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"hash computation failed for type=%d name=%s", +			layout->type, name); +		goto out; +	} + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].start <= hash +		    && layout->list[i].stop >= hash) { +			subvol = layout->list[i].xlator; +			break; +		} +	} + +	if (!subvol) { +		gf_log (this->name, GF_LOG_DEBUG, +			"no subvolume for hash (value) = %u", hash); +	} + +out: +	return subvol; +} + + +dht_layout_t * +dht_layout_for_subvol (xlator_t *this, xlator_t *subvol) +{ +	dht_conf_t   *conf = NULL; +	dht_layout_t *layout = NULL; +	int           i = 0; + + +	conf = this->private; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (conf->subvolumes[i] == subvol) { +			layout = conf->file_layouts[i]; +			break; +		} +	} + +	return layout; +} + + +int 
+dht_layouts_init (xlator_t *this, dht_conf_t *conf) +{ +	dht_layout_t *layout = NULL; +	int           i = 0; +	int           ret = -1; +	 + +	conf->file_layouts = CALLOC (conf->subvolume_cnt, +				     sizeof (dht_layout_t *)); +	if (!conf->file_layouts) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		layout = dht_layout_new (this, 1); + +		if (!layout) { +			goto out; +		} + +		layout->preset = 1; + +		layout->list[0].xlator = conf->subvolumes[i]; + +		conf->file_layouts[i] = layout; +	} + +	ret = 0; +out: +	return ret; +} + + +int +dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, +			 int pos, int32_t **disk_layout_p) +{ +	int      ret = -1; +	int32_t *disk_layout = NULL; + +	disk_layout = CALLOC (5, sizeof (int)); +	if (!disk_layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	disk_layout[0] = hton32 (1); +	disk_layout[1] = hton32 (layout->type); +	disk_layout[2] = hton32 (layout->list[pos].start); +	disk_layout[3] = hton32 (layout->list[pos].stop); + +	if (disk_layout_p) +		*disk_layout_p = disk_layout; +	ret = 0; + +out: +	return ret; +} + + +int +dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, +		       int pos, int32_t *disk_layout) +{ +	int      cnt = 0; +	int      type = 0; +	int      start_off = 0; +	int      stop_off = 0; + + +	/* TODO: assert disk_layout_ptr is of required length */ + +	cnt  = ntoh32 (disk_layout[0]); +	if (cnt != 1) { +		gf_log (this->name, GF_LOG_ERROR, +			"disk layout has invalid count %d", cnt); +		return -1; +	} + +	/* TODO: assert type is compatible */ +	type      = ntoh32 (disk_layout[1]); +	start_off = ntoh32 (disk_layout[2]); +	stop_off  = ntoh32 (disk_layout[3]); + +	layout->list[pos].start = start_off; +	layout->list[pos].stop  = stop_off; + +	gf_log (this->name, GF_LOG_DEBUG, +		"merged to layout: %u - %u (type %d) from %s", +		start_off, stop_off, 
type, +		layout->list[pos].xlator->name); + +	return 0; +} + + +int +dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, +		  int op_ret, int op_errno, dict_t *xattr) +{ +	int      i     = 0; +	int      ret   = -1; +	int      err   = -1; +	int32_t *disk_layout = NULL; + + +	if (op_ret != 0) { +		err = op_errno; +	} + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].xlator == NULL) { +			layout->list[i].err    = err; +			layout->list[i].xlator = subvol; +			break; +		} +	} + +	if (op_ret != 0) { +		ret = 0; +		goto out; +	} + +	if (xattr) { +		/* during lookup and not mkdir */ +		ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", +				    VOID(&disk_layout)); +	} + +	if (ret != 0) { +		layout->list[i].err = -1; +		gf_log (this->name, GF_LOG_DEBUG, +			"missing disk layout on %s. err = %d", +			subvol->name, err); +		ret = 0; +		goto out; +	} + +	ret = dht_disk_layout_merge (this, layout, i, disk_layout); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout merge from subvolume %s failed", +			subvol->name); +		goto out; +	} +	layout->list[i].err = 0; + +out: +	return ret; +} + + +void +dht_layout_entry_swap (dht_layout_t *layout, int i, int j) +{ +	uint32_t  start_swap = 0; +	uint32_t  stop_swap = 0; +	xlator_t *xlator_swap = 0; +	int       err_swap = 0; + + +	start_swap  = layout->list[i].start; +	stop_swap   = layout->list[i].stop; +	xlator_swap = layout->list[i].xlator; +	err_swap    = layout->list[i].err; + +	layout->list[i].start  = layout->list[j].start; +	layout->list[i].stop   = layout->list[j].stop; +	layout->list[i].xlator = layout->list[j].xlator; +	layout->list[i].err    = layout->list[j].err; + +	layout->list[j].start  = start_swap; +	layout->list[j].stop   = stop_swap; +	layout->list[j].xlator = xlator_swap; +	layout->list[j].err    = err_swap; +} + + +int64_t +dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) +{ +	int64_t diff = 0; + +	if (layout->list[i].err || layout->list[j].err) +		diff = 
/*
 * Walk @layout and count what is wrong with it.  dht_layout_normalize
 * sorts the layout with dht_layout_sort before calling this.
 *
 * Entries with a non-zero err take no part in the range walk; they are
 * only tallied: -1 and ENOENT as "missing", ENOTCONN as "down", any
 * other value as "misc".
 *
 * Every pointer argument may be NULL.  Note that *holes_p and
 * *overlaps_p receive the NUMBER of holes/overlaps (hole_cnt,
 * overlap_cnt); the summed widths accumulated in the locals holes and
 * overlaps are computed but never reported.  @loc is not used.
 *
 * Always returns 0.
 */
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
		      uint32_t *holes_p, uint32_t *overlaps_p,
		      uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
{
	dht_conf_t *conf = NULL;
	uint32_t    holes    = 0;
	uint32_t    overlaps = 0;
	uint32_t    missing  = 0;
	uint32_t    down     = 0;
	uint32_t    misc     = 0;
	uint32_t    hole_cnt = 0;
	uint32_t    overlap_cnt = 0;
	int         i = 0;
	int         ret = 0;
	uint32_t    prev_stop = 0;
	uint32_t    last_stop = 0;
	char        is_virgin = 1;


	/* NOTE(review): conf is assigned but not read in this function */
	conf = this->private;

	/* The walk treats the hash space as a ring.  All offsets are
	   uint32_t, so "x - 1" and "x + 1" wrap modulo 2^32.  last_stop is
	   the value the final usable entry must stop at for the ring to
	   close: one before the start of the first entry.  prev_stop is
	   seeded with the same value so the first usable entry is compared
	   against list[0].start. */

	last_stop = layout->list[0].start - 1;
	prev_stop = last_stop;

	for (i = 0; i < layout->cnt; i++) {
		if (layout->list[i].err) {
			switch (layout->list[i].err) {
			case -1:
			case ENOENT:
				missing++;
				break;
			case ENOTCONN:
				down++;
				break;
			default:
				misc++;
			}
			continue;
		}

		/* at least one entry carries a usable range */
		is_virgin = 0;

		/* gap between the previous range and this one */
		if ((prev_stop + 1) < layout->list[i].start) {
			hole_cnt++;
			holes += (layout->list[i].start - (prev_stop + 1));
		}

		/* this range starts before the previous one ended */
		if ((prev_stop + 1) > layout->list[i].start) {
			overlap_cnt++;
			overlaps += ((prev_stop + 1) - layout->list[i].start);
		}
		prev_stop = layout->list[i].stop;
	}

	/* The ring did not close (the last usable range does not stop at
	   last_stop), or no entry had a usable range at all: one more hole.
	   Only the hole_cnt++ is conditional; the holes += below runs
	   unconditionally despite the indentation. */
	if ((last_stop - prev_stop) || is_virgin)
	    hole_cnt++;
	holes += (last_stop - prev_stop);

	if (holes_p)
		*holes_p = hole_cnt;

	if (overlaps_p)
		*overlaps_p = overlap_cnt;

	if (missing_p)
		*missing_p = missing;

	if (down_p)
		*down_p = down;

	if (misc_p)
		*misc_p = misc;

	return ret;
}
holes=%d overlaps=%d", +				loc->path, holes, overlaps); +		} +		ret = 1; +	} + +out: +	return ret; +} + + +int +dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, +			 loc_t *loc, dict_t *xattr) +{ +	int       idx = 0; +	int       pos = -1; +	int       ret = -1; +	int32_t  *disk_layout = NULL; +	int32_t   count = -1; +	uint32_t  start_off = -1; +	uint32_t  stop_off = -1; + + +	for (idx = 0; idx < layout->cnt; idx++) { +		if (layout->list[idx].xlator == subvol) { +			pos = idx; +			break; +		} +	} +	 +	if (pos == -1) { +		gf_log (this->name, GF_LOG_DEBUG, +			"%s - no layout info for subvolume %s", +			loc->path, subvol->name); +		ret = 1; +		goto out; +	} +	 +	if (xattr == NULL) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - xattr dictionary is NULL", +			loc->path); +		ret = -1; +		goto out; +	} + +	ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", +			    VOID(&disk_layout)); +	 +	if (ret < 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - disk layout missing", loc->path); +		ret = -1; +		goto out; +	}  + +	count  = ntoh32 (disk_layout[0]); +	if (count != 1) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - disk layout has invalid count %d", +			loc->path, count); +		ret = -1; +		goto out; +	} + +	start_off = ntoh32 (disk_layout[2]); +	stop_off  = ntoh32 (disk_layout[3]); +	 +	if ((layout->list[pos].start != start_off) +	    || (layout->list[pos].stop != stop_off)) { +		gf_log (this->name, GF_LOG_DEBUG, +			"subvol: %s; inode layout - %"PRId32" - %"PRId32"; " +			"disk layout - %"PRId32" - %"PRId32, +			layout->list[pos].xlator->name, +			layout->list[pos].start, layout->list[pos].stop, +			start_off, stop_off); +		ret = 1; +	} else { +		ret = 0; +	} +out: +	return ret; +} + diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c new file mode 100644 index 00000000000..9cc24ccf6b3 --- /dev/null +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -0,0 +1,224 @@ +/* +   Copyright (c) 2008 Z 
RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "compat.h" +#include "dht-common.h" + + + +int +dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			int op_ret, int op_errno) +{ +	dht_local_t *local = NULL; + + +	local = frame->local; +	local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno, +				      local->linkfile.inode, +				      &local->linkfile.stbuf); + +	return 0; +} + + +int +dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			 int op_ret, int op_errno, +			 inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	call_frame_t *prev = NULL; +	dict_t       *xattr = NULL; +	data_t       *str_data = NULL; +	int           ret = -1; + +	local = frame->local; +	prev  = cookie; + +	if (op_ret == -1) +		goto err; + +	xattr = get_new_dict (); +	if (!xattr) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->linkfile.xattr = dict_ref (xattr); +	local->linkfile.inode = inode_ref (inode); + +	str_data = str_to_data (local->linkfile.srcvol->name); +	if (!str_data) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory 
allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data); +	if (ret < 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"failed to initialize linkfile data"); +		op_errno = EINVAL; +	} +	str_data = NULL; + +	local->linkfile.stbuf = *stbuf; + +	STACK_WIND (frame, dht_linkfile_xattr_cbk, +		    prev->this, prev->this->fops->setxattr, +		    &local->linkfile.loc, local->linkfile.xattr, 0); + +	return 0; + +err: +	if (str_data) { +		data_destroy (str_data); +		str_data = NULL; +	} + +	local->linkfile.linkfile_cbk (frame, cookie, this, +				      op_ret, op_errno, inode, stbuf); +	return 0; +} + + +int +dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, +		     xlator_t *tovol, xlator_t *fromvol, loc_t *loc) +{ +	dht_local_t *local = NULL; + + +	local = frame->local; +	local->linkfile.linkfile_cbk = linkfile_cbk; +	local->linkfile.srcvol = tovol; +	loc_copy (&local->linkfile.loc, loc); + +	STACK_WIND (frame, dht_linkfile_create_cbk, +		    fromvol, fromvol->fops->mknod, loc, +		    S_IFREG | DHT_LINKFILE_MODE, 0); + +	return 0; +} + + +int +dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			 int32_t op_ret, int32_t op_errno) +{ +	dht_local_t   *local = NULL; +	call_frame_t  *prev = NULL; +	xlator_t      *subvol = NULL; + +	local = frame->local; +	prev = cookie; +	subvol = prev->this; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"unlinking linkfile %s on %s failed (%s)", +			local->loc.path, subvol->name, strerror (op_errno)); +	} + +	DHT_STACK_DESTROY (frame); + +	return 0; +} + + +int +dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, +		     xlator_t *subvol, loc_t *loc) +{ +	call_frame_t *unlink_frame = NULL; +	dht_local_t  *unlink_local = NULL; + +	unlink_frame = copy_frame (frame); +	if (!unlink_frame) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	unlink_local = 
dht_local_init (unlink_frame); +	if (!unlink_local) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	loc_copy (&unlink_local->loc, loc); + +	STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk, +		    subvol, subvol->fops->unlink, +		    &unlink_local->loc); + +	return 0; +err: +	if (unlink_frame) +		DHT_STACK_DESTROY (unlink_frame); + +	return -1; +} + + +xlator_t * +dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf, +		     dict_t *xattr) +{ +	dht_conf_t *conf = NULL; +	xlator_t   *subvol = NULL; +	void       *volname = NULL; +	int         i = 0, ret = 0; + + +	conf = this->private; + +	if (!xattr) +		goto out; + +	ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname); + +	if ((-1 == ret) || !volname) +		goto out; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) { +			subvol = conf->subvolumes[i]; +			break; +		} +	} + +out: +	return subvol; +} + + diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c new file mode 100644 index 00000000000..e5532f1bc87 --- /dev/null +++ b/xlators/cluster/dht/src/dht-rename.c @@ -0,0 +1,562 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + +/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should + *       delete the newpath if it gets EEXISTS from link() call. + */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + + +int +dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		    int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = 0; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev = cookie; + +	if (op_ret == -1) { +		/* TODO: undo the damage */ + +		gf_log (this->name, GF_LOG_ERROR, +			"rename %s -> %s on %s failed (%s)", +			local->loc.path, local->loc2.path, +			prev->this->name, strerror (op_errno)); + +		local->op_ret   = op_ret; +		local->op_errno = op_errno; +	} else { +		/* TODO: construct proper stbuf for dir */ +		local->stbuf = *stbuf; +	} + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) { +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  &local->stbuf); +	} + +	return 0; +} + + + +int +dht_rename_dir_do (call_frame_t *frame, xlator_t *this) +{ +	dht_local_t  *local = NULL; +	dht_conf_t   *conf = NULL; +	int           i = 0; + +	conf = this->private; +	local = frame->local; + +	if (local->op_ret == -1) +		goto err; + +	local->call_cnt = conf->subvolume_cnt; +	local->op_ret = 0; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_rename_dir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->rename, +			    &local->loc, &local->loc2); +	} + +	return 0; + +err: +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno); +	return 0; +} + + +int +dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			int op_ret, int op_errno, gf_dirent_t *entries) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = -1; +	call_frame_t *prev = NULL; + 
+	local = frame->local; +	prev  = cookie; + +	if (op_ret > 2) { +		gf_log (this->name, GF_LOG_DEBUG, +			"readdir on %s for %s returned %d entries", +			prev->this->name, local->loc.path, op_ret); +		local->op_ret = -1; +		local->op_errno = ENOTEMPTY; +	} + +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_rename_dir_do (frame, this); +	} + +	return 0; +} + + +int +dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			int op_ret, int op_errno, fd_t *fd) +{ +	dht_local_t  *local = NULL; +	int           this_call_cnt = -1; +	call_frame_t *prev = NULL; + + +	local = frame->local; +	prev  = cookie; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"opendir on %s for %s failed (%s)", +			prev->this->name, local->loc.path, +			strerror (op_errno)); +		goto err; +	} + +	STACK_WIND (frame, dht_rename_readdir_cbk, +		    prev->this, prev->this->fops->readdir, +		    local->fd, 4096, 0); + +	return 0; + +err: +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_rename_dir_do (frame, this); +	} + +	return 0; +} + + +int +dht_rename_dir (call_frame_t *frame, xlator_t *this) +{ +	dht_conf_t  *conf = NULL; +	dht_local_t *local = NULL; +	int          i = 0; +	int          op_errno = -1; + + +	conf = frame->this->private; +	local = frame->local; + +	local->call_cnt = conf->subvolume_cnt; + +	local->fd = fd_create (local->loc.inode, frame->root->pid); +	if (!local->fd) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} + +	local->op_ret = 0; + +	if (!local->dst_cached) { +		dht_rename_dir_do (frame, this); +		return 0; +	} + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		STACK_WIND (frame, dht_rename_opendir_cbk, +			    conf->subvolumes[i], +			    conf->subvolumes[i]->fops->opendir, +			    &local->loc2, local->fd); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL); +	return 0; +} + + +int +dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int32_t op_ret, int32_t op_errno) +{ +	dht_local_t  *local = NULL; +	call_frame_t *prev = NULL; +	int           this_call_cnt = 0; + +	local = frame->local; +	prev  = cookie; + +	this_call_cnt = dht_frame_return (frame); + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"unlink on %s failed (%s)", +			prev->this->name, strerror (op_errno)); +	} + +	if (is_last_call (this_call_cnt)) +		DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +				  &local->stbuf); + +	return 0; +} + + +int +dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		int32_t op_ret, int32_t op_errno, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	call_frame_t *prev = NULL; +	xlator_t     *src_hashed = NULL; +	xlator_t     *src_cached = NULL; +	xlator_t     *dst_hashed = NULL; +	xlator_t     *dst_cached = NULL; +	xlator_t     *rename_subvol = NULL; + +	local = frame->local; +	prev = cookie; + +	src_hashed = local->src_hashed; +	src_cached = local->src_cached; +	dst_hashed = local->dst_hashed; +	dst_cached = local->dst_cached; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_DEBUG, +			"rename on %s failed (%s)", prev->this->name, +			strerror (op_errno)); +		local->op_ret   = op_ret; +		local->op_errno = op_errno; +		goto unwind; +	} +	 +	/* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk +	 *       is called. since rename has already happened on rename_subvol, +	 *       unlink should not be sent for oldpath (either linkfile or cached-file) +	 *       on rename_subvol. 
*/ +	if (src_cached == dst_cached) +		rename_subvol = src_cached; +	else +		rename_subvol = dst_hashed; + +	/* TODO: delete files in background */ + +	if (src_cached != dst_hashed && src_cached != dst_cached) +		local->call_cnt++; + +	if (src_hashed != rename_subvol && src_hashed != src_cached) +		local->call_cnt++; + +	if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached) +		local->call_cnt++; + +	if (local->call_cnt == 0) +		goto unwind; + +	if (src_cached != dst_hashed && src_cached != dst_cached) { +		gf_log (this->name, GF_LOG_DEBUG, +			"deleting old src datafile %s @ %s", +			local->loc.path, src_cached->name); + +		STACK_WIND (frame, dht_rename_unlink_cbk, +			    src_cached, src_cached->fops->unlink, +			    &local->loc); +	} + +	if (src_hashed != rename_subvol && src_hashed != src_cached) { +		gf_log (this->name, GF_LOG_DEBUG, +			"deleting old src linkfile %s @ %s", +			local->loc.path, src_hashed->name); + +		STACK_WIND (frame, dht_rename_unlink_cbk, +			    src_hashed, src_hashed->fops->unlink, +			    &local->loc); +	} + +	if (dst_cached +	    && (dst_cached != dst_hashed) +	    && (dst_cached != src_cached)) { +		gf_log (this->name, GF_LOG_DEBUG, +			"deleting old dst datafile %s @ %s", +			local->loc2.path, dst_cached->name); + +		STACK_WIND (frame, dht_rename_unlink_cbk, +			    dst_cached, dst_cached->fops->unlink, +			    &local->loc2); +	} +	return 0; + +unwind: +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +			  &local->stbuf); + +	return 0; +} + + +int +dht_do_rename (call_frame_t *frame) +{ +	dht_local_t *local = NULL; +	xlator_t    *dst_hashed = NULL; +	xlator_t    *src_cached = NULL; +	xlator_t    *dst_cached = NULL; +	xlator_t    *this = NULL; +	xlator_t    *rename_subvol = NULL; + + +	local = frame->local; +	this  = frame->this; + +	dst_hashed = local->dst_hashed; +	dst_cached = local->dst_cached; +	src_cached = local->src_cached; + +	if (src_cached == dst_cached) +		rename_subvol = src_cached; +	else +		
rename_subvol = dst_hashed; + +	gf_log (this->name, GF_LOG_DEBUG, +		"renaming %s => %s (%s)", +		local->loc.path, local->loc2.path, rename_subvol->name); + +	STACK_WIND (frame, dht_rename_cbk, +		    rename_subvol, rename_subvol->fops->rename, +		    &local->loc, &local->loc2); + +	return 0; +} + + +int +dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		      int32_t op_ret, int32_t op_errno, +		      inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	call_frame_t *prev = NULL; +	int           this_call_cnt  = 0; + + +	local = frame->local; +	prev = cookie; +	 +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_DEBUG, +			"link/file on %s failed (%s)", +			prev->this->name, strerror (op_errno)); +		local->op_ret   = -1; +		local->op_errno = op_errno; +	} + +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) { +		if (local->op_ret == -1) +			goto unwind; +		 +		dht_do_rename (frame); +	} + +	return 0; + +unwind: +	DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno, +			  &local->stbuf); + +	return 0; +} + + +int +dht_rename_create_links (call_frame_t *frame) +{ +	dht_local_t *local = NULL; +	xlator_t    *this = NULL; +	xlator_t    *src_hashed = NULL; +	xlator_t    *src_cached = NULL; +	xlator_t    *dst_hashed = NULL; +	xlator_t    *dst_cached = NULL; +	int          call_cnt = 0; + + +	local = frame->local; +	this  = frame->this; + +	src_hashed = local->src_hashed; +	src_cached = local->src_cached; +	dst_hashed = local->dst_hashed; +	dst_cached = local->dst_cached; + +	if (src_cached == dst_cached) +		goto nolinks; + +	if (dst_hashed != src_hashed && dst_hashed != src_cached) +		call_cnt++; + +	if (src_cached != dst_hashed) +		call_cnt++; + +	local->call_cnt = call_cnt; + +	if (dst_hashed != src_hashed && dst_hashed != src_cached) { +		gf_log (this->name, GF_LOG_DEBUG, +			"linkfile %s @ %s => %s", +			local->loc.path, dst_hashed->name, src_cached->name); +		dht_linkfile_create (frame, 
dht_rename_links_cbk, +				     src_cached, dst_hashed, &local->loc); +	} + +	if (src_cached != dst_hashed) { +		gf_log (this->name, GF_LOG_DEBUG, +			"link %s => %s (%s)", local->loc.path, +			local->loc2.path, src_cached->name); +		STACK_WIND (frame, dht_rename_links_cbk, +			    src_cached, src_cached->fops->link, +			    &local->loc, &local->loc2); +	} + +nolinks: +	if (!call_cnt) { +		/* skip to next step */ +		dht_do_rename (frame); +	} + +	return 0; +} + + +int +dht_rename (call_frame_t *frame, xlator_t *this, +	    loc_t *oldloc, loc_t *newloc) +{ +	xlator_t    *src_cached = NULL; +	xlator_t    *src_hashed = NULL; +	xlator_t    *dst_cached = NULL; +	xlator_t    *dst_hashed = NULL; +	int          op_errno = -1; +	int          ret = -1; +	dht_local_t *local = NULL; + + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (oldloc, err); +	VALIDATE_OR_GOTO (newloc, err); + +	src_hashed = dht_subvol_get_hashed (this, oldloc); +	if (!src_hashed) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			oldloc->path); +		op_errno = EINVAL; +		goto err; +	} + +	src_cached = dht_subvol_get_cached (this, oldloc->inode); +	if (!src_cached) { +		gf_log (this->name, GF_LOG_ERROR, +			"no cached subvolume for path=%s", oldloc->path); +		op_errno = EINVAL; +		goto err; +	} + +	dst_hashed = dht_subvol_get_hashed (this, newloc); +	if (!dst_hashed) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			newloc->path); +		op_errno = EINVAL; +		goto err; +	} + +	if (newloc->inode) +		dst_cached = dht_subvol_get_cached (this, newloc->inode); + +	local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	ret = loc_copy (&local->loc, oldloc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	ret = loc_copy 
(&local->loc2, newloc); +	if (ret == -1) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	local->src_hashed = src_hashed; +	local->src_cached = src_cached; +	local->dst_hashed = dst_hashed; +	local->dst_cached = dst_cached; + +	gf_log (this->name, GF_LOG_DEBUG, +		"renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)", +		oldloc->path, src_hashed->name, src_cached->name, +		newloc->path, dst_hashed->name, +		dst_cached ? dst_cached->name : "<nul>"); + +	if (S_ISDIR (oldloc->inode->st_mode)) { +		dht_rename_dir (frame, this); +	} else { +		local->op_ret = 0; +		dht_rename_create_links (frame); +	} + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c new file mode 100644 index 00000000000..ee32b2253ed --- /dev/null +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -0,0 +1,460 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" + + +int +dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret) +{ +	dht_local_t  *local = NULL; + + +	local = frame->local; +	local->selfheal.dir_cbk (frame, NULL, frame->this, ret, +				 local->op_errno); + +	return 0; +} + + +int +dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			    int op_ret, int op_errno) +{ +	dht_local_t  *local = NULL; +	call_frame_t *prev = NULL; +	xlator_t     *subvol = NULL; +	int           i = 0; +	dht_layout_t *layout = NULL; +	int           err = 0; +	int           this_call_cnt = 0; + +	local = frame->local; +	layout = local->selfheal.layout; +	prev = cookie; +	subvol = prev->this; + +	if (op_ret == 0) +		err = 0; +	else +		err = op_errno; + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].xlator == subvol) { +			layout->list[i].err = err; +			break; +		} +	} + +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_selfheal_dir_finish (frame, this, 0); +	} + +	return 0; +} + + +int +dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc, +				  dht_layout_t *layout, int i) +{ +	xlator_t          *subvol = NULL; +	dict_t            *xattr = NULL; +	int                ret = 0; +	xlator_t          *this = NULL; +	int32_t           *disk_layout = NULL; + + +	subvol = layout->list[i].xlator; +	this = frame->this; + +	xattr = get_new_dict (); +	if (!xattr) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	ret = dht_disk_layout_extract (this, layout, i, &disk_layout); +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"failed to extract disk layout"); +		goto err; +	} + +	ret = dict_set_bin (xattr, "trusted.glusterfs.dht", +			    disk_layout, 4 * 4); +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"failed to set xattr 
dictionary"); +		goto err; +	} +	disk_layout = NULL; + +	gf_log (this->name, GF_LOG_DEBUG, +		"setting hash range %u - %u (type %d) on subvolume %s for %s", +		layout->list[i].start, layout->list[i].stop, +		layout->type, subvol->name, loc->path); + +	dict_ref (xattr); + +	STACK_WIND (frame, dht_selfheal_dir_xattr_cbk, +		    subvol, subvol->fops->setxattr, +		    loc, xattr, 0); + +	dict_unref (xattr); + +	return 0; + +err: +	if (xattr) +		dict_destroy (xattr); + +	if (disk_layout) +		FREE (disk_layout); + +	dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this, +				    -1, ENOMEM); +	return 0; +} + + +int +dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout) +{ +	dht_local_t *local = NULL; +	int          missing_xattr = 0; +	int          i = 0; +	int          ret = 0; +	xlator_t    *this = NULL; + +	local = frame->local; +	this = frame->this; + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].err != -1 || !layout->list[i].stop) +			continue; +		/* attr missing and layout present */ +		missing_xattr++; +	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"%d subvolumes missing xattr for %s", +		missing_xattr, loc->path); + +	if (missing_xattr == 0) { +		dht_selfheal_dir_finish (frame, this, 0); +		return 0; +	} + +	local->call_cnt = missing_xattr; + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].err != -1 || !layout->list[i].stop) +			continue; + +		ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i); + +		if (--missing_xattr == 0) +			break; +	} +	return 0; +} + + +int +dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			    int op_ret, int op_errno, +			    inode_t *inode, struct stat *stbuf) +{ +	dht_local_t   *local = NULL; +	dht_layout_t  *layout = NULL; +	call_frame_t  *prev = NULL; +	xlator_t      *subvol = NULL; +	int            i = 0; +	int            this_call_cnt = 0; + + +	local  = frame->local; +	layout = local->selfheal.layout; +	prev   = cookie; +	subvol = 
prev->this; + +	if ((op_ret == 0) || (op_errno == EEXIST)) { +		for (i = 0; i < layout->cnt; i++) { +			if (layout->list[i].xlator == subvol) { +				layout->list[i].err = -1; +				break; +			} +		} +	} + +	this_call_cnt = dht_frame_return (frame); + +	if (is_last_call (this_call_cnt)) { +		dht_selfheal_dir_xattr (frame, &local->loc, layout); +	} + +	return 0; +} + + +int +dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc, +			dht_layout_t *layout, int force) +{ +	int           missing_dirs = 0; +	int           i = 0; +	dht_local_t  *local = NULL; +	xlator_t     *this = NULL; + + +	local = frame->local; +	this = frame->this; + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].err == ENOENT || force) +			missing_dirs++; +	} + +	if (missing_dirs == 0) { +		dht_selfheal_dir_xattr (frame, loc, layout); +		return 0; +	} + +	local->call_cnt = missing_dirs; +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].err == ENOENT || force) { +			gf_log (this->name, GF_LOG_DEBUG, +				"creating directory %s on subvol %s", +				loc->path, layout->list[i].xlator->name); + +			STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk, +				    layout->list[i].xlator, +				    layout->list[i].xlator->fops->mkdir, +				    loc, local->stbuf.st_mode); +		} +	} + +	return 0; +} + +void +dht_selfheal_fix_this_virgin (call_frame_t *frame, loc_t *loc, +			      dht_layout_t *layout) +{ +	dht_conf_t  *conf = NULL; +	xlator_t    *this = NULL; +	uint32_t     chunk = 0; +	int          i = 0; +	uint32_t     start = 0; +	int          cnt = 0; +	int          err = 0; + +	this = frame->this; +	conf = this->private; + +	for (i = 0; i < layout->cnt; i++) { +		err = layout->list[i].err; +		if (err == -1) { +			cnt++; +		} +	} + +	chunk = ((unsigned long) 0xffffffff) / cnt; + +	start = 0; +	for (i = 0; i < layout->cnt; i++) { +		err = layout->list[i].err; +		if (err == -1) { +			layout->list[i].start = start; +			layout->list[i].stop  = start + chunk - 1; +			 +			start = start + chunk; + 
+			gf_log (this->name, GF_LOG_DEBUG, +				"gave fix: %u - %u on %s for %s", +				layout->list[i].start, layout->list[i].stop, +				layout->list[i].xlator->name, loc->path); +			if (--cnt == 0) { +				layout->list[i].stop = 0xffffffff; +				break; +			} +		} +	} +} + + +int +dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc, +			  dht_layout_t *layout) +{ +	dht_conf_t  *conf = NULL; +	xlator_t    *this = NULL; +	dht_local_t *local = NULL; +	int          missing = -1; +	int          down = -1; +	int          holes = -1; +	int          ret = -1; +	int          i = -1; + +	this = frame->this; +	conf = this->private; +	local = frame->local; + +	missing = local->selfheal.missing; +	down = local->selfheal.down; +	holes = local->selfheal.hole_cnt; + +	if ((missing + down) == conf->subvolume_cnt) { +		dht_selfheal_fix_this_virgin (frame, loc, layout); +		ret = 0; +	} + +	if (holes <= down) { +		/* the down subvol might fill up the holes */ +		ret = 0; +	} + +	for (i = 0; i < layout->cnt; i++) { +		/* directory not present */ +		if (layout->list[i].err == ENOENT) { +			ret = 0; +			break; +		} +	} + +	/* TODO: give a fix to these non-virgins */ + +	return ret; +} + + +int +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, +			loc_t *loc, dht_layout_t *layout) +{ +	dht_local_t *local    = NULL; +	uint32_t     holes    = 0; +	uint32_t     overlaps = 0; +	uint32_t     missing  = 0; +	uint32_t     down     = 0; +	uint32_t     misc     = 0; +	int          ret      = 0; +	xlator_t    *this     = NULL; + + +	local = frame->local; +	this = frame->this; + +	ret = dht_layout_anomalies (this, loc, layout, +				    &local->selfheal.hole_cnt, +				    &local->selfheal.overlaps_cnt, +				    &local->selfheal.missing, +				    &local->selfheal.down, +				    &local->selfheal.misc); + +	holes    = local->selfheal.hole_cnt; +	overlaps = local->selfheal.overlaps_cnt; +	missing  = local->selfheal.missing; +	down     = local->selfheal.down; +	misc     = 
local->selfheal.misc; + +	local->selfheal.dir_cbk = dir_cbk; +	local->selfheal.layout = layout; + +/* +	if (down) { +		gf_log (this->name, GF_LOG_ERROR, +			"%d subvolumes down -- not fixing", down); +		ret = 0; +		goto sorry_no_fix; +	} + +	if (overlaps) { +		gf_log (this->name, GF_LOG_ERROR, +			"not fixing overlaps in %s", loc->path); +		local->op_errno = EINVAL; +		ret = -1; +		goto sorry_no_fix; +	} + +	if (misc) { +		gf_log (this->name, GF_LOG_ERROR, +			"%d subvolumes have unrecoverable errors", misc); +		ret = 0; +		goto sorry_no_fix; +	} + +	if (holes > missing) { +		gf_log (this->name, GF_LOG_ERROR, +			"%d holes and %d pigeons -- not fixing", +			holes, missing); +		ret = 0; +		goto sorry_no_fix; +	} +*/ +	ret = dht_selfheal_dir_getafix (frame, loc, layout); + +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"the directory is not a virgin"); +		goto sorry_no_fix; +	} + +	dht_selfheal_dir_mkdir (frame, loc, layout, 0); + +	return 0; + +sorry_no_fix: +	/* TODO: need to put appropriate local->op_errno */ +	dht_selfheal_dir_finish (frame, this, ret); + +	return 0; +} + + +int +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, +		      loc_t *loc, dht_layout_t *layout) +{ +	int          ret = 0; +	dht_local_t *local    = NULL; + + +	local = frame->local; + +	local->selfheal.dir_cbk = dir_cbk; +	local->selfheal.layout = layout; + +	ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1); + +	return 0; +} diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c new file mode 100644 index 00000000000..836e7a4e81f --- /dev/null +++ b/xlators/cluster/dht/src/dht.c @@ -0,0 +1,222 @@ +/* +   Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. 
+ +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "dht-common.c" + +/* TODO: +   - use volumename in xattr instead of "dht" +   - use NS locks +   - handle all cases in self heal layout reconstruction +   - complete linkfile selfheal +*/ + + + +int +notify (xlator_t *this, int event, void *data, ...) 
+{ +	int ret = -1; + +	ret = dht_notify (this, event, data); + +	return ret; +} + +void +fini (xlator_t *this) +{ +        int         i = 0; +        dht_conf_t *conf = NULL; + +	conf = this->private; + +        if (conf) { +                if (conf->file_layouts) { +                        for (i = 0; i < conf->subvolume_cnt; i++) { +                                FREE (conf->file_layouts[i]); +                        } +                        FREE (conf->file_layouts); +                } + +                if (conf->default_dir_layout) +                        FREE (conf->default_dir_layout); + +                if (conf->subvolumes) +                        FREE (conf->subvolumes); + +		if (conf->subvolume_status) +			FREE (conf->subvolume_status); + +                FREE (conf); +        } + +	return; +} + +int +init (xlator_t *this) +{ +        dht_conf_t    *conf = NULL; +	char          *lookup_unhashed_str = NULL; +        int            ret = -1; +        int            i = 0; + +	if (!this->children) { +		gf_log (this->name, GF_LOG_ERROR, +			"DHT needs more than one child defined"); +		return -1; +	} +   +	if (!this->parents) { +		gf_log (this->name, GF_LOG_WARNING, +			"dangling volume. 
check volfile "); +	} + +        conf = CALLOC (1, sizeof (*conf)); +        if (!conf) { +                gf_log (this->name, GF_LOG_ERROR, +                        "memory allocation failed :("); +                goto err; +        } + +	conf->search_unhashed = 0; + +	if (dict_get_str (this->options, "lookup-unhashed", +			  &lookup_unhashed_str) == 0) { +		gf_string2boolean (lookup_unhashed_str, +				   &conf->search_unhashed); +	} + +        ret = dht_init_subvolumes (this, conf); +        if (ret == -1) { +                goto err; +        } + +        ret = dht_layouts_init (this, conf); +        if (ret == -1) { +                goto err; +        } + +	LOCK_INIT (&conf->subvolume_lock); + +	conf->gen = 1; + +        this->private = conf; + +        return 0; + +err: +        if (conf) { +                if (conf->file_layouts) { +                        for (i = 0; i < conf->subvolume_cnt; i++) { +                                FREE (conf->file_layouts[i]); +                        } +                        FREE (conf->file_layouts); +                } + +                if (conf->default_dir_layout) +                        FREE (conf->default_dir_layout); + +                if (conf->subvolumes) +                        FREE (conf->subvolumes); + +		if (conf->subvolume_status) +			FREE (conf->subvolume_status); + +                FREE (conf); +        } + +        return -1; +} + + +struct xlator_fops fops = { +	.lookup      = dht_lookup, +	.mknod       = dht_mknod, +	.create      = dht_create, + +	.stat        = dht_stat, +	.chmod       = dht_chmod, +	.chown       = dht_chown, +	.fchown      = dht_fchown, +	.fchmod      = dht_fchmod, +	.fstat       = dht_fstat, +	.utimens     = dht_utimens, +	.truncate    = dht_truncate, +	.ftruncate   = dht_ftruncate, +	.access      = dht_access, +	.readlink    = dht_readlink, +	.setxattr    = dht_setxattr, +	.getxattr    = dht_getxattr, +	.removexattr = dht_removexattr, +	.open        = dht_open, +	.readv       = 
dht_readv, +	.writev      = dht_writev, +	.flush       = dht_flush, +	.fsync       = dht_fsync, +	.statfs      = dht_statfs, +	.lk          = dht_lk, +	.opendir     = dht_opendir, +	.readdir     = dht_readdir, +	.fsyncdir    = dht_fsyncdir, +	.symlink     = dht_symlink, +	.unlink      = dht_unlink, +	.link        = dht_link, +	.mkdir       = dht_mkdir, +	.rmdir       = dht_rmdir, +	.rename      = dht_rename, +	.inodelk     = dht_inodelk, +	.finodelk    = dht_finodelk, +	.entrylk     = dht_entrylk, +	.fentrylk    = dht_fentrylk, +	.xattrop     = dht_xattrop, +	.fxattrop    = dht_fxattrop, +#if 0 +	.setdents    = dht_setdents, +	.getdents    = dht_getdents, +	.checksum    = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +//	.release    = dht_release, +//      .releasedir = dht_releasedir, +	.forget     = dht_forget +}; + + +struct volume_options options[] = { +        { .key  = {"lookup-unhashed"},  +	  .type = GF_OPTION_TYPE_BOOL  +	}, +	{ .key  = {NULL} }, +}; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c new file mode 100644 index 00000000000..6333e002fbc --- /dev/null +++ b/xlators/cluster/dht/src/nufa.c @@ -0,0 +1,684 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.c" + +/* TODO: all 'TODO's in dht.c holds good */ + +int  +nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +		       int op_ret, int op_errno, +		       inode_t *inode, struct stat *stbuf, dict_t *xattr) +{ +	dht_layout_t *layout      = NULL; +        xlator_t     *subvol      = NULL; +        char          is_linkfile = 0; +        char          is_dir      = 0; +        dht_conf_t   *conf        = NULL; +        dht_local_t  *local       = NULL; +        loc_t        *loc         = NULL; +        int           i           = 0; +        call_frame_t *prev        = NULL; +	int           call_cnt    = 0; + + +        conf  = this->private; + +        prev  = cookie; +        local = frame->local; +        loc   = &local->loc; + +	if (ENTRY_MISSING (op_ret, op_errno)) { +		if (conf->search_unhashed) { +			local->op_errno = ENOENT; +			dht_lookup_everywhere (frame, this, loc); +			return 0; +		} +	} + +        if (op_ret == -1) +                goto out; + +        is_linkfile = check_is_linkfile (inode, stbuf, xattr); +        is_dir      = check_is_dir (inode, stbuf, xattr); + +        if (!is_dir && !is_linkfile) { +                /* non-directory and not a linkfile */ + +		dht_itransform (this, prev->this, stbuf->st_ino, +				&stbuf->st_ino); + +		layout = dht_layout_for_subvol (this, prev->this); +		if (!layout) { +			gf_log (this->name, GF_LOG_ERROR, +				"no pre-set layout for subvolume %s", +				prev->this->name); +			op_ret   = -1; +			op_errno = EINVAL; +			goto err; +		} + +                inode_ctx_put (inode, this, (uint64_t)(long)layout); +                goto out; +        } + +        if (is_dir) { +                call_cnt        = conf->subvolume_cnt; +		local->call_cnt = call_cnt; + +                local->inode = inode_ref (inode); +                local->xattr = dict_ref (xattr); + +		local->op_ret = 0; +		local->op_errno = 
0; + +		local->layout = dht_layout_new (this, conf->subvolume_cnt); +		if (!local->layout) { +			op_ret   = -1; +			op_errno = ENOMEM; +			gf_log (this->name, GF_LOG_ERROR, +				"memory allocation failed :("); +			goto err; +		} + +                for (i = 0; i < call_cnt; i++) { +                        STACK_WIND (frame, dht_lookup_dir_cbk, +                                    conf->subvolumes[i], +                                    conf->subvolumes[i]->fops->lookup, +                                    &local->loc, local->xattr_req); +                } +        } + +        if (is_linkfile) { +                subvol = dht_linkfile_subvol (this, inode, stbuf, xattr); + +                if (!subvol) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "linkfile not having link subvolume. path=%s", +                                loc->path); +			dht_lookup_everywhere (frame, this, loc); +			return 0; +                } + +		STACK_WIND (frame, dht_lookup_linkfile_cbk, +			    subvol, subvol->fops->lookup, +			    &local->loc, local->xattr_req); +        } + +        return 0; + +out: +	if (!local->hashed_subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			local->loc.path); +		op_errno = EINVAL; +		goto err; +	} +		 +	STACK_WIND (frame, dht_lookup_cbk, +		    local->hashed_subvol, local->hashed_subvol->fops->lookup, +		    &local->loc, local->xattr_req); + +	return 0; + + err: +        DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr); +        return 0; +} + +int +nufa_lookup (call_frame_t *frame, xlator_t *this, +	     loc_t *loc, dict_t *xattr_req) +{ +        xlator_t     *hashed_subvol = NULL; +        xlator_t     *cached_subvol = NULL; +        xlator_t     *subvol = NULL; +        dht_local_t  *local  = NULL; +	dht_conf_t   *conf = NULL; +        int           ret    = -1; +        int           op_errno = -1; +	dht_layout_t *layout = NULL; +	int           i = 
0; +	int           call_cnt = 0; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +	conf = this->private; + +        local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +        ret = loc_dup (loc, &local->loc); +        if (ret == -1) { +                op_errno = errno; +                gf_log (this->name, GF_LOG_ERROR, +                        "copying location failed for path=%s", +                        loc->path); +                goto err; +        } + +	if (xattr_req) { +		local->xattr_req = dict_ref (xattr_req); +	} else { +		local->xattr_req = dict_new (); +	} + +	hashed_subvol = dht_subvol_get_hashed (this, &local->loc); +	cached_subvol = dht_subvol_get_cached (this, local->loc.inode); +	 +	local->cached_subvol = cached_subvol; +	local->hashed_subvol = hashed_subvol; + +        if (is_revalidate (loc)) { +		layout = dht_layout_get (this, loc->inode); + +                if (!layout) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "revalidate without cache. path=%s", +                                loc->path); +                        op_errno = EINVAL; +                        goto err; +                } + +		if (layout->gen && (layout->gen < conf->gen)) { +			gf_log (this->name, GF_LOG_WARNING, +				"incomplete layout failure for path=%s", +				loc->path); +			op_errno = EAGAIN; +			goto err; +		} + +		local->inode    = inode_ref (loc->inode); +		local->st_ino   = loc->inode->ino; + +		local->call_cnt = layout->cnt; +		call_cnt = local->call_cnt; +		 +		/* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, +		 *       revalidates directly go to the cached-subvolume. 
+		 */ +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht", 4 * 4); + +		for (i = 0; i < layout->cnt; i++) { +			subvol = layout->list[i].xlator; +			 +			STACK_WIND (frame, dht_revalidate_cbk, +				    subvol, subvol->fops->lookup, +				    loc, local->xattr_req); + +			if (!--call_cnt) +				break; +		} +	} else { +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht", 4 * 4); + +		ret = dict_set_uint32 (local->xattr_req,  +				       "trusted.glusterfs.dht.linkto", 256); + +		/* Send it to only local volume */ +		STACK_WIND (frame, nufa_local_lookup_cbk, +			    conf->local_volume,  +			    conf->local_volume->fops->lookup, +			    loc, local->xattr_req); +	} + +        return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); +	return 0; +} + +int +nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,  +				 xlator_t *this, int op_ret, int op_errno, +				 inode_t *inode, struct stat *stbuf) +{ + 	dht_local_t  *local = NULL; + 	call_frame_t *prev = NULL; +	dht_conf_t   *conf  = NULL; +	 + 	local = frame->local; + 	prev  = cookie; + 	conf  = this->private; +	 + 	if (op_ret == -1) + 		goto err; +	 + 	STACK_WIND (frame, dht_create_cbk, + 		    conf->local_volume, conf->local_volume->fops->create, + 		    &local->loc, local->flags, local->mode, local->fd); +	 + 	return 0; +	 + err: + 	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);	 + 	return 0; +} + +int +nufa_create (call_frame_t *frame, xlator_t *this, +	     loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) +{ + 	dht_local_t *local = NULL; +	dht_conf_t  *conf  = NULL; +	xlator_t    *subvol = NULL; +	int          op_errno = -1; +	int          ret = -1; + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + + 	conf  = this->private; 	 + +        local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		
gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	subvol = dht_subvol_get_hashed (this, loc); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = ENOENT; +		goto err; +	} + + 	if (subvol != conf->local_volume) { + 		/* create a link file instead of actual file */ + 		ret = loc_copy (&local->loc, loc); + 		if (ret == -1) { + 			gf_log (this->name, GF_LOG_ERROR, + 				"memory allocation failed :("); + 			op_errno = ENOMEM; + 			goto err; + 		} +  + 		local->fd = fd_ref (fd); + 		local->mode = mode; + 		local->flags = flags; + 		 + 		dht_linkfile_create (frame, nufa_create_linkfile_create_cbk, + 				      conf->local_volume, subvol, loc); + 		return 0; + 	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"creating %s on %s", loc->path, subvol->name); + +	STACK_WIND (frame, dht_create_cbk, +		    subvol, subvol->fops->create, +		    loc, flags, mode, fd); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? 
errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + +	return 0; +} + +int +nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +			 int op_ret, int op_errno, +			 inode_t *inode, struct stat *stbuf) +{ + 	dht_local_t  *local = NULL; + 	call_frame_t *prev = NULL; +	dht_conf_t   *conf  = NULL; +	 + 	local = frame->local; + 	prev  = cookie; + 	conf  = this->private; + 	 + 	if (op_ret >= 0) { + 		STACK_WIND (frame, dht_newfile_cbk, + 			    conf->local_volume,  + 			    conf->local_volume->fops->mknod, + 			    &local->loc, local->mode, local->rdev); +		 + 		return 0; + 	} +	 + 	DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf); + 	return 0; +} + + +int +nufa_mknod (call_frame_t *frame, xlator_t *this, +	    loc_t *loc, mode_t mode, dev_t rdev) +{ + 	dht_local_t *local = NULL; +	dht_conf_t  *conf  = NULL; +	xlator_t    *subvol = NULL; +	int          op_errno = -1; +	int          ret = -1; + +	VALIDATE_OR_GOTO (frame, err); +	VALIDATE_OR_GOTO (this, err); +	VALIDATE_OR_GOTO (loc, err); + + 	conf  = this->private; 	 + + +        local = dht_local_init (frame); +	if (!local) { +		op_errno = ENOMEM; +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto err; +	} + +	subvol = dht_subvol_get_hashed (this, loc); +	if (!subvol) { +		gf_log (this->name, GF_LOG_ERROR, +			"no subvolume in layout for path=%s", +			loc->path); +		op_errno = ENOENT; +		goto err; +	} + + + 	if (conf->local_volume != subvol) { + 		/* Create linkfile first */ + 		ret = loc_copy (&local->loc, loc); + 		if (ret == -1) { + 			gf_log (this->name, GF_LOG_ERROR, + 				"memory allocation failed :("); + 			op_errno = ENOMEM; + 			goto err; + 		} +  +		local->mode = mode; + 		local->rdev = rdev; + 		 + 		dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, + 				      conf->local_volume, subvol, loc); + 		return 0; + 	} + +	gf_log (this->name, GF_LOG_DEBUG, +		"creating %s on %s", loc->path, subvol->name); + +	STACK_WIND 
(frame, dht_newfile_cbk, +		    subvol, subvol->fops->mknod, +		    loc, mode, rdev); + +	return 0; + +err: +	op_errno = (op_errno == -1) ? errno : op_errno; +	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + +	return 0; +} + + +int +notify (xlator_t *this, int event, void *data, ...) +{ +	int ret = -1; + +	ret = dht_notify (this, event, data); + +	return ret; +} + +void +fini (xlator_t *this) +{ +        int         i = 0; +        dht_conf_t *conf = NULL; + +	conf = this->private; + +        if (conf) { +                if (conf->file_layouts) { +                        for (i = 0; i < conf->subvolume_cnt; i++) { +                                FREE (conf->file_layouts[i]); +                        } +                        FREE (conf->file_layouts); +                } + +                if (conf->default_dir_layout) +                        FREE (conf->default_dir_layout); + +                if (conf->subvolumes) +                        FREE (conf->subvolumes); + +		if (conf->subvolume_status) +			FREE (conf->subvolume_status); + +                FREE (conf); +        } + +	return; +} + +int +init (xlator_t *this) +{ +        dht_conf_t    *conf = NULL; +	xlator_list_t *trav = NULL; +	data_t        *data = NULL; +	char          *local_volname = NULL; +	char          *lookup_unhashed_str = NULL; +        int            ret = -1; +        int            i = 0; +	char           my_hostname[256]; + +	if (!this->children) { +		gf_log (this->name, GF_LOG_ERROR, +			"DHT needs more than one child defined"); +		return -1; +	} +   +	if (!this->parents) { +		gf_log (this->name, GF_LOG_WARNING, +			"dangling volume. 
check volfile "); +	} + +        conf = CALLOC (1, sizeof (*conf)); +        if (!conf) { +                gf_log (this->name, GF_LOG_ERROR, +                        "memory allocation failed :("); +                goto err; +        } + +	conf->search_unhashed = 0; + +	if (dict_get_str (this->options, "lookup-unhashed", +			  &lookup_unhashed_str) == 0) { +		gf_string2boolean (lookup_unhashed_str, +				   &conf->search_unhashed); +	} + +        ret = dht_init_subvolumes (this, conf); +        if (ret == -1) { +                goto err; +        } + +        ret = dht_layouts_init (this, conf); +        if (ret == -1) { +                goto err; +        } + +	LOCK_INIT (&conf->subvolume_lock); + +	conf->gen = 1; + +	local_volname = "localhost"; +	ret = gethostname (my_hostname, 256); +	if (ret < 0) { +		gf_log (this->name, GF_LOG_WARNING, +			"could not find hostname (%s)", +			strerror (errno)); +	} + +	if (ret == 0) +		local_volname = my_hostname; + +	data = dict_get (this->options, "local-volume-name"); +	if (data) { +		local_volname = data->data; +	} + +	trav = this->children; +	while (trav) { +		if (strcmp (trav->xlator->name, local_volname) == 0) +			break; +		trav = trav->next; +	} + +	if (!trav) { +		gf_log (this->name, GF_LOG_ERROR,  +			"Could not find subvolume named '%s'. 
" +			"Please define volume with the name as the hostname " +			"or override it with 'option local-volume-name'", +			local_volname); +		goto err; +	} +	/* The volume specified exists */ +	conf->local_volume = trav->xlator; + +        this->private = conf; + +        return 0; + +err: +        if (conf) { +                if (conf->file_layouts) { +                        for (i = 0; i < conf->subvolume_cnt; i++) { +                                FREE (conf->file_layouts[i]); +                        } +                        FREE (conf->file_layouts); +                } + +                if (conf->default_dir_layout) +                        FREE (conf->default_dir_layout); + +                if (conf->subvolumes) +                        FREE (conf->subvolumes); + +		if (conf->subvolume_status) +			FREE (conf->subvolume_status); + +                FREE (conf); +        } + +        return -1; +} + + +struct xlator_fops fops = { +	.lookup      = nufa_lookup, +	.create      = nufa_create, +	.mknod       = nufa_mknod, + +	.stat        = dht_stat, +	.chmod       = dht_chmod, +	.chown       = dht_chown, +	.fchown      = dht_fchown, +	.fchmod      = dht_fchmod, +	.fstat       = dht_fstat, +	.utimens     = dht_utimens, +	.truncate    = dht_truncate, +	.ftruncate   = dht_ftruncate, +	.access      = dht_access, +	.readlink    = dht_readlink, +	.setxattr    = dht_setxattr, +	.getxattr    = dht_getxattr, +	.removexattr = dht_removexattr, +	.open        = dht_open, +	.readv       = dht_readv, +	.writev      = dht_writev, +	.flush       = dht_flush, +	.fsync       = dht_fsync, +	.statfs      = dht_statfs, +	.lk          = dht_lk, +	.opendir     = dht_opendir, +	.readdir     = dht_readdir, +	.fsyncdir    = dht_fsyncdir, +	.symlink     = dht_symlink, +	.unlink      = dht_unlink, +	.link        = dht_link, +	.mkdir       = dht_mkdir, +	.rmdir       = dht_rmdir, +	.rename      = dht_rename, +	.inodelk     = dht_inodelk, +	.finodelk    = dht_finodelk, +	.entrylk     = 
dht_entrylk, +	.fentrylk    = dht_fentrylk, +	.xattrop     = dht_xattrop, +	.fxattrop    = dht_fxattrop, +#if 0 +	.setdents    = dht_setdents, +	.getdents    = dht_getdents, +	.checksum    = dht_checksum, +#endif +}; + + +struct xlator_mops mops = { +}; + + +struct xlator_cbks cbks = { +//	.release    = dht_release, +//      .releasedir = dht_releasedir, +	.forget     = dht_forget +}; + + +struct volume_options options[] = { +	{ .key  = {"local-volume-name"},  +	  .type = GF_OPTION_TYPE_XLATOR  +	}, +        { .key  = {"lookup-unhashed"},  +	  .type = GF_OPTION_TYPE_BOOL  +	}, +	{ .key  = {NULL} }, +}; | 
