diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 1141 |
1 files changed, 822 insertions, 319 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 500c9735f..8f61339e6 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -31,17 +22,18 @@ #include "dht-common.h" #include "defaults.h" #include "byte-order.h" +#include "glusterfs-acl.h" #include <sys/time.h> #include <libgen.h> -void +int dht_aggregate (dict_t *this, char *key, data_t *value, void *data) { dict_t *dst = NULL; int64_t *ptr = 0, *size = NULL; int32_t ret = -1; - data_pair_t *data_pair = NULL; + data_t *dict_data = NULL; dst = data; @@ -53,32 +45,37 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data) if (size == NULL) { gf_log ("dht", GF_LOG_WARNING, "memory allocation failed"); - return; + return -1; } ret = dict_set_bin (dst, key, size, sizeof (int64_t)); if (ret < 0) { gf_log ("dht", GF_LOG_WARNING, "dht aggregate dict set failed"); GF_FREE (size); - return; + return -1; } } ptr = data_to_bin (value); if (ptr == NULL) { gf_log ("dht", GF_LOG_WARNING, "data to bin failed"); - return; + return -1; } *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr)); + + } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) { + ret = gf_get_min_stime (THIS, dst, key, value); + if (ret < 0) + return ret; } else { /* compare user xattrs only */ if (!strncmp (key, "user.", strlen ("user."))) { - ret = dict_lookup (dst, key, &data_pair); - if (!ret && data_pair && value) { - ret = is_data_equal (data_pair->value, value); + ret = dict_lookup (dst, key, &dict_data); + if (!ret && dict_data && value) { + ret = is_data_equal (dict_data, value); if (!ret) - gf_log ("dht", GF_LOG_WARNING, + gf_log ("dht", GF_LOG_DEBUG, "xattr mismatch for %s", key); } } @@ -87,7 +84,7 @@ dht_aggregate (dict_t *this, char *key, data_t *value, void *data) gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed"); } - return; + return 0; } @@ -134,7 +131,10 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, ret = dht_layout_set (this, local->inode, layout); } - WIPE (&local->postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } DHT_STRIP_PHASE1_FLAGS (&local->stbuf); @@ -154,9 +154,11 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) int op_errno = 0; int ret = -1; dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; local = discover_frame->local; layout = local->layout; + conf = this->private; LOCK(&discover_frame->lock); { @@ -199,11 +201,14 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) "(overlaps/holes present: %s, " "ENOENT errors: %d)", local->loc.path, (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0); - op_errno = EINVAL; - goto out; + if ((ret > 0) && (ret == conf->subvolume_cnt)) { + op_errno = ESTALE; + goto out; + } } - dht_layout_set (this, local->inode, layout); + if (local->inode) + dht_layout_set (this, local->inode, layout); } DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno, @@ -232,6 +237,7 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int is_dir = 0; int is_linkfile = 0; int attempt_unwind = 0; + dht_conf_t *conf = 0; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -241,6 +247,7 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; prev = cookie; + conf = this->private; layout = local->layout; @@ -275,7 +282,8 @@ dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unlock; } - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); is_dir = check_is_dir (inode, stbuf, xattr); if (is_dir) { @@ -334,23 +342,20 @@ dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc) int i = 0; call_frame_t *discover_frame = NULL; - conf = this->private; local = frame->local; - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht' key", - loc->path); + "%s: failed to set '%s' key", + loc->path, conf->xattr_name); - ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht.linkto", 256); + ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht.linkto' key", - loc->path); + "%s: failed to set '%s' key", + loc->path, conf->link_xattr_name); call_cnt = conf->subvolume_cnt; local->call_cnt = call_cnt; @@ -436,7 +441,7 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_ret, op_errno, xattr); if (op_ret == -1) { - local->op_errno = ENOENT; + local->op_errno = op_errno; gf_log (this->name, GF_LOG_DEBUG, "lookup of %s on %s returned error (%s)", local->loc.path, prev->this->name, @@ -495,6 +500,11 @@ unlock: dht_layout_set (this, local->inode, layout); } + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, local->xattr, @@ -526,6 +536,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int ret = -1; int is_dir = 0; int is_linkfile = 0; + call_frame_t *copy = NULL; + dht_local_t *copy_local = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); GF_VALIDATE_OR_GOTO ("dht", this, err); @@ -584,7 +596,8 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, layout = local->layout; is_dir = check_is_dir (inode, stbuf, xattr); - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); if (is_linkfile) { gf_log (this->name, GF_LOG_INFO, @@ -596,6 +609,23 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (is_dir) { + ret = dht_dir_has_layout (xattr, conf->xattr_name); + if (ret >= 0) { + if (is_greater_time(local->stbuf.ia_ctime, + local->stbuf.ia_ctime_nsec, + stbuf->ia_ctime, + stbuf->ia_ctime_nsec)) { + local->prebuf.ia_gid = stbuf->ia_gid; + local->prebuf.ia_uid = stbuf->ia_uid; + } + } + if (local->stbuf.ia_type != IA_INVAL) + { + if ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid)) { + local->need_selfheal = 1; + } + } ret = dht_layout_dir_mismatch (this, layout, prev->this, &local->loc, xattr); @@ -634,7 +664,28 @@ out: && (conf && conf->unhashed_sticky_bit)) { local->stbuf.ia_prot.sticky = 1; } - if (local->layout_mismatch) { + if (local->need_selfheal) { + local->need_selfheal = 0; + uuid_copy (local->gfid, local->stbuf.ia_gfid); + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; + copy = create_frame (this, this->ctx->pool); + if (copy) { + copy_local = dht_local_init (copy, &local->loc, + NULL, 0); + if (!copy_local) + goto cont; + copy_local->stbuf = local->stbuf; + copy->local = copy_local; + FRAME_SU_DO (copy, dht_local_t); + ret = synctask_new (this->ctx->env, + dht_dir_attr_heal, + dht_dir_attr_heal_done, + copy, copy); + } + } +cont: + if (local->layout_mismatch) { /* Found layout mismatch in the directory, need to fix this in the inode context */ dht_layout_unref (this, local->layout); @@ -660,7 +711,10 @@ out: local->op_errno = ESTALE; } - WIPE (&local->postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, @@ -712,8 +766,14 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie, local->stbuf.ia_prot.sticky = 1; } + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } + unwind: - WIPE (&local->postparent); + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, @@ -786,7 +846,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) "<nil>")); } - WIPE (&local->postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, @@ -816,7 +879,10 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) local->op_errno = EINVAL; } - WIPE (&local->postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (lookup, frame, local->op_ret, @@ -832,7 +898,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) hashed_subvol->name); ret = dht_linkfile_create (frame, - dht_lookup_linkfile_create_cbk, + dht_lookup_linkfile_create_cbk, this, cached_subvol, hashed_subvol, &local->loc); return ret; @@ -870,8 +936,9 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, xlator_t *subvol = NULL; loc_t *loc = NULL; xlator_t *link_subvol = NULL; - int ret = -1; - int32_t fd_count = 0; + int ret = -1; + int32_t fd_count = 0; + dht_conf_t *conf = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -881,6 +948,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; loc = &local->loc; + conf = this->private; prev = cookie; subvol = prev->this; @@ -902,7 +970,8 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc->path, prev->this->name); } - is_linkfile = check_is_linkfile (inode, buf, xattr); + is_linkfile = check_is_linkfile (inode, buf, xattr, + conf->link_xattr_name); is_dir = check_is_dir (inode, buf, xattr); if (is_linkfile) { @@ -1044,7 +1113,16 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, gf_log (this->name, GF_LOG_INFO, "lookup of %s on %s (following linkfile) failed (%s)", local->loc.path, subvol->name, strerror (op_errno)); - goto err; + + /* If cached subvol returned ENOTCONN, do not do + lookup_everywhere. We need to make sure linkfile does not get + removed, which can take away the namespace, and subvol is + anyways down. */ + + if (op_errno != ENOTCONN) + goto err; + else + goto unwind; } if (check_is_dir (inode, stbuf, xattr)) { @@ -1054,7 +1132,7 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, goto err; } - if (check_is_linkfile (inode, stbuf, xattr)) { + if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { gf_log (this->name, GF_LOG_INFO, "lookup of %s on %s (following linkfile) reached link", local->loc.path, subvol->name); @@ -1082,9 +1160,12 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, op_errno = EINVAL; } -unwind: - WIPE (postparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } +unwind: DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, postparent); @@ -1166,7 +1247,6 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc_t *loc = NULL; call_frame_t *prev = NULL; int ret = 0; - uint64_t tmp_layout = 0; dht_layout_t *parent_layout = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); @@ -1197,8 +1277,10 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) && (loc->parent)) { - ret = inode_ctx_get (loc->parent, this, &tmp_layout); - parent_layout = (dht_layout_t *)(long)tmp_layout; + ret = dht_inode_ctx_layout_get (loc->parent, this, + &parent_layout); + if (ret || !parent_layout) + goto out; if (parent_layout->search_unhashed) { local->op_errno = ENOENT; dht_lookup_everywhere (frame, this, loc); @@ -1227,7 +1309,8 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - is_linkfile = check_is_linkfile (inode, stbuf, xattr); + is_linkfile = check_is_linkfile (inode, stbuf, xattr, + conf->link_xattr_name); if (!is_linkfile) { /* non-directory and not a linkfile */ @@ -1267,7 +1350,10 @@ out: * from each of the subvolume. See dht_iatt_merge for reference. */ - WIPE (postparent); + if (!op_ret && local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, @@ -1329,7 +1415,6 @@ dht_lookup (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); conf = this->private; if (!conf) @@ -1373,16 +1458,8 @@ dht_lookup (call_frame_t *frame, xlator_t *this, return 0; } - if (!hashed_subvol) { + if (!hashed_subvol) hashed_subvol = dht_subvol_get_hashed (this, loc); - if (!hashed_subvol) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to get hashed subvol for %s", - loc->path); - op_errno = EINVAL; - goto err; - } - } local->hashed_subvol = hashed_subvol; if (is_revalidate (loc)) { @@ -1412,7 +1489,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, * revalidates directly go to the cached-subvolume. */ ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + conf->xattr_name, 4 * 4); if (IA_ISDIR (local->inode->ia_type)) { local->call_cnt = call_cnt = conf->subvolume_cnt; @@ -1447,10 +1524,10 @@ dht_lookup (call_frame_t *frame, xlator_t *this, do_fresh_lookup: /* TODO: remove the hard-coding */ ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht", 4 * 4); + conf->xattr_name, 4 * 4); ret = dict_set_uint32 (local->xattr_req, - DHT_LINKFILE_KEY, 256); + conf->link_xattr_name, 256); /* need it for self-healing linkfiles which is 'in-migration' state */ @@ -1526,8 +1603,12 @@ dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->postparent = *postparent; local->preparent = *preparent; - WIPE (&local->postparent); - WIPE (&local->preparent); + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + &local->preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); + } } unlock: UNLOCK (&frame->lock); @@ -1554,7 +1635,8 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { - if (op_ret == -1) { + if ((op_ret == -1) && !((op_errno == ENOENT) || + (op_errno == ENOTCONN))) { local->op_errno = op_errno; gf_log (this->name, GF_LOG_DEBUG, "subvolume %s returned -1 (%s)", @@ -1567,7 +1649,7 @@ dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unlock: UNLOCK (&frame->lock); - if (op_ret == -1) + if (local->op_ret == -1) goto err; cached_subvol = dht_subvol_get_cached (this, local->loc.inode); @@ -1591,41 +1673,6 @@ err: return 0; } -static int -dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_ret = -1; - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { - DHT_STACK_UNWIND (setxattr, frame, local->op_ret, - local->op_errno, NULL); - } - - return 0; -} - - int dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) @@ -1698,115 +1745,222 @@ dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local, } int -dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this, + int op_errno) { - dht_local_t *local = NULL; - int ret = 0; - int flag = 0; - int this_call_cnt = 0; - char *value_got = NULL; - char layout_buf[8192] = {0,}; - char *xattr_buf = NULL; - dict_t *dict = NULL; - int32_t alloc_len = 0; - int32_t plen = 0; - call_frame_t *prev = NULL; + int ret = -1; + char *value = NULL; + int32_t plen = 0; - local = frame->local; - prev = cookie; + ret = dict_get_str (xattr, local->xsel, &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Subvolume %s returned -1 (%s)", this->name, + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + goto out; + } - if (op_ret >= 0) { - ret = dict_get_str (xattr, local->xsel, &value_got); - if (!ret) { - alloc_len = strlen (value_got); + local->alloc_len += strlen(value); - /** - * allocate the buffer:- we allocate 10 bytes extra in - * case we need to append ' Link: ' in the buffer for - * another STACK_WIND - */ + if (!local->xattr_val) { + local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10); + local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!local->xattr_val) { + ret = -1; + goto out; + } + } + + if (local->xattr_val) { + plen = strlen (local->xattr_val); + if (plen) { + /* extra byte(s) for \0 to be safe */ + local->alloc_len += (plen + 2); + local->xattr_val = GF_REALLOC (local->xattr_val, + local->alloc_len); if (!local->xattr_val) { - alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10); - local->xattr_val = - GF_CALLOC (alloc_len, - sizeof (char), - gf_common_mt_char); + ret = -1; + goto out; } + } - if (local->xattr_val) { - plen = strlen (local->xattr_val); - if (plen) { - /* extra byte(s) for \0 to be safe */ - alloc_len += (plen + 2); - local->xattr_val = - GF_REALLOC (local->xattr_val, - alloc_len); - if (!local->xattr_val) - goto out; - } + (void) strcat (local->xattr_val, value); + (void) strcat (local->xattr_val, " "); + local->op_ret = 0; + } - strcat (local->xattr_val, value_got); - } - local->op_ret = 0; - } + ret = 0; + + out: + return ret; +} + +int +dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, + gf_boolean_t flag) +{ + int ret = -1; + char *xattr_buf = NULL; + char layout_buf[8192] = {0,}; + + if (flag) + fill_layout_info (local->layout, layout_buf); + + *dict = dict_new (); + if (!*dict) + goto out; + + local->xattr_val[strlen (local->xattr_val) - 1] = '\0'; + + /* we would need max this many bytes to create xattr string + * extra 40 bytes is just an estimated amount of additional + * space required as we include translator name and some + * spaces, brackets etc. when forming the pathinfo string. + * + * For node-uuid we just don't have all the pretty formatting, + * but since this is a generic routine for pathinfo & node-uuid + * we dont have conditional space allocation and try to be + * generic + */ + local->alloc_len += (2 * strlen (this->name)) + + strlen (layout_buf) + + 40; + xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char), + gf_common_mt_char); + if (!xattr_buf) + goto out; + + if (XATTR_IS_PATHINFO (local->xsel)) { + (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, + local->alloc_len, flag, + layout_buf); + } else if (XATTR_IS_NODE_UUID (local->xsel)) { + (void) snprintf (xattr_buf, local->alloc_len, "%s", + local->xattr_val); } else { - local->op_ret = -1; - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 " - "(%s)", prev->this->name, strerror (op_errno)); + gf_log (this->name, GF_LOG_WARNING, + "Unknown local->xsel (%s)", local->xsel); + goto out; } + ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); + GF_FREE (local->xattr_val); + out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { + return ret; +} - if (local->op_ret == -1) { - goto unwind; - } +int +dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int ret = 0; + dht_local_t *local = NULL; + int this_call_cnt = 0; + dict_t *dict = NULL; - if (local->layout->cnt > 1) { - /* Set it for directory */ - fill_layout_info (local->layout, layout_buf); - flag = 1; - } + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); - dict = dict_new (); - - /* we would need max this many bytes to create xattr string */ - alloc_len += (2 * strlen (this->name)) - + strlen (layout_buf) - + 40; - xattr_buf = GF_CALLOC (alloc_len, - sizeof (char), gf_common_mt_char); - - if (XATTR_IS_PATHINFO (local->xsel)) { - (void) dht_fill_pathinfo_xattr (this, local, xattr_buf, - alloc_len, flag, - layout_buf); - } else if (XATTR_IS_NODE_UUID (local->xsel)) { - (void) snprintf (xattr_buf, alloc_len, "%s", - local->xattr_val); - } else - gf_log (this->name, GF_LOG_WARNING, - "Unknown local->xsel (%s)", local->xsel); + local = frame->local; - ret = dict_set_dynstr (dict, local->xsel, xattr_buf); + LOCK (&frame->lock); + { + this_call_cnt = --local->call_cnt; + if (op_ret < 0) { + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "getxattr err (%s) for dir", + strerror (op_errno)); + local->op_ret = -1; + local->op_errno = op_errno; + } - if (local->xattr_val) - GF_FREE (local->xattr_val); + goto unlock; + } - DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, - xdata); + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + } + unlock: + UNLOCK (&frame->lock); - if (dict) - dict_unref (dict); + if (!is_last_call (this_call_cnt)) + goto out; - return 0; + /* -- last call: do patch ups -- */ + + if (local->op_ret == -1) { + goto unwind; } -unwind: + ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true); + if (ret) + goto unwind; + + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; + + unwind: DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); + out: + return 0; +} + +int +dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + dht_local_t *local = NULL; + int ret = 0; + dict_t *dict = NULL; + call_frame_t *prev = NULL; + gf_boolean_t flag = _gf_true; + + local = frame->local; + prev = cookie; + + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 " + "(%s)", prev->this->name, strerror (op_errno)); + goto unwind; + } + + ret = dht_vgetxattr_alloc_and_fill (local, xattr, this, + op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "alloc or fill failure"); + goto unwind; + } + + flag = (local->layout->cnt > 1) ? _gf_true : _gf_false; + + ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag); + if (ret) + goto unwind; + + DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata); + goto cleanup; + + unwind: + DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, + NULL, NULL); + cleanup: + if (dict) + dict_unref (dict); + return 0; } @@ -1839,10 +1993,13 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { int this_call_cnt = 0; dht_local_t *local = NULL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (frame->local, out); + VALIDATE_OR_GOTO (this->private, out); + conf = this->private; local = frame->local; this_call_cnt = dht_frame_return (frame); @@ -1850,8 +2007,8 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!xattr || (op_ret == -1)) goto out; - if (dict_get (xattr, "trusted.glusterfs.dht")) { - dict_del (xattr, "trusted.glusterfs.dht"); + if (dict_get (xattr, conf->xattr_name)) { + dict_del (xattr, conf->xattr_name); } local->op_ret = 0; @@ -1884,9 +2041,72 @@ dht_getxattr_unwind (call_frame_t *frame, int +dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xattr, dict_t *xdata) +{ + int this_call_cnt = 0; + dht_local_t *local = NULL; + + + local = frame->local; + + if (op_ret != -1) { + if (local->xattr) + dict_unref (local->xattr); + local->xattr = dict_ref (xattr); + + if (local->xattr_req) + dict_unref (local->xattr_req); + local->xattr_req = dict_ref (xdata); + } + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, local->xattr_req); + } + + return 0; +} + + +int +dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *key, dict_t *xdata) +{ + dht_local_t *local = NULL; + int i = 0; + dht_layout_t *layout = NULL; + int cnt = 0; + xlator_t *subvol = NULL; + + + local = frame->local; + layout = local->layout; + + cnt = local->call_cnt = layout->cnt; + + local->op_ret = -1; + local->op_errno = ENODATA; + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_getxattr_get_real_filename_cbk, + subvol, subvol->fops->getxattr, + loc, key, xdata); + } + + return 0; +} + + +int dht_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) +#define DHT_IS_DIR(layout) (layout->cnt > 1) { + xlator_t *subvol = NULL; xlator_t *hashed_subvol = NULL; xlator_t *cached_subvol = NULL; @@ -1902,7 +2122,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); VALIDATE_OR_GOTO (this->private, err); conf = this->private; @@ -1930,8 +2149,40 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (key && ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) - || strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)) { + if (key && + (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) + && DHT_IS_DIR(layout)) { + dht_getxattr_get_real_filename (frame, this, loc, key, xdata); + return 0; + } + + /* for file use cached subvolume (obviously!): see if {} + * below + * for directory: + * wind to all subvolumes and exclude subvolumes which + * return ENOTCONN (in callback) + * + * NOTE: Don't trust inode here, as that may not be valid + * (until inode_link() happens) + */ + if (key && DHT_IS_DIR(layout) && + ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) + || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) { + (void) strncpy (local->xsel, key, 256); + cnt = local->call_cnt = layout->cnt; + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_vgetxattr_dir_cbk, + subvol, subvol->fops->getxattr, + loc, key, NULL); + } + return 0; + } + + /* node-uuid or pathinfo for files */ + if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0) + || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) { cached_subvol = local->cached_subvol; (void) strncpy (local->xsel, key, 256); @@ -1941,6 +2192,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, return 0; } + if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) { hashed_subvol = dht_subvol_get_hashed (this, loc); if (!hashed_subvol) { @@ -1973,13 +2225,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } if (key && (!strcmp (GF_XATTR_MARKER_KEY, key)) - && (-1 == frame->root->pid)) { - - if (loc->inode-> ia_type == IA_IFDIR) { + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { cnt = layout->cnt; } else { cnt = 1; } + sub_volumes = alloca ( cnt * sizeof (xlator_t *)); for (i = 0; i < cnt; i++) *(sub_volumes + i) = layout->list[i].xlator; @@ -1987,7 +2239,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, if (cluster_getmarkerattr (frame, this, loc, key, local, dht_getxattr_unwind, sub_volumes, cnt, - MARKER_UUID_TYPE, conf->vol_uuid)) { + MARKER_UUID_TYPE, marker_uuid_default_gauge, + conf->vol_uuid)) { op_errno = EINVAL; goto err; } @@ -1997,8 +2250,8 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, if (key && *conf->vol_uuid) { if ((match_uuid_local (key, conf->vol_uuid) == 0) && - (-1 == frame->root->pid)) { - if (loc->inode-> ia_type == IA_IFDIR) { + (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + if (DHT_IS_DIR(layout)) { cnt = layout->cnt; } else { cnt = 1; @@ -2011,6 +2264,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, local, dht_getxattr_unwind, sub_volumes, cnt, MARKER_XTIME_TYPE, + marker_xtime_default_gauge, conf->vol_uuid)) { op_errno = EINVAL; goto err; @@ -2020,7 +2274,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (loc->inode-> ia_type == IA_IFDIR) { + if (DHT_IS_DIR(layout)) { cnt = local->call_cnt = layout->cnt; } else { cnt = local->call_cnt = 1; @@ -2040,6 +2294,7 @@ err: return 0; } +#undef DHT_IS_DIR int dht_fgetxattr (call_frame_t *frame, xlator_t *this, @@ -2081,7 +2336,9 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this, } } - if (fd->inode->ia_type == IA_IFDIR) { + if ((fd->inode->ia_type == IA_IFDIR) + && (strncmp (key, GF_XATTR_LOCKINFO_KEY, + strlen (GF_XATTR_LOCKINFO_KEY) != 0))) { cnt = local->call_cnt = layout->cnt; } else { cnt = local->call_cnt = 1; @@ -2109,15 +2366,18 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; dht_local_t *local = NULL; int op_errno = EINVAL; - data_pair_t *trav = NULL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); VALIDATE_OR_GOTO (fd->inode, err); + VALIDATE_OR_GOTO (this->private, err); + + conf = this->private; - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr, - trav, op_errno, err); + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, + op_errno, err); local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); if (!local) { @@ -2215,19 +2475,17 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, char value[4096] = {0,}; gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA; int call_cnt = 0; - data_pair_t *trav = NULL; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr, - trav, op_errno, err); conf = this->private; + + GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr, + op_errno, err); + local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); if (!local) { op_errno = ENOMEM; @@ -2252,25 +2510,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, local->call_cnt = call_cnt = layout->cnt; - /* This key is sent by Unified File and Object storage - * to test xattr support in backend. - */ - tmp = dict_get (xattr, "user.ufo-test"); - if (tmp) { - if (IA_ISREG (loc->inode->ia_type)) { - op_errno = ENOTSUP; - goto err; - } - local->op_ret = 0; - for (i = 0; i < call_cnt; i++) { - STACK_WIND (frame, dht_ufo_xattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->setxattr, - loc, xattr, flags, NULL); - } - return 0; - } - tmp = dict_get (xattr, "distribute.migrate-data"); if (tmp) { if (IA_ISDIR (loc->inode->ia_type)) { @@ -2345,9 +2584,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_INFO, "fixing the layout of %s", loc->path); - dht_fix_directory_layout (frame, dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } tmp = dict_get (xattr, "distribute.directory-spread-count"); @@ -2359,10 +2602,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, (dir_spread > 0))) { layout->spread_cnt = dir_spread; - dht_fix_directory_layout (frame, - dht_common_setxattr_cbk, - layout); - return 0; + ret = dht_fix_directory_layout (frame, + dht_common_setxattr_cbk, + layout); + if (ret) { + op_errno = ENOTCONN; + goto err; + } + return ret; } gf_log (this->name, GF_LOG_ERROR, "wrong 'directory-spread-count' value (%s)", value); @@ -2432,18 +2679,20 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, dht_local_t *local = NULL; dht_layout_t *layout = NULL; int call_cnt = 0; + dht_conf_t *conf = NULL; int i; VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (this->private, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*", - key, op_errno, err); + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); if (!local) { @@ -2495,13 +2744,16 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this, dht_local_t *local = NULL; dht_layout_t *layout = NULL; int call_cnt = 0; + dht_conf_t *conf = 0; int i; VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (this->private, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*", - key, op_errno, err); + conf = this->private; + + GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err); VALIDATE_OR_GOTO (frame, err); @@ -2672,7 +2924,6 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); VALIDATE_OR_GOTO (this->private, err); conf = this->private; @@ -2775,6 +3026,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, dht_layout_t *layout = 0; dht_conf_t *conf = NULL; xlator_t *subvol = 0; + int ret = 0; INIT_LIST_HEAD (&entries.list); prev = cookie; @@ -2791,10 +3043,13 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; - if ((check_is_dir (NULL, (&orig_entry->d_stat), NULL) && - (prev->this != dht_first_up_subvol (this))) || - check_is_linkfile (NULL, (&orig_entry->d_stat), - orig_entry->dict)) { + if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) && + (prev->this != local->first_up_subvol)) { + continue; + } + if (check_is_linkfile (NULL, (&orig_entry->d_stat), + orig_entry->dict, + conf->link_xattr_name)) { continue; } @@ -2823,6 +3078,24 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, entry->d_type = orig_entry->d_type; entry->d_len = orig_entry->d_len; + if (orig_entry->dict) + entry->dict = dict_ref (orig_entry->dict); + + /* making sure we set the inode ctx right with layout, + currently possible only for non-directories, so for + directories don't set entry inodes */ + if (!IA_ISDIR(entry->d_stat.ia_type)) { + ret = dht_layout_preset (this, prev->this, + orig_entry->inode); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to link the layout in inode"); + entry->inode = inode_ref (orig_entry->inode); + } else if (orig_entry->inode) { + dht_inode_ctx_time_update (orig_entry->inode, this, + &entry->d_stat, 1); + } + list_add_tail (&entry->list, &entries.list); count++; } @@ -2852,6 +3125,19 @@ done: goto unwind; } + if (conf->readdir_optimize == _gf_true) { + if (next_subvol != local->first_up_subvol) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "dict set failed"); + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); + } + } + STACK_WIND (frame, dht_readdirp_cbk, next_subvol, next_subvol->fops->readdirp, local->fd, local->size, next_offset, @@ -2977,10 +3263,14 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, xlator_t *xvol = NULL; off_t xoff = 0; int ret = 0; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (this->private, err); + + conf = this->private; local = dht_local_init (frame, NULL, NULL, whichop); if (!local) { @@ -2991,6 +3281,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->fd = fd_ref (fd); local->size = size; local->xattr_req = (dict)? dict_ref (dict) : NULL; + local->first_up_subvol = dht_first_up_subvol (this); dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); @@ -3003,12 +3294,24 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, if (local->xattr) { ret = dict_set_uint32 (local->xattr, - "trusted.glusterfs.dht.linkto", - 256); + conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "failed to set 'glusterfs.dht.linkto'" - " key"); + "failed to set '%s' key", + conf->link_xattr_name); + if (conf->readdir_optimize == _gf_true) { + if (xvol != local->first_up_subvol) { + ret = dict_set_int32 (local->xattr, + GF_READDIR_SKIP_DIRS, 1); + if (ret) + gf_log (this->name, + GF_LOG_ERROR, + "Dict set failed"); + } else { + dict_del (local->xattr, + GF_READDIR_SKIP_DIRS); + } + } } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, @@ -3142,7 +3445,7 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - call_frame_t *prev = NULL; + xlator_t *prev = NULL; int ret = -1; dht_local_t *local = NULL; @@ -3160,19 +3463,24 @@ dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; if (local->loc.parent) { - WIPE (preparent); - WIPE (postparent); + + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); } - ret = dht_layout_preset (this, prev->this, inode); + ret = dht_layout_preset (this, prev, inode); if (ret < 0) { gf_log (this->name, GF_LOG_DEBUG, "could not set pre-set layout for subvolume %s", - prev->this->name); + prev? prev->name: NULL); op_ret = -1; op_errno = EINVAL; goto out; } + if (local->linked == _gf_true) + dht_linkfile_attr_heal (frame, this); out: /* * FIXME: ia_size and st_blocks of preparent and postparent do not have @@ -3181,7 +3489,6 @@ out: * corresponding values from each of the subvolume. * See dht_iatt_merge for reference. */ - DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf, preparent, postparent, xdata); @@ -3203,12 +3510,17 @@ dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie, goto err; local = frame->local; + if (!local || !local->cached_subvol) { + op_errno = EINVAL; + goto err; + } + cached_subvol = local->cached_subvol; - STACK_WIND (frame, dht_newfile_cbk, - cached_subvol, cached_subvol->fops->mknod, - &local->loc, local->mode, local->rdev, local->umask, - local->params); + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol, + cached_subvol, cached_subvol->fops->mknod, + &local->loc, local->mode, local->rdev, local->umask, + local->params); return 0; err: @@ -3251,11 +3563,13 @@ dht_mknod (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->mknod, - loc, mode, rdev, umask, params); + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, + subvol, subvol->fops->mknod, loc, mode, + rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol); + + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); if (avail_subvol != subvol) { /* Choose the minimum filled volume, and create the files there */ @@ -3267,14 +3581,15 @@ dht_mknod (call_frame_t *frame, xlator_t *this, local->umask = umask; dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk, - avail_subvol, subvol, loc); + this, avail_subvol, subvol, loc); } else { gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->mknod, - loc, mode, rdev, umask, params); + STACK_WIND_COOKIE (frame, dht_newfile_cbk, + (void *)subvol, subvol, + subvol->fops->mknod, loc, mode, + rdev, umask, params); } } @@ -3319,9 +3634,9 @@ dht_symlink (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->symlink, - linkname, loc, umask, params); + STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol, + subvol->fops->symlink, linkname, loc, umask, + params); return 0; @@ -3410,9 +3725,12 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { call_frame_t *prev = NULL; dht_layout_t *layout = NULL; + dht_local_t *local = NULL; prev = cookie; + local = frame->local; + if (op_ret == -1) goto out; @@ -3426,9 +3744,16 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - WIPE (preparent); - WIPE (postparent); - + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); + } + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, @@ -3515,7 +3840,7 @@ dht_link (call_frame_t *frame, xlator_t *this, if (hashed_subvol != cached_subvol) { uuid_copy (local->gfid, oldloc->inode->gfid); - dht_linkfile_create (frame, dht_link_linkfile_cbk, + dht_linkfile_create (frame, dht_link_linkfile_cbk, this, cached_subvol, hashed_subvol, newloc); } else { STACK_WIND (frame, dht_link_cbk, @@ -3556,8 +3881,11 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; if (local->loc.parent) { - WIPE (preparent); - WIPE (postparent); + dht_inode_ctx_time_update (local->loc.parent, this, + preparent, 0); + + dht_inode_ctx_time_update (local->loc.parent, this, + postparent, 1); } ret = dht_layout_preset (this, prev->this, inode); @@ -3569,7 +3897,10 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, op_errno = EINVAL; goto out; } - + if (local->linked == _gf_true) { + local->stbuf = *stbuf; + dht_linkfile_attr_heal (frame, this); + } out: DHT_STRIP_PHASE1_FLAGS (stbuf); DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent, @@ -3659,7 +3990,7 @@ dht_create (call_frame_t *frame, xlator_t *this, } /* Choose the minimum filled volume, and create the files there */ - avail_subvol = dht_free_disk_available_subvol (this, subvol); + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); if (avail_subvol != subvol) { local->params = dict_ref (params); local->flags = flags; @@ -3670,9 +4001,8 @@ dht_create (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "creating %s on %s (link at %s)", loc->path, avail_subvol->name, subvol->name); - dht_linkfile_create (frame, - dht_create_linkfile_create_cbk, - avail_subvol, subvol, loc); + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + this, avail_subvol, subvol, loc); goto done; } gf_log (this->name, GF_LOG_TRACE, @@ -3706,8 +4036,11 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, if (op_ret == 0) { dht_layout_set (this, local->inode, layout); if (local->loc.parent) { - WIPE (&local->preparent); - WIPE (&local->postparent); + dht_inode_ctx_time_update (local->loc.parent, this, + &local->preparent, 0); + + dht_inode_ctx_time_update (local->loc.parent, this, + &local->postparent, 1); } } @@ -3742,6 +4075,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = dht_layout_merge (this, layout, prev->this, -1, ENOSPC, NULL); } else { + if (op_ret == -1 && op_errno == EEXIST) + /* Very likely just a race between mkdir and + self-heal (from lookup of a concurrent mkdir + attempt). + Ignore error for now. layout setting will + anyways fail if this was a different (old) + pre-existing different directory. + */ + op_ret = 0; ret = dht_layout_merge (this, layout, prev->this, op_ret, op_errno, NULL); } @@ -3923,6 +4265,79 @@ dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int +dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = -1; + if (op_errno != ENOENT && op_errno != EACCES) { + local->need_selfheal = 1; + } + + + gf_log (this->name, GF_LOG_DEBUG, + "rmdir on %s for %s failed (%s)", + prev->this->name, local->loc.path, + strerror (op_errno)); + goto unlock; + } + + dht_iatt_merge (this, &local->preparent, preparent, prev->this); + dht_iatt_merge (this, &local->postparent, postparent, + prev->this); + + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + if (local->need_selfheal) { + local->layout = + dht_layout_get (this, local->loc.inode); + + /* TODO: neater interface needed below */ + local->stbuf.ia_type = local->loc.inode->ia_type; + + uuid_copy (local->gfid, local->loc.inode->gfid); + dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, + &local->loc, local->layout); + } else { + + if (local->loc.parent) { + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->preparent, + 0); + + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->postparent, + 1); + } + + DHT_STACK_UNWIND (rmdir, frame, local->op_ret, + local->op_errno, &local->preparent, + &local->postparent, NULL); + } + } + + return 0; +} + + +int dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -3930,6 +4345,7 @@ dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; int this_call_cnt = 0; call_frame_t *prev = NULL; + int done = 0; local = frame->local; prev = cookie; @@ -3962,7 +4378,16 @@ unlock: this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { + + /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */ + if (local->hashed_subvol && (this_call_cnt == 1)) { + done = 1; + } else if (!local->hashed_subvol && !this_call_cnt) { + done = 1; + } + + + if (done) { if (local->need_selfheal && local->fop_succeeded) { local->layout = dht_layout_get (this, local->loc.inode); @@ -3973,10 +4398,29 @@ unlock: uuid_copy (local->gfid, local->loc.inode->gfid); dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk, &local->loc, local->layout); - } else { + } else if (this_call_cnt) { + /* If non-hashed subvol's have responded, proceed */ + + local->need_selfheal = 0; + STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk, + local->hashed_subvol, + local->hashed_subvol->fops->rmdir, + &local->loc, local->flags, NULL); + } else if (!this_call_cnt) { + /* All subvol's have responded, proceed */ + if (local->loc.parent) { - WIPE (&local->preparent); - WIPE (&local->postparent); + + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->preparent, + 0); + + dht_inode_ctx_time_update (local->loc.parent, + this, + &local->postparent, + 1); + } DHT_STACK_UNWIND (rmdir, frame, local->op_ret, @@ -3995,6 +4439,7 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this) dht_local_t *local = NULL; dht_conf_t *conf = NULL; int i = 0; + xlator_t *hashed_subvol = NULL; VALIDATE_OR_GOTO (this->private, err); @@ -4006,7 +4451,30 @@ dht_rmdir_do (call_frame_t *frame, xlator_t *this) local->call_cnt = conf->subvolume_cnt; + /* first remove from non-hashed_subvol */ + hashed_subvol = dht_subvol_get_hashed (this, &local->loc); + + if (!hashed_subvol) { + gf_log (this->name, GF_LOG_WARNING, "failed to get hashed " + "subvol for %s",local->loc.path); + } else { + local->hashed_subvol = hashed_subvol; + } + + /* When DHT has only 1 child */ + if (conf->subvolume_cnt == 1) { + STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk, + conf->subvolumes[0], + conf->subvolumes[0]->fops->rmdir, + &local->loc, local->flags, NULL); + return 0; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (hashed_subvol && + (hashed_subvol == conf->subvolumes[i])) + continue; + STACK_WIND (frame, dht_rmdir_cbk, conf->subvolumes[i], conf->subvolumes[i]->fops->rmdir, @@ -4073,6 +4541,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_frame_t *main_frame = NULL; dht_local_t *main_local = NULL; int this_call_cnt = 0; + dht_conf_t *conf = this->private; local = frame->local; prev = cookie; @@ -4084,7 +4553,7 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret != 0) goto err; - if (check_is_linkfile (inode, stbuf, xattr) == 0) { + if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) { main_local->op_ret = -1; main_local->op_errno = ENOTEMPTY; @@ -4119,6 +4588,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, dht_local_t *lookup_local = NULL; dht_local_t *local = NULL; dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; local = frame->local; @@ -4127,7 +4597,8 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, continue; if (strcmp (trav->d_name, "..") == 0) continue; - if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict)) { + if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict, + conf->link_xattr_name)) { ret++; continue; } @@ -4145,7 +4616,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, return -1; } - ret = dict_set_uint32 (xattrs, DHT_LINKFILE_KEY, 256); + ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key" " in dict"); @@ -4268,6 +4739,7 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_frame_t *prev = NULL; dict_t *dict = NULL; int ret = 0; + dht_conf_t *conf = this->private; local = frame->local; prev = cookie; @@ -4277,8 +4749,10 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "opendir on %s for %s failed (%s)", prev->this->name, local->loc.path, strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; + if (op_errno != ENOENT) { + local->op_ret = -1; + local->op_errno = op_errno; + } goto err; } @@ -4289,12 +4763,11 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - ret = dict_set_uint32 (dict, - "trusted.glusterfs.dht.linkto", 256); + ret = dict_set_uint32 (dict, conf->link_xattr_name, 256); if (ret) gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set 'trusted.glusterfs.dht.linkto' key", - local->loc.path); + "%s: failed to set '%s' key", + local->loc.path, conf->link_xattr_name); STACK_WIND (frame, dht_rmdir_readdirp_cbk, prev->this, prev->this->fops->readdirp, @@ -4393,7 +4866,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); if (!local) { @@ -4472,16 +4944,21 @@ err: int dht_forget (xlator_t *this, inode_t *inode) { - uint64_t tmp_layout = 0; + uint64_t ctx_int = 0; + dht_inode_ctx_t *ctx = NULL; dht_layout_t *layout = NULL; - inode_ctx_del (inode, this, &tmp_layout); + inode_ctx_del (inode, this, &ctx_int); - if (!tmp_layout) + if (!ctx_int) return 0; - layout = (dht_layout_t *)(long)tmp_layout; + ctx = (dht_inode_ctx_t *) (long) ctx_int; + + layout = ctx->layout; + ctx->layout = NULL; dht_layout_unref (this, layout); + GF_FREE (ctx); return 0; } @@ -4669,15 +5146,7 @@ unlock: or wait for anything else. Just propagate blindly */ if (have_heard_from_all) { propagate = 1; - if (conf->defrag) { - ret = pthread_create (&conf->defrag->th, NULL, - gf_defrag_start, this); - if (ret) { - conf->defrag = NULL; - GF_FREE (conf->defrag); - kill (getpid(), SIGTERM); - } - } + } @@ -4699,6 +5168,19 @@ unlock: /* continue to check other events for CHILD_UP */ } } + + /* rebalance is started with assert_no_child_down. So we do + * not need to handle CHILD_DOWN event here. + */ + if (conf->defrag) { + ret = gf_thread_create (&conf->defrag->th, NULL, + gf_defrag_start, this); + if (ret) { + conf->defrag = NULL; + GF_FREE (conf->defrag); + kill (getpid(), SIGTERM); + } + } } ret = 0; @@ -4707,3 +5189,24 @@ unlock: return ret; } + +int +dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout) +{ + dht_inode_ctx_t *ctx = NULL; + int ret = -1; + + ret = dht_inode_ctx_get (inode, this, &ctx); + + if (!ret && ctx) { + if (ctx->layout) { + if (layout) + *layout = ctx->layout; + ret = 0; + } else { + ret = -1; + } + } + + return ret; +} |
