diff options
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 1523 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 413 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 416 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 1115 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 597 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-linkfile.c | 7 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 326 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 18 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 40 | ||||
-rw-r--r-- | xlators/cluster/dht/src/nufa.c | 39 | ||||
-rw-r--r-- | xlators/cluster/dht/src/switch.c | 37 |
13 files changed, 2898 insertions, 1651 deletions
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index cf883a974ca..e35058d65d4 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -2,10 +2,10 @@ xlator_LTLIBRARIES = dht.la nufa.la switch.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster - dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \ - dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \ - $(top_builddir)/xlators/lib/src/libxlator.c + dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \ + dht-common.c dht-inode-write.c dht-inode-read.c \ + $(top_builddir)/xlators/lib/src/libxlator.c dht_la_SOURCES = $(dht_common_source) dht.c @@ -21,7 +21,8 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la switch_la_LDFLAGS = -module -avoidversion switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = dht-common.h dht-common.c dht-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = dht-common.h dht-mem-types.h \ + $(top_builddir)/xlators/lib/src/libxlator.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index b5168605bbb..6f8594e30b7 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -309,6 +309,14 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->return_estale = 1; } + /* if it is ENOENT, we may have to do a + * 'lookup_everywhere()' to make sure + * the file is not migrated */ + if (op_errno == ENOENT) { + if (IA_ISREG (local->loc.inode->ia_type)) { + local->need_lookup_everywhere = 1; + } + } goto unlock; } @@ -386,6 +394,18 @@ out: return 0; } + if (local->need_lookup_everywhere) { + /* As the current layout gave ENOENT error, we would + need a new layout */ + dht_layout_unref (this, local->layout); + local->layout = NULL; + + /* We know that current cached subvol is no more + valid, get the new one */ + local->cached_subvol = NULL; + dht_lookup_everywhere (frame, this, &local->loc); + return 0; + } if (local->return_estale) { local->op_ret = -1; local->op_errno = ESTALE; @@ -459,7 +479,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) dht_local_t *local = NULL; xlator_t *hashed_subvol = NULL; xlator_t *cached_subvol = NULL; - + dht_layout_t *layout = NULL; local = frame->local; hashed_subvol = local->hashed_subvol; @@ -487,6 +507,42 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this) return 0; } + if (local->need_lookup_everywhere) { + if (uuid_compare (local->gfid, local->inode->gfid)) { + /* GFID different, return error */ + DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, + NULL, NULL, NULL); + return 0; + } + local->op_ret = 0; + local->op_errno = 0; + layout = dht_layout_for_subvol (this, cached_subvol); + if (!layout) { + gf_log (this->name, GF_LOG_INFO, + "%s: no pre-set layout for subvolume %s", + local->loc.path, (cached_subvol ? + cached_subvol->name : + "<nil>")); + } + + ret = dht_layout_set (this, local->inode, layout); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, + "%s: failed to set layout for subvol %s", + local->loc.path, (cached_subvol ? + cached_subvol->name : + "<nil>")); + } + + WIPE (&local->postparent); + + DHT_STACK_UNWIND (lookup, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, local->xattr, + &local->postparent); + return 0; + } + if (!hashed_subvol) { gf_log (this->name, GF_LOG_INFO, "cannot create linkfile file for %s on %s: " @@ -560,6 +616,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc_t *loc = NULL; xlator_t *link_subvol = NULL; int ret = -1; + int32_t fd_count = 0; GF_VALIDATE_OR_GOTO ("dht", frame, out); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -642,12 +699,17 @@ unlock: UNLOCK (&frame->lock); if (is_linkfile) { - gf_log (this->name, GF_LOG_INFO, - "deleting stale linkfile %s on %s", - loc->path, subvol->name); - STACK_WIND (frame, dht_lookup_unlink_cbk, - subvol, subvol->fops->unlink, loc); - return 0; + ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); + /* Delete the linkfile only if there are no open fds on it. + if there is a open-fd, it may be in migration */ + if (!ret && (fd_count == 0)) { + gf_log (this->name, GF_LOG_INFO, + "deleting stale linkfile %s on %s", + loc->path, subvol->name); + STACK_WIND (frame, dht_lookup_unlink_cbk, + subvol, subvol->fops->unlink, loc); + return 0; + } } this_call_cnt = dht_frame_return (frame); @@ -980,7 +1042,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this, if (!conf) goto err; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); if (!local) { op_errno = ENOMEM; goto err; @@ -1003,16 +1065,14 @@ dht_lookup (call_frame_t *frame, xlator_t *this, local->xattr_req = dict_new (); } + + cached_subvol = local->cached_subvol; if (!hashed_subvol) hashed_subvol = dht_subvol_get_hashed (this, loc); - cached_subvol = dht_subvol_get_cached (this, loc->inode); - - local->cached_subvol = cached_subvol; local->hashed_subvol = hashed_subvol; if (is_revalidate (loc)) { - local->layout = layout = dht_layout_get (this, loc->inode); - + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "revalidate without cache. path=%s", @@ -1032,10 +1092,9 @@ dht_lookup (call_frame_t *frame, xlator_t *this, goto do_fresh_lookup; } - local->inode = inode_ref (loc->inode); + local->inode = inode_ref (loc->inode); - local->call_cnt = layout->cnt; - call_cnt = local->call_cnt; + call_cnt = local->call_cnt = layout->cnt; /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute, * revalidates directly go to the cached-subvolume. @@ -1043,6 +1102,11 @@ dht_lookup (call_frame_t *frame, xlator_t *this, ret = dict_set_uint32 (local->xattr_req, "trusted.glusterfs.dht", 4 * 4); + /* need it for self-healing linkfiles which is + 'in-migration' state */ + ret = dict_set_uint32 (local->xattr_req, + GLUSTERFS_OPEN_FD_COUNT, 4); + for (i = 0; i < layout->cnt; i++) { subvol = layout->list[i].xlator; @@ -1060,7 +1124,12 @@ dht_lookup (call_frame_t *frame, xlator_t *this, "trusted.glusterfs.dht", 4 * 4); ret = dict_set_uint32 (local->xattr_req, - "trusted.glusterfs.dht.linkto", 256); + DHT_LINKFILE_KEY, 256); + + /* need it for self-healing linkfiles which is + 'in-migration' state */ + ret = dict_set_uint32 (local->xattr_req, + GLUSTERFS_OPEN_FD_COUNT, 4); if (!hashed_subvol) { gf_log (this->name, GF_LOG_DEBUG, @@ -1101,286 +1170,6 @@ err: int -dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = -1; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - dht_iatt_merge (this, &local->prebuf, prebuf, prev->this); - dht_iatt_merge (this, &local->stbuf, postbuf, prev->this); - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); -out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf); -err: - return 0; -} - - - -int -dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *stbuf) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - GF_VALIDATE_OR_GOTO ("dht", frame, err); - GF_VALIDATE_OR_GOTO ("dht", this, out); - GF_VALIDATE_OR_GOTO ("dht", frame->local, out); - GF_VALIDATE_OR_GOTO ("dht", cookie, out); - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); -out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, - &local->stbuf); -err: - return 0; -} - - -int -dht_stat (call_frame_t *frame, xlator_t *this, - loc_t *loc) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->layout = layout = dht_layout_get (this, loc->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = layout->cnt; - - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; - - STACK_WIND (frame, dht_attr_cbk, - subvol, subvol->fops->stat, - loc); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - dht_layout_t *layout = NULL; - int i = 0; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->layout = layout = dht_layout_get (this, fd->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (fd->inode); - local->call_cnt = layout->cnt;; - - for (i = 0; i < layout->cnt; i++) { - subvol = layout->list[i].xlator; - STACK_WIND (frame, dht_attr_cbk, - subvol, subvol->fops->fstat, - fd); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; - - STACK_WIND (frame, dht_truncate_cbk, - subvol, subvol->fops->truncate, - loc, offset); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int -dht_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->inode = inode_ref (fd->inode); - local->call_cnt = 1; - - STACK_WIND (frame, dht_truncate_cbk, - subvol, subvol->fops->ftruncate, - fd, offset); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent) @@ -1473,42 +1262,6 @@ err: } -int -dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *prebuf, struct iatt *postbuf) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, - prebuf, postbuf); - - return 0; -} - - int dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1518,7 +1271,6 @@ dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int this_call_cnt = 0; call_frame_t *prev = NULL; - local = frame->local; prev = cookie; @@ -1545,125 +1297,6 @@ unlock: return 0; } - -int -dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno) -{ - DHT_STACK_UNWIND (access, frame, op_ret, op_errno); - return 0; -} - - -int -dht_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->call_cnt = 1; - - STACK_WIND (frame, dht_access_cbk, - subvol, subvol->fops->access, - loc, mask); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (access, frame, -1, op_errno); - - return 0; -} - - -int -dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, const char *path, struct iatt *sbuf) -{ - dht_local_t *local = NULL; - - local = frame->local; - if (op_ret == -1) - goto err; - - if (!local) { - op_ret = -1; - op_errno = EINVAL; - } - -err: - DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf); - - return 0; -} - - -int -dht_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - STACK_WIND (frame, dht_readlink_cbk, - subvol, subvol->fops->readlink, - loc, size); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - static void fill_layout_info (dht_layout_t *layout, char *buf) { @@ -1869,7 +1502,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, dht_layout_t *layout = NULL; xlator_t **sub_volumes = NULL; int op_errno = -1; - int ret = 0; int i = 0; int cnt = 0; @@ -1881,38 +1513,33 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this->private, err); conf = this->private; - layout = dht_layout_get (this, loc->inode); - if (!layout) { - gf_log (this->name, GF_LOG_ERROR, - "layout is NULL"); - op_errno = ENOENT; - goto err; - } - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR); if (!local) { op_errno = ENOMEM; goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_ERROR, + "layout is NULL"); + op_errno = ENOENT; goto err; } - local->layout = layout; - - if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) { - hashed_subvol = dht_subvol_get_hashed (this, loc); - cached_subvol = dht_subvol_get_cached (this, loc->inode); + if (key) { local->key = gf_strdup (key); if (!local->key) { op_errno = ENOMEM; - goto err; } + } + + if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) { + hashed_subvol = dht_subvol_get_hashed (this, loc); + cached_subvol = local->cached_subvol; local->call_cnt = 1; if (hashed_subvol != cached_subvol) { @@ -1990,14 +1617,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } } - if (key) { - local->key = gf_strdup (key); - if (!local->key) { - op_errno = ENOMEM; - goto err; - } - } - if (loc->inode-> ia_type == IA_IFDIR) { cnt = local->call_cnt = layout->cnt; } else { @@ -2032,7 +1651,13 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, err); VALIDATE_OR_GOTO (fd->inode, err); - subvol = dht_subvol_get_cached (this, fd->inode); + local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; if (!subvol) { gf_log (this->name, GF_LOG_DEBUG, "no cached subvolume for fd=%p", fd); @@ -2040,13 +1665,6 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->inode = inode_ref (fd->inode); local->call_cnt = 1; STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr, @@ -2095,7 +1713,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc->path, err); conf = this->private; - subvol = dht_subvol_get_cached (this, loc->inode); + local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; if (!subvol) { gf_log (this->name, GF_LOG_DEBUG, "no cached subvolume for path=%s", loc->path); @@ -2103,13 +1727,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->layout = layout = dht_layout_get (this, loc->inode); + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "no layout for path=%s", loc->path); @@ -2117,12 +1735,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - tmp = dict_get (xattr, "distribute.migrate-data"); if (tmp) { if (!IA_ISREG (loc->inode->ia_type)) { @@ -2136,13 +1748,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, if (strcmp (value, "force") == 0) forced_rebalance = 1; - local->to_subvol = dht_subvol_get_hashed (this, loc); - local->from_subvol = dht_subvol_get_cached (this, loc->inode); - if (local->to_subvol == local->from_subvol) { - op_errno = ENOTSUP; + local->rebalance.target_node = dht_subvol_get_hashed (this, loc); + local->rebalance.from_subvol = local->cached_subvol; + + if (local->rebalance.target_node == local->rebalance.from_subvol) { + op_errno = EEXIST; goto err; } - if (local->to_subvol) { + if (local->rebalance.target_node) { local->flags = forced_rebalance; ret = dht_start_rebalance_task (this, frame); @@ -2153,7 +1766,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, "%s: failed to create a new synctask", loc->path); } - op_errno = ENOTSUP; + op_errno = EINVAL; goto err; } @@ -2259,7 +1872,13 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc->inode, err); VALIDATE_OR_GOTO (loc->path, err); - subvol = dht_subvol_get_cached (this, loc->inode); + local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; if (!subvol) { gf_log (this->name, GF_LOG_DEBUG, "no cached subvolume for path=%s", loc->path); @@ -2267,13 +1886,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->layout = layout = dht_layout_get (this, loc->inode); + layout = local->layout; if (!local->layout) { gf_log (this->name, GF_LOG_DEBUG, "no layout for path=%s", loc->path); @@ -2282,6 +1895,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, } local->call_cnt = layout->cnt; + local->key = gf_strdup (key); for (i = 0; i < layout->cnt; i++) { STACK_WIND (frame, dht_removexattr_cbk, @@ -2308,7 +1922,6 @@ dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int this_call_cnt = 0; call_frame_t *prev = NULL; - local = frame->local; prev = cookie; @@ -2335,318 +1948,6 @@ unlock: return 0; } - -int -dht_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, fd_t *fd, int wbflags) -{ - xlator_t *subvol = NULL; - int ret = -1; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->fd = fd_ref (fd); - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - - goto err; - } - - local->call_cnt = 1; - - STACK_WIND (frame, dht_fd_cbk, - subvol, subvol->fops->open, - loc, flags, fd, wbflags); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iovec *vector, int count, struct iatt *stbuf, - struct iobref *iobref) -{ - dht_local_t *local = frame->local; - - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - -out: - DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, - iobref); - - return 0; -} - - -int -dht_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - STACK_WIND (frame, dht_readv_cbk, - subvol, subvol->fops->readv, - fd, size, off); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); - - return 0; -} - - -int -dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - dht_local_t *local = NULL; - - if (op_ret == -1) { - goto out; - } - - local = frame->local; - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - -out: - DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); - - return 0; -} - - -int -dht_writev (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iovec *vector, int count, off_t off, - struct iobref *iobref) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - - op_errno = ENOMEM; - goto err; - } - - STACK_WIND (frame, dht_writev_cbk, - subvol, subvol->fops->writev, - fd, vector, count, off, iobref); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int -dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->fd = fd_ref (fd); - local->call_cnt = 1; - - STACK_WIND (frame, dht_err_cbk, - subvol, subvol->fops->flush, fd); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (flush, frame, -1, op_errno); - - return 0; -} - - -int -dht_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - local->call_cnt = 1; - - STACK_WIND (frame, dht_fsync_cbk, - subvol, subvol->fops->fsync, - fd, datasync); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int -dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct gf_flock *flock) -{ - DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock); - - return 0; -} - - -int -dht_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int cmd, struct gf_flock *flock) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - STACK_WIND (frame, dht_lk_cbk, - subvol, subvol->fops->lk, - fd, cmd, flock); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL); - - return 0; -} - /* * dht_normalize_stats - */ @@ -2742,7 +2043,7 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS); if (!local) { op_errno = ENOMEM; goto err; @@ -2787,11 +2088,9 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) { dht_local_t *local = NULL; dht_conf_t *conf = NULL; - int ret = -1; int op_errno = -1; int i = -1; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); @@ -2799,21 +2098,13 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR); if (!local) { op_errno = ENOMEM; goto err; } - local->fd = fd_ref (fd); - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - - goto err; - } - local->call_cnt = conf->subvolume_cnt; for (i = 0; i < conf->subvolume_cnt; i++) { @@ -2865,7 +2156,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, list_for_each_entry (orig_entry, (&orig_entries->list), list) { next_offset = orig_entry->d_off; - if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL) + if (check_is_linkfile_wo_dict (NULL, (&orig_entry->d_stat)) || (check_is_dir (NULL, (&orig_entry->d_stat), NULL) && (prev->this != dht_first_up_subvol (this)))) { continue; @@ -3053,9 +2344,8 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); - local = dht_local_init (frame); + local = dht_local_init (frame, NULL, NULL, whichop); if (!local) { - op_errno = ENOMEM; goto err; } @@ -3156,7 +2446,6 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) int op_errno = -1; int i = -1; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); @@ -3164,10 +2453,9 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR); if (!local) { op_errno = ENOMEM; - goto err; } @@ -3275,7 +2563,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this, { xlator_t *subvol = NULL; int op_errno = -1; - int ret = -1; xlator_t *avail_subvol = NULL; dht_local_t *local = NULL; @@ -3285,10 +2572,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); if (!local) { op_errno = ENOMEM; - goto err; } @@ -3301,13 +2587,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this, goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - - goto err; - } - if (!dht_is_subvol_filled (this, subvol)) { gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); @@ -3357,17 +2636,14 @@ dht_symlink (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; int op_errno = -1; dht_local_t *local = NULL; - int ret = -1; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK); if (!local) { op_errno = ENOMEM; - goto err; } @@ -3380,13 +2656,6 @@ dht_symlink (call_frame_t *frame, xlator_t *this, goto err; } - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc"); - op_errno = ENOMEM; - goto err; - } - gf_log (this->name, GF_LOG_TRACE, "creating %s on %s", loc->path, subvol->name); @@ -3410,11 +2679,9 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) { xlator_t *cached_subvol = NULL; xlator_t *hashed_subvol = NULL; - int ret = -1; int op_errno = -1; dht_local_t *local = NULL; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); @@ -3430,11 +2697,10 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) goto done; } - cached_subvol = dht_subvol_get_cached (this, loc->inode); - if (!cached_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; + local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK); + if (!local) { + op_errno = ENOMEM; + goto err; } @@ -3447,17 +2713,11 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - + cached_subvol = local->cached_subvol; + if (!cached_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; goto err; } @@ -3522,15 +2782,13 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; xlator_t *srcvol = NULL; - if (op_ret == -1) goto err; local = frame->local; srcvol = local->linkfile.srcvol; - STACK_WIND (frame, dht_link_cbk, - srcvol, srcvol->fops->link, + STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link, &local->loc, &local->loc2); return 0; @@ -3553,13 +2811,19 @@ dht_link (call_frame_t *frame, xlator_t *this, int ret = -1; dht_local_t *local = NULL; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (oldloc, err); VALIDATE_OR_GOTO (newloc, err); - cached_subvol = dht_subvol_get_cached (this, oldloc->inode); + local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + cached_subvol = local->cached_subvol; if (!cached_subvol) { gf_log (this->name, GF_LOG_DEBUG, "no cached subvolume for path=%s", oldloc->path); @@ -3576,29 +2840,14 @@ dht_link (call_frame_t *frame, xlator_t *this, goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - ret = loc_copy (&local->loc, oldloc); - if (ret == -1) { - op_errno = ENOMEM; - - goto err; - } - ret = loc_copy (&local->loc2, newloc); if (ret == -1) { op_errno = ENOMEM; - goto err; } if (hashed_subvol != cached_subvol) { - memcpy (local->gfid, oldloc->inode->gfid, 16); + uuid_copy (local->gfid, oldloc->inode->gfid); dht_linkfile_create (frame, dht_link_linkfile_cbk, cached_subvol, hashed_subvol, newloc); } else { @@ -3694,7 +2943,6 @@ dht_create (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *params) { int op_errno = -1; - int ret = -1; xlator_t *subvol = NULL; dht_local_t *local = NULL; xlator_t *avail_subvol = NULL; @@ -3705,9 +2953,8 @@ dht_create (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); if (!local) { - op_errno = ENOMEM; goto err; } @@ -3723,12 +2970,6 @@ dht_create (call_frame_t *frame, xlator_t *this, goto done; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = ENOMEM; - - goto err; - } subvol = dht_subvol_get_hashed (this, loc); if (!subvol) { gf_log (this->name, GF_LOG_DEBUG, @@ -3748,10 +2989,8 @@ dht_create (call_frame_t *frame, xlator_t *this, } /* Choose the minimum filled volume, and create the files there */ - /* TODO */ avail_subvol = dht_free_disk_available_subvol (this, subvol); if (avail_subvol != subvol) { - local->fd = fd_ref (fd); local->params = dict_ref (params); local->flags = flags; local->mode = mode; @@ -3790,7 +3029,6 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie, dht_local_t *local = NULL; dht_layout_t *layout = NULL; - local = frame->local; layout = local->selfheal.layout; @@ -3928,7 +3166,6 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, dht_local_t *local = NULL; dht_conf_t *conf = NULL; int op_errno = -1; - int ret = -1; xlator_t *hashed_subvol = NULL; @@ -3943,15 +3180,13 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR); if (!local) { - op_errno = ENOMEM; goto err; } hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { gf_log (this->name, GF_LOG_DEBUG, "hashed subvol not found for %s", @@ -3961,21 +3196,12 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, } local->hashed_subvol = hashed_subvol; - local->inode = inode_ref (loc->inode); - ret = loc_copy (&local->loc, loc); local->mode = mode; - - if (ret == -1) { - - op_errno = ENOMEM; - goto err; - } - local->params = dict_ref (params); + local->inode = inode_ref (loc->inode); local->layout = dht_layout_new (this, conf->subvolume_cnt); if (!local->layout) { - op_errno = ENOMEM; goto err; } @@ -4366,8 +3592,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) dht_conf_t *conf = NULL; int op_errno = -1; int i = -1; - int ret = -1; - VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -4378,9 +3602,8 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR); if (!local) { - op_errno = ENOMEM; goto err; } @@ -4388,13 +3611,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) local->call_cnt = conf->subvolume_cnt; local->op_ret = 0; - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - - op_errno = ENOMEM; - goto err; - } - local->flags = flags; local->fd = fd_create (local->loc.inode, frame->root->pid); @@ -4421,209 +3637,6 @@ err: return 0; } - -int -dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); - return 0; -} - - -int -dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; - - STACK_WIND (frame, - dht_xattrop_cbk, - subvol, subvol->fops->xattrop, - loc, flags, dict); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict); - return 0; -} - - -int -dht_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - STACK_WIND (frame, - dht_fxattrop_cbk, - subvol, subvol->fops->fxattrop, - fd, flags, dict); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL); - - return 0; -} - - -int -dht_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno); - return 0; -} - - -int32_t -dht_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - subvol = dht_subvol_get_cached (this, loc->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = 1; - - STACK_WIND (frame, - dht_inodelk_cbk, - subvol, subvol->fops->inodelk, - volume, loc, cmd, lock); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (inodelk, frame, -1, op_errno); - - return 0; -} - - -int -dht_finodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ - DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - return 0; -} - - -int -dht_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) -{ - xlator_t *subvol = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - subvol = dht_subvol_get_cached (this, fd->inode); - if (!subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no cached subvolume for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - - STACK_WIND (frame, - dht_finodelk_cbk, - subvol, subvol->fops->finodelk, - volume, fd, cmd, lock); - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (finodelk, frame, -1, op_errno); - - return 0; -} - - int dht_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) @@ -4649,7 +3662,13 @@ dht_entrylk (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc->inode, err); VALIDATE_OR_GOTO (loc->path, err); - subvol = dht_subvol_get_cached (this, loc->inode); + local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; if (!subvol) { gf_log (this->name, GF_LOG_DEBUG, "no cached subvolume for path=%s", loc->path); @@ -4657,14 +3676,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this, goto err; } - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->inode = inode_ref (loc->inode); local->call_cnt = 1; STACK_WIND (frame, dht_entrylk_cbk, @@ -4726,161 +3737,6 @@ err: int -dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost) -{ - dht_local_t *local = NULL; - int this_call_cnt = 0; - call_frame_t *prev = NULL; - - - local = frame->local; - prev = cookie; - - LOCK (&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - gf_log (this->name, GF_LOG_DEBUG, - "subvolume %s returned -1 (%s)", - prev->this->name, strerror (op_errno)); - goto unlock; - } - - dht_iatt_merge (this, &local->prebuf, statpre, prev->this); - dht_iatt_merge (this, &local->stbuf, statpost, prev->this); - - local->op_ret = 0; - } -unlock: - UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->stbuf); - - return 0; -} - - -int -dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) -{ - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->inode, err); - VALIDATE_OR_GOTO (loc->path, err); - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, - "memory allocation failed :("); - goto err; - } - - local->layout = layout = dht_layout_get (this, loc->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - if (!layout_is_sane (layout)) { - gf_log (this->name, GF_LOG_DEBUG, - "layout is not sane for path=%s", loc->path); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (loc->inode); - local->call_cnt = layout->cnt; - - for (i = 0; i < layout->cnt; i++) { - STACK_WIND (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->setattr, - loc, stbuf, valid); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int -dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, - int32_t valid) -{ - dht_layout_t *layout = NULL; - dht_local_t *local = NULL; - int op_errno = -1; - int i = -1; - - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - local = dht_local_init (frame); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - local->layout = layout = dht_layout_get (this, fd->inode); - if (!layout) { - gf_log (this->name, GF_LOG_DEBUG, - "no layout for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - if (!layout_is_sane (layout)) { - gf_log (this->name, GF_LOG_DEBUG, - "layout is not sane for fd=%p", fd); - op_errno = EINVAL; - goto err; - } - - local->inode = inode_ref (fd->inode); - local->call_cnt = layout->cnt; - - for (i = 0; i < layout->cnt; i++) { - STACK_WIND (frame, dht_setattr_cbk, - layout->list[i].xlator, - layout->list[i].xlator->fops->fsetattr, - fd, stbuf, valid); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); - - return 0; -} - - -int dht_forget (xlator_t *this, inode_t *inode) { uint64_t tmp_layout = 0; @@ -4898,53 +3754,6 @@ dht_forget (xlator_t *this, inode_t *inode) } - -int -dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) -{ - xlator_list_t *subvols = NULL; - int cnt = 0; - - if (!conf) - return -1; - - for (subvols = this->children; subvols; subvols = subvols->next) - cnt++; - - conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), - gf_dht_mt_xlator_t); - if (!conf->subvolumes) { - - return -1; - } - conf->subvolume_cnt = cnt; - - cnt = 0; - for (subvols = this->children; subvols; subvols = subvols->next) - conf->subvolumes[cnt++] = subvols->xlator; - - conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), - gf_dht_mt_char); - if (!conf->subvolume_status) { - - return -1; - } - - conf->last_event = GF_CALLOC (cnt, sizeof (int), - gf_dht_mt_char); - if (!conf->last_event) { - - return -1; - } - conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t), - gf_dht_mt_subvol_time); - if (!conf->subvol_up_time) { - return -1; - } - return 0; -} - - int dht_notify (xlator_t *this, int event, void *data, ...) { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 95d1b1d6a08..ab1b82af2a0 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -29,45 +29,61 @@ #ifndef _DHT_H #define _DHT_H -#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout" +#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout" #define GF_DHT_LOOKUP_UNHASHED_ON 1 #define GF_DHT_LOOKUP_UNHASHED_AUTO 2 -#define DHT_PATHINFO_HEADER "DISTRIBUTE:" +#define DHT_PATHINFO_HEADER "DISTRIBUTE:" #include <fnmatch.h> typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno); + xlator_t *this, + int32_t op_ret, int32_t op_errno); +typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame, + int ret); struct dht_layout { - int spread_cnt; /* layout spread count per directory, - is controlled by 'setxattr()' with - special key */ - int cnt; - int preset; - int gen; - int type; - int ref; /* use with dht_conf_t->layout_lock */ - int search_unhashed; + int spread_cnt; /* layout spread count per directory, + is controlled by 'setxattr()' with + special key */ + int cnt; + int preset; + int gen; + int type; + int ref; /* use with dht_conf_t->layout_lock */ + int search_unhashed; struct { - int err; /* 0 = normal - -1 = dir exists and no xattr - >0 = dir lookup failed with errno - */ - uint32_t start; - uint32_t stop; - xlator_t *xlator; + int err; /* 0 = normal + -1 = dir exists and no xattr + >0 = dir lookup failed with errno + */ + uint32_t start; + uint32_t stop; + xlator_t *xlator; } list[0]; }; -typedef struct dht_layout dht_layout_t; +typedef struct dht_layout dht_layout_t; typedef enum { DHT_HASH_TYPE_DM, } dht_hashfn_type_t; +/* rebalance related */ +struct dht_rebalance_ { + xlator_t *from_subvol; + xlator_t *target_node; + int32_t wbflags; + off_t offset; + size_t size; + int32_t flags; + int count; + struct iobref *iobref; + struct iovec *vector; + struct iatt stbuf; + dht_defrag_cbk_fn_t target_op_fn; +}; struct dht_local { int call_cnt; @@ -140,11 +156,12 @@ struct dht_local { /* flag used to make sure we need to return estale in {lookup,revalidate}_cbk */ - char return_estale; + char return_estale; + char need_lookup_everywhere; - /* rebalance related */ -#define to_subvol hashed_subvol -#define from_subvol cached_subvol + glusterfs_fop_t fop; + + struct dht_rebalance_ rebalance; }; typedef struct dht_local dht_local_t; @@ -209,11 +226,21 @@ typedef struct dht_disk_layout dht_disk_layout_t; #define is_last_call(cnt) (cnt == 0) -#define DHT_LINKFILE_MODE (S_ISVTX) +#define DHT_MIGRATION_IN_PROGRESS 1 +#define DHT_MIGRATION_COMPLETED 2 + +#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto" +#define DHT_LINKFILE_MODE (S_ISVTX) + #define check_is_linkfile(i,s,x) ( \ ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \ == DHT_LINKFILE_MODE) && \ - (s->ia_size == 0)) + dict_get (x, DHT_LINKFILE_KEY)) + +#define check_is_linkfile_wo_dict(i,s) ( \ + ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \ + == DHT_LINKFILE_MODE)) + #define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type)) @@ -221,85 +248,86 @@ typedef struct dht_disk_layout dht_disk_layout_t; #define DHT_STACK_UNWIND(fop, frame, params ...) do { \ dht_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ + xlator_t *__xl = NULL; \ if (frame) { \ - __xl = frame->this; \ - __local = frame->local; \ + __xl = frame->this; \ + __local = frame->local; \ frame->local = NULL; \ } \ STACK_UNWIND_STRICT (fop, frame, params); \ dht_local_wipe (__xl, __local); \ } while (0) -#define DHT_STACK_DESTROY(frame) do { \ - dht_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root); \ - dht_local_wipe (__xl, __local); \ +#define DHT_STACK_DESTROY(frame) do { \ + dht_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + dht_local_wipe (__xl, __local); \ } while (0) -dht_layout_t *dht_layout_new (xlator_t *this, int cnt); -dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); -dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); -xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name); -int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); -int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, - uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, - uint32_t *misc_p); -int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, - xlator_t *subvol, loc_t *loc, dict_t *xattr); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, + uint32_t *misc_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, dict_t *xattr); xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); -int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc); +int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc); int dht_layouts_init (xlator_t *this, dht_conf_t *conf); int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, - int op_ret, int op_errno, dict_t *xattr); + int op_ret, int op_errno, dict_t *xattr); int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, - int pos, int32_t **disk_layout_p); -int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, - int pos, void *disk_layout_raw); + int pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, void *disk_layout_raw); int dht_frame_return (call_frame_t *frame); -int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol, uint64_t *x); void dht_local_wipe (xlator_t *this, dht_local_t *local); -dht_local_t *dht_local_init (call_frame_t *frame); -int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, - xlator_t *subvol); +dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, + glusterfs_fop_t fop); +int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, + xlator_t *subvol); xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); -int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); +int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); int dht_hash_compute (int type, const char *name, uint32_t *hash_p); -int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, - xlator_t *tovol, xlator_t *fromvol, loc_t *loc); -int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); +int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc); +int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); +int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); int -dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, - loc_t *loc, dht_layout_t *layout); +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); int dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, dht_layout_t *layout); int -dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, - loc_t *loc, dht_layout_t *layout); +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); int dht_layout_sort_volname (dht_layout_t *layout); @@ -308,31 +336,252 @@ int dht_rename (call_frame_t *frame, xlator_t *this, int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); -int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); +int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); -int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); -void dht_layout_unref (xlator_t *this, dht_layout_t *layout); +int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); +void dht_layout_unref (xlator_t *this, dht_layout_t *layout); dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout); -xlator_t *dht_first_up_subvol (xlator_t *this); -xlator_t *dht_last_up_subvol (xlator_t *this); +xlator_t *dht_first_up_subvol (xlator_t *this); +xlator_t *dht_last_up_subvol (xlator_t *this); int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name); -int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, +int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, xlator_t **subvol); -int dht_rename_cleanup (call_frame_t *frame); +int dht_rename_cleanup (call_frame_t *frame); int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent); + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent); int dht_fix_directory_layout (call_frame_t *frame, - dht_selfheal_dir_cbk_t dir_cbk, - dht_layout_t *layout); + dht_selfheal_dir_cbk_t dir_cbk, + dht_layout_t *layout); +int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf); + +/* migration/rebalance */ int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame); -#endif /* _DHT_H */ + +int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame); +int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame); + + +/* FOPS */ +int32_t dht_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req); + +int32_t dht_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd); + +int32_t dht_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset); + +int32_t dht_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset); + +int32_t dht_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask); + +int32_t dht_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size); + +int32_t dht_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev, dict_t *params); + +int32_t dht_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dict_t *params); + +int32_t dht_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags); + +int32_t dht_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *loc, dict_t *params); + +int32_t dht_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc); + +int32_t dht_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc); + +int32_t dht_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + fd_t *fd, dict_t *params); + +int32_t dht_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd, + int32_t wbflags); + +int32_t dht_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset); + +int32_t dht_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset, + struct iobref *iobref); + +int32_t dht_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd); + +int32_t dht_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync); + +int32_t dht_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd); + +int32_t dht_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync); + +int32_t dht_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags); + +int32_t dht_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name); + +int32_t dht_fsetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + dict_t *dict, + int32_t flags); + +int32_t dht_fgetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + const char *name); + +int32_t dht_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name); + +int32_t dht_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct gf_flock *flock); + +int32_t dht_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock); + +int32_t dht_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock); + +int32_t dht_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int32_t dht_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, off_t off); + +int32_t dht_readdirp (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, off_t off); + +int32_t dht_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict); + +int32_t dht_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict); + +int32_t dht_forget (xlator_t *this, inode_t *inode); +int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid); +int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid); + +int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...); + +/* definitions for nufa/switch */ +int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent); +int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent); + + + +#endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 9e2327bffeb..d27d8bf91b8 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -98,7 +98,9 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx) goto err; } - statfs_local = dht_local_init (statfs_frame); + /* local->fop value is not used in this case */ + statfs_local = dht_local_init (statfs_frame, NULL, NULL, + GF_FOP_MAXVALUE); if (!statfs_local) { goto err; } @@ -141,12 +143,13 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) goto err; } - statfs_local = dht_local_init (statfs_frame); + /* In this case, 'local->fop' is not used */ + statfs_local = dht_local_init (statfs_frame, loc, NULL, + GF_FOP_MAXVALUE); if (!statfs_local) { goto err; } - loc_copy (&statfs_local->loc, loc); loc_t tmp_loc = { .inode = NULL, .path = "/", }; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 44ed9c68237..99abe023b2a 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -224,27 +224,60 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) GF_FREE (local->key); } + if (local->rebalance.vector) + GF_FREE (local->rebalance.vector); + + if (local->rebalance.iobref) + iobref_unref (local->rebalance.iobref); + GF_FREE (local); } dht_local_t * -dht_local_init (call_frame_t *frame) +dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop) { dht_local_t *local = NULL; + inode_t *inode = NULL; + int ret = 0; /* TODO: use mem-pool */ - local = GF_CALLOC (1, sizeof (*local), - gf_dht_mt_dht_local_t); - + local = GF_CALLOC (1, sizeof (*local), gf_dht_mt_dht_local_t); if (!local) - return NULL; + goto out; + + if (loc) { + ret = loc_copy (&local->loc, loc); + if (ret) + goto out; - local->op_ret = -1; + inode = loc->inode; + } + + if (fd) { + local->fd = fd_ref (fd); + if (!inode) + inode = fd->inode; + } + + local->op_ret = -1; local->op_errno = EUCLEAN; + local->fop = fop; + + if (inode) { + local->layout = dht_layout_get (frame->this, inode); + local->cached_subvol = dht_subvol_get_cached (frame->this, + inode); + } frame->local = local; +out: + if (ret) { + if (local) + GF_FREE (local); + local = NULL; + } return local; } @@ -496,3 +529,374 @@ err: loc_wipe (child); return -1; } + + + +int +dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) +{ + xlator_list_t *subvols = NULL; + int cnt = 0; + + if (!conf) + return -1; + + for (subvols = this->children; subvols; subvols = subvols->next) + cnt++; + + conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), + gf_dht_mt_xlator_t); + if (!conf->subvolumes) { + return -1; + } + conf->subvolume_cnt = cnt; + + cnt = 0; + for (subvols = this->children; subvols; subvols = subvols->next) + conf->subvolumes[cnt++] = subvols->xlator; + + conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), + gf_dht_mt_char); + if (!conf->subvolume_status) { + return -1; + } + + conf->last_event = GF_CALLOC (cnt, sizeof (int), + gf_dht_mt_char); + if (!conf->last_event) { + return -1; + } + + conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t), + gf_dht_mt_subvol_time); + if (!conf->subvol_up_time) { + return -1; + } + + conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t), + gf_dht_mt_dht_du_t); + if (!conf->du_stats) { + return -1; + } + + return 0; +} + + + + +static int +dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) +{ + dht_local_t *local = NULL; + + local = frame->local; + + local->rebalance.target_op_fn (THIS, frame, op_ret); + + return 0; +} + + +int +dht_migration_complete_check_task (void *data) +{ + int ret = -1; + xlator_t *src_node = NULL; + xlator_t *dst_node = NULL; + dht_local_t *local = NULL; + dict_t *dict = NULL; + dht_layout_t *layout = NULL; + struct iatt stbuf = {0,}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + loc_t tmp_loc = {0,}; + char *path = NULL; + + this = THIS; + frame = data; + local = frame->local; + + src_node = local->cached_subvol; + + /* getxattr on cached_subvol for 'linkto' value */ + if (!local->loc.inode) + ret = syncop_fgetxattr (src_node, local->fd, &dict, + DHT_LINKFILE_KEY); + else + ret = syncop_getxattr (src_node, &local->loc, &dict, + DHT_LINKFILE_KEY); + + if (!ret) + dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); + + if (ret) { + if ((errno != ENOENT) || (!local->loc.inode)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to get the 'linkto' xattr %s", + local->loc.path, strerror (errno)); + goto out; + } + /* Need to do lookup on hashed subvol, then get the file */ + ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL, + NULL); + if (ret) + goto out; + dst_node = dht_subvol_get_cached (this, local->loc.inode); + } + + if (!dst_node) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to get the destination node", + local->loc.path); + ret = -1; + goto out; + } + + /* lookup on dst */ + if (local->loc.inode) { + ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to lookup the file on %s", + local->loc.path, dst_node->name); + goto out; + } + + if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: gfid different on the target file on %s", + local->loc.path, dst_node->name); + ret = -1; + goto out; + } + } + + /* update inode ctx (the layout) */ + dht_layout_unref (this, local->layout); + + if (!local->loc.inode) + ret = dht_layout_preset (this, dst_node, local->fd->inode); + else + ret = dht_layout_preset (this, dst_node, local->loc.inode); + if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s: could not set preset layout for subvol %s", + local->loc.path, dst_node->name); + ret = -1; + goto out; + } + + layout = dht_layout_for_subvol (this, dst_node); + if (!layout) { + gf_log (this->name, GF_LOG_INFO, + "%s: no pre-set layout for subvolume %s", + local->loc.path, dst_node ? dst_node->name : "<nil>"); + ret = -1; + goto out; + } + + if (!local->loc.inode) + ret = dht_layout_set (this, local->fd->inode, layout); + else + ret = dht_layout_set (this, local->loc.inode, layout); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set the new layout", + local->loc.path); + goto out; + } + + local->cached_subvol = dst_node; + ret = 0; + + if (!local->fd) + goto out; + + /* once we detect the migration complete, the fd-ctx is no more + required.. delete the ctx, and do one extra 'fd_unref' for open fd */ + ret = fd_ctx_del (local->fd, this, NULL); + if (!ret) { + fd_unref (local->fd); + ret = 0; + goto out; + } + + /* if 'local->fd' (ie, fd based operation), send a 'open()' on + destination if not already done */ + if (local->loc.inode) { + ret = syncop_open (dst_node, &local->loc, + local->fd->flags, local->fd); + } else { + tmp_loc.inode = local->fd->inode; + inode_path (local->fd->inode, NULL, &path); + if (path) + tmp_loc.path = path; + ret = syncop_open (dst_node, &tmp_loc, + local->fd->flags, local->fd); + if (path) + GF_FREE (path); + + } + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to send open() on target file at %s", + local->loc.path, dst_node->name); + goto out; + } + + /* need this unref for the fd on src_node */ + fd_unref (local->fd); + ret = 0; +out: + + return ret; +} + +int +dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) +{ + int ret = -1; + dht_conf_t *conf = NULL; + + conf = this->private; + + ret = synctask_new (conf->env, dht_migration_complete_check_task, + dht_migration_complete_check_done, + frame, frame); + return ret; +} + +/* During 'in-progress' state, both nodes should have the file */ +static int +dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data) +{ + dht_local_t *local = NULL; + + local = sync_frame->local; + + local->rebalance.target_op_fn (THIS, sync_frame, op_ret); + + return 0; +} + +static int +dht_rebalance_inprogress_task (void *data) +{ + int ret = -1; + xlator_t *src_node = NULL; + xlator_t *dst_node = NULL; + dht_local_t *local = NULL; + dict_t *dict = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + char *path = NULL; + struct iatt stbuf = {0,}; + loc_t tmp_loc = {0,}; + + this = THIS; + frame = data; + local = frame->local; + + src_node = local->cached_subvol; + + /* getxattr on cached_subvol for 'linkto' value */ + if (local->loc.inode) + ret = syncop_getxattr (src_node, &local->loc, &dict, + DHT_LINKFILE_KEY); + else + ret = syncop_fgetxattr (src_node, local->fd, &dict, + DHT_LINKFILE_KEY); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to get the 'linkto' xattr %s", + local->loc.path, strerror (errno)); + goto out; + } + + dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); + if (!dst_node) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to get the 'linkto' xattr from dict", + local->loc.path); + ret = -1; + goto out; + } + + local->rebalance.target_node = dst_node; + + if (local->loc.inode) { + /* lookup on dst */ + ret = syncop_lookup (dst_node, &local->loc, NULL, + &stbuf, NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to lookup the file on %s", + local->loc.path, dst_node->name); + goto out; + } + + if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: gfid different on the target file on %s", + local->loc.path, dst_node->name); + ret = -1; + goto out; + } + } + + ret = 0; + + if (!local->fd) + goto out; + + if (local->loc.inode) { + ret = syncop_open (dst_node, &local->loc, + local->fd->flags, local->fd); + } else { + tmp_loc.inode = local->fd->inode; + inode_path (local->fd->inode, NULL, &path); + if (path) + tmp_loc.path = path; + ret = syncop_open (dst_node, &tmp_loc, + local->fd->flags, local->fd); + if (path) + GF_FREE (path); + } + + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to send open() on target file at %s", + local->loc.path, dst_node->name); + goto out; + } + + ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set fd-ctx target file at %s", + local->loc.path, dst_node->name); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame) +{ + + int ret = -1; + dht_conf_t *conf = NULL; + + conf = this->private; + + ret = synctask_new (conf->env, dht_rebalance_inprogress_task, + dht_inprogress_check_done, + frame, frame); + return ret; +} diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c new file mode 100644 index 00000000000..1e9f54bdaf1 --- /dev/null +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -0,0 +1,1115 @@ +/* + Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.h" + +int dht_access2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_open2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret); + +int +dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = 0; + + local = frame->local; + prev = cookie; + + local->op_errno = op_errno; + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto out; + } + + if (!op_ret || (local->call_cnt != 1)) + goto out; + + /* rebalance would have happened */ + local->rebalance.target_op_fn = dht_open2; + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + +out: + DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd); + + return 0; +} + +int +dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = ENOENT; + if (op_ret) + goto out; + + local->call_cnt = 2; + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open, + &local->loc, local->rebalance.flags, local->fd, + local->rebalance.wbflags); + return 0; + +out: + DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); + return 0; +} + +int +dht_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, fd_t *fd, int wbflags) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, loc, fd, GF_FOP_OPEN); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local->rebalance.wbflags = wbflags; + local->rebalance.flags = flags; + local->call_cnt = 1; + + STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open, + loc, flags, fd, wbflags); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL); + + return 0; +} + +int +dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf) +{ + uint64_t tmp_subvol = 0; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + prev = cookie; + + if ((op_ret == -1) && (op_errno != ENOENT)) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + /* Check if the rebalance phase2 is true */ + if ((op_ret == -1) || (IA_ISREG (stbuf->ia_type) && + ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE))) { + if (local->fd) + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (ret) { + /* Phase 2 of migration */ + local->rebalance.target_op_fn = dht_attr2; + ret = dht_rebalance_complete_check (this, frame); + } else { + /* value is already set in fd_ctx, that means no need + to check for whether its complete or not. */ + dht_attr2 (this, frame, 0); + ret = 0; + } + if (!ret) + goto err; + } +out: + DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf); +err: + return 0; +} + +int +dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = local->op_errno; + if (op_ret == -1) + goto out; + + subvol = local->cached_subvol; + local->call_cnt = 2; + + if (local->fop == GF_FOP_FSTAT) { + STACK_WIND (frame, dht_file_attr_cbk, subvol, + subvol->fops->fstat, local->fd); + } else { + STACK_WIND (frame, dht_file_attr_cbk, subvol, + subvol->fops->stat, &local->loc); + } + return 0; + +out: + DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); + return 0; +} + +int +dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + + goto unlock; + } + + dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); +out: + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, + &local->stbuf); + } +err: + return 0; +} + +int +dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + + local = dht_local_init (frame, loc, NULL, GF_FOP_STAT); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (IA_ISREG (loc->inode->ia_type)) { + local->call_cnt = 1; + + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_file_attr_cbk, subvol, + subvol->fops->stat, loc); + + return 0; + } + + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->stat, + loc); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int i = 0; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (IA_ISREG (fd->inode->ia_type)) { + local->call_cnt = 1; + + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_file_attr_cbk, subvol, + subvol->fops->fstat, fd); + + return 0; + } + + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + subvol = layout->list[i].xlator; + STACK_WIND (frame, dht_attr_cbk, + subvol, subvol->fops->fstat, + fd); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); + + return 0; +} + +int +dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + struct iovec *vector, int count, struct iatt *stbuf, + struct iobref *iobref) +{ + dht_local_t *local = NULL; + int ret = 0; + + local = frame->local; + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + /* This is already second try, no need for re-check */ + if (local->call_cnt != 1) + goto out; + + if ((op_ret == -1) && (op_errno != ENOENT)) + goto out; + + if ((op_ret == -1) || ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE)) { + /* File would be migrated to other node */ + ret = fd_ctx_get (local->fd, this, NULL); + if (ret) { + local->rebalance.target_op_fn = dht_readv2; + ret = dht_rebalance_complete_check (this, frame); + } else { + /* value is already set in fd_ctx, that means no need + to check for whether its complete or not. */ + dht_readv2 (this, frame, 0); + } + if (!ret) + return 0; + } + +out: + DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref); + + return 0; +} + +int +dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = local->op_errno; + if (op_ret == -1) + goto out; + + local->call_cnt = 2; + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, + local->fd, local->rebalance.size, local->rebalance.offset); + + return 0; + +out: + DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + return 0; +} + +int +dht_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_READ); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local->rebalance.offset = off; + local->rebalance.size = size; + local->call_cnt = 1; + + STACK_WIND (frame, dht_readv_cbk, + subvol, subvol->fops->readv, + fd, size, off); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + + return 0; +} + +int +dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + int ret = -1; + dht_local_t *local = NULL; + + local = frame->local; + + if (local->call_cnt != 1) + goto out; + + if ((op_ret == -1) && (op_errno == ENOENT)) { + /* File would be migrated to other node */ + local->rebalance.target_op_fn = dht_access2; + ret = dht_rebalance_complete_check (frame->this, frame); + if (!ret) + return 0; + } + +out: + DHT_STACK_UNWIND (access, frame, op_ret, op_errno); + return 0; +} + +int +dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + int op_errno = EINVAL; + + local = frame->local; + if (!local) + goto out; + + op_errno = local->op_errno; + if (op_ret == -1) + goto out; + + local->call_cnt = 2; + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, + &local->loc, local->rebalance.flags); + + return 0; + +out: + DHT_STACK_UNWIND (access, frame, -1, op_errno); + return 0; +} + + +int +dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.flags = mask; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, + loc, mask); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (access, frame, -1, op_errno); + + return 0; +} + + +int +dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno) +{ + dht_local_t *local = NULL; + int ret = -1; + + local = frame->local; + + local->op_errno = op_errno; + + if (local->call_cnt != 1) + goto out; + + /* If context is set, then send flush() it to the destination */ + ret = fd_ctx_get (local->fd, this, NULL); + if (!ret) { + dht_flush2 (this, frame, 0); + return 0; + } + +out: + DHT_STACK_UNWIND (flush, frame, op_ret, op_errno); + + return 0; +} + +int +dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + STACK_WIND (frame, dht_flush_cbk, + subvol, subvol->fops->flush, local->fd); + + return 0; +} + + +int +dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, dht_flush_cbk, + subvol, subvol->fops->flush, fd); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (flush, frame, -1, op_errno); + + return 0; +} + + +int +dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + + local = frame->local; + prev = cookie; + + local->op_errno = op_errno; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + ret = fd_ctx_get (local->fd, this, NULL); + if (ret) { + local->rebalance.target_op_fn = dht_fsync2; + + /* Check if the rebalance phase1 is true */ + if (IA_ISREG (postbuf->ia_type) && + (postbuf->ia_prot.sticky == 1) && + (postbuf->ia_prot.sgid == 1)) { + ret = dht_rebalance_in_progress_check (this, frame); + } + + /* Check if the rebalance phase2 is true */ + if (IA_ISREG (postbuf->ia_type) && + ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE)) { + ret = dht_rebalance_complete_check (this, frame); + } + } else { + dht_fsync2 (this, frame, 0); + ret = 0; + } + if (!ret) + return 0; + +out: + DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno, + prebuf, postbuf); + + return 0; +} + +int +dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync, + local->fd, local->rebalance.flags); + + return 0; +} + +int +dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC); + if (!local) { + op_errno = ENOMEM; + + goto err; + } + + local->call_cnt = 1; + local->rebalance.flags = datasync; + + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync, + fd, datasync); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to + indicate that lock migration happened on the fd, so we can consider it as + phase 2 of migration */ +int +dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct gf_flock *flock) +{ + DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock); + + return 0; +} + + +int +dht_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int cmd, struct gf_flock *flock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + /* TODO: for rebalance, we need to preserve the fop arguments */ + STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd, + cmd, flock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL); + + return 0; +} + +/* Symlinks are currently not migrated, so no need for any check here */ +int +dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, const char *path, struct iatt *sbuf) +{ + dht_local_t *local = NULL; + + local = frame->local; + if (op_ret == -1) + goto err; + + if (!local) { + op_ret = -1; + op_errno = EINVAL; + } + +err: + DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf); + + return 0; +} + + +int +dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_readlink_cbk, + subvol, subvol->fops->readlink, + loc, size); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); + + return 0; +} + +/* Currently no translators on top of 'distribute' will be using + * below fops, hence not implementing 'migration' related checks + */ + +int +dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); + return 0; +} + + +int +dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, + dht_xattrop_cbk, + subvol, subvol->fops->xattrop, + loc, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict); + return 0; +} + + +int +dht_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, + dht_fxattrop_cbk, + subvol, subvol->fops->fxattrop, + fd, flags, dict); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL); + + return 0; +} + + +int +dht_inodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno); + return 0; +} + + +int32_t +dht_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + local->call_cnt = 1; + + STACK_WIND (frame, + dht_inodelk_cbk, + subvol, subvol->fops->inodelk, + volume, loc, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (inodelk, frame, -1, op_errno); + + return 0; +} + + +int +dht_finodelk_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ + DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno); + return 0; +} + + +int +dht_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + subvol = dht_subvol_get_cached (this, fd->inode); + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + + STACK_WIND (frame, + dht_finodelk_cbk, + subvol, subvol->fops->finodelk, + volume, fd, cmd, lock); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (finodelk, frame, -1, op_errno); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c new file mode 100644 index 00000000000..21eca611740 --- /dev/null +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -0,0 +1,597 @@ +/* + Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.h" + +int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret); + +int +dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + dht_local_t *local = NULL; + int ret = -1; + + if (op_ret == -1) { + goto out; + } + + local = frame->local; + if (!local) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + if (local->call_cnt != 1) + goto out; + + local->rebalance.target_op_fn = dht_writev2; + + /* Phase 2 of migration */ + if (IA_ISREG (postbuf->ia_type) && + ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE)) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) && + (postbuf->ia_prot.sgid == 1)) { + ret = fd_ctx_get (local->fd, this, NULL); + if (!ret) { + dht_writev2 (this, frame, 0); + return 0; + } + ret = dht_rebalance_in_progress_check (this, frame); + if (!ret) + return 0; + } + +out: + DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); + + return 0; +} + +int +dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + STACK_WIND (frame, dht_writev_cbk, + subvol, subvol->fops->writev, + local->fd, local->rebalance.vector, local->rebalance.count, + local->rebalance.offset, local->rebalance.iobref); + + return 0; +} + +int +dht_writev (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iovec *vector, int count, off_t off, + struct iobref *iobref) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE); + if (!local) { + + op_errno = ENOMEM; + goto err; + } + + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + + local->rebalance.vector = iov_dup (vector, count); + local->rebalance.offset = off; + local->rebalance.count = count; + local->rebalance.iobref = iobref_ref (iobref); + local->call_cnt = 1; + + STACK_WIND (frame, dht_writev_cbk, + subvol, subvol->fops->writev, + fd, vector, count, off, iobref); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); + + return 0; +} + + + +int +dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + prev = cookie; + + if ((op_ret == -1) && (op_errno != ENOENT)) { + local->op_errno = op_errno; + local->op_ret = -1; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + + goto out; + } + + if (local->call_cnt != 1) + goto out; + + local->rebalance.target_op_fn = dht_truncate2; + + /* Phase 2 of migration */ + if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) && + ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE))) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + goto err; + } + + /* Check if the rebalance phase1 is true */ + if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) && + (postbuf->ia_prot.sgid == 1)) { + ret = fd_ctx_get (local->fd, this, NULL); + if (!ret) { + dht_truncate2 (this, frame, 0); + goto err; + } + ret = dht_rebalance_in_progress_check (this, frame); + if (!ret) + goto err; + } + +out: + DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno, + prebuf, postbuf); +err: + return 0; +} + + +int +dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + if (local->fd) + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_TRUNCATE) { + STACK_WIND (frame, dht_truncate_cbk, subvol, + subvol->fops->truncate, &local->loc, + local->rebalance.offset); + } else { + STACK_WIND (frame, dht_truncate_cbk, subvol, + subvol->fops->ftruncate, local->fd, + local->rebalance.offset); + } + + return 0; +} + +int +dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.offset = offset; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_truncate_cbk, + subvol, subvol->fops->truncate, + loc, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); + + return 0; +} + +int +dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.offset = offset; + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_truncate_cbk, + subvol, subvol->fops->ftruncate, + fd, offset); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); + + return 0; +} + +/* handle cases of migration here for 'setattr()' calls */ +int +dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + + local = frame->local; + prev = cookie; + + local->op_errno = op_errno; + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto out; + } + + if (local->call_cnt != 1) + goto out; + + local->rebalance.target_op_fn = dht_setattr2; + + /* Phase 2 of migration */ + if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) && + ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & + ~S_IFMT) == DHT_LINKFILE_MODE))) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + goto out; + } + + /* At the end of the migration process, whatever 'attr' we + have on source file will be migrated to destination file + in one shot, hence we don't need to check for in progress + state here */ +out: + DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno, + prebuf, postbuf); + + return 0; +} + +int +dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + if (local->fd) + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + if (local->fop == GF_FOP_SETATTR) { + STACK_WIND (frame, dht_file_setattr_cbk, subvol, + subvol->fops->setattr, &local->loc, + &local->rebalance.stbuf, local->rebalance.flags); + } else { + STACK_WIND (frame, dht_file_setattr_cbk, subvol, + subvol->fops->fsetattr, local->fd, + &local->rebalance.stbuf, local->rebalance.flags); + } + + return 0; +} + + +/* Keep the existing code same for all the cases other than regular file */ +int +dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost) +{ + dht_local_t *local = NULL; + int this_call_cnt = 0; + call_frame_t *prev = NULL; + + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + goto unlock; + } + + dht_iatt_merge (this, &local->prebuf, statpre, prev->this); + dht_iatt_merge (this, &local->stbuf, statpost, prev->this); + + local->op_ret = 0; + } +unlock: + UNLOCK (&frame->lock); + + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->stbuf); + + return 0; +} + + +int +dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid) +{ + xlator_t *subvol = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + VALIDATE_OR_GOTO (loc->path, err); + + local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_DEBUG, + "layout is not sane for path=%s", loc->path); + op_errno = EINVAL; + goto err; + } + + if (IA_ISREG (loc->inode->ia_type)) { + /* in the regular file _cbk(), we need to check for + migration possibilities */ + local->rebalance.stbuf = *stbuf; + local->rebalance.flags = valid; + local->call_cnt = 1; + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_file_setattr_cbk, subvol, + subvol->fops->setattr, + loc, stbuf, valid); + + return 0; + } + + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_setattr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->setattr, + loc, stbuf, valid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); + + return 0; +} + + +int +dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid) +{ + xlator_t *subvol = NULL; + dht_layout_t *layout = NULL; + dht_local_t *local = NULL; + int op_errno = -1; + int i = -1; + + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + layout = local->layout; + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "no layout for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (!layout_is_sane (layout)) { + gf_log (this->name, GF_LOG_DEBUG, + "layout is not sane for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + if (IA_ISREG (fd->inode->ia_type)) { + /* in the regular file _cbk(), we need to check for + migration possibilities */ + local->rebalance.stbuf = *stbuf; + local->rebalance.flags = valid; + local->call_cnt = 1; + subvol = local->cached_subvol; + + STACK_WIND (frame, dht_file_setattr_cbk, subvol, + subvol->fops->fsetattr, + fd, stbuf, valid); + + return 0; + } + + local->call_cnt = layout->cnt; + + for (i = 0; i < layout->cnt; i++) { + STACK_WIND (frame, dht_setattr_cbk, + layout->list[i].xlator, + layout->list[i].xlator->fops->fsetattr, + fd, stbuf, valid); + } + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); + + return 0; +} diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c index 1c90c61d676..2186b064a3d 100644 --- a/xlators/cluster/dht/src/dht-linkfile.c +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -140,13 +140,14 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, goto err; } - unlink_local = dht_local_init (unlink_frame); + /* Using non-fop value here, as anyways, 'local->fop' is not used in + this particular case */ + unlink_local = dht_local_init (unlink_frame, loc, NULL, + GF_FOP_MAXVALUE); if (!unlink_local) { goto err; } - loc_copy (&unlink_local->loc, loc); - STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk, subvol, subvol->fops->unlink, &unlink_local->loc); diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 60008f2bd64..7b04e8a2dc8 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -83,15 +83,6 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count, break; } - /* do it regardless of all the above cases as we had to 'write' the - given number of bytes */ - ret = syncop_ftruncate (to, fd, offset + size); - if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to perform truncate on %s", to->name); - goto out; - } - ret = size; out: return ret; @@ -102,8 +93,7 @@ static inline int __is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict, struct iatt *stbuf) { - int ret = -1; - int open_fd_count = 0; + int ret = -1; if (!IA_ISREG (stbuf->ia_type)) { gf_log (this->name, GF_LOG_WARNING, @@ -121,14 +111,6 @@ __is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict, goto out; } - ret = dict_get_int32 (rsp_dict, GLUSTERFS_OPEN_FD_COUNT, &open_fd_count); - if (!ret && (open_fd_count > 0)) { - /* TODO: support migration of files with open fds */ - gf_log (this->name, GF_LOG_WARNING, - "%s: file has open fds, not attempting migration", - loc->path); - goto out; - } ret = 0; out: @@ -137,11 +119,10 @@ out: static inline int __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, - dict_t *dict, fd_t **dst_fd, int *need_unlink) + dict_t *dict, fd_t **dst_fd) { xlator_t *this = NULL; int ret = -1; - mode_t mode = 0; fd_t *fd = NULL; struct iatt new_stbuf = {0,}; @@ -154,6 +135,13 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, goto out; } + ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set gfid in dict for create", loc->path); + goto out; + } + fd = fd_create (loc->inode, DHT_REBALANCE_PID); if (!fd) { gf_log (this->name, GF_LOG_ERROR, @@ -163,40 +151,27 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, } ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, "failed to lookup %s on %s", - loc->path, to->name); - - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - ret = syncop_create (to, loc, O_WRONLY, mode, fd, dict); - if (ret < 0) { + if (!ret) { + /* File exits in the destination, check if gfid matches */ + if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { gf_log (this->name, GF_LOG_ERROR, - "failed to create %s on %s", loc->path, to->name); + "file %s exits in %s with different gfid", + loc->path, to->name); + fd_unref (fd); goto out; } - - *need_unlink = 1; - goto done; - } - - /* File exits in the destination, just do the open if gfid matches */ - if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "file %s exits in %s with different gfid", - loc->path, to->name); - fd_unref (fd); - goto out; } - ret = syncop_open (to, loc, O_WRONLY, fd); + /* Create the destination with LINKFILE mode, and linkto xattr, + if the linkfile already exists, it will just open the file */ + ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, + dict); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s on %s", - loc->path, to->name); - fd_unref (fd); + "failed to create %s on %s", loc->path, to->name); goto out; } -done: + if (dst_fd) *dst_fd = fd; @@ -254,16 +229,21 @@ out: static inline int __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, - int hole_exists) + uint64_t ia_size, int hole_exists) { - int ret = -1; + int ret = 0; int count = 0; off_t offset = 0; struct iovec *vector = NULL; struct iobref *iobref = NULL; - - while (1) { - ret = syncop_readv (from, src, DHT_REBALANCE_BLKSIZE, + uint64_t total = 0; + size_t read_size = 0; + + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? + DHT_REBALANCE_BLKSIZE : (ia_size - total)); + ret = syncop_readv (from, src, read_size, offset, &vector, &count, &iobref); if (!ret || (ret < 0)) { break; @@ -279,6 +259,7 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, break; } offset += ret; + total += ret; if (vector) GF_FREE (vector); @@ -298,6 +279,84 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, return ret; } + +static inline int +__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, + struct iatt *stbuf, fd_t **src_fd) +{ + int ret = 0; + fd_t *fd = NULL; + dict_t *dict = NULL; + xlator_t *this = NULL; + struct iatt iatt = {0,}; + + this = THIS; + + fd = fd_create (loc->inode, DHT_REBALANCE_PID); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, + "%s: fd create failed (source)", loc->path); + ret = -1; + goto out; + } + + ret = syncop_open (from, loc, O_RDWR, fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to open file %s on %s", + loc->path, from->name); + goto out; + } + + ret = -1; + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set xattr in dict for %s (linkto:%s)", + loc->path, to->name); + goto out; + } + + /* Once the migration starts, the source should have 'linkto' key set + to show which is the target, so other clients can work around it */ + ret = syncop_setxattr (from, loc, dict, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set xattr on %s in %s", + loc->path, from->name); + goto out; + } + + /* mode should be (+S+T) to indicate migration is in progress */ + iatt.ia_prot = stbuf->ia_prot; + iatt.ia_type = stbuf->ia_type; + iatt.ia_prot.sticky = 1; + iatt.ia_prot.sgid = 1; + + ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set mode on %s in %s", + loc->path, from->name); + goto out; + } + + if (src_fd) + *src_fd = fd; + + /* success */ + ret = 0; +out: + if (dict) + dict_unref (dict); + + return ret; +} + int dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int flag) @@ -305,13 +364,13 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int ret = -1; struct iatt new_stbuf = {0,}; struct iatt stbuf = {0,}; + struct iatt empty_iatt = {0,}; fd_t *src_fd = NULL; fd_t *dst_fd = NULL; dict_t *dict = NULL; dict_t *xattr = NULL; dict_t *rsp_dict = NULL; int file_has_holes = 0; - int need_unlink = 0; gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -326,6 +385,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, "%s: failed to set fd-count key in dict, may attempt " "migration of file which has open fds", loc->path); + /* Phase 1 - Data migration is in progress from now on */ ret = syncop_lookup (from, loc, dict, &stbuf, &rsp_dict, NULL); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s", @@ -338,9 +398,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) goto out; - /* create the destination */ - ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd, - &need_unlink); + /* create the destination, with required modes/xattr */ + ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd); if (ret) goto out; @@ -351,81 +410,142 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* Try to preserve 'holes' while migrating data */ - if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) - file_has_holes = 1; - - src_fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!src_fd) { - gf_log (this->name, GF_LOG_ERROR, - "%s: fd create failed (source)", loc->path); - ret = -1; + /* Open the source, and also update mode/xattr */ + ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s", + loc->path, from->name); goto out; } - ret = syncop_open (from, loc, O_RDONLY, src_fd); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s on %s", + ret = syncop_fstat (from, src_fd, &stbuf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s", loc->path, from->name); goto out; } + /* Try to preserve 'holes' while migrating data */ + if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) + file_has_holes = 1; + /* All I/O happens in this function */ ret = __dht_rebalane_migrate_data (from, to, src_fd, dst_fd, - file_has_holes); + stbuf.ia_size, file_has_holes); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data", loc->path); goto out; } - ret = syncop_lookup (from, loc, NULL, &new_stbuf, NULL, NULL); + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr (from, loc, &xattr); + if (ret == -1) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get xattr from %s", loc->path, from->name); + + ret = syncop_setxattr (to, loc, xattr, 0); + if (ret == -1) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set xattr on %s", loc->path, to->name); + + /* TODO: Sync the locks */ + + ret = syncop_fsync (to, dst_fd); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fsync on %s", loc->path, to->name); + + + /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ + + ret = syncop_fstat (from, src_fd, &new_stbuf); if (ret < 0) { /* Failed to get the stat info */ gf_log (this->name, GF_LOG_ERROR, - "failed to lookup file %s on %s", + "failed to fstat file %s on %s", loc->path, from->name); - need_unlink = 0; goto out; } - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.ia_mtime != stbuf.ia_mtime) { + /* source would have both sticky bit and sgid bit set, reset it to 0, + and set the source permission on destination */ + new_stbuf.ia_prot.sticky = 0; + new_stbuf.ia_prot.sgid = 0; + + /* TODO: if the source actually had sticky bit, or sgid bit set, + we are not handling it */ + + ret = syncop_fsetattr (to, dst_fd, &new_stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | + GF_SET_ATTR_MODE), NULL, NULL); + if (ret) { gf_log (this->name, GF_LOG_WARNING, - "%s: ignoring destination file as source has " - "undergone some changes while migration was happening", - loc->path); - ret = -1; - goto out; + "%s: failed to perform setattr on %s", + loc->path, to->name); } + /* Because 'futimes' is not portable */ ret = syncop_setattr (to, loc, &new_stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_MODE | GF_SET_ATTR_ATIME | - GF_SET_ATTR_MTIME), NULL, NULL); + (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), + NULL, NULL); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform setattr on %s", loc->path, to->name); } - ret = syncop_listxattr (from, loc, &xattr); - if (ret == -1) + /* Make the source as a linkfile first before deleting it */ + empty_iatt.ia_prot.sticky = 1; + ret = syncop_fsetattr (from, src_fd, &empty_iatt, + GF_SET_ATTR_MODE, NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, \ + "%s: failed to perform setattr on %s", + loc->path, from->name); + } + + /* Do a stat and check the gfid before unlink */ + ret = syncop_stat (from, loc, &empty_iatt); + if (ret) { gf_log (this->name, GF_LOG_WARNING, - "%s: failed to get xattr from %s", loc->path, from->name); + "%s: failed to do a stat on %s", + loc->path, from->name); + } - ret = syncop_setxattr (to, loc, xattr, 0); - if (ret == -1) + if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) { + /* take out the source from namespace */ + ret = syncop_unlink (from, loc); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform unlink on %s", + loc->path, from->name); + } + } + + /* Free up the data blocks on the source node, as the whole + file is migrated */ + ret = syncop_ftruncate (from, src_fd, 0); + if (ret) { gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set xattr on %s", loc->path, to->name); + "%s: failed to perform truncate on %s", + loc->path, from->name); + } - /* rebalance complete */ - syncop_close (dst_fd); - syncop_close (src_fd); - syncop_unlink (from, loc); - need_unlink = 0; + /* remove the 'linkto' xattr from the destination */ + ret = syncop_removexattr (to, loc, DHT_LINKFILE_KEY); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform removexattr on %s", + loc->path, to->name); + } + + ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to lookup the file on subvolumes", + loc->path); + } gf_log (this->name, GF_LOG_INFO, "completed migration of %s from subvolume %s to %s", @@ -436,14 +556,10 @@ out: if (dict) dict_unref (dict); - if (ret) { - if (dst_fd) - syncop_close (dst_fd); - if (src_fd) - syncop_close (src_fd); - if (need_unlink) - syncop_unlink (to, loc); - } + if (dst_fd) + syncop_close (dst_fd); + if (src_fd) + syncop_close (src_fd); return ret; } @@ -461,8 +577,8 @@ rebalance_task (void *data) /* This function is 'synchrounous', hence if it returns, we are done with the task */ - ret = dht_migrate_file (THIS, &local->loc, local->from_subvol, - local->to_subvol, local->flags); + ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol, + local->rebalance.target_node, local->flags); return ret; } @@ -488,7 +604,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) dht_layout_unref (this, layout); } - ret = dht_layout_preset (this, local->to_subvol, + ret = dht_layout_preset (this, local->rebalance.target_node, local->loc.inode); if (ret) gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index bdc4dd2b678..226ce280d8b 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -487,17 +487,18 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - link_local = dht_local_init (link_frame); + /* fop value sent as maxvalue because it is not used + anywhere in this case */ + link_local = dht_local_init (link_frame, &local->loc2, NULL, + GF_FOP_MAXVALUE); if (!link_local) { goto err; } - loc_copy (&link_local->loc, &local->loc2); if (link_local->loc.inode) inode_unref (link_local->loc.inode); link_local->loc.inode = inode_ref (local->loc.inode); uuid_copy (link_local->gfid, local->loc.inode->gfid); - link_frame->local = link_local; dht_linkfile_create (link_frame, dht_rename_links_create_cbk, src_cached, dst_hashed, &link_local->loc); @@ -813,17 +814,14 @@ dht_rename (call_frame_t *frame, xlator_t *this, if (newloc->inode) dst_cached = dht_subvol_get_cached (this, newloc->inode); - local = dht_local_init (frame); + local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME); if (!local) { op_errno = ENOMEM; goto err; } - - ret = loc_copy (&local->loc, oldloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } + /* cached_subvol will be set from dht_local_init, reset it to NULL, + as the logic of handling rename is different */ + local->cached_subvol = NULL; ret = loc_copy (&local->loc2, newloc); if (ret == -1) { diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 6c4c1ffcdcd..87a5756546f 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -26,7 +26,7 @@ /* TODO: add NS locking */ #include "statedump.h" -#include "dht-common.c" +#include "dht-common.h" /* TODO: - use volumename in xattr instead of "dht" @@ -419,23 +419,8 @@ struct xlator_fops fops = { .mknod = dht_mknod, .create = dht_create, - .stat = dht_stat, - .fstat = dht_fstat, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .access = dht_access, - .readlink = dht_readlink, - .setxattr = dht_setxattr, - .fsetxattr = dht_fsetxattr, - .getxattr = dht_getxattr, - .removexattr = dht_removexattr, .open = dht_open, - .readv = dht_readv, - .writev = dht_writev, - .flush = dht_flush, - .fsync = dht_fsync, .statfs = dht_statfs, - .lk = dht_lk, .opendir = dht_opendir, .readdir = dht_readdir, .readdirp = dht_readdirp, @@ -446,10 +431,29 @@ struct xlator_fops fops = { .mkdir = dht_mkdir, .rmdir = dht_rmdir, .rename = dht_rename, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, .entrylk = dht_entrylk, .fentrylk = dht_fentrylk, + + /* Inode read operations */ + .stat = dht_stat, + .fstat = dht_fstat, + .access = dht_access, + .readlink = dht_readlink, + .getxattr = dht_getxattr, + .readv = dht_readv, + .flush = dht_flush, + .fsync = dht_fsync, + .inodelk = dht_inodelk, + .finodelk = dht_finodelk, + .lk = dht_lk, + + /* Inode write operations */ + .removexattr = dht_removexattr, + .setxattr = dht_setxattr, + .fsetxattr = dht_fsetxattr, + .truncate = dht_truncate, + .ftruncate = dht_ftruncate, + .writev = dht_writev, .xattrop = dht_xattrop, .fxattrop = dht_fxattrop, .setattr = dht_setattr, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 845c6b74ee1..9dcf224d177 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -23,7 +23,7 @@ #include "config.h" #endif -#include "dht-common.c" +#include "dht-common.h" /* TODO: all 'TODO's in dht.c holds good */ @@ -169,21 +169,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); if (!local) { op_errno = ENOMEM; goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "copying location failed for path=%s", - loc->path); - goto err; - } - if (xattr_req) { local->xattr_req = dict_ref (xattr_req); } else { @@ -191,14 +182,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, } hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - cached_subvol = dht_subvol_get_cached (this, local->loc.inode); + cached_subvol = local->cached_subvol; - local->cached_subvol = cached_subvol; local->hashed_subvol = hashed_subvol; if (is_revalidate (loc)) { - local->layout = layout = dht_layout_get (this, loc->inode); - + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "revalidate without cache. path=%s", @@ -215,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, goto do_fresh_lookup; } - local->inode = inode_ref (loc->inode); + local->inode = inode_ref (loc->inode); local->call_cnt = layout->cnt; call_cnt = local->call_cnt; @@ -314,7 +303,6 @@ nufa_create (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -324,7 +312,7 @@ nufa_create (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); if (!local) { op_errno = ENOMEM; goto err; @@ -348,13 +336,6 @@ nufa_create (call_frame_t *frame, xlator_t *this, if (subvol != avail_subvol) { /* create a link file instead of actual file */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref (fd); local->params = dict_ref (params); local->mode = mode; local->flags = flags; @@ -421,7 +402,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -431,7 +411,7 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); if (!local) { op_errno = ENOMEM; goto err; @@ -456,11 +436,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, if (avail_subvol != subvol) { /* Create linkfile first */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } local->params = dict_ref (params); local->mode = mode; diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index e6a2e5d5cb7..fd3f22ea053 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -23,7 +23,7 @@ #include "config.h" #endif -#include "dht-common.c" +#include "dht-common.h" #include "dht-mem-types.h" #include <sys/time.h> @@ -246,21 +246,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this, conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); if (!local) { op_errno = ENOMEM; goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "copying location failed for path=%s", - loc->path); - goto err; - } - if (xattr_req) { local->xattr_req = dict_ref (xattr_req); } else { @@ -268,14 +259,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this, } hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - cached_subvol = dht_subvol_get_cached (this, local->loc.inode); + cached_subvol = local->cached_subvol; - local->cached_subvol = cached_subvol; local->hashed_subvol = hashed_subvol; if (is_revalidate (loc)) { - local->layout = layout = dht_layout_get (this, loc->inode); - + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "revalidate without cache. path=%s", @@ -418,7 +407,6 @@ switch_create (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -428,7 +416,7 @@ switch_create (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); if (!local) { op_errno = ENOMEM; goto err; @@ -451,13 +439,6 @@ switch_create (call_frame_t *frame, xlator_t *this, if (subvol != avail_subvol) { /* create a link file instead of actual file */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref (fd); local->mode = mode; local->flags = flags; @@ -520,7 +501,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -530,7 +510,7 @@ switch_mknod (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); if (!local) { op_errno = ENOMEM; goto err; @@ -554,11 +534,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this, if (avail_subvol != subvol) { /* Create linkfile first */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } local->params = dict_ref (params); local->mode = mode; |