diff options
| -rw-r--r-- | libglusterfs/src/syncop.c | 130 | ||||
| -rw-r--r-- | libglusterfs/src/syncop.h | 7 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 1523 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 413 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 416 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 1115 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 597 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-linkfile.c | 7 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 326 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 18 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 40 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/nufa.c | 39 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/switch.c | 37 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server3_1-fops.c | 7 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 4 | 
17 files changed, 3040 insertions, 1657 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 76d2b5811..bbcf5201e 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -494,8 +494,8 @@ syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)  }  int -syncop_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                      int op_ret, int op_errno, dict_t *dict) +syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int op_ret, int op_errno, dict_t *dict)  {          struct syncargs *args = NULL; @@ -516,7 +516,7 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)  {          struct syncargs args = {0, }; -        SYNCOP (subvol, (&args), syncop_listxattr_cbk, subvol->fops->getxattr, +        SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,                  loc, NULL);          if (dict) @@ -527,6 +527,36 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)  }  int +syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, +                loc, key); + +        if (dict) +                *dict = args.xattr; + +        errno = args.op_errno; +        return args.op_ret; +} + +int +syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr, +                fd, key); + +        if (dict) +                *dict = args.xattr; + +        errno = args.op_errno; +        return args.op_ret; +} + +int  syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                     int32_t op_ret, int32_t op_errno,                     struct statvfs *buf) @@ -856,3 +886,97 @@ syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset)          errno = args.op_errno;          return args.op_ret;  } + +int +syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate, +                loc, offset); + +        errno = args.op_errno; +        return args.op_ret; +} + +int +syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                  int32_t op_ret, int32_t op_errno, +                  struct iatt *prebuf, struct iatt *postbuf) +{ +        struct syncargs *args = NULL; + +        args = cookie; + +        args->op_ret   = op_ret; +        args->op_errno = op_errno; + +        __wake (args); + +        return 0; + +} + +int +syncop_fsync (xlator_t *subvol, fd_t *fd) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, +                fd, 0); + +        errno = args.op_errno; +        return args.op_ret; + +} + +int +syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                  int32_t op_ret, int32_t op_errno, struct iatt *stbuf) +{ +        struct syncargs *args = NULL; + +        args = cookie; + +        args->op_ret   = op_ret; +        args->op_errno = op_errno; +        if (op_ret == 0) +                args->iatt1 = *stbuf; + +        __wake (args); + +        return 0; + +} + +int +syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat, +                fd); + +        if (stbuf) +                *stbuf = args.iatt1; + +        errno = args.op_errno; +        return args.op_ret; + +} + +int +syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf) +{ +        struct syncargs args = {0, }; + +        SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat, +                loc); + +        if (stbuf) +                *stbuf = args.iatt1; + +        errno = args.op_errno; +        return args.op_ret; + +} diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index bb898e11e..1c3fe07b5 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -181,6 +181,8 @@ int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);  int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);  int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict); +int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key); +int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key);  int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);  int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode, @@ -197,8 +199,13 @@ int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,                    struct iovec **vector, int *count, struct iobref **iobref);  int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset); +int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);  int syncop_unlink (xlator_t *subvol, loc_t *loc); +int syncop_fsync (xlator_t *subvol, fd_t *fd); +int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf); +int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf); +  #endif /* _SYNCOP_H */ diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index cf883a974..e35058d65 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -2,10 +2,10 @@  xlator_LTLIBRARIES = dht.la nufa.la switch.la  xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -  dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \ -		dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \ -		$(top_builddir)/xlators/lib/src/libxlator.c +	dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \ +	dht-common.c dht-inode-write.c dht-inode-read.c \ +	$(top_builddir)/xlators/lib/src/libxlator.c  dht_la_SOURCES = $(dht_common_source) dht.c @@ -21,7 +21,8 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la  switch_la_LDFLAGS = -module -avoidversion  switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = dht-common.h dht-common.c dht-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = dht-common.h dht-mem-types.h \ +	$(top_builddir)/xlators/lib/src/libxlator.h  AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \  	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \ diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index b5168605b..6f8594e30 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -309,6 +309,14 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  local->return_estale = 1;                          } +                        /* if it is ENOENT, we may have to do a +                         * 'lookup_everywhere()' to make sure +                         * the file is not migrated */ +                        if (op_errno == ENOENT) { +                                if (IA_ISREG (local->loc.inode->ia_type)) { +                                        local->need_lookup_everywhere = 1; +                                } +                        }                          goto unlock;                  } @@ -386,6 +394,18 @@ out:                          return 0;                  } +                if (local->need_lookup_everywhere) { +                        /* As the current layout gave ENOENT error, we would +                           need a new layout */ +                        dht_layout_unref (this, local->layout); +                        local->layout = NULL; + +                        /* We know that current cached subvol is no more +                           valid, get the new one */ +                        local->cached_subvol = NULL; +                        dht_lookup_everywhere (frame, this, &local->loc); +                        return 0; +                }                  if (local->return_estale) {                          local->op_ret = -1;                          local->op_errno = ESTALE; @@ -459,7 +479,7 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)          dht_local_t  *local = NULL;          xlator_t     *hashed_subvol = NULL;          xlator_t     *cached_subvol = NULL; - +        dht_layout_t *layout = NULL;          local = frame->local;          hashed_subvol = local->hashed_subvol; @@ -487,6 +507,42 @@ dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)                  return 0;          } +        if (local->need_lookup_everywhere) { +                if (uuid_compare (local->gfid, local->inode->gfid)) { +                        /* GFID different, return error */ +                        DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, +                                          NULL, NULL, NULL); +                        return 0; +                } +                local->op_ret = 0; +                local->op_errno = 0; +                layout = dht_layout_for_subvol (this, cached_subvol); +                if (!layout) { +                        gf_log (this->name, GF_LOG_INFO, +                                "%s: no pre-set layout for subvolume %s", +                                local->loc.path, (cached_subvol ? +                                                  cached_subvol->name : +                                                  "<nil>")); +                } + +                ret = dht_layout_set (this, local->inode, layout); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_INFO, +                                "%s: failed to set layout for subvol %s", +                                local->loc.path, (cached_subvol ? +                                                  cached_subvol->name : +                                                  "<nil>")); +                } + +                WIPE (&local->postparent); + +                DHT_STACK_UNWIND (lookup, frame, local->op_ret, +                                  local->op_errno, local->inode, +                                  &local->stbuf, local->xattr, +                                  &local->postparent); +                return 0; +        } +          if (!hashed_subvol) {                  gf_log (this->name, GF_LOG_INFO,                          "cannot create linkfile file for %s on %s: " @@ -560,6 +616,7 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          loc_t        *loc           = NULL;          xlator_t     *link_subvol   = NULL;          int           ret = -1; +        int32_t       fd_count = 0;          GF_VALIDATE_OR_GOTO ("dht", frame, out);          GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -642,12 +699,17 @@ unlock:          UNLOCK (&frame->lock);          if (is_linkfile) { -                gf_log (this->name, GF_LOG_INFO, -                        "deleting stale linkfile %s on %s", -                        loc->path, subvol->name); -                STACK_WIND (frame, dht_lookup_unlink_cbk, -                            subvol, subvol->fops->unlink, loc); -                return 0; +                ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count); +                /* Delete the linkfile only if there are no open fds on it. +                   if there is a open-fd, it may be in migration */ +                if (!ret && (fd_count == 0)) { +                        gf_log (this->name, GF_LOG_INFO, +                                "deleting stale linkfile %s on %s", +                                loc->path, subvol->name); +                        STACK_WIND (frame, dht_lookup_unlink_cbk, +                                    subvol, subvol->fops->unlink, loc); +                        return 0; +                }          }          this_call_cnt = dht_frame_return (frame); @@ -980,7 +1042,7 @@ dht_lookup (call_frame_t *frame, xlator_t *this,          if (!conf)                  goto err; -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -1003,16 +1065,14 @@ dht_lookup (call_frame_t *frame, xlator_t *this,                  local->xattr_req = dict_new ();          } + +        cached_subvol = local->cached_subvol;          if (!hashed_subvol)                  hashed_subvol = dht_subvol_get_hashed (this, loc); -        cached_subvol = dht_subvol_get_cached (this, loc->inode); - -        local->cached_subvol = cached_subvol;          local->hashed_subvol = hashed_subvol;          if (is_revalidate (loc)) { -                local->layout = layout = dht_layout_get (this, loc->inode); - +                layout = local->layout;                  if (!layout) {                          gf_log (this->name, GF_LOG_DEBUG,                                  "revalidate without cache. path=%s", @@ -1032,10 +1092,9 @@ dht_lookup (call_frame_t *frame, xlator_t *this,                          goto do_fresh_lookup;                  } -                local->inode    = inode_ref (loc->inode); +                local->inode = inode_ref (loc->inode); -                local->call_cnt = layout->cnt; -                call_cnt = local->call_cnt; +                call_cnt = local->call_cnt = layout->cnt;                  /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,                   *       revalidates directly go to the cached-subvolume. @@ -1043,6 +1102,11 @@ dht_lookup (call_frame_t *frame, xlator_t *this,                  ret = dict_set_uint32 (local->xattr_req,                                         "trusted.glusterfs.dht", 4 * 4); +                /* need it for self-healing linkfiles which is +                   'in-migration' state */ +                ret = dict_set_uint32 (local->xattr_req, +                                       GLUSTERFS_OPEN_FD_COUNT, 4); +  		for (i = 0; i < layout->cnt; i++) {  			subvol = layout->list[i].xlator; @@ -1060,7 +1124,12 @@ dht_lookup (call_frame_t *frame, xlator_t *this,                                         "trusted.glusterfs.dht", 4 * 4);                  ret = dict_set_uint32 (local->xattr_req, -                                       "trusted.glusterfs.dht.linkto", 256); +                                       DHT_LINKFILE_KEY, 256); + +                /* need it for self-healing linkfiles which is +                   'in-migration' state */ +                ret = dict_set_uint32 (local->xattr_req, +                                       GLUSTERFS_OPEN_FD_COUNT, 4);                  if (!hashed_subvol) {                          gf_log (this->name, GF_LOG_DEBUG, @@ -1101,286 +1170,6 @@ err:  int -dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                  int op_ret, int op_errno, struct iatt *prebuf, -                  struct iatt *postbuf) -{ -        dht_local_t  *local = NULL; -        int           this_call_cnt = 0; -        call_frame_t *prev = NULL; - -        GF_VALIDATE_OR_GOTO ("dht", frame, err); -        GF_VALIDATE_OR_GOTO ("dht", this, out); -        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); -        GF_VALIDATE_OR_GOTO ("dht", cookie, out); - -        local = frame->local; -        prev = cookie; - -        LOCK (&frame->lock); -        { -                if (op_ret == -1) { -                        local->op_errno = op_errno; -                        local->op_ret = -1; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "subvolume %s returned -1 (%s)", -                                prev->this->name, strerror (op_errno)); -                        goto unlock; -                } - -                dht_iatt_merge (this, &local->prebuf, prebuf, prev->this); -                dht_iatt_merge (this, &local->stbuf, postbuf, prev->this); - -                local->op_ret = 0; -        } -unlock: -        UNLOCK (&frame->lock); -out: -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_UNWIND (truncate, frame, local->op_ret, local->op_errno, -                                  &local->prebuf, &local->stbuf); -err: -        return 0; -} - - - -int -dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -              int op_ret, int op_errno, struct iatt *stbuf) -{ -        dht_local_t  *local = NULL; -        int           this_call_cnt = 0; -        call_frame_t *prev = NULL; - -        GF_VALIDATE_OR_GOTO ("dht", frame, err); -        GF_VALIDATE_OR_GOTO ("dht", this, out); -        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); -        GF_VALIDATE_OR_GOTO ("dht", cookie, out); - -        local = frame->local; -        prev = cookie; - -        LOCK (&frame->lock); -        { -                if (op_ret == -1) { -                        local->op_errno = op_errno; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "subvolume %s returned -1 (%s)", -                                prev->this->name, strerror (op_errno)); -                        goto unlock; -                } - -                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); - -                local->op_ret = 0; -        } -unlock: -        UNLOCK (&frame->lock); -out: -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, -                                  &local->stbuf); -err: -        return 0; -} - - -int -dht_stat (call_frame_t *frame, xlator_t *this, -          loc_t *loc) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; -        dht_layout_t *layout = NULL; -        int           i = 0; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, loc->inode); -        if (!layout) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no layout for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local->inode = inode_ref (loc->inode); -        local->call_cnt = layout->cnt; - -        for (i = 0; i < layout->cnt; i++) { -                subvol = layout->list[i].xlator; - -                STACK_WIND (frame, dht_attr_cbk, -                            subvol, subvol->fops->stat, -                            loc); -        } - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); - -        return 0; -} - - -int -dht_fstat (call_frame_t *frame, xlator_t *this, -           fd_t *fd) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; -        dht_layout_t *layout = NULL; -        int           i = 0; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, fd->inode); -        if (!layout) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no layout for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local->inode    = inode_ref (fd->inode); -        local->call_cnt = layout->cnt;; - -        for (i = 0; i < layout->cnt; i++) { -                subvol = layout->list[i].xlator; -                STACK_WIND (frame, dht_attr_cbk, -                            subvol, subvol->fops->fstat, -                            fd); -        } - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); - -        return 0; -} - - -int -dht_truncate (call_frame_t *frame, xlator_t *this, -              loc_t *loc, off_t offset) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        subvol = dht_subvol_get_cached (this, loc->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->inode = inode_ref (loc->inode); -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_truncate_cbk, -                    subvol, subvol->fops->truncate, -                    loc, offset); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int -dht_ftruncate (call_frame_t *frame, xlator_t *this, -               fd_t *fd, off_t offset) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->inode = inode_ref (fd->inode); -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_truncate_cbk, -                    subvol, subvol->fops->ftruncate, -                    fd, offset); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int  dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  int op_ret, int op_errno, struct iatt *preparent,                  struct iatt *postparent) @@ -1473,42 +1262,6 @@ err:  } -int -dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, -               int op_errno, struct iatt *prebuf, struct iatt *postbuf) -{ -        dht_local_t  *local = NULL; -        int           this_call_cnt = 0; -        call_frame_t *prev = NULL; - - -        local = frame->local; -        prev = cookie; - -        LOCK (&frame->lock); -        { -                if (op_ret == -1) { -                        local->op_errno = op_errno; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "subvolume %s returned -1 (%s)", -                                prev->this->name, strerror (op_errno)); -                        goto unlock; -                } - -                local->op_ret = 0; -        } -unlock: -        UNLOCK (&frame->lock); - -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, -                                  prebuf, postbuf); - -        return 0; -} - -  int  dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1518,7 +1271,6 @@ dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          int           this_call_cnt = 0;          call_frame_t *prev = NULL; -          local = frame->local;          prev = cookie; @@ -1545,125 +1297,6 @@ unlock:          return 0;  } - -int -dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -             int op_ret, int op_errno) -{ -        DHT_STACK_UNWIND (access, frame, op_ret, op_errno); -        return 0; -} - - -int -dht_access (call_frame_t *frame, xlator_t *this, -            loc_t *loc, int32_t mask) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        subvol = dht_subvol_get_cached (this, loc->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_access_cbk, -                    subvol, subvol->fops->access, -                    loc, mask); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (access, frame, -1, op_errno); - -        return 0; -} - - -int -dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                  int op_ret, int op_errno, const char *path, struct iatt *sbuf) -{ -        dht_local_t *local = NULL; - -        local = frame->local; -        if (op_ret == -1) -                goto err; - -        if (!local) { -                op_ret = -1; -                op_errno = EINVAL; -        } - -err: -        DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf); - -        return 0; -} - - -int -dht_readlink (call_frame_t *frame, xlator_t *this, -              loc_t *loc, size_t size) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        subvol = dht_subvol_get_cached (this, loc->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        STACK_WIND (frame, dht_readlink_cbk, -                    subvol, subvol->fops->readlink, -                    loc, size); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - -  static void  fill_layout_info (dht_layout_t *layout, char *buf)  { @@ -1869,7 +1502,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,          dht_layout_t *layout        = NULL;          xlator_t     **sub_volumes  = NULL;          int           op_errno      = -1; -        int           ret           = 0;          int           i             = 0;          int           cnt           = 0; @@ -1881,38 +1513,33 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (this->private, err);          conf   = this->private; -        layout = dht_layout_get (this, loc->inode); -        if (!layout) { -                gf_log (this->name, GF_LOG_ERROR, -                        "layout is NULL"); -                op_errno = ENOENT; -                goto err; -        } -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);          if (!local) {                  op_errno = ENOMEM;                  goto err;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; +        layout = local->layout; +        if (!layout) { +                gf_log (this->name, GF_LOG_ERROR, +                        "layout is NULL"); +                op_errno = ENOENT;                  goto err;          } -        local->layout = layout; - -        if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) { -                hashed_subvol = dht_subvol_get_hashed (this, loc); -                cached_subvol = dht_subvol_get_cached (this, loc->inode); +        if (key) {                  local->key = gf_strdup (key);                  if (!local->key) {                          op_errno = ENOMEM; -                          goto err;                  } +        } + +        if (key && (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)) { +                hashed_subvol = dht_subvol_get_hashed (this, loc); +                cached_subvol = local->cached_subvol;                  local->call_cnt = 1;                  if (hashed_subvol != cached_subvol) { @@ -1990,14 +1617,6 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,                  }          } -        if (key) { -                local->key = gf_strdup (key); -                if (!local->key) { -                        op_errno = ENOMEM; -                        goto err; -                } -        } -          if (loc->inode-> ia_type == IA_IFDIR) {                  cnt = local->call_cnt = layout->cnt;          } else { @@ -2032,7 +1651,13 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (fd, err);          VALIDATE_OR_GOTO (fd->inode, err); -        subvol = dht_subvol_get_cached (this, fd->inode); +        local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol;          if (!subvol) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no cached subvolume for fd=%p", fd); @@ -2040,13 +1665,6 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->inode = inode_ref (fd->inode);          local->call_cnt = 1;          STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr, @@ -2095,7 +1713,13 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (loc->path, err);          conf   = this->private; -        subvol = dht_subvol_get_cached (this, loc->inode); +        local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol;          if (!subvol) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no cached subvolume for path=%s", loc->path); @@ -2103,13 +1727,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, loc->inode); +        layout = local->layout;          if (!layout) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no layout for path=%s", loc->path); @@ -2117,12 +1735,6 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,                  goto err;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; -                goto err; -        } -          tmp = dict_get (xattr, "distribute.migrate-data");          if (tmp) {                  if (!IA_ISREG (loc->inode->ia_type)) { @@ -2136,13 +1748,14 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,                  if (strcmp (value, "force") == 0)                          forced_rebalance = 1; -                local->to_subvol   = dht_subvol_get_hashed (this, loc); -                local->from_subvol = dht_subvol_get_cached (this, loc->inode); -                if (local->to_subvol == local->from_subvol) { -                        op_errno = ENOTSUP; +                local->rebalance.target_node = dht_subvol_get_hashed (this, loc); +                local->rebalance.from_subvol = local->cached_subvol; + +                if (local->rebalance.target_node == local->rebalance.from_subvol) { +                        op_errno = EEXIST;                          goto err;                  } -                if (local->to_subvol) { +                if (local->rebalance.target_node) {                          local->flags = forced_rebalance;                          ret = dht_start_rebalance_task (this, frame); @@ -2153,7 +1766,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,                                  "%s: failed to create a new synctask",                                  loc->path);                  } -                op_errno = ENOTSUP; +                op_errno = EINVAL;                  goto err;          } @@ -2259,7 +1872,13 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (loc->inode, err);          VALIDATE_OR_GOTO (loc->path, err); -        subvol = dht_subvol_get_cached (this, loc->inode); +        local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol;          if (!subvol) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no cached subvolume for path=%s", loc->path); @@ -2267,13 +1886,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, loc->inode); +        layout = local->layout;          if (!local->layout) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no layout for path=%s", loc->path); @@ -2282,6 +1895,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,          }          local->call_cnt = layout->cnt; +        local->key = gf_strdup (key);          for (i = 0; i < layout->cnt; i++) {                  STACK_WIND (frame, dht_removexattr_cbk, @@ -2308,7 +1922,6 @@ dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          int           this_call_cnt = 0;          call_frame_t *prev = NULL; -          local = frame->local;          prev = cookie; @@ -2335,318 +1948,6 @@ unlock:          return 0;  } - -int -dht_open (call_frame_t *frame, xlator_t *this, -          loc_t *loc, int flags, fd_t *fd, int wbflags) -{ -        xlator_t     *subvol = NULL; -        int           ret = -1; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->fd = fd_ref (fd); -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_fd_cbk, -                    subvol, subvol->fops->open, -                    loc, flags, fd, wbflags); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL); - -        return 0; -} - - -int -dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -               int op_ret, int op_errno, -               struct iovec *vector, int count, struct iatt *stbuf, -               struct iobref *iobref) -{ -        dht_local_t     *local = frame->local; - -        if (!local) { -                op_ret = -1; -                op_errno = EINVAL; -                goto out; -        } - -out: -        DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, -                          iobref); - -        return 0; -} - - -int -dht_readv (call_frame_t *frame, xlator_t *this, -           fd_t *fd, size_t size, off_t off) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                goto err; -        } - -        STACK_WIND (frame, dht_readv_cbk, -                    subvol, subvol->fops->readv, -                    fd, size, off); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); - -        return 0; -} - - -int -dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                int op_ret, int op_errno, struct iatt *prebuf, -                struct iatt *postbuf) -{ -        dht_local_t *local = NULL; - -        if (op_ret == -1) { -                goto out; -        } - -        local = frame->local; -        if (!local) { -                op_ret = -1; -                op_errno = EINVAL; -                goto out; -        } - -out: -        DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); - -        return 0; -} - - -int -dht_writev (call_frame_t *frame, xlator_t *this, -            fd_t *fd, struct iovec *vector, int count, off_t off, -            struct iobref *iobref) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { - -                op_errno = ENOMEM; -                goto err; -        } - -        STACK_WIND (frame, dht_writev_cbk, -                    subvol, subvol->fops->writev, -                    fd, vector, count, off, iobref); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int -dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->fd = fd_ref (fd); -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_err_cbk, -                    subvol, subvol->fops->flush, fd); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (flush, frame, -1, op_errno); - -        return 0; -} - - -int -dht_fsync (call_frame_t *frame, xlator_t *this, -           fd_t *fd, int datasync) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } -        local->call_cnt = 1; - -        STACK_WIND (frame, dht_fsync_cbk, -                    subvol, subvol->fops->fsync, -                    fd, datasync); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int -dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -            int op_ret, int op_errno, struct gf_flock *flock) -{ -        DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock); - -        return 0; -} - - -int -dht_lk (call_frame_t *frame, xlator_t *this, -        fd_t *fd, int cmd, struct gf_flock *flock) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        STACK_WIND (frame, dht_lk_cbk, -                    subvol, subvol->fops->lk, -                    fd, cmd, flock); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL); - -        return 0; -} -  /*   * dht_normalize_stats -   */ @@ -2742,7 +2043,7 @@ dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -2787,11 +2088,9 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)  {          dht_local_t  *local  = NULL;          dht_conf_t   *conf = NULL; -        int           ret = -1;          int           op_errno = -1;          int           i = -1; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (fd, err); @@ -2799,21 +2098,13 @@ dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);          if (!local) {                  op_errno = ENOMEM;                  goto err;          } -        local->fd = fd_ref (fd); -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; - -                goto err; -        } -          local->call_cnt = conf->subvolume_cnt;          for (i = 0; i < conf->subvolume_cnt; i++) { @@ -2865,7 +2156,7 @@ dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,          list_for_each_entry (orig_entry, (&orig_entries->list), list) {                  next_offset = orig_entry->d_off; -                if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL) +                if (check_is_linkfile_wo_dict (NULL, (&orig_entry->d_stat))                      || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)                          && (prev->this != dht_first_up_subvol (this)))) {                          continue; @@ -3053,9 +2344,8 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (fd, err); -        local = dht_local_init (frame); +        local = dht_local_init (frame, NULL, NULL, whichop);          if (!local) { -                  op_errno = ENOMEM;                  goto err;          } @@ -3156,7 +2446,6 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)          int           op_errno = -1;          int           i = -1; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (fd, err); @@ -3164,10 +2453,9 @@ dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);          if (!local) {                  op_errno = ENOMEM; -                  goto err;          } @@ -3275,7 +2563,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this,  {          xlator_t    *subvol = NULL;          int          op_errno = -1; -        int          ret = -1;          xlator_t    *avail_subvol = NULL;          dht_local_t *local = NULL; @@ -3285,10 +2572,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);          if (!local) {                  op_errno = ENOMEM; -                  goto err;          } @@ -3301,13 +2587,6 @@ dht_mknod (call_frame_t *frame, xlator_t *this,                  goto err;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; - -                goto err; -        } -          if (!dht_is_subvol_filled (this, subvol)) {                  gf_log (this->name, GF_LOG_TRACE,                          "creating %s on %s", loc->path, subvol->name); @@ -3357,17 +2636,14 @@ dht_symlink (call_frame_t *frame, xlator_t *this,          xlator_t    *subvol = NULL;          int          op_errno = -1;          dht_local_t *local = NULL; -        int          ret = -1; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (loc, err); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);          if (!local) {                  op_errno = ENOMEM; -                  goto err;          } @@ -3380,13 +2656,6 @@ dht_symlink (call_frame_t *frame, xlator_t *this,                  goto err;          } -        ret = loc_copy (&local->loc, loc); -        if (ret == -1) { -                gf_log (this->name, GF_LOG_TRACE, "Failed to copy loc"); -                op_errno = ENOMEM; -                goto err; -        } -          gf_log (this->name, GF_LOG_TRACE,                  "creating %s on %s", loc->path, subvol->name); @@ -3410,11 +2679,9 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)  {          xlator_t    *cached_subvol = NULL;          xlator_t    *hashed_subvol = NULL; -        int          ret = -1;          int          op_errno = -1;          dht_local_t *local = NULL; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (loc, err); @@ -3430,11 +2697,10 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)                  goto done;          } -        cached_subvol = dht_subvol_get_cached (this, loc->inode); -        if (!cached_subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; +        local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK); +        if (!local) { +                op_errno = ENOMEM; +                  goto err;          } @@ -3447,17 +2713,11 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        ret = loc_copy (&local->loc, loc); -        if (ret == -1) { -                op_errno = ENOMEM; - +        cached_subvol = local->cached_subvol; +        if (!cached_subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL;                  goto err;          } @@ -3522,15 +2782,13 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          dht_local_t  *local = NULL;          xlator_t     *srcvol = NULL; -          if (op_ret == -1)                  goto err;          local = frame->local;          srcvol = local->linkfile.srcvol; -        STACK_WIND (frame, dht_link_cbk, -                    srcvol, srcvol->fops->link, +        STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,                      &local->loc, &local->loc2);          return 0; @@ -3553,13 +2811,19 @@ dht_link (call_frame_t *frame, xlator_t *this,          int          ret = -1;          dht_local_t *local = NULL; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err);          VALIDATE_OR_GOTO (oldloc, err);          VALIDATE_OR_GOTO (newloc, err); -        cached_subvol = dht_subvol_get_cached (this, oldloc->inode); +        local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK); +        if (!local) { +                op_errno = ENOMEM; + +                goto err; +        } + +        cached_subvol = local->cached_subvol;          if (!cached_subvol) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no cached subvolume for path=%s", oldloc->path); @@ -3576,29 +2840,14 @@ dht_link (call_frame_t *frame, xlator_t *this,                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        ret = loc_copy (&local->loc, oldloc); -        if (ret == -1) { -                op_errno = ENOMEM; - -                goto err; -        } -          ret = loc_copy (&local->loc2, newloc);          if (ret == -1) {                  op_errno = ENOMEM; -                  goto err;          }          if (hashed_subvol != cached_subvol) { -                memcpy (local->gfid, oldloc->inode->gfid, 16); +                uuid_copy (local->gfid, oldloc->inode->gfid);                  dht_linkfile_create (frame, dht_link_linkfile_cbk,                                       cached_subvol, hashed_subvol, newloc);          } else { @@ -3694,7 +2943,6 @@ dht_create (call_frame_t *frame, xlator_t *this,              fd_t *fd, dict_t *params)  {          int          op_errno = -1; -        int          ret = -1;          xlator_t    *subvol = NULL;          dht_local_t *local = NULL;          xlator_t    *avail_subvol = NULL; @@ -3705,9 +2953,8 @@ dht_create (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);          if (!local) { -                  op_errno = ENOMEM;                  goto err;          } @@ -3723,12 +2970,6 @@ dht_create (call_frame_t *frame, xlator_t *this,                  goto done;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = ENOMEM; - -                goto err; -        }          subvol = dht_subvol_get_hashed (this, loc);          if (!subvol) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -3748,10 +2989,8 @@ dht_create (call_frame_t *frame, xlator_t *this,          }          /* Choose the minimum filled volume, and create the             files there */ -        /* TODO */          avail_subvol = dht_free_disk_available_subvol (this, subvol);          if (avail_subvol != subvol) { -                local->fd = fd_ref (fd);                  local->params = dict_ref (params);                  local->flags = flags;                  local->mode = mode; @@ -3790,7 +3029,6 @@ dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,          dht_local_t   *local = NULL;          dht_layout_t  *layout = NULL; -          local = frame->local;          layout = local->selfheal.layout; @@ -3928,7 +3166,6 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,          dht_local_t  *local  = NULL;          dht_conf_t   *conf = NULL;          int           op_errno = -1; -        int           ret = -1;          xlator_t     *hashed_subvol = NULL; @@ -3943,15 +3180,13 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);          if (!local) { -                  op_errno = ENOMEM;                  goto err;          }          hashed_subvol = dht_subvol_get_hashed (this, loc); -          if (hashed_subvol == NULL) {                  gf_log (this->name, GF_LOG_DEBUG,                          "hashed subvol not found for %s", @@ -3961,21 +3196,12 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,          }          local->hashed_subvol = hashed_subvol; -        local->inode = inode_ref (loc->inode); -        ret = loc_copy (&local->loc, loc);          local->mode = mode; - -        if (ret == -1) { - -                op_errno = ENOMEM; -                goto err; -        } -          local->params = dict_ref (params); +        local->inode  = inode_ref (loc->inode);          local->layout = dht_layout_new (this, conf->subvolume_cnt);          if (!local->layout) { -                  op_errno = ENOMEM;                  goto err;          } @@ -4366,8 +3592,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)          dht_conf_t   *conf = NULL;          int           op_errno = -1;          int           i = -1; -        int           ret = -1; -          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -4378,9 +3602,8 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);          if (!local) { -                  op_errno = ENOMEM;                  goto err;          } @@ -4388,13 +3611,6 @@ dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags)          local->call_cnt = conf->subvolume_cnt;          local->op_ret   = 0; -        ret = loc_copy (&local->loc, loc); -        if (ret == -1) { - -                op_errno = ENOMEM; -                goto err; -        } -          local->flags = flags;          local->fd = fd_create (local->loc.inode, frame->root->pid); @@ -4421,209 +3637,6 @@ err:          return 0;  } - -int -dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int32_t op_ret, int32_t op_errno, dict_t *dict) -{ -        DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); -        return 0; -} - - -int -dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, -             gf_xattrop_flags_t flags, dict_t *dict) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        subvol = dht_subvol_get_cached (this, loc->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->inode = inode_ref (loc->inode); -        local->call_cnt = 1; - -        STACK_WIND (frame, -                    dht_xattrop_cbk, -                    subvol, subvol->fops->xattrop, -                    loc, flags, dict); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - -        return 0; -} - - -int -dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                  int32_t op_ret, int32_t op_errno, dict_t *dict) -{ -        DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict); -        return 0; -} - - -int -dht_fxattrop (call_frame_t *frame, xlator_t *this, -              fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        STACK_WIND (frame, -                    dht_fxattrop_cbk, -                    subvol, subvol->fops->fxattrop, -                    fd, flags, dict); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL); - -        return 0; -} - - -int -dht_inodelk_cbk (call_frame_t *frame, void *cookie, -                 xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ -        DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno); -        return 0; -} - - -int32_t -dht_inodelk (call_frame_t *frame, xlator_t *this, -             const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; -        dht_local_t  *local = NULL; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        subvol = dht_subvol_get_cached (this, loc->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->inode = inode_ref (loc->inode); -        local->call_cnt = 1; - -        STACK_WIND (frame, -                    dht_inodelk_cbk, -                    subvol, subvol->fops->inodelk, -                    volume, loc, cmd, lock); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (inodelk, frame, -1, op_errno); - -        return 0; -} - - -int -dht_finodelk_cbk (call_frame_t *frame, void *cookie, -                  xlator_t *this, int32_t op_ret, int32_t op_errno) - -{ -        DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno); -        return 0; -} - - -int -dht_finodelk (call_frame_t *frame, xlator_t *this, -              const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) -{ -        xlator_t     *subvol = NULL; -        int           op_errno = -1; - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        subvol = dht_subvol_get_cached (this, fd->inode); -        if (!subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no cached subvolume for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - - -        STACK_WIND (frame, -                    dht_finodelk_cbk, -                    subvol, subvol->fops->finodelk, -                    volume, fd, cmd, lock); - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (finodelk, frame, -1, op_errno); - -        return 0; -} - -  int  dht_entrylk_cbk (call_frame_t *frame, void *cookie,                   xlator_t *this, int32_t op_ret, int32_t op_errno) @@ -4649,7 +3662,13 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,          VALIDATE_OR_GOTO (loc->inode, err);          VALIDATE_OR_GOTO (loc->path, err); -        subvol = dht_subvol_get_cached (this, loc->inode); +        local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol;          if (!subvol) {                  gf_log (this->name, GF_LOG_DEBUG,                          "no cached subvolume for path=%s", loc->path); @@ -4657,14 +3676,6 @@ dht_entrylk (call_frame_t *frame, xlator_t *this,                  goto err;          } -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->inode = inode_ref (loc->inode);          local->call_cnt = 1;          STACK_WIND (frame, dht_entrylk_cbk, @@ -4726,161 +3737,6 @@ err:  int -dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int op_ret, int op_errno, struct iatt *statpre, -                 struct iatt *statpost) -{ -        dht_local_t  *local = NULL; -        int           this_call_cnt = 0; -        call_frame_t *prev = NULL; - - -        local = frame->local; -        prev = cookie; - -        LOCK (&frame->lock); -        { -                if (op_ret == -1) { -                        local->op_errno = op_errno; -                        gf_log (this->name, GF_LOG_DEBUG, -                                "subvolume %s returned -1 (%s)", -                                prev->this->name, strerror (op_errno)); -                        goto unlock; -                } - -                dht_iatt_merge (this, &local->prebuf, statpre, prev->this); -                dht_iatt_merge (this, &local->stbuf, statpost, prev->this); - -                local->op_ret = 0; -        } -unlock: -        UNLOCK (&frame->lock); - -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, -                                  &local->prebuf, &local->stbuf); - -        return 0; -} - - -int -dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, -             struct iatt *stbuf, int32_t valid) -{ -        dht_layout_t *layout = NULL; -        dht_local_t  *local  = NULL; -        int           op_errno = -1; -        int           i = -1; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (loc, err); -        VALIDATE_OR_GOTO (loc->inode, err); -        VALIDATE_OR_GOTO (loc->path, err); - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; -                gf_log (this->name, GF_LOG_DEBUG, -                        "memory allocation failed :("); -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, loc->inode); -        if (!layout) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no layout for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        if (!layout_is_sane (layout)) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "layout is not sane for path=%s", loc->path); -                op_errno = EINVAL; -                goto err; -        } - -        local->inode = inode_ref (loc->inode); -        local->call_cnt = layout->cnt; - -        for (i = 0; i < layout->cnt; i++) { -                STACK_WIND (frame, dht_setattr_cbk, -                            layout->list[i].xlator, -                            layout->list[i].xlator->fops->setattr, -                            loc, stbuf, valid); -        } - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int -dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, -              int32_t valid) -{ -        dht_layout_t *layout = NULL; -        dht_local_t  *local  = NULL; -        int           op_errno = -1; -        int           i = -1; - - -        VALIDATE_OR_GOTO (frame, err); -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (fd, err); - -        local = dht_local_init (frame); -        if (!local) { -                op_errno = ENOMEM; - -                goto err; -        } - -        local->layout = layout = dht_layout_get (this, fd->inode); -        if (!layout) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no layout for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        if (!layout_is_sane (layout)) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "layout is not sane for fd=%p", fd); -                op_errno = EINVAL; -                goto err; -        } - -        local->inode = inode_ref (fd->inode); -        local->call_cnt = layout->cnt; - -        for (i = 0; i < layout->cnt; i++) { -                STACK_WIND (frame, dht_setattr_cbk, -                            layout->list[i].xlator, -                            layout->list[i].xlator->fops->fsetattr, -                            fd, stbuf, valid); -        } - -        return 0; - -err: -        op_errno = (op_errno == -1) ? errno : op_errno; -        DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); - -        return 0; -} - - -int  dht_forget (xlator_t *this, inode_t *inode)  {          uint64_t      tmp_layout = 0; @@ -4898,53 +3754,6 @@ dht_forget (xlator_t *this, inode_t *inode)  } - -int -dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) -{ -        xlator_list_t *subvols = NULL; -        int            cnt = 0; - -        if (!conf) -                return -1; - -        for (subvols = this->children; subvols; subvols = subvols->next) -                cnt++; - -        conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), -                                      gf_dht_mt_xlator_t); -        if (!conf->subvolumes) { - -                return -1; -        } -        conf->subvolume_cnt = cnt; - -        cnt = 0; -        for (subvols = this->children; subvols; subvols = subvols->next) -                conf->subvolumes[cnt++] = subvols->xlator; - -        conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), -                                            gf_dht_mt_char); -        if (!conf->subvolume_status) { - -                return -1; -        } - -        conf->last_event = GF_CALLOC (cnt, sizeof (int), -                                      gf_dht_mt_char); -        if (!conf->last_event) { - -                return -1; -        } -        conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t), -                                          gf_dht_mt_subvol_time); -        if (!conf->subvol_up_time) { -                return -1; -        } -        return 0; -} - -  int  dht_notify (xlator_t *this, int event, void *data, ...)  { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 95d1b1d6a..ab1b82af2 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -29,45 +29,61 @@  #ifndef _DHT_H  #define _DHT_H -#define GF_XATTR_FIX_LAYOUT_KEY   "trusted.distribute.fix.layout" +#define GF_XATTR_FIX_LAYOUT_KEY     "trusted.distribute.fix.layout"  #define GF_DHT_LOOKUP_UNHASHED_ON   1  #define GF_DHT_LOOKUP_UNHASHED_AUTO 2 -#define DHT_PATHINFO_HEADER "DISTRIBUTE:" +#define DHT_PATHINFO_HEADER         "DISTRIBUTE:"  #include <fnmatch.h>  typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, -                                       xlator_t *this, -                                       int32_t op_ret, int32_t op_errno); +                                       xlator_t     *this, +                                       int32_t       op_ret, int32_t op_errno); +typedef int (*dht_defrag_cbk_fn_t) (xlator_t        *this, call_frame_t *frame, +                                    int              ret);  struct dht_layout { -        int               spread_cnt;  /* layout spread count per directory, -                                          is controlled by 'setxattr()' with -                                          special key */ -        int               cnt; -        int               preset; -        int               gen; -        int               type; -        int               ref;   /* use with dht_conf_t->layout_lock */ -        int               search_unhashed; +        int                spread_cnt;  /* layout spread count per directory, +                                           is controlled by 'setxattr()' with +                                           special key */ +        int                cnt; +        int                preset; +        int                gen; +        int                type; +        int                ref; /* use with dht_conf_t->layout_lock */ +        int                search_unhashed;          struct { -                int       err;   /* 0 = normal -                                    -1 = dir exists and no xattr -                                    >0 = dir lookup failed with errno -                                 */ -                uint32_t  start; -                uint32_t  stop; -                xlator_t *xlator; +                int        err;   /* 0 = normal +                                     -1 = dir exists and no xattr +                                     >0 = dir lookup failed with errno +                                  */ +                uint32_t   start; +                uint32_t   stop; +                xlator_t  *xlator;          } list[0];  }; -typedef struct dht_layout dht_layout_t; +typedef struct dht_layout  dht_layout_t;  typedef enum {          DHT_HASH_TYPE_DM,  } dht_hashfn_type_t; +/* rebalance related */ +struct dht_rebalance_ { +        xlator_t            *from_subvol; +        xlator_t            *target_node; +        int32_t              wbflags; +        off_t                offset; +        size_t               size; +        int32_t              flags; +        int                  count; +        struct iobref       *iobref; +        struct iovec        *vector; +        struct iatt          stbuf; +        dht_defrag_cbk_fn_t  target_op_fn; +};  struct dht_local {          int                      call_cnt; @@ -140,11 +156,12 @@ struct dht_local {          /* flag used to make sure we need to return estale in             {lookup,revalidate}_cbk */ -        char    return_estale; +        char return_estale; +        char need_lookup_everywhere; -        /* rebalance related */ -#define to_subvol    hashed_subvol -#define from_subvol  cached_subvol +        glusterfs_fop_t      fop; + +        struct dht_rebalance_ rebalance;  };  typedef struct dht_local dht_local_t; @@ -209,11 +226,21 @@ typedef struct dht_disk_layout dht_disk_layout_t;  #define is_last_call(cnt) (cnt == 0) -#define DHT_LINKFILE_MODE (S_ISVTX) +#define DHT_MIGRATION_IN_PROGRESS 1 +#define DHT_MIGRATION_COMPLETED   2 + +#define DHT_LINKFILE_KEY         "trusted.glusterfs.dht.linkto" +#define DHT_LINKFILE_MODE        (S_ISVTX) +  #define check_is_linkfile(i,s,x) (                                      \                  ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT)   \                   == DHT_LINKFILE_MODE) &&                               \ -                (s->ia_size == 0)) +                dict_get (x, DHT_LINKFILE_KEY)) + +#define check_is_linkfile_wo_dict(i,s) (                                \ +                ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT)   \ +                 == DHT_LINKFILE_MODE)) +  #define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type)) @@ -221,85 +248,86 @@ typedef struct dht_disk_layout dht_disk_layout_t;  #define DHT_STACK_UNWIND(fop, frame, params ...) do {           \                  dht_local_t *__local = NULL;                    \ -                xlator_t *__xl = NULL;                          \ +                xlator_t    *__xl    = NULL;                    \                  if (frame) {                                    \ -                        __xl = frame->this;                     \ -                        __local = frame->local;                 \ +                        __xl         = frame->this;             \ +                        __local      = frame->local;            \                          frame->local = NULL;                    \                  }                                               \                  STACK_UNWIND_STRICT (fop, frame, params);       \                  dht_local_wipe (__xl, __local);                 \          } while (0) -#define DHT_STACK_DESTROY(frame) do {           \ -                dht_local_t *__local = NULL;    \ -                xlator_t *__xl = NULL;          \ -                __xl = frame->this;             \ -                __local = frame->local;         \ -                frame->local = NULL;            \ -                STACK_DESTROY (frame->root);    \ -                dht_local_wipe (__xl, __local); \ +#define DHT_STACK_DESTROY(frame) do {                   \ +                dht_local_t *__local = NULL;            \ +                xlator_t    *__xl    = NULL;            \ +                __xl                 = frame->this;     \ +                __local              = frame->local;    \ +                frame->local         = NULL;            \ +                STACK_DESTROY (frame->root);            \ +                dht_local_wipe (__xl, __local);         \          } while (0) -dht_layout_t *dht_layout_new (xlator_t *this, int cnt); -dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); -dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); -xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, +dht_layout_t                            *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t                            *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t                            *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t   *this, dht_layout_t *layout,                               const char *name); -int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); -int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, -                          uint32_t *holes_p, uint32_t *overlaps_p, -                          uint32_t *missing_p, uint32_t *down_p, -                          uint32_t *misc_p); -int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, -                             xlator_t *subvol, loc_t *loc, dict_t *xattr); +int                                      dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t      *this, loc_t *loc, dht_layout_t *layout, +                          uint32_t      *holes_p, uint32_t *overlaps_p, +                          uint32_t      *missing_p, uint32_t *down_p, +                          uint32_t      *misc_p); +int dht_layout_dir_mismatch (xlator_t   *this, dht_layout_t *layout, +                             xlator_t   *subvol, loc_t *loc, dict_t *xattr);  xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,                                 struct iatt *buf, dict_t *xattr); -int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, -                         xlator_t *subvol, loc_t *loc); +int dht_linkfile_unlink (call_frame_t      *frame, xlator_t *this, +                         xlator_t          *subvol, loc_t *loc);  int dht_layouts_init (xlator_t *this, dht_conf_t *conf);  int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, -                      int op_ret, int op_errno, dict_t *xattr); +                      int       op_ret, int op_errno, dict_t *xattr);  int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, -                             int pos, int32_t **disk_layout_p); -int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, -                           int pos, void *disk_layout_raw); +                             int       pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t   *this, dht_layout_t *layout, +                           int         pos, void *disk_layout_raw);  int dht_frame_return (call_frame_t *frame); -int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int                             dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);  int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,                        uint64_t *x);  void dht_local_wipe (xlator_t *this, dht_local_t *local); -dht_local_t *dht_local_init (call_frame_t *frame); -int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, -                    xlator_t *subvol); +dht_local_t *dht_local_init (call_frame_t    *frame, loc_t *loc, fd_t *fd, +                             glusterfs_fop_t  fop); +int dht_iatt_merge (xlator_t                 *this, struct iatt *to, struct iatt *from, +                    xlator_t                 *subvol);  xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);  xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);  xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); -int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); +int       dht_subvol_cnt (xlator_t *this, xlator_t *subvol);  int dht_hash_compute (int type, const char *name, uint32_t *hash_p); -int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, -                         xlator_t *tovol, xlator_t *fromvol, loc_t *loc); -int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); +int dht_linkfile_create (call_frame_t    *frame, fop_mknod_cbk_t linkfile_cbk, +                         xlator_t        *tovol, xlator_t *fromvol, loc_t *loc); +int                                       dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); +int                                       dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);  int -dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, -                        loc_t *loc, dht_layout_t *layout); +dht_selfheal_directory (call_frame_t     *frame, dht_selfheal_dir_cbk_t cbk, +                        loc_t            *loc, dht_layout_t *layout);  int  dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,                              dht_layout_t *layout);  int -dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, -                      loc_t *loc, dht_layout_t *layout); +dht_selfheal_restore (call_frame_t       *frame, dht_selfheal_dir_cbk_t cbk, +                      loc_t              *loc, dht_layout_t *layout);  int  dht_layout_sort_volname (dht_layout_t *layout); @@ -308,31 +336,252 @@ int dht_rename (call_frame_t *frame, xlator_t *this,  int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +int       dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);  xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); -int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); +int       dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);  int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); -int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); -void dht_layout_unref (xlator_t *this, dht_layout_t *layout); +int           dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); +void          dht_layout_unref (xlator_t *this, dht_layout_t *layout);  dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout); -xlator_t *dht_first_up_subvol (xlator_t *this); -xlator_t *dht_last_up_subvol (xlator_t *this); +xlator_t     *dht_first_up_subvol (xlator_t *this); +xlator_t     *dht_last_up_subvol (xlator_t *this);  int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name); -int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, +int dht_filter_loc_subvol_key (xlator_t  *this, loc_t *loc, loc_t *new_loc,                                 xlator_t **subvol); -int dht_rename_cleanup (call_frame_t *frame); +int                                     dht_rename_cleanup (call_frame_t *frame);  int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                      int32_t op_ret, int32_t op_errno, -                      inode_t *inode, struct iatt *stbuf, -                      struct iatt *preparent, struct iatt *postparent); +                          int32_t           op_ret, int32_t op_errno, +                          inode_t          *inode, struct iatt *stbuf, +                          struct iatt      *preparent, struct iatt *postparent);  int dht_fix_directory_layout (call_frame_t *frame, -                              dht_selfheal_dir_cbk_t dir_cbk, -                              dht_layout_t *layout); +                              dht_selfheal_dir_cbk_t  dir_cbk, +                              dht_layout_t           *layout); +int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf); + +/* migration/rebalance */  int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame); -#endif /* _DHT_H */ + +int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame); +int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame); + + +/* FOPS */ +int32_t dht_lookup (call_frame_t *frame, +                    xlator_t *this, +                    loc_t    *loc, +                    dict_t   *xattr_req); + +int32_t dht_stat (call_frame_t *frame, +                  xlator_t *this, +                  loc_t    *loc); + +int32_t dht_fstat (call_frame_t *frame, +                   xlator_t *this, +                   fd_t     *fd); + +int32_t dht_truncate (call_frame_t *frame, +                      xlator_t *this, +                      loc_t    *loc, +                      off_t     offset); + +int32_t dht_ftruncate (call_frame_t *frame, +                       xlator_t *this, +                       fd_t     *fd, +                       off_t     offset); + +int32_t dht_access (call_frame_t *frame, +                    xlator_t *this, +                    loc_t    *loc, +                    int32_t   mask); + +int32_t dht_readlink (call_frame_t *frame, +                      xlator_t *this, +                      loc_t    *loc, +                      size_t    size); + +int32_t dht_mknod (call_frame_t *frame, xlator_t *this, +                   loc_t    *loc, mode_t mode, dev_t rdev, dict_t *params); + +int32_t dht_mkdir (call_frame_t *frame, xlator_t *this, +                   loc_t    *loc, mode_t mode, dict_t *params); + +int32_t dht_unlink (call_frame_t *frame, +                    xlator_t *this, +                    loc_t    *loc); + +int32_t dht_rmdir (call_frame_t *frame, xlator_t *this, +                   loc_t    *loc, int flags); + +int32_t dht_symlink (call_frame_t   *frame, xlator_t *this, +                     const char *linkpath, loc_t *loc, dict_t *params); + +int32_t dht_rename (call_frame_t *frame, +                    xlator_t *this, +                    loc_t    *oldloc, +                    loc_t    *newloc); + +int32_t dht_link (call_frame_t *frame, +                  xlator_t *this, +                  loc_t    *oldloc, +                  loc_t    *newloc); + +int32_t dht_create (call_frame_t *frame, xlator_t *this, +                    loc_t    *loc, int32_t flags, mode_t mode, +                    fd_t     *fd, dict_t *params); + +int32_t dht_open (call_frame_t *frame, +                  xlator_t *this, +                  loc_t    *loc, +                  int32_t   flags, fd_t *fd, +                  int32_t   wbflags); + +int32_t dht_readv (call_frame_t *frame, +                   xlator_t *this, +                   fd_t     *fd, +                   size_t    size, +                   off_t     offset); + +int32_t dht_writev (call_frame_t      *frame, +                    xlator_t      *this, +                    fd_t          *fd, +                    struct iovec  *vector, +                    int32_t        count, +                    off_t          offset, +                    struct iobref *iobref); + +int32_t dht_flush (call_frame_t *frame, +                   xlator_t *this, +                   fd_t     *fd); + +int32_t dht_fsync (call_frame_t *frame, +                   xlator_t *this, +                   fd_t     *fd, +                   int32_t   datasync); + +int32_t dht_opendir (call_frame_t *frame, +                     xlator_t *this, +                     loc_t    *loc, fd_t *fd); + +int32_t dht_fsyncdir (call_frame_t *frame, +                      xlator_t *this, +                      fd_t     *fd, +                      int32_t   datasync); + +int32_t dht_statfs (call_frame_t *frame, +                    xlator_t *this, +                    loc_t    *loc); + +int32_t dht_setxattr (call_frame_t *frame, +                      xlator_t *this, +                      loc_t    *loc, +                      dict_t   *dict, +                      int32_t   flags); + +int32_t dht_getxattr (call_frame_t   *frame, +                      xlator_t   *this, +                      loc_t      *loc, +                      const char *name); + +int32_t dht_fsetxattr (call_frame_t *frame, +                       xlator_t *this, +                       fd_t     *fd, +                       dict_t   *dict, +                       int32_t   flags); + +int32_t dht_fgetxattr (call_frame_t   *frame, +                       xlator_t   *this, +                       fd_t       *fd, +                       const char *name); + +int32_t dht_removexattr (call_frame_t   *frame, +                         xlator_t   *this, +                         loc_t      *loc, +                         const char *name); + +int32_t dht_lk (call_frame_t        *frame, +                xlator_t        *this, +                fd_t            *fd, +                int32_t          cmd, +                struct gf_flock *flock); + +int32_t dht_inodelk (call_frame_t *frame, xlator_t *this, +                     const char      *volume, loc_t *loc, int32_t cmd, +                     struct gf_flock *flock); + +int32_t dht_finodelk (call_frame_t        *frame, xlator_t *this, +                      const char      *volume, fd_t *fd, int32_t cmd, +                      struct gf_flock *flock); + +int32_t dht_entrylk (call_frame_t    *frame, xlator_t *this, +                     const char  *volume, loc_t *loc, const char *basename, +                     entrylk_cmd  cmd, entrylk_type type); + +int32_t dht_fentrylk (call_frame_t    *frame, xlator_t *this, +                      const char  *volume, fd_t *fd, const char *basename, +                      entrylk_cmd  cmd, entrylk_type type); + +int32_t dht_readdir (call_frame_t  *frame, +                     xlator_t *this, +                     fd_t     *fd, +                     size_t    size, off_t off); + +int32_t dht_readdirp (call_frame_t *frame, +                      xlator_t *this, +                      fd_t     *fd, +                      size_t    size, off_t off); + +int32_t dht_xattrop (call_frame_t           *frame, +                     xlator_t           *this, +                     loc_t              *loc, +                     gf_xattrop_flags_t  flags, +                     dict_t             *dict); + +int32_t dht_fxattrop (call_frame_t *frame, +                      xlator_t           *this, +                      fd_t               *fd, +                      gf_xattrop_flags_t  flags, +                      dict_t             *dict); + +int32_t dht_forget (xlator_t *this, inode_t *inode); +int32_t dht_setattr (call_frame_t  *frame, xlator_t *this, loc_t *loc, +                     struct iatt   *stbuf, int32_t valid); +int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, +                      struct iatt  *stbuf, int32_t valid); + +int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...); + +/* definitions for nufa/switch */ +int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                        int op_ret, int op_errno, inode_t *inode, +                        struct iatt *stbuf, dict_t *xattr, +                        struct iatt *postparent); +int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                        int op_ret, int op_errno, inode_t *inode, +                        struct iatt *stbuf, dict_t *xattr, +                        struct iatt *postparent); +int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, +                             xlator_t *this, int op_ret, int op_errno, +                             inode_t *inode, struct iatt *stbuf, dict_t *xattr, +                             struct iatt *postparent); +int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int op_ret, int op_errno, +                    inode_t *inode, struct iatt *stbuf, dict_t *xattr, +                    struct iatt *postparent); +int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int op_ret, int op_errno, +                    fd_t *fd, inode_t *inode, struct iatt *stbuf, +                    struct iatt *preparent, struct iatt *postparent); +int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int op_ret, int op_errno, +                     inode_t *inode, struct iatt *stbuf, struct iatt *preparent, +                     struct iatt *postparent); + + + +#endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 9e2327bff..d27d8bf91 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -98,7 +98,9 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)                  goto err;          } -        statfs_local = dht_local_init (statfs_frame); +        /* local->fop value is not used in this case */ +        statfs_local = dht_local_init (statfs_frame, NULL, NULL, +                                       GF_FOP_MAXVALUE);          if (!statfs_local) {                  goto err;          } @@ -141,12 +143,13 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)                          goto err;                  } -                statfs_local = dht_local_init (statfs_frame); +                /* In this case, 'local->fop' is not used */ +                statfs_local = dht_local_init (statfs_frame, loc, NULL, +                                               GF_FOP_MAXVALUE);                  if (!statfs_local) {                          goto err;                  } -                loc_copy (&statfs_local->loc, loc);                  loc_t tmp_loc = { .inode = NULL,                                    .path = "/",                  }; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 44ed9c682..99abe023b 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -224,27 +224,60 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)                  GF_FREE (local->key);          } +        if (local->rebalance.vector) +                GF_FREE (local->rebalance.vector); + +        if (local->rebalance.iobref) +                iobref_unref (local->rebalance.iobref); +          GF_FREE (local);  }  dht_local_t * -dht_local_init (call_frame_t *frame) +dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)  {          dht_local_t *local = NULL; +        inode_t     *inode = NULL; +        int          ret   = 0;          /* TODO: use mem-pool */ -        local = GF_CALLOC (1, sizeof (*local), -                           gf_dht_mt_dht_local_t); - +        local = GF_CALLOC (1, sizeof (*local), gf_dht_mt_dht_local_t);          if (!local) -                return NULL; +                goto out; + +        if (loc) { +                ret = loc_copy (&local->loc, loc); +                if (ret) +                        goto out; -        local->op_ret = -1; +                inode = loc->inode; +        } + +        if (fd) { +                local->fd = fd_ref (fd); +                if (!inode) +                        inode = fd->inode; +        } + +        local->op_ret   = -1;          local->op_errno = EUCLEAN; +        local->fop      = fop; + +        if (inode) { +                local->layout   = dht_layout_get (frame->this, inode); +                local->cached_subvol = dht_subvol_get_cached (frame->this, +                                                              inode); +        }          frame->local = local; +out: +        if (ret) { +                if (local) +                        GF_FREE (local); +                local = NULL; +        }          return local;  } @@ -496,3 +529,374 @@ err:          loc_wipe (child);          return -1;  } + + + +int +dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) +{ +        xlator_list_t *subvols = NULL; +        int            cnt = 0; + +        if (!conf) +                return -1; + +        for (subvols = this->children; subvols; subvols = subvols->next) +                cnt++; + +        conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *), +                                      gf_dht_mt_xlator_t); +        if (!conf->subvolumes) { +                return -1; +        } +        conf->subvolume_cnt = cnt; + +        cnt = 0; +        for (subvols = this->children; subvols; subvols = subvols->next) +                conf->subvolumes[cnt++] = subvols->xlator; + +        conf->subvolume_status = GF_CALLOC (cnt, sizeof (char), +                                            gf_dht_mt_char); +        if (!conf->subvolume_status) { +                return -1; +        } + +        conf->last_event = GF_CALLOC (cnt, sizeof (int), +                                      gf_dht_mt_char); +        if (!conf->last_event) { +                return -1; +        } + +        conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t), +                                          gf_dht_mt_subvol_time); +        if (!conf->subvol_up_time) { +                return -1; +        } + +        conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t), +                                    gf_dht_mt_dht_du_t); +        if (!conf->du_stats) { +                return -1; +        } + +        return 0; +} + + + + +static int +dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data) +{ +        dht_local_t *local = NULL; + +        local = frame->local; + +        local->rebalance.target_op_fn (THIS, frame, op_ret); + +        return 0; +} + + +int +dht_migration_complete_check_task (void *data) +{ +        int           ret      = -1; +        xlator_t     *src_node = NULL; +        xlator_t     *dst_node = NULL; +        dht_local_t  *local    = NULL; +        dict_t       *dict     = NULL; +        dht_layout_t *layout   = NULL; +        struct iatt   stbuf    = {0,}; +        xlator_t     *this     = NULL; +        call_frame_t *frame    = NULL; +        loc_t         tmp_loc  = {0,}; +        char         *path     = NULL; + +        this  = THIS; +        frame = data; +        local = frame->local; + +        src_node = local->cached_subvol; + +        /* getxattr on cached_subvol for 'linkto' value */ +        if (!local->loc.inode) +                ret = syncop_fgetxattr (src_node, local->fd, &dict, +                                        DHT_LINKFILE_KEY); +        else +                ret = syncop_getxattr (src_node, &local->loc, &dict, +                                       DHT_LINKFILE_KEY); + +        if (!ret) +                dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); + +        if (ret) { +                if ((errno != ENOENT) || (!local->loc.inode)) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: failed to get the 'linkto' xattr %s", +                                local->loc.path, strerror (errno)); +                        goto out; +                } +                /* Need to do lookup on hashed subvol, then get the file */ +                ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL, +                                     NULL); +                if (ret) +                        goto out; +                dst_node = dht_subvol_get_cached (this, local->loc.inode); +        } + +        if (!dst_node) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to get the destination node", +                        local->loc.path); +                ret = -1; +                goto out; +        } + +        /* lookup on dst */ +        if (local->loc.inode) { +                ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL); + +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: failed to lookup the file on %s", +                                local->loc.path, dst_node->name); +                        goto out; +                } + +                if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: gfid different on the target file on %s", +                                local->loc.path, dst_node->name); +                        ret = -1; +                        goto out; +                } +        } + +        /* update inode ctx (the layout) */ +        dht_layout_unref (this, local->layout); + +        if (!local->loc.inode) +                ret = dht_layout_preset (this, dst_node, local->fd->inode); +        else +                ret = dht_layout_preset (this, dst_node, local->loc.inode); +        if (ret != 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "%s: could not set preset layout for subvol %s", +                        local->loc.path, dst_node->name); +                ret   = -1; +                goto out; +        } + +        layout = dht_layout_for_subvol (this, dst_node); +        if (!layout) { +                gf_log (this->name, GF_LOG_INFO, +                        "%s: no pre-set layout for subvolume %s", +                        local->loc.path, dst_node ? dst_node->name : "<nil>"); +                ret = -1; +                goto out; +        } + +        if (!local->loc.inode) +                ret = dht_layout_set (this, local->fd->inode, layout); +        else +                ret = dht_layout_set (this, local->loc.inode, layout); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to set the new layout", +                        local->loc.path); +                goto out; +        } + +        local->cached_subvol = dst_node; +        ret = 0; + +        if (!local->fd) +                goto out; + +        /* once we detect the migration complete, the fd-ctx is no more +           required.. delete the ctx, and do one extra 'fd_unref' for open fd */ +        ret = fd_ctx_del (local->fd, this, NULL); +        if (!ret) { +                fd_unref (local->fd); +                ret = 0; +                goto out; +        } + +        /* if 'local->fd' (ie, fd based operation), send a 'open()' on +           destination if not already done */ +        if (local->loc.inode) { +                ret = syncop_open (dst_node, &local->loc, +                                   local->fd->flags, local->fd); +        } else { +                tmp_loc.inode = local->fd->inode; +                inode_path (local->fd->inode, NULL, &path); +                if (path) +                        tmp_loc.path = path; +                ret = syncop_open (dst_node, &tmp_loc, +                                   local->fd->flags, local->fd); +                if (path) +                        GF_FREE (path); + +        } +        if (ret == -1) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to send open() on target file at %s", +                        local->loc.path, dst_node->name); +                goto out; +        } + +        /* need this unref for the fd on src_node */ +        fd_unref (local->fd); +        ret = 0; +out: + +        return ret; +} + +int +dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) +{ +        int         ret     = -1; +        dht_conf_t *conf    = NULL; + +        conf = this->private; + +        ret = synctask_new (conf->env, dht_migration_complete_check_task, +                            dht_migration_complete_check_done, +                            frame, frame); +        return ret; +} + +/* During 'in-progress' state, both nodes should have the file */ +static int +dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data) +{ +        dht_local_t *local = NULL; + +        local = sync_frame->local; + +        local->rebalance.target_op_fn (THIS, sync_frame, op_ret); + +        return 0; +} + +static int +dht_rebalance_inprogress_task (void *data) +{ +        int           ret      = -1; +        xlator_t     *src_node = NULL; +        xlator_t     *dst_node = NULL; +        dht_local_t  *local    = NULL; +        dict_t       *dict     = NULL; +        call_frame_t *frame    = NULL; +        xlator_t     *this     = NULL; +        char         *path     = NULL; +        struct iatt   stbuf    = {0,}; +        loc_t         tmp_loc  = {0,}; + +        this  = THIS; +        frame = data; +        local = frame->local; + +        src_node = local->cached_subvol; + +        /* getxattr on cached_subvol for 'linkto' value */ +        if (local->loc.inode) +                ret = syncop_getxattr (src_node, &local->loc, &dict, +                                       DHT_LINKFILE_KEY); +        else +                ret = syncop_fgetxattr (src_node, local->fd, &dict, +                                        DHT_LINKFILE_KEY); + +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to get the 'linkto' xattr %s", +                        local->loc.path, strerror (errno)); +                        goto out; +        } + +        dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); +        if (!dst_node) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to get the 'linkto' xattr from dict", +                        local->loc.path); +                ret = -1; +                goto out; +        } + +        local->rebalance.target_node = dst_node; + +        if (local->loc.inode) { +                /* lookup on dst */ +                ret = syncop_lookup (dst_node, &local->loc, NULL, +                                     &stbuf, NULL, NULL); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: failed to lookup the file on %s", +                                local->loc.path, dst_node->name); +                        goto out; +                } + +                if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: gfid different on the target file on %s", +                                local->loc.path, dst_node->name); +                        ret = -1; +                        goto out; +                } +        } + +        ret = 0; + +        if (!local->fd) +                goto out; + +        if (local->loc.inode) { +                ret = syncop_open (dst_node, &local->loc, +                                   local->fd->flags, local->fd); +        } else { +                tmp_loc.inode = local->fd->inode; +                inode_path (local->fd->inode, NULL, &path); +                if (path) +                        tmp_loc.path = path; +                ret = syncop_open (dst_node, &tmp_loc, +                                   local->fd->flags, local->fd); +                if (path) +                        GF_FREE (path); +        } + +        if (ret == -1) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to send open() on target file at %s", +                        local->loc.path, dst_node->name); +                goto out; +        } + +        ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to set fd-ctx target file at %s", +                        local->loc.path, dst_node->name); +                goto out; +        } + +        ret = 0; +out: +        return ret; +} + +int +dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame) +{ + +        int         ret     = -1; +        dht_conf_t *conf    = NULL; + +        conf = this->private; + +        ret = synctask_new (conf->env, dht_rebalance_inprogress_task, +                            dht_inprogress_check_done, +                            frame, frame); +        return ret; +} diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c new file mode 100644 index 000000000..1e9f54bda --- /dev/null +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -0,0 +1,1115 @@ +/* +  Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.h" + +int dht_access2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_open2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret); + +int +dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +              int op_ret, int op_errno, fd_t *fd) +{ +        dht_local_t  *local = NULL; +        call_frame_t *prev = NULL; +        int           ret = 0; + +        local = frame->local; +        prev = cookie; + +        local->op_errno = op_errno; +        if ((op_ret == -1) && (op_errno != ENOENT)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); +                goto out; +        } + +        if (!op_ret || (local->call_cnt != 1)) +                goto out; + +        /* rebalance would have happened */ +        local->rebalance.target_op_fn = dht_open2; +        ret = dht_rebalance_complete_check (this, frame); +        if (!ret) +                return 0; + +out: +        DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd); + +        return 0; +} + +int +dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t *local  = NULL; +        xlator_t    *subvol = NULL; +        int          op_errno = EINVAL; + +        local = frame->local; +        if (!local) +                goto out; + +        op_errno = ENOENT; +        if (op_ret) +                goto out; + +        local->call_cnt = 2; +        subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open, +                    &local->loc, local->rebalance.flags, local->fd, +                    local->rebalance.wbflags); +        return 0; + +out: +        DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); +        return 0; +} + +int +dht_open (call_frame_t *frame, xlator_t *this, +          loc_t *loc, int flags, fd_t *fd, int wbflags) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, loc, fd, GF_FOP_OPEN); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        local->rebalance.wbflags = wbflags; +        local->rebalance.flags = flags; +        local->call_cnt = 1; + +        STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open, +                    loc, flags, fd, wbflags); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL); + +        return 0; +} + +int +dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                   int op_ret, int op_errno, struct iatt *stbuf) +{ +        uint64_t      tmp_subvol = 0; +        dht_local_t  *local = NULL; +        call_frame_t *prev = NULL; +        int           ret = -1; + +        GF_VALIDATE_OR_GOTO ("dht", frame, err); +        GF_VALIDATE_OR_GOTO ("dht", this, out); +        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); +        GF_VALIDATE_OR_GOTO ("dht", cookie, out); + +        local = frame->local; +        prev = cookie; + +        if ((op_ret == -1) && (op_errno != ENOENT)) { +                local->op_errno = op_errno; +                gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); +                goto out; +        } + +        if (local->call_cnt != 1) +                goto out; + +        /* Check if the rebalance phase2 is true */ +        if ((op_ret == -1) || (IA_ISREG (stbuf->ia_type) && +            ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) & +              ~S_IFMT) == DHT_LINKFILE_MODE))) { +                if (local->fd) +                        ret = fd_ctx_get (local->fd, this, &tmp_subvol); +                if (ret) { +                        /* Phase 2 of migration */ +                        local->rebalance.target_op_fn = dht_attr2; +                        ret = dht_rebalance_complete_check (this, frame); +                } else { +                        /* value is already set in fd_ctx, that means no need +                           to check for whether its complete or not. */ +                        dht_attr2 (this, frame, 0); +                        ret = 0; +                } +                if (!ret) +                        goto err; +        } +out: +        DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf); +err: +        return 0; +} + +int +dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t *local  = NULL; +        xlator_t    *subvol = NULL; +        int          op_errno = EINVAL; + +        local = frame->local; +        if (!local) +                goto out; + +        op_errno = local->op_errno; +        if (op_ret == -1) +                goto out; + +        subvol = local->cached_subvol; +        local->call_cnt = 2; + +        if (local->fop == GF_FOP_FSTAT) { +                STACK_WIND (frame, dht_file_attr_cbk, subvol, +                            subvol->fops->fstat, local->fd); +        } else { +                STACK_WIND (frame, dht_file_attr_cbk, subvol, +                            subvol->fops->stat, &local->loc); +        } +        return 0; + +out: +        DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); +        return 0; +} + +int +dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +              int op_ret, int op_errno, struct iatt *stbuf) +{ +        dht_local_t  *local = NULL; +        int           this_call_cnt = 0; +        call_frame_t *prev = NULL; + +        GF_VALIDATE_OR_GOTO ("dht", frame, err); +        GF_VALIDATE_OR_GOTO ("dht", this, out); +        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); +        GF_VALIDATE_OR_GOTO ("dht", cookie, out); + +        local = frame->local; +        prev = cookie; + +        LOCK (&frame->lock); +        { +                if (op_ret == -1) { +                        local->op_errno = op_errno; +                        gf_log (this->name, GF_LOG_DEBUG, +                                "subvolume %s returned -1 (%s)", +                                prev->this->name, strerror (op_errno)); + +                        goto unlock; +                } + +                dht_iatt_merge (this, &local->stbuf, stbuf, prev->this); + +                local->op_ret = 0; +        } +unlock: +        UNLOCK (&frame->lock); +out: +        this_call_cnt = dht_frame_return (frame); +        if (is_last_call (this_call_cnt)) { +                DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno, +                                  &local->stbuf); +        } +err: +        return 0; +} + +int +dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; +        dht_layout_t *layout = NULL; +        int           i = 0; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + + +        local = dht_local_init (frame, loc, NULL, GF_FOP_STAT); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        layout = local->layout; +        if (!layout) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no layout for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        if (IA_ISREG (loc->inode->ia_type)) { +                local->call_cnt = 1; + +                subvol = local->cached_subvol; + +                STACK_WIND (frame, dht_file_attr_cbk, subvol, +                            subvol->fops->stat, loc); + +                return 0; +        } + +        local->call_cnt = layout->cnt; + +        for (i = 0; i < layout->cnt; i++) { +                subvol = layout->list[i].xlator; + +                STACK_WIND (frame, dht_attr_cbk, +                            subvol, subvol->fops->stat, +                            loc); +        } + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL); + +        return 0; +} + + +int +dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; +        dht_layout_t *layout = NULL; +        int           i = 0; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        layout = local->layout; +        if (!layout) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no layout for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        if (IA_ISREG (fd->inode->ia_type)) { +                local->call_cnt = 1; + +                subvol = local->cached_subvol; + +                STACK_WIND (frame, dht_file_attr_cbk, subvol, +                            subvol->fops->fstat, fd); + +                return 0; +        } + +        local->call_cnt = layout->cnt; + +        for (i = 0; i < layout->cnt; i++) { +                subvol = layout->list[i].xlator; +                STACK_WIND (frame, dht_attr_cbk, +                            subvol, subvol->fops->fstat, +                            fd); +        } + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); + +        return 0; +} + +int +dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +               int op_ret, int op_errno, +               struct iovec *vector, int count, struct iatt *stbuf, +               struct iobref *iobref) +{ +        dht_local_t *local      = NULL; +        int          ret        = 0; + +        local = frame->local; +        if (!local) { +                op_ret = -1; +                op_errno = EINVAL; +                goto out; +        } + +        /* This is already second try, no need for re-check */ +        if (local->call_cnt != 1) +                goto out; + +        if ((op_ret == -1) && (op_errno != ENOENT)) +                goto out; + +        if ((op_ret == -1) || ((st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type) & +                                ~S_IFMT) == DHT_LINKFILE_MODE)) { +                /* File would be migrated to other node */ +                ret = fd_ctx_get (local->fd, this, NULL); +                if (ret) { +                        local->rebalance.target_op_fn = dht_readv2; +                        ret = dht_rebalance_complete_check (this, frame); +                } else { +                        /* value is already set in fd_ctx, that means no need +                           to check for whether its complete or not. */ +                        dht_readv2 (this, frame, 0); +                } +                if (!ret) +                        return 0; +        } + +out: +        DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, +                          iobref); + +        return 0; +} + +int +dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t *local  = NULL; +        xlator_t    *subvol = NULL; +        int          op_errno = EINVAL; + +        local = frame->local; +        if (!local) +                goto out; + +        op_errno = local->op_errno; +        if (op_ret == -1) +                goto out; + +        local->call_cnt = 2; +        subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, +                    local->fd, local->rebalance.size, local->rebalance.offset); + +        return 0; + +out: +        DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); +        return 0; +} + +int +dht_readv (call_frame_t *frame, xlator_t *this, +           fd_t *fd, size_t size, off_t off) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_READ); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        local->rebalance.offset = off; +        local->rebalance.size   = size; +        local->call_cnt = 1; + +        STACK_WIND (frame, dht_readv_cbk, +                    subvol, subvol->fops->readv, +                    fd, size, off); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + +        return 0; +} + +int +dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                int op_ret, int op_errno) +{ +        int          ret = -1; +        dht_local_t *local = NULL; + +        local = frame->local; + +        if (local->call_cnt != 1) +                goto out; + +        if ((op_ret == -1) && (op_errno == ENOENT)) { +                /* File would be migrated to other node */ +                local->rebalance.target_op_fn = dht_access2; +                ret = dht_rebalance_complete_check (frame->this, frame); +                if (!ret) +                        return 0; +        } + +out: +        DHT_STACK_UNWIND (access, frame, op_ret, op_errno); +        return 0; +} + +int +dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t *local  = NULL; +        xlator_t    *subvol = NULL; +        int          op_errno = EINVAL; + +        local = frame->local; +        if (!local) +                goto out; + +        op_errno = local->op_errno; +        if (op_ret == -1) +                goto out; + +        local->call_cnt = 2; +        subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, +                    &local->loc, local->rebalance.flags); + +        return 0; + +out: +        DHT_STACK_UNWIND (access, frame, -1, op_errno); +        return 0; +} + + +int +dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        local->rebalance.flags = mask; +        local->call_cnt = 1; +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, +                    loc, mask); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (access, frame, -1, op_errno); + +        return 0; +} + + +int +dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +               int op_ret, int op_errno) +{ +        dht_local_t  *local = NULL; +        int           ret = -1; + +        local = frame->local; + +        local->op_errno = op_errno; + +        if (local->call_cnt != 1) +                goto out; + +        /* If context is set, then send flush() it to the destination */ +        ret = fd_ctx_get (local->fd, this, NULL); +        if (!ret) { +                dht_flush2 (this, frame, 0); +                return 0; +        } + +out: +        DHT_STACK_UNWIND (flush, frame, op_ret, op_errno); + +        return 0; +} + +int +dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local  = NULL; +        xlator_t     *subvol = NULL; +        uint64_t      tmp_subvol = 0; +        int           ret = -1; + +        local = frame->local; + +        ret = fd_ctx_get (local->fd, this, &tmp_subvol); +        if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        STACK_WIND (frame, dht_flush_cbk, +                    subvol, subvol->fops->flush, local->fd); + +        return 0; +} + + +int +dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        local->call_cnt = 1; + +        STACK_WIND (frame, dht_flush_cbk, +                    subvol, subvol->fops->flush, fd); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (flush, frame, -1, op_errno); + +        return 0; +} + + +int +dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, +               int op_errno, struct iatt *prebuf, struct iatt *postbuf) +{ +        dht_local_t  *local = NULL; +        call_frame_t *prev = NULL; +        int           ret = -1; + +        local = frame->local; +        prev = cookie; + +        local->op_errno = op_errno; +        if (op_ret == -1) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); +                goto out; +        } + +        if (local->call_cnt != 1) +                goto out; + +        ret = fd_ctx_get (local->fd, this, NULL); +        if (ret) { +                local->rebalance.target_op_fn = dht_fsync2; + +                /* Check if the rebalance phase1 is true */ +                if (IA_ISREG (postbuf->ia_type) && +                    (postbuf->ia_prot.sticky == 1) && +                    (postbuf->ia_prot.sgid == 1)) { +                        ret = dht_rebalance_in_progress_check (this, frame); +                } + +                /* Check if the rebalance phase2 is true */ +                if (IA_ISREG (postbuf->ia_type) && +                    ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & +                      ~S_IFMT) == DHT_LINKFILE_MODE)) { +                        ret = dht_rebalance_complete_check (this, frame); +                } +        } else { +                dht_fsync2 (this, frame, 0); +                ret = 0; +        } +        if (!ret) +                return 0; + +out: +        DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno, +                          prebuf, postbuf); + +        return 0; +} + +int +dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local  = NULL; +        xlator_t     *subvol = NULL; +        uint64_t      tmp_subvol = 0; +        int           ret = -1; + +        local = frame->local; + +        ret = fd_ctx_get (local->fd, this, &tmp_subvol); +        if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync, +                    local->fd, local->rebalance.flags); + +        return 0; +} + +int +dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC); +        if (!local) { +                op_errno = ENOMEM; + +                goto err; +        } + +        local->call_cnt = 1; +        local->rebalance.flags = datasync; + +        subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync, +                    fd, datasync); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + + +/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to +   indicate that lock migration happened on the fd, so we can consider it as +   phase 2 of migration */ +int +dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +            int op_ret, int op_errno, struct gf_flock *flock) +{ +        DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock); + +        return 0; +} + + +int +dht_lk (call_frame_t *frame, xlator_t *this, +        fd_t *fd, int cmd, struct gf_flock *flock) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        subvol = dht_subvol_get_cached (this, fd->inode); +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        /* TODO: for rebalance, we need to preserve the fop arguments */ +        STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd, +                    cmd, flock); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL); + +        return 0; +} + +/* Symlinks are currently not migrated, so no need for any check here */ +int +dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                  int op_ret, int op_errno, const char *path, struct iatt *sbuf) +{ +        dht_local_t *local = NULL; + +        local = frame->local; +        if (op_ret == -1) +                goto err; + +        if (!local) { +                op_ret = -1; +                op_errno = EINVAL; +        } + +err: +        DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, sbuf); + +        return 0; +} + + +int +dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, dht_readlink_cbk, +                    subvol, subvol->fops->readlink, +                    loc, size); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + +/* Currently no translators on top of 'distribute' will be using + * below fops, hence not implementing 'migration' related checks + */ + +int +dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                 int32_t op_ret, int32_t op_errno, dict_t *dict) +{ +        DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); +        return 0; +} + + +int +dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, +             gf_xattrop_flags_t flags, dict_t *dict) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        local->call_cnt = 1; + +        STACK_WIND (frame, +                    dht_xattrop_cbk, +                    subvol, subvol->fops->xattrop, +                    loc, flags, dict); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); + +        return 0; +} + + +int +dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                  int32_t op_ret, int32_t op_errno, dict_t *dict) +{ +        DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict); +        return 0; +} + + +int +dht_fxattrop (call_frame_t *frame, xlator_t *this, +              fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        subvol = dht_subvol_get_cached (this, fd->inode); +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, +                    dht_fxattrop_cbk, +                    subvol, subvol->fops->fxattrop, +                    fd, flags, dict); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL); + +        return 0; +} + + +int +dht_inodelk_cbk (call_frame_t *frame, void *cookie, +                 xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +        DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno); +        return 0; +} + + +int32_t +dht_inodelk (call_frame_t *frame, xlator_t *this, +             const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        local->call_cnt = 1; + +        STACK_WIND (frame, +                    dht_inodelk_cbk, +                    subvol, subvol->fops->inodelk, +                    volume, loc, cmd, lock); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (inodelk, frame, -1, op_errno); + +        return 0; +} + + +int +dht_finodelk_cbk (call_frame_t *frame, void *cookie, +                  xlator_t *this, int32_t op_ret, int32_t op_errno) + +{ +        DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno); +        return 0; +} + + +int +dht_finodelk (call_frame_t *frame, xlator_t *this, +              const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        subvol = dht_subvol_get_cached (this, fd->inode); +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + + +        STACK_WIND (frame, +                    dht_finodelk_cbk, +                    subvol, subvol->fops->finodelk, +                    volume, fd, cmd, lock); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (finodelk, frame, -1, op_errno); + +        return 0; +} diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c new file mode 100644 index 000000000..21eca6117 --- /dev/null +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -0,0 +1,597 @@ +/* +  Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dht-common.h" + +int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret); + +int +dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                int op_ret, int op_errno, struct iatt *prebuf, +                struct iatt *postbuf) +{ +        dht_local_t *local = NULL; +        int          ret   = -1; + +        if (op_ret == -1) { +                goto out; +        } + +        local = frame->local; +        if (!local) { +                op_ret = -1; +                op_errno = EINVAL; +                goto out; +        } + +        if (local->call_cnt != 1) +                goto out; + +        local->rebalance.target_op_fn = dht_writev2; + +        /* Phase 2 of migration */ +        if (IA_ISREG (postbuf->ia_type) && +            ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & +              ~S_IFMT) == DHT_LINKFILE_MODE)) { +                ret = dht_rebalance_complete_check (this, frame); +                if (!ret) +                        return 0; +        } + +        /* Check if the rebalance phase1 is true */ +        if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) && +            (postbuf->ia_prot.sgid == 1)) { +                ret = fd_ctx_get (local->fd, this, NULL); +                if (!ret) { +                        dht_writev2 (this, frame, 0); +                        return 0; +                } +                ret = dht_rebalance_in_progress_check (this, frame); +                if (!ret) +                        return 0; +        } + +out: +        DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); + +        return 0; +} + +int +dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local  = NULL; +        xlator_t     *subvol = NULL; +        uint64_t      tmp_subvol = 0; +        int           ret = -1; + +        local = frame->local; + +        ret = fd_ctx_get (local->fd, this, &tmp_subvol); +        if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        STACK_WIND (frame, dht_writev_cbk, +                    subvol, subvol->fops->writev, +                    local->fd, local->rebalance.vector, local->rebalance.count, +                    local->rebalance.offset, local->rebalance.iobref); + +        return 0; +} + +int +dht_writev (call_frame_t *frame, xlator_t *this, +            fd_t *fd, struct iovec *vector, int count, off_t off, +            struct iobref *iobref) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE); +        if (!local) { + +                op_errno = ENOMEM; +                goto err; +        } + +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + + +        local->rebalance.vector = iov_dup (vector, count); +        local->rebalance.offset = off; +        local->rebalance.count = count; +        local->rebalance.iobref = iobref_ref (iobref); +        local->call_cnt = 1; + +        STACK_WIND (frame, dht_writev_cbk, +                    subvol, subvol->fops->writev, +                    fd, vector, count, off, iobref); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + + + +int +dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                  int op_ret, int op_errno, struct iatt *prebuf, +                  struct iatt *postbuf) +{ +        dht_local_t  *local = NULL; +        call_frame_t *prev = NULL; +        int           ret = -1; + +        GF_VALIDATE_OR_GOTO ("dht", frame, err); +        GF_VALIDATE_OR_GOTO ("dht", this, out); +        GF_VALIDATE_OR_GOTO ("dht", frame->local, out); +        GF_VALIDATE_OR_GOTO ("dht", cookie, out); + +        local = frame->local; +        prev = cookie; + +        if ((op_ret == -1) && (op_errno != ENOENT)) { +                local->op_errno = op_errno; +                local->op_ret = -1; +                gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); + +                goto out; +        } + +        if (local->call_cnt != 1) +                goto out; + +        local->rebalance.target_op_fn = dht_truncate2; + +        /* Phase 2 of migration */ +        if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) && +            ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & +              ~S_IFMT) == DHT_LINKFILE_MODE))) { +                ret = dht_rebalance_complete_check (this, frame); +                if (!ret) +                        goto err; +        } + +        /* Check if the rebalance phase1 is true */ +        if (IA_ISREG (postbuf->ia_type) && (postbuf->ia_prot.sticky == 1) && +            (postbuf->ia_prot.sgid == 1)) { +                ret = fd_ctx_get (local->fd, this, NULL); +                if (!ret) { +                        dht_truncate2 (this, frame, 0); +                        goto err; +                } +                ret = dht_rebalance_in_progress_check (this, frame); +                if (!ret) +                        goto err; +        } + +out: +        DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno, +                          prebuf, postbuf); +err: +        return 0; +} + + +int +dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local  = NULL; +        xlator_t     *subvol = NULL; +        uint64_t      tmp_subvol = 0; +        int           ret = -1; + +        local = frame->local; + +        if (local->fd) +                ret = fd_ctx_get (local->fd, this, &tmp_subvol); +        if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        if (local->fop == GF_FOP_TRUNCATE) { +                STACK_WIND (frame, dht_truncate_cbk, subvol, +                            subvol->fops->truncate, &local->loc, +                            local->rebalance.offset); +        } else { +                STACK_WIND (frame, dht_truncate_cbk, subvol, +                            subvol->fops->ftruncate, local->fd, +                            local->rebalance.offset); +        } + +        return 0; +} + +int +dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        local->rebalance.offset = offset; +        local->call_cnt = 1; +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, dht_truncate_cbk, +                    subvol, subvol->fops->truncate, +                    loc, offset); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + +int +dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +{ +        xlator_t     *subvol = NULL; +        int           op_errno = -1; +        dht_local_t  *local = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        local->rebalance.offset = offset; +        local->call_cnt = 1; +        subvol = local->cached_subvol; +        if (!subvol) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no cached subvolume for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        STACK_WIND (frame, dht_truncate_cbk, +                    subvol, subvol->fops->ftruncate, +                    fd, offset); + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + +/* handle cases of migration here for 'setattr()' calls */ +int +dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                      int op_ret, int op_errno, struct iatt *prebuf, +                      struct iatt *postbuf) +{ +        dht_local_t  *local = NULL; +        call_frame_t *prev = NULL; +        int           ret = -1; + +        local = frame->local; +        prev = cookie; + +        local->op_errno = op_errno; +        if ((op_ret == -1) && (op_errno != ENOENT)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "subvolume %s returned -1 (%s)", +                        prev->this->name, strerror (op_errno)); +                goto out; +        } + +        if (local->call_cnt != 1) +                goto out; + +        local->rebalance.target_op_fn = dht_setattr2; + +        /* Phase 2 of migration */ +        if ((op_ret == -1) || (IA_ISREG (postbuf->ia_type) && +            ((st_mode_from_ia (postbuf->ia_prot, postbuf->ia_type) & +              ~S_IFMT) == DHT_LINKFILE_MODE))) { +                ret = dht_rebalance_complete_check (this, frame); +                if (!ret) +                        goto out; +        } + +        /* At the end of the migration process, whatever 'attr' we +           have on source file will be migrated to destination file +           in one shot, hence we don't need to check for in progress +           state here */ +out: +        DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno, +                          prebuf, postbuf); + +        return 0; +} + +int +dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret) +{ +        dht_local_t  *local  = NULL; +        xlator_t     *subvol = NULL; +        uint64_t      tmp_subvol = 0; +        int           ret = -1; + +        local = frame->local; + +        if (local->fd) +                ret = fd_ctx_get (local->fd, this, &tmp_subvol); +        if (!ret) +                subvol = (xlator_t *)(long)tmp_subvol; + +        if (!subvol) +                subvol = local->cached_subvol; + +        local->call_cnt = 2; /* This is the second attempt */ + +        if (local->fop == GF_FOP_SETATTR) { +                STACK_WIND (frame, dht_file_setattr_cbk, subvol, +                            subvol->fops->setattr, &local->loc, +                            &local->rebalance.stbuf, local->rebalance.flags); +        } else { +                STACK_WIND (frame, dht_file_setattr_cbk, subvol, +                            subvol->fops->fsetattr, local->fd, +                            &local->rebalance.stbuf, local->rebalance.flags); +        } + +        return 0; +} + + +/* Keep the existing code same for all the cases other than regular file */ +int +dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                 int op_ret, int op_errno, struct iatt *statpre, +                 struct iatt *statpost) +{ +        dht_local_t  *local = NULL; +        int           this_call_cnt = 0; +        call_frame_t *prev = NULL; + + +        local = frame->local; +        prev = cookie; + +        LOCK (&frame->lock); +        { +                if (op_ret == -1) { +                        local->op_errno = op_errno; +                        gf_log (this->name, GF_LOG_DEBUG, +                                "subvolume %s returned -1 (%s)", +                                prev->this->name, strerror (op_errno)); +                        goto unlock; +                } + +                dht_iatt_merge (this, &local->prebuf, statpre, prev->this); +                dht_iatt_merge (this, &local->stbuf, statpost, prev->this); + +                local->op_ret = 0; +        } +unlock: +        UNLOCK (&frame->lock); + +        this_call_cnt = dht_frame_return (frame); +        if (is_last_call (this_call_cnt)) +                DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, +                                  &local->prebuf, &local->stbuf); + +        return 0; +} + + +int +dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, +             struct iatt *stbuf, int32_t valid) +{ +        xlator_t     *subvol = NULL; +        dht_layout_t *layout = NULL; +        dht_local_t  *local  = NULL; +        int           op_errno = -1; +        int           i = -1; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (loc, err); +        VALIDATE_OR_GOTO (loc->inode, err); +        VALIDATE_OR_GOTO (loc->path, err); + +        local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        layout = local->layout; +        if (!layout) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no layout for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        if (!layout_is_sane (layout)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "layout is not sane for path=%s", loc->path); +                op_errno = EINVAL; +                goto err; +        } + +        if (IA_ISREG (loc->inode->ia_type)) { +                /* in the regular file _cbk(), we need to check for +                   migration possibilities */ +                local->rebalance.stbuf = *stbuf; +                local->rebalance.flags = valid; +                local->call_cnt = 1; +                subvol = local->cached_subvol; + +                STACK_WIND (frame, dht_file_setattr_cbk, subvol, +                            subvol->fops->setattr, +                            loc, stbuf, valid); + +                return 0; +        } + +        local->call_cnt = layout->cnt; + +        for (i = 0; i < layout->cnt; i++) { +                STACK_WIND (frame, dht_setattr_cbk, +                            layout->list[i].xlator, +                            layout->list[i].xlator->fops->setattr, +                            loc, stbuf, valid); +        } + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); + +        return 0; +} + + +int +dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, +              int32_t valid) +{ +        xlator_t     *subvol = NULL; +        dht_layout_t *layout = NULL; +        dht_local_t  *local  = NULL; +        int           op_errno = -1; +        int           i = -1; + + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR); +        if (!local) { +                op_errno = ENOMEM; +                goto err; +        } + +        layout = local->layout; +        if (!layout) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "no layout for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        if (!layout_is_sane (layout)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "layout is not sane for fd=%p", fd); +                op_errno = EINVAL; +                goto err; +        } + +        if (IA_ISREG (fd->inode->ia_type)) { +                /* in the regular file _cbk(), we need to check for +                   migration possibilities */ +                local->rebalance.stbuf = *stbuf; +                local->rebalance.flags = valid; +                local->call_cnt = 1; +                subvol = local->cached_subvol; + +                STACK_WIND (frame, dht_file_setattr_cbk, subvol, +                            subvol->fops->fsetattr, +                            fd, stbuf, valid); + +                return 0; +        } + +        local->call_cnt = layout->cnt; + +        for (i = 0; i < layout->cnt; i++) { +                STACK_WIND (frame, dht_setattr_cbk, +                            layout->list[i].xlator, +                            layout->list[i].xlator->fops->fsetattr, +                            fd, stbuf, valid); +        } + +        return 0; + +err: +        op_errno = (op_errno == -1) ? errno : op_errno; +        DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); + +        return 0; +} diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c index 1c90c61d6..2186b064a 100644 --- a/xlators/cluster/dht/src/dht-linkfile.c +++ b/xlators/cluster/dht/src/dht-linkfile.c @@ -140,13 +140,14 @@ dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,                  goto err;          } -        unlink_local = dht_local_init (unlink_frame); +        /* Using non-fop value here, as anyways, 'local->fop' is not used in +           this particular case */ +        unlink_local = dht_local_init (unlink_frame, loc, NULL, +                                       GF_FOP_MAXVALUE);          if (!unlink_local) {                  goto err;          } -        loc_copy (&unlink_local->loc, loc); -          STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,                      subvol, subvol->fops->unlink,                      &unlink_local->loc); diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 60008f2bd..7b04e8a2d 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -83,15 +83,6 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,                          break;          } -        /* do it regardless of all the above cases as we had to 'write' the -           given number of bytes */ -        ret = syncop_ftruncate (to, fd, offset + size); -        if (ret) { -                gf_log (THIS->name, GF_LOG_WARNING, -                        "failed to perform truncate on %s", to->name); -                goto out; -        } -          ret = size;  out:          return ret; @@ -102,8 +93,7 @@ static inline int  __is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict,                        struct iatt *stbuf)  { -        int ret           = -1; -        int open_fd_count = 0; +        int ret = -1;          if (!IA_ISREG (stbuf->ia_type)) {                  gf_log (this->name, GF_LOG_WARNING, @@ -121,14 +111,6 @@ __is_file_migratable (xlator_t *this, loc_t *loc, dict_t *rsp_dict,                  goto out;          } -        ret = dict_get_int32 (rsp_dict, GLUSTERFS_OPEN_FD_COUNT, &open_fd_count); -        if (!ret && (open_fd_count > 0)) { -                /* TODO: support migration of files with open fds */ -                gf_log (this->name, GF_LOG_WARNING, -                        "%s: file has open fds, not attempting migration", -                        loc->path); -                goto out; -        }          ret = 0;  out: @@ -137,11 +119,10 @@ out:  static inline int  __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, -                                 dict_t *dict, fd_t **dst_fd, int *need_unlink) +                                 dict_t *dict, fd_t **dst_fd)  {          xlator_t *this = NULL;          int       ret  = -1; -        mode_t    mode = 0;          fd_t     *fd   = NULL;          struct iatt new_stbuf = {0,}; @@ -154,6 +135,13 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,                  goto out;          } +        ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: failed to set gfid in dict for create", loc->path); +                goto out; +        } +          fd = fd_create (loc->inode, DHT_REBALANCE_PID);          if (!fd) {                  gf_log (this->name, GF_LOG_ERROR, @@ -163,40 +151,27 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,          }          ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL); -        if (ret) { -                gf_log (this->name, GF_LOG_DEBUG, "failed to lookup %s on %s", -                        loc->path, to->name); - -                mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); -                ret = syncop_create (to, loc, O_WRONLY, mode, fd, dict); -                if (ret < 0) { +        if (!ret) { +                /* File exits in the destination, check if gfid matches */ +                if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {                          gf_log (this->name, GF_LOG_ERROR, -                                "failed to create %s on %s", loc->path, to->name); +                                "file %s exits in %s with different gfid", +                                loc->path, to->name); +                        fd_unref (fd);                          goto out;                  } - -                *need_unlink = 1; -                goto done; -        } - -        /* File exits in the destination, just do the open if gfid matches */ -        if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { -                gf_log (this->name, GF_LOG_ERROR, -                        "file %s exits in %s with different gfid", -                        loc->path, to->name); -                fd_unref (fd); -                goto out;          } -        ret = syncop_open (to, loc, O_WRONLY, fd); +        /* Create the destination with LINKFILE mode, and linkto xattr, +           if the linkfile already exists, it will just open the file */ +        ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, +                             dict);          if (ret < 0) {                  gf_log (this->name, GF_LOG_ERROR, -                        "failed to open file %s on %s", -                        loc->path, to->name); -                fd_unref (fd); +                        "failed to create %s on %s", loc->path, to->name);                  goto out;          } -done: +          if (dst_fd)                  *dst_fd = fd; @@ -254,16 +229,21 @@ out:  static inline int  __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, -                             int hole_exists) +                             uint64_t ia_size, int hole_exists)  { -        int            ret    = -1; +        int            ret    = 0;          int            count  = 0;          off_t          offset = 0;          struct iovec  *vector = NULL;          struct iobref *iobref = NULL; - -        while (1) { -                ret = syncop_readv (from, src, DHT_REBALANCE_BLKSIZE, +        uint64_t       total  = 0; +        size_t         read_size = 0; + +        /* if file size is '0', no need to enter this loop */ +        while (total < ia_size) { +                read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? +                             DHT_REBALANCE_BLKSIZE : (ia_size - total)); +                ret = syncop_readv (from, src, read_size,                                      offset, &vector, &count, &iobref);                  if (!ret || (ret < 0)) {                          break; @@ -279,6 +259,7 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,                          break;                  }                  offset += ret; +                total += ret;                  if (vector)                          GF_FREE (vector); @@ -298,6 +279,84 @@ __dht_rebalane_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,          return ret;  } + +static inline int +__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, +                               struct iatt *stbuf, fd_t **src_fd) +{ +        int          ret  = 0; +        fd_t        *fd   = NULL; +        dict_t      *dict = NULL; +        xlator_t    *this = NULL; +        struct iatt  iatt = {0,}; + +        this = THIS; + +        fd = fd_create (loc->inode, DHT_REBALANCE_PID); +        if (!fd) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: fd create failed (source)", loc->path); +                ret = -1; +                goto out; +        } + +        ret = syncop_open (from, loc, O_RDWR, fd); +        if (ret == -1) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to open file %s on %s", +                        loc->path, from->name); +                goto out; +        } + +        ret = -1; +        dict = dict_new (); +        if (!dict) +                goto out; + +        ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to set xattr in dict for %s (linkto:%s)", +                        loc->path, to->name); +                goto out; +        } + +        /* Once the migration starts, the source should have 'linkto' key set +           to show which is the target, so other clients can work around it */ +        ret = syncop_setxattr (from, loc, dict, 0); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to set xattr on %s in %s", +                        loc->path, from->name); +                goto out; +        } + +        /* mode should be (+S+T) to indicate migration is in progress */ +        iatt.ia_prot = stbuf->ia_prot; +        iatt.ia_type = stbuf->ia_type; +        iatt.ia_prot.sticky = 1; +        iatt.ia_prot.sgid = 1; + +        ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to set mode on %s in %s", +                        loc->path, from->name); +                goto out; +        } + +        if (src_fd) +                *src_fd = fd; + +        /* success */ +        ret = 0; +out: +        if (dict) +                dict_unref (dict); + +        return ret; +} +  int  dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                    int flag) @@ -305,13 +364,13 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          int             ret            = -1;          struct iatt     new_stbuf      = {0,};          struct iatt     stbuf          = {0,}; +        struct iatt     empty_iatt     = {0,};          fd_t           *src_fd         = NULL;          fd_t           *dst_fd         = NULL;          dict_t         *dict           = NULL;          dict_t         *xattr          = NULL;          dict_t         *rsp_dict       = NULL;          int             file_has_holes = 0; -        int             need_unlink    = 0;          gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",                  loc->path, from->name, to->name); @@ -326,6 +385,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                          "%s: failed to set fd-count key in dict, may attempt "                          "migration of file which has open fds", loc->path); +        /* Phase 1 - Data migration is in progress from now on */          ret = syncop_lookup (from, loc, dict, &stbuf, &rsp_dict, NULL);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s", @@ -338,9 +398,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          if (ret)                  goto out; -        /* create the destination */ -        ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd, -                                               &need_unlink); +        /* create the destination, with required modes/xattr */ +        ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd);          if (ret)                  goto out; @@ -351,81 +410,142 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                          goto out;          } -        /* Try to preserve 'holes' while migrating data */ -        if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) -                file_has_holes = 1; - -        src_fd = fd_create (loc->inode, DHT_REBALANCE_PID); -        if (!src_fd) { -                gf_log (this->name, GF_LOG_ERROR, -                        "%s: fd create failed (source)", loc->path); -                ret = -1; +        /* Open the source, and also update mode/xattr */ +        ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s", +                        loc->path, from->name);                  goto out;          } -        ret = syncop_open (from, loc, O_RDONLY, src_fd); -        if (ret == -1) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to open file %s on %s", +        ret = syncop_fstat (from, src_fd, &stbuf); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s",                          loc->path, from->name);                  goto out;          } +        /* Try to preserve 'holes' while migrating data */ +        if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) +                file_has_holes = 1; +          /* All I/O happens in this function */          ret = __dht_rebalane_migrate_data (from, to, src_fd, dst_fd, -                                           file_has_holes); +                                           stbuf.ia_size, file_has_holes);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",                          loc->path);                  goto out;          } -        ret = syncop_lookup (from, loc, NULL, &new_stbuf, NULL, NULL); +        /* TODO: move all xattr related operations to fd based operations */ +        ret = syncop_listxattr (from, loc, &xattr); +        if (ret == -1) +                gf_log (this->name, GF_LOG_WARNING, +                        "%s: failed to get xattr from %s", loc->path, from->name); + +        ret = syncop_setxattr (to, loc, xattr, 0); +        if (ret == -1) +                gf_log (this->name, GF_LOG_WARNING, +                        "%s: failed to set xattr on %s", loc->path, to->name); + +        /* TODO: Sync the locks */ + +        ret = syncop_fsync (to, dst_fd); +        if (ret) +                gf_log (this->name, GF_LOG_WARNING, +                        "%s: failed to fsync on %s", loc->path, to->name); + + +        /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ + +        ret = syncop_fstat (from, src_fd, &new_stbuf);          if (ret < 0) {                  /* Failed to get the stat info */                  gf_log (this->name, GF_LOG_ERROR, -                        "failed to lookup file %s on %s", +                        "failed to fstat file %s on %s",                          loc->path, from->name); -                need_unlink = 0;                  goto out;          } -        /* No need to rebalance, if there is some -           activity on source file */ -        if (new_stbuf.ia_mtime != stbuf.ia_mtime) { +        /* source would have both sticky bit and sgid bit set, reset it to 0, +           and set the source permission on destination  */ +        new_stbuf.ia_prot.sticky = 0; +        new_stbuf.ia_prot.sgid = 0; + +        /* TODO: if the source actually had sticky bit, or sgid bit set, +           we are not handling it */ + +        ret = syncop_fsetattr (to, dst_fd, &new_stbuf, +                               (GF_SET_ATTR_UID | GF_SET_ATTR_GID | +                                GF_SET_ATTR_MODE), NULL, NULL); +        if (ret) {                  gf_log (this->name, GF_LOG_WARNING, -                        "%s: ignoring destination file as source has " -                        "undergone some changes while migration was happening", -                        loc->path); -                ret = -1; -                goto out; +                        "%s: failed to perform setattr on %s", +                        loc->path, to->name);          } +        /* Because 'futimes' is not portable */          ret = syncop_setattr (to, loc, &new_stbuf, -                              (GF_SET_ATTR_UID | GF_SET_ATTR_GID | -                               GF_SET_ATTR_MODE | GF_SET_ATTR_ATIME | -                               GF_SET_ATTR_MTIME), NULL, NULL); +                              (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), +                              NULL, NULL);          if (ret) {                  gf_log (this->name, GF_LOG_WARNING,                          "%s: failed to perform setattr on %s",                          loc->path, to->name);          } -        ret = syncop_listxattr (from, loc, &xattr); -        if (ret == -1) +        /* Make the source as a linkfile first before deleting it */ +        empty_iatt.ia_prot.sticky = 1; +        ret = syncop_fsetattr (from, src_fd, &empty_iatt, +                               GF_SET_ATTR_MODE, NULL, NULL); +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING,             \ +                        "%s: failed to perform setattr on %s", +                        loc->path, from->name); +        } + +        /* Do a stat and check the gfid before unlink */ +        ret = syncop_stat (from, loc, &empty_iatt); +        if (ret) {                  gf_log (this->name, GF_LOG_WARNING, -                        "%s: failed to get xattr from %s", loc->path, from->name); +                        "%s: failed to do a stat on %s", +                        loc->path, from->name); +        } -        ret = syncop_setxattr (to, loc, xattr, 0); -        if (ret == -1) +        if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) { +                /* take out the source from namespace */ +                ret = syncop_unlink (from, loc); +                if (ret) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "%s: failed to perform unlink on %s", +                                loc->path, from->name); +                } +        } + +        /* Free up the data blocks on the source node, as the whole +           file is migrated */ +        ret = syncop_ftruncate (from, src_fd, 0); +        if (ret) {                  gf_log (this->name, GF_LOG_WARNING, -                        "%s: failed to set xattr on %s", loc->path, to->name); +                        "%s: failed to perform truncate on %s", +                        loc->path, from->name); +        } -        /* rebalance complete */ -        syncop_close (dst_fd); -        syncop_close (src_fd); -        syncop_unlink (from, loc); -        need_unlink = 0; +        /* remove the 'linkto' xattr from the destination */ +        ret = syncop_removexattr (to, loc, DHT_LINKFILE_KEY); +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "%s: failed to perform removexattr on %s", +                        loc->path, to->name); +        } + +        ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "%s: failed to lookup the file on subvolumes", +                        loc->path); +        }          gf_log (this->name, GF_LOG_INFO,                  "completed migration of %s from subvolume %s to %s", @@ -436,14 +556,10 @@ out:          if (dict)                  dict_unref (dict); -        if (ret) { -                if (dst_fd) -                        syncop_close (dst_fd); -                if (src_fd) -                        syncop_close (src_fd); -                if (need_unlink) -                        syncop_unlink (to, loc); -        } +        if (dst_fd) +                syncop_close (dst_fd); +        if (src_fd) +                syncop_close (src_fd);          return ret;  } @@ -461,8 +577,8 @@ rebalance_task (void *data)          /* This function is 'synchrounous', hence if it returns,             we are done with the task */ -        ret = dht_migrate_file (THIS, &local->loc, local->from_subvol, -                                local->to_subvol, local->flags); +        ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol, +                                local->rebalance.target_node, local->flags);          return ret;  } @@ -488,7 +604,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)                          dht_layout_unref (this, layout);                  } -                ret = dht_layout_preset (this, local->to_subvol, +                ret = dht_layout_preset (this, local->rebalance.target_node,                                           local->loc.inode);                  if (ret)                          gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index bdc4dd2b6..226ce280d 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -487,17 +487,18 @@ dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          goto err;                  } -                link_local = dht_local_init (link_frame); +                /* fop value sent as maxvalue because it is not used +                   anywhere in this case */ +                link_local = dht_local_init (link_frame, &local->loc2, NULL, +                                             GF_FOP_MAXVALUE);                  if (!link_local) {                          goto err;                  } -                loc_copy (&link_local->loc, &local->loc2);                  if (link_local->loc.inode)                          inode_unref (link_local->loc.inode);                  link_local->loc.inode = inode_ref (local->loc.inode);                  uuid_copy (link_local->gfid, local->loc.inode->gfid); -                link_frame->local = link_local;                  dht_linkfile_create (link_frame, dht_rename_links_create_cbk,                                       src_cached, dst_hashed, &link_local->loc); @@ -813,17 +814,14 @@ dht_rename (call_frame_t *frame, xlator_t *this,          if (newloc->inode)                  dst_cached = dht_subvol_get_cached (this, newloc->inode); -        local = dht_local_init (frame); +        local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);          if (!local) {                  op_errno = ENOMEM;                  goto err;          } - -        ret = loc_copy (&local->loc, oldloc); -        if (ret == -1) { -                op_errno = ENOMEM; -                goto err; -        } +        /* cached_subvol will be set from dht_local_init, reset it to NULL, +           as the logic of handling rename is different  */ +        local->cached_subvol = NULL;          ret = loc_copy (&local->loc2, newloc);          if (ret == -1) { diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 6c4c1ffcd..87a575654 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -26,7 +26,7 @@  /* TODO: add NS locking */  #include "statedump.h" -#include "dht-common.c" +#include "dht-common.h"  /* TODO:     - use volumename in xattr instead of "dht" @@ -419,23 +419,8 @@ struct xlator_fops fops = {          .mknod       = dht_mknod,          .create      = dht_create, -        .stat        = dht_stat, -        .fstat       = dht_fstat, -        .truncate    = dht_truncate, -        .ftruncate   = dht_ftruncate, -        .access      = dht_access, -        .readlink    = dht_readlink, -        .setxattr    = dht_setxattr, -        .fsetxattr   = dht_fsetxattr, -        .getxattr    = dht_getxattr, -        .removexattr = dht_removexattr,          .open        = dht_open, -        .readv       = dht_readv, -        .writev      = dht_writev, -        .flush       = dht_flush, -        .fsync       = dht_fsync,          .statfs      = dht_statfs, -        .lk          = dht_lk,          .opendir     = dht_opendir,          .readdir     = dht_readdir,          .readdirp    = dht_readdirp, @@ -446,10 +431,29 @@ struct xlator_fops fops = {          .mkdir       = dht_mkdir,          .rmdir       = dht_rmdir,          .rename      = dht_rename, -        .inodelk     = dht_inodelk, -        .finodelk    = dht_finodelk,          .entrylk     = dht_entrylk,          .fentrylk    = dht_fentrylk, + +        /* Inode read operations */ +        .stat        = dht_stat, +        .fstat       = dht_fstat, +        .access      = dht_access, +        .readlink    = dht_readlink, +        .getxattr    = dht_getxattr, +        .readv       = dht_readv, +        .flush       = dht_flush, +        .fsync       = dht_fsync, +        .inodelk     = dht_inodelk, +        .finodelk    = dht_finodelk, +        .lk          = dht_lk, + +        /* Inode write operations */ +        .removexattr = dht_removexattr, +        .setxattr    = dht_setxattr, +        .fsetxattr   = dht_fsetxattr, +        .truncate    = dht_truncate, +        .ftruncate   = dht_ftruncate, +        .writev      = dht_writev,          .xattrop     = dht_xattrop,          .fxattrop    = dht_fxattrop,          .setattr     = dht_setattr, diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 845c6b74e..9dcf224d1 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -23,7 +23,7 @@  #include "config.h"  #endif -#include "dht-common.c" +#include "dht-common.h"  /* TODO: all 'TODO's in dht.c holds good */ @@ -169,21 +169,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);          if (!local) {                  op_errno = ENOMEM;                  goto err;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = errno; -                gf_log (this->name, GF_LOG_DEBUG, -                        "copying location failed for path=%s", -                        loc->path); -                goto err; -        } -          if (xattr_req) {                  local->xattr_req = dict_ref (xattr_req);          } else { @@ -191,14 +182,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,          }          hashed_subvol = dht_subvol_get_hashed (this, &local->loc); -        cached_subvol = dht_subvol_get_cached (this, local->loc.inode); +        cached_subvol = local->cached_subvol; -        local->cached_subvol = cached_subvol;          local->hashed_subvol = hashed_subvol;          if (is_revalidate (loc)) { -                local->layout = layout = dht_layout_get (this, loc->inode); - +                layout = local->layout;                  if (!layout) {                          gf_log (this->name, GF_LOG_DEBUG,                                  "revalidate without cache. path=%s", @@ -215,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,                          goto do_fresh_lookup;                  } -                local->inode    = inode_ref (loc->inode); +                local->inode = inode_ref (loc->inode);                  local->call_cnt = layout->cnt;                  call_cnt = local->call_cnt; @@ -314,7 +303,6 @@ nufa_create (call_frame_t *frame, xlator_t *this,          xlator_t    *subvol = NULL;          xlator_t    *avail_subvol = NULL;          int          op_errno = -1; -        int          ret = -1;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -324,7 +312,7 @@ nufa_create (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -348,13 +336,6 @@ nufa_create (call_frame_t *frame, xlator_t *this,          if (subvol != avail_subvol) {                  /* create a link file instead of actual file */ -                ret = loc_copy (&local->loc, loc); -                if (ret == -1) { -                        op_errno = ENOMEM; -                        goto err; -                } - -                local->fd = fd_ref (fd);                  local->params = dict_ref (params);                  local->mode = mode;                  local->flags = flags; @@ -421,7 +402,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,          xlator_t    *subvol = NULL;          xlator_t    *avail_subvol = NULL;          int          op_errno = -1; -        int          ret = -1;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -431,7 +411,7 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -456,11 +436,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,          if (avail_subvol != subvol) {                  /* Create linkfile first */ -                ret = loc_copy (&local->loc, loc); -                if (ret == -1) { -                        op_errno = ENOMEM; -                        goto err; -                }                  local->params = dict_ref (params);                  local->mode = mode; diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index e6a2e5d5c..fd3f22ea0 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -23,7 +23,7 @@  #include "config.h"  #endif -#include "dht-common.c" +#include "dht-common.h"  #include "dht-mem-types.h"  #include <sys/time.h> @@ -246,21 +246,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,          conf = this->private; -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);          if (!local) {                  op_errno = ENOMEM;                  goto err;          } -        ret = loc_dup (loc, &local->loc); -        if (ret == -1) { -                op_errno = errno; -                gf_log (this->name, GF_LOG_DEBUG, -                        "copying location failed for path=%s", -                        loc->path); -                goto err; -        } -          if (xattr_req) {                  local->xattr_req = dict_ref (xattr_req);          } else { @@ -268,14 +259,12 @@ switch_lookup (call_frame_t *frame, xlator_t *this,          }          hashed_subvol = dht_subvol_get_hashed (this, &local->loc); -        cached_subvol = dht_subvol_get_cached (this, local->loc.inode); +        cached_subvol = local->cached_subvol; -        local->cached_subvol = cached_subvol;          local->hashed_subvol = hashed_subvol;          if (is_revalidate (loc)) { -                local->layout = layout = dht_layout_get (this, loc->inode); - +                layout = local->layout;                  if (!layout) {                          gf_log (this->name, GF_LOG_DEBUG,                                  "revalidate without cache. path=%s", @@ -418,7 +407,6 @@ switch_create (call_frame_t *frame, xlator_t *this,          xlator_t    *subvol = NULL;          xlator_t    *avail_subvol = NULL;          int          op_errno = -1; -        int          ret = -1;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -428,7 +416,7 @@ switch_create (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -451,13 +439,6 @@ switch_create (call_frame_t *frame, xlator_t *this,          if (subvol != avail_subvol) {                  /* create a link file instead of actual file */ -                ret = loc_copy (&local->loc, loc); -                if (ret == -1) { -                        op_errno = ENOMEM; -                        goto err; -                } - -                local->fd = fd_ref (fd);                  local->mode = mode;                  local->flags = flags; @@ -520,7 +501,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this,          xlator_t    *subvol = NULL;          xlator_t    *avail_subvol = NULL;          int          op_errno = -1; -        int          ret = -1;          VALIDATE_OR_GOTO (frame, err);          VALIDATE_OR_GOTO (this, err); @@ -530,7 +510,7 @@ switch_mknod (call_frame_t *frame, xlator_t *this,          dht_get_du_info (frame, this, loc); -        local = dht_local_init (frame); +        local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);          if (!local) {                  op_errno = ENOMEM;                  goto err; @@ -554,11 +534,6 @@ switch_mknod (call_frame_t *frame, xlator_t *this,          if (avail_subvol != subvol) {                  /* Create linkfile first */ -                ret = loc_copy (&local->loc, loc); -                if (ret == -1) { -                        op_errno = ENOMEM; -                        goto err; -                }                  local->params = dict_ref (params);                  local->mode = mode; diff --git a/xlators/protocol/server/src/server3_1-fops.c b/xlators/protocol/server/src/server3_1-fops.c index 5b6bf29fc..e869cb48e 100644 --- a/xlators/protocol/server/src/server3_1-fops.c +++ b/xlators/protocol/server/src/server3_1-fops.c @@ -2920,13 +2920,18 @@ server_create (rpcsvc_request_t *req)                  buf = NULL;          } -        state->resolve.type   = RESOLVE_NOT;          state->resolve.path   = gf_strdup (args.path);          state->resolve.bname  = gf_strdup (args.bname);          state->mode           = args.mode;          state->flags          = gf_flags_to_flags (args.flags);          memcpy (state->resolve.pargfid, args.pargfid, 16); +        if (state->flags & O_EXCL) { +                state->resolve.type = RESOLVE_NOT; +        } else { +                state->resolve.type = RESOLVE_DONTCARE; +        } +          ret = 0;          resolve_and_resume (frame, server_create_resume); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 30f246c3e..09818ce89 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3218,8 +3218,8 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,          if (op_ret == -1) {                  op_errno = errno;                  gf_log (this->name, GF_LOG_ERROR, -                        "ftruncate failed on fd=%p: %s", -                        fd, strerror (errno)); +                        "ftruncate failed on fd=%p (%"PRId64": %s", +                        fd, offset, strerror (errno));                  goto out;          }  | 
