diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-helper.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 326 |
1 files changed, 243 insertions, 83 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 055ea7cb0..311a48112 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -18,6 +18,28 @@ #include "xlator.h" #include "dht-common.h" +static inline int +dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol) +{ + uint64_t tmp_subvol = 0; + + tmp_subvol = (long)subvol; + return inode_ctx_set1 (inode, this, &tmp_subvol); +} + +int +dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol) +{ + int ret = -1; + uint64_t tmp_subvol = 0; + + ret = inode_ctx_get1 (inode, this, &tmp_subvol); + if (tmp_subvol && subvol) + *subvol = (xlator_t *)tmp_subvol; + + return ret; +} + int dht_frame_return (call_frame_t *frame) @@ -40,6 +62,43 @@ dht_frame_return (call_frame_t *frame) } +static uint64_t +dht_bits_for (uint64_t num) +{ + uint64_t bits = 0, ctrl = 1; + + while (ctrl < num) { + ctrl *= 2; + bits ++; + } + + return bits; +} + +/* + * A slightly "updated" version of the algorithm described in the commit log + * is used here. + * + * The only enhancement is that: + * + * - The number of bits used by the backend filesystem for HUGE d_off which + * is described as 63, and + * - The number of bits used by the d_off presented by the transformation + * upwards which is described as 64, are both made "configurable." + */ + + +#define BACKEND_D_OFF_BITS 63 +#define PRESENT_D_OFF_BITS 63 + +#define ONE 1ULL +#define MASK (~0ULL) +#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) +#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) + +#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) +#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) + int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) { @@ -47,6 +106,9 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) int cnt = 0; int max = 0; uint64_t y = 0; + uint64_t hi_mask = 0; + uint64_t off_mask = 0; + int max_bits = 0; if (x == ((uint64_t) -1)) { y = (uint64_t) -1; @@ -60,7 +122,23 @@ dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) max = conf->subvolume_cnt; cnt = dht_subvol_cnt (this, subvol); - y = ((x * max) + cnt); + if (max == 1) { + y = x; + goto out; + } + + max_bits = dht_bits_for (max); + + hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); + + if (x & hi_mask) { + /* HUGE d_off */ + off_mask = MASK << max_bits; + y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; + } else { + /* small d_off */ + y = ((x * max) + cnt); + } out: if (y_p) @@ -135,16 +213,38 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, int max = 0; uint64_t x = 0; xlator_t *subvol = 0; + int max_bits = 0; + uint64_t off_mask = 0; + uint64_t host_mask = 0; if (!this->private) - goto out; + return -1; conf = this->private; max = conf->subvolume_cnt; - cnt = y % max; - x = y / max; + if (max == 1) { + x = y; + cnt = 0; + goto out; + } + + if (y & TOP_BIT) { + /* HUGE d_off */ + max_bits = dht_bits_for (max); + off_mask = (MASK << max_bits); + host_mask = ~(off_mask); + + x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; + + cnt = y & host_mask; + } else { + /* small d_off */ + cnt = y % max; + x = y / max; + } +out: subvol = conf->subvolumes[cnt]; if (subvol_p) @@ -153,7 +253,6 @@ dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, if (x_p) *x_p = x; -out: return 0; } @@ -263,20 +362,6 @@ out: return local; } - -char * -basestr (const char *str) -{ - char *basestr = NULL; - - basestr = strrchr (str, '/'); - if (basestr) - basestr ++; - - return basestr; -} - - xlator_t * dht_first_up_subvol (xlator_t *this) { @@ -428,7 +513,36 @@ out: return next; } +/* This func wraps around, if prev is actually the last subvol. + */ +xlator_t * +dht_subvol_next_available (xlator_t *this, xlator_t *prev) +{ + dht_conf_t *conf = NULL; + int i = 0; + xlator_t *next = NULL; + conf = this->private; + if (!conf) + goto out; + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev) { + /* if prev is last in conf->subvolumes, then wrap + * around. + */ + if ((i + 1) < conf->subvolume_cnt) { + next = conf->subvolumes[i + 1]; + } else { + next = conf->subvolumes[0]; + } + break; + } + } + +out: + return next; +} int dht_subvol_cnt (xlator_t *this, xlator_t *subvol) { @@ -620,20 +734,36 @@ dht_migration_complete_check_task (void *data) call_frame_t *frame = NULL; loc_t tmp_loc = {0,}; char *path = NULL; + dht_conf_t *conf = NULL; + inode_t *inode = NULL; + fd_t *iter_fd = NULL; + uint64_t tmp_subvol = 0; + int open_failed = 0; this = THIS; frame = data; local = frame->local; + conf = this->private; src_node = local->cached_subvol; - /* getxattr on cached_subvol for 'linkto' value */ - if (!local->loc.inode) + if (!local->loc.inode && !local->fd) + goto out; + + inode = (!local->fd) ? local->loc.inode : local->fd->inode; + + /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr + * as root:root. If a fd is already open, access check wont be done*/ + + if (!local->loc.inode) { ret = syncop_fgetxattr (src_node, local->fd, &dict, - DHT_LINKFILE_KEY); - else + conf->link_xattr_name); + } else { + SYNCTASK_SETID (0, 0); ret = syncop_getxattr (src_node, &local->loc, &dict, - DHT_LINKFILE_KEY); + conf->link_xattr_name); + SYNCTASK_SETID (frame->root->uid, frame->root->gid); + } if (!ret) dst_node = dht_linkfile_subvol (this, NULL, NULL, dict); @@ -684,10 +814,7 @@ dht_migration_complete_check_task (void *data) /* update inode ctx (the layout) */ dht_layout_unref (this, local->layout); - if (!local->loc.inode) - ret = dht_layout_preset (this, dst_node, local->fd->inode); - else - ret = dht_layout_preset (this, dst_node, local->loc.inode); + ret = dht_layout_preset (this, dst_node, inode); if (ret != 0) { gf_log (this->name, GF_LOG_DEBUG, "%s: could not set preset layout for subvol %s", @@ -705,10 +832,7 @@ dht_migration_complete_check_task (void *data) goto out; } - if (!local->loc.inode) - ret = dht_layout_set (this, local->fd->inode, layout); - else - ret = dht_layout_set (this, local->loc.inode, layout); + ret = dht_layout_set (this, inode, layout); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set the new layout", @@ -719,42 +843,46 @@ dht_migration_complete_check_task (void *data) local->cached_subvol = dst_node; ret = 0; - if (!local->fd) + /* once we detect the migration complete, the inode-ctx2 is no more + required.. delete the ctx and also, it means, open() already + done on all the fd of inode */ + ret = inode_ctx_reset1 (inode, this, &tmp_subvol); + if (tmp_subvol) goto out; - /* once we detect the migration complete, the fd-ctx is no more - required.. delete the ctx, and do one extra 'fd_unref' for open fd */ - ret = fd_ctx_del (local->fd, this, NULL); - if (!ret) { - fd_unref (local->fd); - ret = 0; + if (list_empty (&inode->fd_list)) goto out; - } - /* if 'local->fd' (ie, fd based operation), send a 'open()' on - destination if not already done */ - if (local->loc.inode) { - ret = syncop_open (dst_node, &local->loc, - local->fd->flags, local->fd); - } else { - tmp_loc.inode = local->fd->inode; - inode_path (local->fd->inode, NULL, &path); - if (path) - tmp_loc.path = path; - ret = syncop_open (dst_node, &tmp_loc, - local->fd->flags, local->fd); - GF_FREE (path); + /* perform open as root:root. There is window between linkfile + * creation(root:root) and setattr with the correct uid/gid + */ + SYNCTASK_SETID(0, 0); + + /* perform 'open()' on all the fd's present on the inode */ + tmp_loc.inode = inode; + inode_path (inode, NULL, &path); + if (path) + tmp_loc.path = path; + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + if (fd_is_anonymous (iter_fd)) + continue; + ret = syncop_open (dst_node, &tmp_loc, + iter_fd->flags, iter_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "failed to open " + "the fd (%p, flags=0%o) on file %s @ %s", + iter_fd, iter_fd->flags, path, dst_node->name); + open_failed = 1; + } } - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to send open() on target file at %s", - local->loc.path, dst_node->name); + GF_FREE (path); + + SYNCTASK_SETID (frame->root->uid, frame->root->gid); + if (open_failed) { + ret = -1; goto out; } - - /* need this unref for the fd on src_node */ - fd_unref (local->fd); ret = 0; out: @@ -798,20 +926,34 @@ dht_rebalance_inprogress_task (void *data) char *path = NULL; struct iatt stbuf = {0,}; loc_t tmp_loc = {0,}; + dht_conf_t *conf = NULL; + inode_t *inode = NULL; + fd_t *iter_fd = NULL; + int open_failed = 0; this = THIS; frame = data; local = frame->local; + conf = this->private; src_node = local->cached_subvol; - /* getxattr on cached_subvol for 'linkto' value */ - if (local->loc.inode) + if (!local->loc.inode && !local->fd) + goto out; + + inode = (!local->fd) ? local->loc.inode : local->fd->inode; + + /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr + * as root:root. If a fd is already open, access check wont be done*/ + if (local->loc.inode) { + SYNCTASK_SETID (0, 0); ret = syncop_getxattr (src_node, &local->loc, &dict, - DHT_LINKFILE_KEY); - else + conf->link_xattr_name); + SYNCTASK_SETID (frame->root->uid, frame->root->gid); + } else { ret = syncop_fgetxattr (src_node, local->fd, &dict, - DHT_LINKFILE_KEY); + conf->link_xattr_name); + } if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -853,33 +995,46 @@ dht_rebalance_inprogress_task (void *data) ret = 0; - if (!local->fd) - goto out; + if (list_empty (&inode->fd_list)) + goto done; + + /* perform open as root:root. There is window between linkfile + * creation(root:root) and setattr with the correct uid/gid + */ + SYNCTASK_SETID (0, 0); + + tmp_loc.inode = inode; + inode_path (inode, NULL, &path); + if (path) + tmp_loc.path = path; + + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + if (fd_is_anonymous (iter_fd)) + continue; - if (local->loc.inode) { - ret = syncop_open (dst_node, &local->loc, - local->fd->flags, local->fd); - } else { - tmp_loc.inode = local->fd->inode; - inode_path (local->fd->inode, NULL, &path); - if (path) - tmp_loc.path = path; ret = syncop_open (dst_node, &tmp_loc, - local->fd->flags, local->fd); - GF_FREE (path); + iter_fd->flags, iter_fd); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "failed to send open " + "the fd (%p, flags=0%o) on file %s @ %s", + iter_fd, iter_fd->flags, path, dst_node->name); + open_failed = 1; + } } + GF_FREE (path); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to send open() on target file at %s", - local->loc.path, dst_node->name); + SYNCTASK_SETID (frame->root->uid, frame->root->gid); + + if (open_failed) { + ret = -1; goto out; } - ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node); +done: + ret = dht_inode_ctx_set1 (this, inode, dst_node); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set fd-ctx target file at %s", + "%s: failed to set inode-ctx target file at %s", local->loc.path, dst_node->name); goto out; } @@ -931,6 +1086,9 @@ dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, dht_stat_time_t *time = 0; int ret = -1; + GF_VALIDATE_OR_GOTO (this->name, stat, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); + ret = dht_inode_ctx_get (inode, this, &ctx); if (ret) { @@ -948,6 +1106,8 @@ dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime, stat->ia_atime_nsec, inode, post); + ret = dht_inode_ctx_set (inode, this, ctx); +out: return 0; } |
