diff options
author | Krutika Dhananjay <kdhananj@redhat.com> | 2015-04-01 16:00:05 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-04-08 15:23:29 +0000 |
commit | 4be65bb376e2fffd7175f579724aae4c5718d57c (patch) | |
tree | af258397a2efdd5f5a6c9ae0309c05c11a9f386f /xlators | |
parent | 346e64e578573296028efa516cd93cfaf2b17b8f (diff) |
features/shard: Introduce file size xattr
With each inode write FOP, the size and block count of the file will be
updated within the xattr. The xattr holds four 64-bit fields; two of them are
intentionally left blank for now, for future use when consistency
guarantees are introduced later in sharding.
Change-Id: I40a2e700150c1f199a6bf87909f063c84ab7bb43
BUG: 1207603
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/10097
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/shard/src/shard-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.c | 280 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.h | 30 |
3 files changed, 268 insertions, 43 deletions
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h index 4bdcbba787c..5a043278b7d 100644 --- a/xlators/features/shard/src/shard-mem-types.h +++ b/xlators/features/shard/src/shard-mem-types.h @@ -17,6 +17,7 @@ enum gf_shard_mem_types_ { gf_shard_mt_inode_list, gf_shard_mt_inode_ctx_t, gf_shard_mt_iovec, + gf_shard_mt_uint64_t, gf_shard_mt_end }; #endif diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 2dccbb0da8d..506c22e2400 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -212,6 +212,29 @@ shard_local_wipe (shard_local_t *local) } int +shard_modify_size_and_block_count (struct iatt *stbuf, dict_t *dict) +{ + int ret = -1; + void *size_attr = NULL; + uint64_t size_array[4]; + + ret = dict_get_ptr (dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + GF_XATTR_SHARD_FILE_SIZE " for %s", + uuid_utoa (stbuf->ia_gfid)); + return ret; + } + + memcpy (size_array, size_attr, sizeof (size_array)); + + stbuf->ia_size = ntoh64 (size_array[0]); + stbuf->ia_blocks = ntoh64 (size_array[2]); + + return 0; +} + +int shard_call_count_return (call_frame_t *frame) { int call_count = 0; @@ -244,15 +267,19 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; ret = dict_get_uint64 (xdata, GF_XATTR_SHARD_BLOCK_SIZE, &size); - if (!ret && size) { + if (!ret) { ctx_tmp.block_size = ntoh64 (size); ctx_tmp.mode = st_mode_from_ia (buf->ia_prot, buf->ia_type); ctx_tmp.rdev = buf->ia_rdev; - ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp); - if (ret) - gf_log (this->name, GF_LOG_WARNING, "Failed to set " - "inode ctx for %s", uuid_utoa (buf->ia_gfid)); } + /* else it is assumed that the file was created prior to enabling + * sharding on the volume. 
+ */ + + ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx " + "for %s", uuid_utoa (buf->ia_gfid)); /* To-Do: return the call with aggregated values of ia_size and * ia_blocks @@ -287,8 +314,7 @@ shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, if (!local->xattr_req) goto err; - if ((shard_inode_ctx_get_block_size (loc->inode, this, &block_size) || - !block_size)) { + if (shard_inode_ctx_get_block_size (loc->inode, this, &block_size)) { ret = dict_set_uint64 (local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); if (ret) { @@ -407,6 +433,35 @@ unwind: } int +shard_set_size_attrs (uint64_t size, uint64_t block_count, + uint64_t **size_attr_p) +{ + int ret = -1; + uint64_t *size_attr = NULL; + + if (!size_attr_p) + goto out; + + size_attr = GF_CALLOC (4, sizeof (uint64_t), gf_shard_mt_uint64_t); + if (!size_attr) + goto out; + + size_attr[0] = hton64 (size); + /* As sharding evolves, it _may_ be necessary to embed more pieces of + * information within the same xattr. So allocating slots for them in + * advance. For now, only bytes 0-63 and 128-191 which would make up the + * current size and block count respectively of the file are valid. 
+ */ + size_attr[2] = hton64 (block_count); + + *size_attr_p = size_attr; + + ret = 0; +out: + return ret; +} + +int shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { @@ -661,11 +716,82 @@ err: } int +shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + SHARD_STACK_UNWIND (writev, frame, local->written_size, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + return 0; + +err: + SHARD_STACK_UNWIND (writev, frame, -1, local->op_errno, NULL, + NULL, NULL); + return 0; +} + +int +shard_update_file_size (call_frame_t *frame, xlator_t *this) +{ + int ret = -1; + uint64_t *size_attr = NULL; + fd_t *fd = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + + local = frame->local; + fd = local->fd; + + xattr_req = dict_new (); + if (!xattr_req) + goto err; + + ret = shard_set_size_attrs (local->postbuf.ia_size, + local->postbuf.ia_blocks, &size_attr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set size attrs for" + " %s", uuid_utoa (fd->inode->gfid)); + goto err; + } + + ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, + 8 * 4); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set key %s into " + "dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, + uuid_utoa (fd->inode->gfid)); + GF_FREE (size_attr); + goto err; + } + + STACK_WIND (frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, xattr_req, 0, NULL); + + dict_unref (xattr_req); + return 0; + +err: + if (xattr_req) + dict_unref (xattr_req); + SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + +} + +int shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int ret = 0; int call_count = 0; fd_t *anon_fd = cookie; shard_local_t *local = NULL; @@ -677,6 +803,9 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; } else { local->written_size += op_ret; + local->postbuf.ia_blocks += (postbuf->ia_blocks - + prebuf->ia_blocks); + local->postbuf.ia_size += (postbuf->ia_size - prebuf->ia_size); } if (anon_fd) @@ -684,9 +813,14 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = shard_call_count_return (frame); if (call_count == 0) { - ret = (local->op_ret < 0) ? 
local->op_ret : local->written_size; - SHARD_STACK_UNWIND (writev, frame, ret, local->op_errno, prebuf, - postbuf, xdata); + if (local->op_ret < 0) { + SHARD_STACK_UNWIND (writev, frame, local->op_ret, + local->op_errno, NULL, NULL, NULL); + } else { + if (xdata) + local->xattr_rsp = dict_ref (xdata); + shard_update_file_size (frame, this); + } } return 0; @@ -786,6 +920,72 @@ next: return 0; } +int +shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) + goto unwind; + + local->prebuf = *buf; + if (shard_modify_size_and_block_count (&local->prebuf, xdata)) { + op_ret = -1; + op_errno = EINVAL; + goto unwind; + } + + local->postbuf = local->prebuf; + + shard_writev_do (frame, this); + + return 0; + +unwind: + SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; + +} + +int +shard_lookup_base_file (call_frame_t *frame, xlator_t *this) +{ + fd_t *fd = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + + local = frame->local; + fd = local->fd; + + xattr_req = dict_new (); + if (!xattr_req) + goto err; + + local->loc.inode = inode_new (fd->inode->table); + gf_uuid_copy (local->loc.gfid, fd->inode->gfid); + + SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, fd->inode->gfid, err); + + STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this), + FIRST_CHILD(this)->fops->lookup, &local->loc, + xattr_req); + + dict_unref (xattr_req); + return 0; + +err: + if (xattr_req) + dict_unref (xattr_req); + SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + +} + void shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode, struct iatt *buf) @@ -836,7 +1036,7 @@ done: if (local->op_ret < 0) goto unwind; else - shard_writev_do (frame, this); + shard_lookup_base_file 
(frame, this); } return 0; @@ -1005,7 +1205,7 @@ done: goto unwind; } else { if (!local->eexist_count) { - shard_writev_do (frame, this); + shard_lookup_base_file (frame, this); } else { local->call_count = local->eexist_count; shard_writev_lookup_shards (frame, this); @@ -1179,7 +1379,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this) if (local->call_count) shard_writev_resume_mknod (frame, this); else - shard_writev_do (frame, this); + shard_lookup_base_file (frame, this); return 0; } @@ -1349,45 +1549,30 @@ err: return 0; } -/* shard_writev - still a WIP */ int shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { + int ret = 0; int i = 0; uint64_t block_size = 0; - uint32_t first_block = 0; - uint32_t last_block = 0; - uint32_t num_blocks = 0; - size_t total_size = 0; shard_local_t *local = NULL; shard_priv_t *priv = NULL; priv = this->private; - if (shard_inode_ctx_get_block_size (fd->inode, this, &block_size)) { - gf_log (this->name, GF_LOG_ERROR, "Failed to get inode ctx for " - "%s", uuid_utoa(fd->inode->gfid)); + ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get block size " + "for %s from its inode ctx", + uuid_utoa (fd->inode->gfid)); goto out; } - for (i = 0; i < count; i++) - total_size += vector[i].iov_len; - - first_block = get_lowest_block (offset, block_size); - last_block = get_highest_block (offset, total_size, block_size); - num_blocks = last_block - first_block + 1; - - gf_log (this->name, GF_LOG_TRACE, "gfid=%s first_block=%"PRIu32" " - "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64" " - "total_size=%lu", uuid_utoa (fd->inode->gfid), first_block, - last_block, num_blocks, offset, total_size); - - if (!block_size || - ((first_block == 0) && (first_block == last_block))) { - /* To-Do: Replace default_writev_cbk with a 
specific cbk - * that would collect total size and block count before unwind + if (!block_size) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. */ STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, @@ -1412,21 +1597,30 @@ shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (!local->vector) goto out; + for (i = 0; i < count; i++) + local->total_size += vector[i].iov_len; + local->count = count; local->offset = offset; local->flags = flags; local->iobref = iobref_ref (iobref); local->fd = fd_ref (fd); - local->first_block = first_block; - local->last_block = last_block; - local->total_size = total_size; local->block_size = block_size; - local->num_blocks = num_blocks; - local->inode_list = GF_CALLOC (num_blocks, sizeof (inode_t *), + local->first_block = get_lowest_block (offset, local->block_size); + local->last_block = get_highest_block (offset, local->total_size, + local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) goto out; + gf_log (this->name, GF_LOG_TRACE, "gfid=%s first_block=%"PRIu32" " + "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64" " + "total_size=%lu", uuid_utoa (fd->inode->gfid), + local->first_block, local->last_block, local->num_blocks, + offset, local->total_size); + local->dot_shard_loc.inode = inode_find (this->itable, priv->dot_shard_gfid); if (!local->dot_shard_loc.inode) diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 879aa030991..0eeabc51f5e 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -24,6 +24,7 @@ #define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) #define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) #define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size" +#define 
GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size" #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" #define SHARD_INODE_LRU_LIMIT 4096 @@ -70,6 +71,7 @@ #define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \ int __ret = -1; \ + uint64_t *__size_attr = NULL; \ shard_priv_t *__priv = NULL; \ \ __priv = this->private; \ @@ -84,8 +86,34 @@ goto label; \ } \ \ + __ret = shard_set_size_attrs (0, 0, &__size_attr); \ + if (__ret) \ + goto label; \ + \ + __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, \ + __size_attr, 8 * 4); \ + if (__ret) { \ + gf_log (this->name, GF_LOG_WARNING, "Failed to set key: %s " \ + "on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \ + GF_FREE (__size_attr); \ + goto label; \ + } \ +} while (0) + + +#define SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, gfid, label) do { \ + int __ret = -1; \ + \ + __ret = dict_set_uint64 (xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); \ + if (__ret) { \ + gf_log (this->name, GF_LOG_WARNING, "Failed to set dict" \ + " value: key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE, \ + uuid_utoa (gfid)); \ + goto label; \ + } \ } while (0) + typedef struct shard_priv { uint64_t block_size; uuid_t dot_shard_gfid; @@ -119,6 +147,8 @@ typedef struct shard_local { dict_t *xattr_req; dict_t *xattr_rsp; inode_t **inode_list; + struct iatt prebuf; + struct iatt postbuf; struct iovec *vector; struct iobref *iobref; struct { |