diff options
Diffstat (limited to 'xlators/features/shard')
-rw-r--r-- | xlators/features/shard/src/shard.c | 260 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.h | 43 |
2 files changed, 233 insertions, 70 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 35a4f551e96..4ccd4b01a0b 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -74,10 +74,9 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) return ret; } - int -__shard_inode_ctx_set (inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_in) +__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) { int ret = -1; shard_inode_ctx_t *ctx = NULL; @@ -86,27 +85,65 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this, if (ret) return ret; - ctx->block_size = ctx_in->block_size; - ctx->mode = ctx_in->mode; - ctx->rdev = ctx_in->rdev; + if (valid & SHARD_MASK_BLOCK_SIZE) + ctx->block_size = block_size; + + if (!stbuf) + return 0; + + if (valid & SHARD_MASK_PROT) + ctx->stat.ia_prot = stbuf->ia_prot; + + if (valid & SHARD_MASK_NLINK) + ctx->stat.ia_nlink = stbuf->ia_nlink; + + if (valid & SHARD_MASK_UID) + ctx->stat.ia_uid = stbuf->ia_uid; + + if (valid & SHARD_MASK_GID) + ctx->stat.ia_gid = stbuf->ia_gid; + + if (valid & SHARD_MASK_SIZE) + ctx->stat.ia_size = stbuf->ia_size; + + if (valid & SHARD_MASK_BLOCKS) + ctx->stat.ia_blocks = stbuf->ia_blocks; + + if (valid & SHARD_MASK_TIMES) { + SHARD_TIME_UPDATE (ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, + stbuf->ia_mtime, stbuf->ia_mtime_nsec); + SHARD_TIME_UPDATE (ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec); + SHARD_TIME_UPDATE (ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, + stbuf->ia_atime, stbuf->ia_atime_nsec); + } + + if (valid & SHARD_MASK_OTHERS) { + ctx->stat.ia_ino = stbuf->ia_ino; + gf_uuid_copy (ctx->stat.ia_gfid, stbuf->ia_gfid); + ctx->stat.ia_dev = stbuf->ia_dev; + ctx->stat.ia_type = stbuf->ia_type; + ctx->stat.ia_rdev = stbuf->ia_rdev; + ctx->stat.ia_blksize = stbuf->ia_blksize; + } return 0; } int -shard_inode_ctx_set_all (inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_in) +shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) { int ret = -1; LOCK (&inode->lock); { - ret = __shard_inode_ctx_set (inode, this, ctx_in); + ret = __shard_inode_ctx_set (inode, this, stbuf, block_size, + valid); } UNLOCK (&inode->lock); return ret; - } int @@ -158,10 +195,7 @@ __shard_inode_ctx_get_all (inode_t *inode, xlator_t *this, ctx = (shard_inode_ctx_t *) ctx_uint; - ctx_out->block_size = ctx->block_size; - ctx_out->mode = ctx->mode; - ctx_out->rdev = ctx->rdev; - + memcpy (ctx_out, ctx, sizeof (shard_inode_ctx_t)); return 0; } @@ -341,6 +375,7 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { + inode_t *inode = NULL; shard_local_t *local = NULL; local = frame->local; @@ -351,6 +386,20 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } + if (shard_modify_size_and_block_count (&local->postbuf, xdata)) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + if ((local->fd) && (local->fd->inode)) + inode = local->fd->inode; + else if (local->loc.inode) + inode = local->loc.inode; + + shard_inode_ctx_set (inode, this, &local->postbuf, 0, + SHARD_INODE_WRITE_MASK); + err: local->post_update_size_handler (frame, this); return 0; @@ -402,7 +451,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, if (!xattr_req) { local->op_ret = -1; local->op_errno = ENOMEM; - goto err; + goto out; } if (fd) @@ -410,6 +459,13 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, else inode = loc->inode; + /* If both size and block count have not changed, then skip the xattrop. + */ + if ((local->delta_size + local->hole_size == 0) && + (local->delta_blocks == 0)) { + goto out; + } + ret = shard_set_size_attrs (local->delta_size + local->hole_size, local->delta_blocks, &size_attr); if (ret) { @@ -417,7 +473,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, " %s", uuid_utoa (inode->gfid)); local->op_ret = -1; local->op_errno = ENOMEM; - goto err; + goto out; } ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, @@ -429,7 +485,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, GF_FREE (size_attr); local->op_ret = -1; local->op_errno = ENOMEM; - goto err; + goto out; } if (fd) @@ -446,7 +502,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_unref (xattr_req); return 0; -err: +out: if (xattr_req) dict_unref (xattr_req); handler (frame, this); @@ -551,27 +607,31 @@ err: } static void -shard_inode_ctx_set_if_absent (inode_t *inode, xlator_t *this, dict_t *xdata, - struct iatt *buf) +shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata, + struct iatt *buf) { int ret = 0; uint64_t size = 0; void *bsize = NULL; - shard_inode_ctx_t ctx_tmp = {0,}; if (shard_inode_ctx_get_block_size (inode, this, &size)) { + /* Fresh lookup */ ret = dict_get_ptr (xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (!ret) { - ctx_tmp.block_size = ntoh64 (*((uint64_t *)bsize)); - ctx_tmp.mode = st_mode_from_ia (buf->ia_prot, - buf->ia_type); - ctx_tmp.rdev = buf->ia_rdev; - } - ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp); - if (ret) - gf_log (this->name, GF_LOG_WARNING, "Failed to set " - "inode ctx for %s", uuid_utoa (buf->ia_gfid)); + if (!ret) + size = ntoh64 (*((uint64_t *)bsize)); + /* If the file is sharded, set its block size, otherwise just + * set 0. + */ + + shard_inode_ctx_set (inode, this, buf, size, + SHARD_MASK_BLOCK_SIZE); } + /* If the file is sharded, also set the remaining attributes, + * except for ia_size and ia_blocks. + */ + if (size) + shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK); + } int @@ -594,7 +654,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * already initialised to all zeroes, nothing more needs to be done. */ - (void) shard_inode_ctx_set_if_absent (inode, this, xdata, buf); + (void) shard_inode_ctx_update (inode, this, xdata, buf); /* Also, if the file is sharded, get the file size and block cnt xattr, * and store them in the stbuf appropriately. @@ -673,7 +733,10 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + int ret = -1; + int32_t mask = SHARD_INODE_WRITE_MASK; shard_local_t *local = NULL; + shard_inode_ctx_t ctx = {0,}; local = frame->local; @@ -690,6 +753,19 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; } + if (shard_inode_ctx_get_all (inode, this, &ctx)) + mask = SHARD_ALL_MASK; + + ret = shard_inode_ctx_set (inode, this, &local->prebuf, 0, mask); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set inode write " + "params into inode ctx for %s", + uuid_utoa (buf->ia_gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + unwind: local->handler (frame, this); return 0; @@ -699,21 +775,32 @@ int shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc, shard_post_fop_handler_t handler) { - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; + int ret = -1; + shard_local_t *local = NULL; + shard_inode_ctx_t ctx = {0,}; + dict_t *xattr_req = NULL; local = frame->local; local->handler = handler; + ret = shard_inode_ctx_get_all (loc->inode, this, &ctx); + /* By this time, inode ctx should have been created either in create, + * mknod, readdirp or lookup. If not it is a bug! + */ + if ((ret == 0) && (ctx.stat.ia_size > 0)) { + local->prebuf = ctx.stat; + goto out; + } + xattr_req = dict_new (); if (!xattr_req) { local->op_ret = -1; local->op_errno = ENOMEM; - goto err; + goto out; } SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, loc->gfid, - local, err); + local, out); STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this), FIRST_CHILD(this)->fops->lookup, loc, xattr_req); @@ -721,7 +808,7 @@ shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_unref (xattr_req); return 0; -err: +out: if (xattr_req) dict_unref (xattr_req); handler (frame, this); @@ -1502,6 +1589,10 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this) return 0; } +/* TO-DO: + * Fix updates to size and block count with racing write(s) and truncate(s). + */ + int shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) @@ -1608,17 +1699,14 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { int ret = -1; shard_local_t *local = NULL; - shard_inode_ctx_t ctx_tmp = {0,}; local = frame->local; if (op_ret == -1) goto unwind; - ctx_tmp.block_size = ntoh64 (local->block_size); - ctx_tmp.mode = st_mode_from_ia (buf->ia_prot, buf->ia_type); - ctx_tmp.rdev = buf->ia_rdev; - ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp); + ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size), + SHARD_ALL_MASK); if (ret) gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx " "for %s", uuid_utoa (inode->gfid)); @@ -2278,22 +2366,17 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { int ret = -1; shard_local_t *local = NULL; - shard_inode_ctx_t ctx_tmp = {0,}; local = frame->local; if (op_ret == -1) goto unwind; - ctx_tmp.block_size = ntoh64 (local->block_size); - ctx_tmp.mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - ctx_tmp.rdev = stbuf->ia_rdev; - ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to set block size " - "for %s in inode ctx", uuid_utoa (inode->gfid)); - goto unwind; - } + ret = shard_inode_ctx_set (inode, this, stbuf, + ntoh64 (local->block_size), SHARD_ALL_MASK); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx " + "for %s", uuid_utoa (inode->gfid)); unwind: SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, @@ -2600,6 +2683,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this, int ret = 0; int call_count = 0; char path[PATH_MAX] = {0,}; + mode_t mode = 0; char *bname = NULL; shard_priv_t *priv = NULL; shard_inode_ctx_t ctx_tmp = {0,}; @@ -2627,6 +2711,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this, local->op_errno = ENOMEM; goto err; } + mode = st_mode_from_ia (ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); while (shard_idx_iter <= last_block) { if (local->inode_list[i]) { @@ -2686,7 +2771,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this, (void *) (long) shard_idx_iter, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &loc, - ctx_tmp.mode, ctx_tmp.rdev, 0, xattr_req); + mode, ctx_tmp.stat.ia_rdev, 0, xattr_req); loc_wipe (&loc); dict_unref (xattr_req); @@ -2929,10 +3014,52 @@ shard_post_update_size_writev_handler (call_frame_t *frame, xlator_t *this) } int +__shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; + + ret = __inode_ctx_get (inode, this, &ctx_uint); + if (ret < 0) + return ret; + + ctx = (shard_inode_ctx_t *) ctx_uint; + + if (local->offset + local->total_size > ctx->stat.ia_size) { + local->delta_size = (local->offset + local->total_size) - + ctx->stat.ia_size; + ctx->stat.ia_size += (local->delta_size); + } else { + local->delta_size = 0; + } + + return 0; +} + +int +shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; + + LOCK (&inode->lock); + { + ret = __shard_get_delta_size_from_inode_ctx (local, inode, + this); + } + UNLOCK (&inode->lock); + + return ret; +} + +int shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { + int ret = 0; int call_count = 0; fd_t *anon_fd = cookie; shard_local_t *local = NULL; @@ -2946,6 +3073,14 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->written_size += op_ret; local->delta_blocks += (postbuf->ia_blocks - prebuf->ia_blocks); local->delta_size += (postbuf->ia_size - prebuf->ia_size); + ret = shard_inode_ctx_set (local->fd->inode, this, postbuf, 0, + SHARD_MASK_TIMES); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to set " + "times in the inode ctx. Shard = %s, base file " + "gfid = %s", uuid_utoa (postbuf->ia_gfid), + uuid_utoa (local->fd->inode->gfid)); + } } if (anon_fd) @@ -2958,12 +3093,9 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, SHARD_STACK_UNWIND (writev, frame, local->op_ret, local->op_errno, NULL, NULL, NULL); } else { - if (local->is_write_extending) - local->delta_size = (local->offset + - local->total_size) - - local->prebuf.ia_size; - else - local->delta_size = 0; + shard_get_delta_size_from_inode_ctx (local, + local->fd->inode, + this); local->hole_size = 0; if (xdata) local->xattr_rsp = dict_ref (xdata); @@ -3138,12 +3270,6 @@ shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this) local->postbuf = local->prebuf; - /* At this point, calculate the size of the hole if it is going to be - * created as part of this write. - */ - if (local->offset + local->total_size > local->prebuf.ia_size) - local->is_write_extending = _gf_true; - if (local->create_count) shard_common_resume_mknod (frame, this, shard_post_mknod_writev_handler); @@ -3462,8 +3588,8 @@ shard_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!entry->inode) continue; - shard_inode_ctx_set_if_absent (entry->inode, this, entry->dict, - &entry->d_stat); + shard_inode_ctx_update (entry->inode, this, entry->dict, + &entry->d_stat); } local->op_ret = op_ret; diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 8daf2272dd3..ce6952f91fd 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -24,6 +24,32 @@ #define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" #define SHARD_INODE_LRU_LIMIT 4096 +/** + * Bit masks for the valid flag, which is used while updating ctx +**/ +#define SHARD_MASK_BLOCK_SIZE (1 << 0) +#define SHARD_MASK_PROT (1 << 1) +#define SHARD_MASK_NLINK (1 << 2) +#define SHARD_MASK_UID (1 << 3) +#define SHARD_MASK_GID (1 << 4) +#define SHARD_MASK_SIZE (1 << 6) +#define SHARD_MASK_BLOCKS (1 << 7) +#define SHARD_MASK_TIMES (1 << 8) +#define SHARD_MASK_OTHERS (1 << 9) + +#define SHARD_INODE_WRITE_MASK (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \ + | SHARD_MASK_TIMES) + +#define SHARD_LOOKUP_MASK (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID \ + | SHARD_MASK_GID | SHARD_MASK_TIMES \ + | SHARD_MASK_OTHERS) + +#define SHARD_ALL_MASK (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT \ + | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID \ + | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \ + | SHARD_MASK_TIMES | SHARD_MASK_OTHERS) + + #define get_lowest_block(off, shard_size) ((off) / (shard_size)) #define get_highest_block(off, len, shard_size) \ (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size)) @@ -130,6 +156,19 @@ } \ } while (0) +#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) do { \ + if (ctx_sec == new_sec) \ + ctx_nsec = new_nsec = max (new_nsec, ctx_nsec); \ + else if (ctx_sec > new_sec) { \ + new_sec = ctx_sec; \ + new_nsec = ctx_nsec; \ + } else { \ + ctx_sec = new_sec; \ + ctx_nsec = new_nsec; \ + } \ + } while (0) + + typedef struct shard_priv { uint64_t block_size; uuid_t dot_shard_gfid; @@ -199,7 +238,6 @@ typedef struct shard_local { gf_dirent_t entries_head; gf_boolean_t is_set_fsid; gf_boolean_t list_inited; - gf_boolean_t is_write_extending; shard_post_fop_handler_t handler; shard_post_lookup_shards_fop_handler_t pls_fop_handler; shard_post_resolve_fop_handler_t post_res_handler; @@ -213,10 +251,9 @@ typedef struct shard_local { } shard_local_t; typedef struct shard_inode_ctx { - uint32_t rdev; uint64_t block_size; /* The block size with which this inode is sharded */ - mode_t mode; + struct iatt stat; } shard_inode_ctx_t; #endif /* __SHARD_H__ */ |