summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2015-09-07 14:57:24 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-09-22 22:51:38 -0700
commit5ad1add883ee54f83a42f0f6d6ae2c6e3f9a543a (patch)
tree74b1e7ff76ba333dc477947ebdb59f7ce521c21e
parent070d93afe63f377539b56ed646b714bb281bd240 (diff)
features/shard: Performance improvements in IO path
This is patch 1/2 of the performance improvement work for sharding in the IO path. What this patch does: Since the primary use-case where sharding is targeted - VM store - is a single-writer workload, instead of performing lookup on the base file everytime to gather the size and block count from the backend in reads, writes and truncate, now the size and block count is also cached and kept up-to-date after every inode write in the inode ctx. TO-DO: Make changes in rename, link, unlink, [f]setattr and [f]stat to keep the relevant iatt members up-to-date in the inode ctx. Change-Id: Ica87d020dabc3a3dbccec814b26b01d6a629ff4d BUG: 1258905 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/12126 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r--xlators/features/shard/src/shard.c260
-rw-r--r--xlators/features/shard/src/shard.h43
2 files changed, 233 insertions, 70 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 35a4f551e96..4ccd4b01a0b 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -74,10 +74,9 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
return ret;
}
-
int
-__shard_inode_ctx_set (inode_t *inode, xlator_t *this,
- shard_inode_ctx_t *ctx_in)
+__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
{
int ret = -1;
shard_inode_ctx_t *ctx = NULL;
@@ -86,27 +85,65 @@ __shard_inode_ctx_set (inode_t *inode, xlator_t *this,
if (ret)
return ret;
- ctx->block_size = ctx_in->block_size;
- ctx->mode = ctx_in->mode;
- ctx->rdev = ctx_in->rdev;
+ if (valid & SHARD_MASK_BLOCK_SIZE)
+ ctx->block_size = block_size;
+
+ if (!stbuf)
+ return 0;
+
+ if (valid & SHARD_MASK_PROT)
+ ctx->stat.ia_prot = stbuf->ia_prot;
+
+ if (valid & SHARD_MASK_NLINK)
+ ctx->stat.ia_nlink = stbuf->ia_nlink;
+
+ if (valid & SHARD_MASK_UID)
+ ctx->stat.ia_uid = stbuf->ia_uid;
+
+ if (valid & SHARD_MASK_GID)
+ ctx->stat.ia_gid = stbuf->ia_gid;
+
+ if (valid & SHARD_MASK_SIZE)
+ ctx->stat.ia_size = stbuf->ia_size;
+
+ if (valid & SHARD_MASK_BLOCKS)
+ ctx->stat.ia_blocks = stbuf->ia_blocks;
+
+ if (valid & SHARD_MASK_TIMES) {
+ SHARD_TIME_UPDATE (ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+ stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+ SHARD_TIME_UPDATE (ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+ SHARD_TIME_UPDATE (ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+ stbuf->ia_atime, stbuf->ia_atime_nsec);
+ }
+
+ if (valid & SHARD_MASK_OTHERS) {
+ ctx->stat.ia_ino = stbuf->ia_ino;
+ gf_uuid_copy (ctx->stat.ia_gfid, stbuf->ia_gfid);
+ ctx->stat.ia_dev = stbuf->ia_dev;
+ ctx->stat.ia_type = stbuf->ia_type;
+ ctx->stat.ia_rdev = stbuf->ia_rdev;
+ ctx->stat.ia_blksize = stbuf->ia_blksize;
+ }
return 0;
}
int
-shard_inode_ctx_set_all (inode_t *inode, xlator_t *this,
- shard_inode_ctx_t *ctx_in)
+shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
{
int ret = -1;
LOCK (&inode->lock);
{
- ret = __shard_inode_ctx_set (inode, this, ctx_in);
+ ret = __shard_inode_ctx_set (inode, this, stbuf, block_size,
+ valid);
}
UNLOCK (&inode->lock);
return ret;
-
}
int
@@ -158,10 +195,7 @@ __shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,
ctx = (shard_inode_ctx_t *) ctx_uint;
- ctx_out->block_size = ctx->block_size;
- ctx_out->mode = ctx->mode;
- ctx_out->rdev = ctx->rdev;
-
+ memcpy (ctx_out, ctx, sizeof (shard_inode_ctx_t));
return 0;
}
@@ -341,6 +375,7 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata)
{
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
local = frame->local;
@@ -351,6 +386,20 @@ shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
+ if (shard_modify_size_and_block_count (&local->postbuf, xdata)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ if ((local->fd) && (local->fd->inode))
+ inode = local->fd->inode;
+ else if (local->loc.inode)
+ inode = local->loc.inode;
+
+ shard_inode_ctx_set (inode, this, &local->postbuf, 0,
+ SHARD_INODE_WRITE_MASK);
+
err:
local->post_update_size_handler (frame, this);
return 0;
@@ -402,7 +451,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!xattr_req) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
if (fd)
@@ -410,6 +459,13 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
else
inode = loc->inode;
+ /* If both size and block count have not changed, then skip the xattrop.
+ */
+ if ((local->delta_size + local->hole_size == 0) &&
+ (local->delta_blocks == 0)) {
+ goto out;
+ }
+
ret = shard_set_size_attrs (local->delta_size + local->hole_size,
local->delta_blocks, &size_attr);
if (ret) {
@@ -417,7 +473,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
" %s", uuid_utoa (inode->gfid));
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr,
@@ -429,7 +485,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_FREE (size_attr);
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
if (fd)
@@ -446,7 +502,7 @@ shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd,
dict_unref (xattr_req);
return 0;
-err:
+out:
if (xattr_req)
dict_unref (xattr_req);
handler (frame, this);
@@ -551,27 +607,31 @@ err:
}
static void
-shard_inode_ctx_set_if_absent (inode_t *inode, xlator_t *this, dict_t *xdata,
- struct iatt *buf)
+shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata,
+ struct iatt *buf)
{
int ret = 0;
uint64_t size = 0;
void *bsize = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
if (shard_inode_ctx_get_block_size (inode, this, &size)) {
+ /* Fresh lookup */
ret = dict_get_ptr (xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
- if (!ret) {
- ctx_tmp.block_size = ntoh64 (*((uint64_t *)bsize));
- ctx_tmp.mode = st_mode_from_ia (buf->ia_prot,
- buf->ia_type);
- ctx_tmp.rdev = buf->ia_rdev;
- }
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING, "Failed to set "
- "inode ctx for %s", uuid_utoa (buf->ia_gfid));
+ if (!ret)
+ size = ntoh64 (*((uint64_t *)bsize));
+ /* If the file is sharded, set its block size, otherwise just
+ * set 0.
+ */
+
+ shard_inode_ctx_set (inode, this, buf, size,
+ SHARD_MASK_BLOCK_SIZE);
}
+ /* If the file is sharded, also set the remaining attributes,
+ * except for ia_size and ia_blocks.
+ */
+ if (size)
+ shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
+
}
int
@@ -594,7 +654,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* already initialised to all zeroes, nothing more needs to be done.
*/
- (void) shard_inode_ctx_set_if_absent (inode, this, xdata, buf);
+ (void) shard_inode_ctx_update (inode, this, xdata, buf);
/* Also, if the file is sharded, get the file size and block cnt xattr,
* and store them in the stbuf appropriately.
@@ -673,7 +733,10 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ int ret = -1;
+ int32_t mask = SHARD_INODE_WRITE_MASK;
shard_local_t *local = NULL;
+ shard_inode_ctx_t ctx = {0,};
local = frame->local;
@@ -690,6 +753,19 @@ shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
+ if (shard_inode_ctx_get_all (inode, this, &ctx))
+ mask = SHARD_ALL_MASK;
+
+ ret = shard_inode_ctx_set (inode, this, &local->prebuf, 0, mask);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set inode write "
+ "params into inode ctx for %s",
+ uuid_utoa (buf->ia_gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
unwind:
local->handler (frame, this);
return 0;
@@ -699,21 +775,32 @@ int
shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc,
shard_post_fop_handler_t handler)
{
- shard_local_t *local = NULL;
- dict_t *xattr_req = NULL;
+ int ret = -1;
+ shard_local_t *local = NULL;
+ shard_inode_ctx_t ctx = {0,};
+ dict_t *xattr_req = NULL;
local = frame->local;
local->handler = handler;
+ ret = shard_inode_ctx_get_all (loc->inode, this, &ctx);
+ /* By this time, inode ctx should have been created either in create,
+ * mknod, readdirp or lookup. If not it is a bug!
+ */
+ if ((ret == 0) && (ctx.stat.ia_size > 0)) {
+ local->prebuf = ctx.stat;
+ goto out;
+ }
+
xattr_req = dict_new ();
if (!xattr_req) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- goto err;
+ goto out;
}
SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, loc->gfid,
- local, err);
+ local, out);
STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -721,7 +808,7 @@ shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_unref (xattr_req);
return 0;
-err:
+out:
if (xattr_req)
dict_unref (xattr_req);
handler (frame, this);
@@ -1502,6 +1589,10 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this)
return 0;
}
+/* TO-DO:
+ * Fix updates to size and block count with racing write(s) and truncate(s).
+ */
+
int
shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
@@ -1608,17 +1699,14 @@ shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = -1;
shard_local_t *local = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
local = frame->local;
if (op_ret == -1)
goto unwind;
- ctx_tmp.block_size = ntoh64 (local->block_size);
- ctx_tmp.mode = st_mode_from_ia (buf->ia_prot, buf->ia_type);
- ctx_tmp.rdev = buf->ia_rdev;
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
+ ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size),
+ SHARD_ALL_MASK);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx "
"for %s", uuid_utoa (inode->gfid));
@@ -2278,22 +2366,17 @@ shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = -1;
shard_local_t *local = NULL;
- shard_inode_ctx_t ctx_tmp = {0,};
local = frame->local;
if (op_ret == -1)
goto unwind;
- ctx_tmp.block_size = ntoh64 (local->block_size);
- ctx_tmp.mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
- ctx_tmp.rdev = stbuf->ia_rdev;
- ret = shard_inode_ctx_set_all (inode, this, &ctx_tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "Failed to set block size "
- "for %s in inode ctx", uuid_utoa (inode->gfid));
- goto unwind;
- }
+ ret = shard_inode_ctx_set (inode, this, stbuf,
+ ntoh64 (local->block_size), SHARD_ALL_MASK);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "Failed to set inode ctx "
+ "for %s", uuid_utoa (inode->gfid));
unwind:
SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf,
@@ -2600,6 +2683,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
int ret = 0;
int call_count = 0;
char path[PATH_MAX] = {0,};
+ mode_t mode = 0;
char *bname = NULL;
shard_priv_t *priv = NULL;
shard_inode_ctx_t ctx_tmp = {0,};
@@ -2627,6 +2711,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
local->op_errno = ENOMEM;
goto err;
}
+ mode = st_mode_from_ia (ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
while (shard_idx_iter <= last_block) {
if (local->inode_list[i]) {
@@ -2686,7 +2771,7 @@ shard_common_resume_mknod (call_frame_t *frame, xlator_t *this,
(void *) (long) shard_idx_iter,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, &loc,
- ctx_tmp.mode, ctx_tmp.rdev, 0, xattr_req);
+ mode, ctx_tmp.stat.ia_rdev, 0, xattr_req);
loc_wipe (&loc);
dict_unref (xattr_req);
@@ -2929,10 +3014,52 @@ shard_post_update_size_writev_handler (call_frame_t *frame, xlator_t *this)
}
int
+__shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get (inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *) ctx_uint;
+
+ if (local->offset + local->total_size > ctx->stat.ia_size) {
+ local->delta_size = (local->offset + local->total_size) -
+ ctx->stat.ia_size;
+ ctx->stat.ia_size += (local->delta_size);
+ } else {
+ local->delta_size = 0;
+ }
+
+ return 0;
+}
+
+int
+shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __shard_get_delta_size_from_inode_ctx (local, inode,
+ this);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
+ int ret = 0;
int call_count = 0;
fd_t *anon_fd = cookie;
shard_local_t *local = NULL;
@@ -2946,6 +3073,14 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->written_size += op_ret;
local->delta_blocks += (postbuf->ia_blocks - prebuf->ia_blocks);
local->delta_size += (postbuf->ia_size - prebuf->ia_size);
+ ret = shard_inode_ctx_set (local->fd->inode, this, postbuf, 0,
+ SHARD_MASK_TIMES);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to set "
+ "times in the inode ctx. Shard = %s, base file "
+ "gfid = %s", uuid_utoa (postbuf->ia_gfid),
+ uuid_utoa (local->fd->inode->gfid));
+ }
}
if (anon_fd)
@@ -2958,12 +3093,9 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
SHARD_STACK_UNWIND (writev, frame, local->op_ret,
local->op_errno, NULL, NULL, NULL);
} else {
- if (local->is_write_extending)
- local->delta_size = (local->offset +
- local->total_size) -
- local->prebuf.ia_size;
- else
- local->delta_size = 0;
+ shard_get_delta_size_from_inode_ctx (local,
+ local->fd->inode,
+ this);
local->hole_size = 0;
if (xdata)
local->xattr_rsp = dict_ref (xdata);
@@ -3138,12 +3270,6 @@ shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this)
local->postbuf = local->prebuf;
- /* At this point, calculate the size of the hole if it is going to be
- * created as part of this write.
- */
- if (local->offset + local->total_size > local->prebuf.ia_size)
- local->is_write_extending = _gf_true;
-
if (local->create_count)
shard_common_resume_mknod (frame, this,
shard_post_mknod_writev_handler);
@@ -3462,8 +3588,8 @@ shard_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!entry->inode)
continue;
- shard_inode_ctx_set_if_absent (entry->inode, this, entry->dict,
- &entry->d_stat);
+ shard_inode_ctx_update (entry->inode, this, entry->dict,
+ &entry->d_stat);
}
local->op_ret = op_ret;
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 8daf2272dd3..ce6952f91fd 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -24,6 +24,32 @@
#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
#define SHARD_INODE_LRU_LIMIT 4096
+/**
+ * Bit masks for the valid flag, which is used while updating ctx
+**/
+#define SHARD_MASK_BLOCK_SIZE (1 << 0)
+#define SHARD_MASK_PROT (1 << 1)
+#define SHARD_MASK_NLINK (1 << 2)
+#define SHARD_MASK_UID (1 << 3)
+#define SHARD_MASK_GID (1 << 4)
+#define SHARD_MASK_SIZE (1 << 6)
+#define SHARD_MASK_BLOCKS (1 << 7)
+#define SHARD_MASK_TIMES (1 << 8)
+#define SHARD_MASK_OTHERS (1 << 9)
+
+#define SHARD_INODE_WRITE_MASK (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES)
+
+#define SHARD_LOOKUP_MASK (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID \
+ | SHARD_MASK_GID | SHARD_MASK_TIMES \
+ | SHARD_MASK_OTHERS)
+
+#define SHARD_ALL_MASK (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT \
+ | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID \
+ | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \
+ | SHARD_MASK_TIMES | SHARD_MASK_OTHERS)
+
+
#define get_lowest_block(off, shard_size) ((off) / (shard_size))
#define get_highest_block(off, len, shard_size) \
(((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size))
@@ -130,6 +156,19 @@
} \
} while (0)
+#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) do { \
+ if (ctx_sec == new_sec) \
+ ctx_nsec = new_nsec = max (new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } else { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+
typedef struct shard_priv {
uint64_t block_size;
uuid_t dot_shard_gfid;
@@ -199,7 +238,6 @@ typedef struct shard_local {
gf_dirent_t entries_head;
gf_boolean_t is_set_fsid;
gf_boolean_t list_inited;
- gf_boolean_t is_write_extending;
shard_post_fop_handler_t handler;
shard_post_lookup_shards_fop_handler_t pls_fop_handler;
shard_post_resolve_fop_handler_t post_res_handler;
@@ -213,10 +251,9 @@ typedef struct shard_local {
} shard_local_t;
typedef struct shard_inode_ctx {
- uint32_t rdev;
uint64_t block_size; /* The block size with which this inode is
sharded */
- mode_t mode;
+ struct iatt stat;
} shard_inode_ctx_t;
#endif /* __SHARD_H__ */