summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Krutika Dhananjay <kdhananj@redhat.com> 2015-09-29 15:13:37 +0530
committer Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-10-08 22:17:13 -0700
commit 62851271df97c584b43a7b2458d6bccc97dee029 (patch)
tree d79981e02fd25771695943f532e58cc6cbbb8c74
parent 4ad9bc5faca60528345f1e9c95c22bd8402162c0 (diff)
features/shard: Regulate memory consumption by individual shards' inode_t objects
Shard translator will now maintain an lru list of inodes associated with individual shards of constant size, and will make sure that at no point the number of these inodes will exceed the configured limit. This is to keep the memory consumption by the thousands of shards of every large file from exploding.

Change-Id: I5e60eea5dcf3130257fb431ca70cfaba53cae7f3
BUG: 1252263
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/12254
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- xlators/features/shard/src/shard.c 148
-rw-r--r-- xlators/features/shard/src/shard.h 11
2 files changed, 141 insertions, 18 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index aee6ed3ce45..2c33faf6a5f 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -63,6 +63,8 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
if (!ctx_p)
return ret;
+ INIT_LIST_HEAD (&ctx_p->ilist);
+
ret = __inode_ctx_set (inode, this, (uint64_t *)&ctx_p);
if (ret < 0) {
GF_FREE (ctx_p);
@@ -75,6 +77,20 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
}
int
+shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+{
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_get (inode, this, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf,
uint64_t block_size, int32_t valid)
{
@@ -323,17 +339,78 @@ out:
return ret;
}
+void
+__shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
+ inode_t *base_inode, int block_num)
+{
+ char block_bname[256] = {0,};
+ inode_t *lru_inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_inode_ctx_t *lru_inode_ctx = NULL;
+
+ priv = this->private;
+
+ shard_inode_ctx_get (linked_inode, this, &ctx);
+
+ if (list_empty (&ctx->ilist)) {
+ if (priv->inode_count + 1 <= SHARD_MAX_INODES) {
+ /* If this inode was linked here for the first time (indicated
+ * by empty list), and if there is still space in the priv list,
+ * add this ctx to the tail of the list.
+ */
+ gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+ ctx->block_num = block_num;
+ list_add_tail (&ctx->ilist, &priv->ilist_head);
+ priv->inode_count++;
+ } else {
+ /*If on the other hand there is no available slot for this inode
+ * in the list, delete the lru inode from the head of the list,
+ * unlink it. And in its place add this new inode into the list.
+ */
+ lru_inode_ctx = list_first_entry (&priv->ilist_head,
+ shard_inode_ctx_t,
+ ilist);
+ GF_ASSERT (lru_inode_ctx->block_num > 0);
+ list_del_init (&lru_inode_ctx->ilist);
+ lru_inode = inode_find (linked_inode->table,
+ lru_inode_ctx->stat.ia_gfid);
+ shard_make_block_bname (lru_inode_ctx->block_num,
+ lru_inode_ctx->base_gfid,
+ block_bname,
+ sizeof (block_bname));
+ inode_unlink (lru_inode, priv->dot_shard_inode,
+ block_bname);
+ /* The following unref corresponds to the ref held by
+ * inode_find() above.
+ */
+ inode_forget (lru_inode, 0);
+ inode_unref (lru_inode);
+ gf_uuid_copy (ctx->base_gfid, base_inode->gfid);
+ ctx->block_num = block_num;
+ list_add_tail (&ctx->ilist, &priv->ilist_head);
+ }
+ } else {
+ /* If this is not the first time this inode is being operated on, move
+ * it to the most recently used end of the list.
+ */
+ list_move_tail (&ctx->ilist, &priv->ilist_head);
+ }
+}
+
int
shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
inode_t *res_inode,
shard_post_resolve_fop_handler_t post_res_handler)
{
- int i = -1;
- uint32_t shard_idx_iter = 0;
- char path[PATH_MAX] = {0,};
- inode_t *inode = NULL;
- shard_local_t *local = NULL;
+ int i = -1;
+ uint32_t shard_idx_iter = 0;
+ char path[PATH_MAX] = {0,};
+ inode_t *inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ priv = this->private;
local = frame->local;
shard_idx_iter = local->first_block;
@@ -361,6 +438,14 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
* forgotten by the time the fop reaches the actual
* write stage.
*/
+ LOCK(&priv->lock);
+ {
+ __shard_update_shards_inode_list (inode, this,
+ res_inode,
+ shard_idx_iter);
+ }
+ UNLOCK(&priv->lock);
+
continue;
} else {
local->call_count++;
@@ -1266,23 +1351,36 @@ void
shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,
struct iatt *buf)
{
+ int list_index = 0;
char block_bname[256] = {0,};
inode_t *linked_inode = NULL;
+ xlator_t *this = NULL;
shard_priv_t *priv = NULL;
- priv = THIS->private;
+ this = THIS;
+ priv = this->private;
shard_make_block_bname (block_num, (local->loc.inode)->gfid,
block_bname, sizeof (block_bname));
+ shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK);
linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname,
buf);
inode_lookup (linked_inode);
- local->inode_list[block_num - local->first_block] = linked_inode;
- /* Defer unref'ing the inodes until write is complete to prevent
- * them from getting purged. These inodes are unref'd in the event of
- * a failure or after successful fop completion in shard_local_wipe().
+ list_index = block_num - local->first_block;
+
+ /* Defer unref'ing the inodes until write is complete. These inodes are
+ * unref'd in the event of a failure or after successful fop completion
+ * in shard_local_wipe().
*/
+ local->inode_list[list_index] = linked_inode;
+
+ LOCK(&priv->lock);
+ {
+ __shard_update_shards_inode_list (linked_inode, this,
+ local->loc.inode, block_num);
+ }
+ UNLOCK(&priv->lock);
}
int
@@ -1897,19 +1995,33 @@ shard_unlink_base_file (call_frame_t *frame, xlator_t *this)
void
shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
{
- char block_bname[256] = {0,};
- inode_t *inode = NULL;
- shard_priv_t *priv = NULL;
+ char block_bname[256] = {0,};
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t *ctx = NULL;
- priv = THIS->private;
+ this = THIS;
+ priv = this->private;
inode = local->inode_list[shard_block_num - local->first_block];
shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,
block_bname, sizeof (block_bname));
- inode_unlink (inode, priv->dot_shard_inode, block_bname);
- inode_forget (inode, 0);
+ LOCK(&priv->lock);
+ {
+ shard_inode_ctx_get (inode, this, &ctx);
+ if (!list_empty (&ctx->ilist)) {
+ list_del_init (&ctx->ilist);
+ priv->inode_count--;
+ }
+ GF_ASSERT (priv->inode_count >= 0);
+ inode_unlink (inode, priv->dot_shard_inode, block_bname);
+ inode_forget (inode, 0);
+ }
+ UNLOCK(&priv->lock);
+
}
int
@@ -4259,6 +4371,8 @@ init (xlator_t *this)
gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid);
this->private = priv;
+ LOCK_INIT (&priv->lock);
+ INIT_LIST_HEAD (&priv->ilist_head);
ret = 0;
out:
if (ret) {
@@ -4285,6 +4399,7 @@ fini (xlator_t *this)
goto out;
this->private = NULL;
+ LOCK_DESTROY (&priv->lock);
GF_FREE (priv);
out:
@@ -4320,7 +4435,6 @@ shard_forget (xlator_t *this, inode_t *inode)
ctx = (shard_inode_ctx_t *)ctx_uint;
- /* To-Do: Delete all the shards associated with this inode. */
GF_FREE (ctx);
return 0;
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 69414bfe1b5..b3d9b795957 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -24,7 +24,7 @@
#define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size"
#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
#define SHARD_INODE_LRU_LIMIT 4096
-
+#define SHARD_MAX_INODES 16384
/**
* Bit masks for the valid flag, which is used while updating ctx
**/
@@ -177,6 +177,9 @@ typedef struct shard_priv {
uint64_t block_size;
uuid_t dot_shard_gfid;
inode_t *dot_shard_inode;
+ gf_lock_t lock;
+ int inode_count;
+ struct list_head ilist_head;
} shard_priv_t;
typedef struct {
@@ -258,6 +261,12 @@ typedef struct shard_inode_ctx {
uint64_t block_size; /* The block size with which this inode is
sharded */
struct iatt stat;
+ /* The following members of inode ctx will be applicable only to the
+ * individual shards' ctx and never the base file ctx.
+ */
+ struct list_head ilist;
+ uuid_t base_gfid;
+ int block_num;
} shard_inode_ctx_t;
#endif /* __SHARD_H__ */