diff options
Diffstat (limited to 'libglusterfs')
-rw-r--r-- | libglusterfs/src/glusterfs.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/inode.c | 253 | ||||
-rw-r--r-- | libglusterfs/src/inode.h | 19 | ||||
-rw-r--r-- | libglusterfs/src/libglusterfs.sym | 2 |
4 files changed, 239 insertions, 36 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 376b7a7c673..2b58ce65a18 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -522,6 +522,7 @@ struct _cmd_args { pid_t client_pid; int client_pid_set; unsigned uid_map_root; + int32_t lru_limit; int background_qlen; int congestion_threshold; char *fuse_mountopts; diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 089aa6f9b21..12a8fbd014d 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@ move latest accessed dentry to list_head of inode */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. + +With this information, these are the states that cause a transition: + + refs nlookups inv_sent op + 1 0 0 unref -> refs = 0, active--->destroy + 1 1 0 unref -> refs = 0, active--->lru + 1 1 0 forget -> nlookups = 0, active--->active + *0 1 0 forget -> nlookups = 0, lru--->destroy + *0 1 1 forget -> nlookups = 0, invalidate--->destroy + 0 1 0 ref -> refs = 1, lru--->active + 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active + 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate + 1 1 1 unref -> refs = 0, invalidate--->invalidate + 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. + +Based on this, the code could be similar to this: + + ref(inode, inv) + { + if (refs == 0) { + if (inv_sent) { + invalidate_count--; + inv_sent = 0; + } else { + lru_count--; + } + if (inv) { + inv_sent = 1; + invalidate_count++; + list_move(inode, invalidate); + } else { + active_count++; + list_move(inode, active); + } + } + refs++; + } + + unref(inode, clear) + { + if (clear && inv_sent) { + // there is a case of fuse itself sending forget, without + // invalidate, after entry delete, like unlink(), rmdir(). + inv_sent = 0; + invalidate_count--; + active_count++; + list_move(inode, active); + } + refs--; + if ((refs == 0) && !inv_sent) { + active_count--; + if (nlookups == 0) { + destroy(inode); + } else { + lru_count++; + list_move(inode, lru); + } + } + } + + forget(inode) + { + ref(inode, false); + nlookups--; + unref(inode, true); + } + + overflow(inode) + { + ref(inode, true); + invalidator(inode); + unref(inode, false); + } + +*/ +// clang-format on + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ { \ int i = 1; \ @@ -37,7 +131,7 @@ } static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear); static int inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry) dentry->name = NULL; if (dentry->parent) { - __inode_unref(dentry->parent); + __inode_unref(dentry->parent, false); dentry->parent = NULL; } @@ -446,7 +540,7 @@ out: } static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear) { int index = 0; xlator_t *this = NULL; @@ -454,8 +548,6 @@ __inode_unref(inode_t *inode) if (!inode) return NULL; - this = THIS; - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. @@ -463,6 +555,13 @@ __inode_unref(inode_t *inode) if (__is_root_gfid(inode->gfid)) return inode; + this = THIS; + + if (clear && inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } GF_ASSERT(inode->ref); --inode->ref; @@ -473,7 +572,7 @@ __inode_unref(inode_t *inode) inode->_ctx[index].ref--; } - if (!inode->ref) { + if (!inode->ref && !inode->invalidate_sent) { inode->table->active_size--; if (inode->nlookup) @@ -486,7 +585,7 @@ __inode_unref(inode_t *inode) } static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate) { int index = 0; xlator_t *this = NULL; @@ -496,11 +595,6 @@ __inode_ref(inode_t *inode) this = THIS; - if (!inode->ref) { - inode->table->lru_size--; - __inode_activate(inode); - } - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -512,6 +606,22 @@ __inode_ref(inode_t *inode) if (__is_root_gfid(inode->gfid) && inode->ref) return inode; + if (!inode->ref) { + if (inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { + inode->invalidate_sent = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { + __inode_activate(inode); + } + } + inode->ref++; index = __inode_get_xl_index(inode, this); @@ -535,7 +645,7 @@ inode_unref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_unref(inode); + inode = __inode_unref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -556,7 +666,7 @@ inode_ref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_ref(inode); + inode = __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -590,7 +700,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) } if (parent) - newd->parent = __inode_ref(parent); + newd->parent = __inode_ref(parent, false); list_add(&newd->inode_list, &inode->dentry_list); newd->inode = inode; @@ -660,7 +770,7 @@ inode_new(inode_table_t *table) { inode = __inode_create(table); if (inode != NULL) { - __inode_ref(inode); + __inode_ref(inode, false); } } pthread_mutex_unlock(&table->lock); @@ -773,7 +883,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) inode = dentry->inode; if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -916,7 +1026,7 @@ inode_find(inode_table_t *table, uuid_t gfid) { inode = __inode_find(table, gfid); if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -1061,7 +1171,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) linked_inode = __inode_link(inode, parent, name, iatt); if (linked_inode) - __inode_ref(linked_inode); + __inode_ref(linked_inode, false); } pthread_mutex_unlock(&table->lock); @@ -1140,6 +1250,31 @@ inode_forget(inode_t *inode, uint64_t nlookup) return 0; } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ + inode_table_t *table = NULL; + + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); + return -1; + } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { + __inode_forget(inode, nlookup); + __inode_unref(inode, true); + } + pthread_mutex_unlock(&table->lock); + + inode_table_prune(table); + + return 0; +} + /* * Invalidate an inode. This is invoked when a translator decides that an * inode's cache is no longer valid. Any translator interested in taking action @@ -1314,7 +1449,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) parent = dentry->parent; if (parent) - __inode_ref(parent); + __inode_ref(parent, false); } pthread_mutex_unlock(&table->lock); @@ -1496,6 +1631,7 @@ inode_table_prune(inode_table_t *table) inode_t *del = NULL; inode_t *tmp = NULL; inode_t *entry = NULL; + int64_t lru_size = 0; if (!table) return -1; @@ -1504,7 +1640,11 @@ inode_table_prune(inode_table_t *table) pthread_mutex_lock(&table->lock); { - while (table->lru_limit && table->lru_size > (table->lru_limit)) { + if (!table->lru_limit) + goto purge_list; + + lru_size = table->lru_size; + while (lru_size > (table->lru_limit)) { if (list_empty(&table->lru)) { gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_INODE_LIST, @@ -1514,26 +1654,46 @@ inode_table_prune(inode_table_t *table) break; } + lru_size--; entry = list_entry(table->lru.next, inode_t, list); + /* The logic of invalidation is required only if invalidator_fn + is present */ + if (table->invalidator_fn) { + /* check for valid inode with 'nlookup' */ + if (entry->nlookup) { + __inode_ref(entry, true); + tmp = entry; + break; + } + } + table->lru_size--; __inode_retire(entry); - ret++; } + purge_list: list_splice_init(&table->purge, &purge); table->purge_size = 0; } pthread_mutex_unlock(&table->lock); + /* Pick 1 inode for invalidation */ + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; + table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; + inode_unref(tmp); + } + + /* Just so that if purge list is handled too, then clear it off */ + list_for_each_entry_safe(del, tmp, &purge, list) { - list_for_each_entry_safe(del, tmp, &purge, list) - { - list_del_init(&del->list); - __inode_forget(del, 0); - __inode_destroy(del); - } + list_del_init(&del->list); + __inode_forget(del, 0); + __inode_destroy(del); } return ret; @@ -1561,9 +1721,12 @@ __inode_table_init_root(inode_table_t *table) } inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl) { inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; int ret = -1; int i = 0; @@ -1575,20 +1738,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl) new->ctxcount = xl->graph->xl_count + 1; new->lru_limit = lru_limit; + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; new->hashsize = 14057; /* TODO: Random Number?? */ /* In case FUSE is initing the inode table. */ - if (lru_limit == 0) - lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - - new->inode_pool = mem_pool_new(inode_t, lru_limit); + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) + mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + new->inode_pool = mem_pool_new(inode_t, mem_pool_size); if (!new->inode_pool) goto out; - new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - + new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size); if (!new->dentry_pool) goto out; @@ -1620,6 +1783,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl) INIT_LIST_HEAD(&new->active); INIT_LIST_HEAD(&new->lru); INIT_LIST_HEAD(&new->purge); + INIT_LIST_HEAD(&new->invalidate); ret = gf_asprintf(&new->name, "%s/inode", xl->name); if (-1 == ret) { @@ -1649,6 +1813,13 @@ out: return new; } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ + /* Only fuse for now requires the inode table with invalidator */ + return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} + int inode_table_ctx_free(inode_table_t *table) { @@ -1787,6 +1958,14 @@ inode_table_destroy(inode_table_t *inode_table) inode_table->lru_size--; } + /* Same logic for invalidate list */ + while (!list_empty(&inode_table->invalidate)) { + trav = list_first_entry(&inode_table->invalidate, inode_t, list); + __inode_forget(trav, 0); + __inode_retire(trav); + inode_table->invalidate_size--; + } + while (!list_empty(&inode_table->active)) { trav = list_first_entry(&inode_table->active, inode_t, list); /* forget and unref the inode to retire and add it to @@ -2294,6 +2473,7 @@ inode_dump(inode_t *inode, char *prefix) gf_proc_dump_write("fd-count", "%u", inode->fd_count); gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count); gf_proc_dump_write("ref", "%u", inode->ref); + gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent); gf_proc_dump_write("ia_type", "%d", inode->ia_type); if (inode->_ctx) { inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2367,10 +2547,13 @@ inode_table_dump(inode_table_t *itable, char *prefix) gf_proc_dump_write(key, "%d", itable->lru_size); gf_proc_dump_build_key(key, prefix, "purge_size"); gf_proc_dump_write(key, "%d", itable->purge_size); + gf_proc_dump_build_key(key, prefix, "invalidate_size"); + gf_proc_dump_write(key, "%d", itable->invalidate_size); INODE_DUMP_LIST(&itable->active, key, prefix, "active"); INODE_DUMP_LIST(&itable->lru, key, prefix, "lru"); INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); + INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); pthread_mutex_unlock(&itable->lock); } diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 276a1f1577c..3f39dddcb26 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -54,6 +54,13 @@ struct _inode_table { struct mem_pool *dentry_pool; /* memory pool for dentrys */ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ int ctxcount; /* number of slots in inode->ctx */ + + /* This is required for 'invalidation' when 'nlookup' would be used, + specially in case of fuse-bridge */ + int32_t (*invalidator_fn)(xlator_t *, inode_t *); + xlator_t *invalidator_xl; + struct list_head invalidate; /* inodes which are in invalidation queue */ + uint32_t invalidate_size; /* count of inodes in invalidation list */ }; struct _dentry { @@ -100,6 +107,8 @@ struct _inode { struct list_head list; /* active/lru/purge */ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + + bool invalidate_sent; /* Set it if invalidate_fn is called for inode */ }; #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +116,12 @@ struct _inode { #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl); void inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -141,6 +155,9 @@ int inode_forget(inode_t *inode, uint64_t nlookup); int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup); + +int inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); int diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index c5b322a9bdf..41d621470b1 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -784,6 +784,7 @@ __inode_find inode_find inode_find_directory_name inode_forget +inode_forget_with_unref inode_from_path inode_grep inode_grep_for_gfid @@ -808,6 +809,7 @@ inode_table_destroy_all inode_table_dump inode_table_dump_to_dict inode_table_new +inode_table_with_invalidator __inode_table_set_lru_limit inode_table_set_lru_limit inode_unlink |