diff options
Diffstat (limited to 'libglusterfs')
-rw-r--r-- | libglusterfs/src/glusterfs/glusterfs.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/inode.h | 17 | ||||
-rw-r--r-- | libglusterfs/src/inode.c | 254 | ||||
-rw-r--r-- | libglusterfs/src/libglusterfs.sym | 2 |
4 files changed, 238 insertions, 36 deletions
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 908a0ce774f..9f14f2f5440 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -524,6 +524,7 @@ struct _cmd_args { pid_t client_pid; int client_pid_set; unsigned uid_map_root; + int32_t lru_limit; int background_qlen; int congestion_threshold; char *fuse_mountopts; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 5934373ec5b..52efdd85ccc 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -54,6 +54,13 @@ struct _inode_table { struct mem_pool *dentry_pool; /* memory pool for dentrys */ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ int ctxcount; /* number of slots in inode->ctx */ + + /* This is required for 'invalidation' when 'nlookup' would be used, + especially in case of fuse-bridge */ + int32_t (*invalidator_fn)(xlator_t *, inode_t *); + xlator_t *invalidator_xl; + struct list_head invalidate; /* inodes which are in invalidation queue */ + uint32_t invalidate_size; /* count of inodes in invalidation list */ }; struct _dentry { @@ -100,6 +107,7 @@ struct _inode { struct list_head list; /* active/lru/purge */ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ }; #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +115,12 @@ struct _inode { #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl); void inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -139,6 +152,8 @@ inode_lookup(inode_t *inode); int inode_forget(inode_t *inode, uint64_t nlookup); +int 
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup); int inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 3bf32cfe442..b4a62897498 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@ move latest accessed dentry to list_head of inode */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. + +With this information, these are the states that cause a transition: + + refs nlookups inv_sent op + 1 0 0 unref -> refs = 0, active--->destroy + 1 1 0 unref -> refs = 0, active--->lru + 1 1 0 forget -> nlookups = 0, active--->active + *0 1 0 forget -> nlookups = 0, lru--->destroy + *0 1 1 forget -> nlookups = 0, invalidate--->destroy + 0 1 0 ref -> refs = 1, lru--->active + 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active + 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate + 1 1 1 unref -> refs = 0, invalidate--->invalidate + 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. 
+ +Based on this, the code could be similar to this: + + ref(inode, inv) + { + if (refs == 0) { + if (inv_sent) { + invalidate_count--; + inv_sent = 0; + } else { + lru_count--; + } + if (inv) { + inv_sent = 1; + invalidate_count++; + list_move(inode, invalidate); + } else { + active_count++; + list_move(inode, active); + } + } + refs++; + } + + unref(inode, clear) + { + if (clear && inv_sent) { + // there is a case of fuse itself sending forget, without + // invalidate, after entry delete, like unlink(), rmdir(). + inv_sent = 0; + invalidate_count--; + active_count++; + list_move(inode, active); + } + refs--; + if ((refs == 0) && !inv_sent) { + active_count--; + if (nlookups == 0) { + destroy(inode); + } else { + lru_count++; + list_move(inode, lru); + } + } + } + + forget(inode) + { + ref(inode, false); + nlookups--; + unref(inode, true); + } + + overflow(inode) + { + ref(inode, true); + invalidator(inode); + unref(inode, false); + } + +*/ +// clang-format on + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ { \ int i = 1; \ @@ -37,7 +131,7 @@ } static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear); static int inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry) dentry->name = NULL; if (dentry->parent) { - __inode_unref(dentry->parent); + __inode_unref(dentry->parent, false); dentry->parent = NULL; } @@ -446,7 +540,7 @@ out: } static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear) { int index = 0; xlator_t *this = NULL; @@ -455,8 +549,6 @@ __inode_unref(inode_t *inode) if (!inode) return NULL; - this = THIS; - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. 
@@ -464,6 +556,13 @@ __inode_unref(inode_t *inode) if (__is_root_gfid(inode->gfid)) return inode; + this = THIS; + + if (clear && inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } GF_ASSERT(inode->ref); --inode->ref; @@ -474,7 +573,7 @@ __inode_unref(inode_t *inode) inode->_ctx[index].ref--; } - if (!inode->ref) { + if (!inode->ref && !inode->invalidate_sent) { inode->table->active_size--; nlookup = GF_ATOMIC_GET(inode->nlookup); @@ -488,7 +587,7 @@ __inode_unref(inode_t *inode) } static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate) { int index = 0; xlator_t *this = NULL; @@ -498,11 +597,6 @@ __inode_ref(inode_t *inode) this = THIS; - if (!inode->ref) { - inode->table->lru_size--; - __inode_activate(inode); - } - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -514,6 +608,22 @@ __inode_ref(inode_t *inode) if (__is_root_gfid(inode->gfid) && inode->ref) return inode; + if (!inode->ref) { + if (inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { + inode->invalidate_sent = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { + __inode_activate(inode); + } + } + inode->ref++; index = __inode_get_xl_index(inode, this); @@ -537,7 +647,7 @@ inode_unref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_unref(inode); + inode = __inode_unref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -558,7 +668,7 @@ inode_ref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_ref(inode); + inode = __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) } if (parent) - 
newd->parent = __inode_ref(parent); + newd->parent = __inode_ref(parent, false); list_add(&newd->inode_list, &inode->dentry_list); newd->inode = inode; @@ -662,7 +772,7 @@ inode_new(inode_table_t *table) { inode = __inode_create(table); if (inode != NULL) { - __inode_ref(inode); + __inode_ref(inode, false); } } pthread_mutex_unlock(&table->lock); @@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) inode = dentry->inode; if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -912,7 +1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid) { inode = __inode_find(table, gfid); if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) linked_inode = __inode_link(inode, parent, name, iatt); if (linked_inode) - __inode_ref(linked_inode); + __inode_ref(linked_inode, false); } pthread_mutex_unlock(&table->lock); @@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup) return 0; } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ + inode_table_t *table = NULL; + + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); + return -1; + } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { + inode_forget_atomic(inode, nlookup); + __inode_unref(inode, true); + } + pthread_mutex_unlock(&table->lock); + + inode_table_prune(table); + + return 0; +} + /* * Invalidate an inode. This is invoked when a translator decides that an * inode's cache is no longer valid. 
Any translator interested in taking action @@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) parent = dentry->parent; if (parent) - __inode_ref(parent); + __inode_ref(parent, false); } pthread_mutex_unlock(&table->lock); @@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table) inode_t *del = NULL; inode_t *tmp = NULL; inode_t *entry = NULL; + uint64_t nlookup = 0; + int64_t lru_size = 0; if (!table) return -1; @@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table) pthread_mutex_lock(&table->lock); { - while (table->lru_limit && table->lru_size > (table->lru_limit)) { + if (!table->lru_limit) + goto purge_list; + + lru_size = table->lru_size; + while (lru_size > (table->lru_limit)) { if (list_empty(&table->lru)) { gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_INODE_LIST, @@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table) break; } + lru_size--; entry = list_entry(table->lru.next, inode_t, list); + /* The logic of invalidation is required only if invalidator_fn + is present */ + if (table->invalidator_fn) { + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { + __inode_ref(entry, true); + tmp = entry; + break; + } + } table->lru_size--; __inode_retire(entry); - ret++; } + purge_list: list_splice_init(&table->purge, &purge); table->purge_size = 0; } pthread_mutex_unlock(&table->lock); + /* Pick 1 inode for invalidation */ + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; + table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; + inode_unref(tmp); + } + + /* Destroy every inode that was spliced off the purge list above */ + list_for_each_entry_safe(del, tmp, &purge, list) { - list_for_each_entry_safe(del, tmp, &purge, list) - { - list_del_init(&del->list); - inode_forget_atomic(del, 0); - __inode_destroy(del); - } + list_del_init(&del->list); + inode_forget_atomic(del, 0); + __inode_destroy(del); } return 
ret; @@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table) } inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl) { inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; int ret = -1; int i = 0; @@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl) new->ctxcount = xl->graph->xl_count + 1; new->lru_limit = lru_limit; + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; new->hashsize = 14057; /* TODO: Random Number?? */ /* In case FUSE is initing the inode table. */ - if (lru_limit == 0) - lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - - new->inode_pool = mem_pool_new(inode_t, lru_limit); + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) + mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + new->inode_pool = mem_pool_new(inode_t, mem_pool_size); if (!new->inode_pool) goto out; - new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - + new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size); if (!new->dentry_pool) goto out; @@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl) INIT_LIST_HEAD(&new->active); INIT_LIST_HEAD(&new->lru); INIT_LIST_HEAD(&new->purge); + INIT_LIST_HEAD(&new->invalidate); ret = gf_asprintf(&new->name, "%s/inode", xl->name); if (-1 == ret) { @@ -1633,6 +1798,13 @@ out: return new; } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ + /* Only fuse for now requires the inode table with invalidator */ + return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} + int inode_table_ctx_free(inode_table_t *table) { @@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table) inode_table->lru_size--; } + /* Same logic for invalidate list */ + while (!list_empty(&inode_table->invalidate)) { + trav = list_first_entry(&inode_table->invalidate, inode_t, list); 
+ inode_forget_atomic(trav, 0); + __inode_retire(trav); + inode_table->invalidate_size--; + } + while (!list_empty(&inode_table->active)) { trav = list_first_entry(&inode_table->active, inode_t, list); /* forget and unref the inode to retire and add it to @@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix) gf_proc_dump_write("fd-count", "%u", inode->fd_count); gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count); gf_proc_dump_write("ref", "%u", inode->ref); + gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent); gf_proc_dump_write("ia_type", "%d", inode->ia_type); if (inode->_ctx) { inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix) gf_proc_dump_write(key, "%d", itable->lru_size); gf_proc_dump_build_key(key, prefix, "purge_size"); gf_proc_dump_write(key, "%d", itable->purge_size); + gf_proc_dump_build_key(key, prefix, "invalidate_size"); + gf_proc_dump_write(key, "%d", itable->invalidate_size); INODE_DUMP_LIST(&itable->active, key, prefix, "active"); INODE_DUMP_LIST(&itable->lru, key, prefix, "lru"); INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); + INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); pthread_mutex_unlock(&itable->lock); } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 6ca6a639456..464493d6cfc 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -791,6 +791,7 @@ __inode_find inode_find inode_find_directory_name inode_forget +inode_forget_with_unref inode_from_path inode_grep inode_grep_for_gfid @@ -815,6 +816,7 @@ inode_table_destroy_all inode_table_dump inode_table_dump_to_dict inode_table_new +inode_table_with_invalidator __inode_table_set_lru_limit inode_table_set_lru_limit inode_unlink |