summary | refs | log | tree | commit | diff | stats
path: root/libglusterfs
diff options
context:
space:
mode:
Diffstat (limited to 'libglusterfs')
-rw-r--r-- libglusterfs/src/glusterfs/glusterfs.h | 1
-rw-r--r-- libglusterfs/src/glusterfs/inode.h | 17
-rw-r--r-- libglusterfs/src/inode.c | 254
-rw-r--r-- libglusterfs/src/libglusterfs.sym | 2
4 files changed, 238 insertions, 36 deletions
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 908a0ce774f..9f14f2f5440 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -524,6 +524,7 @@ struct _cmd_args {
pid_t client_pid;
int client_pid_set;
unsigned uid_map_root;
+ int32_t lru_limit;
int background_qlen;
int congestion_threshold;
char *fuse_mountopts;
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index 5934373ec5b..52efdd85ccc 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -54,6 +54,13 @@ struct _inode_table {
struct mem_pool *dentry_pool; /* memory pool for dentrys */
struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
int ctxcount; /* number of slots in inode->ctx */
+
+ /* This is required for 'invalidation' when 'nlookup' would be used,
+ especially in case of fuse-bridge */
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
+ xlator_t *invalidator_xl;
+ struct list_head invalidate; /* inodes which are in invalidation queue */
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
};
struct _dentry {
@@ -100,6 +107,7 @@ struct _inode {
struct list_head list; /* active/lru/purge */
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
};
#define UUID0_STR "00000000-0000-0000-0000-000000000000"
@@ -107,7 +115,12 @@ struct _inode {
#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl);
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
+
+inode_table_t *
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl);
void
inode_table_destroy_all(glusterfs_ctx_t *ctx);
@@ -139,6 +152,8 @@ inode_lookup(inode_t *inode);
int
inode_forget(inode_t *inode, uint64_t nlookup);
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
int
inode_ref_reduce_by_n(inode_t *inode, uint64_t nref);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 3bf32cfe442..b4a62897498 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -23,6 +23,100 @@
move latest accessed dentry to list_head of inode
*/
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) Technically these combinations cannot happen, because a forget sent by the
+kernel first calls ref() and then unref(). However, the outcome is equivalent.
+
+'overflow' means that the lru list has grown beyond the limit and the inode
+needs to be invalidated. All other combinations either do not cause a change in
+state or are not possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
{ \
int i = 1; \
@@ -37,7 +131,7 @@
}
static inode_t *
-__inode_unref(inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
inode_table_prune(inode_table_t *table);
@@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)
dentry->name = NULL;
if (dentry->parent) {
- __inode_unref(dentry->parent);
+ __inode_unref(dentry->parent, false);
dentry->parent = NULL;
}
@@ -446,7 +540,7 @@ out:
}
static inode_t *
-__inode_unref(inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
int index = 0;
xlator_t *this = NULL;
@@ -455,8 +549,6 @@ __inode_unref(inode_t *inode)
if (!inode)
return NULL;
- this = THIS;
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops.
@@ -464,6 +556,13 @@ __inode_unref(inode_t *inode)
if (__is_root_gfid(inode->gfid))
return inode;
+ this = THIS;
+
+ if (clear && inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
GF_ASSERT(inode->ref);
--inode->ref;
@@ -474,7 +573,7 @@ __inode_unref(inode_t *inode)
inode->_ctx[index].ref--;
}
- if (!inode->ref) {
+ if (!inode->ref && !inode->invalidate_sent) {
inode->table->active_size--;
nlookup = GF_ATOMIC_GET(inode->nlookup);
@@ -488,7 +587,7 @@ __inode_unref(inode_t *inode)
}
static inode_t *
-__inode_ref(inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
int index = 0;
xlator_t *this = NULL;
@@ -498,11 +597,6 @@ __inode_ref(inode_t *inode)
this = THIS;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate(inode);
- }
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops. If we do not allow unrefs but allow refs,
@@ -514,6 +608,22 @@ __inode_ref(inode_t *inode)
if (__is_root_gfid(inode->gfid) && inode->ref)
return inode;
+ if (!inode->ref) {
+ if (inode->invalidate_sent) {
+ inode->invalidate_sent = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
+ }
+ if (is_invalidate) {
+ inode->invalidate_sent = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
+
inode->ref++;
index = __inode_get_xl_index(inode, this);
@@ -537,7 +647,7 @@ inode_unref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_unref(inode);
+ inode = __inode_unref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -558,7 +668,7 @@ inode_ref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_ref(inode);
+ inode = __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)
}
if (parent)
- newd->parent = __inode_ref(parent);
+ newd->parent = __inode_ref(parent, false);
list_add(&newd->inode_list, &inode->dentry_list);
newd->inode = inode;
@@ -662,7 +772,7 @@ inode_new(inode_table_t *table)
{
inode = __inode_create(table);
if (inode != NULL) {
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
}
pthread_mutex_unlock(&table->lock);
@@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
inode = dentry->inode;
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -912,7 +1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid)
{
inode = __inode_find(table, gfid);
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
linked_inode = __inode_link(inode, parent, name, iatt);
if (linked_inode)
- __inode_ref(linked_inode);
+ __inode_ref(linked_inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)
return 0;
}
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
+{ /* Apply a forget of 'nlookup' to 'inode' and drop one reference to it,
+   * both under a single hold of the table lock, and then prune the table.
+   *
+   * Returns -1 (after logging a warning) when 'inode' is NULL, 0 otherwise.
+   *
+   * The unref is issued with clear == true, so if invalidate_sent is set on
+   * the inode, __inode_unref() resets the flag, decrements the table's
+   * invalidate_size and re-activates the inode before the ref is dropped.
+   * NOTE(review): the reference dropped here is presumably one the caller
+   * already holds on 'inode' -- confirm against the callers. */
+ inode_table_t *table = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ inode_forget_atomic(inode, nlookup); /* NOTE(review): presumably lowers
+   inode->nlookup by 'nlookup'; the helper is not shown here -- confirm */
+ __inode_unref(inode, true); /* clear == true: also cancels a pending
+   invalidation (invalidate_sent) before dropping the ref */
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table); /* acquires table->lock itself, hence called
+   only after the lock has been released */
+
+ return 0;
+}
+
/*
* Invalidate an inode. This is invoked when a translator decides that an
* inode's cache is no longer valid. Any translator interested in taking action
@@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
parent = dentry->parent;
if (parent)
- __inode_ref(parent);
+ __inode_ref(parent, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table)
inode_t *del = NULL;
inode_t *tmp = NULL;
inode_t *entry = NULL;
+ uint64_t nlookup = 0;
+ int64_t lru_size = 0;
if (!table)
return -1;
@@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table)
pthread_mutex_lock(&table->lock);
{
- while (table->lru_limit && table->lru_size > (table->lru_limit)) {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
if (list_empty(&table->lru)) {
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
LG_MSG_INVALID_INODE_LIST,
@@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table)
break;
}
+ lru_size--;
entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
+ }
+ }
table->lru_size--;
__inode_retire(entry);
-
ret++;
}
+ purge_list:
list_splice_init(&table->purge, &purge);
table->purge_size = 0;
}
pthread_mutex_unlock(&table->lock);
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ inode_unref(tmp);
+ }
+
+ /* Destroy the inodes that were spliced off the table's purge list above */
+ list_for_each_entry_safe(del, tmp, &purge, list)
{
- list_for_each_entry_safe(del, tmp, &purge, list)
- {
- list_del_init(&del->list);
- inode_forget_atomic(del, 0);
- __inode_destroy(del);
- }
+ list_del_init(&del->list);
+ inode_forget_atomic(del, 0);
+ __inode_destroy(del);
}
return ret;
@@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table)
}
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
int ret = -1;
int i = 0;
@@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
new->hashsize = 14057; /* TODO: Random Number?? */
/* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
-
- new->inode_pool = mem_pool_new(inode_t, lru_limit);
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
if (!new->inode_pool)
goto out;
- new->dentry_pool = mem_pool_new(dentry_t, lru_limit);
-
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
if (!new->dentry_pool)
goto out;
@@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
INIT_LIST_HEAD(&new->active);
INIT_LIST_HEAD(&new->lru);
INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
ret = gf_asprintf(&new->name, "%s/inode", xl->name);
if (-1 == ret) {
@@ -1633,6 +1798,13 @@ out:
return new;
}
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Create an inode table that has no invalidator: this simply delegates to
+ * inode_table_with_invalidator() with a NULL invalidator_fn and a NULL
+ * invalidator_xl, and returns whatever that call returns.
+ *
+ * Without an invalidator_fn, inode_table_prune() does not take the
+ * invalidation path for lru inodes that exceed lru_limit.
+ *
+ * Only fuse for now requires the inode table with invalidator. */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
+
int
inode_table_ctx_free(inode_table_t *table)
{
@@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table)
inode_table->lru_size--;
}
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
while (!list_empty(&inode_table->active)) {
trav = list_first_entry(&inode_table->active, inode_t, list);
/* forget and unref the inode to retire and add it to
@@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix)
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
@@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)
gf_proc_dump_write(key, "%d", itable->lru_size);
gf_proc_dump_build_key(key, prefix, "purge_size");
gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
INODE_DUMP_LIST(&itable->active, key, prefix, "active");
INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
pthread_mutex_unlock(&itable->lock);
}
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 6ca6a639456..464493d6cfc 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -791,6 +791,7 @@ __inode_find
inode_find
inode_find_directory_name
inode_forget
+inode_forget_with_unref
inode_from_path
inode_grep
inode_grep_for_gfid
@@ -815,6 +816,7 @@ inode_table_destroy_all
inode_table_dump
inode_table_dump_to_dict
inode_table_new
+inode_table_with_invalidator
__inode_table_set_lru_limit
inode_table_set_lru_limit
inode_unlink