diff options
-rw-r--r-- | doc/mount.glusterfs.8 | 4 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 24 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/glusterfs.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs/inode.h | 17 | ||||
-rw-r--r-- | libglusterfs/src/inode.c | 254 | ||||
-rw-r--r-- | libglusterfs/src/libglusterfs.sym | 2 | ||||
-rw-r--r-- | tests/features/fuse-lru-limit.t | 42 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 127 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 3 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 7 |
11 files changed, 395 insertions, 87 deletions
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 index 367f02d9b1a..902b0c1ee5c 100644 --- a/doc/mount.glusterfs.8 +++ b/doc/mount.glusterfs.8 @@ -122,6 +122,10 @@ Provide list of backup volfile servers in the following format [default: None] \fBDeprecated\fR option - placed here for backward compatibility [default: 1] .TP .TP +\fBlru-limit=\fRN +Set fuse module's limit for number of inodes kept in LRU list to N [default: 0] +.TP +.TP \fBbackground-qlen=\fRN Set fuse module's background queue length to N [default: 64] .TP diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 6347941f369..0dea52b6af1 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -219,6 +219,9 @@ static struct argp_option gf_options[] = { "[default: 300]"}, {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0, "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " + "[default: 0]"}, {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, "Set fuse module's background queue length to N " "[default: 64]"}, @@ -496,6 +499,15 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) } } + if (cmd_args->lru_limit >= 0) { + ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit); + if (ret < 0) { + gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "lru-limit"); + goto err; + } + } + if (cmd_args->background_qlen) { ret = dict_set_int32(options, "background-qlen", cmd_args->background_qlen); @@ -1257,6 +1269,13 @@ parse_opts(int key, char *arg, struct argp_state *state) cmd_args->resolve_gids = 1; break; + case ARGP_FUSE_LRU_LIMIT_KEY: + if (!gf_string2int32(arg, &cmd_args->lru_limit)) + break; + + argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); + break; + case ARGP_FUSE_BACKGROUND_QLEN_KEY: if (!gf_string2int(arg, &cmd_args->background_qlen)) break; @@ -2085,6 +2104,11 @@ 
parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx) ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); } + /* Need to set lru_limit to below 0 to indicate there was nothing + specified. This is needed as 0 is a valid option, and may not be + default value. */ + cmd_args->lru_limit = -1; + argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args); if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir || diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 0042054e138..86ac61c1a92 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -109,6 +109,7 @@ enum argp_option_keys { ARGP_ATTR_TIMES_GRANULARITY_KEY = 187, ARGP_PRINT_LIBEXECDIR_KEY = 188, ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, }; struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 908a0ce774f..9f14f2f5440 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -524,6 +524,7 @@ struct _cmd_args { pid_t client_pid; int client_pid_set; unsigned uid_map_root; + int32_t lru_limit; int background_qlen; int congestion_threshold; char *fuse_mountopts; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 5934373ec5b..52efdd85ccc 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -54,6 +54,13 @@ struct _inode_table { struct mem_pool *dentry_pool; /* memory pool for dentrys */ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */ int ctxcount; /* number of slots in inode->ctx */ + + /* This is required for 'invalidation' when 'nlookup' would be used, + specially in case of fuse-bridge */ + int32_t (*invalidator_fn)(xlator_t *, inode_t *); + xlator_t *invalidator_xl; + struct list_head invalidate; /* inodes which are in invalidation queue */ + uint32_t invalidate_size; /* count of inodes in invalidation list */ }; 
struct _dentry { @@ -100,6 +107,7 @@ struct _inode { struct list_head list; /* active/lru/purge */ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ }; #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +115,12 @@ struct _inode { #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl); void inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -139,6 +152,8 @@ inode_lookup(inode_t *inode); int inode_forget(inode_t *inode, uint64_t nlookup); +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup); int inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 3bf32cfe442..b4a62897498 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@ move latest accessed dentry to list_head of inode */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. 
+ +With this information, these are the states that cause a transition: + + refs nlookups inv_sent op + 1 0 0 unref -> refs = 0, active--->destroy + 1 1 0 unref -> refs = 0, active--->lru + 1 1 0 forget -> nlookups = 0, active--->active + *0 1 0 forget -> nlookups = 0, lru--->destroy + *0 1 1 forget -> nlookups = 0, invalidate--->destroy + 0 1 0 ref -> refs = 1, lru--->active + 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active + 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate + 1 1 1 unref -> refs = 0, invalidate--->invalidate + 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. + +Based on this, the code could be similar to this: + + ref(inode, inv) + { + if (refs == 0) { + if (inv_sent) { + invalidate_count--; + inv_sent = 0; + } else { + lru_count--; + } + if (inv) { + inv_sent = 1; + invalidate_count++; + list_move(inode, invalidate); + } else { + active_count++; + list_move(inode, active); + } + } + refs++; + } + + unref(inode, clear) + { + if (clear && inv_sent) { + // there is a case of fuse itself sending forget, without + // invalidate, after entry delete, like unlink(), rmdir(). 
+ inv_sent = 0; + invalidate_count--; + active_count++; + list_move(inode, active); + } + refs--; + if ((refs == 0) && !inv_sent) { + active_count--; + if (nlookups == 0) { + destroy(inode); + } else { + lru_count++; + list_move(inode, lru); + } + } + } + + forget(inode) + { + ref(inode, false); + nlookups--; + unref(inode, true); + } + + overflow(inode) + { + ref(inode, true); + invalidator(inode); + unref(inode, false); + } + +*/ +// clang-format on + #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \ { \ int i = 1; \ @@ -37,7 +131,7 @@ } static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear); static int inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry) dentry->name = NULL; if (dentry->parent) { - __inode_unref(dentry->parent); + __inode_unref(dentry->parent, false); dentry->parent = NULL; } @@ -446,7 +540,7 @@ out: } static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear) { int index = 0; xlator_t *this = NULL; @@ -455,8 +549,6 @@ __inode_unref(inode_t *inode) if (!inode) return NULL; - this = THIS; - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. 
@@ -464,6 +556,13 @@ __inode_unref(inode_t *inode) if (__is_root_gfid(inode->gfid)) return inode; + this = THIS; + + if (clear && inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } GF_ASSERT(inode->ref); --inode->ref; @@ -474,7 +573,7 @@ __inode_unref(inode_t *inode) inode->_ctx[index].ref--; } - if (!inode->ref) { + if (!inode->ref && !inode->invalidate_sent) { inode->table->active_size--; nlookup = GF_ATOMIC_GET(inode->nlookup); @@ -488,7 +587,7 @@ __inode_unref(inode_t *inode) } static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate) { int index = 0; xlator_t *this = NULL; @@ -498,11 +597,6 @@ __inode_ref(inode_t *inode) this = THIS; - if (!inode->ref) { - inode->table->lru_size--; - __inode_activate(inode); - } - /* * Root inode should always be in active list of inode table. So unrefs * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -514,6 +608,22 @@ __inode_ref(inode_t *inode) if (__is_root_gfid(inode->gfid) && inode->ref) return inode; + if (!inode->ref) { + if (inode->invalidate_sent) { + inode->invalidate_sent = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { + inode->invalidate_sent = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { + __inode_activate(inode); + } + } + inode->ref++; index = __inode_get_xl_index(inode, this); @@ -537,7 +647,7 @@ inode_unref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_unref(inode); + inode = __inode_unref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -558,7 +668,7 @@ inode_ref(inode_t *inode) pthread_mutex_lock(&table->lock); { - inode = __inode_ref(inode); + inode = __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) } if (parent) - 
newd->parent = __inode_ref(parent); + newd->parent = __inode_ref(parent, false); list_add(&newd->inode_list, &inode->dentry_list); newd->inode = inode; @@ -662,7 +772,7 @@ inode_new(inode_table_t *table) { inode = __inode_create(table); if (inode != NULL) { - __inode_ref(inode); + __inode_ref(inode, false); } } pthread_mutex_unlock(&table->lock); @@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) inode = dentry->inode; if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -912,7 +1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid) { inode = __inode_find(table, gfid); if (inode) - __inode_ref(inode); + __inode_ref(inode, false); } pthread_mutex_unlock(&table->lock); @@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) linked_inode = __inode_link(inode, parent, name, iatt); if (linked_inode) - __inode_ref(linked_inode); + __inode_ref(linked_inode, false); } pthread_mutex_unlock(&table->lock); @@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup) return 0; } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ + inode_table_t *table = NULL; + + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); + return -1; + } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { + inode_forget_atomic(inode, nlookup); + __inode_unref(inode, true); + } + pthread_mutex_unlock(&table->lock); + + inode_table_prune(table); + + return 0; +} + /* * Invalidate an inode. This is invoked when a translator decides that an * inode's cache is no longer valid. 
Any translator interested in taking action @@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) parent = dentry->parent; if (parent) - __inode_ref(parent); + __inode_ref(parent, false); } pthread_mutex_unlock(&table->lock); @@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table) inode_t *del = NULL; inode_t *tmp = NULL; inode_t *entry = NULL; + uint64_t nlookup = 0; + int64_t lru_size = 0; if (!table) return -1; @@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table) pthread_mutex_lock(&table->lock); { - while (table->lru_limit && table->lru_size > (table->lru_limit)) { + if (!table->lru_limit) + goto purge_list; + + lru_size = table->lru_size; + while (lru_size > (table->lru_limit)) { if (list_empty(&table->lru)) { gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_INODE_LIST, @@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table) break; } + lru_size--; entry = list_entry(table->lru.next, inode_t, list); + /* The logic of invalidation is required only if invalidator_fn + is present */ + if (table->invalidator_fn) { + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { + __inode_ref(entry, true); + tmp = entry; + break; + } + } table->lru_size--; __inode_retire(entry); - ret++; } + purge_list: list_splice_init(&table->purge, &purge); table->purge_size = 0; } pthread_mutex_unlock(&table->lock); + /* Pick 1 inode for invalidation */ + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; + table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; + inode_unref(tmp); + } + + /* Just so that if purge list is handled too, then clear it off */ + list_for_each_entry_safe(del, tmp, &purge, list) { - list_for_each_entry_safe(del, tmp, &purge, list) - { - list_del_init(&del->list); - inode_forget_atomic(del, 0); - __inode_destroy(del); - } + list_del_init(&del->list); + inode_forget_atomic(del, 0); + __inode_destroy(del); } return 
ret; @@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table) } inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), + xlator_t *invalidator_xl) { inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; int ret = -1; int i = 0; @@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl) new->ctxcount = xl->graph->xl_count + 1; new->lru_limit = lru_limit; + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; new->hashsize = 14057; /* TODO: Random Number?? */ /* In case FUSE is initing the inode table. */ - if (lru_limit == 0) - lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - - new->inode_pool = mem_pool_new(inode_t, lru_limit); + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) + mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; + new->inode_pool = mem_pool_new(inode_t, mem_pool_size); if (!new->inode_pool) goto out; - new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - + new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size); if (!new->dentry_pool) goto out; @@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl) INIT_LIST_HEAD(&new->active); INIT_LIST_HEAD(&new->lru); INIT_LIST_HEAD(&new->purge); + INIT_LIST_HEAD(&new->invalidate); ret = gf_asprintf(&new->name, "%s/inode", xl->name); if (-1 == ret) { @@ -1633,6 +1798,13 @@ out: return new; } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ + /* Only fuse for now requires the inode table with invalidator */ + return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} + int inode_table_ctx_free(inode_table_t *table) { @@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table) inode_table->lru_size--; } + /* Same logic for invalidate list */ + while (!list_empty(&inode_table->invalidate)) { + trav = list_first_entry(&inode_table->invalidate, inode_t, list); 
+ inode_forget_atomic(trav, 0); + __inode_retire(trav); + inode_table->invalidate_size--; + } + while (!list_empty(&inode_table->active)) { trav = list_first_entry(&inode_table->active, inode_t, list); /* forget and unref the inode to retire and add it to @@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix) gf_proc_dump_write("fd-count", "%u", inode->fd_count); gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count); gf_proc_dump_write("ref", "%u", inode->ref); + gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent); gf_proc_dump_write("ia_type", "%d", inode->ia_type); if (inode->_ctx) { inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix) gf_proc_dump_write(key, "%d", itable->lru_size); gf_proc_dump_build_key(key, prefix, "purge_size"); gf_proc_dump_write(key, "%d", itable->purge_size); + gf_proc_dump_build_key(key, prefix, "invalidate_size"); + gf_proc_dump_write(key, "%d", itable->invalidate_size); INODE_DUMP_LIST(&itable->active, key, prefix, "active"); INODE_DUMP_LIST(&itable->lru, key, prefix, "lru"); INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); + INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate"); pthread_mutex_unlock(&itable->lock); } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 6ca6a639456..464493d6cfc 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -791,6 +791,7 @@ __inode_find inode_find inode_find_directory_name inode_forget +inode_forget_with_unref inode_from_path inode_grep inode_grep_for_gfid @@ -815,6 +816,7 @@ inode_table_destroy_all inode_table_dump inode_table_dump_to_dict inode_table_new +inode_table_with_invalidator __inode_table_set_lru_limit inode_table_set_lru_limit inode_unlink diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t new file mode 100644 index 00000000000..9f1211660ce 
--- /dev/null +++ b/tests/features/fuse-lru-limit.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 + +mkdir ${M0}/dir-{1..9} +for i in {1..9}; do + for j in {1..1000}; do + echo "Test file" > ${M0}/dir-$i/file-$j; + done; +done +lc=$(get_mount_lru_size_value $V0 ${M0}) +# ideally it should be 9000+ +TEST [ $lc -ge 9000 ] + +TEST umount $M0 + +TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0 + +TEST find $M0 +lc=$(get_mount_lru_size_value $V0 ${M0}) +# ideally it should be <1000 +# Not sure if there are any possibilities of buffer need. +TEST [ $lc -le 1000 ] + +TEST rm -rf $M0/* + +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 + +cleanup diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3f4e19c211e..5bc070658e2 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -216,8 +216,8 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, struct fuse_out_header *fouh = NULL; if (res == -1) { - gf_log("glusterfs-fuse", GF_LOG_ERROR, - "writing to fuse device failed: %s", strerror(errno)); + gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR, + "writing to fuse device failed: %s", strerror(errno)); return errno; } @@ -312,29 +312,29 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) #define send_fuse_obj(this, finh, obj) \ send_fuse_data(this, finh, obj, sizeof(*(obj))) -#if FUSE_KERNEL_MINOR_VERSION >= 11 static void fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) { +#if FUSE_KERNEL_MINOR_VERSION >= 11 struct fuse_out_header *fouh = NULL; struct 
fuse_notify_inval_entry_out *fnieo = NULL; fuse_private_t *priv = NULL; dentry_t *dentry = NULL; + dentry_t *tmp = NULL; inode_t *inode = NULL; size_t nlen = 0; fuse_invalidate_node_t *node = NULL; + char gfid_str[UUID_CANONICAL_FORM_LEN + 1]; priv = this->private; - if (!priv->reverse_fuse_thread_started) return; - inode = fuse_ino_to_inode(fuse_ino, this); - if (inode == NULL) { + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) return; - } - list_for_each_entry(dentry, &inode->dentry_list, inode_list) + list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) { node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); if (node == NULL) @@ -348,38 +348,41 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) fouh->unique = 0; fouh->error = FUSE_NOTIFY_INVAL_ENTRY; - nlen = strlen(dentry->name); - fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; - fnieo->parent = inode_to_fuse_nodeid(dentry->parent); - - fnieo->namelen = nlen; - strcpy(node->inval_buf + sizeof(*fouh) + sizeof(*fnieo), dentry->name); + if (dentry->name) { + nlen = strlen(dentry->name); + fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; + fnieo->parent = inode_to_fuse_nodeid(dentry->parent); - pthread_mutex_lock(&priv->invalidate_mutex); - { - list_add_tail(&node->next, &priv->invalidate_list); - pthread_cond_signal(&priv->invalidate_cond); + fnieo->namelen = nlen; + strcpy((node->inval_buf + sizeof(*fouh) + sizeof(*fnieo)), + dentry->name); } - pthread_mutex_unlock(&priv->invalidate_mutex); gf_log("glusterfs-fuse", GF_LOG_TRACE, - "INVALIDATE entry: " - "%" PRIu64 "/%s", - fnieo->parent, dentry->name); + "INVALIDATE entry: %" PRIu64 "/%s (gfid:%s)", fnieo->parent, + dentry->name, uuid_utoa(inode->gfid)); if (dentry->parent) { - fuse_log_eh(this, "Invalidated entry %s (parent: %s)", dentry->name, - uuid_utoa(dentry->parent->gfid)); + fuse_log_eh(this, "Invalidated entry %s (parent: %s) gfid:%s", + dentry->name, uuid_utoa(dentry->parent->gfid), 
+ uuid_utoa_r(inode->gfid, gfid_str)); } else { - fuse_log_eh(this, "Invalidated entry %s(nodeid: %" PRIu64 ")", - dentry->name, fnieo->parent); + fuse_log_eh(this, + "Invalidated entry %s(nodeid: %" PRIu64 ") gfid:%s", + dentry->name, fnieo->parent, uuid_utoa(inode->gfid)); + } + + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); + pthread_cond_signal(&priv->invalidate_cond); } + pthread_mutex_unlock(&priv->invalidate_mutex); } - if (inode) - inode_unref(inode); -} #endif + return; +} /* * Send an inval inode notification to fuse. This causes an invalidation of the @@ -400,6 +403,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) if (!priv->reverse_fuse_thread_started) return; + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) + return; + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); if (node == NULL) return; @@ -419,7 +426,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) fniio->off = 0; fniio->len = -1; - inode = fuse_ino_to_inode(fuse_ino, this); + fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, + uuid_utoa(inode->gfid)); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino, + uuid_utoa(inode->gfid)); pthread_mutex_lock(&priv->invalidate_mutex); { @@ -428,24 +439,22 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) } pthread_mutex_unlock(&priv->invalidate_mutex); - gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64, - fuse_ino); - - if (inode) { - fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, - uuid_utoa(inode->gfid)); - } else { - fuse_log_eh(this, "Invalidated inode %" PRIu64, fuse_ino); - } - - if (inode) - inode_unref(inode); #else gf_log("glusterfs-fuse", GF_LOG_WARNING, - "fuse_invalidate_inode not implemented on OS X due to missing FUSE " - "notification"); + "fuse_invalidate_inode not implemented on this system"); #endif + 
return; +} + +#if FUSE_KERNEL_MINOR_VERSION >= 11 +/* Need this function for the signature (inode_t *, instead of uint64_t) */ +static int32_t +fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) +{ + fuse_invalidate_entry(this, (uint64_t)inode); + return 0; } +#endif static fuse_timed_message_t * fuse_timed_message_new(void) @@ -1068,11 +1077,14 @@ do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) { inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this); + gf_log("fuse", GF_LOG_TRACE, + "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, + nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); + fuse_log_eh(this, "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); - inode_forget(fuse_inode, nlookup); - inode_unref(fuse_inode); + inode_forget_with_unref(fuse_inode, nlookup); } static void @@ -1087,10 +1099,6 @@ fuse_forget(xlator_t *this, fuse_in_header_t *finh, void *msg, return; } - gf_log("glusterfs-fuse", GF_LOG_TRACE, - "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64, finh->unique, - finh->nodeid, ffi->nlookup); - do_forget(this, finh->unique, finh->nodeid, ffi->nlookup); GF_FREE(finh); @@ -5658,7 +5666,9 @@ fuse_thread_proc(void *data) fuse_in_header_t *finh = NULL; struct iovec iov_in[2]; void *msg = NULL; - const size_t msg0_size = sizeof(*finh) + 128; + /* we need 512 extra buffer size for BATCH_FORGET fop. 
By tests, it is + found to reduce 'REALLOC()' calls in the loop */ + const size_t msg0_size = sizeof(*finh) + 512; fuse_handler_t **fuse_ops = NULL; struct pollfd pfd[2] = {{ 0, @@ -5992,7 +6002,12 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) goto unlock; } +#if FUSE_KERNEL_MINOR_VERSION >= 11 + itable = inode_table_with_invalidator(priv->lru_limit, graph->top, + fuse_inode_invalidate_fn, this); +#else itable = inode_table_new(0, graph->top); +#endif if (!itable) { ret = -1; goto unlock; @@ -6453,6 +6468,8 @@ init(xlator_t *this_xl) } } + GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); @@ -6780,6 +6797,14 @@ struct volume_options options[] = { .description = "Handle iterrupts in FLUSH handler (for testing purposes).", }, + { + .key = {"lru-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "131072", + .min = 0, + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", + }, {.key = {NULL}}, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 60702ab1da5..b892113eb79 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -187,6 +187,9 @@ struct fuse_private { pthread_mutex_t interrupt_mutex; gf_boolean_t flush_handle_interrupt; + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; }; typedef struct fuse_private fuse_private_t; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 4a95cd80b87..d09a7cd663e 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -249,6 +249,10 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); fi + if [ -n "$lru_limit" ]; then + cmd_line=$(echo 
"$cmd_line --lru-limit=$lru_limit"); + fi + if [ -n "$bg_qlen" ]; then cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); fi @@ -489,6 +493,9 @@ with_options() "gid-timeout") gid_timeout=$value ;; + "lru-limit") + lru_limit=$value + ;; "background-qlen") bg_qlen=$value ;; |