summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author N Balachandran <nbalacha@redhat.com> 2019-08-09 14:34:22 +0530
committer Amar Tumballi <amarts@redhat.com> 2019-08-14 03:21:03 +0000
commit 089e53c7f1c32644ce2b37830b678b4c0c7071fc (patch)
tree 04a020934ae845ad4d310e8410cbfa6bda46c27a
parent 5707f2eb1c6933e70300227a6068b6b50befbb87 (diff)
fuse: Set limit on invalidate queue size
If the glusterfs fuse client process is unable to process invalidate requests quickly enough, the number of queued requests can grow large enough to use a significant amount of memory. This change introduces a new option, invalidate-limit, which sets an upper limit on the number of outstanding invalidate requests in order to prevent runaway memory usage.

Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788
Fixes: bz#1732717
Signed-off-by: N Balachandran <nbalacha@redhat.com>
-rw-r--r-- doc/mount.glusterfs.8 5
-rw-r--r-- glusterfsd/src/glusterfsd.c 21
-rw-r--r-- glusterfsd/src/glusterfsd.h 1
-rw-r--r-- libglusterfs/src/glusterfs/glusterfs.h 1
-rw-r--r-- libglusterfs/src/glusterfs/inode.h 1
-rw-r--r-- libglusterfs/src/inode.c 31
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 60
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.h 4
-rwxr-xr-x xlators/mount/fuse/utils/mount.glusterfs.in 7
9 files changed, 107 insertions, 24 deletions
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 286631b9c5c..b35b362d69a 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None]
Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072]
.TP
.TP
+\fBinvalidate-limit=\fRN
+Suspend fuse invalidations implied by 'lru-limit' if the number of outstanding
+invalidations reaches N
+.TP
+.TP
\fBbackground-qlen=\fRN
Set fuse module's background queue length to N [default: 64]
.TP
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 968b97c2eac..b2fc944b92a 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -223,6 +223,9 @@ static struct argp_option gf_options[] = {
{"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
"Set fuse module's limit for number of inodes kept in LRU list to N "
"[default: 131072]"},
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
+ "outstanding invalidations reaches N"},
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
"Set fuse module's background queue length to N "
"[default: 64]"},
@@ -518,6 +521,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
}
}
+ if (cmd_args->invalidate_limit >= 0) {
+ ret = dict_set_int32(options, "invalidate-limit",
+ cmd_args->invalidate_limit);
+ if (ret < 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "invalidate-limit");
+ goto err;
+ }
+ }
+
if (cmd_args->background_qlen) {
ret = dict_set_int32(options, "background-qlen",
cmd_args->background_qlen);
@@ -1317,6 +1330,14 @@ parse_opts(int key, char *arg, struct argp_state *state)
argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
break;
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
+ break;
+
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
+ arg);
+ break;
+
case ARGP_FUSE_BACKGROUND_QLEN_KEY:
if (!gf_string2int(arg, &cmd_args->background_qlen))
break;
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 8a7c034ce40..e87b12bac88 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -114,6 +114,7 @@ enum argp_option_keys {
ARGP_GLOBAL_THREADING_KEY = 192,
ARGP_BRICK_MUX_KEY = 193,
ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY = 194,
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
};
struct _gfd_vol_top_priv {
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index d59306508b2..01262dcd9f5 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -527,6 +527,7 @@ struct _cmd_args {
int client_pid_set;
unsigned uid_map_root;
int32_t lru_limit;
+ int32_t invalidate_limit;
int background_qlen;
int congestion_threshold;
char *fuse_mountopts;
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index 424e50fd935..4b28da510c7 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -111,6 +111,7 @@ struct _inode {
struct list_head list; /* active/lru/purge */
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */
bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
};
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index b5147f2010c..89e2092927a 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -510,8 +510,8 @@ __inode_unref(inode_t *inode, bool clear)
this = THIS;
- if (clear && inode->invalidate_sent) {
- inode->invalidate_sent = false;
+ if (clear && inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
inode->table->invalidate_size--;
__inode_activate(inode);
}
@@ -525,7 +525,7 @@ __inode_unref(inode_t *inode, bool clear)
inode->_ctx[index].ref--;
}
- if (!inode->ref && !inode->invalidate_sent) {
+ if (!inode->ref && !inode->in_invalidate_list) {
inode->table->active_size--;
nlookup = GF_ATOMIC_GET(inode->nlookup);
@@ -561,14 +561,14 @@ __inode_ref(inode_t *inode, bool is_invalidate)
return inode;
if (!inode->ref) {
- if (inode->invalidate_sent) {
- inode->invalidate_sent = false;
+ if (inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
inode->table->invalidate_size--;
} else {
inode->table->lru_size--;
}
if (is_invalidate) {
- inode->invalidate_sent = true;
+ inode->in_invalidate_list = true;
inode->table->invalidate_size++;
list_move_tail(&inode->list, &inode->table->invalidate);
} else {
@@ -1544,6 +1544,7 @@ static int
inode_table_prune(inode_table_t *table)
{
int ret = 0;
+ int ret1 = 0;
struct list_head purge = {
0,
};
@@ -1582,6 +1583,10 @@ inode_table_prune(inode_table_t *table)
/* check for valid inode with 'nlookup' */
nlookup = GF_ATOMIC_GET(entry->nlookup);
if (nlookup) {
+ if (entry->invalidate_sent) {
+ list_move_tail(&entry->list, &table->lru);
+ continue;
+ }
__inode_ref(entry, true);
tmp = entry;
break;
@@ -1603,9 +1608,19 @@ inode_table_prune(inode_table_t *table)
if (tmp) {
xlator_t *old_THIS = THIS;
THIS = table->invalidator_xl;
- table->invalidator_fn(table->invalidator_xl, tmp);
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
THIS = old_THIS;
- inode_unref(tmp);
+ pthread_mutex_lock(&table->lock);
+ {
+ if (!ret1) {
+ tmp->invalidate_sent = true;
+ __inode_unref(tmp, false);
+ } else {
+ /* Move this back to the lru list*/
+ __inode_unref(tmp, true);
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
}
/* Just so that if purge list is handled too, then clear it off */
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3a779625a08..836d07ddfde 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -33,7 +33,7 @@ static int gf_fuse_xattr_enotsup_log;
void
fini(xlator_t *this_xl);
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
/*
@@ -319,7 +319,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
#define send_fuse_obj(this, finh, obj) \
send_fuse_data(this, finh, obj, sizeof(*(obj)))
-static void
+static int32_t
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
{
#if FUSE_KERNEL_MINOR_VERSION >= 11
@@ -335,17 +335,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
{
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- break;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -382,20 +387,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
}
#endif
- return;
+ return 0;
}
/*
* Send an inval inode notification to fuse. This causes an invalidation of the
* entire page cache mapping on the inode.
*/
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
#if FUSE_KERNEL_MINOR_VERSION >= 11
@@ -408,15 +414,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- return;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -442,6 +453,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -450,7 +462,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
gf_log("glusterfs-fuse", GF_LOG_WARNING,
"fuse_invalidate_inode not implemented on this system");
#endif
- return;
+ return 0;
}
#if FUSE_KERNEL_MINOR_VERSION >= 11
@@ -458,8 +470,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
static int32_t
fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
{
- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
- return 0;
+ int32_t ret = 0;
+ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
+ return ret;
}
#endif
@@ -4010,7 +4023,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
finh->nodeid);
#if FUSE_KERNEL_MINOR_VERSION >= 11
- fuse_invalidate_entry(this, finh->nodeid);
+ ret = fuse_invalidate_entry(this, finh->nodeid);
+ if (ret)
+ op_errno = EBUSY;
#endif
goto done;
}
@@ -4832,6 +4847,7 @@ notify_kernel_loop(void *data)
fuse_invalidate_node_t, next);
list_del_init(&node->next);
+ priv->invalidate_count--;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -4875,6 +4891,7 @@ notify_kernel_loop(void *data)
list_del_init(&node->next);
GF_FREE(node);
}
+ priv->invalidate_count = 0;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -6150,6 +6167,9 @@ fuse_priv_dump(xlator_t *this)
(int)private->timed_response_fuse_thread_started);
gf_proc_dump_write("reverse_thread_started", "%d",
(int)private->reverse_fuse_thread_started);
+ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
+ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
+ private->invalidate_count);
gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
return 0;
@@ -6689,6 +6709,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
+ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
+ cleanup_exit);
+
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
@@ -7028,6 +7051,15 @@ struct volume_options options[] = {
"reaching this limit (0 means 'unlimited')",
},
{
+ .key = {"invalidate-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .min = 0,
+ .description = "suspend invalidations as of 'lru-limit' if the number "
+ "of outstanding invalidations reaches this limit "
+ "(0 means 'unlimited')",
+ },
+ {
.key = {"auto-invalidation"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true",
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 8d25bed0481..c239d948652 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -139,7 +139,7 @@ struct fuse_private {
pthread_cond_t invalidate_cond;
pthread_mutex_t invalidate_mutex;
gf_boolean_t reverse_fuse_thread_started;
-
+ uint64_t invalidate_count;
/* For communicating with separate mount thread. */
int status_pipe[2];
@@ -191,7 +191,7 @@ struct fuse_private {
/* LRU Limit, if not set, default is 128k for now */
uint32_t lru_limit;
-
+ uint32_t invalidate_limit;
uint32_t fuse_dev_eperm_ratelimit_ns;
};
typedef struct fuse_private fuse_private_t;
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 21cc018eaa5..59762169f46 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -261,6 +261,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit");
fi
+ if [ -n "$invalidate_limit" ]; then
+ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit");
+ fi
+
if [ -n "$bg_qlen" ]; then
cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
fi
@@ -513,6 +517,9 @@ with_options()
"lru-limit")
lru_limit=$value
;;
+ "invalidate-limit")
+ invalidate_limit=$value
+ ;;
"background-qlen")
bg_qlen=$value
;;