diff options
| -rw-r--r-- | doc/mount.glusterfs.8 | 4 | ||||
| -rw-r--r-- | glusterfsd/src/glusterfsd.c | 24 | ||||
| -rw-r--r-- | glusterfsd/src/glusterfsd.h | 1 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs/glusterfs.h | 1 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs/inode.h | 17 | ||||
| -rw-r--r-- | libglusterfs/src/inode.c | 254 | ||||
| -rw-r--r-- | libglusterfs/src/libglusterfs.sym | 2 | ||||
| -rw-r--r-- | tests/features/fuse-lru-limit.t | 42 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 127 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 3 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 7 | 
11 files changed, 395 insertions, 87 deletions
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 index 367f02d9b1a..902b0c1ee5c 100644 --- a/doc/mount.glusterfs.8 +++ b/doc/mount.glusterfs.8 @@ -122,6 +122,10 @@ Provide list of backup volfile servers in the following format [default: None]  \fBDeprecated\fR option - placed here for backward compatibility [default: 1]  .TP  .TP +\fBlru-limit=\fRN +Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] +.TP +.TP  \fBbackground-qlen=\fRN  Set fuse module's background queue length to N [default: 64]  .TP diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 6347941f369..0dea52b6af1 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -219,6 +219,9 @@ static struct argp_option gf_options[] = {       "[default: 300]"},      {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,       "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, +    {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, +     "Set fuse module's limit for number of inodes kept in LRU list to N " +     "[default: 0]"},      {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,       "Set fuse module's background queue length to N "       "[default: 64]"}, @@ -496,6 +499,15 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)          }      } +    if (cmd_args->lru_limit >= 0) { +        ret = dict_set_int32(options, "lru-limit", cmd_args->lru_limit); +        if (ret < 0) { +            gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, +                   "lru-limit"); +            goto err; +        } +    } +      if (cmd_args->background_qlen) {          ret = dict_set_int32(options, "background-qlen",                               cmd_args->background_qlen); @@ -1257,6 +1269,13 @@ parse_opts(int key, char *arg, struct argp_state *state)              cmd_args->resolve_gids = 1;              break; +        case ARGP_FUSE_LRU_LIMIT_KEY: +            if (!gf_string2int32(arg, 
&cmd_args->lru_limit)) +                break; + +            argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); +            break; +          case ARGP_FUSE_BACKGROUND_QLEN_KEY:              if (!gf_string2int(arg, &cmd_args->background_qlen))                  break; @@ -2085,6 +2104,11 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)          ctx->ssl_cert_depth = glusterfs_read_secure_access_file();      } +    /* Need to set lru_limit to below 0 to indicate there was nothing +       specified. This is needed as 0 is a valid option, and may not be +       default value. */ +    cmd_args->lru_limit = -1; +      argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);      if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir || diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 0042054e138..86ac61c1a92 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -109,6 +109,7 @@ enum argp_option_keys {      ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,      ARGP_PRINT_LIBEXECDIR_KEY = 188,      ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, +    ARGP_FUSE_LRU_LIMIT_KEY = 190,  };  struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 908a0ce774f..9f14f2f5440 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -524,6 +524,7 @@ struct _cmd_args {      pid_t client_pid;      int client_pid_set;      unsigned uid_map_root; +    int32_t lru_limit;      int background_qlen;      int congestion_threshold;      char *fuse_mountopts; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 5934373ec5b..52efdd85ccc 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -54,6 +54,13 @@ struct _inode_table {      struct mem_pool *dentry_pool; /* memory pool for dentrys */      struct mem_pool *fd_mem_pool; /* 
memory pool for fd_t */      int ctxcount;                 /* number of slots in inode->ctx */ + +    /* This is required for 'invalidation' when 'nlookup' would be used, +       especially in the case of fuse-bridge */ +    int32_t (*invalidator_fn)(xlator_t *, inode_t *); +    xlator_t *invalidator_xl; +    struct list_head invalidate; /* inodes which are in invalidation queue */ +    uint32_t invalidate_size;    /* count of inodes in invalidation list */  };  struct _dentry { @@ -100,6 +107,7 @@ struct _inode {      struct list_head list;        /* active/lru/purge */      struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ +    bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */  };  #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +115,12 @@ struct _inode {  #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)  inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, +                             int32_t (*invalidator_fn)(xlator_t *, inode_t *), +                             xlator_t *invalidator_xl);  void  inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -139,6 +152,8 @@ inode_lookup(inode_t *inode);  int  inode_forget(inode_t *inode, uint64_t nlookup); +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup);  int  inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 3bf32cfe442..b4a62897498 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@     move latest accessed dentry to list_head of inode  */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. 
Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. + +With this information, these are the states that cause a transition: + +    refs nlookups inv_sent    op +      1      0        0      unref  -> refs = 0, active--->destroy +      1      1        0      unref  -> refs = 0, active--->lru +      1      1        0     forget  -> nlookups = 0, active--->active +     *0      1        0     forget  -> nlookups = 0, lru--->destroy +     *0      1        1     forget  -> nlookups = 0, invalidate--->destroy +      0      1        0       ref   -> refs = 1, lru--->active +      0      1        1       ref   -> refs = 1, inv_sent = 0, invalidate--->active +      0      1        0    overflow -> refs = 1, inv_sent = 1, lru--->invalidate +      1      1        1      unref  -> refs = 0, invalidate--->invalidate +      1      1        1     forget  -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. 
+ +Based on this, the code could be similar to this: + +    ref(inode, inv) +    { +        if (refs == 0) { +            if (inv_sent) { +                invalidate_count--; +                inv_sent = 0; +            } else { +                lru_count--; +            } +            if (inv) { +                inv_sent = 1; +                invalidate_count++; +                list_move(inode, invalidate); +            } else { +                active_count++; +                list_move(inode, active); +            } +        } +        refs++; +    } + +    unref(inode, clear) +    { +        if (clear && inv_sent) { +            // there is a case of fuse itself sending forget, without +            // invalidate, after entry delete, like unlink(), rmdir(). +            inv_sent = 0; +            invalidate_count--; +            active_count++; +            list_move(inode, active); +        } +        refs--; +        if ((refs == 0) && !inv_sent) { +            active_count--; +            if (nlookups == 0) { +                destroy(inode); +            } else { +                lru_count++; +                list_move(inode, lru); +            } +        } +    } + +    forget(inode) +    { +        ref(inode, false); +        nlookups--; +        unref(inode, true); +    } + +    overflow(inode) +    { +        ref(inode, true); +        invalidator(inode); +        unref(inode, false); +    } + +*/ +// clang-format on +  #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type)                  \      {                                                                          \          int i = 1;                                                             \ @@ -37,7 +131,7 @@      }  static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear);  static int  inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)      dentry->name = NULL;      if (dentry->parent) { -        
__inode_unref(dentry->parent); +        __inode_unref(dentry->parent, false);          dentry->parent = NULL;      } @@ -446,7 +540,7 @@ out:  }  static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear)  {      int index = 0;      xlator_t *this = NULL; @@ -455,8 +549,6 @@ __inode_unref(inode_t *inode)      if (!inode)          return NULL; -    this = THIS; -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. @@ -464,6 +556,13 @@ __inode_unref(inode_t *inode)      if (__is_root_gfid(inode->gfid))          return inode; +    this = THIS; + +    if (clear && inode->invalidate_sent) { +        inode->invalidate_sent = false; +        inode->table->invalidate_size--; +        __inode_activate(inode); +    }      GF_ASSERT(inode->ref);      --inode->ref; @@ -474,7 +573,7 @@ __inode_unref(inode_t *inode)          inode->_ctx[index].ref--;      } -    if (!inode->ref) { +    if (!inode->ref && !inode->invalidate_sent) {          inode->table->active_size--;          nlookup = GF_ATOMIC_GET(inode->nlookup); @@ -488,7 +587,7 @@ __inode_unref(inode_t *inode)  }  static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate)  {      int index = 0;      xlator_t *this = NULL; @@ -498,11 +597,6 @@ __inode_ref(inode_t *inode)      this = THIS; -    if (!inode->ref) { -        inode->table->lru_size--; -        __inode_activate(inode); -    } -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. 
If we do not allow unrefs but allow refs, @@ -514,6 +608,22 @@ __inode_ref(inode_t *inode)      if (__is_root_gfid(inode->gfid) && inode->ref)          return inode; +    if (!inode->ref) { +        if (inode->invalidate_sent) { +            inode->invalidate_sent = false; +            inode->table->invalidate_size--; +        } else { +            inode->table->lru_size--; +        } +        if (is_invalidate) { +            inode->invalidate_sent = true; +            inode->table->invalidate_size++; +            list_move_tail(&inode->list, &inode->table->invalidate); +        } else { +            __inode_activate(inode); +        } +    } +      inode->ref++;      index = __inode_get_xl_index(inode, this); @@ -537,7 +647,7 @@ inode_unref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_unref(inode); +        inode = __inode_unref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -558,7 +668,7 @@ inode_ref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_ref(inode); +        inode = __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)      }      if (parent) -        newd->parent = __inode_ref(parent); +        newd->parent = __inode_ref(parent, false);      list_add(&newd->inode_list, &inode->dentry_list);      newd->inode = inode; @@ -662,7 +772,7 @@ inode_new(inode_table_t *table)      {          inode = __inode_create(table);          if (inode != NULL) { -            __inode_ref(inode); +            __inode_ref(inode, false);          }      }      pthread_mutex_unlock(&table->lock); @@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)              inode = dentry->inode;          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -912,7 
+1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid)      {          inode = __inode_find(table, gfid);          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)          linked_inode = __inode_link(inode, parent, name, iatt);          if (linked_inode) -            __inode_ref(linked_inode); +            __inode_ref(linked_inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)      return 0;  } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ +    inode_table_t *table = NULL; + +    if (!inode) { +        gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +                         "inode not found"); +        return -1; +    } + +    table = inode->table; + +    pthread_mutex_lock(&table->lock); +    { +        inode_forget_atomic(inode, nlookup); +        __inode_unref(inode, true); +    } +    pthread_mutex_unlock(&table->lock); + +    inode_table_prune(table); + +    return 0; +} +  /*   * Invalidate an inode. This is invoked when a translator decides that an   * inode's cache is no longer valid. 
Any translator interested in taking action @@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)              parent = dentry->parent;          if (parent) -            __inode_ref(parent); +            __inode_ref(parent, false);      }      pthread_mutex_unlock(&table->lock); @@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table)      inode_t *del = NULL;      inode_t *tmp = NULL;      inode_t *entry = NULL; +    uint64_t nlookup = 0; +    int64_t lru_size = 0;      if (!table)          return -1; @@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table)      pthread_mutex_lock(&table->lock);      { -        while (table->lru_limit && table->lru_size > (table->lru_limit)) { +        if (!table->lru_limit) +            goto purge_list; + +        lru_size = table->lru_size; +        while (lru_size > (table->lru_limit)) {              if (list_empty(&table->lru)) {                  gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,                                   LG_MSG_INVALID_INODE_LIST, @@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table)                  break;              } +            lru_size--;              entry = list_entry(table->lru.next, inode_t, list); +            /* The logic of invalidation is required only if invalidator_fn +               is present */ +            if (table->invalidator_fn) { +                /* check for valid inode with 'nlookup' */ +                nlookup = GF_ATOMIC_GET(entry->nlookup); +                if (nlookup) { +                    __inode_ref(entry, true); +                    tmp = entry; +                    break; +                } +            }              table->lru_size--;              __inode_retire(entry); -              ret++;          } +    purge_list:          list_splice_init(&table->purge, &purge);          table->purge_size = 0;      }      pthread_mutex_unlock(&table->lock); +    /* Pick 1 inode for invalidation */ +    if (tmp) { +        
xlator_t *old_THIS = THIS; +        THIS = table->invalidator_xl; +        table->invalidator_fn(table->invalidator_xl, tmp); +        THIS = old_THIS; +        inode_unref(tmp); +    } + +    /* Just so that if purge list is handled too, then clear it off */ +    list_for_each_entry_safe(del, tmp, &purge, list)      { -        list_for_each_entry_safe(del, tmp, &purge, list) -        { -            list_del_init(&del->list); -            inode_forget_atomic(del, 0); -            __inode_destroy(del); -        } +        list_del_init(&del->list); +        inode_forget_atomic(del, 0); +        __inode_destroy(del);      }      return ret; @@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table)  }  inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, +                             int32_t (*invalidator_fn)(xlator_t *, inode_t *), +                             xlator_t *invalidator_xl)  {      inode_table_t *new = NULL; +    uint32_t mem_pool_size = lru_limit;      int ret = -1;      int i = 0; @@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      new->ctxcount = xl->graph->xl_count + 1;      new->lru_limit = lru_limit; +    new->invalidator_fn = invalidator_fn; +    new->invalidator_xl = invalidator_xl;      new->hashsize = 14057; /* TODO: Random Number?? */      /* In case FUSE is initing the inode table. 
*/ -    if (lru_limit == 0) -        lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - -    new->inode_pool = mem_pool_new(inode_t, lru_limit); +    if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +        mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; +    new->inode_pool = mem_pool_new(inode_t, mem_pool_size);      if (!new->inode_pool)          goto out; -    new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - +    new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);      if (!new->dentry_pool)          goto out; @@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      INIT_LIST_HEAD(&new->active);      INIT_LIST_HEAD(&new->lru);      INIT_LIST_HEAD(&new->purge); +    INIT_LIST_HEAD(&new->invalidate);      ret = gf_asprintf(&new->name, "%s/inode", xl->name);      if (-1 == ret) { @@ -1633,6 +1798,13 @@ out:      return new;  } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ +    /* Only fuse for now requires the inode table with invalidator */ +    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} +  int  inode_table_ctx_free(inode_table_t *table)  { @@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table)              inode_table->lru_size--;          } +        /* Same logic for invalidate list */ +        while (!list_empty(&inode_table->invalidate)) { +            trav = list_first_entry(&inode_table->invalidate, inode_t, list); +            inode_forget_atomic(trav, 0); +            __inode_retire(trav); +            inode_table->invalidate_size--; +        } +          while (!list_empty(&inode_table->active)) {              trav = list_first_entry(&inode_table->active, inode_t, list);              /* forget and unref the inode to retire and add it to @@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix)          gf_proc_dump_write("fd-count", "%u", inode->fd_count);          gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);        
  gf_proc_dump_write("ref", "%u", inode->ref); +        gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);          gf_proc_dump_write("ia_type", "%d", inode->ia_type);          if (inode->_ctx) {              inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)      gf_proc_dump_write(key, "%d", itable->lru_size);      gf_proc_dump_build_key(key, prefix, "purge_size");      gf_proc_dump_write(key, "%d", itable->purge_size); +    gf_proc_dump_build_key(key, prefix, "invalidate_size"); +    gf_proc_dump_write(key, "%d", itable->invalidate_size);      INODE_DUMP_LIST(&itable->active, key, prefix, "active");      INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");      INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); +    INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");      pthread_mutex_unlock(&itable->lock);  } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 6ca6a639456..464493d6cfc 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -791,6 +791,7 @@ __inode_find  inode_find  inode_find_directory_name  inode_forget +inode_forget_with_unref  inode_from_path  inode_grep  inode_grep_for_gfid @@ -815,6 +816,7 @@ inode_table_destroy_all  inode_table_dump  inode_table_dump_to_dict  inode_table_new +inode_table_with_invalidator  __inode_table_set_lru_limit  inode_table_set_lru_limit  inode_unlink diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t new file mode 100644 index 00000000000..9f1211660ce --- /dev/null +++ b/tests/features/fuse-lru-limit.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 + +mkdir ${M0}/dir-{1..9} +for i in {1..9}; do +    for j in {1..1000}; do +        echo "Test file" > ${M0}/dir-$i/file-$j; +    done; +done +lc=$(get_mount_lru_size_value $V0 ${M0}) +# ideally it should be 9000+ +TEST [ $lc -ge 9000 ] + +TEST umount $M0 + +TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0 + +TEST find $M0 +lc=$(get_mount_lru_size_value $V0 ${M0}) +# ideally it should be <1000 +# Not sure if there are any possibilities of buffer need. +TEST [ $lc -le 1000 ] + +TEST rm -rf $M0/* + +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 + +cleanup diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3f4e19c211e..5bc070658e2 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -216,8 +216,8 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,      struct fuse_out_header *fouh = NULL;      if (res == -1) { -        gf_log("glusterfs-fuse", GF_LOG_ERROR, -               "writing to fuse device failed: %s", strerror(errno)); +        gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR, +                         "writing to fuse device failed: %s", strerror(errno));          return errno;      } @@ -312,29 +312,29 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)  #define send_fuse_obj(this, finh, obj)                                         \      send_fuse_data(this, finh, obj, sizeof(*(obj))) -#if FUSE_KERNEL_MINOR_VERSION >= 11  static void  fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)  { +#if FUSE_KERNEL_MINOR_VERSION >= 11      struct fuse_out_header *fouh = NULL;   
   struct fuse_notify_inval_entry_out *fnieo = NULL;      fuse_private_t *priv = NULL;      dentry_t *dentry = NULL; +    dentry_t *tmp = NULL;      inode_t *inode = NULL;      size_t nlen = 0;      fuse_invalidate_node_t *node = NULL; +    char gfid_str[UUID_CANONICAL_FORM_LEN + 1];      priv = this->private; -      if (!priv->reverse_fuse_thread_started)          return; -    inode = fuse_ino_to_inode(fuse_ino, this); -    if (inode == NULL) { +    inode = (inode_t *)(unsigned long)fuse_ino; +    if (inode == NULL)          return; -    } -    list_for_each_entry(dentry, &inode->dentry_list, inode_list) +    list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)      {          node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);          if (node == NULL) @@ -348,38 +348,41 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)          fouh->unique = 0;          fouh->error = FUSE_NOTIFY_INVAL_ENTRY; -        nlen = strlen(dentry->name); -        fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; -        fnieo->parent = inode_to_fuse_nodeid(dentry->parent); - -        fnieo->namelen = nlen; -        strcpy(node->inval_buf + sizeof(*fouh) + sizeof(*fnieo), dentry->name); +        if (dentry->name) { +            nlen = strlen(dentry->name); +            fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; +            fnieo->parent = inode_to_fuse_nodeid(dentry->parent); -        pthread_mutex_lock(&priv->invalidate_mutex); -        { -            list_add_tail(&node->next, &priv->invalidate_list); -            pthread_cond_signal(&priv->invalidate_cond); +            fnieo->namelen = nlen; +            strcpy((node->inval_buf + sizeof(*fouh) + sizeof(*fnieo)), +                   dentry->name);          } -        pthread_mutex_unlock(&priv->invalidate_mutex);          gf_log("glusterfs-fuse", GF_LOG_TRACE, -               "INVALIDATE entry: " -               "%" PRIu64 "/%s", -               fnieo->parent, 
dentry->name); +               "INVALIDATE entry: %" PRIu64 "/%s (gfid:%s)", fnieo->parent, +               dentry->name, uuid_utoa(inode->gfid));          if (dentry->parent) { -            fuse_log_eh(this, "Invalidated entry %s (parent: %s)", dentry->name, -                        uuid_utoa(dentry->parent->gfid)); +            fuse_log_eh(this, "Invalidated entry %s (parent: %s) gfid:%s", +                        dentry->name, uuid_utoa(dentry->parent->gfid), +                        uuid_utoa_r(inode->gfid, gfid_str));          } else { -            fuse_log_eh(this, "Invalidated entry %s(nodeid: %" PRIu64 ")", -                        dentry->name, fnieo->parent); +            fuse_log_eh(this, +                        "Invalidated entry %s(nodeid: %" PRIu64 ") gfid:%s", +                        dentry->name, fnieo->parent, uuid_utoa(inode->gfid)); +        } + +        pthread_mutex_lock(&priv->invalidate_mutex); +        { +            list_add_tail(&node->next, &priv->invalidate_list); +            pthread_cond_signal(&priv->invalidate_cond);          } +        pthread_mutex_unlock(&priv->invalidate_mutex);      } -    if (inode) -        inode_unref(inode); -}  #endif +    return; +}  /*   * Send an inval inode notification to fuse. 
This causes an invalidation of the @@ -400,6 +403,10 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)      if (!priv->reverse_fuse_thread_started)          return; +    inode = (inode_t *)(unsigned long)fuse_ino; +    if (inode == NULL) +        return; +      node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);      if (node == NULL)          return; @@ -419,7 +426,11 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)      fniio->off = 0;      fniio->len = -1; -    inode = fuse_ino_to_inode(fuse_ino, this); +    fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, +                uuid_utoa(inode->gfid)); +    gf_log("glusterfs-fuse", GF_LOG_TRACE, +           "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino, +           uuid_utoa(inode->gfid));      pthread_mutex_lock(&priv->invalidate_mutex);      { @@ -428,24 +439,22 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)      }      pthread_mutex_unlock(&priv->invalidate_mutex); -    gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %" PRIu64, -           fuse_ino); - -    if (inode) { -        fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, -                    uuid_utoa(inode->gfid)); -    } else { -        fuse_log_eh(this, "Invalidated inode %" PRIu64, fuse_ino); -    } - -    if (inode) -        inode_unref(inode);  #else      gf_log("glusterfs-fuse", GF_LOG_WARNING, -           "fuse_invalidate_inode not implemented on OS X due to missing FUSE " -           "notification"); +           "fuse_invalidate_inode not implemented on this system");  #endif +    return; +} + +#if FUSE_KERNEL_MINOR_VERSION >= 11 +/* Need this function for the signature (inode_t *, instead of uint64_t) */ +static int32_t +fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) +{ +    fuse_invalidate_entry(this, (uint64_t)inode); +    return 0;  } +#endif  static fuse_timed_message_t *  fuse_timed_message_new(void) @@ -1068,11 +1077,14 @@ 
do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup)  {      inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this); +    gf_log("fuse", GF_LOG_TRACE, +           "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, +           nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); +      fuse_log_eh(this, "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)",                  unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); -    inode_forget(fuse_inode, nlookup); -    inode_unref(fuse_inode); +    inode_forget_with_unref(fuse_inode, nlookup);  }  static void @@ -1087,10 +1099,6 @@ fuse_forget(xlator_t *this, fuse_in_header_t *finh, void *msg,          return;      } -    gf_log("glusterfs-fuse", GF_LOG_TRACE, -           "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64, finh->unique, -           finh->nodeid, ffi->nlookup); -      do_forget(this, finh->unique, finh->nodeid, ffi->nlookup);      GF_FREE(finh); @@ -5658,7 +5666,9 @@ fuse_thread_proc(void *data)      fuse_in_header_t *finh = NULL;      struct iovec iov_in[2];      void *msg = NULL; -    const size_t msg0_size = sizeof(*finh) + 128; +    /* we need 512 extra buffer size for BATCH_FORGET fop. 
By tests, it is +       found to reduce 'REALLOC()' calls in the loop */ +    const size_t msg0_size = sizeof(*finh) + 512;      fuse_handler_t **fuse_ops = NULL;      struct pollfd pfd[2] = {{          0, @@ -5992,7 +6002,12 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph)              goto unlock;          } +#if FUSE_KERNEL_MINOR_VERSION >= 11 +        itable = inode_table_with_invalidator(priv->lru_limit, graph->top, +                                              fuse_inode_invalidate_fn, this); +#else          itable = inode_table_new(0, graph->top); +#endif          if (!itable) {              ret = -1;              goto unlock; @@ -6453,6 +6468,8 @@ init(xlator_t *this_xl)          }      } +    GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); +      GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);      GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); @@ -6780,6 +6797,14 @@ struct volume_options options[] = {          .description =              "Handle iterrupts in FLUSH handler (for testing purposes).",      }, +    { +        .key = {"lru-limit"}, +        .type = GF_OPTION_TYPE_INT, +        .default_value = "131072", +        .min = 0, +        .description = "makes glusterfs invalidate kernel inodes after " +                       "reaching this limit (0 means 'unlimited')", +    },      {.key = {NULL}},  }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 60702ab1da5..b892113eb79 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -187,6 +187,9 @@ struct fuse_private {      pthread_mutex_t interrupt_mutex;      gf_boolean_t flush_handle_interrupt; + +    /* LRU Limit, if not set, default is 128k for now */ +    uint32_t lru_limit;  };  typedef struct fuse_private fuse_private_t; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 
4a95cd80b87..d09a7cd663e 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -249,6 +249,10 @@ start_glusterfs ()          cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");      fi +    if [ -n "$lru_limit" ]; then +        cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); +    fi +      if [ -n "$bg_qlen" ]; then          cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");      fi @@ -489,6 +493,9 @@ with_options()          "gid-timeout")              gid_timeout=$value              ;; +        "lru-limit") +            lru_limit=$value +            ;;          "background-qlen")              bg_qlen=$value              ;;  | 
