diff options
Diffstat (limited to 'libglusterfs')
| -rw-r--r-- | libglusterfs/src/glusterfs/glusterfs.h | 1 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs/inode.h | 17 | ||||
| -rw-r--r-- | libglusterfs/src/inode.c | 254 | ||||
| -rw-r--r-- | libglusterfs/src/libglusterfs.sym | 2 | 
4 files changed, 238 insertions, 36 deletions
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index 908a0ce774f..9f14f2f5440 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -524,6 +524,7 @@ struct _cmd_args {      pid_t client_pid;      int client_pid_set;      unsigned uid_map_root; +    int32_t lru_limit;      int background_qlen;      int congestion_threshold;      char *fuse_mountopts; diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h index 5934373ec5b..52efdd85ccc 100644 --- a/libglusterfs/src/glusterfs/inode.h +++ b/libglusterfs/src/glusterfs/inode.h @@ -54,6 +54,13 @@ struct _inode_table {      struct mem_pool *dentry_pool; /* memory pool for dentrys */      struct mem_pool *fd_mem_pool; /* memory pool for fd_t */      int ctxcount;                 /* number of slots in inode->ctx */ + +    /* This is required for 'invalidation' when 'nlookup' would be used, +       specially in case of fuse-bridge */ +    int32_t (*invalidator_fn)(xlator_t *, inode_t *); +    xlator_t *invalidator_xl; +    struct list_head invalidate; /* inodes which are in invalidation queue */ +    uint32_t invalidate_size;    /* count of inodes in invalidation list */  };  struct _dentry { @@ -100,6 +107,7 @@ struct _inode {      struct list_head list;        /* active/lru/purge */      struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ +    bool invalidate_sent;    /* Set it if invalidator_fn is called for inode */  };  #define UUID0_STR "00000000-0000-0000-0000-000000000000" @@ -107,7 +115,12 @@ struct _inode {  #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)  inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl); +inode_table_new(uint32_t lru_limit, xlator_t *xl); + +inode_table_t * +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, +                             int32_t (*invalidator_fn)(xlator_t *, inode_t *), +                             xlator_t *invalidator_xl);  void  inode_table_destroy_all(glusterfs_ctx_t *ctx); @@ -139,6 +152,8 @@ inode_lookup(inode_t *inode);  int  inode_forget(inode_t *inode, uint64_t nlookup); +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup);  int  inode_ref_reduce_by_n(inode_t *inode, uint64_t nref); diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 3bf32cfe442..b4a62897498 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -23,6 +23,100 @@     move latest accessed dentry to list_head of inode  */ +// clang-format off +/* + +Details as per Xavi: + + I think we should have 3 lists: active, lru and invalidate. + +We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of +refs, invalidate_sent flag and moving from one list to another must be done +atomically. + +With this information, these are the states that cause a transition: + +    refs nlookups inv_sent    op +      1      0        0      unref  -> refs = 0, active--->destroy +      1      1        0      unref  -> refs = 0, active--->lru +      1      1        0     forget  -> nlookups = 0, active--->active +     *0      1        0     forget  -> nlookups = 0, lru--->destroy +     *0      1        1     forget  -> nlookups = 0, invalidate--->destroy +      0      1        0       ref   -> refs = 1, lru--->active +      0      1        1       ref   -> refs = 1, inv_sent = 0, invalidate--->active +      0      1        0    overflow -> refs = 1, inv_sent = 1, lru--->invalidate +      1      1        1      unref  -> refs = 0, invalidate--->invalidate +      1      1        1     forget  -> nlookups = 0, inv_sent = 0, invalidate--->active + +(*) technically these combinations cannot happen because a forget sent by the +kernel first calls ref() and then unref(). However it's equivalent. + +overflow means that lru list has grown beyond the limit and the inode needs to +be invalidated. All other combinations do not cause a change in state or are not +possible. + +Based on this, the code could be similar to this: + +    ref(inode, inv) +    { +        if (refs == 0) { +            if (inv_sent) { +                invalidate_count--; +                inv_sent = 0; +            } else { +                lru_count--; +            } +            if (inv) { +                inv_sent = 1; +                invalidate_count++; +                list_move(inode, invalidate); +            } else { +                active_count++; +                list_move(inode, active); +            } +        } +        refs++; +    } + +    unref(inode, clear) +    { +        if (clear && inv_sent) { +            // there is a case of fuse itself sending forget, without +            // invalidate, after entry delete, like unlink(), rmdir(). +            inv_sent = 0; +            invalidate_count--; +            active_count++; +            list_move(inode, active); +        } +        refs--; +        if ((refs == 0) && !inv_sent) { +            active_count--; +            if (nlookups == 0) { +                destroy(inode); +            } else { +                lru_count++; +                list_move(inode, lru); +            } +        } +    } + +    forget(inode) +    { +        ref(inode, false); +        nlookups--; +        unref(inode, true); +    } + +    overflow(inode) +    { +        ref(inode, true); +        invalidator(inode); +        unref(inode, false); +    } + +*/ +// clang-format on +  #define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type)                  \      {                                                                          \          int i = 1;                                                             \ @@ -37,7 +131,7 @@      }  static inode_t * -__inode_unref(inode_t *inode); +__inode_unref(inode_t *inode, bool clear);  static int  inode_table_prune(inode_table_t *table); @@ -132,7 +226,7 @@ __dentry_unset(dentry_t *dentry)      dentry->name = NULL;      if (dentry->parent) { -        __inode_unref(dentry->parent); +        __inode_unref(dentry->parent, false);          dentry->parent = NULL;      } @@ -446,7 +540,7 @@ out:  }  static inode_t * -__inode_unref(inode_t *inode) +__inode_unref(inode_t *inode, bool clear)  {      int index = 0;      xlator_t *this = NULL; @@ -455,8 +549,6 @@ __inode_unref(inode_t *inode)      if (!inode)          return NULL; -    this = THIS; -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. @@ -464,6 +556,13 @@ __inode_unref(inode_t *inode)      if (__is_root_gfid(inode->gfid))          return inode; +    this = THIS; + +    if (clear && inode->invalidate_sent) { +        inode->invalidate_sent = false; +        inode->table->invalidate_size--; +        __inode_activate(inode); +    }      GF_ASSERT(inode->ref);      --inode->ref; @@ -474,7 +573,7 @@ __inode_unref(inode_t *inode)          inode->_ctx[index].ref--;      } -    if (!inode->ref) { +    if (!inode->ref && !inode->invalidate_sent) {          inode->table->active_size--;          nlookup = GF_ATOMIC_GET(inode->nlookup); @@ -488,7 +587,7 @@ __inode_unref(inode_t *inode)  }  static inode_t * -__inode_ref(inode_t *inode) +__inode_ref(inode_t *inode, bool is_invalidate)  {      int index = 0;      xlator_t *this = NULL; @@ -498,11 +597,6 @@ __inode_ref(inode_t *inode)      this = THIS; -    if (!inode->ref) { -        inode->table->lru_size--; -        __inode_activate(inode); -    } -      /*       * Root inode should always be in active list of inode table. So unrefs       * on root inode are no-ops. If we do not allow unrefs but allow refs, @@ -514,6 +608,22 @@ __inode_ref(inode_t *inode)      if (__is_root_gfid(inode->gfid) && inode->ref)          return inode; +    if (!inode->ref) { +        if (inode->invalidate_sent) { +            inode->invalidate_sent = false; +            inode->table->invalidate_size--; +        } else { +            inode->table->lru_size--; +        } +        if (is_invalidate) { +            inode->invalidate_sent = true; +            inode->table->invalidate_size++; +            list_move_tail(&inode->list, &inode->table->invalidate); +        } else { +            __inode_activate(inode); +        } +    } +      inode->ref++;      index = __inode_get_xl_index(inode, this); @@ -537,7 +647,7 @@ inode_unref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_unref(inode); +        inode = __inode_unref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -558,7 +668,7 @@ inode_ref(inode_t *inode)      pthread_mutex_lock(&table->lock);      { -        inode = __inode_ref(inode); +        inode = __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -592,7 +702,7 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)      }      if (parent) -        newd->parent = __inode_ref(parent); +        newd->parent = __inode_ref(parent, false);      list_add(&newd->inode_list, &inode->dentry_list);      newd->inode = inode; @@ -662,7 +772,7 @@ inode_new(inode_table_t *table)      {          inode = __inode_create(table);          if (inode != NULL) { -            __inode_ref(inode); +            __inode_ref(inode, false);          }      }      pthread_mutex_unlock(&table->lock); @@ -769,7 +879,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)              inode = dentry->inode;          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -912,7 +1022,7 @@ inode_find(inode_table_t *table, uuid_t gfid)      {          inode = __inode_find(table, gfid);          if (inode) -            __inode_ref(inode); +            __inode_ref(inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1057,7 +1167,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)          linked_inode = __inode_link(inode, parent, name, iatt);          if (linked_inode) -            __inode_ref(linked_inode); +            __inode_ref(linked_inode, false);      }      pthread_mutex_unlock(&table->lock); @@ -1124,6 +1234,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)      return 0;  } +int +inode_forget_with_unref(inode_t *inode, uint64_t nlookup) +{ +    inode_table_t *table = NULL; + +    if (!inode) { +        gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +                         "inode not found"); +        return -1; +    } + +    table = inode->table; + +    pthread_mutex_lock(&table->lock); +    { +        inode_forget_atomic(inode, nlookup); +        __inode_unref(inode, true); +    } +    pthread_mutex_unlock(&table->lock); + +    inode_table_prune(table); + +    return 0; +} +  /*   * Invalidate an inode. This is invoked when a translator decides that an   * inode's cache is no longer valid. Any translator interested in taking action @@ -1298,7 +1433,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)              parent = dentry->parent;          if (parent) -            __inode_ref(parent); +            __inode_ref(parent, false);      }      pthread_mutex_unlock(&table->lock); @@ -1480,6 +1615,8 @@ inode_table_prune(inode_table_t *table)      inode_t *del = NULL;      inode_t *tmp = NULL;      inode_t *entry = NULL; +    uint64_t nlookup = 0; +    int64_t lru_size = 0;      if (!table)          return -1; @@ -1488,7 +1625,11 @@ inode_table_prune(inode_table_t *table)      pthread_mutex_lock(&table->lock);      { -        while (table->lru_limit && table->lru_size > (table->lru_limit)) { +        if (!table->lru_limit) +            goto purge_list; + +        lru_size = table->lru_size; +        while (lru_size > (table->lru_limit)) {              if (list_empty(&table->lru)) {                  gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,                                   LG_MSG_INVALID_INODE_LIST, @@ -1498,26 +1639,46 @@ inode_table_prune(inode_table_t *table)                  break;              } +            lru_size--;              entry = list_entry(table->lru.next, inode_t, list); +            /* The logic of invalidation is required only if invalidator_fn +               is present */ +            if (table->invalidator_fn) { +                /* check for valid inode with 'nlookup' */ +                nlookup = GF_ATOMIC_GET(entry->nlookup); +                if (nlookup) { +                    __inode_ref(entry, true); +                    tmp = entry; +                    break; +                } +            }              table->lru_size--;              __inode_retire(entry); -              ret++;          } +    purge_list:          list_splice_init(&table->purge, &purge);          table->purge_size = 0;      }      pthread_mutex_unlock(&table->lock); +    /* Pick 1 inode for invalidation */ +    if (tmp) { +        xlator_t *old_THIS = THIS; +        THIS = table->invalidator_xl; +        table->invalidator_fn(table->invalidator_xl, tmp); +        THIS = old_THIS; +        inode_unref(tmp); +    } + +    /* Just so that if purge list is handled too, then clear it off */ +    list_for_each_entry_safe(del, tmp, &purge, list)      { -        list_for_each_entry_safe(del, tmp, &purge, list) -        { -            list_del_init(&del->list); -            inode_forget_atomic(del, 0); -            __inode_destroy(del); -        } +        list_del_init(&del->list); +        inode_forget_atomic(del, 0); +        __inode_destroy(del);      }      return ret; @@ -1545,9 +1706,12 @@ __inode_table_init_root(inode_table_t *table)  }  inode_table_t * -inode_table_new(size_t lru_limit, xlator_t *xl) +inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, +                             int32_t (*invalidator_fn)(xlator_t *, inode_t *), +                             xlator_t *invalidator_xl)  {      inode_table_t *new = NULL; +    uint32_t mem_pool_size = lru_limit;      int ret = -1;      int i = 0; @@ -1559,20 +1723,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      new->ctxcount = xl->graph->xl_count + 1;      new->lru_limit = lru_limit; +    new->invalidator_fn = invalidator_fn; +    new->invalidator_xl = invalidator_xl;      new->hashsize = 14057; /* TODO: Random Number?? */      /* In case FUSE is initing the inode table. */ -    if (lru_limit == 0) -        lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES; - -    new->inode_pool = mem_pool_new(inode_t, lru_limit); +    if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +        mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES; +    new->inode_pool = mem_pool_new(inode_t, mem_pool_size);      if (!new->inode_pool)          goto out; -    new->dentry_pool = mem_pool_new(dentry_t, lru_limit); - +    new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);      if (!new->dentry_pool)          goto out; @@ -1604,6 +1768,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)      INIT_LIST_HEAD(&new->active);      INIT_LIST_HEAD(&new->lru);      INIT_LIST_HEAD(&new->purge); +    INIT_LIST_HEAD(&new->invalidate);      ret = gf_asprintf(&new->name, "%s/inode", xl->name);      if (-1 == ret) { @@ -1633,6 +1798,13 @@ out:      return new;  } +inode_table_t * +inode_table_new(uint32_t lru_limit, xlator_t *xl) +{ +    /* Only fuse for now requires the inode table with invalidator */ +    return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); +} +  int  inode_table_ctx_free(inode_table_t *table)  { @@ -1771,6 +1943,14 @@ inode_table_destroy(inode_table_t *inode_table)              inode_table->lru_size--;          } +        /* Same logic for invalidate list */ +        while (!list_empty(&inode_table->invalidate)) { +            trav = list_first_entry(&inode_table->invalidate, inode_t, list); +            inode_forget_atomic(trav, 0); +            __inode_retire(trav); +            inode_table->invalidate_size--; +        } +          while (!list_empty(&inode_table->active)) {              trav = list_first_entry(&inode_table->active, inode_t, list);              /* forget and unref the inode to retire and add it to @@ -2280,6 +2460,7 @@ inode_dump(inode_t *inode, char *prefix)          gf_proc_dump_write("fd-count", "%u", inode->fd_count);          gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);          gf_proc_dump_write("ref", "%u", inode->ref); +        gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);          gf_proc_dump_write("ia_type", "%d", inode->ia_type);          if (inode->_ctx) {              inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx), @@ -2353,10 +2534,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)      gf_proc_dump_write(key, "%d", itable->lru_size);      gf_proc_dump_build_key(key, prefix, "purge_size");      gf_proc_dump_write(key, "%d", itable->purge_size); +    gf_proc_dump_build_key(key, prefix, "invalidate_size"); +    gf_proc_dump_write(key, "%d", itable->invalidate_size);      INODE_DUMP_LIST(&itable->active, key, prefix, "active");      INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");      INODE_DUMP_LIST(&itable->purge, key, prefix, "purge"); +    INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");      pthread_mutex_unlock(&itable->lock);  } diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index 6ca6a639456..464493d6cfc 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -791,6 +791,7 @@ __inode_find  inode_find  inode_find_directory_name  inode_forget +inode_forget_with_unref  inode_from_path  inode_grep  inode_grep_for_gfid @@ -815,6 +816,7 @@ inode_table_destroy_all  inode_table_dump  inode_table_dump_to_dict  inode_table_new +inode_table_with_invalidator  __inode_table_set_lru_limit  inode_table_set_lru_limit  inode_unlink  | 
