summaryrefslogtreecommitdiffstats
path: root/xlators/storage/bdb/src/bdb.h
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/bdb/src/bdb.h')
-rw-r--r--xlators/storage/bdb/src/bdb.h483
1 files changed, 288 insertions, 195 deletions
diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h
index f85fa4c5f85..a041d942da3 100644
--- a/xlators/storage/bdb/src/bdb.h
+++ b/xlators/storage/bdb/src/bdb.h
@@ -55,7 +55,7 @@
#include "compat.h"
#include "compat-errno.h"
-#define GLFS_BDB_STORAGE "/glusterfs_storage.db"
+#define BDB_STORAGE "/glusterfs_storage.db"
/* numbers are not so reader-friendly, so let's have ON and OFF macros */
#define ON 1
@@ -66,49 +66,58 @@
#define BDB_ENOSPC_THRESHOLD 25600
-#define BDB_DEFAULT_CHECKPOINT_TIMEOUT 30
+#define BDB_DEFAULT_CHECKPOINT_INTERVAL 30
#define BCTX_ENV(bctx) (bctx->table->dbenv)
+
+/* BDB_EXPORT_PATH_LEN(_private) / BDB_EXPORT_PATH(_private)
+ *   read the 'export_path_length' / 'export_path' members of the
+ *   'struct bdb_private' that '_private' points to.
+ *
+ * @_private: a pointer expression (e.g. this->private). It is
+ *            parenthesized before the cast so that a compound
+ *            expression is cast as a whole, not just its first operand.
+ */
+#define BDB_EXPORT_PATH_LEN(_private)                                   \
+        (((struct bdb_private *)(_private))->export_path_length)
+
+#define BDB_EXPORT_PATH(_private)                                       \
+        (((struct bdb_private *)(_private))->export_path)
/* MAKE_REAL_PATH(var,this,path)
* make the real path on the underlying file-system
*
* @var: destination to hold the real path
- * @this: pointer to xlator_t corresponding to bdb xlator
- * @path: path, as seen from mount-point
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path, as seen from mount-point
*/
-#define MAKE_REAL_PATH(var, this, path) do { \
- int base_len = ((struct bdb_private *)this->private)->export_path_length; \
- var = alloca (strlen (path) + base_len + 2); \
- strcpy (var, ((struct bdb_private *)this->private)->export_path); \
- strcpy (&var[base_len], path); \
- } while (0)
+/* 'var' is carved out with alloca(), so it is only valid until the
+ * function that expands this macro returns. */
+#define MAKE_REAL_PATH(var, this, path) do {                            \
+        int export_len = BDB_EXPORT_PATH_LEN(this->private);            \
+        var = alloca (export_len + strlen (path) + 2);                  \
+        /* export path first, then 'path' appended right behind it */   \
+        strcpy (var, BDB_EXPORT_PATH(this->private));                   \
+        strcpy (var + export_len, path);                                \
+} while (0)
/* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path)
* make the real path to the storage-database file on file-system
*
* @var: destination to hold the real path
- * @this: pointer to xlator_t corresponding to bdb xlator
- * @path: path of the directory, as seen from mount-point
+ * @this: pointer to xlator_t corresponding to bdb xlator
+ * @path: path of the directory, as seen from mount-point
*/
-#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do { \
- int base_len = ((struct bdb_private *)this->private)->export_path_length; \
- var = alloca (strlen (path) + base_len + strlen (GLFS_BDB_STORAGE)); \
- strcpy (var, ((struct bdb_private *)this->private)->export_path); \
- strcpy (&var[base_len], path); \
- strcat (var, GLFS_BDB_STORAGE); \
- } while (0)
+/* 'var' is allocated with alloca() and holds
+ *   <export path> + <path> + BDB_STORAGE
+ * so the size must also include one byte for the terminating NUL that
+ * strcat() writes. */
+#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do {              \
+        int base_len = BDB_EXPORT_PATH_LEN(this->private);              \
+        var = alloca (strlen (path) +                                   \
+                      base_len +                                        \
+                      strlen (BDB_STORAGE) + 1);                        \
+        strcpy (var, BDB_EXPORT_PATH(this->private));                   \
+        strcpy (&var[base_len], path);                                  \
+        strcat (var, BDB_STORAGE);                                      \
+} while (0)
/* MAKE_KEY_FROM_PATH(key,path)
- * make a 'key', which we use as key in the underlying database by using the path
+ * make a 'key', which we use as key in the underlying database by using
+ * the path
*
* @key: destination to hold the key
- * @path: path to file as seen from mount-point
+ * @path: path to file as seen from mount-point
*/
-#define MAKE_KEY_FROM_PATH(key, path) do { \
- char *tmp = alloca (strlen (path)); \
- strcpy (tmp, path); \
- key = basename (tmp); \
- }while (0);
+/* 'path' is copied into an alloca() buffer before basename() is called
+ * on it; the copy needs strlen (path) + 1 bytes because strcpy() also
+ * writes the terminating NUL.
+ *
+ * NOTE: the expansion itself ends in ';' (kept so that existing call
+ * sites keep compiling) - do not use this macro as the unbraced body of
+ * an if/else. */
+#define MAKE_KEY_FROM_PATH(key, path) do {                              \
+        char *tmp = alloca (strlen (path) + 1);                         \
+        strcpy (tmp, path);                                             \
+        key = basename (tmp);                                           \
+}while (0);
/* BDB_DO_LSTAT(path,stbuf,dirent)
* construct real-path to a dirent and do lstat on the real-path
@@ -117,42 +126,44 @@
* @stbuf: a 'struct stat *'
* @dirent: a 'struct dirent *'
*/
-#define BDB_DO_LSTAT(path, stbuf, dirent) do { \
- char tmp_real_path[GF_PATH_MAX]; \
- strcpy(tmp_real_path, path); \
- strcat (tmp_real_path, "/"); \
- strcat(tmp_real_path, dirent->d_name); \
- ret = lstat (tmp_real_path, stbuf); \
- } while(0);
+#define BDB_DO_LSTAT(path, stbuf, dirent) do { \
+ char tmp_real_path[GF_PATH_MAX]; \
+ strcpy(tmp_real_path, path); \
+ strcat (tmp_real_path, "/"); \
+ strcat(tmp_real_path, dirent->d_name); \
+ ret = lstat (tmp_real_path, stbuf); \
+ } while(0);
/* IS_BDB_PRIVATE_FILE(name)
- * check if a given 'name' is bdb xlator's internal file name
+ * check if a given 'name' is bdb xlator's internal file name
*
* @name: basename of a file.
*
- * bdb xlator reserves file names 'glusterfs_storage.db',
- * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*' (used by libdb)
+ * bdb xlator reserves file names 'glusterfs_storage.db',
+ * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*'
+ * (used by libdb)
*/
-#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
+#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
(!strcmp(name, "glusterfs_storage.db")) || \
- (!strcmp(name, "glusterfs_ns.db")) || \
+ (!strcmp(name, "glusterfs_ns.db")) || \
(!strncmp(name, "log.0000", 8)))
/* check if 'name' is '.' or '..' entry */
-#define IS_DOT_DOTDOT(name) ((!strncmp(name,".", 1)) || (!strncmp(name,"..", 2)))
+/* an exact comparison is required here: a one-character prefix compare
+ * against "." would also accept every name that merely starts with a
+ * dot (".git", ".profile", ...). */
+#define IS_DOT_DOTDOT(name)                                             \
+        ((!strcmp((name), ".")) || (!strcmp((name), "..")))
/* BDB_SET_BCTX(this,inode,bctx)
* put a stamp on the inode, marking it as managed by the bdb xlator.
- * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
+ * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
* this will happen either in lookup() or mkdir().
*
* @this: pointer to xlator_t of bdb xlator.
* @inode: inode where 'struct bdb_ctx *' has to be stored.
* @bctx: a 'struct bdb_ctx *'
*/
-#define BDB_SET_BCTX(this,inode,bctx) do{ \
- inode_ctx_put(inode, this, (uint64_t)(long)bctx); \
- }while (0);
+#define BDB_SET_BCTX(this,inode,bctx) do{ \
+ inode_ctx_put(inode, this, (uint64_t)(long)bctx); \
+ }while (0);
/* MAKE_BCTX_FROM_INODE(this,bctx,inode)
* extract bdb xlator's 'struct bdb_ctx *' from an inode's ctx.
@@ -160,18 +171,18 @@
*
* @this: pointer to xlator_t of bdb xlator.
* @bctx: a 'struct bdb_ctx *'
- * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
+ * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
*/
-#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{ \
- uint64_t tmp_bctx = 0; \
- inode_ctx_get (inode, this, &tmp_bctx); \
- if (ret == 0) \
- bctx = (void *)(long)tmp_bctx; \
- }while (0);
+/* 'bctx' is assigned only when inode_ctx_get() itself returns 0
+ * (assumed to mean "ctx found" - confirm against the inode API);
+ * otherwise 'bctx' is left untouched. The macro tests the call's own
+ * return value and no longer depends on a variable named 'ret' in the
+ * caller's scope.
+ *
+ * NOTE: the expansion ends in ';' (kept for existing call sites). */
+#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{                       \
+        uint64_t tmp_bctx = 0;                                          \
+        if (inode_ctx_get (inode, this, &tmp_bctx) == 0)                \
+                bctx = (void *)(long)tmp_bctx;                          \
+}while (0);
-#define BDB_SET_BFD(this,fd,bfd) do{ \
- fd_ctx_set (fd, this, (uint64_t)(long)bfd); \
- }while (0);
+#define BDB_SET_BFD(this,fd,bfd) do{ \
+ fd_ctx_set (fd, this, (uint64_t)(long)bfd); \
+ }while (0);
/* maximum number of open dbs that bdb xlator will ever have */
#define BDB_MAX_OPEN_DBS 100
@@ -188,159 +199,241 @@
#define IS_VALID_FILE_MODE(mode) (!(mode & (~RWXRWXRWX)))
/* true when 'mode' has no bits set outside RWXRWXRWX (defined outside this view) */
#define IS_VALID_DIR_MODE(mode) (!((mode) & (~(RWXRWXRWX))))
-/* maximum retries for a failed transactional operation */
+/* maximum retries for a failed transactional operation */
#define BDB_MAX_RETRIES 10
+#define BDB_LL_PAGE_SIZE_DEFAULT 4096
+#define BDB_LL_PAGE_SIZE_MIN 4096
+#define BDB_LL_PAGE_SIZE_MAX 65536
+
+/* PAGE_SIZE_IN_RANGE(_page_size)
+ *   true when '_page_size' lies within
+ *   [BDB_LL_PAGE_SIZE_MIN, BDB_LL_PAGE_SIZE_MAX], both ends inclusive.
+ *   Both bounds test the macro argument itself; the macro does not rely
+ *   on any variable from the caller's scope.
+ */
+#define PAGE_SIZE_IN_RANGE(_page_size)                                  \
+        (((_page_size) >= BDB_LL_PAGE_SIZE_MIN)                         \
+         && ((_page_size) <= BDB_LL_PAGE_SIZE_MAX))
+
typedef struct bctx_table bctx_table_t;
typedef struct bdb_ctx bctx_t;
typedef struct bdb_cache bdb_cache_t;
typedef struct bdb_private bdb_private_t;
-
+
struct bctx_table {
- uint64_t dbflags; /* flags to be used for opening each database */
- uint64_t cache; /* cache: can be either ON or OFF */
- gf_lock_t lock; /* used to lock the 'struct bctx_table *' */
- gf_lock_t checkpoint_lock; /* lock for checkpointing */
- struct list_head *b_hash; /* hash table of 'struct bdb_ctx' */
- struct list_head active; /* list of active 'struct bdb_ctx' */
- struct list_head b_lru; /* lru list of inactive 'struct bdb_ctx' */
- struct list_head purge;
- uint32_t lru_limit;
- uint32_t lru_size;
- uint32_t hash_size;
- DBTYPE access_mode; /* access mode for accessing the databases,
- * can be DB_HASH, DB_BTREE */
- DB_ENV *dbenv; /* DB_ENV under which every db operation
- * is carried over */
- int32_t transaction;
- xlator_t *this;
-
- uint64_t page_size; /* page-size of DB,
- * DB->set_pagesize(), should be set before DB->open */
+ /* flags to be used for opening each database */
+ uint64_t dbflags;
+
+ /* cache: can be either ON or OFF */
+ uint64_t cache;
+
+ /* used to lock the 'struct bctx_table *' */
+ gf_lock_t lock;
+
+ /* lock for checkpointing */
+ gf_lock_t checkpoint_lock;
+
+ /* hash table of 'struct bdb_ctx' */
+ struct list_head *b_hash;
+
+ /* list of active 'struct bdb_ctx' */
+ struct list_head active;
+
+ /* lru list of inactive 'struct bdb_ctx' */
+ struct list_head b_lru;
+ struct list_head purge;
+ uint32_t lru_limit;
+ uint32_t lru_size;
+ uint32_t hash_size;
+
+ /* access mode for accessing the databases, can be DB_HASH, DB_BTREE */
+ DBTYPE access_mode;
+
+ /* DB_ENV under which every db operation is carried over */
+ DB_ENV *dbenv;
+ int32_t transaction;
+ xlator_t *this;
+
+ /* page-size of DB, DB->set_pagesize(), should be set before DB->open */
+ uint64_t page_size;
};
struct bdb_ctx {
- /* controller members */
- struct list_head list; /* lru list of 'struct bdb_ctx's,
- * a bdb_ctx can exist in one of b_hash or lru lists */
- struct list_head b_hash; /* directory 'name' hashed list of 'struct bdb_ctx's */
+ /* controller members */
+
+ /* lru list of 'struct bdb_ctx's, a bdb_ctx can exist in one of
+ * b_hash or lru lists */
+ struct list_head list;
- struct bctx_table *table;
- int32_t ref; /* reference count */
- gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */
+ /* directory 'name' hashed list of 'struct bdb_ctx's */
+ struct list_head b_hash;
- char *directory; /* directory path */
- DB *dbp; /* pointer to open database, that resides inside this directory */
- uint32_t cache; /* cache ON or OFF */
+ struct bctx_table *table;
+ int32_t ref; /* reference count */
+ gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */
- /* per directory cache, bdb xlator's internal cache */
- struct list_head c_list; /* linked list of cached records */
- int32_t c_count; /* number of cached records */
+ char *directory; /* directory path */
- int32_t key_hash; /* index to hash table list, to which this ctx belongs */
- char *db_path; /* absolute path to db file */
+ /* pointer to open database, that resides inside this directory */
+ DB *dbp;
+ uint32_t cache; /* cache ON or OFF */
+
+ /* per directory cache, bdb xlator's internal cache */
+ struct list_head c_list; /* linked list of cached records */
+ int32_t c_count; /* number of cached records */
+
+ /* index to hash table list, to which this ctx belongs */
+ int32_t key_hash;
+ char *db_path; /* absolute path to db file */
};
struct bdb_fd {
- struct bdb_ctx *ctx; /* pointer to bdb_ctx of the parent directory */
- char *key; /* name of the file. NOTE: basename, not the complete path */
- int32_t flags; /* open flags */
+ /* pointer to bdb_ctx of the parent directory */
+ struct bdb_ctx *ctx;
+
+ /* name of the file. NOTE: basename, not the complete path */
+ char *key;
+ int32_t flags; /* open flags */
};
struct bdb_dir {
- struct bdb_ctx *ctx; /* pointer to bdb_ctx of this directory */
- DIR *dir; /* open directory pointer, as returned by opendir() */
- char offset[NAME_MAX]; /* FIXME: readdir offset, too crude. must go */
- char *path; /* path to this directory */
+ /* pointer to bdb_ctx of this directory */
+ struct bdb_ctx *ctx;
+
+ /* open directory pointer, as returned by opendir() */
+ DIR *dir;
+
+ /* FIXME: readdir offset, too crude. must go */
+ char offset[NAME_MAX];
+ char *path; /* path to this directory */
};
/* cache */
struct bdb_cache {
- struct list_head c_list; /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */
- char *key; /* name of the file this cache holds. NOTE: basename of file */
- char *data; /* file content */
- size_t size; /* size of the file content that this cache holds */
+ /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */
+ struct list_head c_list;
+
+ /* name of the file this cache holds. NOTE: basename of file */
+ char *key;
+ char *data; /* file content */
+
+ /* size of the file content that this cache holds */
+ size_t size;
};
struct bdb_private {
- inode_table_t *itable; /* pointer to inode table that we use */
- int32_t temp; /**/
- char is_stateless; /**/
- char *export_path; /* path to the export directory
- * (option directory <export-path>) */
- int32_t export_path_length; /* length of 'export_path' string */
-
- /* statistics */
- struct xlator_stats stats; /* Statistics, provides activity of the server */
-
- struct timeval prev_fetch_time;
- struct timeval init_time;
- int32_t max_read; /* */
- int32_t max_write; /* */
- int64_t interval_read; /* Used to calculate the max_read value */
- int64_t interval_write; /* Used to calculate the max_write value */
- int64_t read_value; /* Total read, from init */
- int64_t write_value; /* Total write, from init */
-
- /* bdb xlator specific private data */
- uint64_t envflags; /* flags used for opening DB_ENV for this xlator */
- uint64_t dbflags; /* flags to be used for opening each database */
- uint64_t cache; /* cache: can be either ON or OFF */
- uint32_t transaction; /* transaction: can be either ON or OFF */
- uint32_t active;
- gf_lock_t active_lock;
- struct bctx_table *b_table;
- DBTYPE access_mode; /* access mode for accessing the databases,
- * can be DB_HASH, DB_BTREE
- * (option access-mode <mode>) */
- mode_t file_mode; /* mode for each and every file stored on bdb
- * (option file-mode <mode>) */
- mode_t dir_mode; /* mode for each and every directory stored on bdb
- * (option dir-mode <mode>) */
- mode_t symlink_mode; /* mode for each and every symlink stored on bdb */
- pthread_t checkpoint_thread; /* pthread_t object used for creating checkpoint
- * thread */
- int32_t checkpoint_timeout; /* time duration between two consecutive checkpoint
- * operations.
- * (option checkpoint-timeout <time-in-seconds>) */
- ino_t next_ino; /* inode number allocation counter */
- gf_lock_t ino_lock; /* lock to protect 'next_ino' */
- char *logdir; /* environment log directory
- * (option logdir <directory>) */
- char *errfile; /* errfile path, used by environment to
- * print detailed error log.
- * (option errfile <errfile-path>) */
- FILE *errfp; /* DB_ENV->set_errfile() expects us to fopen
- * the errfile before doing DB_ENV->set_errfile() */
- uint32_t txn_timeout; /* used by DB_ENV->set_timeout to set the timeout for
- * a transactionally encapsulated DB->operation() to
- * timeout before waiting for locks to be released.
- * (option transaction-timeout <time-in-milliseconds>)
- */
- uint32_t lock_timeout;
- uint32_t log_auto_remove; /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/
- uint32_t log_region_max;
+ /* pointer to inode table that we use */
+ inode_table_t *itable;
+ int32_t temp; /**/
+ char is_stateless; /**/
+
+ /* path to the export directory
+ * (option directory <export-path>) */
+ char *export_path;
+
+ /* length of 'export_path' string */
+ int32_t export_path_length;
+
+ /* statistics */
+ /* Statistics, provides activity of the server */
+ struct xlator_stats stats;
+
+ struct timeval prev_fetch_time;
+ struct timeval init_time;
+ int32_t max_read; /* */
+ int32_t max_write; /* */
+
+ /* Used to calculate the max_read value */
+ int64_t interval_read;
+
+ /* Used to calculate the max_write value */
+ int64_t interval_write;
+ int64_t read_value; /* Total read, from init */
+ int64_t write_value; /* Total write, from init */
+
+ /* bdb xlator specific private data */
+
+ /* flags used for opening DB_ENV for this xlator */
+ uint64_t envflags;
+
+ /* flags to be used for opening each database */
+ uint64_t dbflags;
+
+ /* cache: can be either ON or OFF */
+ uint64_t cache;
+
+ /* transaction: can be either ON or OFF */
+ uint32_t transaction;
+ uint32_t active;
+ gf_lock_t active_lock;
+ struct bctx_table *b_table;
+
+ /* access mode for accessing the databases, can be DB_HASH, DB_BTREE
+ * (option access-mode <mode>) */
+ DBTYPE access_mode;
+
+ /* mode for each and every file stored on bdb
+ * (option file-mode <mode>) */
+ mode_t file_mode;
+
+ /* mode for each and every directory stored on bdb
+ * (option dir-mode <mode>) */
+ mode_t dir_mode;
+
+ /* mode for each and every symlink stored on bdb */
+ mode_t symlink_mode;
+
+ /* pthread_t object used for creating checkpoint thread */
+ pthread_t checkpoint_thread;
+
+ /* time duration between two consecutive checkpoint operations.
+ * (option checkpoint-interval <time-in-seconds>) */
+ uint32_t checkpoint_interval;
+
+ /* inode number allocation counter */
+ ino_t next_ino;
+
+ /* lock to protect 'next_ino' */
+ gf_lock_t ino_lock;
+
+ /* environment log directory (option logdir <directory>) */
+ char *logdir;
+
+ /* errfile path, used by environment to print detailed error log.
+ * (option errfile <errfile-path>) */
+ char *errfile;
+
+ /* DB_ENV->set_errfile() expects us to fopen
+ * the errfile before doing DB_ENV->set_errfile() */
+ FILE *errfp;
+
+ /* used by DB_ENV->set_timeout to set the timeout for
+ * a transactionally encapsulated DB->operation() to
+ * timeout before waiting for locks to be released.
+ * (option transaction-timeout <time-in-milliseconds>)
+ */
+ uint32_t txn_timeout;
+ uint32_t lock_timeout;
+
+ /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/
+ uint32_t log_auto_remove;
+ uint32_t log_region_max;
};
-static inline int32_t
+static inline int32_t
bdb_txn_begin (DB_ENV *dbenv,
- DB_TXN **ptxnid)
+ DB_TXN **ptxnid)
{
- return dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
+ return dbenv->txn_begin (dbenv, NULL, ptxnid, 0);
}
static inline int32_t
bdb_txn_abort (DB_TXN *txnid)
{
- return txnid->abort (txnid);
+ return txnid->abort (txnid);
}
static inline int32_t
bdb_txn_commit (DB_TXN *txnid)
{
- return txnid->commit (txnid, 0);
+ return txnid->commit (txnid, 0);
}
inline void *
@@ -348,53 +441,53 @@ bdb_extract_bfd (fd_t *fd, xlator_t *this);
void *
-bdb_db_stat (bctx_t *bctx,
- DB_TXN *txnid,
- uint32_t flags);
+bdb_db_stat (bctx_t *bctx,
+ DB_TXN *txnid,
+ uint32_t flags);
int32_t
bdb_db_get(struct bdb_ctx *bctx,
- DB_TXN *txnid,
- const char *key_string,
- char **buf,
- size_t size,
- off_t offset);
+ DB_TXN *txnid,
+ const char *key_string,
+ char **buf,
+ size_t size,
+ off_t offset);
#define BDB_TRUNCATE_RECORD 0xcafebabe
int32_t
bdb_db_put (struct bdb_ctx *bctx,
- DB_TXN *txnid,
- const char *key_string,
- const char *buf,
- size_t size,
- off_t offset,
- int32_t flags);
+ DB_TXN *txnid,
+ const char *key_string,
+ const char *buf,
+ size_t size,
+ off_t offset,
+ int32_t flags);
int32_t
bdb_db_del (struct bdb_ctx *bctx,
- DB_TXN *txnid,
- const char *path);
+ DB_TXN *txnid,
+ const char *path);
ino_t
bdb_inode_transform (ino_t parent,
- struct bdb_ctx *bctx);
+ struct bdb_ctx *bctx);
int32_t
bdb_cursor_open (struct bdb_ctx *bctx,
- DBC **cursorp);
+ DBC **cursorp);
int32_t
bdb_cursor_get (DBC *cursorp,
- DBT *key,
- DBT *value,
- int32_t flags);
+ DBT *key,
+ DBT *value,
+ int32_t flags);
int32_t
bdb_cursor_close (struct bdb_ctx *ctx,
- DBC *cursorp);
+ DBC *cursorp);
int32_t
@@ -405,17 +498,17 @@ dirent_size (struct dirent *entry);
int
bdb_db_init (xlator_t *this,
- dict_t *options);
+ dict_t *options);
void
bdb_dbs_from_dict_close (dict_t *this,
- char *key,
- data_t *value,
- void *data);
+ char *key,
+ data_t *value,
+ void *data);
bctx_t *
bctx_lookup (struct bctx_table *table,
- const char *path);
+ const char *path);
bctx_t *
bctx_parent
@@ -429,11 +522,11 @@ bctx_t *
bctx_ref (bctx_t *ctx);
bctx_t *
-bctx_rename (bctx_t *bctx,
- const char *db_newpath);
+bctx_rename (bctx_t *bctx,
+ const char *db_newpath);
int32_t
-bdb_db_rename (bctx_table_t *table,
- const char *tmp_db_newpath,
- const char *real_db_newpath);
+bdb_db_rename (bctx_table_t *table,
+ const char *tmp_db_newpath,
+ const char *real_db_newpath);
#endif /* _BDB_H */