diff options
Diffstat (limited to 'xlators/storage/bdb/src/bdb.h')
-rw-r--r-- | xlators/storage/bdb/src/bdb.h | 483 |
1 files changed, 288 insertions, 195 deletions
diff --git a/xlators/storage/bdb/src/bdb.h b/xlators/storage/bdb/src/bdb.h index f85fa4c5f85..a041d942da3 100644 --- a/xlators/storage/bdb/src/bdb.h +++ b/xlators/storage/bdb/src/bdb.h @@ -55,7 +55,7 @@ #include "compat.h" #include "compat-errno.h" -#define GLFS_BDB_STORAGE "/glusterfs_storage.db" +#define BDB_STORAGE "/glusterfs_storage.db" /* numbers are not so reader-friendly, so lets have ON and OFF macros */ #define ON 1 @@ -66,49 +66,58 @@ #define BDB_ENOSPC_THRESHOLD 25600 -#define BDB_DEFAULT_CHECKPOINT_TIMEOUT 30 +#define BDB_DEFAULT_CHECKPOINT_INTERVAL 30 #define BCTX_ENV(bctx) (bctx->table->dbenv) + +#define BDB_EXPORT_PATH_LEN(_private) \ + (((struct bdb_private *)_private)->export_path_length) + +#define BDB_EXPORT_PATH(_private) \ + (((struct bdb_private *)_private)->export_path) /* MAKE_REAL_PATH(var,this,path) * make the real path on the underlying file-system * * @var: destination to hold the real path - * @this: pointer to xlator_t corresponding to bdb xlator - * @path: path, as seen from mount-point + * @this: pointer to xlator_t corresponding to bdb xlator + * @path: path, as seen from mount-point */ -#define MAKE_REAL_PATH(var, this, path) do { \ - int base_len = ((struct bdb_private *)this->private)->export_path_length; \ - var = alloca (strlen (path) + base_len + 2); \ - strcpy (var, ((struct bdb_private *)this->private)->export_path); \ - strcpy (&var[base_len], path); \ - } while (0) +#define MAKE_REAL_PATH(var, this, path) do { \ + int base_len = BDB_EXPORT_PATH_LEN(this->private); \ + var = alloca (strlen (path) + base_len + 2); \ + strcpy (var, BDB_EXPORT_PATH(this->private)); \ + strcpy (&var[base_len], path); \ + } while (0) /* MAKE_REAL_PATH_TO_STORAGE_DB(var,this,path) * make the real path to the storage-database file on file-system * * @var: destination to hold the real path - * @this: pointer to xlator_t corresponding to bdb xlator - * @path: path of the directory, as seen from mount-point + * @this: pointer to xlator_t 
corresponding to bdb xlator
+ * @path: path of the directory, as seen from mount-point
  */
-#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do { \
-        int base_len = ((struct bdb_private *)this->private)->export_path_length; \
-        var = alloca (strlen (path) + base_len + strlen (GLFS_BDB_STORAGE)); \
-        strcpy (var, ((struct bdb_private *)this->private)->export_path); \
-        strcpy (&var[base_len], path); \
-        strcat (var, GLFS_BDB_STORAGE); \
-        } while (0)
+#define MAKE_REAL_PATH_TO_STORAGE_DB(var, this, path) do { \
+        int base_len = BDB_EXPORT_PATH_LEN(this->private); \
+        var = alloca (strlen (path) + \
+                      base_len + \
+                      strlen (BDB_STORAGE) + 1); /* +1: terminating NUL */ \
+        strcpy (var, BDB_EXPORT_PATH(this->private)); \
+        strcpy (&var[base_len], path); \
+        strcat (var, BDB_STORAGE); \
+        } while (0)
 /* MAKE_KEY_FROM_PATH(key,path)
- * make a 'key', which we use as key in the underlying database by using the path
+ * make a 'key', which we use as key in the underlying database by using
+ * the path
  *
  * @key: destination to hold the key
- * @path: path to file as seen from mount-point
+ * @path: path to file as seen from mount-point
  */
-#define MAKE_KEY_FROM_PATH(key, path) do { \
-        char *tmp = alloca (strlen (path)); \
-        strcpy (tmp, path); \
-        key = basename (tmp); \
-        }while (0);
+#define MAKE_KEY_FROM_PATH(key, path) do { \
+        char *tmp = alloca (strlen (path) + 1); /* +1: strcpy copies the NUL */ \
+        strcpy (tmp, path); \
+        key = basename (tmp); \
+        }while (0);
 /* BDB_DO_LSTAT(path,stbuf,dirent)
  * construct real-path to a dirent and do lstat on the real-path
@@ -117,42 +126,44 @@
  * @stbuf: a 'struct stat *'
  * @dirent: a 'struct dirent *'
  */
-#define BDB_DO_LSTAT(path, stbuf, dirent) do { \
-        char tmp_real_path[GF_PATH_MAX]; \
-        strcpy(tmp_real_path, path); \
-        strcat (tmp_real_path, "/"); \
-        strcat(tmp_real_path, dirent->d_name); \
-        ret = lstat (tmp_real_path, stbuf); \
-        } while(0);
+#define BDB_DO_LSTAT(path, stbuf, dirent) do { \
+        char tmp_real_path[GF_PATH_MAX]; \
+        strcpy(tmp_real_path, path); \
+        strcat (tmp_real_path, "/"); \
+        strcat(tmp_real_path, dirent->d_name); \
+        ret = lstat (tmp_real_path, stbuf); \
+        } while(0);
 /* IS_BDB_PRIVATE_FILE(name)
- * check if a given 'name' is bdb xlator's internal file name
+ * check if a given 'name' is bdb xlator's internal file name
  *
  * @name: basename of a file.
  *
- * bdb xlator reserves file names 'glusterfs_storage.db',
- * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*' (used by libdb)
+ * bdb xlator reserves file names 'glusterfs_storage.db',
+ * 'glusterfs_ns.db'(used by bdb xlator itself), 'log.*', '__db.*'
+ * (used by libdb)
  */
-#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
+#define IS_BDB_PRIVATE_FILE(name) ((!strncmp(name, "__db.", 5)) || \
                                    (!strcmp(name, "glusterfs_storage.db")) || \
-                                   (!strcmp(name, "glusterfs_ns.db")) || \
+                                   (!strcmp(name, "glusterfs_ns.db")) || \
                                    (!strncmp(name, "log.0000", 8)))
 /* check if 'name' is '.' or '..' entry */
-#define IS_DOT_DOTDOT(name) ((!strncmp(name,".", 1)) || (!strncmp(name,"..", 2)))
+#define IS_DOT_DOTDOT(name) /* exact match only: ".foo" is a regular entry */ \
+        ((!strcmp(name, ".")) || (!strcmp(name, "..")))
 /* BDB_SET_BCTX(this,inode,bctx)
  * put a stamp on inode. d00d, you are using bdb.. huhaha.
- * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
+ * pointer to 'struct bdb_ctx' is stored in inode's ctx of all directories.
  * this will happen either in lookup() or mkdir().
  *
  * @this: pointer xlator_t of bdb xlator.
  * @inode: inode where 'struct bdb_ctx *' has to be stored.
  * @bctx: a 'struct bdb_ctx *'
  */
-#define BDB_SET_BCTX(this,inode,bctx) do{ \
-        inode_ctx_put(inode, this, (uint64_t)(long)bctx); \
-        }while (0);
+#define BDB_SET_BCTX(this,inode,bctx) do{ \
+        inode_ctx_put(inode, this, (uint64_t)(long)bctx); \
+        }while (0);
 /* MAKE_BCTX_FROM_INODE(this,bctx,inode)
  * extract bdb xlator's 'struct bdb_ctx *' from an inode's ctx.
@@ -160,18 +171,18 @@
  *
  * @this: pointer xlator_t of bdb xlator.
* @bctx: a 'struct bdb_ctx *'
- * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
+ * @inode: inode from where 'struct bdb_ctx *' has to be extracted.
  */
-#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{ \
-        uint64_t tmp_bctx = 0; \
-        inode_ctx_get (inode, this, &tmp_bctx); \
-        if (ret == 0) \
-        bctx = (void *)(long)tmp_bctx; \
-        }while (0);
+#define MAKE_BCTX_FROM_INODE(this,bctx,inode) do{ \
+        uint64_t tmp_bctx = 0; \
+        /* act on the lookup's own status, not a 'ret' from the caller */ \
+        if (inode_ctx_get (inode, this, &tmp_bctx) == 0) \
+                bctx = (void *)(long)tmp_bctx; \
+        }while (0);
-#define BDB_SET_BFD(this,fd,bfd) do{ \
-        fd_ctx_set (fd, this, (uint64_t)(long)bfd); \
-        }while (0);
+#define BDB_SET_BFD(this,fd,bfd) do{ \
+        fd_ctx_set (fd, this, (uint64_t)(long)bfd); \
+        }while (0);
 /* maximum number of open dbs that bdb xlator will ever have */
 #define BDB_MAX_OPEN_DBS 100
@@ -188,159 +199,241 @@
 #define IS_VALID_FILE_MODE(mode) (!(mode & (~RWXRWXRWX)))
 #define IS_VALID_DIR_MODE(mode) (!(mode & (~(RWXRWXRWX)))
-/* maximum retries for a failed transactional operation */
+/* maximum retries for a failed transactional operation */
 #define BDB_MAX_RETRIES 10
+#define BDB_LL_PAGE_SIZE_DEFAULT 4096
+#define BDB_LL_PAGE_SIZE_MIN 4096
+#define BDB_LL_PAGE_SIZE_MAX 65536
+
+#define PAGE_SIZE_IN_RANGE(_page_size) /* both bounds test the argument */ \
+        (((_page_size) >= BDB_LL_PAGE_SIZE_MIN) \
+         && ((_page_size) <= BDB_LL_PAGE_SIZE_MAX))
+
 typedef struct bctx_table bctx_table_t;
 typedef struct bdb_ctx bctx_t;
 typedef struct bdb_cache bdb_cache_t;
 typedef struct bdb_private bdb_private_t;
-
+
 struct bctx_table {
-        uint64_t dbflags; /* flags to be used for opening each database */
-        uint64_t cache; /* cache: can be either ON or OFF */
-        gf_lock_t lock; /* used to lock the 'struct bctx_table *' */
-        gf_lock_t checkpoint_lock; /* lock for checkpointing */
-        struct list_head *b_hash; /* hash table of 'struct bdb_ctx' */
-        struct list_head active; /* list of active 'struct bdb_ctx' */
-        struct list_head b_lru; /* lru list of inactive 'struct 
bdb_ctx' */ - struct list_head purge; - uint32_t lru_limit; - uint32_t lru_size; - uint32_t hash_size; - DBTYPE access_mode; /* access mode for accessing the databases, - * can be DB_HASH, DB_BTREE */ - DB_ENV *dbenv; /* DB_ENV under which every db operation - * is carried over */ - int32_t transaction; - xlator_t *this; - - uint64_t page_size; /* page-size of DB, - * DB->set_pagesize(), should be set before DB->open */ + /* flags to be used for opening each database */ + uint64_t dbflags; + + /* cache: can be either ON or OFF */ + uint64_t cache; + + /* used to lock the 'struct bctx_table *' */ + gf_lock_t lock; + + /* lock for checkpointing */ + gf_lock_t checkpoint_lock; + + /* hash table of 'struct bdb_ctx' */ + struct list_head *b_hash; + + /* list of active 'struct bdb_ctx' */ + struct list_head active; + + /* lru list of inactive 'struct bdb_ctx' */ + struct list_head b_lru; + struct list_head purge; + uint32_t lru_limit; + uint32_t lru_size; + uint32_t hash_size; + + /* access mode for accessing the databases, can be DB_HASH, DB_BTREE */ + DBTYPE access_mode; + + /* DB_ENV under which every db operation is carried over */ + DB_ENV *dbenv; + int32_t transaction; + xlator_t *this; + + /* page-size of DB, DB->set_pagesize(), should be set before DB->open */ + uint64_t page_size; }; struct bdb_ctx { - /* controller members */ - struct list_head list; /* lru list of 'struct bdb_ctx's, - * a bdb_ctx can exist in one of b_hash or lru lists */ - struct list_head b_hash; /* directory 'name' hashed list of 'struct bdb_ctx's */ + /* controller members */ + + /* lru list of 'struct bdb_ctx's, a bdb_ctx can exist in one of + * b_hash or lru lists */ + struct list_head list; - struct bctx_table *table; - int32_t ref; /* reference count */ - gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */ + /* directory 'name' hashed list of 'struct bdb_ctx's */ + struct list_head b_hash; - char *directory; /* directory path */ - DB *dbp; /* pointer to open database, that resides 
inside this directory */ - uint32_t cache; /* cache ON or OFF */ + struct bctx_table *table; + int32_t ref; /* reference count */ + gf_lock_t lock; /* used to lock this 'struct bdb_ctx' */ - /* per directory cache, bdb xlator's internal cache */ - struct list_head c_list; /* linked list of cached records */ - int32_t c_count; /* number of cached records */ + char *directory; /* directory path */ - int32_t key_hash; /* index to hash table list, to which this ctx belongs */ - char *db_path; /* absolute path to db file */ + /* pointer to open database, that resides inside this directory */ + DB *dbp; + uint32_t cache; /* cache ON or OFF */ + + /* per directory cache, bdb xlator's internal cache */ + struct list_head c_list; /* linked list of cached records */ + int32_t c_count; /* number of cached records */ + + /* index to hash table list, to which this ctx belongs */ + int32_t key_hash; + char *db_path; /* absolute path to db file */ }; struct bdb_fd { - struct bdb_ctx *ctx; /* pointer to bdb_ctx of the parent directory */ - char *key; /* name of the file. NOTE: basename, not the complete path */ - int32_t flags; /* open flags */ + /* pointer to bdb_ctx of the parent directory */ + struct bdb_ctx *ctx; + + /* name of the file. NOTE: basename, not the complete path */ + char *key; + int32_t flags; /* open flags */ }; struct bdb_dir { - struct bdb_ctx *ctx; /* pointer to bdb_ctx of this directory */ - DIR *dir; /* open directory pointer, as returned by opendir() */ - char offset[NAME_MAX]; /* FIXME: readdir offset, too crude. must go */ - char *path; /* path to this directory */ + /* pointer to bdb_ctx of this directory */ + struct bdb_ctx *ctx; + + /* open directory pointer, as returned by opendir() */ + DIR *dir; + + /* FIXME: readdir offset, too crude. 
must go */ + char offset[NAME_MAX]; + char *path; /* path to this directory */ }; /* cache */ struct bdb_cache { - struct list_head c_list; /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */ - char *key; /* name of the file this cache holds. NOTE: basename of file */ - char *data; /* file content */ - size_t size; /* size of the file content that this cache holds */ + /* list of 'struct bdb_cache' under a 'struct bdb_ctx' */ + struct list_head c_list; + + /* name of the file this cache holds. NOTE: basename of file */ + char *key; + char *data; /* file content */ + + /* size of the file content that this cache holds */ + size_t size; }; struct bdb_private { - inode_table_t *itable; /* pointer to inode table that we use */ - int32_t temp; /**/ - char is_stateless; /**/ - char *export_path; /* path to the export directory - * (option directory <export-path>) */ - int32_t export_path_length; /* length of 'export_path' string */ - - /* statistics */ - struct xlator_stats stats; /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; - int32_t max_read; /* */ - int32_t max_write; /* */ - int64_t interval_read; /* Used to calculate the max_read value */ - int64_t interval_write; /* Used to calculate the max_write value */ - int64_t read_value; /* Total read, from init */ - int64_t write_value; /* Total write, from init */ - - /* bdb xlator specific private data */ - uint64_t envflags; /* flags used for opening DB_ENV for this xlator */ - uint64_t dbflags; /* flags to be used for opening each database */ - uint64_t cache; /* cache: can be either ON or OFF */ - uint32_t transaction; /* transaction: can be either ON or OFF */ - uint32_t active; - gf_lock_t active_lock; - struct bctx_table *b_table; - DBTYPE access_mode; /* access mode for accessing the databases, - * can be DB_HASH, DB_BTREE - * (option access-mode <mode>) */ - mode_t file_mode; /* mode for each and every file stored on bdb - * (option 
file-mode <mode>) */ - mode_t dir_mode; /* mode for each and every directory stored on bdb - * (option dir-mode <mode>) */ - mode_t symlink_mode; /* mode for each and every symlink stored on bdb */ - pthread_t checkpoint_thread; /* pthread_t object used for creating checkpoint - * thread */ - int32_t checkpoint_timeout; /* time duration between two consecutive checkpoint - * operations. - * (option checkpoint-timeout <time-in-seconds>) */ - ino_t next_ino; /* inode number allocation counter */ - gf_lock_t ino_lock; /* lock to protect 'next_ino' */ - char *logdir; /* environment log directory - * (option logdir <directory>) */ - char *errfile; /* errfile path, used by environment to - * print detailed error log. - * (option errfile <errfile-path>) */ - FILE *errfp; /* DB_ENV->set_errfile() expects us to fopen - * the errfile before doing DB_ENV->set_errfile() */ - uint32_t txn_timeout; /* used by DB_ENV->set_timeout to set the timeout for - * a transactionally encapsulated DB->operation() to - * timeout before waiting for locks to be released. 
- * (option transaction-timeout <time-in-milliseconds>) - */ - uint32_t lock_timeout; - uint32_t log_auto_remove; /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/ - uint32_t log_region_max; + /* pointer to inode table that we use */ + inode_table_t *itable; + int32_t temp; /**/ + char is_stateless; /**/ + + /* path to the export directory + * (option directory <export-path>) */ + char *export_path; + + /* length of 'export_path' string */ + int32_t export_path_length; + + /* statistics */ + /* Statistics, provides activity of the server */ + struct xlator_stats stats; + + struct timeval prev_fetch_time; + struct timeval init_time; + int32_t max_read; /* */ + int32_t max_write; /* */ + + /* Used to calculate the max_read value */ + int64_t interval_read; + + /* Used to calculate the max_write value */ + int64_t interval_write; + int64_t read_value; /* Total read, from init */ + int64_t write_value; /* Total write, from init */ + + /* bdb xlator specific private data */ + + /* flags used for opening DB_ENV for this xlator */ + uint64_t envflags; + + /* flags to be used for opening each database */ + uint64_t dbflags; + + /* cache: can be either ON or OFF */ + uint64_t cache; + + /* transaction: can be either ON or OFF */ + uint32_t transaction; + uint32_t active; + gf_lock_t active_lock; + struct bctx_table *b_table; + + /* access mode for accessing the databases, can be DB_HASH, DB_BTREE + * (option access-mode <mode>) */ + DBTYPE access_mode; + + /* mode for each and every file stored on bdb + * (option file-mode <mode>) */ + mode_t file_mode; + + /* mode for each and every directory stored on bdb + * (option dir-mode <mode>) */ + mode_t dir_mode; + + /* mode for each and every symlink stored on bdb */ + mode_t symlink_mode; + + /* pthread_t object used for creating checkpoint thread */ + pthread_t checkpoint_thread; + + /* time duration between two consecutive checkpoint operations. 
+ * (option checkpoint-interval <time-in-seconds>) */ + uint32_t checkpoint_interval; + + /* inode number allocation counter */ + ino_t next_ino; + + /* lock to protect 'next_ino' */ + gf_lock_t ino_lock; + + /* environment log directory (option logdir <directory>) */ + char *logdir; + + /* errfile path, used by environment to print detailed error log. + * (option errfile <errfile-path>) */ + char *errfile; + + /* DB_ENV->set_errfile() expects us to fopen + * the errfile before doing DB_ENV->set_errfile() */ + FILE *errfp; + + /* used by DB_ENV->set_timeout to set the timeout for + * a transactionally encapsulated DB->operation() to + * timeout before waiting for locks to be released. + * (option transaction-timeout <time-in-milliseconds>) + */ + uint32_t txn_timeout; + uint32_t lock_timeout; + + /* DB_AUTO_LOG_REMOVE flag for DB_ENV*/ + uint32_t log_auto_remove; + uint32_t log_region_max; }; -static inline int32_t +static inline int32_t bdb_txn_begin (DB_ENV *dbenv, - DB_TXN **ptxnid) + DB_TXN **ptxnid) { - return dbenv->txn_begin (dbenv, NULL, ptxnid, 0); + return dbenv->txn_begin (dbenv, NULL, ptxnid, 0); } static inline int32_t bdb_txn_abort (DB_TXN *txnid) { - return txnid->abort (txnid); + return txnid->abort (txnid); } static inline int32_t bdb_txn_commit (DB_TXN *txnid) { - return txnid->commit (txnid, 0); + return txnid->commit (txnid, 0); } inline void * @@ -348,53 +441,53 @@ bdb_extract_bfd (fd_t *fd, xlator_t *this); void * -bdb_db_stat (bctx_t *bctx, - DB_TXN *txnid, - uint32_t flags); +bdb_db_stat (bctx_t *bctx, + DB_TXN *txnid, + uint32_t flags); int32_t bdb_db_get(struct bdb_ctx *bctx, - DB_TXN *txnid, - const char *key_string, - char **buf, - size_t size, - off_t offset); + DB_TXN *txnid, + const char *key_string, + char **buf, + size_t size, + off_t offset); #define BDB_TRUNCATE_RECORD 0xcafebabe int32_t bdb_db_put (struct bdb_ctx *bctx, - DB_TXN *txnid, - const char *key_string, - const char *buf, - size_t size, - off_t offset, - int32_t flags); 
+ DB_TXN *txnid, + const char *key_string, + const char *buf, + size_t size, + off_t offset, + int32_t flags); int32_t bdb_db_del (struct bdb_ctx *bctx, - DB_TXN *txnid, - const char *path); + DB_TXN *txnid, + const char *path); ino_t bdb_inode_transform (ino_t parent, - struct bdb_ctx *bctx); + struct bdb_ctx *bctx); int32_t bdb_cursor_open (struct bdb_ctx *bctx, - DBC **cursorp); + DBC **cursorp); int32_t bdb_cursor_get (DBC *cursorp, - DBT *key, - DBT *value, - int32_t flags); + DBT *key, + DBT *value, + int32_t flags); int32_t bdb_cursor_close (struct bdb_ctx *ctx, - DBC *cursorp); + DBC *cursorp); int32_t @@ -405,17 +498,17 @@ dirent_size (struct dirent *entry); int bdb_db_init (xlator_t *this, - dict_t *options); + dict_t *options); void bdb_dbs_from_dict_close (dict_t *this, - char *key, - data_t *value, - void *data); + char *key, + data_t *value, + void *data); bctx_t * bctx_lookup (struct bctx_table *table, - const char *path); + const char *path); bctx_t * bctx_parent @@ -429,11 +522,11 @@ bctx_t * bctx_ref (bctx_t *ctx); bctx_t * -bctx_rename (bctx_t *bctx, - const char *db_newpath); +bctx_rename (bctx_t *bctx, + const char *db_newpath); int32_t -bdb_db_rename (bctx_table_t *table, - const char *tmp_db_newpath, - const char *real_db_newpath); +bdb_db_rename (bctx_table_t *table, + const char *tmp_db_newpath, + const char *real_db_newpath); #endif /* _BDB_H */ |