summaryrefslogtreecommitdiffstats
path: root/xlators/storage/bdb/src/bdb-ll.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/bdb/src/bdb-ll.c')
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1295
1 files changed, 644 insertions, 651 deletions
diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c
index cd2d1ac4916..59d431d8257 100644
--- a/xlators/storage/bdb/src/bdb-ll.c
+++ b/xlators/storage/bdb/src/bdb-ll.c
@@ -20,6 +20,7 @@
#include <libgen.h>
#include "bdb.h"
#include <list.h>
+#include "hashfn.h"
/*
* implement the procedures to interact with bdb */
@@ -31,22 +32,41 @@
ino_t
bdb_inode_transform (ino_t parent,
- bctx_t *bctx)
+ const char *name,
+ size_t namelen)
{
- struct bdb_private *private = NULL;
ino_t ino = -1;
+ uint64_t hash = 0;
- GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
+ hash = gf_dm_hashfn (name, namelen);
- private = bctx->table->this->private;
+ ino = (((parent << 32) | 0x00000000ffffffff)
+ & (hash | 0xffffffff00000000));
- LOCK (&private->ino_lock);
- ino = ++private->next_ino;
- UNLOCK (&private->ino_lock);
-out:
return ino;
}
+static int
+bdb_generate_secondary_hash (DB *secondary,
+ const DBT *pkey,
+ const DBT *data,
+ DBT *skey)
+{
+ char *primary = NULL;
+ uint32_t *hash = NULL;
+
+ primary = pkey->data;
+
+ hash = calloc (1, sizeof (uint32_t));
+
+ *hash = gf_dm_hashfn (primary, pkey->size);
+
+ skey->data = hash;
+ skey->size = sizeof (hash);
+ skey->flags = DB_DBT_APPMALLOC;
+
+ return 0;
+}
/***********************************************************
*
@@ -63,13 +83,13 @@ out:
* if (no-empty-slots), then prune open dbs and close as many as possible
* if (empty-slot-available), tika muchkonDu db open maaDu
*
- * NOTE: illi baro munche lock hiDkobEku
*/
-static DB *
+static int
bdb_db_open (bctx_t *bctx)
{
- DB *storage_dbp = NULL;
- int32_t op_ret = -1;
+ DB *primary = NULL;
+ DB *secondary = NULL;
+ int32_t ret = -1;
bctx_table_t *table = NULL;
GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out);
@@ -78,51 +98,94 @@ bdb_db_open (bctx_t *bctx)
GF_VALIDATE_OR_GOTO ("bdb-ll", table, out);
/* we have to do the following, we can't deny someone of db_open ;) */
- op_ret = db_create (&storage_dbp, table->dbenv, 0);
- if (op_ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to do db_create for directory %s (%s)",
- bctx->directory, db_strerror (op_ret));
- storage_dbp = NULL;
+ ret = db_create (&primary, table->dbenv, 0);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_OPEN %s: %s (failed to create database object"
+ " for primary database)",
+ bctx->directory, db_strerror (ret));
+ ret = -ENOMEM;
goto out;
}
if (table->page_size) {
- op_ret = storage_dbp->set_pagesize (storage_dbp,
- table->page_size);
- if (op_ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to set the page_size (%"PRIu64") for "
- "directory %s (%s)",
- table->page_size, bctx->directory,
- db_strerror (op_ret));
- } else {
+ ret = primary->set_pagesize (primary,
+ table->page_size);
+ if (ret < 0) {
gf_log ("bdb-ll", GF_LOG_DEBUG,
- "page-size (%"PRIu64") set on DB",
+ "_BDB_DB_OPEN %s: %s (failed to set page-size "
+ "to %"PRIu64")",
+ bctx->directory, db_strerror (ret),
table->page_size);
+ } else {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_OPEN %s: page-size set to %"PRIu64,
+ bctx->directory, table->page_size);
}
}
- op_ret = storage_dbp->open (storage_dbp,
- NULL,
- bctx->db_path,
- NULL,
- table->access_mode,
- table->dbflags,
- 0);
- if (op_ret != 0 ) {
- gf_log ("bdb-ll",
- GF_LOG_ERROR,
- "failed to open storage-db for directory %s (%s)",
- bctx->db_path, db_strerror (op_ret));
- storage_dbp = NULL;
+ ret = primary->open (primary, NULL, bctx->db_path, "primary",
+ table->access_mode, table->dbflags, 0);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "_BDB_DB_OPEN %s: %s "
+ "(failed to open primary database)",
+ bctx->directory, db_strerror (ret));
+ ret = -1;
+ goto cleanup;
+ }
+
+ ret = db_create (&secondary, table->dbenv, 0);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_OPEN %s: %s (failed to create database object"
+ " for secondary database)",
+ bctx->directory, db_strerror (ret));
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ ret = secondary->open (secondary, NULL, bctx->db_path, "secondary",
+ table->access_mode, table->dbflags, 0);
+ if (ret != 0 ) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "_BDB_DB_OPEN %s: %s "
+ "(failed to open secondary database)",
+ bctx->directory, db_strerror (ret));
+ ret = -1;
+ goto cleanup;
+ }
+
+ ret = primary->associate (primary, NULL, secondary,
+ bdb_generate_secondary_hash,
+#ifdef DB_IMMUTABLE_KEY
+ DB_IMMUTABLE_KEY);
+#else
+ 0);
+#endif
+ if (ret != 0 ) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "_BDB_DB_OPEN %s: %s "
+ "(failed to associate primary database with "
+ "secondary database)",
+ bctx->directory, db_strerror (ret));
+ ret = -1;
+ goto cleanup;
}
out:
- return storage_dbp;
-}
+ bctx->primary = primary;
+ bctx->secondary = secondary;
+ return ret;
+cleanup:
+ if (primary)
+ primary->close (primary, 0);
+ if (secondary)
+ secondary->close (secondary, 0);
+ return ret;
+}
int32_t
bdb_cursor_close (bctx_t *bctx,
@@ -140,10 +203,10 @@ bdb_cursor_close (bctx_t *bctx,
#else
ret = cursorp->c_close (cursorp);
#endif
- if ((ret != 0)) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to close db cursor for directory "
- "%s (%s)",
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CURSOR_CLOSE %s: %s "
+ "(failed to close database cursor)",
bctx->directory, db_strerror (ret));
}
}
@@ -165,27 +228,30 @@ bdb_cursor_open (bctx_t *bctx,
LOCK (&bctx->lock);
{
- if (bctx->dbp) {
+ if (bctx->secondary) {
/* do nothing, just continue */
ret = 0;
} else {
- bctx->dbp = bdb_db_open (bctx);
- if (!bctx->dbp) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to open storage db for %s",
+ ret = bdb_db_open (bctx);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CURSOR_OPEN %s: ENOMEM "
+ "(failed to open secondary database)",
bctx->directory);
- ret = -1;
+ ret = -ENOMEM;
} else {
ret = 0;
}
}
if (ret == 0) {
- /* all set, lets open cursor */
- ret = bctx->dbp->cursor (bctx->dbp, NULL, cursorpp, 0);
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to create a cursor for %s (%s)",
+ /* all set, open cursor */
+ ret = bctx->secondary->cursor (bctx->secondary,
+ NULL, cursorpp, 0);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CURSOR_OPEN %s: %s "
+ "(failed to open a cursor to database)",
bctx->directory, db_strerror (ret));
}
}
@@ -245,27 +311,37 @@ bdb_cache_insert (bctx_t *bctx,
/* FIXME: ugly, not supposed to disect any of the
* 'struct list_head' directly */
if (!list_empty (&bctx->c_list)) {
- bcache = list_entry (bctx->c_list.prev, bdb_cache_t, c_list);
+ bcache = list_entry (bctx->c_list.prev,
+ bdb_cache_t, c_list);
list_del_init (&bcache->c_list);
}
if (bcache->key) {
free (bcache->key);
- bcache->key = strdup ((char *)key->data);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock);
+ bcache->key = calloc (key->size + 1,
+ sizeof (char));
+ GF_VALIDATE_OR_GOTO ("bdb-ll",
+ bcache->key, unlock);
+ memcpy (bcache->key, (char *)key->data,
+ key->size);
} else {
/* should never come here */
- gf_log ("bdb-ll", GF_LOG_CRITICAL,
- "bcache->key (null)");
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CACHE_INSERT %s (%s) "
+ "(found a cache entry with empty key)",
+ bctx->directory, (char *)key->data);
} /* if(bcache->key)...else */
if (bcache->data) {
free (bcache->data);
bcache->data = memdup (data->data, data->size);
- GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock);
+ GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data,
+ unlock);
bcache->size = data->size;
} else {
/* should never come here */
gf_log ("bdb-ll", GF_LOG_CRITICAL,
- "bcache->data (null)");
+ "_BDB_CACHE_INSERT %s (%s) "
+ "(found a cache entry with no data)",
+ bctx->directory, (char *)key->data);
} /* if(bcache->data)...else */
list_add (&bcache->c_list, &bctx->c_list);
ret = 0;
@@ -273,10 +349,14 @@ bdb_cache_insert (bctx_t *bctx,
/* we will be entering here very rarely */
bcache = CALLOC (1, sizeof (*bcache));
GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock);
- bcache->key = strdup ((char *)(key->data));
+
+ bcache->key = calloc (key->size + 1, sizeof (char));
GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock);
+ memcpy (bcache->key, key->data, key->size);
+
bcache->data = memdup (data->data, data->size);
GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock);
+
bcache->size = data->size;
list_add (&bcache->c_list, &bctx->c_list);
bctx->c_count++;
@@ -291,7 +371,7 @@ out:
static int32_t
bdb_cache_delete (bctx_t *bctx,
- char *key)
+ const char *key)
{
bdb_cache_t *bcache = NULL;
bdb_cache_t *trav = NULL;
@@ -333,12 +413,12 @@ bdb_db_stat (bctx_t *bctx,
LOCK (&bctx->lock);
{
- if (bctx->dbp == NULL) {
- bctx->dbp = bdb_db_open (bctx);
- storage = bctx->dbp;
+ if (bctx->primary == NULL) {
+ ret = bdb_db_open (bctx);
+ storage = bctx->primary;
} else {
/* we are just fine, lets continue */
- storage = bctx->dbp;
+ storage = bctx->primary;
} /* if(bctx->dbp==NULL)...else */
}
UNLOCK (&bctx->lock);
@@ -347,46 +427,48 @@ bdb_db_stat (bctx_t *bctx,
ret = storage->stat (storage, txnid, &stat, flags);
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to do DB->stat() on db file %s: %s",
- bctx->db_path, db_strerror (ret));
- } else {
+ if (ret < 0) {
gf_log ("bdb-ll", GF_LOG_DEBUG,
- "successfully called DB->stat() on db file %s",
- bctx->db_path);
+ "_BDB_DB_STAT %s: %s "
+ "(failed to do stat database)",
+ bctx->directory, db_strerror (ret));
}
out:
return stat;
}
-/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the corresponding
- * db file.
+/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the
+ * corresponding db file.
*
- * @bctx: bctx_t * corresponding to the parent directory of @path. (should always be a valid
- * bctx). bdb_storage_get should never be called if @bctx = NULL.
- * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction or a valid
- * DB_TXN *, when embedded in an explicit transaction.
- * @path: path of the file to read from (translated to a database key using MAKE_KEY_FROM_PATH)
- * @buf: char ** - pointer to a pointer to char. a read buffer is created in this procedure
- * and pointer to the buffer is passed through @buf to the caller.
+ * @bctx: bctx_t * corresponding to the parent directory of @path. (should
+ * always be a valid bctx). bdb_storage_get should never be called if
+ * @bctx = NULL.
+ * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction
+ * or a valid DB_TXN *, when embedded in an explicit transaction.
+ * @path: path of the file to read from (translated to a database key using
+ * MAKE_KEY_FROM_PATH)
+ * @buf: char ** - pointer to a pointer to char. a read buffer is created in
+ * this procedure and pointer to the buffer is passed through @buf to the
+ * caller.
* @size: size of the file content to be read.
* @offset: offset from which the file content to be read.
*
- * NOTE: bdb_storage_get tries to open DB, if @bctx->dbp == NULL (@bctx->dbp == NULL,
- * nobody has opened DB till now or DB was closed by bdb_table_prune()).
+ * NOTE: bdb_storage_get tries to open DB, if @bctx->dbp == NULL
+ * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by
+ * bdb_table_prune()).
*
- * NOTE: if private->cache is set (bdb xlator's internal caching enabled), then bdb_storage_get
- * first looks up the cache for key/value pair. if bdb_lookup_cache fails, then only
- * DB->get() is called. also, inserts a newly read key/value pair to cache through
- * bdb_insert_to_cache.
+ * NOTE: if private->cache is set (bdb xlator's internal caching enabled), then
+ * bdb_storage_get first looks up the cache for key/value pair. if
+ * bdb_lookup_cache fails, then only DB->get() is called. also, inserts a
+ * newly read key/value pair to cache through bdb_insert_to_cache.
*
* return: 'number of bytes read' on success or -1 on error.
*
- * also see: bdb_lookup_cache, bdb_insert_to_cache for details about bdb xlator's internal cache.
+ * also see: bdb_lookup_cache, bdb_insert_to_cache for details about bdb
+ * xlator's internal cache.
*/
-int32_t
+static int32_t
bdb_db_get (bctx_t *bctx,
DB_TXN *txnid,
const char *path,
@@ -420,12 +502,12 @@ bdb_db_get (bctx_t *bctx,
} else {
LOCK (&bctx->lock);
{
- if (bctx->dbp == NULL) {
- bctx->dbp = bdb_db_open (bctx);
- storage = bctx->dbp;
+ if (bctx->primary == NULL) {
+ ret = bdb_db_open (bctx);
+ storage = bctx->primary;
} else {
/* we are just fine, lets continue */
- storage = bctx->dbp;
+ storage = bctx->primary;
} /* if(bctx->dbp==NULL)...else */
}
UNLOCK (&bctx->lock);
@@ -457,22 +539,25 @@ bdb_db_get (bctx_t *bctx,
if (ret == DB_NOTFOUND) {
gf_log ("bdb-ll", GF_LOG_DEBUG,
- "failed to do DB->get() for key: %s."
- " key not found in storage DB",
- key_string);
+ "_BDB_DB_GET %s - %s: ENOENT"
+ "(specified key not found in database)",
+ bctx->directory, key_string);
ret = -1;
need_break = 1;
} else if (ret == DB_LOCK_DEADLOCK) {
retries++;
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "deadlock detected in DB->put. retrying"
- " DB->put (%d)", retries);
- }else if (ret == 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_GET %s - %s"
+ "(deadlock detected, retrying for %d "
+ "time)",
+ bctx->directory, key_string, retries);
+ } else if (ret == 0) {
/* successfully read data, lets set everything
* in place and return */
if (buf) {
*buf = CALLOC (1, value.size);
- ERR_ABORT (*buf);
+ GF_VALIDATE_OR_GOTO ("bdb-ll",
+ *buf, out);
memcpy (*buf, value.data, value.size);
}
ret = value.size;
@@ -481,10 +566,12 @@ bdb_db_get (bctx_t *bctx,
free (value.data);
need_break = 1;
} else {
- gf_log ("bdb-ll",
- GF_LOG_ERROR,
- "failed to do DB->get() for key %s: %s",
- key_string, db_strerror (ret));
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_GET %s - %s: %s"
+ "(failed to retrieve specified key from"
+ " database)",
+ bctx->directory, key_string,
+ db_strerror (ret));
ret = -1;
need_break = 1;
}
@@ -494,6 +581,19 @@ out:
return ret;
}/* bdb_db_get */
+/* TODO: handle errors here and log. propogate only the errno to caller */
+int32_t
+bdb_db_fread (struct bdb_fd *bfd, char **buf, size_t size, off_t offset)
+{
+ return bdb_db_get (bfd->ctx, NULL, bfd->key, buf, size, offset);
+}
+
+int32_t
+bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **buf)
+{
+ return bdb_db_get (bctx, NULL, key, buf, 0, 0);
+}
+
/* bdb_storage_put - insert a key/value specified to the corresponding DB.
*
* @bctx: bctx_t * corresponding to the parent directory of @path.
@@ -519,7 +619,7 @@ out:
* also see: bdb_cache_delete for details on how a cached key/value pair is
* removed.
*/
-int32_t
+static int32_t
bdb_db_put (bctx_t *bctx,
DB_TXN *txnid,
const char *key_string,
@@ -537,12 +637,12 @@ bdb_db_put (bctx_t *bctx,
LOCK (&bctx->lock);
{
- if (bctx->dbp == NULL) {
- bctx->dbp = bdb_db_open (bctx);
- storage = bctx->dbp;
+ if (bctx->primary == NULL) {
+ ret = bdb_db_open (bctx);
+ storage = bctx->primary;
} else {
/* we are just fine, lets continue */
- storage = bctx->dbp;
+ storage = bctx->primary;
}
}
UNLOCK (&bctx->lock);
@@ -582,15 +682,16 @@ bdb_db_put (bctx_t *bctx,
ret = storage->put (storage, txnid, &key, &value, db_flags);
if (ret == DB_LOCK_DEADLOCK) {
retries++;
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "deadlock detected in DB->put. "
- "retrying DB->put (%d)",
- retries);
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_PUT %s - %s"
+ "(deadlock detected, retying for %d time)",
+ bctx->directory, key_string, retries);
} else if (ret) {
/* write failed */
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to do DB->put() for key %s: %s",
- key_string, db_strerror (ret));
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_PUT %s - %s: %s"
+ "(failed to put specified entry into database)",
+ bctx->directory, key_string, db_strerror (ret));
need_break = 1;
} else {
/* successfully wrote */
@@ -602,44 +703,68 @@ out:
return ret;
}/* bdb_db_put */
+int32_t
+bdb_db_icreate (struct bdb_ctx *bctx, const char *key)
+{
+ return bdb_db_put (bctx, NULL, key, NULL, 0, 0, 0);
+}
+
+/* TODO: handle errors here and log. propogate only the errno to caller */
+int32_t
+bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset)
+{
+ return bdb_db_put (bfd->ctx, NULL, bfd->key, buf, size, offset, 0);
+}
+
+/* TODO: handle errors here and log. propogate only the errno to caller */
+int32_t
+bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size)
+{
+ return bdb_db_put (bctx, NULL, key, buf, size, 0, 0);
+}
+
+int32_t
+bdb_db_itruncate (struct bdb_ctx *bctx, const char *key)
+{
+ return bdb_db_put (bctx, NULL, key, NULL, 0, 1, 0);
+}
-/* bdb_storage_del - delete a key/value pair corresponding to @path from corresponding db file.
+/* bdb_storage_del - delete a key/value pair corresponding to @path from
+ * corresponding db file.
*
* @bctx: bctx_t * corresponding to the parent directory of @path.
* (should always be a valid bctx). bdb_storage_del should never be called
* if @bctx = NULL.
- * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction or a
- * valid DB_TXN *, when embedded in an explicit transaction.
+ * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction
+ * or a valid DB_TXN *, when embedded in an explicit transaction.
* @path: path to the file, whose key/value pair has to be deleted.
*
- * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL (@bctx->dbp == NULL,
- * nobody has opened DB till now or DB was closed by bdb_table_prune()).
+ * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL
+ * (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by
+ * bdb_table_prune()).
*
* return: 0 on success or -1 on error.
*/
-int32_t
+static int32_t
bdb_db_del (bctx_t *bctx,
DB_TXN *txnid,
- const char *path)
+ const char *key_string)
{
DB *storage = NULL;
DBT key = {0,};
- char *key_string = NULL;
int32_t ret = -1;
int32_t db_flags = 0;
uint8_t need_break = 0;
int32_t retries = 1;
- MAKE_KEY_FROM_PATH (key_string, path);
-
LOCK (&bctx->lock);
{
- if (bctx->dbp == NULL) {
- bctx->dbp = bdb_db_open (bctx);
- storage = bctx->dbp;
+ if (bctx->primary == NULL) {
+ ret = bdb_db_open (bctx);
+ storage = bctx->primary;
} else {
/* we are just fine, lets continue */
- storage = bctx->dbp;
+ storage = bctx->primary;
}
}
UNLOCK (&bctx->lock);
@@ -649,7 +774,7 @@ bdb_db_del (bctx_t *bctx,
ret = bdb_cache_delete (bctx, key_string);
GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out);
- key.data = key_string;
+ key.data = (char *)key_string;
key.size = strlen (key_string);
key.flags = DB_DBT_USERMEM;
@@ -658,26 +783,30 @@ bdb_db_del (bctx_t *bctx,
if (ret == DB_NOTFOUND) {
gf_log ("bdb-ll", GF_LOG_DEBUG,
- "failed to delete %s from storage db, "
- "doesn't exist in storage DB",
- path);
+ "_BDB_DB_DEL %s - %s: ENOENT"
+ "(failed to delete entry, could not be "
+ "found in the database)",
+ bctx->directory, key_string);
need_break = 1;
} else if (ret == DB_LOCK_DEADLOCK) {
retries++;
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "deadlock detected in DB->put. "
- "retrying DB->put (%d)",
- retries);
- }else if (ret == 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_DEL %s - %s"
+ "(deadlock detected, retying for %d time)",
+ bctx->directory, key_string, retries);
+ } else if (ret == 0) {
/* successfully deleted the entry */
gf_log ("bdb-ll", GF_LOG_DEBUG,
- "deleted %s from storage db", path);
+ "_BDB_DB_DEL %s - %s"
+ "(successfully deleted entry from database)",
+ bctx->directory, key_string);
ret = 0;
need_break = 1;
} else {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to delete %s from storage db: %s",
- path, db_strerror (ret));
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_DB_DEL %s - %s: %s"
+ "(failed to delete entry from database)",
+ bctx->directory, key_string, db_strerror (ret));
ret = -1;
need_break = 1;
}
@@ -686,11 +815,18 @@ out:
return ret;
}
+int32_t
+bdb_db_iremove (bctx_t *bctx,
+ const char *key)
+{
+ return bdb_db_del (bctx, NULL, key);
+}
+
/* NOTE: bdb version compatibility wrapper */
int32_t
bdb_cursor_get (DBC *cursorp,
- DBT *key,
- DBT *value,
+ DBT *sec, DBT *pri,
+ DBT *val,
int32_t flags)
{
int32_t ret = -1;
@@ -698,21 +834,21 @@ bdb_cursor_get (DBC *cursorp,
GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out);
#ifdef HAVE_BDB_CURSOR_GET
- ret = cursorp->get (cursorp, key, value, flags);
+ ret = cursorp->pget (cursorp, sec, pri, val, flags);
#else
- ret = cursorp->c_get (cursorp, key, value, flags);
+ ret = cursorp->c_pget (cursorp, sec, pri, val, flags);
#endif
if ((ret != 0) && (ret != DB_NOTFOUND)) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to CURSOR->get() for key %s (%s)",
- (char *)key->data, db_strerror (ret));
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CURSOR_GET: %s"
+ "(failed to retrieve entry from database cursor)",
+ db_strerror (ret));
}
out:
return ret;
}/* bdb_cursor_get */
-
int32_t
bdb_dirent_size (DBT *key)
{
@@ -720,29 +856,6 @@ bdb_dirent_size (DBT *key)
}
-/* bdb_extract_bfd - translate a fd_t to a bfd (either a 'struct bdb_bfd' or 'struct bdb_dir')
- *
- * @fd->ctx is with bdb specific file handle during a successful bdb_open (also bdb_create)
- * or bdb_opendir.
- *
- * return: 'struct bdb_bfd *' or 'struct bdb_dir *' on success, or NULL on failure.
- */
-inline void *
-bdb_extract_bfd (fd_t *fd,
- xlator_t *this)
-{
- uint64_t tmp_bfd = 0;
- void *bfd = NULL;
-
- GF_VALIDATE_OR_GOTO ("bdb-ll", fd, out);
- GF_VALIDATE_OR_GOTO ("bdb-ll", this, out);
-
- fd_ctx_get (fd, this, &tmp_bfd);
- bfd = (void *)(long)bfd;
-
-out:
- return bfd;
-}
/* bdb_dbenv_init - initialize DB_ENV
*
@@ -751,10 +864,10 @@ out:
* NOTE: see private->envflags for flags used.
* 2. DB_ENV->set_lg_dir - set log directory to be used for storing log files
* (log files are the files in which transaction logs are written by db).
- * 3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically clear
- * the unwanted log files (flushed at each checkpoint).
- * 4. DB_ENV->set_errfile - set errfile to be used by db to report detailed error logs.
- * used only for debbuging purpose.
+ * 3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically
+ * clear the unwanted log files (flushed at each checkpoint).
+ * 4. DB_ENV->set_errfile - set errfile to be used by db to report detailed
+ * error logs. used only for debbuging purpose.
*
* return: returns a valid DB_ENV * on success or NULL on error.
*
@@ -769,55 +882,49 @@ bdb_dbenv_init (xlator_t *this,
bdb_private_t *private = NULL;
int32_t fatal_flags = 0;
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (directory, out);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (directory, err);
private = this->private;
- VALIDATE_OR_GOTO (private, out);
+ VALIDATE_OR_GOTO (private, err);
ret = db_env_create (&dbenv, 0);
- VALIDATE_OR_GOTO ((ret == 0), out);
+ VALIDATE_OR_GOTO ((ret == 0), err);
/* NOTE: set_errpfx returns 'void' */
dbenv->set_errpfx(dbenv, this->name);
ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT);
- VALIDATE_OR_GOTO ((ret == 0), out);
+ VALIDATE_OR_GOTO ((ret == 0), err);
ret = dbenv->open(dbenv, directory,
private->envflags,
S_IRUSR | S_IWUSR);
if ((ret != 0) && (ret != DB_RUNRECOVERY)) {
gf_log (this->name, GF_LOG_CRITICAL,
- "failed to open DB environment (%s)",
- db_strerror (ret));
+ "failed to join Berkeley DB environment at %s: %s."
+ "please run manual recovery and retry running "
+ "glusterfs",
+ directory, db_strerror (ret));
dbenv = NULL;
- goto out;
+ goto err;
} else if (ret == DB_RUNRECOVERY) {
fatal_flags = ((private->envflags & (~DB_RECOVER))
| DB_RECOVER_FATAL);
ret = dbenv->open(dbenv, directory, fatal_flags,
S_IRUSR | S_IWUSR);
if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open DB environment (%s) with "
- "DB_REOVER_FATAL",
- db_strerror (ret));
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "failed to join Berkeley DB environment in "
+ "recovery mode at %s: %s. please run manual "
+ "recovery and retry running glusterfs",
+ directory, db_strerror (ret));
dbenv = NULL;
- goto out;
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "opened DB environment after DB_RECOVER_FATAL:"
- " %s", db_strerror (ret));
+ goto err;
}
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "DB environment successfull opened: %s",
- db_strerror (ret));
}
-
-
+ ret = 0;
#if (DB_VERSION_MAJOR == 4 && \
DB_VERSION_MINOR == 7)
if (private->log_auto_remove) {
@@ -832,41 +939,42 @@ bdb_dbenv_init (xlator_t *this,
ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0);
}
#endif
- if (ret != 0) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to set DB_LOG_AUTOREMOVE on dbenv: %s",
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "autoremoval of transactional log files could not be "
+ "configured (%s). you may have to do a manual "
+ "monitoring of transactional log files and remove "
+ "periodically.",
db_strerror (ret));
- } else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "DB_LOG_AUTOREMOVE set on dbenv");
+ goto err;
}
if (private->transaction) {
ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1);
if (ret != 0) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to set DB_AUTO_COMMIT on dbenv: %s",
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "configuration of auto-commit failed for "
+ "database environment at %s. none of the "
+ "operations will be embedded in transaction "
+ "unless explicitly done so.",
db_strerror (ret));
- } else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "DB_AUTO_COMMIT set on dbenv");
+ goto err;
}
if (private->txn_timeout) {
- ret = dbenv->set_timeout (dbenv,
- private->txn_timeout,
+ ret = dbenv->set_timeout (dbenv, private->txn_timeout,
DB_SET_TXN_TIMEOUT);
if (ret != 0) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to set TXN_TIMEOUT to %d "
- "milliseconds on dbenv: %s",
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "could not configure Berkeley DB "
+ "transaction timeout to %d (%s). please"
+ " review 'option transaction-timeout %d"
+ "' option.",
private->txn_timeout,
- db_strerror (ret));
- } else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "TXN_TIMEOUT set to %d milliseconds",
+ db_strerror (ret),
private->txn_timeout);
+ goto err;
}
}
@@ -874,32 +982,28 @@ bdb_dbenv_init (xlator_t *this,
ret = dbenv->set_timeout(dbenv,
private->txn_timeout,
DB_SET_LOCK_TIMEOUT);
-
- if (ret != 0) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to set LOCK_TIMEOUT to %d "
- "milliseconds on dbenv: %s",
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "could not configure Berkeley DB "
+ "lock timeout to %d (%s). please"
+ " review 'option lock-timeout %d"
+ "' option.",
private->lock_timeout,
- db_strerror (ret));
- } else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "LOCK_TIMEOUT set to %d milliseconds",
+ db_strerror (ret),
private->lock_timeout);
+ goto err;
}
}
ret = dbenv->set_lg_dir (dbenv, private->logdir);
-
- if (ret != 0) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to set log directory for dbenv: %s",
- db_strerror (ret));
- } else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "set dbenv log dir to %s",
- private->logdir);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "failed to configure libdb transaction log "
+ "directory at %s. please review the "
+ "'option logdir %s' option.",
+ db_strerror (ret), private->logdir);
+ goto err;
}
-
}
if (private->errfile) {
@@ -907,41 +1011,52 @@ bdb_dbenv_init (xlator_t *this,
if (private->errfp) {
dbenv->set_errfile (dbenv, private->errfp);
} else {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to open errfile: %s",
- strerror (errno));
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "failed to open error logging file for "
+ "libdb (Berkeley DB) internal logging (%s)."
+ "please review the 'option errfile %s' option.",
+ strerror (errno), private->errfile);
+ goto err;
}
}
-out:
return dbenv;
+err:
+ if (dbenv) {
+ dbenv->close (dbenv, 0);
+ }
+
+ return NULL;
}
#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv)
-/* bdb_checkpoint - during transactional usage, db does not directly write the data to db
- * files, instead db writes a 'log' (similar to a journal entry) into a
- * log file. db normally clears the log files during opening of an
- * environment. since we expect a filesystem server to run for a pretty
- * long duration and flushing 'log's during dbenv->open would prove very
- * costly, if we accumulate the log entries for one complete run of
- * glusterfs server. to flush the logs frequently, db provides a mechanism
- * called 'checkpointing'. when we do a checkpoint, db flushes the logs to
- * disk (writes changes to db files) and we can also clear the accumulated
- * log files after checkpointing. NOTE: removing unwanted log files is not
- * part of dbenv->txn_checkpoint() call.
+/* bdb_checkpoint - during transactional usage, db does not directly write the
+ * data to db files, instead db writes a 'log' (similar to a journal entry)
+ * into a log file. db normally clears the log files during opening of an
+ * environment. since we expect a filesystem server to run for a pretty long
+ * duration and flushing 'log's during dbenv->open would prove very costly, if
+ * we accumulate the log entries for one complete run of glusterfs server. to
+ * flush the logs frequently, db provides a mechanism called 'checkpointing'.
+ * when we do a checkpoint, db flushes the logs to disk (writes changes to db
+ * files) and we can also clear the accumulated log files after checkpointing.
+ * NOTE: removing unwanted log files is not part of dbenv->txn_checkpoint()
+ * call.
*
* @data: xlator_t of the current instance of bdb xlator.
*
- * bdb_checkpoint is called in a different thread from the main glusterfs thread. bdb
- * xlator creates the checkpoint thread after successfully opening the db environment.
- * NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem thread.
+ * bdb_checkpoint is called in a different thread from the main glusterfs
+ * thread. bdb xlator creates the checkpoint thread after successfully opening
+ * the db environment.
+ * NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem
+ * thread.
*
* db environment checkpointing frequency is controlled by
* 'option checkpoint-timeout <time-in-seconds>' in volfile.
*
- * NOTE: checkpointing thread is started only if 'option transaction on' specified in
- * volfile. checkpointing is not valid for non-transactional environments.
+ * NOTE: checkpointing thread is started only if 'option transaction on'
+ * specified in volfile. checkpointing is not valid for non-transactional
+ * environments.
*
*/
static void *
@@ -965,23 +1080,29 @@ bdb_checkpoint (void *data)
if (active) {
ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0);
if (ret) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to checkpoint environment: %s",
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CHECKPOINT: %s"
+ "(failed to checkpoint environment)",
db_strerror (ret));
} else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "checkpointing successful");
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CHECKPOINT: successfully "
+ "checkpointed");
}
} else {
ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0);
if (ret) {
- gf_log ("bctx", GF_LOG_ERROR,
- "failed to do final checkpoint "
- "environment: %s",
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "_BDB_CHECKPOINT: %s"
+ "(final checkpointing failed. might "
+ "need to run recovery tool manually on "
+ "next usage of this database "
+ "environment)",
db_strerror (ret));
} else {
- gf_log ("bctx", GF_LOG_DEBUG,
- "final checkpointing successful");
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "_BDB_CHECKPOINT: final successfully "
+ "checkpointed");
}
break;
}
@@ -990,449 +1111,321 @@ bdb_checkpoint (void *data)
return NULL;
}
-static inline void
-bdb_cache_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- /* cache is always on */
- private->cache = ON;
-}
-
-static inline void
-bdb_log_remove_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- private->log_auto_remove = 1;
- gf_log (this->name, GF_LOG_DEBUG,
- "DB_ENV will use DB_LOG_AUTO_REMOVE");
-}
-static inline void
-bdb_errfile_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *errfile = NULL;
-
- ret = dict_get_str (options, "errfile", &errfile);
- if (ret == 0) {
- private->errfile = strdup (errfile);
- gf_log (this->name, GF_LOG_DEBUG,
- "using errfile: %s", private->errfile);
- }
-}
-
-static inline void
-bdb_table_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
+/* bdb_db_init - initialize bdb xlator
+ *
+ * reads the options from @options dictionary and sets appropriate values in
+ * @this->private. also initializes DB_ENV.
+ *
+ * return: 0 on success or -1 on error
+ * (with logging the error through gf_log()).
+ */
+int
+bdb_db_init (xlator_t *this,
+ dict_t *options)
{
- bctx_table_t *table = NULL;
- int32_t idx = 0;
-
- int ret = -1;
- char *lru_limit_str = NULL;
- char *page_size_str = NULL;
-
- table = CALLOC (1, sizeof (*table));
- if (table) {
- INIT_LIST_HEAD(&(table->b_lru));
- INIT_LIST_HEAD(&(table->active));
- INIT_LIST_HEAD(&(table->purge));
-
- LOCK_INIT (&table->lock);
- LOCK_INIT (&table->checkpoint_lock);
-
- table->transaction = private->transaction;
- table->access_mode = private->access_mode;
- table->dbflags = private->dbflags;
- table->this = this;
-
- {
- ret = dict_get_str (options, "lru-limit",
- &lru_limit_str);
-
- /* TODO: set max lockers and max txns to accomodate
- * for more than lru_limit */
- if (ret == 0) {
- ret = gf_string2uint32 (lru_limit_str,
- &table->lru_limit);
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "setting bctx lru limit to %d",
- table->lru_limit);
- } else {
- table->lru_limit = BDB_DEFAULT_LRU_LIMIT;
- }
- }
-
- {
- ret = dict_get_str (options, "page-size",
- &page_size_str);
-
- if (ret == 0) {
- ret = gf_string2bytesize (page_size_str,
- &table->page_size);
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "invalid number format \"%s\""
- " of \"option page-size\"",
- page_size_str);
- }
+ /* create a db entry for root */
+ int32_t op_ret = 0;
+ bdb_private_t *private = NULL;
+ bctx_table_t *table = NULL;
- if (!PAGE_SIZE_IN_RANGE(table->page_size)) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "pagesize %s is out of range."
- "Allowed pagesize is between "
- "%d and %d",
- page_size_str,
- BDB_LL_PAGE_SIZE_MIN,
- BDB_LL_PAGE_SIZE_MAX);
- }
- }
- else {
- table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
- }
- gf_log ("bdb-ll",
- GF_LOG_DEBUG, "using page-size %"PRIu64,
- table->page_size);
- }
+ char *checkpoint_interval_str = NULL;
+ char *page_size_str = NULL;
+ char *lru_limit_str = NULL;
+ char *timeout_str = NULL;
+ char *access_mode = NULL;
+ char *endptr = NULL;
+ char *errfile = NULL;
+ char *directory = NULL;
+ char *logdir = NULL;
+ char *mode = NULL;
+ char *mode_str = NULL;
+ int ret = -1;
+ int idx = 0;
+ struct stat stbuf = {0,};
- table->hash_size = BDB_DEFAULT_HASH_SIZE;
- table->b_hash = CALLOC (BDB_DEFAULT_HASH_SIZE,
- sizeof (struct list_head));
+ private = this->private;
- for (idx = 0; idx < table->hash_size; idx++)
- INIT_LIST_HEAD(&(table->b_hash[idx]));
+ /* cache is always on */
+ private->cache = ON;
- private->b_table = table;
+ ret = dict_get_str (options, "access-mode", &access_mode);
+ if ((ret == 0)
+ && (!strcmp (access_mode, "btree"))) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "using BTREE access mode to access libdb "
+ "(Berkeley DB)");
+ private->access_mode = DB_BTREE;
} else {
- gf_log ("bdb-ll", GF_LOG_CRITICAL,
- "failed to allocate bctx table: out of memory");
+ gf_log (this->name, GF_LOG_DEBUG,
+ "using HASH access mode to access libdb (Berkeley DB)");
+ private->access_mode = DB_HASH;
}
-}
-
-static inline void
-bdb_directory_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *directory = NULL;
- char *logdir = NULL;
- int32_t op_ret = -1;
- struct stat stbuf = {0};
- ret = dict_get_str (options, "directory", &directory);
+ ret = dict_get_str (options, "mode", &mode);
+ if ((ret == 0)
+ && (!strcmp (mode, "cache"))) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cache data mode selected for 'storage/bdb'. filesystem"
+ " operations are not transactionally protected and "
+ "system crash does not guarantee recoverability of "
+ "data");
+ private->envflags = DB_CREATE | DB_INIT_LOG |
+ DB_INIT_MPOOL | DB_THREAD;
+ private->dbflags = DB_CREATE | DB_THREAD;
+ private->transaction = OFF;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "persistent data mode selected for 'storage/bdb'. each"
+ "filesystem operation is guaranteed to be Berkeley DB "
+ "transaction protected.");
+ private->transaction = ON;
+ private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG |
+ DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD;
+ private->dbflags = DB_CREATE | DB_THREAD;
- if (ret == 0) {
- ret = dict_get_str (options, "logdir", &logdir);
- if (ret != 0) {
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "using default logdir as database home");
- private->logdir = strdup (directory);
+ ret = dict_get_str (options, "lock-timeout", &timeout_str);
- } else {
- private->logdir = strdup (logdir);
- gf_log ("bdb-ll", GF_LOG_DEBUG,
- "using logdir: %s",
- private->logdir);
- umask (000);
- if (mkdir (private->logdir, 0777) == 0) {
- gf_log ("bdb-ll", GF_LOG_WARNING,
- "logdir specified (%s) not exists, "
- "created",
- private->logdir);
- }
-
- op_ret = stat (private->logdir, &stbuf);
- if ((op_ret != 0)
- || (!S_ISDIR (stbuf.st_mode))) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "specified logdir doesn't exist, "
- "using default "
- "(environment home directory: %s)",
- directory);
- private->logdir = strdup (directory);
+ if (ret == 0) {
+ ret = gf_string2time (timeout_str,
+ &private->lock_timeout);
+
+ if (private->lock_timeout > 4260000) {
+ /* db allows us to DB_SET_LOCK_TIMEOUT to be
+ * set to a maximum of 71 mins
+ * (4260000 milliseconds) */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Berkeley DB lock-timeout parameter "
+ "(%d) is out of range. please specify"
+ " a valid timeout value for "
+ "lock-timeout and retry.",
+ private->lock_timeout);
+ goto err;
}
}
-
- private->b_table->dbenv = bdb_dbenv_init (this, directory);
-
- if (!private->b_table->dbenv) {
- gf_log ("bdb-ll", GF_LOG_ERROR,
- "failed to initialize db environment");
- FREE (private);
- op_ret = -1;
- } else {
- if (private->transaction) {
- /* all well, start the checkpointing thread */
- LOCK_INIT (&private->active_lock);
-
- LOCK (&private->active_lock);
- {
- private->active = 1;
- }
- UNLOCK (&private->active_lock);
- pthread_create (&private->checkpoint_thread,
- NULL, bdb_checkpoint, this);
+ ret = dict_get_str (options, "transaction-timeout",
+ &timeout_str);
+ if (ret == 0) {
+ ret = gf_string2time (timeout_str,
+ &private->txn_timeout);
+
+ if (private->txn_timeout > 4260000) {
+ /* db allows us to DB_SET_TXN_TIMEOUT to be set
+ * to a maximum of 71 mins
+ * (4260000 milliseconds) */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Berkeley DB lock-timeout parameter "
+ "(%d) is out of range. please specify"
+ " a valid timeout value for "
+ "lock-timeout and retry.",
+ private->lock_timeout);
+ goto err;
}
}
- }
-}
-
-static inline void
-bdb_dir_mode_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *mode_str = NULL;
- char *endptr = NULL;
- ret = dict_get_str (options, "dir-mode", &mode_str);
-
- if (ret == 0) {
- private->dir_mode = strtol (mode_str, &endptr, 8);
- if ((*endptr) ||
- (!IS_VALID_FILE_MODE(private->dir_mode))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "invalid dir-mode %o. setting to default %o",
- private->dir_mode,
- DEFAULT_DIR_MODE);
- private->dir_mode = DEFAULT_DIR_MODE;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting dir-mode to %o",
- private->dir_mode);
+ private->checkpoint_interval = BDB_DEFAULT_CHECKPOINT_INTERVAL;
+ ret = dict_get_str (options, "checkpoint-interval",
+ &checkpoint_interval_str);
+ if (ret == 0) {
+ ret = gf_string2time (checkpoint_interval_str,
+ &private->checkpoint_interval);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "'%"PRIu32"' is not a valid parameter "
+ "for checkpoint-interval option. "
+ "please specify a valid "
+ "checkpoint-interval and retry",
+ private->checkpoint_interval);
+ goto err;
+ }
}
- } else {
- private->dir_mode = DEFAULT_DIR_MODE;
}
- private->dir_mode = private->dir_mode | S_IFDIR;
-}
-
-static inline void
-bdb_file_mode_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *mode_str = NULL;
- char *endptr = NULL;
-
ret = dict_get_str (options, "file-mode", &mode_str);
-
if (ret == 0) {
private->file_mode = strtol (mode_str, &endptr, 8);
if ((*endptr) ||
(!IS_VALID_FILE_MODE(private->file_mode))) {
gf_log (this->name, GF_LOG_DEBUG,
- "invalid file-mode %o. setting to default %o",
- private->file_mode, DEFAULT_FILE_MODE);
- private->file_mode = DEFAULT_FILE_MODE;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting file-mode to %o",
+ "'%o' is not a valid parameter for file-mode "
+ "option. please specify a valid parameter for "
+ "file-mode and retry.",
private->file_mode);
- private->file_mode = private->file_mode;
+ goto err;
}
} else {
private->file_mode = DEFAULT_FILE_MODE;
}
-
private->symlink_mode = private->file_mode | S_IFLNK;
private->file_mode = private->file_mode | S_IFREG;
-}
-
-static inline void
-bdb_checkpoint_interval_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *checkpoint_interval_str = NULL;
-
- private->checkpoint_interval = BDB_DEFAULT_CHECKPOINT_INTERVAL;
-
- ret = dict_get_str (options, "checkpoint-interval",
- &checkpoint_interval_str);
+ ret = dict_get_str (options, "dir-mode", &mode_str);
if (ret == 0) {
- ret = gf_string2time (checkpoint_interval_str,
- &private->checkpoint_interval);
-
- if (ret == 0) {
+ private->dir_mode = strtol (mode_str, &endptr, 8);
+ if ((*endptr) ||
+ (!IS_VALID_FILE_MODE(private->dir_mode))) {
gf_log (this->name, GF_LOG_DEBUG,
- "setting checkpoint-interval to %"PRIu32" seconds",
- private->checkpoint_interval);
+ "'%o' is not a valid parameter for dir-mode "
+ "option. please specify a valid parameter for "
+ "dir-mode and retry.",
+ private->dir_mode);
+ goto err;
}
} else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting checkpoint-interval to default: %"PRIu32" seconds",
- private->checkpoint_interval);
+ private->dir_mode = DEFAULT_DIR_MODE;
}
-}
-static inline void
-bdb_lock_timeout_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *timeout_str = NULL;
+ private->dir_mode = private->dir_mode | S_IFDIR;
- ret = dict_get_str (options, "lock-timeout", &timeout_str);
+ table = CALLOC (1, sizeof (*table));
+ if (table == NULL) {
+ gf_log ("bdb-ll", GF_LOG_CRITICAL,
+ "memory allocation for 'storage/bdb' internal "
+ "context table failed.");
+ goto err;
+ }
- if (ret == 0) {
- ret = gf_string2time (timeout_str, &private->lock_timeout);
+ INIT_LIST_HEAD(&(table->b_lru));
+ INIT_LIST_HEAD(&(table->active));
+ INIT_LIST_HEAD(&(table->purge));
- if (private->lock_timeout > 4260000) {
- /* db allows us to DB_SET_LOCK_TIMEOUT to be set to a
- * maximum of 71 mins (4260000 milliseconds) */
- gf_log (this->name, GF_LOG_DEBUG,
- "lock-timeout %d, out of range",
- private->lock_timeout);
- private->lock_timeout = 0;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting lock-timeout to %d milliseconds",
- private->lock_timeout);
- }
- }
-}
+ LOCK_INIT (&table->lock);
+ LOCK_INIT (&table->checkpoint_lock);
-static inline void
-bdb_transaction_timeout_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *timeout_str = NULL;
+ table->transaction = private->transaction;
+ table->access_mode = private->access_mode;
+ table->dbflags = private->dbflags;
+ table->this = this;
- ret = dict_get_str (options, "transaction-timeout", &timeout_str);
+ ret = dict_get_str (options, "lru-limit",
+ &lru_limit_str);
+ /* TODO: set max lockers and max txns to accomodate
+ * for more than lru_limit */
if (ret == 0) {
- ret = gf_string2time (timeout_str, &private->txn_timeout);
-
- if (private->txn_timeout > 4260000) {
- /* db allows us to DB_SET_TXN_TIMEOUT to be set to
- * a maximum of 71 mins (4260000 milliseconds) */
- gf_log (this->name, GF_LOG_DEBUG,
- "transaction-timeout %d, out of range",
- private->txn_timeout);
- private->txn_timeout = 0;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting transaction-timeout to %d "
- "milliseconds",
- private->txn_timeout);
- }
+ ret = gf_string2uint32 (lru_limit_str,
+ &table->lru_limit);
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "setting lru limit of 'storage/bdb' internal context"
+ "table to %d. maximum of %d unused databases can be "
+ "open at any given point of time.",
+ table->lru_limit, table->lru_limit);
+ } else {
+ table->lru_limit = BDB_DEFAULT_LRU_LIMIT;
}
-}
-static inline void
-bdb_transaction_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *mode = NULL;
+ ret = dict_get_str (options, "page-size",
+ &page_size_str);
- ret = dict_get_str (options, "mode", &mode);
+ if (ret == 0) {
+ ret = gf_string2bytesize (page_size_str,
+ &table->page_size);
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "\"%s\" is an invalid parameter to "
+ "\"option page-size\". please specify a valid "
+ "size and retry.",
+ page_size_str);
+ goto err;
+ }
- if ((ret == 0)
- && (!strcmp (mode, "cache"))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cache mode selected");
- private->envflags = DB_CREATE | DB_INIT_LOG |
- DB_INIT_MPOOL | DB_THREAD;
- private->dbflags = DB_CREATE | DB_THREAD;
- private->transaction = OFF;
+ if (!PAGE_SIZE_IN_RANGE(table->page_size)) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "\"%s\" is out of range for Berkeley DB "
+ "page-size. allowed page-size range is %d to "
+ "%d. please specify a page-size value in the "
+ "range and retry.",
+ page_size_str, BDB_LL_PAGE_SIZE_MIN,
+ BDB_LL_PAGE_SIZE_MAX);
+ goto err;
+ }
} else {
- gf_log (this->name, GF_LOG_DEBUG,
- "persistant mode selected");
- private->transaction = ON;
- private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG |
- DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD;
- private->dbflags = DB_CREATE | DB_THREAD;
-
- bdb_lock_timeout_init (this, options, private);
-
- bdb_transaction_timeout_init (this, options, private);
-
- bdb_log_remove_init (this, options, private);
-
- bdb_checkpoint_interval_init (this, options, private);
+ table->page_size = BDB_LL_PAGE_SIZE_DEFAULT;
}
-}
-static inline void
-bdb_access_mode_init (xlator_t *this,
- dict_t *options,
- struct bdb_private *private)
-{
- int ret = -1;
- char *access_mode = NULL;
+ table->hash_size = BDB_DEFAULT_HASH_SIZE;
+ table->b_hash = CALLOC (BDB_DEFAULT_HASH_SIZE,
+ sizeof (struct list_head));
- ret = dict_get_str (options, "access-mode", &access_mode);
+ for (idx = 0; idx < table->hash_size; idx++)
+ INIT_LIST_HEAD(&(table->b_hash[idx]));
- if ((ret == 0)
- && (!strcmp (access_mode, "btree"))) {
- gf_log (this->name, GF_LOG_DEBUG,
- "using access mode BTREE");
- private->access_mode = DB_BTREE;
- } else {
+ private->b_table = table;
+
+ ret = dict_get_str (options, "errfile", &errfile);
+ if (ret == 0) {
+ private->errfile = strdup (errfile);
gf_log (this->name, GF_LOG_DEBUG,
- "using access mode HASH");
- private->access_mode = DB_HASH;
+ "using %s as error logging file for libdb (Berkeley DB "
+ "library) internal logging.", private->errfile);
}
-}
+ ret = dict_get_str (options, "directory", &directory);
-/* bdb_db_init - initialize bdb xlator
- *
- * reads the options from @options dictionary and sets appropriate values in
- * @this->private. also initializes DB_ENV.
- *
- * return: 0 on success or -1 on error
- * (with logging the error through gf_log()).
- */
-int
-bdb_db_init (xlator_t *this,
- dict_t *options)
-{
- /* create a db entry for root */
- int32_t op_ret = 0;
- bdb_private_t *private = NULL;
+ if (ret == 0) {
+ ret = dict_get_str (options, "logdir", &logdir);
- private = this->private;
+ if (ret < 0) {
+ gf_log ("bdb-ll", GF_LOG_DEBUG,
+ "using the database environment home "
+ "directory (%s) itself as transaction log "
+ "directory", directory);
+ private->logdir = strdup (directory);
- bdb_cache_init (this, options, private);
+ } else {
+ private->logdir = strdup (logdir);
- bdb_access_mode_init (this, options, private);
+ op_ret = stat (private->logdir, &stbuf);
+ if ((op_ret != 0)
+ || (!S_ISDIR (stbuf.st_mode))) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "specified logdir %s does not exist. "
+ "please provide a valid existing "
+ "directory as parameter to 'option "
+ "logdir'",
+ private->logdir);
+ goto err;
+ }
+ }
- bdb_transaction_init (this, options, private);
+ private->b_table->dbenv = bdb_dbenv_init (this, directory);
+ if (private->b_table->dbenv == NULL) {
+ gf_log ("bdb-ll", GF_LOG_ERROR,
+ "initialization of database environment "
+ "failed");
+ goto err;
+ } else {
+ if (private->transaction) {
+ /* all well, start the checkpointing thread */
+ LOCK_INIT (&private->active_lock);
- {
- LOCK_INIT (&private->ino_lock);
- private->next_ino = 2;
+ LOCK (&private->active_lock);
+ {
+ private->active = 1;
+ }
+ UNLOCK (&private->active_lock);
+ pthread_create (&private->checkpoint_thread,
+ NULL, bdb_checkpoint, this);
+ }
+ }
}
- bdb_file_mode_init (this, options, private);
-
- bdb_dir_mode_init (this, options, private);
-
- bdb_table_init (this, options, private);
-
- bdb_errfile_init (this, options, private);
+ return op_ret;
+err:
+ if (table) {
+ FREE (table->b_hash);
+ FREE (table);
+ }
+ if (private) {
+ if (private->errfile)
+ FREE (private->errfile);
- bdb_directory_init (this, options, private);
+ if (private->logdir)
+ FREE (private->logdir);
+ }
- return op_ret;
+ return -1;
}