diff options
Diffstat (limited to 'xlators/storage/bdb/src/bdb-ll.c')
| -rw-r--r-- | xlators/storage/bdb/src/bdb-ll.c | 1464 | 
1 files changed, 0 insertions, 1464 deletions
diff --git a/xlators/storage/bdb/src/bdb-ll.c b/xlators/storage/bdb/src/bdb-ll.c deleted file mode 100644 index f70ec47f494..00000000000 --- a/xlators/storage/bdb/src/bdb-ll.c +++ /dev/null @@ -1,1464 +0,0 @@ -/* -  Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> -  This file is part of GlusterFS. - -  GlusterFS is free software; you can redistribute it and/or modify -  it under the terms of the GNU General Public License as published -  by the Free Software Foundation; either version 3 of the License, -  or (at your option) any later version. - -  GlusterFS is distributed in the hope that it will be useful, but -  WITHOUT ANY WARRANTY; without even the implied warranty of -  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU -  General Public License for more details. - -  You should have received a copy of the GNU General Public License -  along with this program.  If not, see -  <http://www.gnu.org/licenses/>. -*/ - -#include <libgen.h> -#include "bdb.h" -#include <list.h> -#include "hashfn.h" -/* - * implement the procedures to interact with bdb */ - -/**************************************************************** - * - * General wrappers and utility procedures for bdb xlator - * - ****************************************************************/ - -ino_t -bdb_inode_transform (ino_t parent, -                     const char *name, -                     size_t namelen) -{ -        ino_t               ino = -1; -        uint64_t            hash = 0; - -        hash = gf_dm_hashfn (name, namelen); - -        ino = (((parent << 32) | 0x00000000ffffffffULL) -               & (hash | 0xffffffff00000000ULL)); - -        return ino; -} - -static int -bdb_generate_secondary_hash (DB *secondary, -                             const DBT *pkey, -                             const DBT *data, -                             DBT *skey) -{ -        char *primary = NULL; -        uint32_t *hash = NULL; - -        primary = pkey->data; - -        hash 
= GF_CALLOC (1, sizeof (uint32_t), gf_bdb_mt_uint32_t); - -        *hash = gf_dm_hashfn (primary, pkey->size); - -        skey->data = hash; -        skey->size = sizeof (hash); -        skey->flags = DB_DBT_APPMALLOC; - -        return 0; -} - -/*********************************************************** - * - *  bdb storage database utilities - * - **********************************************************/ - -/* - * bdb_db_open - opens a storage db. - * - * @ctx: context specific to the directory for which we are supposed to open db - * - * see, if we have empty slots to open a db. - *      if (no-empty-slots), then prune open dbs and close as many as possible - *      if (empty-slot-available), tika muchkonDu db open maaDu - * - */ -static int -bdb_db_open (bctx_t *bctx) -{ -        DB *primary   = NULL; -        DB *secondary = NULL; -        int32_t ret = -1; -        bctx_table_t *table = NULL; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); - -        table = bctx->table; -        GF_VALIDATE_OR_GOTO ("bdb-ll", table, out); - -        /* we have to do the following, we can't deny someone of db_open ;) */ -        ret = db_create (&primary, table->dbenv, 0); -        if (ret < 0) { -                gf_log ("bdb-ll", GF_LOG_DEBUG, -                        "_BDB_DB_OPEN %s: %s (failed to create database object" -                        " for primary database)", -                        bctx->directory, db_strerror (ret)); -                ret = -ENOMEM; -                goto out; -        } - -        if (table->page_size) { -                ret = primary->set_pagesize (primary, -                                             table->page_size); -                if (ret < 0) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_OPEN %s: %s (failed to set page-size " -                                "to %"PRIu64")", -                                bctx->directory, db_strerror (ret), -                            
    table->page_size); -                } else { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_OPEN %s: page-size set to %"PRIu64, -                                bctx->directory, table->page_size); -                } -        } - -        ret = primary->open (primary, NULL, bctx->db_path, "primary", -                             table->access_mode, table->dbflags, 0); -        if (ret < 0) { -                gf_log ("bdb-ll", GF_LOG_ERROR, -                        "_BDB_DB_OPEN %s: %s " -                        "(failed to open primary database)", -                        bctx->directory, db_strerror (ret)); -                ret = -1; -                goto cleanup; -        } - -        ret = db_create (&secondary, table->dbenv, 0); -        if (ret < 0) { -                gf_log ("bdb-ll", GF_LOG_DEBUG, -                        "_BDB_DB_OPEN %s: %s (failed to create database object" -                        " for secondary database)", -                        bctx->directory, db_strerror (ret)); -                ret = -ENOMEM; -                goto cleanup; -        } - -        ret = secondary->open (secondary, NULL, bctx->db_path, "secondary", -                               table->access_mode, table->dbflags, 0); -        if (ret != 0 ) { -                gf_log ("bdb-ll", GF_LOG_ERROR, -                        "_BDB_DB_OPEN %s: %s " -                        "(failed to open secondary database)", -                        bctx->directory, db_strerror (ret)); -                ret = -1; -                goto cleanup; -        } - -        ret = primary->associate (primary, NULL, secondary, -                                  bdb_generate_secondary_hash, -#ifdef DB_IMMUTABLE_KEY -                                  DB_IMMUTABLE_KEY); -#else -                                  0); -#endif -        if (ret != 0 ) { -                gf_log ("bdb-ll", GF_LOG_ERROR, -                        "_BDB_DB_OPEN %s: %s " -      
                  "(failed to associate primary database with " -                        "secondary database)", -                        bctx->directory, db_strerror (ret)); -                ret = -1; -                goto cleanup; -        } - -out: -        bctx->primary = primary; -        bctx->secondary = secondary; - -        return ret; -cleanup: -        if (primary) -                primary->close (primary, 0); -        if (secondary) -                secondary->close (secondary, 0); - -        return ret; -} - -int32_t -bdb_cursor_close (bctx_t *bctx, -                  DBC *cursorp) -{ -        int32_t ret = -1; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); - -        LOCK (&bctx->lock); -        { -#ifdef HAVE_BDB_CURSOR_GET -                ret = cursorp->close (cursorp); -#else -                ret = cursorp->c_close (cursorp); -#endif -                if (ret < 0) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_CURSOR_CLOSE %s: %s " -                                "(failed to close database cursor)", -                                bctx->directory, db_strerror (ret)); -                } -        } -        UNLOCK (&bctx->lock); - -out: -        return ret; -} - - -int32_t -bdb_cursor_open (bctx_t *bctx, -                 DBC **cursorpp) -{ -        int32_t ret = -1; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorpp, out); - -        LOCK (&bctx->lock); -        { -                if (bctx->secondary) { -                        /* do nothing, just continue */ -                        ret = 0; -                } else { -                        ret = bdb_db_open (bctx); -                        if (ret < 0) { -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CURSOR_OPEN %s: ENOMEM " -                                        
"(failed to open secondary database)", -                                        bctx->directory); -                                ret = -ENOMEM; -                        } else { -                                ret = 0; -                        } -                } - -                if (ret == 0) { -                        /* all set, open cursor */ -                        ret = bctx->secondary->cursor (bctx->secondary, -                                                       NULL, cursorpp, 0); -                        if (ret < 0) { -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CURSOR_OPEN %s: %s " -                                        "(failed to open a cursor to database)", -                                        bctx->directory, db_strerror (ret)); -                        } -                } -        } -        UNLOCK (&bctx->lock); - -out: -        return ret; -} - - -/* cache related */ -static bdb_cache_t * -bdb_cache_lookup (bctx_t *bctx, -                  char *path) -{ -        bdb_cache_t *bcache = NULL; -        bdb_cache_t *trav   = NULL; -        char        *key    = NULL; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); - -        MAKE_KEY_FROM_PATH (key, path); - -        LOCK (&bctx->lock); -        { -                list_for_each_entry (trav, &bctx->c_list, c_list) { -                        if (!strcmp (trav->key, key)){ -                                bcache = trav; -                                break; -                        } -                } -        } -        UNLOCK (&bctx->lock); - -out: -        return bcache; -} - -static int32_t -bdb_cache_insert (bctx_t *bctx, -                  DBT *key, -                  DBT *data) -{ -        bdb_cache_t *bcache = NULL; -        int32_t ret = -1; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); -        
GF_VALIDATE_OR_GOTO ("bdb-ll", data, out); - -        LOCK (&bctx->lock); -        { -                if (bctx->c_count > 5) { -                        /* most of the times, we enter here */ -                        /* FIXME: ugly, not supposed to disect any of the -                         * 'struct list_head' directly */ -                        if (!list_empty (&bctx->c_list)) { -                                bcache = list_entry (bctx->c_list.prev, -                                                     bdb_cache_t, c_list); -                                list_del_init (&bcache->c_list); -                        } -                        if (bcache->key) { -                                GF_FREE (bcache->key); -                                bcache->key = GF_CALLOC (key->size + 1, -                                                         sizeof (char),  -                                                         gf_bdb_mt_char); -                                GF_VALIDATE_OR_GOTO ("bdb-ll", -                                                     bcache->key, unlock); -                                memcpy (bcache->key, (char *)key->data, -                                        key->size); -                        } else { -                                /* should never come here */ -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CACHE_INSERT %s (%s) " -                                        "(found a cache entry with empty key)", -                                        bctx->directory, (char *)key->data); -                        } /* if(bcache->key)...else */ -                        if (bcache->data) { -                                GF_FREE (bcache->data); -                                bcache->data = memdup (data->data, data->size); -                                GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, -                                                     unlock); -              
                  bcache->size = data->size; -                        } else { -                                /* should never come here */ -                                gf_log ("bdb-ll", GF_LOG_CRITICAL, -                                        "_BDB_CACHE_INSERT %s (%s) " -                                        "(found a cache entry with no data)", -                                        bctx->directory, (char *)key->data); -                        } /* if(bcache->data)...else */ -                        list_add (&bcache->c_list, &bctx->c_list); -                        ret = 0; -                } else { -                        /* we will be entering here very rarely */ -                        bcache = GF_CALLOC (1, sizeof (*bcache),  -                                            gf_bdb_mt_bdb_cache_t); -                        GF_VALIDATE_OR_GOTO ("bdb-ll", bcache, unlock); - -                        bcache->key = GF_CALLOC (key->size + 1, sizeof (char), -                                                 gf_bdb_mt_char); -                        GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->key, unlock); -                        memcpy (bcache->key, key->data, key->size); - -                        bcache->data = memdup (data->data, data->size); -                        GF_VALIDATE_OR_GOTO ("bdb-ll", bcache->data, unlock); - -                        bcache->size = data->size; -                        list_add (&bcache->c_list, &bctx->c_list); -                        bctx->c_count++; -                        ret = 0; -                } /* if(private->c_count < 5)...else */ -        } -unlock: -        UNLOCK (&bctx->lock); -out: -        return ret; -} - -static int32_t -bdb_cache_delete (bctx_t *bctx, -                  const char *key) -{ -        bdb_cache_t *bcache = NULL; -        bdb_cache_t *trav   = NULL; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", key, out); - -        LOCK (&bctx->lock); -        { -    
            list_for_each_entry (trav, &bctx->c_list, c_list) { -                        if (!strcmp (trav->key, key)){ -                                bctx->c_count--; -                                bcache = trav; -                                break; -                        } -                } - -                if (bcache) { -                        list_del_init (&bcache->c_list); -                        GF_FREE (bcache->key); -                        GF_FREE (bcache->data); -                        GF_FREE (bcache); -                } -        } -        UNLOCK (&bctx->lock); - -out: -        return 0; -} - -void * -bdb_db_stat (bctx_t *bctx, -             DB_TXN *txnid, -             uint32_t flags) -{ -        DB     *storage = NULL; -        void   *stat    = NULL; -        int32_t ret     = -1; - -        LOCK (&bctx->lock); -        { -                if (bctx->primary == NULL) { -                        ret = bdb_db_open (bctx); -                        storage = bctx->primary; -                } else { -                        /* we are just fine, lets continue */ -                        storage = bctx->primary; -                } /* if(bctx->dbp==NULL)...else */ -        } -        UNLOCK (&bctx->lock); - -        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - -        ret = storage->stat (storage, txnid, &stat, flags); - -        if (ret < 0) { -                gf_log ("bdb-ll", GF_LOG_DEBUG, -                        "_BDB_DB_STAT %s: %s " -                        "(failed to do stat database)", -                        bctx->directory, db_strerror (ret)); -        } -out: -        return stat; - -} - -/* bdb_storage_get - retrieve a key/value pair corresponding to @path from the - *  corresponding db file. - * - * @bctx: bctx_t * corresponding to the parent directory of @path. (should - *  always be a valid bctx).  bdb_storage_get should never be called if - *  @bctx = NULL. 
- * @txnid: NULL if bdb_storage_get is not embedded in an explicit transaction - *  or a valid DB_TXN *, when embedded in an explicit transaction. - * @path: path of the file to read from (translated to a database key using - *  MAKE_KEY_FROM_PATH) - * @buf: char * - caller-supplied read buffer into which the file content is - *  copied. may be NULL, in which case no content is copied to the - *  caller. - * @size: size of the file content to be read. - * @offset: offset from which the file content is to be read. - * - * NOTE: bdb_storage_get tries to open DB, if @bctx->primary == NULL - *  (@bctx->primary == NULL, nobody has opened DB till now or DB was closed by - *  bdb_table_prune()). - * - * NOTE: if bctx->cache is set (bdb xlator's internal caching enabled), then - *  bdb_storage_get first looks up the cache for the key/value pair. only if - *  bdb_cache_lookup fails is DB->get() called. it also inserts the - *  newly read key/value pair into the cache through bdb_cache_insert. - * - * return: 'number of bytes read' on success or -1 on error. - * - * also see: bdb_cache_lookup, bdb_cache_insert for details about bdb - *  xlator's internal cache. 
- */ -static int32_t -bdb_db_get (bctx_t *bctx, -            DB_TXN *txnid, -            const char *path, -            char *buf, -            size_t size, -            off_t offset) -{ -        DB          *storage    = NULL; -        DBT          key        = {0,}; -        DBT          value      = {0,}; -        int32_t      ret        = -1; -        size_t       copy_size  = 0; -        char        *key_string = NULL; -        bdb_cache_t *bcache     = NULL; -        int32_t      db_flags   = 0; -        uint8_t      need_break = 0; -        int32_t      retries    = 1; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", bctx, out); -        GF_VALIDATE_OR_GOTO ("bdb-ll", path, out); - -        MAKE_KEY_FROM_PATH (key_string, path); - -        if (bctx->cache && -            ((bcache = bdb_cache_lookup (bctx, key_string)) != NULL)) { -                if (buf) { -                        copy_size = ((bcache->size - offset) < size)? -                                (bcache->size - offset) : size; - -                        memcpy (buf, (bcache->data + offset), copy_size); -                        ret = copy_size; -                } else { -                        ret = bcache->size; -                } -                 -                goto out; -        }  - -        LOCK (&bctx->lock); -        { -                if (bctx->primary == NULL) { -                        ret = bdb_db_open (bctx); -                        storage = bctx->primary; -                } else { -                        /* we are just fine, lets continue */ -                        storage = bctx->primary; -                } /* if(bctx->dbp==NULL)...else */ -        } -        UNLOCK (&bctx->lock); - -        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - -        key.data = (char *)key_string; -        key.size = strlen (key_string); -        key.flags = DB_DBT_USERMEM; - -        if (bctx->cache){ -                value.flags = DB_DBT_MALLOC; -        } else { -                if (size) { -      
                  value.data  = buf; -                        value.ulen  = size; -                        value.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; -                } else { -                        value.flags = DB_DBT_MALLOC; -                } -                value.dlen = size; -                value.doff = offset; -        } - -        do { -                /* TODO: we prefer to give our own buffer to value.data -                 * and ask bdb to fill in it */ -                ret = storage->get (storage, txnid, &key, &value, -                                    db_flags); - -                if (ret == DB_NOTFOUND) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_GET %s - %s: ENOENT" -                                "(specified key not found in database)", -                                bctx->directory, key_string); -                        ret = -1; -                        need_break = 1; -                } else if (ret == DB_LOCK_DEADLOCK) { -                        retries++; -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_GET %s - %s" -                                "(deadlock detected, retrying for %d " -                                "time)", -                                bctx->directory, key_string, retries); -                } else if (ret == 0) { -                        /* successfully read data, lets set everything -                         * in place and return */ -                        if (bctx->cache) { -                                if (buf) { -                                        copy_size = ((value.size - offset) < size) ? 
-                                                (value.size - offset) : size; - -                                        memcpy (buf, (value.data + offset), -                                                copy_size); -                                        ret = copy_size; -                                } - -                                bdb_cache_insert (bctx, &key, &value); -                        } else { -                                ret = value.size; -                        } - -                        if (size == 0) -                                GF_FREE (value.data); - -                        need_break = 1; -                } else { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_GET %s - %s: %s" -                                "(failed to retrieve specified key from" -                                " database)", -                                bctx->directory, key_string, -                                db_strerror (ret)); -                        ret = -1; -                        need_break = 1; -                } -        } while (!need_break); - -out: -        return ret; -}/* bdb_db_get */ - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_fread (struct bdb_fd *bfd, char *buf, size_t size, off_t offset) -{ -        return bdb_db_get (bfd->ctx, NULL, bfd->key, buf, size, offset); -} - -int32_t -bdb_db_iread (struct bdb_ctx *bctx, const char *key, char **bufp) -{ -        char *buf = NULL; -        size_t size = 0; -        int64_t ret = 0; - -        ret = bdb_db_get (bctx, NULL, key, NULL, 0, 0); -        size = ret; - -        if (bufp) { -                buf = GF_CALLOC (size, sizeof (char), gf_bdb_mt_char); -                *bufp = buf; -                ret = bdb_db_get (bctx, NULL, key, buf, size, 0); -        } - -        return ret;  -} - -/* bdb_storage_put - insert a key/value specified to the corresponding DB. 
- * - * @bctx: bctx_t * corresponding to the parent directory of @key_string. - *        (should always be a valid bctx). bdb_storage_put should never be - *         called if @bctx = NULL. - * @txnid: NULL if bdb_storage_put is not embedded in an explicit transaction - *         or a valid DB_TXN *, when embedded in an explicit transaction. - * @key_string: key of the database entry. - * @buf: pointer to the buffer data to be written as data for @key_string. - * @size: size of @buf. - * @offset: offset in the key's data to be modified with provided data. - * @flags: valid flags are BDB_TRUNCATE_RECORD (to reduce the data of - *         @key_string to 0 size). - * - * NOTE: bdb_storage_put tries to open DB, if @bctx->primary == NULL - *      (@bctx->primary == NULL, nobody has opened DB till now or DB was closed by - *       bdb_table_prune()). - * - * NOTE: if bctx->cache is set, bdb_storage_put deletes the key/value from bdb - *       xlator's internal cache before writing. - * - * return: 0 on success, or a non-zero value on error (the error code from - *         DB->put() is returned unchanged). - * - * also see: bdb_cache_delete for details on how a cached key/value pair is - * removed. 
- */ -static int32_t -bdb_db_put (bctx_t *bctx, -            DB_TXN *txnid, -            const char *key_string, -            const char *buf, -            size_t size, -            off_t offset, -            int32_t flags) -{ -        DB     *storage = NULL; -        DBT     key = {0,}, value = {0,}; -        int32_t ret = -1; -        int32_t db_flags = DB_AUTO_COMMIT; -        uint8_t need_break = 0; -        int32_t retries = 1; - -        LOCK (&bctx->lock); -        { -                if (bctx->primary == NULL) { -                        ret = bdb_db_open (bctx); -                        storage = bctx->primary; -                } else { -                        /* we are just fine, lets continue */ -                        storage = bctx->primary; -                } -        } -        UNLOCK (&bctx->lock); - -        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - -        if (bctx->cache) { -                ret = bdb_cache_delete (bctx, (char *)key_string); -                GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); -        } - -        key.data = (void *)key_string; -        key.size = strlen (key_string); - -        /* NOTE: bdb lets us expand the file, suppose value.size > value.len, -         * then value.len bytes from value.doff offset and value.size bytes -         * will be written from value.doff and data from -         * value.doff + value.dlen will be pushed value.doff + value.size -         */ -        value.data = (void *)buf; - -        if (flags & BDB_TRUNCATE_RECORD) { -                value.size = size; -                value.doff = 0; -                value.dlen = offset; -        } else { -                value.size = size; -                value.dlen = size; -                value.doff = offset; -        } -        value.flags = DB_DBT_PARTIAL; -        if (buf == NULL && size == 0) -                /* truncate called us */ -                value.flags = 0; - -        do { -                ret = storage->put (storage, txnid, 
&key, &value, db_flags); -                if (ret == DB_LOCK_DEADLOCK) { -                        retries++; -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_PUT %s - %s" -                                "(deadlock detected, retying for %d time)", -                                bctx->directory, key_string, retries); -                } else if (ret) { -                        /* write failed */ -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_PUT %s - %s: %s" -                                "(failed to put specified entry into database)", -                                bctx->directory, key_string, db_strerror (ret)); -                        need_break = 1; -                } else { -                        /* successfully wrote */ -                        ret = 0; -                        need_break = 1; -                } -        } while (!need_break); -out: -        return ret; -}/* bdb_db_put */ - -int32_t -bdb_db_icreate (struct bdb_ctx *bctx, const char *key) -{ -        return bdb_db_put (bctx, NULL, key, NULL, 0, 0, 0); -} - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_fwrite (struct bdb_fd *bfd, char *buf, size_t size, off_t offset) -{ -        return bdb_db_put (bfd->ctx, NULL, bfd->key, buf, size, offset, 0); -} - -/* TODO: handle errors here and log. propogate only the errno to caller */ -int32_t -bdb_db_iwrite (struct bdb_ctx *bctx, const char *key, char *buf, size_t size) -{ -        return bdb_db_put (bctx, NULL, key, buf, size, 0, 0); -} - -int32_t -bdb_db_itruncate (struct bdb_ctx *bctx, const char *key) -{ -        return bdb_db_put (bctx, NULL, key, NULL, 0, 1, 0); -} - -/* bdb_storage_del - delete a key/value pair corresponding to @path from - *  corresponding db file. - * - * @bctx: bctx_t * corresponding to the parent directory of @path. - *       (should always be a valid bctx). 
bdb_storage_del should never be called - *       if @bctx = NULL. - * @txnid: NULL if bdb_storage_del is not embedded in an explicit transaction - *   or a valid DB_TXN *, when embedded in an explicit transaction. - * @path: path to the file, whose key/value pair has to be deleted. - * - * NOTE: bdb_storage_del tries to open DB, if @bctx->dbp == NULL - *  (@bctx->dbp == NULL, nobody has opened DB till now or DB was closed by - *  bdb_table_prune()). - * - * return: 0 on success or -1 on error. - */ -static int32_t -bdb_db_del (bctx_t *bctx, -            DB_TXN *txnid, -            const char *key_string) -{ -        DB     *storage    = NULL; -        DBT     key        = {0,}; -        int32_t ret        = -1; -        int32_t db_flags   = 0; -        uint8_t need_break = 0; -        int32_t retries    = 1; - -        LOCK (&bctx->lock); -        { -                if (bctx->primary == NULL) { -                        ret = bdb_db_open (bctx); -                        storage = bctx->primary; -                } else { -                        /* we are just fine, lets continue */ -                        storage = bctx->primary; -                } -        } -        UNLOCK (&bctx->lock); - -        GF_VALIDATE_OR_GOTO ("bdb-ll", storage, out); - -        ret = bdb_cache_delete (bctx, key_string); -        GF_VALIDATE_OR_GOTO ("bdb-ll", (ret == 0), out); - -        key.data = (char *)key_string; -        key.size = strlen (key_string); -        key.flags = DB_DBT_USERMEM; - -        do { -                ret = storage->del (storage, txnid, &key, db_flags); - -                if (ret == DB_NOTFOUND) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_DEL %s - %s: ENOENT" -                                "(failed to delete entry, could not be " -                                "found in the database)", -                                bctx->directory, key_string); -                        need_break = 1; -             
   } else if (ret == DB_LOCK_DEADLOCK) { -                        retries++; -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_DEL %s - %s" -                                "(deadlock detected, retying for %d time)", -                                bctx->directory, key_string, retries); -                } else if (ret == 0) { -                        /* successfully deleted the entry */ -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_DEL %s - %s" -                                "(successfully deleted entry from database)", -                                bctx->directory, key_string); -                        ret = 0; -                        need_break = 1; -                } else { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "_BDB_DB_DEL %s - %s: %s" -                                "(failed to delete entry from database)", -                                bctx->directory, key_string, db_strerror (ret)); -                        ret = -1; -                        need_break = 1; -                } -        } while (!need_break); -out: -        return ret; -} - -int32_t -bdb_db_iremove (bctx_t *bctx, -                const char *key) -{ -        return bdb_db_del (bctx, NULL, key); -} - -/* NOTE: bdb version compatibility wrapper */ -int32_t -bdb_cursor_get (DBC *cursorp, -                DBT *sec, DBT *pri, -                DBT *val, -                int32_t flags) -{ -        int32_t ret = -1; - -        GF_VALIDATE_OR_GOTO ("bdb-ll", cursorp, out); - -#ifdef HAVE_BDB_CURSOR_GET -        ret = cursorp->pget (cursorp, sec, pri, val, flags); -#else -        ret = cursorp->c_pget (cursorp, sec, pri, val, flags); -#endif -        if ((ret != 0)  && (ret != DB_NOTFOUND)) { -                gf_log ("bdb-ll", GF_LOG_DEBUG, -                        "_BDB_CURSOR_GET: %s" -                        "(failed to retrieve entry 
from database cursor)", -                        db_strerror (ret)); -        } - -out: -        return ret; -}/* bdb_cursor_get */ - -int32_t -bdb_dirent_size (DBT *key) -{ -        return GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + key->size); -} - - - -/* bdb_dbenv_init - initialize DB_ENV - * - *  initialization includes: - *   1. opening DB_ENV (db_env_create(), DB_ENV->open()). - *      NOTE: see private->envflags for flags used. - *   2. DB_ENV->set_lg_dir - set log directory to be used for storing log files - *     (log files are the files in which transaction logs are written by db). - *   3. DB_ENV->set_flags (DB_LOG_AUTOREMOVE) - set DB_ENV to automatically - *      clear the unwanted log files (flushed at each checkpoint). - *   4. DB_ENV->set_errfile - set errfile to be used by db to report detailed - *      error logs. used only for debbuging purpose. - * - * return: returns a valid DB_ENV * on success or NULL on error. - * - */ -static DB_ENV * -bdb_dbenv_init (xlator_t *this, -                char *directory) -{ -        /* Create a DB environment */ -        DB_ENV        *dbenv       = NULL; -        int32_t        ret         = 0; -        bdb_private_t *private     = NULL; -        int32_t        fatal_flags = 0; - -        VALIDATE_OR_GOTO (this, err); -        VALIDATE_OR_GOTO (directory, err); - -        private = this->private; -        VALIDATE_OR_GOTO (private, err); - -        ret = db_env_create (&dbenv, 0); -        VALIDATE_OR_GOTO ((ret == 0), err); - -        /* NOTE: set_errpfx returns 'void' */ -        dbenv->set_errpfx(dbenv, this->name); - -        ret = dbenv->set_lk_detect (dbenv, DB_LOCK_DEFAULT); -        VALIDATE_OR_GOTO ((ret == 0), err); - -        ret = dbenv->open(dbenv, directory, -                          private->envflags, -                          S_IRUSR | S_IWUSR); -        if ((ret != 0) && (ret != DB_RUNRECOVERY)) { -                gf_log (this->name, GF_LOG_CRITICAL, -                        "failed to join 
Berkeley DB environment at %s: %s." -                        "please run manual recovery and retry running " -                        "glusterfs", -                        directory, db_strerror (ret)); -                dbenv = NULL; -                goto err; -        } else if (ret == DB_RUNRECOVERY) { -                fatal_flags = ((private->envflags & (~DB_RECOVER)) -                               | DB_RECOVER_FATAL); -                ret = dbenv->open(dbenv, directory, fatal_flags, -                                  S_IRUSR | S_IWUSR); -                if (ret != 0) { -                        gf_log (this->name, GF_LOG_CRITICAL, -                                "failed to join Berkeley DB environment in " -                                "recovery mode at %s: %s. please run manual " -                                "recovery and retry running glusterfs", -                                directory, db_strerror (ret)); -                        dbenv = NULL; -                        goto err; -                } -        } - -        ret = 0; -#if (DB_VERSION_MAJOR == 4 &&                   \ -     DB_VERSION_MINOR == 7) -        if (private->log_auto_remove) { -                ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 1); -        } else { -                ret = dbenv->log_set_config (dbenv, DB_LOG_AUTO_REMOVE, 0); -        } -#else -        if (private->log_auto_remove) { -                ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 1); -        } else { -                ret = dbenv->set_flags (dbenv, DB_LOG_AUTOREMOVE, 0); -        } -#endif -        if (ret < 0) { -                gf_log ("bdb-ll", GF_LOG_ERROR, -                        "autoremoval of transactional log files could not be " -                        "configured (%s). 
you may have to do a manual " -                        "monitoring of transactional log files and remove " -                        "periodically.", -                        db_strerror (ret)); -                goto err; -        } - -        if (private->transaction) { -                ret = dbenv->set_flags(dbenv, DB_AUTO_COMMIT, 1); - -                if (ret != 0) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "configuration of auto-commit failed for " -                                "database environment at %s. none of the " -                                "operations will be embedded in transaction " -                                "unless explicitly done so.", -                                db_strerror (ret)); -                        goto err; -                } - -                if (private->txn_timeout) { -                        ret = dbenv->set_timeout (dbenv, private->txn_timeout, -                                                  DB_SET_TXN_TIMEOUT); -                        if (ret != 0) { -                                gf_log ("bdb-ll", GF_LOG_ERROR, -                                        "could not configure Berkeley DB " -                                        "transaction timeout to %d (%s). 
please" -                                        " review 'option transaction-timeout %d" -                                        "' option.", -                                        private->txn_timeout, -                                        db_strerror (ret), -                                        private->txn_timeout); -                                goto err; -                        } -                } - -                if (private->lock_timeout) { -                        ret = dbenv->set_timeout(dbenv, -                                                 private->txn_timeout, -                                                 DB_SET_LOCK_TIMEOUT); -                        if (ret < 0) { -                                gf_log ("bdb-ll", GF_LOG_ERROR, -                                        "could not configure Berkeley DB " -                                        "lock timeout to %d (%s). please" -                                        " review 'option lock-timeout %d" -                                        "' option.", -                                        private->lock_timeout, -                                        db_strerror (ret), -                                        private->lock_timeout); -                                goto err; -                        } -                } - -                ret = dbenv->set_lg_dir (dbenv, private->logdir); -                if (ret < 0) { -                        gf_log ("bdb-ll", GF_LOG_ERROR, -                                "failed to configure libdb transaction log " -                                "directory at %s. 
please review the " -                                "'option logdir %s' option.", -                                db_strerror (ret), private->logdir); -                        goto err; -                } -        } - -        if (private->errfile) { -                private->errfp = fopen (private->errfile, "a+"); -                if (private->errfp) { -                        dbenv->set_errfile (dbenv, private->errfp); -                } else { -                        gf_log ("bdb-ll", GF_LOG_ERROR, -                                "failed to open error logging file for " -                                "libdb (Berkeley DB) internal logging (%s)." -                                "please review the 'option errfile %s' option.", -                                strerror (errno), private->errfile); -                        goto err; -                } -        } - -        return dbenv; -err: -        if (dbenv) { -                dbenv->close (dbenv, 0); -        } - -        return NULL; -} - -#define BDB_ENV(this) ((((struct bdb_private *)this->private)->b_table)->dbenv) - -/* bdb_checkpoint - during transactional usage, db does not directly write the - *  data to db files, instead db writes a 'log' (similar to a journal entry) - *  into a log file. db normally clears the log files during opening of an - *  environment. since we expect a filesystem server to run for a pretty long - *  duration and flushing 'log's during dbenv->open would prove very costly, if - *  we accumulate the log entries for one complete run of glusterfs server. to - *  flush the logs frequently, db provides a mechanism called 'checkpointing'. - *  when we do a checkpoint, db flushes the logs to disk (writes changes to db - *  files) and we can also clear the accumulated log files after checkpointing. - *  NOTE: removing unwanted log files is not part of dbenv->txn_checkpoint() - *  call. - * - * @data: xlator_t of the current instance of bdb xlator. 
- * - *  bdb_checkpoint is called in a different thread from the main glusterfs - *  thread. bdb xlator creates the checkpoint thread after successfully opening - *  the db environment. - *  NOTE: bdb_checkpoint thread shares the DB_ENV handle with the filesystem - *  thread. - * - *  db environment checkpointing frequency is controlled by - *  'option checkpoint-timeout <time-in-seconds>' in volfile. - * - * NOTE: checkpointing thread is started only if 'option transaction on' - *      specified in volfile. checkpointing is not valid for non-transactional - *      environments. - * - */ -static void * -bdb_checkpoint (void *data) -{ -        xlator_t *this = NULL; -        struct bdb_private *private = NULL; -        DB_ENV *dbenv = NULL; -        int32_t ret = 0; -        uint32_t active = 0; - -        this = (xlator_t *) data; -        dbenv = BDB_ENV(this); -        private = this->private; - -        for (;;sleep (private->checkpoint_interval)) { -                LOCK (&private->active_lock); -                active = private->active; -                UNLOCK (&private->active_lock); - -                if (active) { -                        ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); -                        if (ret) { -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CHECKPOINT: %s" -                                        "(failed to checkpoint environment)", -                                        db_strerror (ret)); -                        } else { -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CHECKPOINT: successfully " -                                        "checkpointed"); -                        } -                } else { -                        ret = dbenv->txn_checkpoint (dbenv, 1024, 0, 0); -                        if (ret) { -                                gf_log ("bdb-ll", GF_LOG_ERROR, -                       
                 "_BDB_CHECKPOINT: %s" -                                        "(final checkpointing failed. might " -                                        "need to run recovery tool manually on " -                                        "next usage of this database " -                                        "environment)", -                                        db_strerror (ret)); -                        } else { -                                gf_log ("bdb-ll", GF_LOG_DEBUG, -                                        "_BDB_CHECKPOINT: final successfully " -                                        "checkpointed"); -                        } -                        break; -                } -        } - -        return NULL; -} - - -/* bdb_db_init - initialize bdb xlator - * - * reads the options from @options dictionary and sets appropriate values in - * @this->private. also initializes DB_ENV. - * - * return: 0 on success or -1 on error - * (with logging the error through gf_log()). 
- */ -int -bdb_db_init (xlator_t *this, -             dict_t *options) -{ -        /* create a db entry for root */ -        int32_t        op_ret  = 0; -        bdb_private_t *private = NULL; -        bctx_table_t  *table = NULL; - -        char *checkpoint_interval_str = NULL; -        char *page_size_str           = NULL; -        char *lru_limit_str           = NULL; -        char *timeout_str             = NULL; -        char *access_mode             = NULL; -        char *endptr    = NULL; -        char *errfile   = NULL; -        char *directory = NULL; -        char *logdir    = NULL; -        char *mode      = NULL; -        char *mode_str  = NULL; -        int   ret = -1; -        int   idx = 0; -        struct stat stbuf = {0,}; - -        private = this->private; - -        /* cache is always on */ -        private->cache = ON; - -        ret = dict_get_str (options, "access-mode", &access_mode); -        if ((ret == 0) -            && (!strcmp (access_mode, "btree"))) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "using BTREE access mode to access libdb " -                        "(Berkeley DB)"); -                private->access_mode = DB_BTREE; -        } else { -                gf_log (this->name, GF_LOG_DEBUG, -                        "using HASH access mode to access libdb (Berkeley DB)"); -                private->access_mode = DB_HASH; -        } - -        ret = dict_get_str (options, "mode", &mode); -        if ((ret == 0) -            && (!strcmp (mode, "cache"))) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "cache data mode selected for 'storage/bdb'. 
filesystem" -                        " operations are not transactionally protected and " -                        "system crash does not guarantee recoverability of " -                        "data"); -                private->envflags = DB_CREATE | DB_INIT_LOG | -                        DB_INIT_MPOOL | DB_THREAD; -                private->dbflags = DB_CREATE | DB_THREAD; -                private->transaction = OFF; -        } else { -                gf_log (this->name, GF_LOG_DEBUG, -                        "persistent data mode selected for 'storage/bdb'. each" -                        "filesystem operation is guaranteed to be Berkeley DB " -                        "transaction protected."); -                private->transaction = ON; -                private->envflags = DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | -                        DB_INIT_MPOOL | DB_INIT_TXN | DB_RECOVER | DB_THREAD; -                private->dbflags = DB_CREATE | DB_THREAD; - - -                ret = dict_get_str (options, "lock-timeout", &timeout_str); - -                if (ret == 0) { -                        ret = gf_string2time (timeout_str, -                                              &private->lock_timeout); - -                        if (private->lock_timeout > 4260000) { -                                /* db allows us to DB_SET_LOCK_TIMEOUT to be -                                 * set to a maximum of 71 mins -                                 * (4260000 milliseconds) */ -                                gf_log (this->name, GF_LOG_DEBUG, -                                        "Berkeley DB lock-timeout parameter " -                                        "(%d) is out of range. 
please specify" -                                        " a valid timeout value for " -                                        "lock-timeout and retry.", -                                        private->lock_timeout); -                                goto err; -                        } -                } -                ret = dict_get_str (options, "transaction-timeout", -                                    &timeout_str); -                if (ret == 0) { -                        ret = gf_string2time (timeout_str, -                                              &private->txn_timeout); - -                        if (private->txn_timeout > 4260000) { -                                /* db allows us to DB_SET_TXN_TIMEOUT to be set -                                 * to a maximum of 71 mins -                                 * (4260000 milliseconds) */ -                                gf_log (this->name, GF_LOG_DEBUG, -                                        "Berkeley DB lock-timeout parameter " -                                        "(%d) is out of range. 
please specify" -                                        " a valid timeout value for " -                                        "lock-timeout and retry.", -                                        private->lock_timeout); -                                goto err; -                        } -                } - -                private->checkpoint_interval = BDB_DEFAULT_CHECKPOINT_INTERVAL; -                ret = dict_get_str (options, "checkpoint-interval", -                                    &checkpoint_interval_str); -                if (ret == 0) { -                        ret = gf_string2time (checkpoint_interval_str, -                                              &private->checkpoint_interval); - -                        if (ret < 0) { -                                gf_log (this->name, GF_LOG_DEBUG, -                                        "'%"PRIu32"' is not a valid parameter " -                                        "for checkpoint-interval option. " -                                        "please specify a valid " -                                        "checkpoint-interval and retry", -                                        private->checkpoint_interval); -                                goto err; -                        } -                } -        } - -        ret = dict_get_str (options, "file-mode", &mode_str); -        if (ret == 0) { -                private->file_mode = strtol (mode_str, &endptr, 8); - -                if ((*endptr) || -                    (!IS_VALID_FILE_MODE(private->file_mode))) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "'%o' is not a valid parameter for file-mode " -                                "option. 
please specify a valid parameter for " -                                "file-mode and retry.", -                                private->file_mode); -                        goto err; -                } -        } else { -                private->file_mode = DEFAULT_FILE_MODE; -        } -        private->symlink_mode = private->file_mode | S_IFLNK; -        private->file_mode = private->file_mode | S_IFREG; - -        ret = dict_get_str (options, "dir-mode", &mode_str); -        if (ret == 0) { -                private->dir_mode = strtol (mode_str, &endptr, 8); -                if ((*endptr) || -                    (!IS_VALID_FILE_MODE(private->dir_mode))) { -                        gf_log (this->name, GF_LOG_DEBUG, -                                "'%o' is not a valid parameter for dir-mode " -                                "option. please specify a valid parameter for " -                                "dir-mode and retry.", -                                private->dir_mode); -                        goto err; -                } -        } else { -                private->dir_mode = DEFAULT_DIR_MODE; -        } - -        private->dir_mode = private->dir_mode | S_IFDIR; - -        table = GF_CALLOC (1, sizeof (*table), gf_bdb_mt_bctx_table_t); -        if (table == NULL) { -                gf_log ("bdb-ll", GF_LOG_CRITICAL, -                        "memory allocation for 'storage/bdb' internal " -                        "context table failed."); -                goto err; -        } - -        INIT_LIST_HEAD(&(table->b_lru)); -        INIT_LIST_HEAD(&(table->active)); -        INIT_LIST_HEAD(&(table->purge)); - -        LOCK_INIT (&table->lock); -        LOCK_INIT (&table->checkpoint_lock); - -        table->transaction = private->transaction; -        table->access_mode = private->access_mode; -        table->dbflags = private->dbflags; -        table->this    = this; - -        ret = dict_get_str (options, "lru-limit", -                            
&lru_limit_str); - -        /* TODO: set max lockers and max txns to accomodate -         * for more than lru_limit */ -        if (ret == 0) { -                ret = gf_string2uint32 (lru_limit_str, -                                        &table->lru_limit); -                gf_log ("bdb-ll", GF_LOG_DEBUG, -                        "setting lru limit of 'storage/bdb' internal context" -                        "table to %d. maximum of %d unused databases can be " -                        "open at any given point of time.", -                        table->lru_limit, table->lru_limit); -        } else { -                table->lru_limit = BDB_DEFAULT_LRU_LIMIT; -        } - -        ret = dict_get_str (options, "page-size", -                            &page_size_str); - -        if (ret == 0) { -                ret = gf_string2bytesize (page_size_str, -                                          &table->page_size); -                if (ret < 0) { -                        gf_log ("bdb-ll", GF_LOG_ERROR, -                                "\"%s\" is an invalid parameter to " -                                "\"option page-size\". please specify a valid " -                                "size and retry.", -                                page_size_str); -                        goto err; -                } - -                if (!PAGE_SIZE_IN_RANGE(table->page_size)) { -                        gf_log ("bdb-ll", GF_LOG_ERROR, -                                "\"%s\" is out of range for Berkeley DB " -                                "page-size. allowed page-size range is %d to " -                                "%d. 
please specify a page-size value in the " -                                "range and retry.", -                                page_size_str, BDB_LL_PAGE_SIZE_MIN, -                                BDB_LL_PAGE_SIZE_MAX); -                        goto err; -                } -        } else { -                table->page_size = BDB_LL_PAGE_SIZE_DEFAULT; -        } - -        table->hash_size = BDB_DEFAULT_HASH_SIZE; -        table->b_hash = GF_CALLOC (BDB_DEFAULT_HASH_SIZE, -                                   sizeof (struct list_head), -                                   gf_bdb_mt_list_head); - -        for (idx = 0; idx < table->hash_size; idx++) -                INIT_LIST_HEAD(&(table->b_hash[idx])); - -        private->b_table = table; - -        ret = dict_get_str (options, "errfile", &errfile); -        if (ret == 0) { -                private->errfile = gf_strdup (errfile); -                gf_log (this->name, GF_LOG_DEBUG, -                        "using %s as error logging file for libdb (Berkeley DB " -                        "library) internal logging.", private->errfile); -        } - -        ret = dict_get_str (options, "directory", &directory); - -        if (ret == 0) { -                ret = dict_get_str (options, "logdir", &logdir); - -                if (ret < 0) { -                        gf_log ("bdb-ll", GF_LOG_DEBUG, -                                "using the database environment home " -                                "directory (%s) itself as transaction log " -                                "directory", directory); -                        private->logdir = gf_strdup (directory); - -                } else { -                        private->logdir = gf_strdup (logdir); - -                        op_ret = stat (private->logdir, &stbuf); -                        if ((op_ret != 0) -                            || (!S_ISDIR (stbuf.st_mode))) { -                                gf_log ("bdb-ll", GF_LOG_ERROR, -                                     
   "specified logdir %s does not exist. " -                                        "please provide a valid existing " -                                        "directory as parameter to 'option " -                                        "logdir'", -                                        private->logdir); -                                goto err; -                        } -                } - -                private->b_table->dbenv = bdb_dbenv_init (this, directory); -                if (private->b_table->dbenv == NULL) { -                        gf_log ("bdb-ll", GF_LOG_ERROR, -                                "initialization of database environment " -                                "failed"); -                        goto err; -                } else { -                        if (private->transaction) { -                                /* all well, start the checkpointing thread */ -                                LOCK_INIT (&private->active_lock); - -                                LOCK (&private->active_lock); -                                { -                                        private->active = 1; -                                } -                                UNLOCK (&private->active_lock); -                                pthread_create (&private->checkpoint_thread, -                                                NULL, bdb_checkpoint, this); -                        } -                } -        } - -        return op_ret; -err: -        if (table) { -                GF_FREE (table->b_hash); -                GF_FREE (table); -        } -        if (private) { -                if (private->errfile) -                        GF_FREE (private->errfile); - -                if (private->logdir) -                        GF_FREE (private->logdir); -        } - -        return -1; -}  | 
