summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorM. Mohan Kumar <mohan@in.ibm.com>2012-11-29 21:46:05 +0530
committerVijay Bellur <vbellur@redhat.com>2012-11-29 09:35:20 -0800
commitee968619cf936f0e25299beb1996abc27ed3dc72 (patch)
tree746bba7ee5bc04a367ee0eb949cff1870f8c0fed
parentb90b2c17b6b678e5aa1440a62b7588f8b7c52947 (diff)
xlators: Add Block Device(BD) backend translator
Add a new server storage xlator 'bd mapper'. Intention of this xlator is to add block device backend support to gluster. It exports block devices as regular files to the gluster client. The immediate goal of this translator is to use logical volumes to store VM images and expose them as files to QEMU/KVM. Given Volume group is represented as directory and its logical volumes as files. By exporting LUNs/LVs as regular files, it becomes possible to: * Associate each VM to a LV/LUN * Use file system commands like cp to take copy of VM images * Create linked clones of VM by doing LV snapshot at server side * Implement thin provisioning by developing a qcow2 translator As of now this patchset maps only logical volumes. BD Mapper volume file specifies which Volume group to export to the client. BD xlator exports the volume group as a directory and all logical volumes under that as regular files. BD xlator uses lvm2-devel APIs for getting the list of Volume Groups and Logical Volumes in the system. The eventual goal of this work is to support thin provisioning, snapshot, copy etc of VM images seamlessly in glusterfs storage environment BUG: 805138 Change-Id: I13b69d39d7fd199c101c8e9e4f2cf10772bdc3dd Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Reviewed-on: http://review.gluster.org/3551 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--configure.ac48
-rw-r--r--xlators/storage/Makefile.am5
-rw-r--r--xlators/storage/bd_map/Makefile.am3
-rw-r--r--xlators/storage/bd_map/src/Makefile.am21
-rw-r--r--xlators/storage/bd_map/src/bd_map.c896
-rw-r--r--xlators/storage/bd_map/src/bd_map.h75
-rw-r--r--xlators/storage/bd_map/src/bd_map_help.c462
-rw-r--r--xlators/storage/bd_map/src/bd_map_help.h63
8 files changed, 1562 insertions, 11 deletions
diff --git a/configure.ac b/configure.ac
index 4e638d56..9640c9d5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -51,6 +51,8 @@ AC_CONFIG_FILES([Makefile
xlators/storage/Makefile
xlators/storage/posix/Makefile
xlators/storage/posix/src/Makefile
+ xlators/storage/bd_map/Makefile
+ xlators/storage/bd_map/src/Makefile
xlators/cluster/Makefile
xlators/cluster/afr/Makefile
xlators/cluster/afr/src/Makefile
@@ -262,6 +264,31 @@ if test "x$enable_fuse_client" != "xno"; then
BUILD_FUSE_CLIENT="yes"
fi
+AC_ARG_ENABLE([bd-xlator],
+ AC_HELP_STRING([--enable-bd-xlator],
+ [Build BD xlator]))
+
+if test "x$enable_bd_xlator" != "xno"; then
+ AC_CHECK_LIB([lvm2app],
+ [lvm_init],
+ [HAVE_BD_LIB="yes"],
+ [HAVE_BD_LIB="no"])
+fi
+
+if test "x$enable_bd_xlator" = "xyes" -a "x$HAVE_BD_LIB" = "xno"; then
+ echo "BD xlator requested but required lvm2 development library not found."
+ exit 1
+fi
+
+BUILD_BD_XLATOR=no
+if test "x${enable_bd_xlator}" != "xno" -a "x${HAVE_BD_LIB}" = "xyes"; then
+ BUILD_BD_XLATOR=yes
+ AC_DEFINE(HAVE_BD_XLATOR, 1, [define if lvm2app library found and bd
+ xlator enabled])
+fi
+
+AM_CONDITIONAL([ENABLE_BD_XLATOR], [test x$BUILD_BD_XLATOR = xyes])
+
AC_SUBST(FUSE_CLIENT_SUBDIR)
# end FUSE section
@@ -617,14 +644,15 @@ AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
-echo "readline : $BUILD_READLINE"
-echo "georeplication : $BUILD_SYNCDAEMON"
-echo "Linux-AIO : $BUILD_LIBAIO"
-echo "Enable Debug : $DEBUG"
-echo "systemtap : $BUILD_SYSTEMTAP"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "Infiniband verbs : $BUILD_IBVERBS"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "argp-standalone : $BUILD_ARGP_STANDALONE"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $DEBUG"
+echo "systemtap : $BUILD_SYSTEMTAP"
+echo "Block Device backend : $BUILD_BD_XLATOR"
echo
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am
index 9cb9ded3..e1316a12 100644
--- a/xlators/storage/Makefile.am
+++ b/xlators/storage/Makefile.am
@@ -1,3 +1,6 @@
SUBDIRS = posix
-CLEANFILES =
+if ENABLE_BD_XLATOR
+SUBDIRS += bd_map
+endif
+CLEANFILES =
diff --git a/xlators/storage/bd_map/Makefile.am b/xlators/storage/bd_map/Makefile.am
new file mode 100644
index 00000000..a985f42a
--- /dev/null
+++ b/xlators/storage/bd_map/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/storage/bd_map/src/Makefile.am b/xlators/storage/bd_map/src/Makefile.am
new file mode 100644
index 00000000..be43d2ab
--- /dev/null
+++ b/xlators/storage/bd_map/src/Makefile.am
@@ -0,0 +1,21 @@
+
+if ENABLE_BD_XLATOR
+xlator_LTLIBRARIES = bd_map.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
+
+# libtool's link-mode option is spelled "-avoid-version"; it keeps the
+# xlator module from getting a versioned file name.
+bd_map_la_LDFLAGS = -module -avoid-version
+LIBBD = -llvm2app -lrt
+bd_map_la_SOURCES = bd_map.c bd_map_help.c
+bd_map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD)
+
+noinst_HEADERS = bd_map.h bd_map_help.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/storage/bd_map/src/bd_map.c b/xlators/storage/bd_map/src/bd_map.c
new file mode 100644
index 00000000..e7a684c5
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map.c
@@ -0,0 +1,896 @@
+/*
+ BD translator - Exports Block devices on server side as regular
+ files to client
+
+ For now, only exporting logical volumes is supported.
+
+ Copyright IBM, Corp. 2012
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <lvm2app.h>
+#include <openssl/md5.h>
+
+#include "bd_map.h"
+#include "bd_map_help.h"
+#include "defaults.h"
+#include "glusterfs3-xdr.h"
+
+
+/* Regular fops */
+
+/**
+ * bd_lookup - resolve loc->path against the in-memory BD entry tree
+ *
+ * Copies the matching entry's iatt into the reply.  When loc->parent is set,
+ * the parent directory's iatt is looked up as well and returned as
+ * postparent.  A missing entry is reported as -1/ENOENT (the parent lookup
+ * is still attempted); a missing parent is logged and also yields ENOENT.
+ *
+ * Always unwinds the frame and returns 0.
+ */
+int32_t
+bd_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+        struct iatt  buf        = {0, };
+        struct iatt  postparent = {0, };
+        int32_t      op_ret     = -1;
+        int32_t      entry_ret  = 0;
+        int32_t      op_errno   = EINVAL;
+        char        *pathdup    = NULL;
+        char        *p          = NULL;
+        bd_entry_t  *bdentry    = NULL;
+        bd_priv_t   *priv       = NULL;
+
+        /* A failed validation unwinds with -1/EINVAL */
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        /* Arguments are sane; from here on errors set op_errno explicitly */
+        op_errno = 0;
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (bdentry) {
+                memcpy (&buf, bdentry->attr, sizeof(buf));
+                BD_PUT_ENTRY (priv, bdentry);
+        } else {
+                /* Remember the miss but still try to report the parent */
+                op_errno = ENOENT;
+                entry_ret = -1;
+        }
+
+        if (loc->parent) {
+                pathdup = gf_strdup (loc->path);
+                if (!pathdup) {
+                        op_errno = ENOMEM;
+                        goto out;
+                }
+
+                /* Cut the path at its last '/' to obtain the parent path */
+                p = strrchr (pathdup, '/');
+                if (!p) {
+                        /* A path without any '/' has no parent component */
+                        op_errno = EINVAL;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "invalid path %s: no parent component",
+                                loc->path);
+                        goto out;
+                }
+                if (p == pathdup)
+                        *(p+1) = '\0';  /* parent is the root: keep "/" */
+                else
+                        *p = '\0';
+
+                BD_ENTRY (priv, bdentry, pathdup);
+                if (!bdentry) {
+                        op_errno = ENOENT;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "post-operation lookup on parent of %s "
+                                "failed: %s",
+                                loc->path, strerror (op_errno));
+                        goto out;
+                }
+                memcpy (&postparent, bdentry->attr, sizeof(postparent));
+                BD_PUT_ENTRY (priv, bdentry);
+        }
+
+        op_ret = entry_ret;
+out:
+        if (pathdup)
+                GF_FREE (pathdup);
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             (loc)?loc->inode:NULL, &buf, NULL, &postparent);
+
+        return 0;
+}
+
+/**
+ * bd_stat - return the cached iatt of the entry named by loc->path
+ *
+ * Unwinds with 0 and a copy of the entry's attributes, with -1/ENOENT when
+ * no entry matches the path, or with -1/EINVAL when an argument is missing.
+ * Always returns 0.
+ */
+int32_t
+bd_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        struct iatt  buf      = {0,};
+        int32_t      op_ret   = -1;
+        int32_t      op_errno = EINVAL;
+        bd_entry_t  *bdentry  = NULL;
+        bd_priv_t   *priv     = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        /* The path is used for the lookup and in the log message below */
+        VALIDATE_OR_GOTO (loc->path, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (!bdentry) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR, "stat on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+        memcpy (&buf, bdentry->attr, sizeof(buf));
+        BD_PUT_ENTRY (priv, bdentry);
+
+        op_ret = 0;
+        op_errno = 0;
+out:
+        STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL);
+
+        return 0;
+}
+
+/**
+ * bd_opendir - open the directory entry named by loc->path
+ *
+ * Allocates a bd_fd_t holding a reference on the directory entry (p_entry)
+ * and a cursor (entry) positioned on the first node of its child list, then
+ * stores it in the fd context.  bd_releasedir drops the reference and frees
+ * the structure.
+ *
+ * On any failure the reference and the bd_fd_t are released here and the
+ * frame is unwound with -1.  Always returns 0.
+ */
+int32_t
+bd_opendir (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        int32_t      op_ret   = -1;
+        int32_t      op_errno = EINVAL;
+        bd_fd_t     *bd_fd    = NULL;
+        bd_entry_t  *bdentry  = NULL;
+        bd_priv_t   *priv     = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (!bdentry) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR, "opendir failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+
+        bd_fd = GF_CALLOC (1, sizeof(*bd_fd), gf_bd_fd);
+        if (!bd_fd) {
+                op_errno = ENOMEM;
+                /* No bd_fd owns the reference yet, so drop it right here */
+                BD_PUT_ENTRY (priv, bdentry);
+                goto out;
+        }
+
+        /* From now on the reference is owned by bd_fd->p_entry */
+        bd_fd->p_entry = bdentry;
+
+        /*
+         * Position the cursor on the first node of the child list.
+         * list_entry() is pure pointer arithmetic and cannot yield NULL,
+         * so no NULL check is needed on its result.
+         */
+        bd_fd->entry = list_entry ((&bdentry->child)->next, typeof(*bdentry),
+                                   child);
+
+        if (fd_ctx_set (fd, this, (uint64_t) (long)bd_fd)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set the fd context path=%s fd=%p",
+                        loc->path, fd);
+                goto out;
+        }
+
+        op_ret = 0;
+out:
+        /* bd_fd is NULL when we failed before (or while) allocating it */
+        if (op_ret == -1 && bd_fd) {
+                BD_PUT_ENTRY (priv, bd_fd->p_entry);
+                GF_FREE (bd_fd);
+        }
+
+        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
+        return 0;
+}
+
+/**
+ * bd_releasedir - tear down the per-fd state created by bd_opendir
+ *
+ * Removes the bd_fd_t from the fd context, drops the reference held on the
+ * directory entry and frees the structure.  Always returns 0.
+ */
+int32_t
+bd_releasedir (xlator_t *this, fd_t *fd)
+{
+        bd_priv_t  *priv  = NULL;
+        bd_fd_t    *bd_fd = NULL;
+        uint64_t    ctx   = 0;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        if (fd_ctx_del (fd, this, &ctx) < 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "bd_fd from fd=%p is NULL",
+                        fd);
+                goto out;
+        }
+
+        bd_fd = (bd_fd_t *) (long)ctx;
+        BD_PUT_ENTRY (priv, bd_fd->p_entry);
+        GF_FREE (bd_fd);
+out:
+        return 0;
+}
+
+/*
+ * bd_statfs: Mimics statfs by reporting the size and free size of the
+ * exported volume group, expressed in units of the root filesystem's
+ * fragment size (the remaining statvfs fields are those of "/").
+ *
+ * Unwinds with -1 when the "export" option is missing, the volume group
+ * cannot be opened or statvfs() fails.  Always returns 0.
+ *
+ * TODO: If more than one VG is allowed per volume, this function needs
+ * some changes.
+ */
+int32_t
+bd_statfs (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, dict_t *xdata)
+{
+        int32_t         op_ret   = -1;
+        int32_t         ret      = -1;
+        int32_t         op_errno = EINVAL;
+        bd_priv_t      *priv     = NULL;
+        struct statvfs  buf      = {0, };
+        vg_t            vg       = NULL;
+        char           *vg_name  = NULL;
+        uint64_t        size     = 0;
+        uint64_t        fr_size  = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = dict_get_str (this->options, "export", &vg_name);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify volume groups");
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        BD_RD_LOCK (&priv->lock);
+
+        vg = lvm_vg_open (priv->handle, vg_name, "r", 0);
+        if (!vg) {
+                /* Never hand a NULL vg to the lvm_vg_get_*() accessors */
+                BD_UNLOCK (&priv->lock);
+                gf_log (this->name, GF_LOG_ERROR,
+                        "opening volume group %s failed", vg_name);
+                op_errno = EIO;
+                goto out;
+        }
+        size = lvm_vg_get_size (vg);
+        fr_size = lvm_vg_get_free_size (vg);
+        lvm_vg_close (vg);
+
+        BD_UNLOCK (&priv->lock);
+
+        /* Borrow block-size and naming limits from the root filesystem */
+        if (statvfs ("/", &buf) < 0) {
+                op_errno = errno;
+                goto out;
+        }
+        if (!buf.f_frsize) {
+                /* Guard the divisions below */
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        buf.f_blocks = size / buf.f_frsize;
+        buf.f_bfree = fr_size / buf.f_frsize;
+        buf.f_bavail = fr_size / buf.f_frsize;
+
+        op_ret = 0;
+        op_errno = 0;
+out:
+        STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL);
+        return 0;
+}
+
+/*
+ * __bd_fill_readdir: append directory entries to 'entries', starting after
+ * the cursor kept in bd_fd->entry, until the next entry would push the
+ * accumulated size past 'size' or the sibling ring wraps around.
+ *
+ * The whole walk runs under a read lock on 'bd_lock'. bd_fd->entry is
+ * advanced as entries are consumed, so a later call resumes where this one
+ * stopped. Returns the number of entries appended.
+ */
+int
+__bd_fill_readdir (pthread_rwlock_t *bd_lock, bd_fd_t *bd_fd, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ size_t filled = 0;
+ int count = 0;
+ struct dirent entry = {0, };
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ bd_entry_t *bdentry = NULL;
+ bd_entry_t *cur_entry = NULL;
+ bd_entry_t *n_entry = NULL;
+
+ BD_RD_LOCK (bd_lock);
+
+ /* First node on the parent's child list; its sibling link is used
+ * below as the end-of-directory marker */
+ bdentry = list_entry ((&bd_fd->p_entry->child)->next, typeof(*n_entry),
+ child);
+
+ if (off) {
+ /* NOTE(review): 'i' is never incremented, so 'i == off' cannot
+ * hold for a non-zero 'off' and the cursor is left where the
+ * previous call stopped -- confirm whether a real seek to 'off'
+ * was intended */
+ int i = 0;
+ list_for_each_entry (n_entry, &bd_fd->entry->sibling, sibling) {
+ if (i == off && strcmp (n_entry->name, "")) {
+ bd_fd->entry = n_entry;
+ break;
+ }
+ }
+ } else
+ /* Offset 0: rewind the cursor to the first node */
+ bd_fd->entry = list_entry ((&bdentry->sibling),
+ typeof(*n_entry), sibling);
+
+ while (filled <= size) {
+ cur_entry = bd_fd->entry;
+
+ n_entry = list_entry ((&bd_fd->entry->sibling)->next,
+ typeof (*cur_entry), sibling);
+ /* Wrapped around to the first node: no more entries */
+ if (&n_entry->sibling == (&bdentry->sibling))
+ break;
+
+ strcpy (entry.d_name, n_entry->name);
+ entry.d_ino = n_entry->attr->ia_ino;
+ entry.d_off = off;
+ if (n_entry->attr->ia_type == IA_IFDIR)
+ entry.d_type = DT_DIR;
+ else
+ entry.d_type = DT_REG;
+
+ /* Size charged for this entry: the larger of the two record
+ * structures plus the NUL-terminated name */
+ this_size = max (sizeof(gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry.d_name) + 1;
+
+ /* Entry does not fit; leave the cursor on the previous one */
+ if (this_size + filled > size)
+ break;
+
+ bd_fd->entry = n_entry;
+
+ this_entry = gf_dirent_for_name (entry.d_name);
+ if (!this_entry) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not create gf_dirent for entry %s",
+ entry.d_name);
+ goto out;
+ }
+ this_entry->d_off = off;
+ this_entry->d_ino = entry.d_ino;
+ this_entry->d_type = entry.d_type;
+ off++;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+out:
+ BD_UNLOCK (bd_lock);
+ return count;
+}
+
+/**
+ * bd_do_readdir - common implementation of readdir and readdirp
+ *
+ * Fills a dirent list from the directory cursor stored in the fd context.
+ * For GF_FOP_READDIRP each returned entry is additionally looked up by path
+ * so that its iatt (and inode number) can be attached.
+ *
+ * Unwinds with the number of entries returned, or with -1/EINVAL when an
+ * argument or the fd context is missing.  Always returns 0.
+ */
+int32_t
+bd_do_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t off, int whichop)
+{
+        uint64_t      tmp_bd_fd = 0;
+        bd_fd_t      *bd_fd     = NULL;
+        int           ret       = -1;
+        int           count     = 0;
+        int           len       = 0;
+        int32_t       op_ret    = -1;
+        int32_t       op_errno  = EINVAL;
+        gf_dirent_t   entries;
+        gf_dirent_t  *tmp_entry = NULL;
+        bd_entry_t   *bdentry   = NULL;
+        bd_priv_t    *priv      = NULL;
+        char          path[PATH_MAX] = {0, };
+
+        /*
+         * Must happen before the first 'goto out': the list is handed to
+         * the callback and to gf_dirent_free() on every path.
+         */
+        INIT_LIST_HEAD (&entries.list);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = fd_ctx_get (fd, this, &tmp_bd_fd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_WARNING, "bd_fd is NULL, fd=%p", fd);
+                op_errno = EINVAL;
+                goto out;
+        }
+        bd_fd = (bd_fd_t *) (long)tmp_bd_fd;
+
+        /* Start from a clean errno so a stale value is never reported */
+        errno = 0;
+        LOCK (&fd->lock);
+        {
+                count = __bd_fill_readdir (&priv->lock, bd_fd, off,
+                                           size, &entries);
+        }
+        UNLOCK (&fd->lock);
+
+        /* Report whatever the fill step left in errno (0 if nothing) */
+        op_errno = errno;
+        op_ret = count;
+
+        if (whichop != GF_FOP_READDIRP)
+                goto out;
+
+        /* readdirp: attach the attributes of every entry just collected */
+        BD_RD_LOCK (&priv->lock);
+        list_for_each_entry (tmp_entry, &entries.list, list) {
+                len = snprintf (path, sizeof (path), "%s/%s",
+                                bd_fd->p_entry->name, tmp_entry->d_name);
+                if (len < 0 || (size_t) len >= sizeof (path)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "path too long for entry %s",
+                                tmp_entry->d_name);
+                        continue;
+                }
+                bdentry = bd_entry_get (path);
+                if (!bdentry) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "entry failed %s\n", tmp_entry->d_name);
+                        continue;
+                }
+                if (bdentry->attr->ia_ino)
+                        tmp_entry->d_ino = bdentry->attr->ia_ino;
+                memcpy (&tmp_entry->d_stat,
+                        bdentry->attr, sizeof (tmp_entry->d_stat));
+                bd_entry_put (bdentry);
+        }
+        BD_UNLOCK (&priv->lock);
+
+out:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+/* readdir fop: plain listing, delegated to bd_do_readdir (which returns 0) */
+int32_t
+bd_readdir (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+        return bd_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
+}
+
+
+/* readdirp fop: listing with attributes, delegated to bd_do_readdir (which
+ * returns 0) */
+int32_t
+bd_readdirp (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+        return bd_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
+}
+
+/* Statedump hook for private data (registered in dumpops): nothing is
+ * dumped, it simply returns 0 */
+int32_t
+bd_priv (xlator_t *this)
+{
+ return 0;
+}
+
+/* Statedump hook for inodes (registered in dumpops): nothing is dumped, it
+ * simply returns 0 */
+int32_t
+bd_inode (xlator_t *this)
+{
+ return 0;
+}
+
+/* unsupported interfaces */
+
+/* setattr: not supported, always fails with ENOSYS */
+int
+bd_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+            struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
+                             NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * readlink: not supported, always fails with ENOSYS.
+ *
+ * No link buffer is returned: the call fails unconditionally, so there is
+ * nothing to read, and allocating 'size' bytes on the stack would let the
+ * caller-supplied size overflow it.
+ */
+int32_t
+bd_readlink (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, size_t size, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+
+        STACK_UNWIND_STRICT (readlink, frame, -1, ENOSYS, NULL, &stbuf, NULL);
+        return 0;
+}
+
+/* mknod: not supported, always fails with ENOSYS and zeroed attributes */
+int
+bd_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t dev, mode_t umask, dict_t *xdata)
+{
+        struct iatt  empty_stat = {0, };
+        struct iatt  empty_pre  = {0, };
+        struct iatt  empty_post = {0, };
+        inode_t     *inode      = NULL;
+
+        if (loc)
+                inode = loc->inode;
+
+        STACK_UNWIND_STRICT (mknod, frame, -1, ENOSYS, inode, &empty_stat,
+                             &empty_pre, &empty_post, NULL);
+        return 0;
+}
+
+/* mkdir: not supported, always fails with ENOSYS and zeroed attributes */
+int
+bd_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata)
+{
+        struct iatt  empty_stat = {0, };
+        struct iatt  empty_pre  = {0, };
+        struct iatt  empty_post = {0, };
+        inode_t     *inode      = NULL;
+
+        if (loc)
+                inode = loc->inode;
+
+        STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSYS, inode, &empty_stat,
+                             &empty_pre, &empty_post, NULL);
+        return 0;
+}
+
+/* rmdir: not supported, always fails with ENOSYS and zeroed attributes */
+int
+bd_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+          dict_t *xdata)
+{
+        struct iatt empty_pre  = {0, };
+        struct iatt empty_post = {0, };
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+                             &empty_pre, &empty_post, NULL);
+        return 0;
+}
+
+/* link: not supported, always fails with ENOSYS and zeroed attributes */
+int
+bd_link (call_frame_t *frame, xlator_t *this,
+         loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        struct iatt  empty_stat = {0, };
+        struct iatt  empty_pre  = {0,};
+        struct iatt  empty_post = {0,};
+        inode_t     *inode      = NULL;
+
+        if (oldloc)
+                inode = oldloc->inode;
+
+        STACK_UNWIND_STRICT (link, frame, -1, ENOSYS, inode, &empty_stat,
+                             &empty_pre, &empty_post, NULL);
+
+        return 0;
+}
+
+/* setxattr: not supported, always fails with ENOSYS */
+int32_t
+bd_setxattr (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * fsetxattr: not supported, always fails with ENOSYS.
+ * Unwinds as 'fsetxattr' so the callback type matches the fop that was
+ * wound.
+ */
+int32_t
+bd_fsetxattr (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+/* getxattr: not supported, always fails with ENOSYS */
+int32_t
+bd_getxattr (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *name, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+}
+
+/* fgetxattr: not supported, always fails with ENOSYS */
+int32_t
+bd_fgetxattr (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *name, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+        return 0;
+}
+
+/* removexattr: not supported, always fails with ENOSYS */
+int32_t
+bd_removexattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/* fremovexattr: not supported, always fails with ENOSYS */
+int32_t
+bd_fremovexattr (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, const char *name, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/* fsyncdir: not supported, always fails with ENOSYS */
+int32_t
+bd_fsyncdir (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, int datasync, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/* Counter shared by all lock fops below to rate-limit their log message */
+static int gf_bd_lk_log;
+
+/*
+ * lk: locking is not implemented in this xlator; logs (occasionally) that
+ * features/locks is missing and fails with ENOSYS.
+ */
+int32_t
+bd_lk (call_frame_t *frame, xlator_t *this,
+       fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        struct gf_flock empty_lock = {0, };
+        const int32_t   op_ret     = -1;
+        const int32_t   op_errno   = ENOSYS;
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, &empty_lock, NULL);
+        return 0;
+}
+
+/*
+ * inodelk: locking is not implemented in this xlator; logs (occasionally)
+ * that features/locks is missing and fails with ENOSYS.
+ */
+int32_t
+bd_inodelk (call_frame_t *frame, xlator_t *this,
+            const char *volume, loc_t *loc, int32_t cmd,
+            struct gf_flock *lock, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * finodelk: locking is not implemented in this xlator; logs (occasionally)
+ * that features/locks is missing and fails with ENOSYS.
+ */
+int32_t
+bd_finodelk (call_frame_t *frame, xlator_t *this,
+             const char *volume, fd_t *fd, int32_t cmd,
+             struct gf_flock *lock, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+
+/*
+ * entrylk: locking is not implemented in this xlator; logs (occasionally)
+ * that features/locks is missing and fails with ENOSYS.
+ */
+int32_t
+bd_entrylk (call_frame_t *frame, xlator_t *this,
+            const char *volume, loc_t *loc, const char *basename,
+            entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * fentrylk: locking is not implemented in this xlator; logs (occasionally)
+ * that features/locks is missing and fails with ENOSYS.
+ */
+int32_t
+bd_fentrylk (call_frame_t *frame, xlator_t *this,
+             const char *volume, fd_t *fd, const char *basename,
+             entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, NULL);
+        return 0;
+}
+
+/*
+ * rchecksum: not supported, always fails with ENOSYS.
+ * The strong checksum buffer is zero-filled so that no indeterminate stack
+ * bytes are ever handed to the callback.
+ */
+int32_t
+bd_rchecksum (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
+{
+        int32_t       weak_checksum = 0;
+        unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0, };
+
+        STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOSYS,
+                             weak_checksum, strong_checksum, NULL);
+        return 0;
+}
+
+/* xattrop: not supported, fails with ENOSYS and hands back the caller's
+ * xattr dictionary */
+int
+bd_xattrop (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr,
+            dict_t *xdata)
+{
+        const int32_t op_ret   = -1;
+        const int32_t op_errno = ENOSYS;
+
+        STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL);
+        return 0;
+}
+
+
+/*
+ * fxattrop: not supported, fails with ENOSYS and hands back the caller's
+ * xattr dictionary.  Unwinds as 'fxattrop' so the callback type matches the
+ * fop that was wound.
+ */
+int
+bd_fxattrop (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr,
+             dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOSYS, xattr, NULL);
+        return 0;
+}
+
+/**
+ * notify - answer GF_EVENT_PARENT_UP with GF_EVENT_CHILD_UP
+ *
+ * Every other event is ignored.  Always returns 0.
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        /* Tell the parent that bd xlator is up */
+        if (event == GF_EVENT_PARENT_UP)
+                default_notify (this, GF_EVENT_CHILD_UP, data);
+
+        return 0;
+}
+
+/**
+ * mem_acct_init - set up memory accounting for this xlator's own types
+ *
+ * Returns the result of xlator_mem_acct_init() (0 on success), or -1 when
+ * 'this' is NULL.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_bd_mt_end + 1);
+        if (ret != 0)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting init failed");
+
+        return ret;
+}
+
+
+/**
+ * init - Constructs lists of LVs in the given VG
+ *
+ * Creates the root entry, checks the volume graph (no subvolumes allowed),
+ * reads the "device" and "export" options, allocates the private structure
+ * and builds the entry list for the exported volume group.
+ *
+ * Returns 0 on success.  On any failure everything set up so far is torn
+ * down, this->private is reset to NULL and -1 is returned.
+ */
+int
+init (xlator_t *this)
+{
+        bd_priv_t *_private = NULL;
+        int        ret      = 0;
+        char      *vg       = NULL;
+        char      *device   = NULL;
+
+        LOCK_INIT (&inode_lk);
+
+        bd_rootp = bd_entry_add_root ();
+        if (!bd_rootp) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: adding root entry failed");
+                return -1;
+        }
+
+        if (this->children) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd cannot have subvolumes");
+                goto error;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Volume is dangling. Please check the volume file.");
+        }
+
+        ret = dict_get_str (this->options, "device", &device);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify backend");
+                goto error;
+        }
+
+        /* Now we support only LV device */
+        if (strcasecmp (device, BACKEND_VG)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: unknown %s backend %s", BD_XLATOR, device);
+                goto error;
+        }
+
+        ret = dict_get_str (this->options, "export", &vg);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify volume groups");
+                goto error;
+        }
+
+        _private = GF_CALLOC (1, sizeof(*_private), gf_bd_private);
+        if (!_private)
+                goto error;
+
+        pthread_rwlock_init (&_private->lock, NULL);
+        this->private = (void *)_private;
+        _private->handle = NULL;
+        _private->vg = gf_strdup (vg);
+        if (!_private->vg)
+                goto error;
+
+        if (bd_build_lv_list (this->private, vg) < 0)
+                goto error;
+
+        return 0;
+
+error:
+        if (_private) {
+                BD_WR_LOCK (&_private->lock);
+                bd_entry_cleanup ();
+                BD_UNLOCK (&_private->lock);
+
+                /* The handle is only set once the LV list build got going */
+                if (_private->handle)
+                        lvm_quit (_private->handle);
+                if (_private->vg)
+                        GF_FREE (_private->vg);
+                GF_FREE (_private);
+                /* Do not leave a dangling pointer behind for fini() */
+                this->private = NULL;
+        } else {
+                /* Only the root entry exists and nobody else can see it */
+                bd_entry_cleanup ();
+        }
+        return -1;
+}
+
+/*
+ * fini - release everything init() set up: the LVM handle, the entry tree,
+ * the volume group name and the private structure itself.
+ */
+void
+fini (xlator_t *this)
+{
+        bd_priv_t *priv = this->private;
+
+        if (priv) {
+                lvm_quit (priv->handle);
+
+                BD_WR_LOCK (&priv->lock);
+                bd_entry_cleanup ();
+                BD_UNLOCK (&priv->lock);
+
+                GF_FREE (priv->vg);
+                this->private = NULL;
+                GF_FREE (priv);
+        }
+}
+
+/* Statedump hooks; both handlers currently just return 0 */
+struct xlator_dumpops dumpops = {
+ .priv = bd_priv,
+ .inode = bd_inode,
+};
+
+/*
+ * File operation table.  The "not supported" handlers all fail the call
+ * with ENOSYS; each of them is registered explicitly, including fxattrop.
+ */
+struct xlator_fops fops = {
+        /* Not supported */
+        .readlink    = bd_readlink,
+        .mknod       = bd_mknod,
+        .mkdir       = bd_mkdir,
+        .rmdir       = bd_rmdir,
+        .link        = bd_link,
+        .setxattr    = bd_setxattr,
+        .fsetxattr   = bd_fsetxattr,
+        .getxattr    = bd_getxattr,
+        .fgetxattr   = bd_fgetxattr,
+        .removexattr = bd_removexattr,
+        .fremovexattr= bd_fremovexattr,
+        .fsyncdir    = bd_fsyncdir,
+        .lk          = bd_lk,
+        .inodelk     = bd_inodelk,
+        .finodelk    = bd_finodelk,
+        .entrylk     = bd_entrylk,
+        .fentrylk    = bd_fentrylk,
+        .rchecksum   = bd_rchecksum,
+        .xattrop     = bd_xattrop,
+        .fxattrop    = bd_fxattrop,
+        .setattr     = bd_setattr,
+
+        /* Supported */
+        .lookup      = bd_lookup,
+        .opendir     = bd_opendir,
+        .readdir     = bd_readdir,
+        .readdirp    = bd_readdirp,
+        .stat        = bd_stat,
+        .statfs      = bd_statfs,
+};
+
+/* Callbacks: only releasedir is provided, to free the per-fd bd_fd_t */
+struct xlator_cbks cbks = {
+ .releasedir = bd_releasedir,
+};
+
+/*
+ * Volume file options read by init():
+ * "export" - name of the volume group to export
+ * "device" - backend type; init() only accepts BACKEND_VG
+ */
+struct volume_options options[] = {
+ { .key = {"export"},
+ .type = GF_OPTION_TYPE_STR},
+ { .key = {"device"},
+ .type = GF_OPTION_TYPE_STR},
+ { .key = {NULL} }
+};
diff --git a/xlators/storage/bd_map/src/bd_map.h b/xlators/storage/bd_map/src/bd_map.h
new file mode 100644
index 00000000..974ec928
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map.h
@@ -0,0 +1,75 @@
+/*
+ BD translator - Exports Block devices on server side as regular
+ files to client
+
+ Copyright IBM, Corp. 2012
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BD_MAP_H
+#define _BD_MAP_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "mem-types.h"
+
+#define BD_XLATOR "block device mapper xlator"
+
+#define BACKEND_VG "vg"
+
+/*
+ * Memory accounting types of this xlator, numbered after the common types.
+ * gf_bd_mt_end is a terminator used to size the accounting table in
+ * mem_acct_init().
+ */
+enum gf_bd_mem_types_ {
+ gf_bd_fd = gf_common_mt_end + 1,
+ gf_bd_private,
+ gf_bd_entry,
+ gf_bd_attr,
+ gf_bd_mt_end
+};
+
+/*
+ * Each BD/LV is represented by this data structure.
+ * Usually the root entry has only children and no siblings.
+ * All other entries may have children and/or sibling entries.
+ * If an entry is a Volume Group it has children (. & .. and Logical
+ * Volumes), and other Volume Groups are siblings of it.
+ */
+typedef struct bd_entry {
+ struct list_head child; /* List to child */
+ struct list_head sibling; /* List of siblings */
+ struct bd_entry *parent;/* Parent of this node */
+ struct bd_entry *link; /* Link to actual entry, if its . or .. */
+ /* Filled with strcpy() by bd_entry_init/bd_entry_clone, so names must
+ * be shorter than NAME_MAX */
+ char name[NAME_MAX];
+ /* Attributes; a clone made by bd_entry_clone shares the original's
+ * pointer */
+ struct iatt *attr;
+ int refcnt; /* Starts at 0 in bd_entry_init/bd_entry_clone */
+ uint64_t size;
+ pthread_rwlock_t lock;
+} bd_entry_t;
+
+
+/**
+ * bd_fd - internal structure common to file and directory fd's
+ *
+ * Stored in the fd context by bd_opendir and freed by bd_releasedir.
+ */
+typedef struct bd_fd {
+ bd_entry_t *entry; /* Readdir cursor within the directory */
+ bd_entry_t *p_entry; /* Parent entry */
+} bd_fd_t;
+
+/* Per-xlator private data, allocated in init() and freed in fini() */
+typedef struct bd_priv {
+ lvm_t handle; /* LVM handle used for lvm_vg_open() and lvm_quit() */
+ pthread_rwlock_t lock; /* Taken via BD_RD_LOCK/BD_WR_LOCK */
+ char *vg; /* Copy of the "export" option (volume group name) */
+} bd_priv_t;
+
+#endif
diff --git a/xlators/storage/bd_map/src/bd_map_help.c b/xlators/storage/bd_map/src/bd_map_help.c
new file mode 100644
index 00000000..2b5c321f
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map_help.c
@@ -0,0 +1,462 @@
+/*
+ BD translator - Exports Block devices on server side as regular
+ files to client
+
+ Copyright IBM, Corp. 2012
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define __XOPEN_SOURCE 500
+
+#include <libgen.h>
+#include <time.h>
+#include <lvm2app.h>
+
+#include "bd_map.h"
+#include "bd_map_help.h"
+#include "defaults.h"
+#include "glusterfs3-xdr.h"
+
+/*
+ * CHILD_ENTRY: return the entry whose 'child' member is the element that
+ * node->child.next points at, i.e. the header entry of directory 'node'.
+ * Only meaningful when node->child is not self-linked.
+ * The argument is parenthesised so any pointer expression can be passed.
+ */
+#define CHILD_ENTRY(node) list_entry ((node)->child.next,             \
+                                      typeof (*(node)), child)
+
+/* Root ("/") of the in-memory entry tree; lookups with a NULL parent start here. */
+bd_entry_t *bd_rootp;
+/* Serialises updates of bd_entry_ino. NOTE(review): initialised outside this file - confirm. */
+gf_lock_t inode_lk;
+/* Next inode number to hand out; see bd_entry_get_ino. */
+static uint64_t bd_entry_ino = 5000; /* Starting inode */
+
+/*
+ * bd_entry_get_ino: store the next unused inode number in *inode.
+ * The shared counter is read and advanced while inode_lk is held.
+ */
+static void bd_entry_get_ino (uint64_t *inode)
+{
+        uint64_t next = 0;
+
+        LOCK (&inode_lk);
+        next = bd_entry_ino;
+        bd_entry_ino = next + 1;
+        *inode = next;
+        UNLOCK (&inode_lk);
+}
+
+/*
+ * bd_entry_init: allocate a new, unlinked entry called @name together with
+ * its own attribute block.
+ *
+ * Both allocations are zero-filled so that parent, link, size and refcnt
+ * start out in a defined state. The name is rejected, rather than copied
+ * past the end of the buffer, when it does not fit in bd_entry_t.name.
+ *
+ * Returns the new entry, or NULL on a NULL/over-long name or when out of
+ * memory.
+ */
+static bd_entry_t *bd_entry_init (const char *name)
+{
+        bd_entry_t *bdentry = NULL;
+        size_t      len     = 0;
+
+        if (!name)
+                return NULL;
+
+        len = strlen (name);
+        if (len >= sizeof (bdentry->name))
+                return NULL;
+
+        bdentry = GF_MALLOC (sizeof (*bdentry), gf_bd_entry);
+        if (!bdentry)
+                return NULL;
+        memset (bdentry, 0, sizeof (*bdentry));
+
+        bdentry->attr = GF_MALLOC (sizeof (*bdentry->attr), gf_bd_attr);
+        if (!bdentry->attr) {
+                GF_FREE (bdentry);
+                return NULL;
+        }
+        memset (bdentry->attr, 0, sizeof (*bdentry->attr));
+
+        /* len + 1 also copies the terminating NUL */
+        memcpy (bdentry->name, name, len + 1);
+        INIT_LIST_HEAD (&bdentry->sibling);
+        INIT_LIST_HEAD (&bdentry->child);
+        return bdentry;
+}
+
+/*
+ * bd_entry_clone: create a helper entry called @name that stands for @orig.
+ *
+ * The clone does not own attributes: it shares orig->attr and records
+ * @orig in its 'link' member, which is how the cleanup code knows not to
+ * free the attribute block twice. The entry is zero-filled first so that
+ * parent and size are defined.
+ *
+ * Returns the new entry, or NULL on bad arguments, an over-long name or
+ * when out of memory.
+ */
+static bd_entry_t *bd_entry_clone (bd_entry_t *orig, char *name)
+{
+        bd_entry_t *bdentry = NULL;
+        size_t      len     = 0;
+
+        if (!orig || !name)
+                return NULL;
+
+        len = strlen (name);
+        if (len >= sizeof (bdentry->name))
+                return NULL;
+
+        bdentry = GF_MALLOC (sizeof (*bdentry), gf_bd_entry);
+        if (!bdentry)
+                return NULL;
+        memset (bdentry, 0, sizeof (*bdentry));
+
+        bdentry->attr = orig->attr;
+        bdentry->link = orig;
+
+        /* len + 1 also copies the terminating NUL */
+        memcpy (bdentry->name, name, len + 1);
+        INIT_LIST_HEAD (&bdentry->sibling);
+        INIT_LIST_HEAD (&bdentry->child);
+        return bdentry;
+}
+
+/*
+ * bd_entry_init_iattr: fill @attr with synthetic defaults for an object of
+ * the given @type: root-owned, mode 0750, link count 2, 4096-byte size and
+ * block size, and all three timestamps set to the current wall-clock time.
+ * ia_ino and ia_gfid are left untouched.
+ */
+static void bd_entry_init_iattr (struct iatt *attr, int type)
+{
+        struct timespec now = {0, };
+
+        clock_gettime (CLOCK_REALTIME, &now);
+
+        /* type, permissions and ownership */
+        attr->ia_type  = type;
+        attr->ia_prot  = ia_prot_from_st_mode (0750);
+        attr->ia_uid   = 0;
+        attr->ia_gid   = 0;
+        attr->ia_nlink = 2;
+
+        /* device numbers */
+        attr->ia_dev  = ia_makedev (0, 0); /* FIXME: */
+        attr->ia_rdev = ia_makedev (0, 0);
+
+        /* sizes */
+        attr->ia_blksize = 4096;
+        attr->ia_blocks  = 0;
+        attr->ia_size    = 4096; /* FIXME */
+
+        /* timestamps */
+        attr->ia_ctime      = now.tv_sec;
+        attr->ia_ctime_nsec = now.tv_nsec;
+        attr->ia_mtime      = now.tv_sec;
+        attr->ia_mtime_nsec = now.tv_nsec;
+        attr->ia_atime      = now.tv_sec;
+        attr->ia_atime_nsec = now.tv_nsec;
+}
+
+/*
+ * bd_entry_istat: initialize the iatt structure for a given path.
+ *
+ * When stat(2) succeeds the attributes are taken from the file system;
+ * otherwise synthetic defaults for @type are used. In both cases the gfid
+ * buffer is then filled with the inode number printed in hexadecimal.
+ *
+ * The write is bounded by the size of ia_gfid: a 64-bit inode can print as
+ * 16 hex digits, which together with the terminating NUL would not fit.
+ * NOTE(review): when stat() fails st_ino is expected to still be 0 from
+ * the initializer, so such entries all get the gfid text "0".
+ */
+void bd_entry_istat (const char *path, struct iatt *attr, int type)
+{
+        struct stat stbuf = {0, };
+
+        if (stat (path, &stbuf) < 0)
+                bd_entry_init_iattr (attr, type);
+        else
+                iatt_from_stat (attr, &stbuf);
+
+        snprintf ((char *)attr->ia_gfid, sizeof (attr->ia_gfid), "%lx",
+                  (unsigned long)stbuf.st_ino);
+}
+
+/*
+ * bd_entry_add_root: create the root entry ("/") and its helper entries,
+ * i.e. the unnamed header entry followed by "." and "..".
+ *
+ * The root is its own parent, and all three helpers are clones that share
+ * the root's attributes. Nothing is linked until every allocation has
+ * succeeded, so a failure leaves no partially built tree behind.
+ *
+ * Returns the root entry, or NULL when out of memory. The caller is
+ * expected to store the result in bd_rootp.
+ */
+bd_entry_t *bd_entry_add_root (void)
+{
+        bd_entry_t *bdentry  = NULL;
+        bd_entry_t *h_entry  = NULL;
+        bd_entry_t *d_entry  = NULL;
+        bd_entry_t *dd_entry = NULL;
+
+        bdentry = bd_entry_init ("/");
+        if (!bdentry)
+                return NULL;
+
+        bdentry->parent = bdentry;
+
+        bd_entry_get_ino (&bdentry->attr->ia_ino);
+        snprintf ((char *)bdentry->attr->ia_gfid,
+                  sizeof (bdentry->attr->ia_gfid), "%ld",
+                  (long)(bdentry->attr->ia_ino << 2));
+        bd_entry_init_iattr (bdentry->attr, IA_IFDIR);
+
+        h_entry  = bd_entry_clone (bdentry, "");
+        d_entry  = bd_entry_clone (bdentry, ".");
+        dd_entry = bd_entry_clone (bdentry, "..");
+        if (!h_entry || !d_entry || !dd_entry)
+                goto err;
+
+        /* the directory points at its header; contents hang off the header */
+        bdentry->child.next = &h_entry->child;
+        bdentry->child.prev = &h_entry->child;
+
+        list_add_tail (&d_entry->sibling, &h_entry->sibling);
+        list_add_tail (&dd_entry->sibling, &h_entry->sibling);
+        return bdentry;
+
+err:
+        /* helpers share the root's attr, so only the structs are freed */
+        if (dd_entry)
+                GF_FREE (dd_entry);
+        if (d_entry)
+                GF_FREE (d_entry);
+        if (h_entry)
+                GF_FREE (h_entry);
+        GF_FREE (bdentry->attr);
+        GF_FREE (bdentry);
+        return NULL;
+}
+
+/*
+ * bd_entry_add: create an entry called @name below @parent.
+ *
+ * @parent: directory to add to; NULL means the root (bd_rootp)
+ * @name:   name of the new entry
+ * @iattr:  attributes to copy into the entry; ia_type and ia_ino of the
+ *          caller's structure are overwritten before the copy
+ * @type:   IA_IFREG or IA_IFDIR, anything else is rejected
+ *
+ * For a directory the header, "." and ".." helper entries are created as
+ * well: "." stands for the new directory and ".." for its parent. The
+ * parent's link count is raised for a new directory and its mtime is
+ * refreshed, but only after all allocations have succeeded, so a failure
+ * leaves the tree unchanged.
+ *
+ * Returns the new entry, or NULL on invalid arguments, when @parent has no
+ * header entry (it is not a directory), or when out of memory.
+ */
+bd_entry_t *bd_entry_add (bd_entry_t *parent, const char *name,
+                          struct iatt *iattr, ia_type_t type)
+{
+        bd_entry_t      *bdentry  = NULL;
+        bd_entry_t      *h_entry  = NULL;
+        bd_entry_t      *d_entry  = NULL;
+        bd_entry_t      *dd_entry = NULL;
+        bd_entry_t      *sentry   = NULL;
+        struct timespec  ts       = { 0, };
+
+        if (!parent)
+                parent = bd_rootp;
+
+        if (!parent || !iattr)
+                return NULL;
+
+        if (type != IA_IFREG && type != IA_IFDIR)
+                return NULL;
+
+        /* without a header entry there is no list to add the new entry to */
+        if (parent->child.next == &parent->child)
+                return NULL;
+
+        bdentry = bd_entry_init (name);
+        if (!bdentry)
+                return NULL;
+
+        bdentry->parent = parent;
+
+        if (IA_ISDIR (type)) {
+                h_entry  = bd_entry_clone (bdentry, "");
+                d_entry  = bd_entry_clone (bdentry, ".");
+                dd_entry = bd_entry_clone (parent, "..");
+                if (!h_entry || !d_entry || !dd_entry)
+                        goto err;
+
+                bdentry->child.next = &h_entry->child;
+                bdentry->child.prev = &h_entry->child;
+
+                list_add_tail (&d_entry->sibling, &h_entry->sibling);
+                list_add_tail (&dd_entry->sibling, &h_entry->sibling);
+
+                parent->attr->ia_nlink++;
+        }
+
+        iattr->ia_type = type;
+        bd_entry_get_ino (&iattr->ia_ino);
+        memcpy (bdentry->attr, iattr, sizeof (*iattr));
+        bdentry->size = iattr->ia_size;
+
+        clock_gettime (CLOCK_REALTIME, &ts);
+        parent->attr->ia_mtime      = ts.tv_sec;
+        parent->attr->ia_mtime_nsec = ts.tv_nsec;
+
+        sentry = CHILD_ENTRY (parent);
+        list_add_tail (&bdentry->sibling, &sentry->sibling);
+        return bdentry;
+
+err:
+        /* helpers never own an attr block, so only the structs are freed */
+        if (dd_entry)
+                GF_FREE (dd_entry);
+        if (d_entry)
+                GF_FREE (d_entry);
+        if (h_entry)
+                GF_FREE (h_entry);
+        GF_FREE (bdentry->attr);
+        GF_FREE (bdentry);
+        return NULL;
+}
+
+/*
+ * bd_entry_get_list: look for an entry called @name directly below
+ * @parent (the root when @parent is NULL).
+ *
+ * The header entry of the directory is compared first, then every entry
+ * on the header's sibling list. Returns the first match, or NULL when the
+ * directory has no header entry or nothing matches. No reference is taken.
+ */
+bd_entry_t *bd_entry_get_list (const char *name, bd_entry_t *parent)
+{
+        bd_entry_t *dir  = parent ? parent : bd_rootp;
+        bd_entry_t *head = NULL;
+        bd_entry_t *cur  = NULL;
+
+        /* a self-linked child list means there is nothing below dir */
+        if (&dir->child == dir->child.next)
+                return NULL;
+
+        head = CHILD_ENTRY (dir);
+        if (strcmp (head->name, name) == 0)
+                return head;
+
+        list_for_each_entry (cur, &head->sibling, sibling) {
+                if (strcmp (cur->name, name) != 0)
+                        continue;
+                return cur;
+        }
+
+        return NULL;
+}
+
+/*
+ * bd_entry_gfid_match: true when @entry is a real (non-helper) entry whose
+ * gfid, formatted by uuid_utoa_r, equals the string @gfid.
+ */
+static int bd_entry_gfid_match (bd_entry_t *entry, const char *gfid)
+{
+        char buf[GF_UUID_BUF_SIZE] = {0, };
+
+        if (entry->link)
+                return 0;
+
+        uuid_utoa_r (entry->attr->ia_gfid, buf);
+        return !strcmp (gfid, buf);
+}
+
+/*
+ * bd_entry_find_by_gfid: resolve a "<gfid:...>" style path to an entry.
+ *
+ * The text between "<gfid:" and the first '>' is compared against the
+ * formatted gfid of every real entry on the first two levels below the
+ * root; helper entries ("", "." and "..") are skipped. The root itself is
+ * not considered.
+ *
+ * The gfid text is parsed into a stack buffer, so nothing is allocated
+ * (and nothing can leak), and a malformed path - wrong prefix, missing
+ * '>' or text longer than a formatted gfid - simply yields NULL.
+ *
+ * Returns the matching entry without taking a reference, or NULL.
+ *
+ * FIXME: Do we need hashing here?
+ */
+bd_entry_t *bd_entry_find_by_gfid (const char *path)
+{
+        static const char  prefix[]               = "<gfid:";
+        char               gfid[GF_UUID_BUF_SIZE] = {0, };
+        const char        *start                  = NULL;
+        const char        *end                    = NULL;
+        size_t             len                    = 0;
+        bd_entry_t        *node                   = NULL;
+        bd_entry_t        *cnode                  = NULL;
+        bd_entry_t        *h                      = NULL;
+        bd_entry_t        *leaf                   = NULL;
+
+        if (!path || !bd_rootp)
+                return NULL;
+
+        if (strncmp (path, prefix, sizeof (prefix) - 1))
+                return NULL;
+
+        start = path + sizeof (prefix) - 1;
+        end = strchr (start, '>');
+        if (!end)
+                return NULL;
+
+        /* anything that does not fit cannot equal a formatted gfid */
+        len = end - start;
+        if (len == 0 || len >= sizeof (gfid))
+                return NULL;
+        memcpy (gfid, start, len);
+        gfid[len] = '\0';
+
+        if (bd_rootp->child.next == &bd_rootp->child)
+                return NULL;
+
+        node = CHILD_ENTRY (bd_rootp);
+
+        list_for_each_entry (h, &node->sibling, sibling) {
+                if (bd_entry_gfid_match (h, gfid))
+                        return h;
+
+                /* descend only if this node has children */
+                if (h->child.next == &h->child)
+                        continue;
+
+                cnode = CHILD_ENTRY (h);
+                if (bd_entry_gfid_match (cnode, gfid))
+                        return cnode;
+
+                list_for_each_entry (leaf, &cnode->sibling, sibling) {
+                        if (bd_entry_gfid_match (leaf, gfid))
+                                return leaf;
+                }
+        }
+
+        return NULL;
+}
+
+/*
+ * bd_entry_get: resolve @name to an entry and take a reference on it.
+ *
+ * @name is either a "<gfid:...>" string, "/" for the root, or a
+ * '/'-separated path that is walked one component at a time starting at
+ * the root.
+ *
+ * Returns the entry with its refcnt incremented, or NULL when @name is
+ * NULL, has no path components, cannot be resolved, or when memory runs
+ * out. Release the reference with bd_entry_put.
+ *
+ * The caller must hold priv->lock (see BD_ENTRY / BD_WR_ENTRY).
+ */
+bd_entry_t *bd_entry_get (const char *name)
+{
+        bd_entry_t *pentry = NULL;
+        char       *path   = NULL;
+        char       *comp   = NULL;
+        char       *save   = NULL;
+
+        if (!name || !bd_rootp)
+                return NULL;
+
+        /* 6 == strlen ("<gfid:"), so the ':' is part of the comparison */
+        if (!strncmp (name, "<gfid:", 6)) {
+                pentry = bd_entry_find_by_gfid (name);
+                if (pentry)
+                        pentry->refcnt++;
+                return pentry;
+        }
+
+        if (!strcmp (name, "/")) {
+                bd_rootp->refcnt++;
+                return bd_rootp;
+        }
+
+        path = gf_strdup (name);
+        if (!path)
+                return NULL;
+
+        /*
+         * pentry is NULL for the first component, which makes
+         * bd_entry_get_list start at the root. Stop at the first miss:
+         * continuing with a NULL parent would restart from the root.
+         */
+        for (comp = strtok_r (path, "/", &save); comp;
+             comp = strtok_r (NULL, "/", &save)) {
+                pentry = bd_entry_get_list (comp, pentry);
+                if (!pentry)
+                        break;
+        }
+
+        if (pentry)
+                pentry->refcnt++;
+
+        GF_FREE (path);
+        return pentry;
+}
+
+/*
+ * bd_entry_put: drop one reference from @entry.
+ * The caller must hold priv->lock (BD_PUT_ENTRY wraps this call).
+ */
+void bd_entry_put (bd_entry_t *entry)
+{
+        entry->refcnt -= 1;
+}
+
+/*
+ * bd_build_lv_list: populate the entry tree from Volume Group @vg_name.
+ *
+ * Opens an lvm2app handle (stored in priv->handle), adds the Volume Group
+ * as a directory below the root and every Logical Volume in it as a
+ * regular file whose size is the LV size. LVs whose name starts with
+ * "snapshot" are skipped because that prefix is reserved.
+ *
+ * priv->lock is write-held while the tree is modified.
+ *
+ * Returns 0 on success (including a VG without LVs) and -1 when LVM
+ * cannot be initialised, the VG cannot be opened, or an entry cannot be
+ * added. Entries added before a failure stay in the tree.
+ * NOTE(review): priv->handle is not released here on failure; presumably
+ * the translator's teardown path does that - confirm.
+ */
+int bd_build_lv_list (bd_priv_t *priv, char *vg_name)
+{
+        struct dm_list     *lv_dm_list      = NULL;
+        struct lvm_lv_list *lv_list         = NULL;
+        struct iatt         iattr           = {0, };
+        char                path[PATH_MAX]  = {0, };
+        vg_t                vg              = NULL;
+        bd_entry_t         *vg_map          = NULL;
+        bd_entry_t         *bd              = NULL;
+        int                 ret             = -1;
+        int                 len             = 0;
+        const char         *lv_name         = NULL;
+
+        priv->handle = lvm_init (NULL);
+        if (!priv->handle) {
+                gf_log (THIS->name, GF_LOG_CRITICAL, "FATAL: bd_init failed");
+                return -1;
+        }
+
+        BD_WR_LOCK (&priv->lock);
+
+        vg = lvm_vg_open (priv->handle, vg_name, "r", 0);
+        if (!vg) {
+                gf_log (THIS->name, GF_LOG_CRITICAL,
+                        "opening vg %s failed", vg_name);
+                goto out;
+        }
+
+        /* get list of LVs associated with this VG */
+        lv_dm_list = lvm_vg_list_lvs (vg);
+
+        len = snprintf (path, sizeof (path), "/dev/%s", vg_name);
+        if (len < 0 || (size_t)len >= sizeof (path)) {
+                gf_log (THIS->name, GF_LOG_CRITICAL,
+                        "device path for vg %s is too long", vg_name);
+                goto out;
+        }
+        bd_entry_istat (path, &iattr, IA_IFDIR);
+        vg_map = bd_entry_add (bd_rootp, vg_name, &iattr, IA_IFDIR);
+        if (!vg_map) {
+                gf_log (THIS->name, GF_LOG_CRITICAL,
+                        "bd_add_entry failed");
+                goto out;
+        }
+
+        if (!lv_dm_list) { /* no lvs for this VG */
+                ret = 0;
+                goto out;
+        }
+
+        dm_list_iterate_items (lv_list, lv_dm_list) {
+                if (!lv_list)
+                        continue;
+                lv_name = lvm_lv_get_name (lv_list->lv);
+                if (!lv_name)
+                        continue;
+                /* snapshot%d is reserved name */
+                if (!strncmp (lv_name, "snapshot", 8))
+                        continue;
+                /* get symbolic path for this LV */
+                len = snprintf (path, sizeof (path), "/dev/%s/%s",
+                                vg_name, lv_name);
+                if (len < 0 || (size_t)len >= sizeof (path)) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "device path for lv %s is too long, "
+                                "skipping", lv_name);
+                        continue;
+                }
+                bd_entry_istat (path, &iattr, IA_IFREG);
+                /* Make the file size equivalent to BD size */
+                iattr.ia_size = lvm_lv_get_size (lv_list->lv);
+                /* got LV, add it to our tree */
+                bd = bd_entry_add (vg_map, lv_name, &iattr, IA_IFREG);
+                if (bd == NULL) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "bd_add_entry failed");
+                        /* report the failure instead of a stale success */
+                        goto out;
+                }
+        }
+        ret = 0;
+out:
+        if (vg)
+                lvm_vg_close (vg);
+
+        BD_UNLOCK (&priv->lock);
+        return ret;
+}
+
+/*
+ * bd_entry_free: release one entry. Helper entries (link != NULL) share
+ * the attr block of the entry they stand for, so attr is freed only for
+ * real entries.
+ */
+static void bd_entry_free (bd_entry_t *entry)
+{
+        if (!entry->link)
+                GF_FREE (entry->attr);
+        GF_FREE (entry);
+}
+
+/*
+ * bd_entry_find_busy: return the first entry in the tree (root, headers,
+ * first and second level entries) that still has a reference, or NULL
+ * when none has. Expects bd_rootp to have a header entry.
+ */
+static bd_entry_t *bd_entry_find_busy (void)
+{
+        bd_entry_t *node  = CHILD_ENTRY (bd_rootp);
+        bd_entry_t *cnode = NULL;
+        bd_entry_t *h     = NULL;
+        bd_entry_t *leaf  = NULL;
+
+        if (bd_rootp->refcnt)
+                return bd_rootp;
+        if (node->refcnt)
+                return node;
+
+        list_for_each_entry (h, &node->sibling, sibling) {
+                if (h->refcnt)
+                        return h;
+                if (h->child.next == &h->child)
+                        continue;
+
+                cnode = CHILD_ENTRY (h);
+                if (cnode->refcnt)
+                        return cnode;
+                list_for_each_entry (leaf, &cnode->sibling, sibling) {
+                        if (leaf->refcnt)
+                                return leaf;
+                }
+        }
+        return NULL;
+}
+
+/*
+ * bd_entry_cleanup: free the entire entry tree.
+ *
+ * Called with priv->lock held. If any entry is still referenced nothing
+ * is freed. On success bd_rootp is reset to NULL so that later lookups
+ * and repeated cleanups do not touch freed memory.
+ *
+ * Returns 0 on success (or when there is no tree) and -1 if an entry is
+ * still in use.
+ */
+int bd_entry_cleanup (void)
+{
+        bd_entry_t *node  = NULL;
+        bd_entry_t *tmp   = NULL;
+        bd_entry_t *tmp2  = NULL;
+        bd_entry_t *cnode = NULL;
+        bd_entry_t *h     = NULL;
+        bd_entry_t *leaf  = NULL;
+        bd_entry_t *busy  = NULL;
+
+        if (!bd_rootp)
+                return 0;
+
+        /* a root without a header entry has nothing below it */
+        if (bd_rootp->child.next == &bd_rootp->child) {
+                if (bd_rootp->refcnt) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "entry %s is inuse\n", bd_rootp->name);
+                        return -1;
+                }
+                goto free_root;
+        }
+
+        busy = bd_entry_find_busy ();
+        if (busy) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "entry %s is inuse\n", busy->name);
+                return -1;
+        }
+
+        node = CHILD_ENTRY (bd_rootp);
+        list_for_each_entry_safe (h, tmp, &node->sibling, sibling) {
+                /* if we have children for this node */
+                if (h->child.next != &h->child) {
+                        cnode = CHILD_ENTRY (h);
+                        list_for_each_entry_safe (leaf, tmp2,
+                                                  &cnode->sibling, sibling) {
+                                list_del_init (&leaf->sibling);
+                                bd_entry_free (leaf);
+                        }
+                        bd_entry_free (cnode);
+                }
+                list_del_init (&h->sibling);
+                bd_entry_free (h);
+        }
+        /* the root's header entry */
+        bd_entry_free (node);
+
+free_root:
+        GF_FREE (bd_rootp->attr);
+        GF_FREE (bd_rootp);
+        bd_rootp = NULL;
+        return 0;
+}
diff --git a/xlators/storage/bd_map/src/bd_map_help.h b/xlators/storage/bd_map/src/bd_map_help.h
new file mode 100644
index 00000000..997b8b71
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map_help.h
@@ -0,0 +1,63 @@
+/*
+ BD translator - Exports Block devices on server side as regular
+ files to client.
+
+ Copyright IBM, Corp. 2012
+
+ This file is part of GlusterFS.
+
+ Author:
+ M. Mohan Kumar <mohan@in.ibm.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _BD_MAP_HELP_H
+#define _BD_MAP_HELP_H
+
+/*
+ * Thin wrappers around the pthread rwlock calls; 'lock' is passed straight
+ * through to pthread_rwlock_rdlock/wrlock/unlock.
+ * NOTE(review): every expansion already ends in ';', so "BD_RD_LOCK (l);"
+ * yields an extra empty statement - do not use these as the sole body of
+ * an un-braced if/else.
+ */
+#define BD_RD_LOCK(lock) \
+ pthread_rwlock_rdlock (lock);
+
+#define BD_WR_LOCK(lock) \
+ pthread_rwlock_wrlock (lock);
+
+#define BD_UNLOCK(lock) \
+ pthread_rwlock_unlock (lock);
+
+/*
+ * BD_WR_ENTRY / BD_ENTRY: look up 'path' with bd_entry_get and store the
+ * result (NULL if not found) in 'bdentry', holding priv->lock in write or
+ * read mode respectively for the duration of the lookup. A successful
+ * lookup takes a reference that must be dropped with BD_PUT_ENTRY.
+ *
+ * BD_PUT_ENTRY: drop that reference with bd_entry_put while holding
+ * priv->lock in read mode.
+ *
+ * The 'priv' and 'bdentry' arguments are parenthesised so that arbitrary
+ * expressions can be passed.
+ *
+ * NOTE(review): BD_ENTRY and BD_PUT_ENTRY modify the entry's refcnt while
+ * holding only the read lock, so concurrent users can race on it. Switching
+ * to the write lock needs a check that no caller already holds the read
+ * lock - confirm against bd_map.c.
+ */
+#define BD_WR_ENTRY(priv, bdentry, path)                        \
+        do {                                                    \
+                BD_WR_LOCK (&(priv)->lock);                     \
+                (bdentry) = bd_entry_get (path);                \
+                BD_UNLOCK (&(priv)->lock);                      \
+        } while (0)
+
+#define BD_ENTRY(priv, bdentry, path)                           \
+        do {                                                    \
+                BD_RD_LOCK (&(priv)->lock);                     \
+                (bdentry) = bd_entry_get (path);                \
+                BD_UNLOCK (&(priv)->lock);                      \
+        } while (0)
+
+#define BD_PUT_ENTRY(priv, bdentry)                             \
+        do {                                                    \
+                BD_RD_LOCK (&(priv)->lock);                     \
+                bd_entry_put (bdentry);                         \
+                BD_UNLOCK (&(priv)->lock);                      \
+        } while (0)
+
+/* Root of the entry tree and the lock guarding the inode counter; both are
+ * defined in bd_map_help.c. */
+extern bd_entry_t *bd_rootp;
+extern gf_lock_t inode_lk;
+
+/* Fill 'attr' from stat(path), or with defaults for 'type' if stat fails. */
+void bd_entry_istat (const char *path, struct iatt *attr, int type);
+/* Create the root entry with its header, "." and ".." helper entries. */
+bd_entry_t *bd_entry_add_root (void);
+/* Add a file or directory entry below 'parent' (root when NULL). */
+bd_entry_t *bd_entry_add (bd_entry_t *parent, const char *name,
+ struct iatt *iattr, ia_type_t type);
+/* Find 'name' directly below 'parent' (root when NULL); no reference taken. */
+bd_entry_t *bd_entry_get_list (const char *name, bd_entry_t *parent);
+/* Resolve a path or "<gfid:...>" string and take a reference on the entry. */
+bd_entry_t *bd_entry_get (const char *name);
+/* Drop a reference taken by bd_entry_get. */
+void bd_entry_put (bd_entry_t *entry);
+/* Build the entry tree for Volume Group 'vg'; 0 on success, -1 on failure. */
+int bd_build_lv_list (bd_priv_t *priv, char *vg);
+/* Free the entry tree; -1 if an entry is still in use. */
+int bd_entry_cleanup (void);
+
+#endif