diff options
Diffstat (limited to 'xlators/storage')
-rw-r--r-- | xlators/storage/Makefile.am | 5 | ||||
-rw-r--r-- | xlators/storage/bd_map/Makefile.am | 3 | ||||
-rw-r--r-- | xlators/storage/bd_map/src/Makefile.am | 21 | ||||
-rw-r--r-- | xlators/storage/bd_map/src/bd_map.c | 896 | ||||
-rw-r--r-- | xlators/storage/bd_map/src/bd_map.h | 75 | ||||
-rw-r--r-- | xlators/storage/bd_map/src/bd_map_help.c | 462 | ||||
-rw-r--r-- | xlators/storage/bd_map/src/bd_map_help.h | 63 |
7 files changed, 1524 insertions, 1 deletions
diff --git a/xlators/storage/Makefile.am b/xlators/storage/Makefile.am index 9cb9ded3035..e1316a12702 100644 --- a/xlators/storage/Makefile.am +++ b/xlators/storage/Makefile.am @@ -1,3 +1,6 @@ SUBDIRS = posix -CLEANFILES = +if ENABLE_BD_XLATOR +SUBDIRS += bd_map +endif +CLEANFILES = diff --git a/xlators/storage/bd_map/Makefile.am b/xlators/storage/bd_map/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/storage/bd_map/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/storage/bd_map/src/Makefile.am b/xlators/storage/bd_map/src/Makefile.am new file mode 100644 index 00000000000..be43d2abb5b --- /dev/null +++ b/xlators/storage/bd_map/src/Makefile.am @@ -0,0 +1,21 @@ + +if ENABLE_BD_XLATOR +xlator_LTLIBRARIES = bd_map.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage + +bd_map_la_LDFLAGS = -module -avoidversion +LIBBD = -llvm2app -lrt +bd_map_la_SOURCES = bd_map.c bd_map_help.c +bd_map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) + +noinst_HEADERS = bd_map.h bd_map_help.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src + +AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) + +CLEANFILES = + +endif diff --git a/xlators/storage/bd_map/src/bd_map.c b/xlators/storage/bd_map/src/bd_map.c new file mode 100644 index 00000000000..e7a684c556d --- /dev/null +++ b/xlators/storage/bd_map/src/bd_map.c @@ -0,0 +1,896 @@ +/* + BD translator - Exports Block devices on server side as regular + files to client + + Now only exporting Logical volumes supported. + + Copyright IBM, Corp. 2012 + + This file is part of GlusterFS. + + Author: + M. 
Mohan Kumar <mohan@in.ibm.com>
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <lvm2app.h>
+#include <openssl/md5.h>
+
+#include "bd_map.h"
+#include "bd_map_help.h"
+#include "defaults.h"
+#include "glusterfs3-xdr.h"
+
+
+/* Regular fops */
+
+/*
+ * bd_lookup: resolve loc->path in the in-memory entry tree and unwind with a
+ * copy of its attributes. When loc->parent is set, the attributes of the
+ * parent directory (loc->path with its last component removed) are returned
+ * as postparent. Unwinds with -1/ENOENT when either entry is missing.
+ */
+int32_t
+bd_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+        struct iatt buf = {0, };
+        int32_t op_ret = -1;
+        int32_t entry_ret = 0;
+        int32_t op_errno = EINVAL;
+        char *pathdup = NULL;
+        bd_entry_t *bdentry = NULL;
+        struct iatt postparent = {0, };
+        bd_priv_t *priv = NULL;
+        char *p = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        op_errno = 0;
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (!bdentry) {
+                op_errno = ENOENT;
+                entry_ret = -1;
+                goto parent;
+        }
+        memcpy (&buf, bdentry->attr, sizeof(buf));
+        BD_PUT_ENTRY (priv, bdentry);
+
+parent:
+        if (loc->parent) {
+                pathdup = gf_strdup (loc->path);
+                if (!pathdup) {
+                        op_errno = ENOMEM;
+                        entry_ret = -1;
+                        goto out;
+                }
+                p = strrchr (pathdup, '/');
+                if (!p) {
+                        /* a path without any '/' has no parent component */
+                        op_errno = EINVAL;
+                        goto out;
+                }
+                /* keep the leading '/' when the parent is the root */
+                if (p == pathdup)
+                        *(p+1) = '\0';
+                else
+                        *p = '\0';
+                BD_ENTRY (priv, bdentry, pathdup);
+                if (!bdentry) {
+                        op_errno = ENOENT;
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "post-operation lookup on parent of %s "
+                                "failed: %s",
+                                loc->path, strerror (op_errno));
+                        goto out;
+                }
+                memcpy (&postparent, bdentry->attr, sizeof(postparent));
+                BD_PUT_ENTRY (priv, bdentry);
+        }
+
+        op_ret = entry_ret;
+out:
+        if (pathdup)
+                GF_FREE (pathdup);
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+                             (loc)?loc->inode:NULL, &buf, NULL, &postparent);
+
+        return 0;
+}
+
+/*
+ * bd_stat: unwind with a copy of the attributes stored for loc->path,
+ * or -1/ENOENT when no such entry exists.
+ */
+int32_t
+bd_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        struct iatt buf = {0,};
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        bd_entry_t *bdentry = NULL;
+        bd_priv_t *priv = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (!bdentry) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR, "stat on %s failed: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+        memcpy (&buf, bdentry->attr, sizeof(buf));
+        BD_PUT_ENTRY (priv, bdentry);
+        op_ret = 0;
+        op_errno = 0;
+
+out:
+        STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL);
+
+        return 0;
+}
+
+/*
+ * bd_opendir: look up the directory entry for loc->path, keep the reference
+ * taken by BD_ENTRY in a freshly allocated bd_fd_t and store that in the fd
+ * context. The reference is dropped again in bd_releasedir, or here when
+ * anything after the lookup fails.
+ */
+int32_t
+bd_opendir (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        bd_fd_t *bd_fd = NULL;
+        bd_entry_t *bdentry = NULL;
+        bd_priv_t *priv = NULL;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (loc, out);
+        VALIDATE_OR_GOTO (loc->path, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        BD_ENTRY (priv, bdentry, loc->path);
+        if (!bdentry) {
+                op_errno = ENOENT;
+                gf_log (this->name, GF_LOG_ERROR, "opendir failed on %s: %s",
+                        loc->path, strerror (op_errno));
+                goto out;
+        }
+        bd_fd = GF_CALLOC (1, sizeof(*bd_fd), gf_bd_fd);
+        if (!bd_fd) {
+                op_errno = ENOMEM;
+                /* bd_fd does not own the reference yet, drop it here */
+                BD_PUT_ENTRY (priv, bdentry);
+                goto out;
+        }
+
+        bd_fd->p_entry = bdentry;
+
+        /* the read position starts at the directory's header entry */
+        bd_fd->entry = list_entry ((&bdentry->child)->next, typeof(*bdentry),
+                                   child);
+
+        if (fd_ctx_set (fd, this, (uint64_t) (long)bd_fd)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to set the fd context path=%s fd=%p",
+                        loc->path, fd);
+                goto out;
+        }
+
+        op_ret = 0;
+        op_errno = 0;
+out:
+        /* bd_fd is NULL on every failure that happens before allocation */
+        if (op_ret == -1 && bd_fd) {
+                BD_PUT_ENTRY (priv, bd_fd->p_entry);
+                GF_FREE (bd_fd);
+        }
+
+        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
+        return 0;
+}
+
+/*
+ * bd_releasedir: remove the bd_fd_t stored by bd_opendir from the fd
+ * context, drop its reference on the directory entry and free it.
+ */
+int32_t
+bd_releasedir (xlator_t *this, fd_t *fd)
+{
+        bd_fd_t *bd_fd = NULL;
+        uint64_t tmp_bd_fd = 0;
+        int ret = 0;
+        bd_priv_t *priv = NULL;
+
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = fd_ctx_del (fd, this, &tmp_bd_fd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG, "bd_fd from fd=%p is NULL",
+                        fd);
+                goto out;
+        }
+        bd_fd = (bd_fd_t *) (long)tmp_bd_fd;
+        if (!bd_fd)
+                goto out;
+
+        BD_PUT_ENTRY (priv, bd_fd->p_entry);
+        GF_FREE (bd_fd);
+out:
+        return 0;
+}
+
+/*
+ * bd_statfs: Mimics statfs by returning used/free extents in the VG
+ * TODO: IF more than one VG allowed per volume, this functions needs some
+ * change
+ */
+int32_t
+bd_statfs (call_frame_t *frame, xlator_t *this,
+           loc_t *loc, dict_t *xdata)
+{
+        int32_t op_ret = -1;
+        int32_t ret = -1;
+        int32_t op_errno = EINVAL;
+        bd_priv_t *priv = NULL;
+        struct statvfs buf = {0, };
+        vg_t vg = NULL;
+        char *vg_name = NULL;
+        uint64_t size = 0;
+        uint64_t fr_size = 0;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (this->private, out);
+        VALIDATE_OR_GOTO (loc, out);
+
+        priv = this->private;
+
+        ret = dict_get_str (this->options, "export", &vg_name);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify volume groups");
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        BD_RD_LOCK (&priv->lock);
+
+        vg = lvm_vg_open (priv->handle, vg_name, "r", 0);
+        if (!vg) {
+                /* do not hand a NULL vg to the lvm accessors below */
+                BD_UNLOCK (&priv->lock);
+                gf_log (this->name, GF_LOG_ERROR,
+                        "opening vg %s failed", vg_name);
+                op_errno = EIO;
+                goto out;
+        }
+        size += lvm_vg_get_size (vg);
+        fr_size += lvm_vg_get_free_size (vg);
+        lvm_vg_close (vg);
+
+        BD_UNLOCK (&priv->lock);
+
+        /* "/" only supplies the non-size fields (block size etc.) */
+        if (statvfs ("/", &buf) < 0) {
+                op_errno = errno;
+                goto out;
+        }
+        op_ret = 0;
+        op_errno = 0;
+        buf.f_blocks = size / buf.f_frsize;
+        buf.f_bfree = fr_size / buf.f_frsize;
+        buf.f_bavail = fr_size / buf.f_frsize;
+out:
+        STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL);
+        return 0;
+}
+
+/*
+ * __bd_fill_readdir: append gf_dirent_t items for the children of
+ * bd_fd->p_entry to 'entries' until roughly 'size' bytes are used or the
+ * directory is exhausted. Returns the number of entries added and sets
+ * errno to ENOENT when the end of the directory was reached.
+ *
+ * The read position is kept in bd_fd->entry. off == 0 rewinds it to the
+ * directory header; any other offset continues from the stored position.
+ * NOTE(review): seeking to an arbitrary non-zero offset is not supported.
+ * The previous code contained a loop for it whose counter was never
+ * incremented, so it could never match; it has been removed.
+ */
+int
+__bd_fill_readdir (pthread_rwlock_t *bd_lock, bd_fd_t *bd_fd, off_t off,
+                   size_t size, gf_dirent_t *entries)
+{
+        size_t filled = 0;
+        int count = 0;
+        int eof = 0;
+        struct dirent entry = {0, };
+        int32_t this_size = -1;
+        gf_dirent_t *this_entry = NULL;
+        bd_entry_t *bdentry = NULL;
+        bd_entry_t *n_entry = NULL;
+
+        BD_RD_LOCK (bd_lock);
+
+        /* header entry of the directory, it terminates the sibling ring */
+        bdentry = list_entry ((&bd_fd->p_entry->child)->next, typeof(*n_entry),
+                              child);
+
+        if (!off)
+                bd_fd->entry = bdentry;
+
+        while (filled <= size) {
+                n_entry = list_entry ((&bd_fd->entry->sibling)->next,
+                                      typeof (*n_entry), sibling);
+                if (&n_entry->sibling == (&bdentry->sibling)) {
+                        eof = 1;
+                        break;
+                }
+
+                snprintf (entry.d_name, sizeof (entry.d_name), "%s",
+                          n_entry->name);
+                entry.d_ino = n_entry->attr->ia_ino;
+                entry.d_off = off;
+                if (n_entry->attr->ia_type == IA_IFDIR)
+                        entry.d_type = DT_DIR;
+                else
+                        entry.d_type = DT_REG;
+
+                this_size = max (sizeof(gf_dirent_t),
+                                 sizeof (gfs3_dirplist))
+                        + strlen (entry.d_name) + 1;
+
+                if (this_size + filled > size)
+                        break;
+
+                this_entry = gf_dirent_for_name (entry.d_name);
+                if (!this_entry) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "could not create gf_dirent for entry %s",
+                                entry.d_name);
+                        goto out;
+                }
+                /* advance only once the entry is really handed out */
+                bd_fd->entry = n_entry;
+
+                this_entry->d_off = off;
+                this_entry->d_ino = entry.d_ino;
+                this_entry->d_type = entry.d_type;
+                off++;
+
+                list_add_tail (&this_entry->list, &entries->list);
+
+                filled += this_size;
+                count++;
+        }
+out:
+        BD_UNLOCK (bd_lock);
+        /* pick ENOENT to indicate EOF */
+        if (eof)
+                errno = ENOENT;
+        return count;
+}
+
+/*
+ * bd_do_readdir: common implementation of readdir and readdirp. Fills the
+ * entry list from the bd_fd_t stored in the fd context; for
+ * GF_FOP_READDIRP every entry additionally gets a copy of its attributes.
+ */
+int32_t
+bd_do_readdir (call_frame_t *frame, xlator_t *this,
+               fd_t *fd, size_t size, off_t off, int whichop)
+{
+        uint64_t tmp_bd_fd = 0;
+        bd_fd_t *bd_fd = NULL;
+        int ret = -1;
+        int count = 0;
+        int32_t op_ret = -1;
+        int32_t op_errno = EINVAL;
+        gf_dirent_t entries;
+        gf_dirent_t *tmp_entry = NULL;
+        bd_entry_t *bdentry = NULL;
+        bd_priv_t *priv = NULL;
+
+        /* must precede every 'goto out': the list is freed there */
+        INIT_LIST_HEAD (&entries.list);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = fd_ctx_get (fd, this, &tmp_bd_fd);
+        bd_fd = (bd_fd_t *) (long)tmp_bd_fd;
+        if (ret < 0 || !bd_fd) {
+                gf_log (this->name, GF_LOG_WARNING, "bd_fd is NULL, fd=%p", fd);
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        errno = 0;
+        LOCK (&fd->lock);
+        {
+                count = __bd_fill_readdir (&priv->lock, bd_fd, off,
+                                           size, &entries);
+        }
+        UNLOCK (&fd->lock);
+
+        /* pick ENOENT to indicate EOF */
+        op_errno = errno;
+        op_ret = count;
+
+        if (whichop != GF_FOP_READDIRP)
+                goto out;
+
+        BD_RD_LOCK (&priv->lock);
+        list_for_each_entry (tmp_entry, &entries.list, list) {
+                char path[PATH_MAX];
+                int len = 0;
+
+                len = snprintf (path, sizeof (path), "%s/%s",
+                                bd_fd->p_entry->name, tmp_entry->d_name);
+                if (len < 0 || (size_t) len >= sizeof (path)) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "entry failed %s\n", tmp_entry->d_name);
+                        continue;
+                }
+                bdentry = bd_entry_get (path);
+                if (!bdentry) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "entry failed %s\n", tmp_entry->d_name);
+                        continue;
+                }
+                if (bdentry->attr->ia_ino)
+                        tmp_entry->d_ino = bdentry->attr->ia_ino;
+                memcpy (&tmp_entry->d_stat,
+                        bdentry->attr, sizeof (tmp_entry->d_stat));
+                bd_entry_put (bdentry);
+        }
+        BD_UNLOCK (&priv->lock);
+
+out:
+        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
+
+        gf_dirent_free (&entries);
+
+        return 0;
+}
+
+int32_t
+bd_readdir (call_frame_t *frame, xlator_t *this,
+            fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+        bd_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
+        return 0;
+}
+
+
+int32_t
+bd_readdirp (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, size_t size, off_t off, dict_t *dict)
+{
+        bd_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
+        return 0;
+}
+
+/* State dump hooks: nothing is dumped yet */
+int32_t
+bd_priv (xlator_t *this)
+{
+        return 0;
+}
+
+int32_t
+bd_inode (xlator_t *this)
+{
+        return 0;
+}
+
+/* unsupported interfaces: every fop below unwinds with -1/ENOSYS */
+int bd_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+                struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (setattr, frame, -1, ENOSYS, NULL, NULL, NULL);
+        return 0;
+}
+
+int32_t
+bd_readlink (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, size_t size, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+
+        /*
+         * No link target exists on failure, so hand back an empty string
+         * rather than an uninitialised buffer of caller-controlled size.
+         */
+        STACK_UNWIND_STRICT (readlink, frame, -1, ENOSYS, "", &stbuf, NULL);
+        return 0;
+}
+
+int
+bd_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t dev, mode_t umask, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+        struct iatt preparent = {0, };
+        struct iatt postparent = {0, };
+
+        STACK_UNWIND_STRICT (mknod, frame, -1, ENOSYS,
+                             (loc)?loc->inode:NULL, &stbuf, &preparent,
+                             &postparent, NULL);
+        return 0;
+}
+
+int
+bd_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+        struct iatt preparent = {0, };
+        struct iatt postparent = {0, };
+
+        STACK_UNWIND_STRICT (mkdir, frame, -1, ENOSYS,
+                             (loc)?loc->inode:NULL, &stbuf, &preparent,
+                             &postparent, NULL);
+        return 0;
+}
+
+int
+bd_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+          dict_t *xdata)
+{
+        struct iatt preparent = {0, };
+        struct iatt postparent = {0, };
+
+        STACK_UNWIND_STRICT (rmdir, frame, -1, ENOSYS,
+                             &preparent, &postparent, NULL);
+        return 0;
+}
+
+int
+bd_link (call_frame_t *frame, xlator_t *this,
+         loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+        struct iatt stbuf = {0, };
+        struct iatt preparent = {0,};
+        struct iatt postparent = {0,};
+
+        STACK_UNWIND_STRICT (link, frame, -1, ENOSYS,
+                             (oldloc)?oldloc->inode:NULL, &stbuf, &preparent,
+                             &postparent, NULL);
+
+        return 0;
+}
+
+int32_t
+bd_setxattr (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (setxattr, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_fsetxattr (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
+{
+        /* unwind the fop that was actually wound (was: setxattr) */
+        STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_getxattr (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, const char *name, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (getxattr, frame, -1, ENOSYS, NULL, NULL);
+        return 0;
+}
+
+int32_t
+bd_fgetxattr (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, const char *name, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOSYS, NULL, NULL);
+
+        return 0;
+}
+
+int32_t
+bd_removexattr (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, const char *name, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (removexattr, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_fremovexattr (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, const char *name, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_fsyncdir (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, int datasync, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+/* rate-limit counter shared by all lock stubs below */
+static int gf_bd_lk_log;
+int32_t
+bd_lk (call_frame_t *frame, xlator_t *this,
+       fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        struct gf_flock nullock = {0, };
+
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL);
+        return 0;
+}
+
+int32_t
+bd_inodelk (call_frame_t *frame, xlator_t *this,
+            const char *volume, loc_t *loc, int32_t cmd,
+            struct gf_flock *lock, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_finodelk (call_frame_t *frame, xlator_t *this,
+             const char *volume, fd_t *fd, int32_t cmd,
+             struct gf_flock *lock, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+
+int32_t
+bd_entrylk (call_frame_t *frame, xlator_t *this,
+            const char *volume, loc_t *loc, const char *basename,
+            entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_fentrylk (call_frame_t *frame, xlator_t *this,
+             const char *volume, fd_t *fd, const char *basename,
+             entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_bd_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+int32_t
+bd_rchecksum (call_frame_t *frame, xlator_t *this,
+              fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
+{
+        int32_t weak_checksum = 0;
+        /* zeroed so that no stack garbage is handed to the callback */
+        unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0, };
+
+        STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOSYS,
+                             weak_checksum, strong_checksum, NULL);
+        return 0;
+}
+
+int
+bd_xattrop (call_frame_t *frame, xlator_t *this,
+            loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr,
+            dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (xattrop, frame, -1, ENOSYS, xattr, NULL);
+        return 0;
+}
+
+
+int
+bd_fxattrop (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr,
+             dict_t *xdata)
+{
+        /* unwind the fop that was actually wound (was: xattrop) */
+        STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOSYS, xattr, NULL);
+        return 0;
+}
+
+/**
+ * notify - when parent sends PARENT_UP, send CHILD_UP event from here
+ */
+int32_t
+notify (xlator_t *this,
+        int32_t event,
+        void *data,
+        ...)
+{
+        switch (event)
+        {
+        case GF_EVENT_PARENT_UP:
+        {
+                /* Tell the parent that bd xlator is up */
+                default_notify (this, GF_EVENT_CHILD_UP, data);
+        }
+        break;
+        default:
+                break;
+        }
+        return 0;
+}
+
+/* mem_acct_init - register this translator's memory accounting types */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                return ret;
+
+        ret = xlator_mem_acct_init (this, gf_bd_mt_end + 1);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting init "
+                        "failed");
+                return ret;
+        }
+
+        return ret;
+}
+
+
+/**
+ * init - Constructs lists of LVs in the given VG
+ *
+ * Returns 0 on success. On any failure everything set up so far (root
+ * entry tree, private data, lvm handle) is released and -1 is returned.
+ */
+int
+init (xlator_t *this)
+{
+        bd_priv_t *_private = NULL;
+        int ret = 0;
+        char *vg = NULL;
+        char *device = NULL;
+
+        LOCK_INIT (&inode_lk);
+
+        bd_rootp = bd_entry_add_root ();
+        if (!bd_rootp) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: adding root entry failed");
+                return -1;
+        }
+
+        if (this->children) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd cannot have subvolumes");
+                goto error;
+        }
+
+        if (!this->parents) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "Volume is dangling. Please check the volume file.");
+        }
+
+        ret = dict_get_str (this->options, "device", &device);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify backend");
+                goto error;
+        }
+
+        /* Now we support only LV device */
+        if (strcasecmp (device, BACKEND_VG)) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: unknown %s backend %s", BD_XLATOR, device);
+                goto error;
+        }
+
+        ret = dict_get_str (this->options, "export", &vg);
+        if (ret) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "FATAL: storage/bd does not specify volume groups");
+                goto error;
+        }
+
+        _private = GF_CALLOC (1, sizeof(*_private), gf_bd_private);
+        if (!_private)
+                goto error;
+
+        pthread_rwlock_init (&_private->lock, NULL);
+        this->private = (void *)_private;
+        _private->handle = NULL;
+        _private->vg = gf_strdup (vg);
+        if (!_private->vg) {
+                goto error;
+        }
+
+        if (bd_build_lv_list (this->private, vg) < 0)
+                goto error;
+
+        return 0;
+error:
+        /* _private is still NULL when we fail before its allocation */
+        if (_private)
+                BD_WR_LOCK (&_private->lock);
+        if (bd_entry_cleanup () == 0)
+                bd_rootp = NULL;
+        if (_private) {
+                BD_UNLOCK (&_private->lock);
+                if (_private->handle)
+                        lvm_quit (_private->handle);
+                if (_private->vg)
+                        GF_FREE (_private->vg);
+                pthread_rwlock_destroy (&_private->lock);
+                GF_FREE (_private);
+                this->private = NULL;
+        }
+        return -1;
+}
+
+/* fini - release the lvm handle, the entry tree and the private data */
+void
+fini (xlator_t *this)
+{
+        bd_priv_t *priv = NULL;
+
+        if (!this)
+                return;
+        priv = this->private;
+        if (!priv)
+                return;
+        if (priv->handle)
+                lvm_quit (priv->handle);
+        BD_WR_LOCK (&priv->lock);
+        if (bd_entry_cleanup () == 0)
+                bd_rootp = NULL;
+        BD_UNLOCK (&priv->lock);
+        GF_FREE (priv->vg);
+        this->private = NULL;
+        GF_FREE (priv);
+        return;
+}
+
+struct xlator_dumpops dumpops = {
+        .priv = bd_priv,
+        .inode = bd_inode,
+};
+
+struct xlator_fops fops = {
+        /* Not supported */
+        .readlink = bd_readlink,
+        .mknod = bd_mknod,
+        .mkdir = bd_mkdir,
+        .rmdir = bd_rmdir,
+        .link = bd_link,
+        .setxattr = bd_setxattr,
+        .fsetxattr = bd_fsetxattr,
+        .getxattr = bd_getxattr,
+        .fgetxattr = bd_fgetxattr,
+        .removexattr = bd_removexattr,
+        .fremovexattr= bd_fremovexattr,
+        .fsyncdir = bd_fsyncdir,
+        .lk = bd_lk,
+        .inodelk = bd_inodelk,
+        .finodelk = bd_finodelk,
+        .entrylk = bd_entrylk,
+        .fentrylk = bd_fentrylk,
+        .rchecksum = bd_rchecksum,
+        .xattrop = bd_xattrop,
+        .fxattrop = bd_fxattrop,
+        .setattr = bd_setattr,
+
+        /* Supported */
+        .lookup = bd_lookup,
+        .opendir = bd_opendir,
+        .readdir = bd_readdir,
+        .readdirp = bd_readdirp,
+        .stat = bd_stat,
+        .statfs = bd_statfs,
+};
+
+struct xlator_cbks cbks = {
+        .releasedir = bd_releasedir,
+};
+
+struct volume_options options[] = {
+        { .key = {"export"},
+          .type = GF_OPTION_TYPE_STR},
+        { .key = {"device"},
+          .type = GF_OPTION_TYPE_STR},
+        { .key = {NULL} }
+};
diff --git a/xlators/storage/bd_map/src/bd_map.h b/xlators/storage/bd_map/src/bd_map.h
new file mode 100644
index 00000000000..974ec928899
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map.h
@@ -0,0 +1,75 @@
+/*
+  BD translator - Exports Block devices on server side as regular
+  files to client
+
+  Copyright IBM, Corp. 2012
+
+  This file is part of GlusterFS.
+
+  Author:
+  M. Mohan Kumar <mohan@in.ibm.com>
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BD_MAP_H
+#define _BD_MAP_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "mem-types.h"
+
+#define BD_XLATOR "block device mapper xlator"
+
+#define BACKEND_VG "vg"
+
+/* memory accounting types used by GF_CALLOC/GF_MALLOC in this translator */
+enum gf_bd_mem_types_ {
+        gf_bd_fd = gf_common_mt_end + 1,
+        gf_bd_private,
+        gf_bd_entry,
+        gf_bd_attr,
+        gf_bd_mt_end
+};
+
+/*
+ * Each BD/LV is represented by this data structure
+ * Usually root entry will have only children and there is no sibling for that
+ * All other entries may have children and/or sibling entries
+ * If an entry is a Volume Group it will have child (. & ..
and Logical
+ * Volumes) and also other Volume groups will be a sibling for this
+ */
+typedef struct bd_entry {
+        struct list_head child; /* List to child */
+        struct list_head sibling; /* List of siblings */
+        struct bd_entry *parent;/* Parent of this node */
+        struct bd_entry *link; /* Link to actual entry, if its . or .. */
+        char name[NAME_MAX + 1]; /* +1: room for the terminating NUL */
+        struct iatt *attr; /* shared with 'link' when link is set */
+        int refcnt; /* taken by bd_entry_get, dropped by bd_entry_put */
+        uint64_t size;
+        pthread_rwlock_t lock;
+} bd_entry_t;
+
+
+/**
+ * bd_fd - internal structure common to file and directory fd's
+ */
+typedef struct bd_fd {
+        bd_entry_t *entry; /* current readdir position */
+        bd_entry_t *p_entry; /* Parent entry */
+} bd_fd_t;
+
+/* Per-translator private data, stored in xlator_t->private */
+typedef struct bd_priv {
+        lvm_t handle; /* returned by lvm_init() */
+        pthread_rwlock_t lock;
+        char *vg; /* copy of the "export" option */
+} bd_priv_t;
+
+#endif
diff --git a/xlators/storage/bd_map/src/bd_map_help.c b/xlators/storage/bd_map/src/bd_map_help.c
new file mode 100644
index 00000000000..2b5c321f607
--- /dev/null
+++ b/xlators/storage/bd_map/src/bd_map_help.c
@@ -0,0 +1,462 @@
+/*
+  BD translator - Exports Block devices on server side as regular
+  files to client
+
+  Copyright IBM, Corp. 2012
+
+  This file is part of GlusterFS.
+
+  Author:
+  M. Mohan Kumar <mohan@in.ibm.com>
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define __XOPEN_SOURCE 500
+
+#include <libgen.h>
+#include <time.h>
+#include <lvm2app.h>
+
+#include "bd_map.h"
+#include "bd_map_help.h"
+#include "defaults.h"
+#include "glusterfs3-xdr.h"
+
+/* First entry on a directory's child list, i.e. its nameless header entry */
+#define CHILD_ENTRY(node) list_entry ((&(node)->child)->next, \
+                                      typeof(*(node)), child)
+
+bd_entry_t *bd_rootp;
+gf_lock_t inode_lk;
+static uint64_t bd_entry_ino = 5000; /* Starting inode */
+
+/* Hand out the next inode number, serialised by inode_lk */
+static void bd_entry_get_ino (uint64_t *inode)
+{
+        LOCK (&inode_lk);
+        {
+                *inode = bd_entry_ino++;
+        }
+        UNLOCK (&inode_lk);
+}
+
+/*
+ * Allocate a zeroed entry called 'name' together with its own iatt.
+ * Returns NULL on allocation failure or when 'name' does not fit.
+ */
+static bd_entry_t *bd_entry_init (const char *name)
+{
+        bd_entry_t *bdentry;
+
+        if (!name)
+                return NULL;
+
+        bdentry = GF_CALLOC (1, sizeof(bd_entry_t), gf_bd_entry);
+        if (!bdentry)
+                return NULL;
+
+        /* refuse names that would overflow the fixed-size buffer */
+        if (strlen (name) >= sizeof (bdentry->name)) {
+                GF_FREE (bdentry);
+                return NULL;
+        }
+
+        bdentry->attr = GF_CALLOC (1, sizeof(struct iatt), gf_bd_attr);
+        if (!bdentry->attr) {
+                GF_FREE (bdentry);
+                return NULL;
+        }
+
+        strcpy (bdentry->name, name);
+        INIT_LIST_HEAD (&bdentry->sibling);
+        INIT_LIST_HEAD (&bdentry->child);
+        bdentry->link = NULL;
+        bdentry->refcnt = 0;
+        return bdentry;
+}
+
+/*
+ * Allocate an alias called 'name' for 'orig' (used for the header, "." and
+ * ".." entries). The alias shares orig->attr and points back through 'link'.
+ */
+static bd_entry_t *bd_entry_clone (bd_entry_t *orig, char *name)
+{
+        bd_entry_t *bdentry;
+
+        if (!orig || !name)
+                return NULL;
+
+        bdentry = GF_CALLOC (1, sizeof(bd_entry_t), gf_bd_entry);
+        if (!bdentry)
+                return NULL;
+
+        if (strlen (name) >= sizeof (bdentry->name)) {
+                GF_FREE (bdentry);
+                return NULL;
+        }
+
+        bdentry->attr = orig->attr;
+
+        strcpy (bdentry->name, name);
+        INIT_LIST_HEAD (&bdentry->sibling);
+        INIT_LIST_HEAD (&bdentry->child);
+        bdentry->link = orig;
+        bdentry->refcnt = 0;
+        return bdentry;
+}
+
+/*
+ * Give directory 'dir' its header entry followed by "." and "..".
+ * Returns 0 on success; on allocation failure nothing is linked and -1 is
+ * returned.
+ */
+static int bd_entry_add_dots (bd_entry_t *dir)
+{
+        bd_entry_t *h_entry = NULL;
+        bd_entry_t *d_entry = NULL;
+        bd_entry_t *dd_entry = NULL;
+
+        h_entry = bd_entry_clone (dir, "");
+        d_entry = bd_entry_clone (dir, ".");
+        dd_entry = bd_entry_clone (dir, "..");
+        if (!h_entry || !d_entry || !dd_entry) {
+                if (h_entry)
+                        GF_FREE (h_entry);
+                if (d_entry)
+                        GF_FREE (d_entry);
+                if (dd_entry)
+                        GF_FREE (dd_entry);
+                return -1;
+        }
+
+        dir->child.next = &h_entry->child;
+        dir->child.prev = &h_entry->child;
+
+        list_add_tail (&d_entry->sibling, &h_entry->sibling);
+        list_add_tail (&dd_entry->sibling, &h_entry->sibling);
+        return 0;
+}
+
+/* Fill 'attr' with synthetic values: root owned, mode 0750, times = now */
+static void bd_entry_init_iattr (struct iatt *attr, int type)
+{
+        struct timespec ts = {0, };
+
+        clock_gettime (CLOCK_REALTIME, &ts);
+        attr->ia_dev = ia_makedev (0, 0); /* FIXME: */
+        attr->ia_type = type;
+        attr->ia_prot = ia_prot_from_st_mode (0750);
+        attr->ia_nlink = 2;
+        attr->ia_uid = 0;
+        attr->ia_gid = 0;
+        attr->ia_rdev = ia_makedev (0, 0);
+
+        attr->ia_size = 4096; /* FIXME */
+        attr->ia_blksize = 4096;
+        attr->ia_blocks = 0;
+
+        attr->ia_atime = ts.tv_sec;
+        attr->ia_atime_nsec = ts.tv_nsec;
+        attr->ia_mtime = ts.tv_sec;
+        attr->ia_mtime_nsec = ts.tv_nsec;
+        attr->ia_ctime = ts.tv_sec;
+        attr->ia_ctime_nsec = ts.tv_nsec;
+}
+
+/*
+ * bd_entry_istat: Initialize iatt structure for a given path. The values
+ * come from stat(2) when it succeeds and are synthesised otherwise. The
+ * gfid is set to the hex text of the inode number, truncated to fit.
+ */
+void bd_entry_istat (const char *path, struct iatt *attr, int type)
+{
+        struct stat stbuf = {0, };
+
+        if (stat (path, &stbuf) < 0)
+                bd_entry_init_iattr (attr, type);
+        else
+                iatt_from_stat (attr, &stbuf);
+        /* ia_gfid is a fixed-size buffer, never write past it */
+        snprintf ((char *)attr->ia_gfid, sizeof (attr->ia_gfid), "%lx",
+                  (unsigned long) stbuf.st_ino);
+}
+
+/*
+ * Adds the root entry and required entries
+ * ie header entry followed by . and .. entries
+ * Returns NULL when any allocation fails.
+ */
+bd_entry_t *bd_entry_add_root (void)
+{
+        bd_entry_t *bdentry = NULL;
+
+        bdentry = bd_entry_init ("/");
+        if (!bdentry)
+                return NULL;
+
+        bdentry->parent = bdentry;
+
+        bd_entry_get_ino (&bdentry->attr->ia_ino);
+        snprintf ((char *)bdentry->attr->ia_gfid,
+                  sizeof (bdentry->attr->ia_gfid), "%ld",
+                  (long) (bdentry->attr->ia_ino << 2));
+        bd_entry_init_iattr (bdentry->attr, IA_IFDIR);
+
+        if (bd_entry_add_dots (bdentry) < 0) {
+                GF_FREE (bdentry->attr);
+                GF_FREE (bdentry);
+                return NULL;
+        }
+        return bdentry;
+}
+
+/*
+ * Create an entry 'name' of the given type (IA_IFREG or IA_IFDIR) below
+ * 'parent' (the root when parent is NULL), with a copy of '*iattr' as its
+ * attributes. iattr->ia_type and iattr->ia_ino are updated in place.
+ * Returns the new entry or NULL on bad arguments/allocation failure.
+ */
+bd_entry_t *bd_entry_add (bd_entry_t *parent, const char *name,
+                          struct iatt *iattr, ia_type_t type)
+{
+        bd_entry_t *bdentry = NULL;
+        bd_entry_t *sentry = NULL;
+        struct timespec ts = { 0, };
+
+        if (!parent)
+                parent = bd_rootp;
+
+        if (!parent || !iattr)
+                return NULL;
+
+        if (type != IA_IFREG && type != IA_IFDIR)
+                return NULL;
+
+        /* a parent without a header entry cannot take children */
+        if (parent->child.next == &parent->child)
+                return NULL;
+
+        bdentry = bd_entry_init (name);
+        if (!bdentry)
+                return NULL;
+
+        bdentry->parent = parent;
+
+        iattr->ia_type = type;
+
+        bd_entry_get_ino (&iattr->ia_ino);
+        if (IA_ISDIR(type)) {
+                if (bd_entry_add_dots (bdentry) < 0) {
+                        GF_FREE (bdentry->attr);
+                        GF_FREE (bdentry);
+                        return NULL;
+                }
+                parent->attr->ia_nlink++;
+        }
+        memcpy (bdentry->attr, iattr, sizeof(*iattr));
+
+        clock_gettime (CLOCK_REALTIME, &ts);
+        parent->attr->ia_mtime = ts.tv_sec;
+        parent->attr->ia_mtime_nsec = ts.tv_nsec;
+        bdentry->size = iattr->ia_size;
+
+        sentry = CHILD_ENTRY (parent);
+        list_add_tail (&bdentry->sibling, &sentry->sibling);
+        return bdentry;
+}
+
+/*
+ * Find the child called 'name' in directory 'parent' (the root when parent
+ * is NULL). Returns NULL when there is no such child.
+ */
+bd_entry_t *bd_entry_get_list (const char *name, bd_entry_t *parent)
+{
+        bd_entry_t *centry = NULL;
+        bd_entry_t *bdentry = NULL;
+
+        if (!parent)
+                parent = bd_rootp;
+
+        if (!name || !parent)
+                return NULL;
+
+        if (parent->child.next == &parent->child)
+                return NULL;
+
+        centry = CHILD_ENTRY (parent);
+        if (!strcmp (centry->name, name))
+                return centry;
+
+        list_for_each_entry (bdentry, &centry->sibling, sibling) {
+                if (!strcmp (bdentry->name, name))
+                        return bdentry;
+        }
+        return NULL;
+}
+
+/* FIXME: Do we need hashing here?
*/
+/*
+ * Find the entry whose gfid, in uuid_utoa_r() text form, equals the one
+ * given as "<gfid:...>". Only real entries are matched, never the
+ * header/"."/".." aliases. The search covers the root's children and their
+ * children. Returns NULL when 'path' is malformed, memory runs out or
+ * nothing matches.
+ */
+bd_entry_t *bd_entry_find_by_gfid (const char *path)
+{
+        bd_entry_t *h = NULL;
+        bd_entry_t *node = NULL;
+        bd_entry_t *cnode = NULL;
+        bd_entry_t *leaf = NULL;
+        char *gfid = NULL;
+        char *cp = NULL;
+        char *bgfid = NULL;
+        bd_entry_t *entry = NULL;
+
+        if (!path || !bd_rootp)
+                return NULL;
+
+        /* the scanned token can never be longer than the input itself */
+        gfid = GF_MALLOC (strlen(path) + 1, gf_common_mt_char);
+        if (!gfid)
+                return NULL;
+
+        if (sscanf (path, "<gfid:%s", gfid) != 1)
+                goto out;
+
+        cp = strchr(gfid, '>');
+        if (!cp)
+                goto out;
+        *cp = '\0';
+
+        bgfid = GF_MALLOC (GF_UUID_BUF_SIZE, gf_common_mt_char);
+        if (!bgfid)
+                goto out;
+
+        if (bd_rootp->child.next == &bd_rootp->child)
+                goto out;
+        node = CHILD_ENTRY (bd_rootp);
+
+        list_for_each_entry (h, &node->sibling, sibling) {
+                uuid_utoa_r (h->attr->ia_gfid, bgfid);
+                if (!h->link && !strcmp (gfid, bgfid)) {
+                        entry = h;
+                        goto out;
+                }
+
+                /* if we have children for this node */
+                if (h->child.next != &h->child) {
+                        cnode = CHILD_ENTRY (h);
+                        uuid_utoa_r (cnode->attr->ia_gfid, bgfid);
+                        if (!cnode->link && !strcmp (gfid, bgfid)) {
+                                entry = cnode;
+                                goto out;
+                        }
+
+                        list_for_each_entry (leaf, (&cnode->sibling),
+                                             sibling) {
+                                uuid_utoa_r (leaf->attr->ia_gfid, bgfid);
+                                if (!leaf->link && !strcmp (gfid, bgfid)) {
+                                        entry = leaf;
+                                        goto out;
+                                }
+
+                        }
+                }
+        }
+out:
+        if (bgfid)
+                GF_FREE (bgfid);
+        if (gfid)
+                GF_FREE (gfid);
+
+        return entry;
+}
+
+/*
+ * Resolve 'name' ("/", a '/'-separated path or "<gfid:...>") and take a
+ * reference on the entry found. Returns NULL when nothing matches.
+ * Called with priv->bd_lock held
+ */
+bd_entry_t *bd_entry_get (const char *name)
+{
+        bd_entry_t *pentry = NULL;
+        char *path = NULL;
+        char *comp = NULL;
+        char *save = NULL;
+
+        if (!name || !bd_rootp)
+                return NULL;
+
+        if (!strncmp (name, "<gfid:", 6)) {
+                pentry = bd_entry_find_by_gfid (name);
+                if (pentry)
+                        pentry->refcnt++;
+                return pentry;
+        }
+
+        if (!strcmp (name, "/")) {
+                bd_rootp->refcnt++;
+                return bd_rootp;
+        }
+
+        path = gf_strdup (name);
+        if (!path)
+                return NULL;
+
+        comp = strtok_r (path, "/", &save);
+        if (!comp)
+                goto out; /* no path component at all, e.g. "" */
+
+        pentry = bd_entry_get_list (comp, NULL);
+        while (pentry) {
+                comp = strtok_r (NULL, "/", &save);
+                if (!comp)
+                        break;
+                pentry = bd_entry_get_list (comp, pentry);
+        }
+
+        if (pentry)
+                pentry->refcnt++;
+out:
+        GF_FREE (path);
+        return pentry;
+}
+
+/*
+ * Drop a reference taken by bd_entry_get.
+ * Called with priv->bd_lock held
+ */
+void bd_entry_put (bd_entry_t *entry)
+{
+        if (!entry)
+                return;
+        entry->refcnt--;
+}
+
+/*
+ * Initialise the lvm handle in 'priv', then add volume group 'vg_name' as a
+ * directory below the root and each of its logical volumes (except the
+ * reserved snapshot* names) as a regular file whose size is the LV size.
+ * Returns 0 on success and -1 on failure; priv->handle is left for the
+ * caller to release with lvm_quit().
+ */
+int bd_build_lv_list (bd_priv_t *priv, char *vg_name)
+{
+        struct dm_list *lv_dm_list = NULL;
+        struct lvm_lv_list *lv_list = NULL;
+        struct iatt iattr = {0, };
+        char path[PATH_MAX] = {0, };
+        vg_t vg = NULL;
+        bd_entry_t *vg_map = NULL;
+        bd_entry_t *bd = NULL;
+        int ret = -1;
+        const char *lv_name = NULL;
+
+        priv->handle = lvm_init (NULL);
+        if (!priv->handle) {
+                gf_log (THIS->name, GF_LOG_CRITICAL, "FATAL: bd_init failed");
+                return -1;
+        }
+
+        BD_WR_LOCK (&priv->lock);
+
+        vg = lvm_vg_open (priv->handle, vg_name, "r", 0);
+        if (!vg) {
+                gf_log (THIS->name, GF_LOG_CRITICAL,
+                        "opening vg %s failed", vg_name);
+                goto out;
+        }
+        /* get list of LVs associated with this VG */
+        lv_dm_list = lvm_vg_list_lvs (vg);
+        snprintf (path, sizeof (path), "/dev/%s", vg_name);
+        bd_entry_istat (path, &iattr, IA_IFDIR);
+        vg_map = bd_entry_add (bd_rootp, vg_name, &iattr,
+                               IA_IFDIR);
+        if (!vg_map) {
+                gf_log (THIS->name, GF_LOG_CRITICAL,
+                        "bd_add_entry failed");
+                goto out;
+        }
+        ret = 0;
+        if (!lv_dm_list) /* no lvs for this VG */
+                goto out;
+
+        dm_list_iterate_items (lv_list, lv_dm_list) {
+                if (!lv_list)
+                        continue;
+                lv_name = lvm_lv_get_name (lv_list->lv);
+                if (!lv_name)
+                        continue;
+                /* snapshot%d is reserved name */
+                if (!strncmp (lv_name, "snapshot", 8))
+                        continue;
+                /* get symbolic path for this LV */
+                snprintf (path, sizeof (path), "/dev/%s/%s", vg_name, lv_name);
+                bd_entry_istat (path, &iattr, IA_IFREG);
+                /* Make the file size equivalant to BD size */
+                iattr.ia_size = lvm_lv_get_size (lv_list->lv);
+                /* got LV, add it to our tree */
+                bd = bd_entry_add (vg_map, lv_name, &iattr, IA_IFREG);
+                if (bd == NULL) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "bd_add_entry failed");
+                        /* report the failure instead of a partial list */
+                        ret = -1;
+                        goto out;
+                }
+        }
+out:
+        if (vg)
+                lvm_vg_close (vg);
+
+        BD_UNLOCK (&priv->lock);
+        return ret;
+}
+
+/*
+ * Called with bd_lock held to cleanup entire list.
If there was a + * reference to any one of the entry, nothing cleared. + * Return 0 on success -1 in case if there is a reference to the entry + */ +int bd_entry_cleanup (void) +{ + bd_entry_t *node = NULL; + bd_entry_t *tmp = NULL; + bd_entry_t *tmp2 = NULL; + bd_entry_t *cnode = NULL; + bd_entry_t *h = NULL; + bd_entry_t *leaf = NULL; + + if (!bd_rootp) + return 0; + + node = CHILD_ENTRY (bd_rootp); + if (node->refcnt) { + gf_log (THIS->name, GF_LOG_WARNING, + "entry %s is inuse\n", node->name); + return -1; + } + list_for_each_entry_safe (h, tmp, &node->sibling, sibling) { + /* if we have children for this node */ + if (h->child.next != &h->child) { + cnode = CHILD_ENTRY (h); + list_for_each_entry_safe (leaf, tmp2, (&cnode->sibling), + sibling) { + list_del_init (&leaf->sibling); + list_del_init (&leaf->child); + if (!leaf->link) + GF_FREE (leaf->attr); + GF_FREE (leaf); + } + list_del_init (&cnode->sibling); + list_del_init (&cnode->child); + if (!cnode->link) + GF_FREE (cnode->attr); + GF_FREE (cnode); + } + if (!h->link) + GF_FREE (h->attr); + GF_FREE (h); + } + GF_FREE (h); + GF_FREE (bd_rootp->attr); + GF_FREE (bd_rootp); + return 0; +} diff --git a/xlators/storage/bd_map/src/bd_map_help.h b/xlators/storage/bd_map/src/bd_map_help.h new file mode 100644 index 00000000000..997b8b7162e --- /dev/null +++ b/xlators/storage/bd_map/src/bd_map_help.h @@ -0,0 +1,63 @@ +/* + BD translator - Exports Block devices on server side as regular + files to client. + + Copyright IBM, Corp. 2012 + + This file is part of GlusterFS. + + Author: + M. Mohan Kumar <mohan@in.ibm.com> + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _BD_MAP_HELP_H +#define _BD_MAP_HELP_H + +#define BD_RD_LOCK(lock) \ + pthread_rwlock_rdlock (lock); + +#define BD_WR_LOCK(lock) \ + pthread_rwlock_wrlock (lock); + +#define BD_UNLOCK(lock) \ + pthread_rwlock_unlock (lock); + +#define BD_WR_ENTRY(priv, bdentry, path) \ + do { \ + BD_WR_LOCK (&priv->lock); \ + bdentry = bd_entry_get (path); \ + BD_UNLOCK (&priv->lock); \ + } while (0) + +#define BD_ENTRY(priv, bdentry, path) \ + do { \ + BD_RD_LOCK (&priv->lock); \ + bdentry = bd_entry_get (path); \ + BD_UNLOCK (&priv->lock); \ + } while (0) + +#define BD_PUT_ENTRY(priv, bdentry) \ + do { \ + BD_RD_LOCK (&priv->lock); \ + bd_entry_put (bdentry); \ + BD_UNLOCK (&priv->lock); \ + } while (0) + +extern bd_entry_t *bd_rootp; +extern gf_lock_t inode_lk; + +void bd_entry_istat (const char *path, struct iatt *attr, int type); +bd_entry_t *bd_entry_add_root (void); +bd_entry_t *bd_entry_add (bd_entry_t *parent, const char *name, + struct iatt *iattr, ia_type_t type); +bd_entry_t *bd_entry_get_list (const char *name, bd_entry_t *parent); +bd_entry_t *bd_entry_get (const char *name); +void bd_entry_put (bd_entry_t *entry); +int bd_build_lv_list (bd_priv_t *priv, char *vg); +int bd_entry_cleanup (void); + +#endif |