summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorM. Mohan Kumar <mohan@in.ibm.com>2013-11-13 22:44:43 +0530
committerAnand Avati <avati@redhat.com>2013-11-13 11:39:22 -0800
commit81a57679c20ac0ac9b48e313af75036132e3a5ad (patch)
treec99c3cf75096a2530b65a3bb5b3c19c2c463b7fa
parentb222ce817f5f324fe20d4d3614001ed2f177afb8 (diff)
bd: Add support to create clone, snapshot and merge of LV images.
Special xattr names "clone" & "snapshot" can be used to create full and linked clone of the LV images. GFID of destination posix file (to be mapped) is passed as a value to the xattr. Destination posix file must exist before running this operation. These operations form a basis for offloading storage related operations from QEMU to GlusterFS. Syntax for full clone: xattr name: "clone" value: "gfid-of-dest-file" Syntax for linked clone: xattr name: "snapshot" value: "gfid-of-dest-file" Syntax for merging: xattr name: "merge" value: "path-to-snapshot-file" Example: setfattr -n clone -v <gfid-of-dest-file> /media/source setfattr -n snapshot -v <gfid-of-dest-file> /media/source setfattr -n merge -v "/media/sn" /media/sn Change-Id: Id9f984a709d4c2e52a64ae75bb12a8ecb01f8776 BUG: 1028672 Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Reviewed-on: http://review.gluster.org/5626 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c39
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h6
-rw-r--r--xlators/storage/bd/src/bd-helper.c202
-rw-r--r--xlators/storage/bd/src/bd.c421
-rw-r--r--xlators/storage/bd/src/bd.h30
7 files changed, 654 insertions, 56 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index cc425353516..df6087659ef 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1109,7 +1109,8 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
brick = strtok_r (brick_list+1, " \n", &saveptr);
#ifdef HAVE_BD_XLATOR
if (brickinfo->vg[0])
- caps = CAPS_BD | CAPS_THIN;
+ caps = CAPS_BD | CAPS_THIN |
+ CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
#endif
while (i <= count) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 181b8fcf1a9..fcffccc07b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -335,7 +335,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
char *volume_id_str = NULL;
struct args_pack pack = {0,};
xlator_t *this = NULL;
-
+ int caps = 0;
GF_ASSERT (volinfo);
GF_ASSERT (volumes);
@@ -401,6 +401,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
#ifdef HAVE_BD_XLATOR
if (volinfo->caps) {
+ caps = 0;
snprintf (key, 256, "volume%d.xlator0", count);
buf = GF_MALLOC (256, gf_common_mt_char);
if (!buf) {
@@ -416,7 +417,8 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
}
if (volinfo->caps & CAPS_THIN) {
- snprintf (key, 256, "volume%d.xlator0.caps0", count);
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
buf = GF_MALLOC (256, gf_common_mt_char);
if (!buf) {
ret = ENOMEM;
@@ -429,6 +431,39 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
goto out;
}
}
+
+ if (volinfo->caps & CAPS_OFFLOAD_COPY) {
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "offload_copy");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+
+ if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) {
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "offload_snapshot");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+
}
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index a2bd7334c93..561ff652d81 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -665,7 +665,7 @@ glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg)
}
next:
- brick->caps = CAPS_BD;
+ brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
dm_lvlist = lvm_vg_list_lvs (vg);
if (!dm_lvlist)
@@ -953,7 +953,6 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
}
}
-
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
ret = glusterd_resolve_brick (brickinfo);
if (ret) {
@@ -1011,8 +1010,8 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
}
#ifdef HAVE_BD_XLATOR
if (brickinfo->vg[0])
- caps = CAPS_BD | CAPS_THIN;
-
+ caps = CAPS_BD | CAPS_THIN |
+ CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
/* Check for VG/thin pool if its BD volume */
if (brickinfo->vg[0]) {
ret = glusterd_is_valid_vg (brickinfo, 0, msg);
@@ -1602,7 +1601,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
if (count)
brick = strtok_r (brick_list+1, " \n", &saveptr);
- caps = CAPS_BD | CAPS_THIN;
+ caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
while ( i <= count) {
ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 6423d5a8154..ad63682e55e 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -234,8 +234,10 @@ struct _auth {
typedef struct _auth auth_t;
/* Capabilities of xlator */
-#define CAPS_BD 0x00000001
-#define CAPS_THIN 0x00000010
+#define CAPS_BD 0x00000001
+#define CAPS_THIN 0x00000002
+#define CAPS_OFFLOAD_COPY 0x00000004
+#define CAPS_OFFLOAD_SNAPSHOT 0x00000008
struct glusterd_rebalance_ {
gf_defrag_status_t defrag_status;
diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c
index 8781f9fdfb4..5525e346bd7 100644
--- a/xlators/storage/bd/src/bd-helper.c
+++ b/xlators/storage/bd/src/bd-helper.c
@@ -579,3 +579,205 @@ bd_update_amtime(struct iatt *iatt, int flag)
iatt->ia_mtime_nsec = ts.tv_nsec;
}
}
+
+int
+bd_snapshot_create (bd_local_t *local, bd_priv_t *priv)
+{
+ char *path = NULL;
+ bd_gfid_t dest = {0, };
+ bd_gfid_t origin = {0, };
+ int ret = 0;
+ runner_t runner = {0, };
+ struct stat stat = {0, };
+
+ uuid_utoa_r (local->dloc->gfid, dest);
+ uuid_utoa_r (local->loc.gfid, origin);
+
+ gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest);
+ if (!path) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "Insufficient memory");
+ return ENOMEM;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, LVM_CREATE, NULL);
+ runner_add_args (&runner, "--snapshot", NULL);
+ runner_argprintf (&runner, "/dev/%s/%s", priv->vg, origin);
+ runner_add_args (&runner, "--name", NULL);
+ runner_argprintf (&runner, "%s", dest);
+ if (strcmp (local->bdatt->type, BD_THIN))
+ runner_argprintf (&runner, "-L%ldB", local->size);
+ runner_start (&runner);
+ runner_end (&runner);
+
+ if (lstat (path, &stat) < 0)
+ ret = EIO;
+
+ GF_FREE (path);
+ return ret;
+}
+
+int
+bd_clone (bd_local_t *local, bd_priv_t *priv)
+{
+ int ret = ENOMEM;
+ int fd1 = -1;
+ int fd2 = -1;
+ int i = 0;
+ char *buff = NULL;
+ ssize_t bytes = 0;
+ char *spath = NULL;
+ char *dpath = NULL;
+ struct iovec *vec = NULL;
+ bd_gfid_t source = {0, };
+ bd_gfid_t dest = {0, };
+ void *bufp[IOV_NR] = {0, };
+
+ vec = GF_CALLOC (IOV_NR, sizeof (struct iovec), gf_common_mt_iovec);
+ if (!vec)
+ return ENOMEM;
+
+ for (i = 0; i < IOV_NR; i++) {
+ bufp[i] = page_aligned_alloc (IOV_SIZE, &buff);
+ if (!buff)
+ goto out;
+ vec[i].iov_base = buff;
+ vec[i].iov_len = IOV_SIZE;
+ }
+
+ uuid_utoa_r (local->loc.gfid, source);
+ uuid_utoa_r (local->dloc->gfid, dest);
+
+ gf_asprintf (&spath, "/dev/%s/%s", priv->vg, source);
+ gf_asprintf (&dpath, "/dev/%s/%s", priv->vg, dest);
+ if (!spath || !dpath)
+ goto out;
+
+ ret = bd_create (local->dloc->gfid, local->size,
+ local->bdatt->type, priv);
+ if (ret)
+ goto out;
+
+ fd1 = open (spath, O_RDONLY | O_DIRECT);
+ if (fd1 < 0) {
+ ret = errno;
+ goto out;
+ }
+ fd2 = open (dpath, O_WRONLY | O_DIRECT);
+ if (fd2 < 0) {
+ ret = errno;
+ goto out;
+ }
+
+ while (1) {
+ bytes = readv (fd1, vec, IOV_NR);
+ if (bytes < 0) {
+ ret = errno;
+ gf_log (THIS->name, GF_LOG_WARNING, "read failed: %s",
+ strerror (ret));
+ goto out;
+ }
+ if (!bytes)
+ break;
+ bytes = writev (fd2, vec, IOV_NR);
+ if (bytes < 0) {
+ ret = errno;
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "write failed: %s", strerror (ret));
+ goto out;
+ }
+ }
+ ret = 0;
+
+out:
+ for (i = 0; i < IOV_NR; i++)
+ GF_FREE (bufp[i]);
+ GF_FREE (vec);
+
+ if (fd1 != -1)
+ close (fd1);
+ if (fd2 != -1)
+ close (fd2);
+
+ FREE (spath);
+ FREE (dpath);
+
+ return ret;
+}
+
+/*
+ * Merges snapshot LV to origin LV and returns status
+ */
+int
+bd_merge (bd_priv_t *priv, uuid_t gfid)
+{
+ bd_gfid_t dest = {0, };
+ char *path = NULL;
+ struct stat stat = {0, };
+ runner_t runner = {0, };
+ int ret = 0;
+
+ uuid_utoa_r (gfid, dest);
+ gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest);
+
+ runinit (&runner);
+ runner_add_args (&runner, LVM_CONVERT, NULL);
+ runner_add_args (&runner, "--merge", NULL);
+ runner_argprintf (&runner, "%s", path);
+ runner_start (&runner);
+ runner_end (&runner);
+
+ if (!lstat (path, &stat))
+ ret = EIO;
+
+ GF_FREE (path);
+
+ return ret;
+}
+
+int
+bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict)
+{
+ vg_t brick = NULL;
+ lvm_property_value_t prop = {0, };
+ lv_t lv = NULL;
+ int ret = -1;
+ bd_gfid_t gfid = {0, };
+ inode_t *inode = NULL;
+ char *origin = NULL;
+
+ brick = lvm_vg_open (priv->handle, priv->vg, "w", 0);
+ if (!brick) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "VG %s is not found",
+ priv->vg);
+ return ENOENT;
+ }
+
+ if (fd)
+ inode = fd->inode;
+ else
+ inode = loc->inode;
+
+ uuid_utoa_r (inode->gfid, gfid);
+ lv = lvm_lv_from_name (brick, gfid);
+ if (!lv) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid);
+ ret = ENOENT;
+ goto out;
+ }
+
+ prop = lvm_lv_get_property (lv, "origin");
+ if (!prop.is_valid || !prop.value.string) {
+ ret = ENODATA;
+ goto out;
+ }
+
+ origin = gf_strdup (prop.value.string);
+ ret = dict_set_dynstr (dict, BD_ORIGIN, origin);
+
+out:
+ lvm_vg_close (brick);
+ return ret;
+}
+
diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c
index 555f1d51fc1..405474c587d 100644
--- a/xlators/storage/bd/src/bd.c
+++ b/xlators/storage/bd/src/bd.c
@@ -1063,6 +1063,278 @@ out:
return 0;
}
+int
+bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ bd_local_t *local = frame->local;
+
+ if (local->fd)
+ BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL);
+ else
+ BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL);
+
+ return 0;
+}
+
+int
+bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ bd_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto out;
+
+ if (local->offload == BD_OF_SNAPSHOT)
+ op_ret = bd_snapshot_create (frame->local, this->private);
+ else
+ op_ret = bd_clone (frame->local, this->private);
+
+ if (op_ret) {
+ STACK_WIND (frame, bd_offload_rm_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ local->dloc, BD_XATTR, NULL);
+ return 0;
+ }
+
+out:
+ if (local->fd)
+ BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ else
+ BD_STACK_UNWIND (setxattr, frame, op_errno, op_errno, NULL);
+
+ return 0;
+}
+
+int
+bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ char *bd = NULL;
+ bd_local_t *local = frame->local;
+ char *type = NULL;
+ char *p = NULL;
+
+ if (op_ret < 0)
+ goto out;
+
+ if (dict_get_str (xattr, BD_XATTR, &p)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ type = gf_strdup (p);
+ BD_VALIDATE_MEM_ALLOC (type, op_errno, out);
+
+ p = strrchr (type, ':');
+ if (!p) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING,
+ "source file xattr %s corrupted?", type);
+ goto out;
+ }
+
+ *p='\0';
+
+ /* For clone size is taken from source LV */
+ if (!local->size) {
+ p++;
+ gf_string2bytesize (p, &local->size);
+ }
+ gf_asprintf (&bd, "%s:%ld", type, local->size);
+ local->bdatt->type = gf_strdup (type);
+ dict_del (local->dict, BD_XATTR);
+ dict_del (local->dict, LINKTO);
+ if (dict_set_dynstr (local->dict, BD_XATTR, bd)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ local->dloc, local->dict, 0, NULL);
+
+ return 0;
+
+out:
+ if (local->fd)
+ BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ else
+ BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+ GF_FREE (type);
+ GF_FREE (bd);
+
+ return 0;
+}
+
+int
+bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *iatt,
+ dict_t *xattr, struct iatt *postparent)
+{
+ bd_local_t *local = frame->local;
+ char *bd = NULL;
+ int ret = -1;
+ char *linkto = NULL;
+
+ if (op_ret < 0 && op_errno != ENODATA) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt->ia_type)) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a "
+ "regular file");
+ goto out;
+ }
+
+ ret = dict_get_str (xattr, LINKTO, &linkto);
+ if (linkto) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "destination file not "
+ "present in same brick");
+ goto out;
+ }
+
+ ret = dict_get_str (xattr, BD_XATTR, &bd);
+ if (bd) {
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ local->bdatt = CALLOC (1, sizeof (bd_attr_t));
+ BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out);
+
+ STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &local->loc, BD_XATTR, NULL);
+
+ return 0;
+out:
+ if (local->fd)
+ BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ else
+ BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+int
+bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ /* FIXME: if delete failed, remove xattr */
+
+ BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+bd_do_merge(call_frame_t *frame, xlator_t *this)
+{
+ bd_local_t *local = frame->local;
+ inode_t *parent = NULL;
+ char *p = NULL;
+ int op_errno = 0;
+
+ op_errno = bd_merge (this->private, local->inode->gfid);
+ if (op_errno)
+ goto out;
+
+ /*
+ * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does
+ * not have loc->pargfid set. Get parent's gfid by getting parents inode
+ */
+ parent = inode_parent (local->inode, NULL, NULL);
+ if (!parent) {
+ /*
+ * FIXME: Snapshot LV already deleted.
+ * remove xattr, instead of returning failure
+ */
+ op_errno = EINVAL;
+ goto out;
+ }
+ uuid_copy (local->loc.pargfid, parent->gfid);
+
+ p = strrchr (local->loc.path, '/');
+ if (p)
+ p++;
+ local->loc.name = p;
+
+ STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ &local->loc, 0, NULL);
+
+ return 0;
+out:
+ BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+
+ return op_errno;
+}
+
+int
+bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, bd_offload_t offload)
+{
+ char *param = NULL;
+ char *param_copy = NULL;
+ char *p = NULL;
+ char *size = NULL;
+ char *gfid = NULL;
+ int op_errno = 0;
+ bd_local_t *local = frame->local;
+
+ param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char);
+ BD_VALIDATE_MEM_ALLOC (param, op_errno, out);
+ param_copy = param;
+
+ local->dict = dict_new ();
+ BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out);
+
+ local->dloc = CALLOC (1, sizeof (loc_t));
+ BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out);
+
+ strncpy (param, local->data->data, local->data->len);
+
+ gfid = strtok_r (param, ":", &p);
+ size = strtok_r (NULL, ":", &p);
+ if (size)
+ gf_string2bytesize (size, &local->size);
+ else if (offload != BD_OF_CLONE)
+ local->size = bd_get_default_extent (this->private);
+
+ if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (dict_set_int8 (local->dict, LINKTO, 1) < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ uuid_parse (gfid, local->dloc->gfid);
+ local->offload = offload;
+
+ STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, local->dloc,
+ local->dict);
+
+ return 0;
+
+out:
+ if (fd)
+ BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ else
+ BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+
+ GF_FREE (param_copy);
+ return 0;
+}
/*
* bd_setxattr: Used to create & map an LV to a posix file using
@@ -1071,51 +1343,71 @@ out:
* bd_setx_setx_cbk -> create_lv
* if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk
*/
-int32_t
+int
bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int flags, dict_t *xdata)
{
- int op_errno = 0;
- data_t *data = NULL;
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = NULL;
+ int op_errno = 0;
+ data_t *data = NULL;
+ bd_local_t *local = NULL;
+ bd_attr_t *bdatt = NULL;
+ bd_offload_t cl_type = BD_OF_NONE;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
- bd_inode_ctx_get (loc->inode, this, &bdatt);
+ if ((data = dict_get (dict, BD_XATTR)))
+ cl_type = BD_OF_NONE;
+ else if ((data = dict_get (dict, BD_CLONE)))
+ cl_type = BD_OF_CLONE;
+ else if ((data = dict_get (dict, BD_SNAPSHOT)))
+ cl_type = BD_OF_SNAPSHOT;
+ else if ((data = dict_get (dict, BD_MERGE)))
+ cl_type = BD_OF_MERGE;
- data = dict_get (dict, BD_XATTR);
- if (!data) {
- /* non bd file object */
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
+ bd_inode_ctx_get (loc->inode, this, &bdatt);
+ if (!cl_type && !data) {
+ STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setxattr, loc, dict,
+ flags, xdata);
return 0;
}
- if (bdatt) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s already mapped to BD", loc->path);
- op_errno = EEXIST;
- goto out;
- }
local = bd_local_init (frame, this);
BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
- local->inode = inode_ref (loc->inode);
- loc_copy (&local->loc, loc);
local->data = data;
+ loc_copy (&local->loc, loc);
+ local->inode = inode_ref (loc->inode);
- STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
+ if (cl_type) {
+ /* For cloning/snapshot, source file must be mapped to LV */
+ if (!bdatt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s not mapped to BD", loc->path);
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (cl_type == BD_OF_MERGE)
+ bd_do_merge (frame, this);
+ else
+ bd_offload (frame, this, loc, NULL, cl_type);
+ } else if (data) {
+ if (bdatt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s already mapped to BD", loc->path);
+ op_errno = EEXIST;
+ goto out;
+ }
+ STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->stat, loc, xdata);
+ }
return 0;
-
out:
- BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata);
+ if (op_errno)
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+
return 0;
}
@@ -1131,10 +1423,11 @@ int32_t
bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int flags, dict_t *xdata)
{
- int op_errno = 0;
- data_t *data = NULL;
- bd_attr_t *bdatt = NULL;
- bd_local_t *local = NULL;
+ int op_errno = 0;
+ data_t *data = NULL;
+ bd_attr_t *bdatt = NULL;
+ bd_local_t *local = NULL;
+ bd_offload_t cl_type = BD_OF_NONE;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -1144,27 +1437,57 @@ bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
bd_inode_ctx_get (fd->inode, this, &bdatt);
data = dict_get (dict, BD_XATTR);
- if (data) {
+ if ((data = dict_get (dict, BD_XATTR)))
+ cl_type = BD_OF_NONE;
+ else if ((data = dict_get (dict, BD_CLONE)))
+ cl_type = BD_OF_CLONE;
+ else if ((data = dict_get (dict, BD_SNAPSHOT)))
+ cl_type = BD_OF_SNAPSHOT;
+ else if ((data = dict_get (dict, BD_MERGE))) {
+ /*
+ * bd_merge is not supported for fsetxattr, because snapshot LV
+ * is opened and it causes problem in snapshot merge
+ */
+ op_errno = EOPNOTSUPP;
+ goto out;
+ }
+
+ bd_inode_ctx_get (fd->inode, this, &bdatt);
+
+ if (!cl_type && !data) {
+ /* non bd file object */
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+ }
+
+ local = bd_local_init (frame, this);
+ BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
+
+ local->inode = inode_ref (fd->inode);
+ local->fd = fd_ref (fd);
+ local->data = data;
+
+ if (cl_type) {
+ /* For cloning/snapshot, source file must be mapped to LV */
+ if (!bdatt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "fd %p not mapped to BD", fd);
+ op_errno = EINVAL;
+ goto out;
+
+ }
+ bd_offload (frame, this, NULL, fd, cl_type);
+ } else if (data) {
if (bdatt) {
gf_log (this->name, GF_LOG_WARNING,
"fd %p already mapped to BD", fd);
op_errno = EEXIST;
goto out;
}
- local = bd_local_init (frame, this);
- BD_VALIDATE_MEM_ALLOC (local, op_errno, out);
-
- local->inode = inode_ref (fd->inode);
- local->fd = fd_ref (fd);
- local->data = data;
-
STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, fd, xdata);
- } else {
- /* non bd file object */
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
}
return 0;
@@ -1682,8 +2005,10 @@ bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc,
if (!strcmp (name, VOL_TYPE))
op_ret = dict_set_int64 (xattr, (char *)name, 1);
- else
+ else if (!strcmp (name, VOL_CAPS))
op_ret = dict_set_int64 (xattr, (char *)name, priv->caps);
+ else
+ op_ret = bd_get_origin (this->private, loc, fd, xattr);
out:
if (loc)
@@ -1703,7 +2028,8 @@ int
bd_fgetxattr (call_frame_t *frame, xlator_t *this,
fd_t *fd, const char *name, dict_t *xdata)
{
- if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)))
+ if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)
+ || !strcmp (name, BD_ORIGIN)))
bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata);
else
STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
@@ -1716,7 +2042,8 @@ int
bd_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name, dict_t *xdata)
{
- if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)))
+ if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS)
+ || !strcmp (name, BD_ORIGIN)))
bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata);
else
STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
@@ -1997,6 +2324,8 @@ init (xlator_t *this)
}
}
+ _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT;
+
return 0;
error:
GF_FREE (_private->vg);
diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h
index e3acdedc1d2..34b4c9e2226 100644
--- a/xlators/storage/bd/src/bd.h
+++ b/xlators/storage/bd/src/bd.h
@@ -40,6 +40,7 @@
#define LVM_RESIZE "/sbin/lvresize"
#define LVM_CREATE "/sbin/lvcreate"
+#define LVM_CONVERT "/sbin/lvconvert"
#define VOL_TYPE "volume.type"
#define VOL_CAPS "volume.caps"
@@ -48,6 +49,20 @@
#define BD_CAPS_BD 0x01
#define BD_CAPS_THIN 0x02
+#define BD_CAPS_OFFLOAD_COPY 0x04
+#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08
+
+#define BD_CLONE "clone"
+#define BD_SNAPSHOT "snapshot"
+#define BD_MERGE "merge"
+#define BD_ORIGIN "list-origin"
+
+#define IOV_NR 4
+#define IOV_SIZE (64 * 1024)
+
+#define ALIGN_SIZE 4096
+
+#define LINKTO "trusted.glusterfs.dht.linkto"
#define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label) \
if (!buff) { \
@@ -115,6 +130,13 @@ typedef struct {
char *type;
} bd_attr_t;
+typedef enum {
+ BD_OF_NONE,
+ BD_OF_CLONE,
+ BD_OF_SNAPSHOT,
+ BD_OF_MERGE,
+} bd_offload_t;
+
typedef struct {
dict_t *dict;
bd_attr_t *bdatt;
@@ -122,6 +144,9 @@ typedef struct {
loc_t loc;
fd_t *fd;
data_t *data; /* for setxattr */
+ bd_offload_t offload;
+ uint64_t size;
+ loc_t *dloc;
} bd_local_t;
/* Prototypes */
@@ -145,4 +170,9 @@ int bd_clone (bd_local_t *local, bd_priv_t *priv);
int bd_merge (bd_priv_t *priv, uuid_t gfid);
int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);
inline void bd_update_amtime(struct iatt *iatt, int flag);
+int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv);
+int bd_clone (bd_local_t *local, bd_priv_t *priv);
+int bd_merge (bd_priv_t *priv, uuid_t gfid);
+int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);
+
#endif