diff options
author | M. Mohan Kumar <mohan@in.ibm.com> | 2013-11-13 22:44:43 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-11-13 11:39:22 -0800 |
commit | 81a57679c20ac0ac9b48e313af75036132e3a5ad (patch) | |
tree | c99c3cf75096a2530b65a3bb5b3c19c2c463b7fa | |
parent | b222ce817f5f324fe20d4d3614001ed2f177afb8 (diff) |
bd: Add support to create clone, snapshot and merge of LV images.
Special xattr names "clone" & "snapshot" can be used to create full and
linked clone of the LV images. GFID of destination posix file (to be
mapped) is passed as a value to the xattr. Destination posix file must
exist before running this operation.
These operations form a basis for offloading storage related operations
from QEMU to GlusterFS.
Syntax for full clone: xattr name: "clone" value: "gfid-of-dest-file"
Syntax for linked clone: xattr name: "snapshot" value: "gfid-of-dest-file"
Syntax for merging: xattr name: "merge" value: "path-to-snapshot-file"
Example:
setfattr -n clone -v <gfid-of-dest-file> /media/source
setfattr -n snapshot -v <gfid-of-dest-file> /media/source
setfattr -n merge -v "/media/sn" /media/sn
Change-Id: Id9f984a709d4c2e52a64ae75bb12a8ecb01f8776
BUG: 1028672
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Reviewed-on: http://review.gluster.org/5626
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 39 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 9 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 6 | ||||
-rw-r--r-- | xlators/storage/bd/src/bd-helper.c | 202 | ||||
-rw-r--r-- | xlators/storage/bd/src/bd.c | 421 | ||||
-rw-r--r-- | xlators/storage/bd/src/bd.h | 30 |
7 files changed, 654 insertions, 56 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index cc425353516..df6087659ef 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1109,7 +1109,8 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, brick = strtok_r (brick_list+1, " \n", &saveptr); #ifdef HAVE_BD_XLATOR if (brickinfo->vg[0]) - caps = CAPS_BD | CAPS_THIN; + caps = CAPS_BD | CAPS_THIN | + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; #endif while (i <= count) { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 181b8fcf1a9..fcffccc07b8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -335,7 +335,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, char *volume_id_str = NULL; struct args_pack pack = {0,}; xlator_t *this = NULL; - + int caps = 0; GF_ASSERT (volinfo); GF_ASSERT (volumes); @@ -401,6 +401,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, #ifdef HAVE_BD_XLATOR if (volinfo->caps) { + caps = 0; snprintf (key, 256, "volume%d.xlator0", count); buf = GF_MALLOC (256, gf_common_mt_char); if (!buf) { @@ -416,7 +417,8 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, } if (volinfo->caps & CAPS_THIN) { - snprintf (key, 256, "volume%d.xlator0.caps0", count); + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); buf = GF_MALLOC (256, gf_common_mt_char); if (!buf) { ret = ENOMEM; @@ -429,6 +431,39 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; } } + + if (volinfo->caps & CAPS_OFFLOAD_COPY) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_copy"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_snapshot"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + } #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index a2bd7334c93..561ff652d81 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -665,7 +665,7 @@ glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg) } next: - brick->caps = CAPS_BD; + brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; dm_lvlist = lvm_vg_list_lvs (vg); if (!dm_lvlist) @@ -953,7 +953,6 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) } } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_resolve_brick (brickinfo); if (ret) { @@ -1011,8 +1010,8 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) } #ifdef HAVE_BD_XLATOR if (brickinfo->vg[0]) - caps = CAPS_BD | CAPS_THIN; - + caps = CAPS_BD | CAPS_THIN | + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; /* Check for VG/thin pool if its BD volume */ if (brickinfo->vg[0]) { ret = glusterd_is_valid_vg (brickinfo, 0, msg); @@ -1602,7 +1601,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) if (count) brick = strtok_r (brick_list+1, " \n", &saveptr); - caps = CAPS_BD | CAPS_THIN; + caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; while ( i <= count) { ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 6423d5a8154..ad63682e55e 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -234,8 +234,10 @@ struct _auth { typedef struct _auth auth_t; /* Capabilities of xlator */ -#define CAPS_BD 0x00000001 -#define CAPS_THIN 0x00000010 +#define CAPS_BD 0x00000001 +#define CAPS_THIN 0x00000002 +#define CAPS_OFFLOAD_COPY 0x00000004 +#define CAPS_OFFLOAD_SNAPSHOT 0x00000008 struct glusterd_rebalance_ { gf_defrag_status_t defrag_status; diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c index 8781f9fdfb4..5525e346bd7 100644 --- a/xlators/storage/bd/src/bd-helper.c +++ b/xlators/storage/bd/src/bd-helper.c @@ -579,3 +579,205 @@ bd_update_amtime(struct iatt *iatt, int flag) iatt->ia_mtime_nsec = ts.tv_nsec; } } + +int +bd_snapshot_create (bd_local_t *local, bd_priv_t *priv) +{ + char *path = NULL; + bd_gfid_t dest = {0, }; + bd_gfid_t origin = {0, }; + int ret = 0; + runner_t runner = {0, }; + struct stat stat = {0, }; + + uuid_utoa_r (local->dloc->gfid, dest); + uuid_utoa_r (local->loc.gfid, origin); + + gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); + if (!path) { + gf_log (THIS->name, GF_LOG_WARNING, + "Insufficient memory"); + return ENOMEM; + } + + runinit (&runner); + runner_add_args (&runner, LVM_CREATE, NULL); + runner_add_args (&runner, "--snapshot", NULL); + runner_argprintf (&runner, "/dev/%s/%s", priv->vg, origin); + runner_add_args (&runner, "--name", NULL); + runner_argprintf (&runner, "%s", dest); + if (strcmp (local->bdatt->type, BD_THIN)) + runner_argprintf (&runner, "-L%ldB", local->size); + runner_start (&runner); + runner_end (&runner); + + if (lstat (path, &stat) < 0) + ret = EIO; + + GF_FREE (path); + return ret; +} + +int +bd_clone (bd_local_t *local, bd_priv_t *priv) +{ + int ret = ENOMEM; + int fd1 = -1; + int fd2 = -1; + int i = 0; + char *buff = NULL; + ssize_t bytes = 0; + char *spath = NULL; + char *dpath = NULL; + struct iovec *vec = NULL; + bd_gfid_t source = {0, }; + bd_gfid_t dest = {0, }; + void *bufp[IOV_NR] = {0, }; + + vec = GF_CALLOC (IOV_NR, sizeof (struct iovec), gf_common_mt_iovec); + if (!vec) + return ENOMEM; + + for (i = 0; i < IOV_NR; i++) { + bufp[i] = page_aligned_alloc (IOV_SIZE, &buff); + if (!buff) + goto out; + vec[i].iov_base = buff; + vec[i].iov_len = IOV_SIZE; + } + + uuid_utoa_r (local->loc.gfid, source); + uuid_utoa_r (local->dloc->gfid, dest); + + gf_asprintf (&spath, "/dev/%s/%s", priv->vg, source); + gf_asprintf (&dpath, "/dev/%s/%s", priv->vg, dest); + if (!spath || !dpath) + goto out; + + ret = bd_create (local->dloc->gfid, local->size, + local->bdatt->type, priv); + if (ret) + goto out; + + fd1 = open (spath, O_RDONLY | O_DIRECT); + if (fd1 < 0) { + ret = errno; + goto out; + } + fd2 = open (dpath, O_WRONLY | O_DIRECT); + if (fd2 < 0) { + ret = errno; + goto out; + } + + while (1) { + bytes = readv (fd1, vec, IOV_NR); + if (bytes < 0) { + ret = errno; + gf_log (THIS->name, GF_LOG_WARNING, "read failed: %s", + strerror (ret)); + goto out; + } + if (!bytes) + break; + bytes = writev (fd2, vec, IOV_NR); + if (bytes < 0) { + ret = errno; + gf_log (THIS->name, GF_LOG_WARNING, + "write failed: %s", strerror (ret)); + goto out; + } + } + ret = 0; + +out: + for (i = 0; i < IOV_NR; i++) + GF_FREE (bufp[i]); + GF_FREE (vec); + + if (fd1 != -1) + close (fd1); + if (fd2 != -1) + close (fd2); + + FREE (spath); + FREE (dpath); + + return ret; +} + +/* + * Merges snapshot LV to origin LV and returns status + */ +int +bd_merge (bd_priv_t *priv, uuid_t gfid) +{ + bd_gfid_t dest = {0, }; + char *path = NULL; + struct stat stat = {0, }; + runner_t runner = {0, }; + int ret = 0; + + uuid_utoa_r (gfid, dest); + gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); + + runinit (&runner); + runner_add_args (&runner, LVM_CONVERT, NULL); + runner_add_args (&runner, "--merge", NULL); + runner_argprintf (&runner, "%s", path); + runner_start (&runner); + runner_end (&runner); + + if (!lstat (path, &stat)) + ret = EIO; + + GF_FREE (path); + + return ret; +} + +int +bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict) +{ + vg_t brick = NULL; + lvm_property_value_t prop = {0, }; + lv_t lv = NULL; + int ret = -1; + bd_gfid_t gfid = {0, }; + inode_t *inode = NULL; + char *origin = NULL; + + brick = lvm_vg_open (priv->handle, priv->vg, "w", 0); + if (!brick) { + gf_log (THIS->name, GF_LOG_CRITICAL, "VG %s is not found", + priv->vg); + return ENOENT; + } + + if (fd) + inode = fd->inode; + else + inode = loc->inode; + + uuid_utoa_r (inode->gfid, gfid); + lv = lvm_lv_from_name (brick, gfid); + if (!lv) { + gf_log (THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid); + ret = ENOENT; + goto out; + } + + prop = lvm_lv_get_property (lv, "origin"); + if (!prop.is_valid || !prop.value.string) { + ret = ENODATA; + goto out; + } + + origin = gf_strdup (prop.value.string); + ret = dict_set_dynstr (dict, BD_ORIGIN, origin); + +out: + lvm_vg_close (brick); + return ret; +} + diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c index 555f1d51fc1..405474c587d 100644 --- a/xlators/storage/bd/src/bd.c +++ b/xlators/storage/bd/src/bd.c @@ -1063,6 +1063,278 @@ out: return 0; } +int +bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + bd_local_t *local = frame->local; + + if (local->fd) + BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL); + else + BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL); + + return 0; +} + +int +bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + bd_local_t *local = frame->local; + + if (op_ret < 0) + goto out; + + if (local->offload == BD_OF_SNAPSHOT) + op_ret = bd_snapshot_create (frame->local, this->private); + else + op_ret = bd_clone (frame->local, this->private); + + if (op_ret) { + STACK_WIND (frame, bd_offload_rm_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + local->dloc, BD_XATTR, NULL); + return 0; + } + +out: + if (local->fd) + BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); + else + BD_STACK_UNWIND (setxattr, frame, op_errno, op_errno, NULL); + + return 0; +} + +int +bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + char *bd = NULL; + bd_local_t *local = frame->local; + char *type = NULL; + char *p = NULL; + + if (op_ret < 0) + goto out; + + if (dict_get_str (xattr, BD_XATTR, &p)) { + op_errno = EINVAL; + goto out; + } + + type = gf_strdup (p); + BD_VALIDATE_MEM_ALLOC (type, op_errno, out); + + p = strrchr (type, ':'); + if (!p) { + op_errno = EINVAL; + gf_log (this->name, GF_LOG_WARNING, + "source file xattr %s corrupted?", type); + goto out; + } + + *p='\0'; + + /* For clone size is taken from source LV */ + if (!local->size) { + p++; + gf_string2bytesize (p, &local->size); + } + gf_asprintf (&bd, "%s:%ld", type, local->size); + local->bdatt->type = gf_strdup (type); + dict_del (local->dict, BD_XATTR); + dict_del (local->dict, LINKTO); + if (dict_set_dynstr (local->dict, BD_XATTR, bd)) { + op_errno = EINVAL; + goto out; + } + + STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + local->dloc, local->dict, 0, NULL); + + return 0; + +out: + if (local->fd) + BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + else + BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + + GF_FREE (type); + GF_FREE (bd); + + return 0; +} + +int +bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *iatt, + dict_t *xattr, struct iatt *postparent) +{ + bd_local_t *local = frame->local; + char *bd = NULL; + int ret = -1; + char *linkto = NULL; + + if (op_ret < 0 && op_errno != ENODATA) { + op_errno = EINVAL; + goto out; + } + + if (!IA_ISREG (iatt->ia_type)) { + op_errno = EINVAL; + gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a " + "regular file"); + goto out; + } + + ret = dict_get_str (xattr, LINKTO, &linkto); + if (linkto) { + op_errno = EINVAL; + gf_log (this->name, GF_LOG_WARNING, "destination file not " + "present in same brick"); + goto out; + } + + ret = dict_get_str (xattr, BD_XATTR, &bd); + if (bd) { + op_errno = EEXIST; + goto out; + } + + local->bdatt = CALLOC (1, sizeof (bd_attr_t)); + BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); + + STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + &local->loc, BD_XATTR, NULL); + + return 0; +out: + if (local->fd) + BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + else + BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + + return 0; +} + +int +bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + /* FIXME: if delete failed, remove xattr */ + + BD_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int +bd_do_merge(call_frame_t *frame, xlator_t *this) +{ + bd_local_t *local = frame->local; + inode_t *parent = NULL; + char *p = NULL; + int op_errno = 0; + + op_errno = bd_merge (this->private, local->inode->gfid); + if (op_errno) + goto out; + + /* + * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does + * not have loc->pargfid set. Get parent's gfid by getting parents inode + */ + parent = inode_parent (local->inode, NULL, NULL); + if (!parent) { + /* + * FIXME: Snapshot LV already deleted. + * remove xattr, instead of returning failure + */ + op_errno = EINVAL; + goto out; + } + uuid_copy (local->loc.pargfid, parent->gfid); + + p = strrchr (local->loc.path, '/'); + if (p) + p++; + local->loc.name = p; + + STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, + &local->loc, 0, NULL); + + return 0; +out: + BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + + return op_errno; +} + +int +bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, bd_offload_t offload) +{ + char *param = NULL; + char *param_copy = NULL; + char *p = NULL; + char *size = NULL; + char *gfid = NULL; + int op_errno = 0; + bd_local_t *local = frame->local; + + param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char); + BD_VALIDATE_MEM_ALLOC (param, op_errno, out); + param_copy = param; + + local->dict = dict_new (); + BD_VALIDATE_MEM_ALLOC (local->dict, op_errno, out); + + local->dloc = CALLOC (1, sizeof (loc_t)); + BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out); + + strncpy (param, local->data->data, local->data->len); + + gfid = strtok_r (param, ":", &p); + size = strtok_r (NULL, ":", &p); + if (size) + gf_string2bytesize (size, &local->size); + else if (offload != BD_OF_CLONE) + local->size = bd_get_default_extent (this->private); + + if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) { + op_errno = EINVAL; + goto out; + } + if (dict_set_int8 (local->dict, LINKTO, 1) < 0) { + op_errno = EINVAL; + goto out; + } + + uuid_parse (gfid, local->dloc->gfid); + local->offload = offload; + + STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, local->dloc, + local->dict); + + return 0; + +out: + if (fd) + BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + else + BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + + GF_FREE (param_copy); + return 0; +} /* * bd_setxattr: Used to create & map an LV to a posix file using @@ -1071,51 +1343,71 @@ out: * bd_setx_setx_cbk -> create_lv * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk */ -int32_t +int bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { - int op_errno = 0; - data_t *data = NULL; - bd_attr_t *bdatt = NULL; - bd_local_t *local = NULL; + int op_errno = 0; + data_t *data = NULL; + bd_local_t *local = NULL; + bd_attr_t *bdatt = NULL; + bd_offload_t cl_type = BD_OF_NONE; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - bd_inode_ctx_get (loc->inode, this, &bdatt); + if ((data = dict_get (dict, BD_XATTR))) + cl_type = BD_OF_NONE; + else if ((data = dict_get (dict, BD_CLONE))) + cl_type = BD_OF_CLONE; + else if ((data = dict_get (dict, BD_SNAPSHOT))) + cl_type = BD_OF_SNAPSHOT; + else if ((data = dict_get (dict, BD_MERGE))) + cl_type = BD_OF_MERGE; - data = dict_get (dict, BD_XATTR); - if (!data) { - /* non bd file object */ - STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + bd_inode_ctx_get (loc->inode, this, &bdatt); + if (!cl_type && !data) { + STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setxattr, loc, dict, + flags, xdata); return 0; } - if (bdatt) { - gf_log (this->name, GF_LOG_WARNING, - "%s already mapped to BD", loc->path); - op_errno = EEXIST; - goto out; - } local = bd_local_init (frame, this); BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - local->inode = inode_ref (loc->inode); - loc_copy (&local->loc, loc); local->data = data; + loc_copy (&local->loc, loc); + local->inode = inode_ref (loc->inode); - STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); + if (cl_type) { + /* For cloning/snapshot, source file must be mapped to LV */ + if (!bdatt) { + gf_log (this->name, GF_LOG_WARNING, + "%s not mapped to BD", loc->path); + op_errno = EINVAL; + goto out; + } + if (cl_type == BD_OF_MERGE) + bd_do_merge (frame, this); + else + bd_offload (frame, this, loc, NULL, cl_type); + } else if (data) { + if (bdatt) { + gf_log (this->name, GF_LOG_WARNING, + "%s already mapped to BD", loc->path); + op_errno = EEXIST; + goto out; + } + STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->stat, loc, xdata); + } return 0; - out: - BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata); + if (op_errno) + STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata); + return 0; } @@ -1131,10 +1423,11 @@ int32_t bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { - int op_errno = 0; - data_t *data = NULL; - bd_attr_t *bdatt = NULL; - bd_local_t *local = NULL; + int op_errno = 0; + data_t *data = NULL; + bd_attr_t *bdatt = NULL; + bd_local_t *local = NULL; + bd_offload_t cl_type = BD_OF_NONE; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -1144,27 +1437,57 @@ bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, bd_inode_ctx_get (fd->inode, this, &bdatt); data = dict_get (dict, BD_XATTR); - if (data) { + if ((data = dict_get (dict, BD_XATTR))) + cl_type = BD_OF_NONE; + else if ((data = dict_get (dict, BD_CLONE))) + cl_type = BD_OF_CLONE; + else if ((data = dict_get (dict, BD_SNAPSHOT))) + cl_type = BD_OF_SNAPSHOT; + else if ((data = dict_get (dict, BD_MERGE))) { + /* + * bd_merge is not supported for fsetxattr, because snapshot LV + * is opened and it causes problem in snapshot merge + */ + op_errno = EOPNOTSUPP; + goto out; + } + + bd_inode_ctx_get (fd->inode, this, &bdatt); + + if (!cl_type && !data) { + /* non bd file object */ + STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); + return 0; + } + + local = bd_local_init (frame, this); + BD_VALIDATE_MEM_ALLOC (local, op_errno, out); + + local->inode = inode_ref (fd->inode); + local->fd = fd_ref (fd); + local->data = data; + + if (cl_type) { + /* For cloning/snapshot, source file must be mapped to LV */ + if (!bdatt) { + gf_log (this->name, GF_LOG_WARNING, + "fd %p not mapped to BD", fd); + op_errno = EINVAL; + goto out; + + } + bd_offload (frame, this, NULL, fd, cl_type); + } else if (data) { if (bdatt) { gf_log (this->name, GF_LOG_WARNING, "fd %p already mapped to BD", fd); op_errno = EEXIST; goto out; } - local = bd_local_init (frame, this); - BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - - local->inode = inode_ref (fd->inode); - local->fd = fd_ref (fd); - local->data = data; - STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, xdata); - } else { - /* non bd file object */ - STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - fd, dict, flags, xdata); } return 0; @@ -1682,8 +2005,10 @@ bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc, if (!strcmp (name, VOL_TYPE)) op_ret = dict_set_int64 (xattr, (char *)name, 1); - else + else if (!strcmp (name, VOL_CAPS)) op_ret = dict_set_int64 (xattr, (char *)name, priv->caps); + else + op_ret = bd_get_origin (this->private, loc, fd, xattr); out: if (loc) @@ -1703,7 +2028,8 @@ int bd_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS))) + if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) + || !strcmp (name, BD_ORIGIN))) bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata); else STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), @@ -1716,7 +2042,8 @@ int bd_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS))) + if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) + || !strcmp (name, BD_ORIGIN))) bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata); else STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), @@ -1997,6 +2324,8 @@ init (xlator_t *this) } } + _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT; + return 0; error: GF_FREE (_private->vg); diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h index e3acdedc1d2..34b4c9e2226 100644 --- a/xlators/storage/bd/src/bd.h +++ b/xlators/storage/bd/src/bd.h @@ -40,6 +40,7 @@ #define LVM_RESIZE "/sbin/lvresize" #define LVM_CREATE "/sbin/lvcreate" +#define LVM_CONVERT "/sbin/lvconvert" #define VOL_TYPE "volume.type" #define VOL_CAPS "volume.caps" @@ -48,6 +49,20 @@ #define BD_CAPS_BD 0x01 #define BD_CAPS_THIN 0x02 +#define BD_CAPS_OFFLOAD_COPY 0x04 +#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08 + +#define BD_CLONE "clone" +#define BD_SNAPSHOT "snapshot" +#define BD_MERGE "merge" +#define BD_ORIGIN "list-origin" + +#define IOV_NR 4 +#define IOV_SIZE (64 * 1024) + +#define ALIGN_SIZE 4096 + +#define LINKTO "trusted.glusterfs.dht.linkto" #define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label) \ if (!buff) { \ @@ -115,6 +130,13 @@ typedef struct { char *type; } bd_attr_t; +typedef enum { + BD_OF_NONE, + BD_OF_CLONE, + BD_OF_SNAPSHOT, + BD_OF_MERGE, +} bd_offload_t; + typedef struct { dict_t *dict; bd_attr_t *bdatt; @@ -122,6 +144,9 @@ typedef struct { loc_t loc; fd_t *fd; data_t *data; /* for setxattr */ + bd_offload_t offload; + uint64_t size; + loc_t *dloc; } bd_local_t; /* Prototypes */ @@ -145,4 +170,9 @@ int bd_clone (bd_local_t *local, bd_priv_t *priv); int bd_merge (bd_priv_t *priv, uuid_t gfid); int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); inline void bd_update_amtime(struct iatt *iatt, int flag); +int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv); +int bd_clone (bd_local_t *local, bd_priv_t *priv); +int bd_merge (bd_priv_t *priv, uuid_t gfid); +int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); + #endif |