diff options
| author | M. Mohan Kumar <mohan@in.ibm.com> | 2013-11-13 22:44:43 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2013-11-13 11:39:22 -0800 | 
| commit | 81a57679c20ac0ac9b48e313af75036132e3a5ad (patch) | |
| tree | c99c3cf75096a2530b65a3bb5b3c19c2c463b7fa | |
| parent | b222ce817f5f324fe20d4d3614001ed2f177afb8 (diff) | |
bd: Add support to create clone, snapshot and merge of LV images.
The special xattr names "clone" and "snapshot" can be used to create a full
clone or a linked clone, respectively, of an LV image. The GFID of the
destination posix file (to be mapped) is passed as the value of the xattr.
The destination posix file must exist before running this operation.
These operations form a basis for offloading storage-related operations
from QEMU to GlusterFS.
Syntax for full clone: xattr name: "clone" value: "gfid-of-dest-file"
Syntax for linked clone: xattr name: "snapshot" value: "gfid-of-dest-file"
Syntax for merging: xattr name: "merge" value: "path-to-snapshot-file"
Example:
	setfattr -n clone -v <gfid-of-dest-file> /media/source
	setfattr -n snapshot -v <gfid-of-dest-file> /media/source
	setfattr -n merge -v "/media/sn" /media/sn
Change-Id: Id9f984a709d4c2e52a64ae75bb12a8ecb01f8776
BUG: 1028672
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Reviewed-on: http://review.gluster.org/5626
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 39 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 9 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 6 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd-helper.c | 202 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd.c | 421 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd.h | 30 | 
7 files changed, 654 insertions, 56 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index cc4253535..df6087659 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1109,7 +1109,8 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,                  brick = strtok_r (brick_list+1, " \n", &saveptr);  #ifdef HAVE_BD_XLATOR          if (brickinfo->vg[0]) -                caps = CAPS_BD | CAPS_THIN; +                caps = CAPS_BD | CAPS_THIN | +                        CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;  #endif          while (i <= count) { diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 181b8fcf1..fcffccc07 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -335,7 +335,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,          char                    *volume_id_str  = NULL;          struct args_pack        pack = {0,};          xlator_t                *this = NULL; - +        int                     caps = 0;          GF_ASSERT (volinfo);          GF_ASSERT (volumes); @@ -401,6 +401,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,  #ifdef HAVE_BD_XLATOR          if (volinfo->caps) { +                caps = 0;                  snprintf (key, 256, "volume%d.xlator0", count);                  buf = GF_MALLOC (256, gf_common_mt_char);                  if (!buf) { @@ -416,7 +417,8 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,                  }                  if (volinfo->caps & CAPS_THIN) { -                        snprintf (key, 256, "volume%d.xlator0.caps0", count); +                        snprintf (key, 256, "volume%d.xlator0.caps%d", count, +                                  caps++);                          buf = GF_MALLOC (256, gf_common_mt_char);              
            if (!buf) {                                  ret = ENOMEM; @@ -429,6 +431,39 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,                                  goto out;                          }                  } + +                if (volinfo->caps & CAPS_OFFLOAD_COPY) { +                        snprintf (key, 256, "volume%d.xlator0.caps%d", count, +                                  caps++); +                        buf = GF_MALLOC (256, gf_common_mt_char); +                        if (!buf) { +                                ret = ENOMEM; +                                goto out; +                        } +                        snprintf (buf, 256, "offload_copy"); +                        ret = dict_set_dynstr (volumes, key, buf); +                        if (ret) { +                                GF_FREE (buf); +                                goto out; +                        } +                } + +                if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) { +                        snprintf (key, 256, "volume%d.xlator0.caps%d", count, +                                  caps++); +                        buf = GF_MALLOC (256, gf_common_mt_char); +                        if (!buf) { +                                ret = ENOMEM; +                                goto out; +                        } +                        snprintf (buf, 256, "offload_snapshot"); +                        ret = dict_set_dynstr (volumes, key, buf); +                        if (ret)  { +                                GF_FREE (buf); +                                goto out; +                        } +                } +          }  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index a2bd7334c..561ff652d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -665,7 +665,7 @@ glusterd_is_valid_vg 
(glusterd_brickinfo_t *brick, int check_tag, char *msg)          }  next: -        brick->caps = CAPS_BD; +        brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;          dm_lvlist = lvm_vg_list_lvs (vg);          if (!dm_lvlist) @@ -953,7 +953,6 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)                  }          } -          list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {                  ret = glusterd_resolve_brick (brickinfo);                  if (ret) { @@ -1011,8 +1010,8 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)                  }  #ifdef HAVE_BD_XLATOR                  if (brickinfo->vg[0]) -                        caps = CAPS_BD | CAPS_THIN; - +                        caps = CAPS_BD | CAPS_THIN | +                                CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;                  /* Check for VG/thin pool if its BD volume */                  if (brickinfo->vg[0]) {                          ret = glusterd_is_valid_vg (brickinfo, 0, msg); @@ -1602,7 +1601,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)          if (count)                  brick = strtok_r (brick_list+1, " \n", &saveptr); -        caps = CAPS_BD | CAPS_THIN; +        caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;          while ( i <= count) {                  ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 6423d5a81..ad63682e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -234,8 +234,10 @@ struct _auth {  typedef struct _auth auth_t;  /* Capabilities of xlator */ -#define CAPS_BD           0x00000001 -#define CAPS_THIN         0x00000010 +#define CAPS_BD               0x00000001 +#define CAPS_THIN             0x00000002 +#define CAPS_OFFLOAD_COPY     0x00000004 +#define CAPS_OFFLOAD_SNAPSHOT 0x00000008  
struct glusterd_rebalance_ {          gf_defrag_status_t       defrag_status; diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c index 8781f9fdf..5525e346b 100644 --- a/xlators/storage/bd/src/bd-helper.c +++ b/xlators/storage/bd/src/bd-helper.c @@ -579,3 +579,205 @@ bd_update_amtime(struct iatt *iatt, int flag)                  iatt->ia_mtime_nsec = ts.tv_nsec;          }  } + +int +bd_snapshot_create (bd_local_t *local, bd_priv_t *priv) +{ +        char       *path   = NULL; +        bd_gfid_t   dest   = {0, }; +        bd_gfid_t   origin = {0, }; +        int         ret    = 0; +        runner_t    runner = {0, }; +        struct stat stat   = {0, }; + +        uuid_utoa_r (local->dloc->gfid, dest); +        uuid_utoa_r (local->loc.gfid, origin); + +        gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); +        if (!path) { +                gf_log (THIS->name, GF_LOG_WARNING, +                        "Insufficient memory"); +                return ENOMEM; +        } + +        runinit (&runner); +        runner_add_args  (&runner, LVM_CREATE, NULL); +        runner_add_args  (&runner, "--snapshot", NULL); +        runner_argprintf (&runner, "/dev/%s/%s", priv->vg, origin); +        runner_add_args  (&runner, "--name", NULL); +        runner_argprintf (&runner, "%s", dest); +        if (strcmp (local->bdatt->type, BD_THIN)) +                runner_argprintf (&runner, "-L%ldB", local->size); +        runner_start (&runner); +        runner_end (&runner); + +        if (lstat (path, &stat) < 0) +                ret = EIO; + +        GF_FREE (path); +        return ret; +} + +int +bd_clone (bd_local_t *local, bd_priv_t *priv) +{ +        int           ret          = ENOMEM; +        int           fd1          = -1; +        int           fd2          = -1; +        int           i            = 0; +        char         *buff         = NULL; +        ssize_t       bytes        = 0; +        char         *spath        = NULL; +    
    char         *dpath        = NULL; +        struct iovec *vec          = NULL; +        bd_gfid_t     source       = {0, }; +        bd_gfid_t     dest         = {0, }; +        void         *bufp[IOV_NR] = {0, }; + +        vec = GF_CALLOC (IOV_NR, sizeof (struct iovec), gf_common_mt_iovec); +        if (!vec) +                return ENOMEM; + +        for (i = 0; i < IOV_NR; i++) { +                bufp[i] = page_aligned_alloc (IOV_SIZE, &buff); +                if (!buff) +                        goto out; +                vec[i].iov_base = buff; +                vec[i].iov_len = IOV_SIZE; +        } + +        uuid_utoa_r (local->loc.gfid, source); +        uuid_utoa_r (local->dloc->gfid, dest); + +        gf_asprintf (&spath, "/dev/%s/%s", priv->vg, source); +        gf_asprintf (&dpath, "/dev/%s/%s", priv->vg, dest); +        if (!spath || !dpath) +                goto out; + +        ret = bd_create (local->dloc->gfid, local->size, +                         local->bdatt->type,  priv); +        if (ret) +                goto out; + +        fd1 = open (spath, O_RDONLY | O_DIRECT); +        if (fd1 < 0) { +                ret = errno; +                goto out; +        } +        fd2 = open (dpath, O_WRONLY | O_DIRECT); +        if (fd2 < 0) { +                ret = errno; +                goto out; +        } + +        while (1) { +                bytes = readv (fd1, vec, IOV_NR); +                if (bytes < 0) { +                        ret = errno; +                        gf_log (THIS->name, GF_LOG_WARNING, "read failed: %s", +                                strerror (ret)); +                        goto out; +                } +                if (!bytes) +                        break; +                bytes = writev (fd2, vec, IOV_NR); +                if (bytes < 0) { +                        ret = errno; +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "write failed: %s", strerror (ret)); +           
             goto out; +                } +        } +        ret = 0; + +out: +        for (i = 0; i < IOV_NR; i++) +                GF_FREE (bufp[i]); +        GF_FREE (vec); + +        if (fd1 != -1) +                close (fd1); +        if (fd2 != -1) +                close (fd2); + +        FREE (spath); +        FREE (dpath); + +        return ret; +} + +/* + * Merges snapshot LV to origin LV and returns status + */ +int +bd_merge (bd_priv_t *priv, uuid_t gfid) +{ +        bd_gfid_t   dest   = {0, }; +        char       *path   = NULL; +        struct stat stat   = {0, }; +        runner_t    runner = {0, }; +        int         ret    = 0; + +        uuid_utoa_r (gfid, dest); +        gf_asprintf (&path, "/dev/%s/%s", priv->vg, dest); + +        runinit (&runner); +        runner_add_args (&runner, LVM_CONVERT, NULL); +        runner_add_args (&runner, "--merge", NULL); +        runner_argprintf (&runner, "%s", path); +        runner_start (&runner); +        runner_end (&runner); + +        if (!lstat (path, &stat)) +                ret = EIO; + +        GF_FREE (path); + +        return ret; +} + +int +bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict) +{ +        vg_t                      brick      = NULL; +        lvm_property_value_t      prop       = {0, }; +        lv_t                      lv         = NULL; +        int                       ret        = -1; +        bd_gfid_t                 gfid       = {0, }; +        inode_t                  *inode      = NULL; +        char                     *origin     = NULL; + +        brick = lvm_vg_open (priv->handle, priv->vg, "w", 0); +        if (!brick) { +                gf_log (THIS->name, GF_LOG_CRITICAL, "VG %s is not found", +                        priv->vg); +                return ENOENT; +        } + +        if (fd) +                inode = fd->inode; +        else +                inode = loc->inode; + +        uuid_utoa_r (inode->gfid, gfid); +        lv = 
lvm_lv_from_name (brick, gfid); +        if (!lv) { +                gf_log (THIS->name, GF_LOG_CRITICAL, "LV %s not found", gfid); +                ret = ENOENT; +                goto out; +        } + +        prop = lvm_lv_get_property (lv, "origin"); +        if (!prop.is_valid || !prop.value.string) { +                ret = ENODATA; +                goto out; +        } + +        origin = gf_strdup (prop.value.string); +        ret = dict_set_dynstr (dict, BD_ORIGIN, origin); + +out: +        lvm_vg_close (brick); +        return ret; +} + diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c index 555f1d51f..405474c58 100644 --- a/xlators/storage/bd/src/bd.c +++ b/xlators/storage/bd/src/bd.c @@ -1063,6 +1063,278 @@ out:          return 0;  } +int +bd_offload_rm_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                         int op_ret, int op_errno, dict_t *xdata) +{ +        bd_local_t *local = frame->local; + +        if (local->fd) +                BD_STACK_UNWIND (fsetxattr, frame, -1, EIO, NULL); +        else +                BD_STACK_UNWIND (setxattr, frame, -1, EIO, NULL); + +        return 0; +} + +int +bd_offload_setx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int op_ret, int op_errno, dict_t *xdata) +{ +        bd_local_t *local = frame->local; + +        if (op_ret < 0) +                goto out; + +        if (local->offload == BD_OF_SNAPSHOT) +                op_ret = bd_snapshot_create (frame->local, this->private); +        else +                op_ret = bd_clone (frame->local, this->private); + +        if (op_ret) { +                STACK_WIND (frame, bd_offload_rm_xattr_cbk, FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->removexattr, +                            local->dloc, BD_XATTR, NULL); +                return 0; +        } + +out: +        if (local->fd) +                BD_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); +   
     else +                BD_STACK_UNWIND (setxattr, frame, op_errno, op_errno, NULL); + +        return 0; +} + +int +bd_offload_getx_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ +        char       *bd    = NULL; +        bd_local_t *local = frame->local; +        char       *type  = NULL; +        char       *p     = NULL; + +        if (op_ret < 0) +                goto out; + +        if (dict_get_str (xattr, BD_XATTR, &p)) { +                op_errno = EINVAL; +                goto out; +        } + +        type = gf_strdup (p); +        BD_VALIDATE_MEM_ALLOC (type, op_errno, out); + +        p = strrchr (type, ':'); +        if (!p) { +                op_errno = EINVAL; +                gf_log (this->name, GF_LOG_WARNING, +                        "source file xattr %s corrupted?", type); +                goto out; +        } + +        *p='\0'; + +        /* For clone size is taken from source LV */ +        if (!local->size) { +                p++; +                gf_string2bytesize (p, &local->size); +        } +        gf_asprintf (&bd, "%s:%ld", type, local->size); +        local->bdatt->type = gf_strdup (type); +        dict_del (local->dict, BD_XATTR); +        dict_del (local->dict, LINKTO); +        if (dict_set_dynstr (local->dict, BD_XATTR, bd)) { +                op_errno = EINVAL; +                goto out; +        } + +        STACK_WIND (frame, bd_offload_setx_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->setxattr, +                    local->dloc, local->dict, 0, NULL); + +        return 0; + +out: +        if (local->fd) +                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); +        else +                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + +        GF_FREE (type); +        GF_FREE (bd); + +        return 0; +} + +int +bd_offload_dest_lookup_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, +                            int op_ret, int op_errno, +                            inode_t *inode, struct iatt *iatt, +                            dict_t *xattr, struct iatt *postparent) +{ +        bd_local_t *local  = frame->local; +        char       *bd     = NULL; +        int         ret    = -1; +        char       *linkto = NULL; + +        if (op_ret < 0 && op_errno != ENODATA) { +                op_errno = EINVAL; +                goto out; +        } + +        if (!IA_ISREG (iatt->ia_type)) { +                op_errno = EINVAL; +                gf_log (this->name, GF_LOG_WARNING, "destination gfid is not a " +                        "regular file"); +                goto out; +        } + +        ret = dict_get_str (xattr, LINKTO, &linkto); +        if (linkto) { +                op_errno = EINVAL; +                gf_log (this->name, GF_LOG_WARNING, "destination file not " +                        "present in same brick"); +                goto out; +        } + +        ret = dict_get_str (xattr, BD_XATTR, &bd); +        if (bd) { +                op_errno = EEXIST; +                goto out; +        } + +        local->bdatt = CALLOC (1, sizeof (bd_attr_t)); +        BD_VALIDATE_MEM_ALLOC (local->bdatt, op_errno, out); + +        STACK_WIND (frame, bd_offload_getx_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->getxattr, +                    &local->loc, BD_XATTR, NULL); + +        return 0; +out: +        if (local->fd) +                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); +        else +                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + +        return 0; +} + +int +bd_merge_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int op_ret, int op_errno, struct iatt *preparent, +                     struct iatt *postparent, dict_t *xdata) +{ +        /* FIXME: if delete failed, remove xattr */ + +        BD_STACK_UNWIND (setxattr, frame, 
op_ret, op_errno, NULL); +        return 0; +} + +int +bd_do_merge(call_frame_t *frame, xlator_t *this) +{ +        bd_local_t *local    = frame->local; +        inode_t    *parent   = NULL; +        char       *p        = NULL; +        int         op_errno = 0; + +        op_errno = bd_merge (this->private, local->inode->gfid); +        if (op_errno) +                goto out; + +        /* +         * posix_unlink needs loc->pargfid to be valid, but setxattr FOP does +         * not have loc->pargfid set. Get parent's gfid by getting parents inode +         */ +        parent = inode_parent (local->inode, NULL, NULL); +        if (!parent) { +                /* +                 * FIXME: Snapshot LV already deleted. +                 * remove xattr, instead of returning failure +                 */ +                op_errno = EINVAL; +                goto out; +        } +        uuid_copy (local->loc.pargfid, parent->gfid); + +        p = strrchr (local->loc.path, '/'); +        if (p) +                p++; +        local->loc.name = p; + +        STACK_WIND (frame, bd_merge_unlink_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->unlink, +                    &local->loc, 0, NULL); + +        return 0; +out: +        BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); + +        return op_errno; +} + +int +bd_offload (call_frame_t *frame, xlator_t *this, loc_t *loc, +            fd_t *fd, bd_offload_t offload) +{ +        char       *param      = NULL; +        char       *param_copy = NULL; +        char       *p          = NULL; +        char       *size       = NULL; +        char       *gfid       = NULL; +        int         op_errno   = 0; +        bd_local_t *local      = frame->local; + +        param = GF_CALLOC (1, local->data->len + 1, gf_common_mt_char); +        BD_VALIDATE_MEM_ALLOC (param, op_errno, out); +        param_copy = param; + +        local->dict = dict_new (); +        BD_VALIDATE_MEM_ALLOC (local->dict, 
op_errno, out); + +        local->dloc = CALLOC (1, sizeof (loc_t)); +        BD_VALIDATE_MEM_ALLOC (local->dloc, op_errno, out); + +        strncpy (param, local->data->data, local->data->len); + +        gfid = strtok_r (param, ":", &p); +        size = strtok_r (NULL, ":", &p); +        if (size) +                gf_string2bytesize (size, &local->size); +        else if (offload != BD_OF_CLONE) +                local->size = bd_get_default_extent (this->private); + +        if (dict_set_int8 (local->dict, BD_XATTR, 1) < 0) { +                op_errno = EINVAL; +                goto out; +        } +        if (dict_set_int8 (local->dict, LINKTO, 1) < 0) { +                op_errno = EINVAL; +                goto out; +        } + +        uuid_parse (gfid, local->dloc->gfid); +        local->offload = offload; + +        STACK_WIND (frame, bd_offload_dest_lookup_cbk, FIRST_CHILD (this), +                    FIRST_CHILD (this)->fops->lookup, local->dloc, +                    local->dict); + +        return 0; + +out: +        if (fd) +                BD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); +        else +                BD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + +        GF_FREE (param_copy); +        return 0; +}  /*   * bd_setxattr: Used to create & map an LV to a posix file using @@ -1071,51 +1343,71 @@ out:   * bd_setx_setx_cbk -> create_lv   * if create_lv failed, posix_removexattr -> bd_setx_rm_xattr_cbk   */ -int32_t +int  bd_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,               int flags, dict_t *xdata)  { -        int           op_errno  = 0; -        data_t       *data      = NULL; -        bd_attr_t    *bdatt     = NULL; -        bd_local_t   *local     = NULL; +        int           op_errno = 0; +        data_t       *data     = NULL; +        bd_local_t   *local    = NULL; +        bd_attr_t    *bdatt    = NULL; +        bd_offload_t  cl_type  = BD_OF_NONE;          VALIDATE_OR_GOTO 
(frame, out);          VALIDATE_OR_GOTO (this, out); -        VALIDATE_OR_GOTO (this->private, out); -        VALIDATE_OR_GOTO (loc, out); -        bd_inode_ctx_get (loc->inode, this, &bdatt); +        if ((data = dict_get (dict, BD_XATTR))) +                cl_type = BD_OF_NONE; +        else if ((data = dict_get (dict, BD_CLONE))) +                cl_type = BD_OF_CLONE; +        else if ((data = dict_get (dict, BD_SNAPSHOT))) +                cl_type = BD_OF_SNAPSHOT; +        else if ((data = dict_get (dict, BD_MERGE))) +                cl_type = BD_OF_MERGE; -        data =  dict_get (dict, BD_XATTR); -        if (!data) { -                /* non bd file object */ -                STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this), -                                   FIRST_CHILD(this)->fops->setxattr, -                                   loc, dict, flags, xdata); +        bd_inode_ctx_get (loc->inode, this, &bdatt); +        if (!cl_type && !data) { +                STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD (this), +                            FIRST_CHILD (this)->fops->setxattr, loc, dict, +                            flags, xdata);                  return 0;          } -        if (bdatt) { -                gf_log (this->name, GF_LOG_WARNING, -                        "%s already mapped to BD", loc->path); -                op_errno = EEXIST; -                goto out; -        }          local = bd_local_init (frame, this);          BD_VALIDATE_MEM_ALLOC (local, op_errno, out); -        local->inode = inode_ref (loc->inode); -        loc_copy (&local->loc, loc);          local->data = data; +        loc_copy (&local->loc, loc); +        local->inode = inode_ref (loc->inode); -        STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD(this), -                   FIRST_CHILD(this)->fops->stat, loc, xdata); +        if (cl_type) { +                /* For cloning/snapshot, source file must be mapped to LV */ +                if (!bdatt) { +           
             gf_log (this->name, GF_LOG_WARNING, +                                "%s not mapped to BD", loc->path); +                        op_errno = EINVAL; +                        goto out; +                } +                if (cl_type == BD_OF_MERGE) +                        bd_do_merge (frame, this); +                else +                        bd_offload (frame, this, loc, NULL, cl_type); +        } else if (data) { +                if (bdatt) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "%s already mapped to BD", loc->path); +                        op_errno = EEXIST; +                        goto out; +                } +                STACK_WIND (frame, bd_setx_stat_cbk, FIRST_CHILD (this), +                            FIRST_CHILD (this)->fops->stat, loc, xdata); +        }          return 0; -  out: -        BD_STACK_UNWIND (setxattr, frame, -1, op_errno, xdata); +        if (op_errno) +                STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata); +          return 0;  } @@ -1131,10 +1423,11 @@ int32_t  bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,                int flags, dict_t *xdata)  { -        int       op_errno = 0; -        data_t   *data     = NULL; -        bd_attr_t *bdatt   = NULL; -        bd_local_t *local  = NULL; +        int           op_errno = 0; +        data_t       *data     = NULL; +        bd_attr_t    *bdatt    = NULL; +        bd_local_t   *local    = NULL; +        bd_offload_t  cl_type  = BD_OF_NONE;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -1144,27 +1437,57 @@ bd_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,          bd_inode_ctx_get (fd->inode, this, &bdatt);          data =  dict_get (dict, BD_XATTR); -        if (data) { +        if ((data = dict_get (dict, BD_XATTR))) +                cl_type = BD_OF_NONE; +        else if ((data = dict_get (dict, 
BD_CLONE))) +                cl_type = BD_OF_CLONE; +        else if ((data = dict_get (dict, BD_SNAPSHOT))) +                cl_type = BD_OF_SNAPSHOT; +        else if ((data = dict_get (dict, BD_MERGE))) { +                /* +                 * bd_merge is not supported for fsetxattr, because snapshot LV +                 * is opened and it causes problem in snapshot merge +                 */ +                op_errno = EOPNOTSUPP; +                goto out; +        } + +        bd_inode_ctx_get (fd->inode, this, &bdatt); + +        if (!cl_type && !data) { +                /* non bd file object */ +                STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->fsetxattr, +                            fd, dict, flags, xdata); +                return 0; +        } + +        local = bd_local_init (frame, this); +        BD_VALIDATE_MEM_ALLOC (local, op_errno, out); + +        local->inode = inode_ref (fd->inode); +        local->fd = fd_ref (fd); +        local->data = data; + +        if (cl_type) { +                /* For cloning/snapshot, source file must be mapped to LV */ +                if (!bdatt) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "fd %p not mapped to BD", fd); +                        op_errno = EINVAL; +                        goto out; + +                } +                bd_offload (frame, this, NULL, fd, cl_type); +        } else if (data) {                  if (bdatt) {                          gf_log (this->name, GF_LOG_WARNING,                                  "fd %p already mapped to BD", fd);                          op_errno = EEXIST;                          goto out;                  } -                local = bd_local_init (frame, this); -                BD_VALIDATE_MEM_ALLOC (local, op_errno, out); - -                local->inode = inode_ref (fd->inode); -                local->fd = fd_ref (fd); -               
 local->data = data; -                  STACK_WIND(frame, bd_setx_stat_cbk, FIRST_CHILD(this),                             FIRST_CHILD(this)->fops->fstat, fd, xdata); -        } else { -                /* non bd file object */ -                STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), -                            FIRST_CHILD(this)->fops->fsetxattr, -                            fd, dict, flags, xdata);          }          return 0; @@ -1682,8 +2005,10 @@ bd_handle_special_xattrs (call_frame_t *frame, xlator_t *this, loc_t *loc,          if (!strcmp (name, VOL_TYPE))                  op_ret = dict_set_int64 (xattr, (char *)name, 1); -        else +        else if (!strcmp (name, VOL_CAPS))                  op_ret = dict_set_int64 (xattr, (char *)name, priv->caps); +        else +                op_ret = bd_get_origin (this->private, loc, fd, xattr);  out:          if (loc) @@ -1703,7 +2028,8 @@ int  bd_fgetxattr (call_frame_t *frame, xlator_t *this,                fd_t *fd, const char *name, dict_t *xdata)  { -        if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS))) +        if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) +                     || !strcmp (name, BD_ORIGIN)))                  bd_handle_special_xattrs (frame, this, NULL, fd, name, xdata);          else                  STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), @@ -1716,7 +2042,8 @@ int  bd_getxattr (call_frame_t *frame, xlator_t *this,               loc_t *loc, const char *name, dict_t *xdata)  { -        if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS))) +        if (name && (!strcmp (name, VOL_TYPE) || !strcmp (name, VOL_CAPS) +                     || !strcmp (name, BD_ORIGIN)))                  bd_handle_special_xattrs (frame, this, loc, NULL, name, xdata);          else                  STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), @@ -1997,6 +2324,8 @@ init (xlator_t *this)                  }   
       } +        _private->caps |= BD_CAPS_OFFLOAD_COPY | BD_CAPS_OFFLOAD_SNAPSHOT; +          return 0;  error:          GF_FREE (_private->vg); diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h index e3acdedc1..34b4c9e22 100644 --- a/xlators/storage/bd/src/bd.h +++ b/xlators/storage/bd/src/bd.h @@ -40,6 +40,7 @@  #define LVM_RESIZE "/sbin/lvresize"  #define LVM_CREATE "/sbin/lvcreate" +#define LVM_CONVERT "/sbin/lvconvert"  #define VOL_TYPE "volume.type"  #define VOL_CAPS "volume.caps" @@ -48,6 +49,20 @@  #define BD_CAPS_BD               0x01  #define BD_CAPS_THIN             0x02 +#define BD_CAPS_OFFLOAD_COPY     0x04 +#define BD_CAPS_OFFLOAD_SNAPSHOT 0x08 + +#define BD_CLONE "clone" +#define BD_SNAPSHOT "snapshot" +#define BD_MERGE "merge" +#define BD_ORIGIN "list-origin" + +#define IOV_NR    4 +#define IOV_SIZE (64 * 1024) + +#define ALIGN_SIZE 4096 + +#define LINKTO "trusted.glusterfs.dht.linkto"  #define BD_VALIDATE_MEM_ALLOC(buff, op_errno, label)                \          if (!buff) {                                                \ @@ -115,6 +130,13 @@ typedef struct {          char        *type;  } bd_attr_t; +typedef enum { +        BD_OF_NONE, +        BD_OF_CLONE, +        BD_OF_SNAPSHOT, +        BD_OF_MERGE, +} bd_offload_t; +  typedef struct {          dict_t      *dict;          bd_attr_t   *bdatt; @@ -122,6 +144,9 @@ typedef struct {          loc_t        loc;          fd_t        *fd;          data_t      *data; /* for setxattr */ +        bd_offload_t offload; +        uint64_t     size; +        loc_t       *dloc;  } bd_local_t;  /* Prototypes */ @@ -145,4 +170,9 @@ int bd_clone (bd_local_t *local, bd_priv_t *priv);  int bd_merge (bd_priv_t *priv, uuid_t gfid);  int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict);  inline void bd_update_amtime(struct iatt *iatt, int flag); +int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv); +int bd_clone (bd_local_t *local, bd_priv_t *priv); +int bd_merge 
(bd_priv_t *priv, uuid_t gfid); +int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); +  #endif  | 
