diff options
| author | M. Mohan Kumar <mohan@in.ibm.com> | 2013-11-13 22:44:42 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2013-11-13 11:39:11 -0800 | 
| commit | b222ce817f5f324fe20d4d3614001ed2f177afb8 (patch) | |
| tree | be39e00cea1871b6df3d440199eb9cffd1711823 | |
| parent | 6ec9c4599e96de9dcae9426eae6bb1dde4dc7549 (diff) | |
bd: Add aio support to BD xlator
The volume option bd-aio controls the AIO feature of the BD xlator. The code is
taken from posix-aio.c.
Change-Id: Ib049bd59c9d3f9101d33939838322cfa808de053
BUG: 1028672
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Reviewed-on: http://review.gluster.org/5748
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 | ||||
| -rw-r--r-- | xlators/storage/bd/src/Makefile.am | 6 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd-aio.c | 527 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd-aio.h | 41 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd-helper.c | 19 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd.c | 60 | ||||
| -rw-r--r-- | xlators/storage/bd/src/bd.h | 18 | 
7 files changed, 651 insertions, 24 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ae4a464adbd..a035098d8d0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1368,6 +1368,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .voltype     = "storage/posix",            .op_version  = 3          }, +        { .key         = "storage.bd-aio", +          .voltype     = "storage/bd", +          .op_version  = 3 +        },          { .key        = "config.memory-accounting",            .voltype    = "configuration",            .option     = "!config", diff --git a/xlators/storage/bd/src/Makefile.am b/xlators/storage/bd/src/Makefile.am index 210b7453af8..3d93f744295 100644 --- a/xlators/storage/bd/src/Makefile.am +++ b/xlators/storage/bd/src/Makefile.am @@ -4,10 +4,10 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage  bd_la_LDFLAGS = -module -avoid-version  LIBBD = -llvm2app -lrt -bd_la_SOURCES = bd.c bd-helper.c -bd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) +bd_la_SOURCES = bd.c bd-helper.c bd-aio.c +bd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBBD) $(LIBAIO) -noinst_HEADERS = bd.h +noinst_HEADERS = bd.h bd-aio.h  AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \              -I$(top_srcdir)/rpc/xdr/src \ diff --git a/xlators/storage/bd/src/bd-aio.c b/xlators/storage/bd/src/bd-aio.c new file mode 100644 index 00000000000..62d4590f728 --- /dev/null +++ b/xlators/storage/bd/src/bd-aio.c @@ -0,0 +1,527 @@ +/* +  Copyright IBM, Corp. 2013 + +  This file is part of GlusterFS. + +  Author: M. 
Mohan Kumar <mohan@in.ibm.com> + +  Based on posix-aio.c + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <lvm2app.h> +#include <sys/uio.h> + +#include "xlator.h" +#include "glusterfs.h" +#include "defaults.h" +#include "bd.h" +#include "bd-aio.h" + +#ifdef HAVE_LIBAIO +#include <libaio.h> + +struct bd_aio_cb { +        struct iocb     iocb; +        call_frame_t   *frame; +        struct iobuf   *iobuf; +        struct iobref  *iobref; +        struct iatt     prebuf; +        int             op; +        off_t           offset; +        fd_t           *fd; +}; + +void +__bd_fd_set_odirect (fd_t *fd, bd_fd_t *bd_fd, int opflags, +                     off_t offset, size_t size) +{ +        int odirect = 0; +        int flags = 0; +        int ret = 0; + +        odirect = bd_fd->odirect; + +        if ((fd->flags|opflags) & O_DIRECT) { +                /* if instructed, use O_DIRECT always */ +                odirect = 1; +        } else { +                /* else use O_DIRECT when feasible */ +                if ((offset|size) & 0xfff) +                        odirect = 0; +                else +                        odirect = 1; +        } + +        if (!odirect && bd_fd->odirect) { +                flags = fcntl (bd_fd->fd, F_GETFL); +                ret = fcntl (bd_fd->fd, F_SETFL, (flags & (~O_DIRECT))); +                bd_fd->odirect = 0; +        } + +        if (odirect && !bd_fd->odirect) { +                flags = fcntl (bd_fd->fd, F_GETFL); +                ret = fcntl (bd_fd->fd, F_SETFL, (flags | O_DIRECT)); +                bd_fd->odirect = 1; +        } + +        if (ret) { +                gf_log (THIS->name, GF_LOG_WARNING, +                       
 "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d", +                        strerror (errno), bd_fd->fd, flags, bd_fd->odirect); +        } +} + +int +bd_aio_readv_complete (struct bd_aio_cb *paiocb, int res, int res2) +{ +        call_frame_t   *frame = NULL; +        xlator_t       *this = NULL; +        struct iobuf   *iobuf = NULL; +        struct iatt     postbuf = {0,}; +        int             op_ret = -1; +        int             op_errno = 0; +        struct iovec    iov; +        struct iobref  *iobref = NULL; +        off_t           offset = 0; +        bd_attr_t      *bdatt = NULL; + +        frame = paiocb->frame; +        this = frame->this; +        iobuf = paiocb->iobuf; +        offset = paiocb->offset; + +        if (res < 0) { +                op_ret = -1; +                op_errno = -res; +                gf_log (this->name, GF_LOG_ERROR, +                        "readv(async) failed fd=%p,size=%lu,offset=%llu (%d/%s)", +                        paiocb->fd, paiocb->iocb.u.c.nbytes, +                        (unsigned long long) paiocb->offset, +                        res, strerror (op_errno)); +                goto out; +        } + +        bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt); +        memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt)); + +        op_ret = res; +        op_errno = 0; + +        iobref = iobref_new (); +        if (!iobref) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto out; +        } + +        iobref_add (iobref, iobuf); + +        iov.iov_base = iobuf_ptr (iobuf); +        iov.iov_len = op_ret; + +        /* Hack to notify higher layers of EOF. 
*/ +        if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size) +                op_errno = ENOENT; + +out: +        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1, +                             &postbuf, iobref, NULL); +        if (iobuf) +                iobuf_unref (iobuf); +        if (iobref) +                iobref_unref (iobref); + +        GF_FREE (paiocb); + +        return 0; +} + +int +bd_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, +                 size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ +        int32_t             op_errno   = EINVAL; +        int                 _fd        = -1; +        struct iobuf       *iobuf      = NULL; +        bd_fd_t            *bd_fd      = NULL; +        int                 ret        = -1; +        struct bd_aio_cb   *paiocb     = NULL; +        bd_priv_t          *priv       = NULL; +        struct iocb        *iocb       = NULL; +        bd_attr_t          *bdatt      = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        priv = this->private; + +        ret = bd_fd_ctx_get (this, fd, &bd_fd); +        if (ret < 0 || !bd_fd) { +                STACK_WIND (frame, default_readv_cbk, FIRST_CHILD (this), +                            FIRST_CHILD (this)->fops->readv, fd, size, offset, +                            flags, xdata); +                return 0; +        } +        _fd = bd_fd->fd; +        bd_inode_ctx_get (fd->inode, this, &bdatt); +        if (!size) { +                op_errno = EINVAL; +                gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size); +                goto err; +        } + +        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); +        if (!iobuf) { +                op_errno = ENOMEM; +                goto err; +        } + +        paiocb = CALLOC (1, sizeof (*paiocb)); +        if (!paiocb) { +                op_errno = ENOMEM; +  
              goto err; +        } + +        paiocb->frame = frame; +        paiocb->iobuf = iobuf; +        paiocb->offset = offset; +        paiocb->op = GF_FOP_READ; +        paiocb->fd = fd; + +        paiocb->iocb.data = paiocb; +        paiocb->iocb.aio_fildes = _fd; +        paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD; +        paiocb->iocb.aio_reqprio = 0; +        paiocb->iocb.u.c.buf = iobuf_ptr (iobuf); +        paiocb->iocb.u.c.nbytes = size; +        paiocb->iocb.u.c.offset = offset; + +        iocb = &paiocb->iocb; + +        LOCK (&fd->lock); +        { +                __bd_fd_set_odirect (fd, bd_fd, flags, offset, size); + +                ret = io_submit (priv->ctxp, 1, &iocb); +        } +        UNLOCK (&fd->lock); + +        if (ret != 1) { +                gf_log (this->name, GF_LOG_ERROR, +                        "io_submit() returned %d", ret); +                op_errno = -ret; +                goto err; +        } + +        return 0; +err: +        STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0); +        if (iobuf) +                iobuf_unref (iobuf); + +        if (paiocb) +                GF_FREE (paiocb); + +        return 0; +} + +int +bd_aio_writev_complete (struct bd_aio_cb *paiocb, int res, int res2) +{ +        call_frame_t *frame    = NULL; +        xlator_t     *this     = NULL; +        struct iatt   prebuf   = {0,}; +        struct iatt   postbuf  = {0,}; +        int           op_ret   = -1; +        int           op_errno = 0; +        bd_attr_t    *bdatt    = NULL; + +        frame = paiocb->frame; +        prebuf = paiocb->prebuf; +        this = frame->this; + +        if (res < 0) { +                op_ret = -1; +                op_errno = -res; +                gf_log (this->name, GF_LOG_ERROR, +                        "writev(async) failed fd=%p,offset=%llu (%d/%s)", +                        paiocb->fd, (unsigned long long) paiocb->offset, res, +                        strerror (op_errno)); + +         
       goto out; +        } + +        bd_inode_ctx_get (paiocb->fd->inode, this, &bdatt); +        bd_update_amtime (&bdatt->iatt, GF_SET_ATTR_MTIME); +        memcpy (&postbuf, &bdatt->iatt, sizeof (struct iatt)); + +        op_ret = res; +        op_errno = 0; + +out: +        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf, +                             NULL); + +        if (paiocb) { +                if (paiocb->iobref) +                        iobref_unref (paiocb->iobref); +                GF_FREE (paiocb); +        } + +        return 0; +} + +int +bd_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, +               struct iovec *iov, int count, off_t offset, uint32_t flags, +               struct iobref *iobref, dict_t *xdata) +{ +        int32_t             op_errno   = EINVAL; +        int                 _fd        = -1; +        bd_fd_t            *bd_fd      = NULL; +        int                 ret        = -1; +        struct bd_aio_cb   *paiocb     = NULL; +        bd_priv_t          *priv       = NULL; +        struct iocb        *iocb       = NULL; +        bd_attr_t          *bdatt      = NULL; + +        VALIDATE_OR_GOTO (frame, err); +        VALIDATE_OR_GOTO (this, err); +        VALIDATE_OR_GOTO (fd, err); + +        priv = this->private; + +        ret = bd_fd_ctx_get (this, fd, &bd_fd); +        if (ret < 0 || !bd_fd) { +                STACK_WIND (frame, default_writev_cbk, +                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, +                            fd, iov, count, offset, flags, iobref, xdata); +                return 0; +        } + +        bd_inode_ctx_get (fd->inode, this, &bdatt); + +        _fd = bd_fd->fd; + +        paiocb = CALLOC (1, sizeof (*paiocb)); +        if (!paiocb) { +                op_errno = ENOMEM; +                goto err; +        } + + +        paiocb->frame = frame; +        paiocb->offset = offset; +        paiocb->op = GF_FOP_WRITE; +        
paiocb->fd = fd; + +        paiocb->iocb.data = paiocb; +        paiocb->iocb.aio_fildes = _fd; +        paiocb->iobref = iobref_ref (iobref); +        paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV; +        paiocb->iocb.aio_reqprio = 0; +        paiocb->iocb.u.v.vec = iov; +        paiocb->iocb.u.v.nr = count; +        paiocb->iocb.u.v.offset = offset; + +        iocb = &paiocb->iocb; + +        memcpy (&paiocb->prebuf, &bdatt->iatt, sizeof (struct iatt)); +        LOCK (&fd->lock); +        { +                __bd_fd_set_odirect (fd, bd_fd, flags, offset, +                                     iov_length (iov, count)); + +                ret = io_submit (priv->ctxp, 1, &iocb); +        } +        UNLOCK (&fd->lock); + +        if (ret != 1) { +                gf_log (this->name, GF_LOG_ERROR, +                        "io_submit() returned %d", ret); +                op_errno = -ret; +                goto err; +        } + +        return 0; +err: +        STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0); + +        if (paiocb) { +                if (paiocb->iobref) +                        iobref_unref (paiocb->iobref); +                GF_FREE (paiocb); +        } + +        return 0; +} + +void * +bd_aio_thread (void *data) +{ +        xlator_t           *this = NULL; +        bd_priv_t          *priv = NULL; +        int                 ret = 0; +        int                 i = 0; +        struct io_event    *event = NULL; +        struct bd_aio_cb   *paiocb = NULL; +        struct io_event     events[BD_AIO_MAX_NR_GETEVENTS]; +        struct timespec     ts = {0, }; + +        this = data; +        THIS = this; +        priv = this->private; + +        ts.tv_sec = 5; +        for (;;) { +                memset (&events[0], 0, sizeof (events)); +                ret = io_getevents (priv->ctxp, 1, BD_AIO_MAX_NR_GETEVENTS, +                                    &events[0], &ts); +                if (ret < 0) { +                        if (ret == -EINTR) +   
                             continue; +                        gf_log (this->name, GF_LOG_ERROR, +                                "io_getevents() returned %d, exiting", ret); +                        break; +                } + +                for (i = 0; i < ret; i++) { +                        event = &events[i]; + +                        paiocb = event->data; + +                        switch (paiocb->op) { +                        case GF_FOP_READ: +                                bd_aio_readv_complete (paiocb, event->res, +                                                          event->res2); +                                break; +                        case GF_FOP_WRITE: +                                bd_aio_writev_complete (paiocb, event->res, +                                                           event->res2); +                                break; +                        default: +                                gf_log (this->name, GF_LOG_ERROR, +                                        "unknown op %d found in piocb", +                                        paiocb->op); +                                break; +                        } +                } +        } + +        return NULL; +} + +int +bd_aio_init (xlator_t *this) +{ +        bd_priv_t *priv = NULL; +        int        ret = 0; + +        priv = this->private; + +        ret = io_setup (BD_AIO_MAX_NR_EVENTS, &priv->ctxp); +        if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Linux AIO not available at run-time." +                        " Continuing with synchronous IO"); +                ret = 0; +                goto out; +        } + +        if (ret < 0) { +                gf_log (this->name, GF_LOG_WARNING, +                        "io_setup() failed. 
ret=%d, errno=%d", +                        ret, errno); +                goto out; +        } + +        ret = pthread_create (&priv->aiothread, NULL, +                              bd_aio_thread, this); +        if (ret != 0) { +                io_destroy (priv->ctxp); +                goto out; +        } + +        this->fops->readv  = bd_aio_readv; +        this->fops->writev = bd_aio_writev; +out: +        return ret; +} + + +int +bd_aio_on (xlator_t *this) +{ +        bd_priv_t *priv = NULL; +        int        ret = 0; + +        priv = this->private; + +        if (!priv->aio_init_done) { +                ret = bd_aio_init (this); +                if (ret == 0) +                        priv->aio_capable = _gf_true; +                else +                        priv->aio_capable = _gf_false; +                priv->aio_init_done = _gf_true; +        } + +        if (priv->aio_capable) { +                this->fops->readv  = bd_aio_readv; +                this->fops->writev = bd_aio_writev; +        } + +        return ret; +} + +int +bd_aio_off (xlator_t *this) +{ +        this->fops->readv  = bd_readv; +        this->fops->writev = bd_writev; + +        return 0; +} + +#else + +int +bd_aio_on (xlator_t *this) +{ +        gf_log (this->name, GF_LOG_INFO, +                "Linux AIO not available at build-time." +                " Continuing with synchronous IO"); +        return 0; +} + +int +bd_aio_off (xlator_t *this) +{ +        gf_log (this->name, GF_LOG_INFO, +                "Linux AIO not available at build-time." +                " Continuing with synchronous IO"); +        return 0; +} + +void +__bd_fd_set_odirect (fd_t *fd, struct bd_fd *pfd, int opflags, +                        off_t offset, size_t size) +{ +        xlator_t        *this = THIS; +        gf_log (this->name, GF_LOG_INFO, +                "Linux AIO not available at build-time." 
+                " Continuing with synchronous IO"); +        return; +} +#endif diff --git a/xlators/storage/bd/src/bd-aio.h b/xlators/storage/bd/src/bd-aio.h new file mode 100644 index 00000000000..16f686a4caa --- /dev/null +++ b/xlators/storage/bd/src/bd-aio.h @@ -0,0 +1,41 @@ +/* +   Copyright IBM, Corp. 2013 + +   This file is part of GlusterFS. + +   This file is licensed to you under your choice of the GNU Lesser +   General Public License, version 3 or any later version (LGPLv3 or +   later), or the GNU General Public License, version 2 (GPLv2), in all +   cases as published by the Free Software Foundation. +*/ +#ifndef _BD_AIO_H +#define _BD_AIO_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "xlator.h" +#include "glusterfs.h" + +/* + * Maximum number of concurrently submitted IO events. The heaviest load + * GlusterFS has been able to handle had 60-80 concurrent calls + */ +#define BD_AIO_MAX_NR_EVENTS 256 + +/* Maximum number of completed IO operations to reap per getevents syscall */ +#define BD_AIO_MAX_NR_GETEVENTS 16 + +int bd_aio_on (xlator_t *this); +int bd_aio_off (xlator_t *this); + +int bd_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +                      off_t offset, uint32_t flags, dict_t *xdata); + +int bd_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, +                       struct iovec *vector, int32_t count, off_t offset, +                       uint32_t flags, struct iobref *iobref, dict_t *xdata); + +#endif /* !_BD_AIO_H */ diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c index 2c1b77a9b3e..8781f9fdfb4 100644 --- a/xlators/storage/bd/src/bd-helper.c +++ b/xlators/storage/bd/src/bd-helper.c @@ -3,6 +3,9 @@  #include "config.h"  #endif  #include <lvm2app.h> +#ifdef HAVE_LIBAIO +#include <libaio.h> +#endif  #include "bd.h"  #include "run.h" @@ -560,3 +563,19 @@ out:          return ret;  } + +inline void +bd_update_amtime(struct iatt 
*iatt, int flag) +{ +        struct timespec ts = {0, }; + +        clock_gettime (CLOCK_REALTIME, &ts); +        if (flag & GF_SET_ATTR_ATIME) { +                iatt->ia_atime = ts.tv_sec; +                iatt->ia_atime_nsec = ts.tv_nsec; +        } +        if (flag & GF_SET_ATTR_MTIME) { +                iatt->ia_mtime = ts.tv_sec; +                iatt->ia_mtime_nsec = ts.tv_nsec; +        } +} diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c index 5fa15c542c0..555f1d51fc1 100644 --- a/xlators/storage/bd/src/bd.c +++ b/xlators/storage/bd/src/bd.c @@ -26,8 +26,12 @@  #include <time.h>  #include <linux/fs.h>  #include <sys/ioctl.h> +#ifdef HAVE_LIBAIO +#include <libaio.h> +#endif  #include "bd.h" +#include "bd-aio.h"  #include "defaults.h"  #include "glusterfs3-xdr.h"  #include "run.h" @@ -440,22 +444,6 @@ out:          return 0;  } -static inline void -bd_update_amtime (struct iatt *iatt, int flag) -{ -        struct timespec ts         = {0, }; - -        clock_gettime (CLOCK_REALTIME, &ts); -        if (flag & GF_SET_ATTR_ATIME) { -                iatt->ia_atime = ts.tv_sec; -                iatt->ia_atime_nsec = ts.tv_nsec; -        } -        if (flag & GF_SET_ATTR_MTIME) { -                iatt->ia_mtime = ts.tv_sec; -                iatt->ia_mtime_nsec = ts.tv_nsec; -        } -} -  /*   * bd_readv: If posix file, invokes posix_readv otherwise reads from the BD   * file @@ -680,6 +668,7 @@ bd_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          bd_fd->fd = _fd;          bd_fd->flag = flags | O_LARGEFILE; +          if (fd_ctx_set (fd, this, (uint64_t)(long)bd_fd) < 0) {                  gf_log (this->name, GF_LOG_WARNING,                          "failed to set the fd context fd=%p", fd); @@ -1918,6 +1907,25 @@ mem_acct_init (xlator_t *this)          return ret;  } +int +reconfigure (xlator_t *this, dict_t *options) +{ +        int   ret = -1; +        bd_priv_t *priv = this->private; + +        
GF_OPTION_RECONF ("bd-aio", priv->aio_configured, options, +                          bool, out); + +        if (priv->aio_configured) +                bd_aio_on (this); +        else +                bd_aio_off (this); + +        ret = 0; +out: +        return ret; +} +  /**   * bd xlator init - Validate configured VG   */ @@ -1976,6 +1984,19 @@ init (xlator_t *this)          if (bd_scan_vg (this, _private))                  goto error; +        _private->aio_init_done = _gf_false; +        _private->aio_capable = _gf_false; + +        GF_OPTION_INIT ("bd-aio", _private->aio_configured, bool, error); +        if (_private->aio_configured) { +                if (bd_aio_on (this)) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "BD AIO init failed"); +                        ret = -1; +                        goto error; +                } +        } +          return 0;  error:          GF_FREE (_private->vg); @@ -2043,5 +2064,12 @@ struct volume_options options[] = {          { .key = {"device"},            .type = GF_OPTION_TYPE_STR,            .default_value = BACKEND_VG}, +        { +          .key  = {"bd-aio"}, +          .type = GF_OPTION_TYPE_BOOL, +          .default_value = "off", +          .description = "Support for native Linux AIO" +        }, +          { .key = {NULL} }  }; diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h index 4d8b8954524..e3acdedc1d2 100644 --- a/xlators/storage/bd/src/bd.h +++ b/xlators/storage/bd/src/bd.h @@ -23,6 +23,10 @@  #include "config.h"  #endif +#ifdef HAVE_LIBAIO +#include <libaio.h> +#endif +  #include "xlator.h"  #include "mem-types.h" @@ -83,6 +87,7 @@ enum gf_bd_mem_types_ {  typedef struct bd_fd {          int             fd;          int32_t         flag; +        int             odirect;  } bd_fd_t;  typedef struct bd_priv { @@ -90,6 +95,13 @@ typedef struct bd_priv {          char              *vg;          char              *pool;          int 
               caps; +        gf_boolean_t       aio_init_done; +        gf_boolean_t       aio_capable; +        gf_boolean_t       aio_configured; +#ifdef HAVE_LIBAIO +        io_context_t       ctxp; +        pthread_t          aiothread; +#endif  } bd_priv_t; @@ -112,11 +124,6 @@ typedef struct {          data_t      *data; /* for setxattr */  } bd_local_t; -typedef struct { -        char            *lv; -        struct list_head list; -} bd_del_entry; -  /* Prototypes */  int bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx);  int bd_inode_ctx_get (inode_t *inode, xlator_t *this, bd_attr_t **ctx); @@ -137,4 +144,5 @@ int bd_clone (bd_local_t *local, bd_priv_t *priv);  int bd_merge (bd_priv_t *priv, uuid_t gfid);  int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); +inline void bd_update_amtime(struct iatt *iatt, int flag);  #endif  | 
