diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2012-10-28 08:30:15 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2012-12-12 00:21:27 -0500 | 
| commit | 1a42faae4967bc0cdb7cefcfbfca45f7d0b10360 (patch) | |
| tree | f30b52f6336a2f0bcd685eddabf9501f097aef58 | |
| parent | 6ae820dc4c79f34a5c381b752a80c4b0169b60bb (diff) | |
storage/posix: Make rchecksum O_DIRECT friendly
Problem:
When posix-aio is enabled, the fd is set with O_DIRECT whenever
possible in the read and writev fops so that aio can be performed.
Rchecksum does not take this into account. If any of the offset, size,
or memory buffer passed to pread in the rchecksum fop is not aligned,
pread fails with EINVAL.
Fix:
Before doing pread, the necessary O_DIRECT manipulation is done when
aio is enabled. The memory buffer passed to pread is now page-aligned.
Test:
1) Create replica volume with aio enabled.
2) dd if=/dev/urandom of=a bs=1M count=1
3) kill one of the bricks in the replica pair
4) dd if=/dev/urandom of=a bs=1M count=1
5) bring back the brick. Self-heal succeeds after the change.
The test above checks both the rchecksum and writev fops that were
changed in this patch.
Change-Id: I5126e20ca1d6aeb71d4d66d14de277729fc8e89f
BUG: 866459
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/156
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Vijay Bellur <vbellur@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/1880
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 73 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.h | 4 | 
2 files changed, 53 insertions, 24 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 5260ff37504..58e7b45411a 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -62,6 +62,7 @@  #include "posix-aio.h"  extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096  #undef HAVE_SET_FSID  #ifdef HAVE_SET_FSID @@ -2022,6 +2023,21 @@ err:          return op_ret;  } +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ +        char            *alloc_buf = NULL; +        char            *buf = NULL; + +        alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); +        if (!alloc_buf) +                goto out; +        /* page aligned buffer */ +        buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); +        *aligned_buf = buf; +out: +        return alloc_buf; +}  int32_t  __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, @@ -2029,7 +2045,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,  {          int32_t         op_ret = 0;          int             idx = 0; -        int             align = 4096;          int             max_buf_size = 0;          int             retval = 0;          char            *buf = NULL; @@ -2045,7 +2060,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,                          max_buf_size = vector[idx].iov_len;          } -        alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char); +        alloc_buf = _page_aligned_alloc (max_buf_size, &buf);          if (!alloc_buf) {                  op_ret = -errno;                  goto err; @@ -2053,9 +2068,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,          internal_off = startoff;          for (idx = 0; idx < count; idx++) { -                /* page aligned buffer */ -                buf = GF_ALIGN_BUF (alloc_buf, align); -                  memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);                  
/* not sure whether writev works on O_DIRECT'd fd */ @@ -3863,23 +3875,26 @@ int32_t  posix_rchecksum (call_frame_t *frame, xlator_t *this,                   fd_t *fd, off_t offset, int32_t len, dict_t *xdata)  { -        char            *buf           = NULL; -        int              _fd           = -1; -        struct posix_fd *pfd           = NULL; -        int              op_ret        = -1; -        int              op_errno      = 0; -        int              ret           = 0; -        int32_t          weak_checksum = 0; -        unsigned char    strong_checksum[MD5_DIGEST_LENGTH]; +        char                    *alloc_buf      = NULL; +        char                    *buf            = NULL; +        int                     _fd             = -1; +        struct posix_fd         *pfd            = NULL; +        int                     op_ret          = -1; +        int                     op_errno        = 0; +        int                     ret             = 0; +        int32_t                 weak_checksum   = 0; +        unsigned char           strong_checksum[MD5_DIGEST_LENGTH] = {0}; +        struct posix_private    *priv           = NULL;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out);          VALIDATE_OR_GOTO (fd, out); +        priv = this->private;          memset (strong_checksum, 0, MD5_DIGEST_LENGTH); -        buf = GF_CALLOC (1, len, gf_posix_mt_char); -        if (!buf) { +        alloc_buf = _page_aligned_alloc (len, &buf); +        if (!alloc_buf) {                  op_errno = ENOMEM;                  goto out;          } @@ -3894,15 +3909,25 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,          _fd = pfd->fd; -        ret = pread (_fd, buf, len, offset); -        if (ret < 0) { -                gf_log (this->name, GF_LOG_WARNING, -                        "pread of %d bytes returned %d (%s)", -                        len, ret, strerror (errno)); +        LOCK (&fd->lock); +        { +             
   if (priv->aio_capable && priv->aio_init_done) +                        __posix_fd_set_odirect (fd, pfd, 0, offset, len); + +                ret = pread (_fd, buf, len, offset); +                if (ret < 0) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "pread of %d bytes returned %d (%s)", +                                len, ret, strerror (errno)); + +                        op_errno = errno; +                } -                op_errno = errno; -                goto out;          } +        UNLOCK (&fd->lock); + +        if (ret < 0) +                goto out;          weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len);          gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); @@ -3912,8 +3937,8 @@ out:          STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,                               weak_checksum, strong_checksum, NULL); -        if (buf) -                GF_FREE (buf); +        if (alloc_buf) +                GF_FREE (alloc_buf);          return 0;  } diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index eccd4d85fa7..f4e1b0a1015 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -168,4 +168,8 @@ int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd);  void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf);  gf_boolean_t posix_special_xattr (char **pattern, char *key); + +void +__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, +			off_t offset, size_t size);  #endif /* _POSIX_H */  | 
