diff options
| -rw-r--r-- | libglusterfs/src/compat.h | 8 | ||||
| -rw-r--r-- | tests/basic/ec/ec-fallocate.t | 72 | ||||
| -rw-r--r-- | tests/basic/ec/ec-rebalance.t | 60 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-fops.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-inode-write.c | 203 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 5 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 10 | 
7 files changed, 354 insertions, 8 deletions
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h index fbaac76b9ee..f4da4b2a0de 100644 --- a/libglusterfs/src/compat.h +++ b/libglusterfs/src/compat.h @@ -59,6 +59,12 @@  #ifndef FALLOC_FL_ZERO_RANGE  #define FALLOC_FL_ZERO_RANGE    0x10 /* zeroes out range */  #endif +#ifndef FALLOC_FL_COLLAPSE_RANGE +#define FALLOC_FL_COLLAPSE_RANGE  0x08 /* reduces the size */ +#endif +#ifndef FALLOC_FL_INSERT_RANGE +#define FALLOC_FL_INSERT_RANGE  0x20 /* expands the size */ +#endif  #ifndef HAVE_LLISTXATTR @@ -177,6 +183,8 @@ enum {  #define FALLOC_FL_KEEP_SIZE     0x01 /* default is extend size */  #define FALLOC_FL_PUNCH_HOLE    0x02 /* de-allocates range */  #define FALLOC_FL_ZERO_RANGE    0x10 /* zeroes out range */ +#define FALLOC_FL_INSERT_RANGE  0x20 /* Expands the size */ +#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* Reduces the size */  #ifndef _PATH_UMOUNT    #define _PATH_UMOUNT "/sbin/umount" diff --git a/tests/basic/ec/ec-fallocate.t b/tests/basic/ec/ec-fallocate.t new file mode 100644 index 00000000000..1b827eed7df --- /dev/null +++ b/tests/basic/ec/ec-fallocate.t @@ -0,0 +1,72 @@ +#!/bin/bash +# +# Run several commands to verify basic fallocate functionality. We verify that +# fallocate creates and allocates blocks to a file. We also verify that the keep +# size option does not modify the file size. +### + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../fallocate.rc + +cleanup + +#create and start volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} +TEST $CLI volume start $V0 + +#Mount the volume +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +# check for fallocate support before continuing the test +require_fallocate -l 1m -n $M0/file && rm -f $M0/file + +# fallocate a file and verify blocks are allocated +TEST fallocate -l 1m $M0/file +blksz=`stat -c %b $M0/file` +nblks=`stat -c %B $M0/file` +TEST [ $(($blksz * $nblks)) -eq 1048576 ] + +TEST unlink $M0/file + +# truncate a file to a fixed size, fallocate and verify that the size does not +# change +TEST truncate -s 1M $M0/file +TEST fallocate -l 2m -n $M0/file +blksz=`stat -c %b $M0/file` +nblks=`stat -c %B $M0/file` +sz=`stat -c %s $M0/file` +TEST [ $sz -eq 1048576 ] +# Note that gluster currently incorporates a hack to limit the number of blocks +# reported as allocated to the file by the file size. We have allocated beyond the +# file size here. Just check for non-zero allocation to avoid setting a land mine +# for if/when that behavior might change. +TEST [ ! $(($blksz * $nblks)) -eq 0 ] +TEST unlink $M0/file + +# write some data, fallocate within and outside the range +# and check for data corruption. +TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1 +TEST cp $M0/file $M0/file.copy.pre +TEST fallocate -o 512k -l 128k $M0/file +TEST cp $M0/file $M0/file.copy.post +TEST cmp $M0/file.copy.pre $M0/file.copy.post +TEST fallocate -o 1000k -l 128k $M0/file +TEST cp $M0/file $M0/file.copy.post2 +TEST ! 
cmp $M0/file.copy.pre $M0/file.copy.post2 +TEST truncate -s 1M $M0/file.copy.post2 +TEST cmp $M0/file.copy.pre $M0/file.copy.post2 +TEST unlink $M0/file + +#Make sure offset/size are modified so that 3 blocks are allocated +TEST touch $M0/f1 +TEST fallocate -o 1280 -l 1024 $M0/f1 +EXPECT "^2304$" stat -c "%s" $M0/f1 +EXPECT "^1536$" stat -c "%s" $B0/${V0}0/f1 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +cleanup; diff --git a/tests/basic/ec/ec-rebalance.t b/tests/basic/ec/ec-rebalance.t new file mode 100644 index 00000000000..b5c30727a15 --- /dev/null +++ b/tests/basic/ec/ec-rebalance.t @@ -0,0 +1,60 @@ +#!/bin/bash +# +# This will test the rebalance failure reported in 1447559 +# +### + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fallocate.rc + +cleanup + +#create and start volume +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} +TEST $CLI volume start $V0 + +#Mount the volume +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +# Create files +for i in {1..10} +do +    dd if=/dev/urandom of=$M0/file$i bs=1024k count=1 +done + +md5_1=$(md5sum $M0/file1 | awk '{print $1}') +md5_2=$(md5sum $M0/file2 | awk '{print $1}') +md5_3=$(md5sum $M0/file3 | awk '{print $1}') +md5_4=$(md5sum $M0/file4 | awk '{print $1}') +md5_5=$(md5sum $M0/file5 | awk '{print $1}') +md5_6=$(md5sum $M0/file6 | awk '{print $1}') +md5_7=$(md5sum $M0/file7 | awk '{print $1}') +md5_8=$(md5sum $M0/file8 | awk '{print $1}') +md5_9=$(md5sum $M0/file9 | awk '{print $1}') +md5_10=$(md5sum $M0/file10 | awk '{print $1}') +# Add brick +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{3..5} + +#Trigger rebalance +TEST $CLI volume rebalance $V0 start force +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 + +#Remount to avoid any caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS 
--volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT "$md5_1" echo $(md5sum $M0/file1 | awk '{print $1}') +EXPECT "$md5_2" echo $(md5sum $M0/file2 | awk '{print $1}') +EXPECT "$md5_3" echo $(md5sum $M0/file3 | awk '{print $1}') +EXPECT "$md5_4" echo $(md5sum $M0/file4 | awk '{print $1}') +EXPECT "$md5_5" echo $(md5sum $M0/file5 | awk '{print $1}') +EXPECT "$md5_6" echo $(md5sum $M0/file6 | awk '{print $1}') +EXPECT "$md5_7" echo $(md5sum $M0/file7 | awk '{print $1}') +EXPECT "$md5_8" echo $(md5sum $M0/file8 | awk '{print $1}') +EXPECT "$md5_9" echo $(md5sum $M0/file9 | awk '{print $1}') +EXPECT "$md5_10" echo $(md5sum $M0/file10 | awk '{print $1}') + +cleanup; diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h index 4e17ec509fd..fab22d8240d 100644 --- a/xlators/cluster/ec/src/ec-fops.h +++ b/xlators/cluster/ec/src/ec-fops.h @@ -168,6 +168,10 @@ void ec_symlink(call_frame_t * frame, xlator_t * this, uintptr_t target,                  const char * linkname, loc_t * loc, mode_t umask,                  dict_t * xdata); +void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +              int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, +              int32_t mode, off_t offset, size_t len, dict_t *xdata); +  void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,                   int32_t minimum, fop_truncate_cbk_t func, void *data,                   loc_t * loc, off_t offset, dict_t * xdata); diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c index 744797bfcfe..5405d69fe35 100644 --- a/xlators/cluster/ec/src/ec-inode-write.c +++ b/xlators/cluster/ec/src/ec-inode-write.c @@ -828,7 +828,208 @@ out:      }  } -/* FOP: truncate */ +/********************************************************************* + * + * File Operation : fallocate + * + *********************************************************************/ + +int32_t 
ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                         int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                         struct iatt *postbuf, dict_t *xdata) +{ +    return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno, +                                   prebuf, postbuf, xdata); +} + +void ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) +{ +    ec_trace("WIND", fop, "idx=%d", idx); + +    STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx, +                      ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate, +                      fop->fd, fop->int32, fop->offset, +                      fop->size, fop->xdata); +} + +int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) +{ +    ec_cbk_data_t *cbk = NULL; + +    switch (state) { +    case EC_STATE_INIT: +        if (fop->size == 0) { +                ec_fop_set_error(fop, EINVAL); +                return EC_STATE_REPORT; +        } +        if (fop->int32 & (FALLOC_FL_COLLAPSE_RANGE +                         |FALLOC_FL_INSERT_RANGE +                         |FALLOC_FL_ZERO_RANGE +                         |FALLOC_FL_PUNCH_HOLE)) { +                ec_fop_set_error(fop, ENOTSUP); +                return EC_STATE_REPORT; +        } +        fop->user_size = fop->offset + fop->size; +        fop->head = ec_adjust_offset (fop->xl->private, &fop->offset, 1); +        fop->size = ec_adjust_size (fop->xl->private, fop->head + fop->size, 1); + +        /* Fall through */ + +    case EC_STATE_LOCK: +        ec_lock_prepare_fd(fop, fop->fd, +                           EC_UPDATE_DATA | EC_UPDATE_META | +                           EC_QUERY_INFO); +        ec_lock(fop); + +        return EC_STATE_DISPATCH; + +    case EC_STATE_DISPATCH: + +        ec_dispatch_all(fop); + +        return EC_STATE_PREPARE_ANSWER; + +    case EC_STATE_PREPARE_ANSWER: +        cbk = ec_fop_prepare_answer(fop, _gf_false); +        if (cbk 
!= NULL) { +                ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, +                            cbk->count); + +                /* This shouldn't fail because we have the inode locked. */ +                GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, +                                        &cbk->iatt[0].ia_size)); + +                /*If mode has FALLOC_FL_KEEP_SIZE keep the size */ +                if (fop->int32 & FALLOC_FL_KEEP_SIZE) { +                        cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; +                } else if (fop->user_size > cbk->iatt[0].ia_size) { +                        cbk->iatt[1].ia_size = fop->user_size; + +                        /* This shouldn't fail because we have the inode +                         * locked. */ +                        GF_ASSERT(ec_set_inode_size(fop, +                                  fop->locks[0].lock->loc.inode, +                                            cbk->iatt[1].ia_size)); +                } else { +                        cbk->iatt[1].ia_size = cbk->iatt[0].ia_size; +                } + +        } + +        return EC_STATE_REPORT; + +    case EC_STATE_REPORT: +        cbk = fop->answer; + +        GF_ASSERT(cbk != NULL); + +        if (fop->cbks.fallocate != NULL) { +                fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret, +                                    cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +                                    cbk->xdata); +        } + +        return EC_STATE_LOCK_REUSE; + +    case -EC_STATE_INIT: +    case -EC_STATE_LOCK: +    case -EC_STATE_DISPATCH: +    case -EC_STATE_PREPARE_ANSWER: +    case -EC_STATE_REPORT: +        GF_ASSERT(fop->error != 0); + +        if (fop->cbks.fallocate != NULL) { +                fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1, +                                    fop->error, NULL, NULL, NULL); +        } + +        return EC_STATE_LOCK_REUSE; + +    case -EC_STATE_LOCK_REUSE: 
+    case EC_STATE_LOCK_REUSE: +        ec_lock_reuse(fop); + +        return EC_STATE_UNLOCK; + +    case -EC_STATE_UNLOCK: +    case EC_STATE_UNLOCK: +        ec_unlock(fop); + +        return EC_STATE_END; + +    default: +        gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL, +                EC_MSG_UNHANDLED_STATE, +                "Unhandled state %d for %s", +                state, ec_fop_name(fop->id)); + +        return EC_STATE_END; +    } +} + +void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +              int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, +              int32_t mode, off_t offset, size_t len, dict_t *xdata) +{ +    ec_cbk_t callback = { .fallocate = func }; +    ec_fop_data_t *fop = NULL; +    int32_t error = ENOMEM; + +    gf_msg_trace ("ec", 0, "EC(FALLOCATE) %p", frame); + +    VALIDATE_OR_GOTO(this, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +    fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target, +                               minimum, ec_wind_fallocate, ec_manager_fallocate, +                               callback, data); +    if (fop == NULL) { +        goto out; +    } + +    fop->use_fd = 1; +    fop->int32 = mode; +    fop->offset = offset; +    fop->size = len; + +    if (fd != NULL) { +        fop->fd = fd_ref(fd); +        if (fop->fd == NULL) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        EC_MSG_FILE_DESC_REF_FAIL, +                        "Failed to reference a " +                        "file descriptor."); +                goto out; +        } +    } + +    if (xdata != NULL) { +        fop->xdata = dict_ref(xdata); +        if (fop->xdata == NULL) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        EC_MSG_DICT_REF_FAIL, +                        "Failed to reference a " +                        "dictionary."); +                goto 
out; +        } +    } + +    error = 0; + +out: +    if (fop != NULL) { +        ec_manager(fop, error); +    } else { +        func(frame, NULL, this, -1, error, NULL, NULL, NULL); +    } +} + +/********************************************************************* + * + * File Operation : truncate + * + *********************************************************************/  int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)  { diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 2009faccbaf..3d40b111819 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -769,10 +769,11 @@ int32_t ec_gf_fentrylk(call_frame_t * frame, xlator_t * this,  }  int32_t ec_gf_fallocate(call_frame_t * frame, xlator_t * this, fd_t * fd, -                        int32_t keep_size, off_t offset, size_t len, +                        int32_t mode, off_t offset, size_t len,                          dict_t * xdata)  { -    default_fallocate_failure_cbk(frame, ENOTSUP); +    ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk, +                 NULL, fd, mode, offset, len, xdata);      return 0;  } diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 999e8f60534..c78decd8482 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -754,11 +754,11 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,                  goto out;          } -	ret = sys_fallocate (pfd->fd, flags, offset, len); -	if (ret == -1) { -		ret = -errno; -		goto out; -	} +        ret = sys_fallocate (pfd->fd, flags, offset, len); +        if (ret == -1) { +                ret = -errno; +                goto out; +        }          ret = posix_fdstat (this, pfd->fd, statpost);          if (ret == -1) {  | 
