summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-inode-write.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-inode-write.c')
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c124
1 files changed, 82 insertions, 42 deletions
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index 3ed9b2a1ba4..e6a67cf67bc 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -127,10 +127,12 @@ ec_manager_xattr (ec_fop_data_t *fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL) {
ec_lock_prepare_inode(fop, &fop->loc[0],
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
} else {
ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
}
ec_lock(fop);
@@ -369,10 +371,11 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL) {
ec_lock_prepare_inode(fop, &fop->loc[0],
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
} else {
- ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_META | EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
}
ec_lock(fop);
@@ -879,8 +882,8 @@ int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fop->offset, fop->size);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -898,24 +901,28 @@ int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
- GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
- &cbk->iatt[0].ia_size));
+ LOCK(&fop->locks[0].lock->loc.inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
- /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
- if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
- } else if (fop->user_size > cbk->iatt[0].ia_size) {
- cbk->iatt[1].ia_size = fop->user_size;
-
- /* This shouldn't fail because we have the inode
- * locked. */
- GF_ASSERT(ec_set_inode_size(fop,
- fop->locks[0].lock->loc.inode,
- cbk->iatt[1].ia_size));
- } else {
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
+ if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ } else if (fop->user_size > cbk->iatt[0].ia_size) {
+ cbk->iatt[1].ia_size = fop->user_size;
+
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ cbk->iatt[1].ia_size));
+ } else {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
}
-
+ UNLOCK(&fop->locks[0].lock->loc.inode->lock);
}
return EC_STATE_REPORT;
@@ -1155,11 +1162,11 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
if (fop->id == GF_FOP_TRUNCATE) {
ec_lock_prepare_inode(fop, &fop->loc[0],
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fop->offset, LLONG_MAX);
} else {
ec_lock_prepare_fd(fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fop->offset, LLONG_MAX);
}
ec_lock(fop);
@@ -1179,6 +1186,9 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
+ /* Inode size doesn't need to be updated under locks, because
+ * conflicting operations won't be in-flight
+ */
GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
&cbk->iatt[0].ia_size));
cbk->iatt[1].ia_size = fop->user_size;
@@ -1582,6 +1592,9 @@ void ec_writev_start(ec_fop_data_t *fop)
ctx = ec_fd_get(fop->fd, fop->xl);
if (ctx != NULL) {
if ((ctx->flags & O_APPEND) != 0) {
+ /* Appending writes take full locks so size won't change because
+ * of any parallel operations
+ */
fop->offset = current;
}
}
@@ -1601,6 +1614,10 @@ void ec_writev_start(ec_fop_data_t *fop)
}
tail = fop->size - fop->user_size - fop->head;
if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+ /* Current locking scheme will make sure the 'current' below will
+ * never decrease while the fop is in progress, so the checks will
+ * work as expected
+ */
if (current > fop->offset + fop->head + fop->user_size) {
if (ec_make_internal_fop_xdata (&xdata)) {
err = -ENOMEM;
@@ -1678,14 +1695,32 @@ ec_writev_encode(ec_fop_data_t *fop)
int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ ec_fd_t *ctx = NULL;
+ ec_t *ec = fop->xl->private;
+ off_t fl_start = 0;
+ size_t fl_size = LLONG_MAX;
switch (state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) == 0) {
+ off_t user_size = 0;
+ off_t head = 0;
+
+ fl_start = fop->offset;
+ user_size = iov_length(fop->vector, fop->int32);
+ head = ec_adjust_offset_down(ec, &fl_start,
+ _gf_true);
+ fl_size = user_size + head;
+ ec_adjust_size_up(ec, &fl_size, _gf_true);
+ }
+ }
ec_lock_prepare_fd(fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fl_start, fl_size);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1717,23 +1752,28 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
- GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode,
- &cbk->iatt[0].ia_size));
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
- size = fop->offset + fop->head + fop->user_size;
- if (size > cbk->iatt[0].ia_size) {
- /* Only update inode size if this is a top level fop.
- * Otherwise this is an internal write and the top
- * level fop should take care of the real inode size.
- */
- if (fop->parent == NULL) {
- /* This shouldn't fail because we have the inode
- * locked. */
- GF_ASSERT(ec_set_inode_size(fop, fop->fd->inode,
- size));
- }
- cbk->iatt[1].ia_size = size;
+ LOCK(&fop->fd->inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ size = fop->offset + fop->head + fop->user_size;
+ if (size > cbk->iatt[0].ia_size) {
+ /* Only update inode size if this is a top level fop.
+ * Otherwise this is an internal write and the top
+ * level fop should take care of the real inode size.
+ */
+ if (fop->parent == NULL) {
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(fop,
+ fop->fd->inode, size));
+ }
+ cbk->iatt[1].ia_size = size;
+ }
}
+ UNLOCK(&fop->fd->inode->lock);
+
if (fop->error == 0) {
cbk->op_ret *= ec->fragments;
if (cbk->op_ret < fop->head) {