summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src
diff options
context:
space:
mode:
authorSunil Kumar Acharya <sheggodu@redhat.com>2017-06-14 16:28:40 +0530
committerPranith Kumar K <pkarampu@redhat.com>2017-10-25 17:22:41 +0530
commit63160cb952fe7716a3313ce5ee32f890fe4d7a0c (patch)
tree3a82114a57071e68ed645e014931a34edebb13fe /xlators/cluster/ec/src
parent2ade36cd98ea0f5bd2a8f619a19c20438318afaf (diff)
cluster/ec: Implement DISCARD FOP for EC
Updates #254 This code change implements DISCARD FOP support for EC. BUG: 1461018 Change-Id: I09a9cb2aa9d91ec27add4f422dc9074af5b8b2db Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src')
-rw-r--r--xlators/cluster/ec/src/ec-common.h3
-rw-r--r--xlators/cluster/ec/src/ec-fops.h4
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h5
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c365
-rw-r--r--xlators/cluster/ec/src/ec.c3
5 files changed, 332 insertions, 48 deletions
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 2744c6372a1..e3468333cc4 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -85,6 +85,8 @@ void ec_update_good(ec_fop_data_t *fop, uintptr_t good);
void ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+void __ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
ec_cbk_data_t *
ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
@@ -133,5 +135,4 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
ec_heal_need_t *need_heal);
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
-
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index fab22d8240d..4a926cf4802 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -172,6 +172,10 @@ void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata);
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_truncate_cbk_t func, void *data,
loc_t * loc, off_t offset, dict_t * xdata);
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index cfd7daaa5c2..a8f153a395d 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -178,8 +178,5 @@ ec_is_data_fop (glusterfs_fop_t fop);
int32_t
ec_launch_replace_heal (ec_t *ec);
-/*
-gf_boolean_t
-ec_is_metadata_fop (glusterfs_fop_t fop);
-*/
+
#endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index e6a67cf67bc..ae5120226e3 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -19,6 +19,97 @@
#include "ec-method.h"
#include "ec-fops.h"
+int32_t
+ec_update_writev_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_cbk_data_t *cbk = NULL;
+ ec_fop_data_t *parent = fop->parent;
+ int i = 0;
+
+ ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
+ op_ret, op_errno, ec_fop_name (parent->id));
+
+ if (op_ret < 0) {
+ ec_fop_set_error (parent, op_errno);
+ goto out;
+ }
+ cbk = ec_cbk_data_allocate (parent->frame, this, parent,
+ parent->id, 0, op_ret, op_errno);
+ if (!cbk) {
+ ec_fop_set_error (parent, ENOMEM);
+ goto out;
+ }
+
+ if (xdata)
+ cbk->xdata = dict_ref (xdata);
+
+ if (prebuf)
+ cbk->iatt[i++] = *prebuf;
+
+ if (postbuf)
+ cbk->iatt[i++] = *postbuf;
+
+ LOCK (&parent->lock);
+ {
+ parent->good &= fop->good;
+
+ if (gf_bits_count (parent->good) < parent->minimum) {
+ __ec_fop_set_error (parent, EIO);
+ } else if (fop->error == 0 && parent->answer == NULL) {
+ parent->answer = cbk;
+ }
+ }
+ UNLOCK (&parent->lock);
+out:
+ return 0;
+}
+
+int32_t ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset,
+ size_t size)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec vector;
+ int32_t err = -ENOMEM;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ vector.iov_base = iobuf->ptr;
+ vector.iov_len = size;
+ memset(vector.iov_base, 0, vector.iov_len);
+
+ ec_writev(fop->frame, fop->xl, mask, fop->minimum,
+ ec_update_writev_cbk, NULL, fop->fd, &vector, 1,
+ offset, 0, iobref, NULL);
+
+ err = 0;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ return err;
+}
+
int
ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie,
int op_ret, int op_errno, struct iatt *prestat,
@@ -1034,62 +1125,252 @@ out:
}
}
-int32_t
-ec_truncate_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+/*********************************************************************
+ *
+ * File Operation : Discard
+ *
+ *********************************************************************/
+void ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
{
- ec_fop_data_t *fop = cookie;
+ ec_t *ec = fop->xl->private;
+ off_t off_head = 0;
+ off_t off_tail = 0;
+ size_t size_head = 0;
+ size_t size_tail = 0;
+ int error = 0;
+
+ off_head = fop->offset * ec->fragments - fop->int32;
+ if (fop->size == 0) {
+ error = ec_update_write (fop, mask, off_head, fop->user_size);
+ } else {
+ size_head = fop->int32;
+ size_tail = (fop->user_size - fop->int32) % ec->stripe_size;
+ off_tail = off_head + fop->user_size - size_tail;
+ if (size_head) {
+ error = ec_update_write (fop, mask, off_head, size_head);
+ goto out;
+ }
+ if (size_tail) {
+ error = ec_update_write (fop, mask, off_tail, size_tail);
+ }
+ }
+out:
+ if (error)
+ ec_fop_set_error (fop, -error);
+}
- fop->parent->good &= fop->good;
- ec_trace("TRUNCATE_WRITEV_CBK", cookie, "ret=%d, errno=%d",
- op_ret, op_errno);
- return 0;
+void ec_discard_adjust_offset_size(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+
+ fop->user_size = fop->size;
+ /* If discard length covers atleast a fragment on brick, we will
+ * perform discard operation(when fop->size is non-zero) else we just
+ * write zeros.
+ */
+ fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
+ if (fop->size < fop->int32) {
+ fop->size = 0;
+ } else {
+ fop->size -= fop->int32;
+ ec_adjust_size_down(ec, &fop->size, _gf_true);
+ }
}
-int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)
+int32_t ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ec_t * ec = fop->xl->private;
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- struct iovec vector;
- int32_t err = -ENOMEM;
+ return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
- iobref = iobref_new();
- if (iobref == NULL) {
- goto out;
- }
- iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
- if (iobuf == NULL) {
- goto out;
+void ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
+ fop->fd, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+ off_t fl_start = 0;
+ size_t fl_size = 0;
+
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if ((fop->size <= 0) || (fop->offset < 0)) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ /* Because of the head/tail writes, "discard" happens on the remaining
+ * regions, but we need to compute region including head/tail writes
+ * so compute them separately*/
+ fl_start = fop->offset;
+ fl_size = fop->size;
+ fl_size += ec_adjust_offset_down (fop->xl->private, &fl_start,
+ _gf_true);
+ ec_adjust_size_up (fop->xl->private, &fl_size, _gf_true);
+
+ ec_discard_adjust_offset_size(fop);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO, fl_start, fl_size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ /* Dispatch discard fop only if we have whole fragment
+ * to deallocate */
+ if (fop->size) {
+ ec_dispatch_all(fop);
+ return EC_STATE_DELAYED_START;
+ } else {
+ /*Assume discard to have succeeded on mask*/
+ fop->good = fop->mask;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DELAYED_START:
+
+ if (fop->size) {
+ if (fop->answer && fop->answer->op_ret == 0)
+ ec_update_discard_write (fop, fop->answer->mask);
+ } else {
+ ec_update_discard_write (fop, fop->mask);
+ }
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+ cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
}
- err = iobref_add(iobref, iobuf);
- if (err != 0) {
+}
+
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = { .discard = func };
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace ("ec", 0, "EC(DISCARD) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ minimum, ec_wind_discard, ec_manager_discard,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
- vector.iov_base = iobuf->ptr;
- vector.iov_len = fop->offset * ec->fragments - fop->user_size;
- memset(vector.iov_base, 0, vector.iov_len);
+ fop->use_fd = 1;
+ fop->offset = offset;
+ fop->size = len;
- iobuf_unref (iobuf);
- iobuf = NULL;
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
- ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_truncate_writev_cbk,
- NULL, fop->fd, &vector, 1, fop->user_size, 0, iobref, NULL);
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
- err = 0;
+ error = 0;
out:
- if (iobuf != NULL) {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL) {
- iobref_unref(iobref);
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
+}
- return err;
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
+
+int32_t ec_update_truncate_write (ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ size_t size = fop->offset * ec->fragments - fop->user_size;
+ return ec_update_write (fop, mask, fop->user_size, size);
}
int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
@@ -1102,9 +1383,9 @@ int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
fop->parent->good &= fop->good;
if (op_ret >= 0) {
fd_bind (fd);
- err = ec_truncate_write(fop->parent, fop->answer->mask);
+ err = ec_update_truncate_write (fop->parent, fop->answer->mask);
if (err != 0) {
- fop->error = -err;
+ ec_fop_set_error (fop->parent, -err);
}
}
@@ -1125,7 +1406,7 @@ int32_t ec_truncate_clean(ec_fop_data_t * fop)
return 0;
} else {
- return ec_truncate_write(fop, fop->answer->mask);
+ return ec_update_truncate_write (fop, fop->answer->mask);
}
}
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 856d60c00c9..09c5fa83eb9 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -729,7 +729,8 @@ int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
off_t offset, size_t len, dict_t * xdata)
{
- default_discard_failure_cbk(frame, ENOTSUP);
+ ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk,
+ NULL, fd, offset, len, xdata);
return 0;
}