summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/ec/src/ec-generic.c
diff options
context:
space:
mode:
author: Xavier Hernandez <xhernandez@datalab.es> 2014-07-14 17:34:04 +0200
committer: Vijay Bellur <vbellur@redhat.com> 2014-09-15 23:12:16 -0700
commit: d97863562bb0d2f685df3d2e3aa4bef1299c8307 (patch)
tree: b8d9455cdface5425e2452e98751ac75dac358e4 /xlators/cluster/ec/src/ec-generic.c
parent: 2be54585002cd1c9d02928b89a02047b58dd6aed (diff)
ec: Optimize read/write performance
This patch significantly improves performance of read/write operations on a dispersed volume by reusing previous inodelk/entrylk operations on the same inode/entry. This reduces the latency of each individual operation considerably.

Inode version and size are also updated when needed instead of on each request. This gives an additional boost.

Change-Id: I4b98d5508c86b53032e16e295f72a3f83fd8fcac
BUG: 1122586
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/8369
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-generic.c')
-rw-r--r-- xlators/cluster/ec/src/ec-generic.c | 96
1 files changed, 79 insertions, 17 deletions
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index 4afec3524c5..4aa02903969 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -91,11 +91,17 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_fd(fop, fop->fd);
+ ec_lock_prepare_fd(fop, fop->fd);
+ ec_lock(fop);
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -135,7 +141,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->xdata);
}
- return EC_STATE_UNLOCK;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH:
@@ -149,6 +155,12 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
NULL);
}
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop, 0);
+
return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK:
@@ -313,7 +325,8 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_fd(fop, fop->fd);
+ ec_lock_prepare_fd(fop, fop->fd);
+ ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@@ -323,6 +336,11 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -371,7 +389,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
cbk->xdata);
}
- return EC_STATE_UNLOCK;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION:
@@ -386,6 +404,12 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL);
}
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop, 0);
+
return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK:
@@ -526,11 +550,17 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_fd(fop, fop->fd);
+ ec_lock_prepare_fd(fop, fop->fd);
+ ec_lock(fop);
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -570,7 +600,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->xdata);
}
- return EC_STATE_UNLOCK;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH:
@@ -584,6 +614,12 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
fop->error, NULL);
}
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop, 0);
+
return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK:
@@ -665,10 +701,12 @@ out:
void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
{
ec_cbk_data_t * ans = NULL;
+ ec_inode_t * ctx = NULL;
+ ec_lock_t * lock = NULL;
data_t * data = NULL;
uint8_t * buff = NULL;
size_t size = 0;
- int32_t i = 0;
+ int32_t i = 0, have_size = 0;
if (cbk->op_ret < 0)
{
@@ -679,6 +717,22 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]);
+ LOCK(&cbk->inode->lock);
+
+ ctx = __ec_inode_get(cbk->inode, fop->xl);
+ if ((ctx != NULL) && !list_empty(&ctx->inode_locks))
+ {
+ lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
+ cbk->version = lock->version;
+ if (lock->have_size)
+ {
+ size = lock->size;
+ have_size = 1;
+ }
+ }
+
+ UNLOCK(&cbk->inode->lock);
+
if (cbk->iatt[0].ia_type == IA_IFREG)
{
uint8_t * blocks[cbk->count];
@@ -686,6 +740,10 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
cbk->size = cbk->iatt[0].ia_size;
ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size);
+ if (have_size)
+ {
+ cbk->iatt[0].ia_size = size;
+ }
size = SIZE_MAX;
for (i = 0, ans = cbk; (ans != NULL) && (i < ec->fragments);
@@ -1314,7 +1372,15 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_inode(fop, &fop->loc[0]);
+ if (fop->fd == NULL)
+ {
+ ec_lock_prepare_inode(fop, &fop->loc[0]);
+ }
+ else
+ {
+ ec_lock_prepare_fd(fop, fop->fd);
+ }
+ ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1373,11 +1439,7 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
}
}
- if (cbk->op_ret >= 0)
- {
- return EC_STATE_UPDATE_SIZE_AND_VERSION;
- }
- return EC_STATE_UNLOCK;
+ return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH:
@@ -1402,14 +1464,14 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
}
}
- return EC_STATE_UNLOCK;
+ return EC_STATE_LOCK_REUSE;
- case EC_STATE_UPDATE_SIZE_AND_VERSION:
- ec_update_size_version(fop);
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK;
- case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK:
ec_unlock(fop);