summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/ec/src/ec-data.h
diff options
context:
space:
mode:
author: Xavier Hernandez <xhernandez@datalab.es> 2015-05-20 15:17:35 +0200
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-05-27 03:25:47 -0700
commit: 3b666b40efbed157e8c5991f29b345d93b28c659 (patch)
tree: a6fb9a20bed31bbcb0e5dd2025e51d7f4ea6e257 /xlators/cluster/ec/src/ec-data.h
parent: 5513144feb5b062b733d7514adf194429e31666f (diff)
cluster/ec: Forced unlock when lock contention is detected
EC uses an eager lock mechanism to optimize multiple read/write requests on the same entry or inode. This increases performance but can have adverse results when other clients try to access the same entry/inode. To solve this, this patch adds functionality to detect when this happens and force an earlier release so that other clients are not blocked. The method consists of requesting GF_GLUSTERFS_INODELK_COUNT and GF_GLUSTERFS_ENTRYLK_COUNT for all fops that take a lock. When this count is greater than one, the lock is marked to be released. All fops already waiting for this lock will be executed normally before releasing the lock, but new requests that also require it will be blocked and restarted after the lock has been released and reacquired. Another problem was that some operations correctly locked the parent of an entry when needed, but got the size and version xattrs from the entry instead of the parent. This patch solves this problem by binding all queries of size and version to each lock and replacing all entrylk calls with inodelk ones to remove concurrent updates on directory metadata. This also allows rename to correctly update source and destination directories. Change-Id: I2df0b22bc6f407d49f3cbf0733b0720015bacfbd BUG: 1165041 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/10852 Tested-by: NetBSD Build System Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-data.h')
-rw-r--r-- xlators/cluster/ec/src/ec-data.h 53
1 files changed, 32 insertions, 21 deletions
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 8a58ffb288b..8204cf087de 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -67,10 +67,20 @@ struct _ec_fd
struct _ec_inode
{
uintptr_t bad;
- ec_lock_t *entry_lock;
ec_lock_t *inode_lock;
+ gf_boolean_t have_info;
+ gf_boolean_t have_config;
+ gf_boolean_t have_version;
+ gf_boolean_t have_size;
+ gf_boolean_t have_dirty;
+ ec_config_t config;
+ uint64_t pre_version[2];
+ uint64_t post_version[2];
+ uint64_t pre_size;
+ uint64_t post_size;
+ uint64_t pre_dirty[2];
+ uint64_t post_dirty[2];
struct list_head heal;
-
};
typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
@@ -80,7 +90,6 @@ typedef int32_t (* fop_fheal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
int32_t, int32_t, uintptr_t, uintptr_t,
uintptr_t, dict_t *);
-
union _ec_cbk
{
fop_access_cbk_t access;
@@ -132,21 +141,21 @@ union _ec_cbk
struct _ec_lock
{
- ec_lock_t **plock;
+ ec_inode_t *ctx;
gf_timer_t *timer;
- struct list_head waiting;
+ struct list_head waiting; /* Queue of requests being serviced. */
+ struct list_head frozen; /* Queue of requests that will be serviced in
+ the next unlock/lock cycle. */
uintptr_t mask;
uintptr_t good_mask;
- int32_t kind;
int32_t refs;
- int32_t acquired;
- int32_t have_size;
- uint64_t size;
- uint64_t size_delta;
- uint64_t version[2];
- uint64_t version_delta[2];
- gf_boolean_t is_dirty[2];
+ int32_t refs_frozen;
+ int32_t inserted;
+ gf_boolean_t acquired;
+ gf_boolean_t release;
+ gf_boolean_t query;
ec_fop_data_t *owner;
+ fd_t *fd;
loc_t loc;
union
{
@@ -157,9 +166,12 @@ struct _ec_lock
struct _ec_lock_link
{
- ec_lock_t * lock;
- ec_fop_data_t * fop;
- struct list_head wait_list;
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ struct list_head wait_list;
+ gf_boolean_t update[2];
+ loc_t *base;
+ uint64_t size;
};
struct _ec_fop_data
@@ -183,12 +195,8 @@ struct _ec_fop_data
int32_t lock_count;
int32_t locked;
ec_lock_link_t locks[2];
- int32_t locks_update;
- int32_t have_size;
- uint64_t pre_size;
- uint64_t post_size;
+ int32_t first_lock;
gf_lock_t lock;
- ec_config_t config;
uint32_t flags;
uint32_t first;
@@ -197,6 +205,7 @@ struct _ec_fop_data
if fop->minimum number of subvolumes succeed
which are not healing*/
uintptr_t remaining;
+ uintptr_t received; /* Mask of responses */
uintptr_t good;
uintptr_t bad;
@@ -300,4 +309,6 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
void ec_fop_data_acquire(ec_fop_data_t * fop);
void ec_fop_data_release(ec_fop_data_t * fop);
+void ec_fop_cleanup(ec_fop_data_t *fop);
+
#endif /* __EC_DATA_H__ */