diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2013-08-06 17:40:05 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-08-07 03:33:22 -0700 |
commit | c6a555d1268c667b72728ffa58600fc0632465e4 (patch) | |
tree | d793e16c93fb35bc431feeee6b261341fbfce067 | |
parent | 12f485982b41eec542673d74510e7ed8ef5e1fca (diff) |
features/locks: Convert old style metadata locks to new-style
Problem:
In 3.3, inode locks for both metadata and data compete in the same
domain, called the data domain (old style). This, coupled with eager-lock
and delayed post-ops, introduces delays for metadata operations like chmod,
chown etc. To avoid this problem, inode locks for metadata ops were
moved to a different domain, called the metadata domain, in 3.4 (new style).
But when both 3.3 and 3.4 clients are present, 3.4 clients
still need to take metadata locks in "old style" so
that proper synchronization happens across 3.3 and 3.4 clients. Only
when all clients are >= 3.4 will metadata locks be taken in "new style".
Because of this behavior, as long as at least one 3.3
client is present, metadata operations on all 3.4 clients are
perceptibly delayed while data operations are in
progress (e.g. untar extracts one file per sec).
Fix:
Make the locks xlator translate old-style metadata locks to new-style
metadata locks. Since the upgrade process recommends upgrading servers
first and then clients, this approach gives good results.
Tests:
1) Tested that old style metadata locks are converted to new style by
locks xlator using gdb
2) Tested that disconnects purge locks in meta-data domain as well
using gdb and statedumps.
3) Tested that untar performance is not hampered by meta-data and
data operations.
4) Had two mounts, one with orthogonal-meta-data on and the other with
orthogonal-meta-data off. Ran chmod 777 <file> on one mount and
chmod 555 <file> on the other mount in while loops. When I took
statedumps, I saw that both the transports were taking locks on
the same domain with the same range.
18:49:30 :) ⚡ sudo grep -B1 "ACTIVE" /usr/local/var/run/gluster/home-gfs-r2_0.324.dump.*
home-gfs-r2_0.324.dump.1375794971-lock-dump.domain.domain=r2-replicate-0:metadata
home-gfs-r2_0.324.dump.1375794971:inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=9223372036854775806, len=0, pid = 7525, owner=78f9e652497f0000, transport=0x15ac9e0, , granted at Tue Aug 6 18:46:11 2013
home-gfs-r2_0.324.dump.1375795051-lock-dump.domain.domain=r2-replicate-0:metadata
home-gfs-r2_0.324.dump.1375795051:inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=9223372036854775806, len=0, pid = 8879, owner=0019cc3cad7f0000, transport=0x158f580, , granted at Tue Aug 6 18:47:31 2013
Change-Id: I268df4efd93a377a0c73fbc59b739ef12a7a8bb6
BUG: 993981
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/5503
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | xlators/features/locks/src/inodelk.c | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index a211354432a..2fd8b6c293b 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -547,6 +547,37 @@ new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid, return lock; } +int32_t +_pl_convert_volume (const char *volume, char **res) +{ + char *mdata_vol = NULL; + int ret = 0; + + mdata_vol = strrchr (volume, ':'); + //if the volume already ends with :metadata don't bother + if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0)) + return 0; + + ret = gf_asprintf (res, "%s:metadata", volume); + if (ret <= 0) + return ENOMEM; + return 0; +} + +int32_t +_pl_convert_volume_for_special_range (struct gf_flock *flock, + const char *volume, char **res) +{ + int32_t ret = 0; + + if ((flock->l_start == LLONG_MAX -1) && + (flock->l_len == 0)) { + ret = _pl_convert_volume (volume, res); + } + + return ret; +} + /* Common inodelk code called from pl_inodelk and pl_finodelk */ int pl_common_inodelk (call_frame_t *frame, xlator_t *this, @@ -562,6 +593,8 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, pl_inode_t * pinode = NULL; pl_inode_lock_t * reqlock = NULL; pl_dom_list_t * dom = NULL; + char *res = NULL; + char *res1 = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (inode, unwind); @@ -572,6 +605,12 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, goto unwind; } + op_errno = _pl_convert_volume_for_special_range (flock, volume, &res); + if (op_errno) + goto unwind; + if (res) + volume = res; + pl_trace_in (this, frame, fd, loc, cmd, flock, volume); transport = frame->root->trans; @@ -598,6 +637,13 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, "Releasing all locks from transport %p", transport); release_inode_locks_of_transport (this, dom, inode, transport); + _pl_convert_volume (volume, &res1); + if (res1) { + dom = get_domain (pinode, res1); + if (dom) + 
release_inode_locks_of_transport (this, dom, + inode, transport); + } op_ret = 0; goto unwind; @@ -659,6 +705,8 @@ unwind: STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); out: + GF_FREE (res); + GF_FREE (res1); return 0; } |