summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2014-06-05 09:22:34 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2014-06-11 18:42:54 -0700
commitb9856eca80e2f820c88f60fdc6cb1427905671af (patch)
treee69c08f0b929b0bdf3fd6191a4f0d2eb6e2b5d50
parent8f88510feb49e362531a3cae4b5e295e7ca155e9 (diff)
features/locks: Clean up logging of cleanup in DISCONNECT codepath
Now, gfid is printed as opposed to path in cleanup messages. Also, refkeeper update is eliminated in inodelk and entrylk. Instead, the patch ensures inode and pl_inode are kept alive as long as there is atleast one lock (granted/blocked) on an inode. Also, every inode is unref'd appropriately on a DISCONNECT from the lock-owning client. Change-Id: I531b1a02fe1b889fdd7f54b1fd522e78a18ed1df BUG: 1104915 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/7981 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r--xlators/features/locks/src/common.c17
-rw-r--r--xlators/features/locks/src/entrylk.c89
-rw-r--r--xlators/features/locks/src/inodelk.c85
-rw-r--r--xlators/features/locks/src/locks.h4
4 files changed, 126 insertions, 69 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index f6c71c1cf86..e0a5f7c0533 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -116,21 +116,7 @@ fd_from_fdnum (posix_lock_t *lock)
int
__pl_inode_is_empty (pl_inode_t *pl_inode)
{
- pl_dom_list_t *dom = NULL;
- int is_empty = 1;
-
- if (!list_empty (&pl_inode->ext_list))
- is_empty = 0;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (!list_empty (&dom->entrylk_list))
- is_empty = 0;
-
- if (!list_empty (&dom->inodelk_list))
- is_empty = 0;
- }
-
- return is_empty;
+ return (list_empty (&pl_inode->ext_list));
}
void
@@ -451,6 +437,7 @@ pl_inode_get (xlator_t *this, inode_t *inode)
INIT_LIST_HEAD (&pl_inode->reservelk_list);
INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ uuid_copy (pl_inode->gfid, inode->gfid);
__inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
}
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 8496d9d8dce..c00a7124658 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -517,18 +517,19 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- char unwind = 1;
- GF_UNUSED int dict_ret = -1;
- pl_inode_t *pinode = NULL;
- pl_entry_lock_t *reqlock = NULL;
- pl_entry_lock_t *unlocked = NULL;
- pl_dom_list_t *dom = NULL;
- char *conn_id = NULL;
- pl_ctx_t *ctx = NULL;
- int nonblock = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
+ gf_boolean_t need_inode_unref = _gf_false;
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -564,6 +565,25 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pinode. Now when
+ * the io-thread executing this function tries to access pinode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pinode is still alive until after the
+ * current stack unwinds.
+ */
+ pinode->inode = inode_ref (inode);
+
switch (cmd) {
case ENTRYLK_LOCK_NB:
nonblock = 1;
@@ -593,6 +613,13 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
} else {
__pl_entrylk_unref (reqlock);
}
+
+ /* For all but the case where a non-blocking lock
+ * attempt fails, the extra ref taken before the switch
+ * block must be negated.
+ */
+ if ((ret == -EAGAIN) && (nonblock))
+ need_inode_unref = _gf_true;
}
pthread_mutex_unlock (&pinode->mutex);
if (ctx)
@@ -604,6 +631,12 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done before the switch
+ * block must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
unlocked = __unlock_entrylk (dom, reqlock);
if (unlocked) {
list_del_init (&unlocked->client_list);
@@ -623,13 +656,22 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
break;
default:
+ inode_unref (pinode->inode);
gf_log (this->name, GF_LOG_ERROR,
"Unexpected case in entrylk (cmd=%d). Please file"
"a bug report at http://bugs.gluster.com", cmd);
goto out;
}
+ if (need_inode_unref)
+ inode_unref (pinode->inode);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0))
+ inode_unref (pinode->inode);
+
out:
- pl_update_refkeeper (this, inode);
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
@@ -684,24 +726,14 @@ static void
pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
{
pl_inode_t *pinode = NULL;
- char *path = NULL;
- char *file = NULL;
pinode = lock->pinode;
- inode_path (pinode->refkeeper, NULL, &path);
-
- if (path)
- file = path;
- else
- file = uuid_utoa (pinode->refkeeper->gfid);
-
- gf_log (THIS->name, GF_LOG_WARNING,
- "releasing lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, lock->client, (uint64_t) lock->client_pid,
- lkowner_utoa (&lock->owner));
- GF_FREE (path);
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pinode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
}
@@ -800,6 +832,7 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
__pl_entrylk_unref (l);
}
pthread_mutex_unlock (&pinode->mutex);
+ inode_unref (pinode->inode);
}
return 0;
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index c76cb7f9199..ef06531cfde 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -383,24 +383,13 @@ static void
pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
{
pl_inode_t *pl_inode = NULL;
- char *path = NULL;
- char *file = NULL;
pl_inode = lock->pl_inode;
- inode_path (pl_inode->refkeeper, NULL, &path);
-
- if (path)
- file = path;
- else
- file = uuid_utoa (pl_inode->refkeeper->gfid);
-
- gf_log (THIS->name, GF_LOG_WARNING,
- "releasing lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, lock->client, (uint64_t) lock->client_pid,
- lkowner_utoa (&lock->owner));
- GF_FREE (path);
+ gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pl_inode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
}
@@ -501,6 +490,7 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
__pl_inodelk_unref (l);
}
pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
}
return 0;
@@ -509,13 +499,36 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
static int
pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
- pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom)
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ inode_t *inode)
{
- int ret = -EINVAL;
- pl_inode_lock_t *retlock = NULL;
- gf_boolean_t unref = _gf_true;
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+ gf_boolean_t need_inode_unref = _gf_false;
+ short fl_type;
lock->pl_inode = pl_inode;
+ fl_type = lock->fl_type;
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pl_inode. Now when
+ * the io-thread executing this function tries to access pl_inode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pl_inode is still alive until after the
+ * current stack unwinds.
+ */
+ pl_inode->inode = inode_ref (inode);
if (ctx)
pthread_mutex_lock (&ctx->lock);
@@ -542,12 +555,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->user_flock.l_len);
if (can_block)
unref = _gf_false;
+ /* For all but the case where a non-blocking
+ * lock attempt fails, the extra ref taken at
+ * the start of this function must be negated.
+ */
+ else
+ need_inode_unref = _gf_true;
}
if (ctx && (!ret || can_block))
list_add_tail (&lock->client_list,
&ctx->inodelk_lockers);
} else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+ * this function must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
retlock = __inode_unlock_lock (this, lock, dom);
if (!retlock) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -568,7 +593,16 @@ out:
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ if (need_inode_unref)
+ inode_unref (pl_inode->inode);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((fl_type == F_UNLCK) && (ret == 0)) {
+ inode_unref (pl_inode->inode);
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ }
return ret;
}
@@ -707,7 +741,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
}
reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
- frame, this, volume, conn_id);
+ frame, this, dom->domain, conn_id);
if (!reqlock) {
op_ret = -1;
@@ -725,7 +759,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
case F_SETLK:
memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block,
- dom);
+ dom, inode);
if (ret < 0) {
if ((can_block) && (F_UNLCK != flock->l_type)) {
@@ -751,10 +785,9 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
unwind:
- if ((inode != NULL) && (flock !=NULL)) {
- pl_update_refkeeper (this, inode);
- pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
- }
+ if (flock != NULL)
+ pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret,
+ op_errno, volume);
STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
out:
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 8c2a6f867ee..f761b3d4a00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -146,6 +146,10 @@ struct __pl_inode {
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
+ uuid_t gfid; /* placeholder for gfid of the inode */
+ inode_t *inode; /* pointer to be used for ref and unref
+ of inode_t as long as there are
+ locks on it */
};
typedef struct __pl_inode pl_inode_t;