summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/features/locks/src/common.c17
-rw-r--r--xlators/features/locks/src/entrylk.c89
-rw-r--r--xlators/features/locks/src/inodelk.c85
-rw-r--r--xlators/features/locks/src/locks.h4
4 files changed, 126 insertions, 69 deletions
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index f6c71c1cf86..e0a5f7c0533 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -116,21 +116,7 @@ fd_from_fdnum (posix_lock_t *lock)
int
__pl_inode_is_empty (pl_inode_t *pl_inode)
{
- pl_dom_list_t *dom = NULL;
- int is_empty = 1;
-
- if (!list_empty (&pl_inode->ext_list))
- is_empty = 0;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (!list_empty (&dom->entrylk_list))
- is_empty = 0;
-
- if (!list_empty (&dom->inodelk_list))
- is_empty = 0;
- }
-
- return is_empty;
+ return (list_empty (&pl_inode->ext_list));
}
void
@@ -451,6 +437,7 @@ pl_inode_get (xlator_t *this, inode_t *inode)
INIT_LIST_HEAD (&pl_inode->reservelk_list);
INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ uuid_copy (pl_inode->gfid, inode->gfid);
__inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
}
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 8496d9d8dce..c00a7124658 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -517,18 +517,19 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- char unwind = 1;
- GF_UNUSED int dict_ret = -1;
- pl_inode_t *pinode = NULL;
- pl_entry_lock_t *reqlock = NULL;
- pl_entry_lock_t *unlocked = NULL;
- pl_dom_list_t *dom = NULL;
- char *conn_id = NULL;
- pl_ctx_t *ctx = NULL;
- int nonblock = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
+ gf_boolean_t need_inode_unref = _gf_false;
if (xdata)
dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
@@ -564,6 +565,25 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
goto unwind;
}
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pinode. Now when
+ * the io-thread executing this function tries to access pinode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pinode is still alive until after the
+ * current stack unwinds.
+ */
+ pinode->inode = inode_ref (inode);
+
switch (cmd) {
case ENTRYLK_LOCK_NB:
nonblock = 1;
@@ -593,6 +613,13 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
} else {
__pl_entrylk_unref (reqlock);
}
+
+ /* For all but the case where a non-blocking lock
+ * attempt fails, the extra ref taken before the switch
+ * block must be negated.
+ */
+ if ((ret == -EAGAIN) && (nonblock))
+ need_inode_unref = _gf_true;
}
pthread_mutex_unlock (&pinode->mutex);
if (ctx)
@@ -604,6 +631,12 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
pthread_mutex_lock (&ctx->lock);
pthread_mutex_lock (&pinode->mutex);
{
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done before the switch
+ * block must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
unlocked = __unlock_entrylk (dom, reqlock);
if (unlocked) {
list_del_init (&unlocked->client_list);
@@ -623,13 +656,22 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
break;
default:
+ inode_unref (pinode->inode);
gf_log (this->name, GF_LOG_ERROR,
"Unexpected case in entrylk (cmd=%d). Please file"
"a bug report at http://bugs.gluster.com", cmd);
goto out;
}
+ if (need_inode_unref)
+ inode_unref (pinode->inode);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0))
+ inode_unref (pinode->inode);
+
out:
- pl_update_refkeeper (this, inode);
if (unwind) {
entrylk_trace_out (this, frame, volume, fd, loc, basename,
@@ -684,24 +726,14 @@ static void
pl_entrylk_log_cleanup (pl_entry_lock_t *lock)
{
pl_inode_t *pinode = NULL;
- char *path = NULL;
- char *file = NULL;
pinode = lock->pinode;
- inode_path (pinode->refkeeper, NULL, &path);
-
- if (path)
- file = path;
- else
- file = uuid_utoa (pinode->refkeeper->gfid);
-
- gf_log (THIS->name, GF_LOG_WARNING,
- "releasing lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, lock->client, (uint64_t) lock->client_pid,
- lkowner_utoa (&lock->owner));
- GF_FREE (path);
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pinode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
}
@@ -800,6 +832,7 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
__pl_entrylk_unref (l);
}
pthread_mutex_unlock (&pinode->mutex);
+ inode_unref (pinode->inode);
}
return 0;
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index c76cb7f9199..ef06531cfde 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -383,24 +383,13 @@ static void
pl_inodelk_log_cleanup (pl_inode_lock_t *lock)
{
pl_inode_t *pl_inode = NULL;
- char *path = NULL;
- char *file = NULL;
pl_inode = lock->pl_inode;
- inode_path (pl_inode->refkeeper, NULL, &path);
-
- if (path)
- file = path;
- else
- file = uuid_utoa (pl_inode->refkeeper->gfid);
-
- gf_log (THIS->name, GF_LOG_WARNING,
- "releasing lock on %s held by "
- "{client=%p, pid=%"PRId64" lk-owner=%s}",
- file, lock->client, (uint64_t) lock->client_pid,
- lkowner_utoa (&lock->owner));
- GF_FREE (path);
+ gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ uuid_utoa (pl_inode->gfid), lock->client,
+ (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner));
}
@@ -501,6 +490,7 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
__pl_inodelk_unref (l);
}
pthread_mutex_unlock (&pl_inode->mutex);
+ inode_unref (pl_inode->inode);
}
return 0;
@@ -509,13 +499,36 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
static int
pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
- pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom)
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ inode_t *inode)
{
- int ret = -EINVAL;
- pl_inode_lock_t *retlock = NULL;
- gf_boolean_t unref = _gf_true;
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+ gf_boolean_t need_inode_unref = _gf_false;
+ short fl_type;
lock->pl_inode = pl_inode;
+ fl_type = lock->fl_type;
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pl_inode. Now when
+ * the io-thread executing this function tries to access pl_inode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pl_inode is still alive until after the
+ * current stack unwinds.
+ */
+ pl_inode->inode = inode_ref (inode);
if (ctx)
pthread_mutex_lock (&ctx->lock);
@@ -542,12 +555,24 @@ pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->user_flock.l_len);
if (can_block)
unref = _gf_false;
+ /* For all but the case where a non-blocking
+ * lock attempt fails, the extra ref taken at
+ * the start of this function must be negated.
+ */
+ else
+ need_inode_unref = _gf_true;
}
if (ctx && (!ret || can_block))
list_add_tail (&lock->client_list,
&ctx->inodelk_lockers);
} else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+ * this function must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
retlock = __inode_unlock_lock (this, lock, dom);
if (!retlock) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -568,7 +593,16 @@ out:
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- grant_blocked_inode_locks (this, pl_inode, dom);
+ if (need_inode_unref)
+ inode_unref (pl_inode->inode);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((fl_type == F_UNLCK) && (ret == 0)) {
+ inode_unref (pl_inode->inode);
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ }
return ret;
}
@@ -707,7 +741,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
}
reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
- frame, this, volume, conn_id);
+ frame, this, dom->domain, conn_id);
if (!reqlock) {
op_ret = -1;
@@ -725,7 +759,7 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
case F_SETLK:
memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block,
- dom);
+ dom, inode);
if (ret < 0) {
if ((can_block) && (F_UNLCK != flock->l_type)) {
@@ -751,10 +785,9 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this,
op_ret = 0;
unwind:
- if ((inode != NULL) && (flock !=NULL)) {
- pl_update_refkeeper (this, inode);
- pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
- }
+ if (flock != NULL)
+ pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret,
+ op_errno, volume);
STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
out:
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 8c2a6f867ee..f761b3d4a00 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -146,6 +146,10 @@ struct __pl_inode {
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
+ uuid_t gfid; /* placeholder for gfid of the inode */
+ inode_t *inode; /* pointer to be used for ref and unref
+ of inode_t as long as there are
+ locks on it */
};
typedef struct __pl_inode pl_inode_t;