summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAvra Sengupta <asengupt@redhat.com>2016-05-16 14:55:54 +0530
committerJeff Darcy <jdarcy@redhat.com>2016-05-26 08:27:43 -0700
commitc137f6a7389d7f760e4724f3506180f9cfc0da52 (patch)
treed487fb928270ef14d57c655d4c5ee9ef073027bc
parent04e3a343512d5f71266d334174afe44a6b8bbcfa (diff)
jbr/locking: Define path for lock/unlock fops in JBR
lock/unlock fops need to be handled differently than other 'regular' fops, so as to avoid chances of deadlock in blocking calls. This patch addresses the same in the following manner, with a caveat. 1. On receiving the fop if the node is a follower, it performs the operation (irrespective of it being lock/unlock fop), and returns the result. 2. If the node is a leader it follows the following paths for lock and unlock fops: For lock fops : -> It performs the fop on itself. If it is a failure, it sends -ve ack to the client. If it is successful, it dispatches the fop to the followers. -> On receiving responses from the followers, it checks for quorum (including the leader's outcome). If quorum is met, it sends +ve ack to the client. -> If quorum is not met, then it *should* issue a rollback to the followers, followed by the rollback on the leader. It should then send -ve ack to he client. For unlock fops: -> It dispatches the fop on the followers first. -> On receiving responses from the followers, it performs the fop on itself. On completion, it checks for quorum (including the leader's outcome). If quorum is met, it sends +ve ack to the client. -> If quorum is not met, then it *should* issue a rollback on itslef, followed by the rollback on the followers. It should then send -ve ack to he client. Caveat: -> jbr-server does not have a rollback framework yet, and hence this patch does not perform the rollbacks as discussed in the failure scenarios above. The rollback framework will be a different dependent patch. Change-Id: I26961b27cb85f324c1ffeee80e82ec082ffa4465 BUG: 1333370 Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/14226 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--xlators/experimental/jbr-client/src/jbr-messages.h8
-rw-r--r--xlators/experimental/jbr-server/src/all-templates.c41
-rwxr-xr-xxlators/experimental/jbr-server/src/gen-fops.py4
-rw-r--r--xlators/experimental/jbr-server/src/jbr.c407
4 files changed, 421 insertions, 39 deletions
diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h
index 61fa725d56a..626c4fd3eaa 100644
--- a/xlators/experimental/jbr-client/src/jbr-messages.h
+++ b/xlators/experimental/jbr-client/src/jbr-messages.h
@@ -102,4 +102,12 @@
*/
#define J_MSG_QUORUM_NOT_MET (JBR_COMP_BASE + 9)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ */
+#define J_MSG_LOCK_FAILURE (JBR_COMP_BASE + 10)
+
+
#endif /* _JBR_MESSAGES_H_ */
diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c
index adae2431157..7314701029c 100644
--- a/xlators/experimental/jbr-server/src/all-templates.c
+++ b/xlators/experimental/jbr-server/src/all-templates.c
@@ -351,6 +351,7 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@LONG_ARGS@)
{
+ int32_t ret = -1;
gf_boolean_t result = _gf_false;
jbr_private_t *priv = NULL;
jbr_local_t *local = NULL;
@@ -371,43 +372,9 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK(&frame->lock);
#if defined(JBR_CG_QUEUE)
- jbr_inode_ctx_t *ictx;
- jbr_local_t *next;
-
- if (local->qlinks.next != &local->qlinks) {
- list_del(&local->qlinks);
- ictx = jbr_get_inode_ctx(this, local->fd->inode);
- if (ictx) {
- LOCK(&ictx->lock);
- if (ictx->pending) {
- /*
- * TBD: dequeue *all* non-conflicting
- * reqs
- *
- * With the stub implementation there
- * can only be one request active at a
- * time (zero here) so it's not an
- * issue. In a real implementation
- * there might still be other active
- * requests to check against, and
- * multiple pending requests that could
- * continue.
- */
- gf_msg_debug (this->name, 0,
- "unblocking next request");
- --(ictx->pending);
- next = list_entry (ictx->pqueue.next,
- jbr_local_t, qlinks);
- list_del(&next->qlinks);
- list_add_tail(&next->qlinks,
- &ictx->aqueue);
- call_resume(next->qstub);
- } else {
- --(ictx->active);
- }
- UNLOCK(&ictx->lock);
- }
- }
+ ret = jbr_remove_from_queue (frame, this);
+ if (ret)
+ goto err;
#endif
#if defined(JBR_CG_FSYNC)
diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py
index 36bf1e35d27..8a2b47c5345 100755
--- a/xlators/experimental/jbr-server/src/gen-fops.py
+++ b/xlators/experimental/jbr-server/src/gen-fops.py
@@ -78,7 +78,7 @@ fop_table = {
"getxattr": "read",
# "inodelk": "read",
"link": "write",
-# "lk": "write",
+ "lk": "write,queue",
# "lookup": "read",
"mkdir": "write",
"mknod": "write",
@@ -107,7 +107,7 @@ fop_table = {
# only a few common functions will be generated, and mention those
# functions. Rest of the functions can be customized
selective_generate = {
-# "lk": "fop,dispatch,call_dispatch",
+ "lk": "fop,dispatch,call_dispatch",
}
# Stolen from gen_fdl.py
diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c
index a342d3b83d5..d27d8ab5140 100644
--- a/xlators/experimental/jbr-server/src/jbr.c
+++ b/xlators/experimental/jbr-server/src/jbr.c
@@ -38,6 +38,20 @@ enum {
JBR_SERVER_NEXT_ENTRY
};
+/*
+ * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and *
+ * jbr_lk_perform_local_op call it, before code is generated. *
+ */
+int32_t
+jbr_lk_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+jbr_lk_dispatch (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata);
+
/* Used to check the quorum of acks received after the fop
* confirming the status of the fop on all the brick processes
* for this particular subvolume
@@ -312,6 +326,399 @@ out:
return ret;
}
+int32_t
+jbr_remove_from_queue (call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = -1;
+ jbr_inode_ctx_t *ictx = NULL;
+ jbr_local_t *local = NULL;
+ jbr_local_t *next = NULL;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (local->qlinks.next != &local->qlinks) {
+ list_del(&local->qlinks);
+ ictx = jbr_get_inode_ctx(this, local->fd->inode);
+ if (ictx) {
+ LOCK(&ictx->lock);
+ if (ictx->pending) {
+ /*
+ * TBD: dequeue *all* non-conflicting
+ * reqs
+ *
+ * With the stub implementation there
+ * can only be one request active at a
+ * time (zero here) so it's not an
+ * issue. In a real implementation
+ * there might still be other active
+ * requests to check against, and
+ * multiple pending requests that could
+ * continue.
+ */
+ gf_msg_debug (this->name, 0,
+ "unblocking next request");
+ --(ictx->pending);
+ next = list_entry (ictx->pqueue.next,
+ jbr_local_t, qlinks);
+ list_del(&next->qlinks);
+ list_add_tail(&next->qlinks,
+ &ictx->aqueue);
+ call_resume(next->qstub);
+ } else {
+ --(ictx->active);
+ }
+ UNLOCK(&ictx->lock);
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+jbr_lk_complete (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t ret = -1;
+ jbr_private_t *priv = NULL;
+ jbr_local_t *local = NULL;
+ gf_boolean_t result = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, err);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, flock, err);
+ GF_VALIDATE_OR_GOTO (this->name, xdata, err);
+
+ /*
+ * Remove from queue for unlock operation only *
+ * For lock operation, it will be done in fan-in *
+ */
+ if (flock->l_type == F_UNLCK) {
+ ret = jbr_remove_from_queue (frame, this);
+ if (ret)
+ goto err;
+ }
+
+ /*
+ * On a follower, unwind with the op_ret and op_errno. On a *
+ * leader, if the fop is a locking fop, and its a failure, *
+ * send fail, else call stub which will dispatch the fop to *
+ * the followers. *
+ * *
+ * If the fop is a unlocking fop, check quorum. If quorum *
+ * is met, then send success. Else Rollback on leader, *
+ * followed by followers, and then send -ve ack to client. *
+ */
+ if (priv->leader) {
+
+ /* Increase the successful acks if it's a success. */
+ LOCK(&frame->lock);
+ if (op_ret != -1)
+ (local->successful_acks)++;
+ UNLOCK(&frame->lock);
+
+ if (flock->l_type == F_UNLCK) {
+ result = fop_quorum_check (this,
+ (double)priv->n_children,
+ (double)local->successful_acks);
+ if (result == _gf_false) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ J_MSG_QUORUM_NOT_MET,
+ "Quorum is not met. "
+ "The operation has failed.");
+
+ /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER *
+ * FOLLOWED BY FOLLOWERS. */
+ } else {
+ op_ret = 0;
+ op_errno = 0;
+ }
+
+ fd_unref(local->fd);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno,
+ flock, xdata);
+ } else {
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ J_MSG_LOCK_FAILURE,
+ "The lock operation failed on "
+ "the leader.");
+
+ fd_unref(local->fd);
+ STACK_UNWIND_STRICT (lk, frame, op_ret,
+ op_errno, flock, xdata);
+ } else {
+ if (!local->stub) {
+ goto err;
+ }
+
+ call_resume(local->stub);
+ }
+ }
+ } else {
+ fd_unref(local->fd);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno,
+ flock, xdata);
+ }
+
+ return 0;
+
+err:
+ if (local) {
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ }
+ if (local->qstub) {
+ call_stub_destroy(local->qstub);
+ }
+ if (local->fd) {
+ fd_unref(local->fd);
+ }
+ mem_put(local);
+ }
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno,
+ flock, xdata);
+ return 0;
+}
+
+int32_t
+jbr_lk_fan_in (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ uint8_t call_count = -1;
+ int32_t ret = -1;
+ gf_boolean_t result = _gf_false;
+ jbr_local_t *local = NULL;
+ jbr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ priv = this->private;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n",
+ op_ret, op_errno);
+
+ LOCK(&frame->lock);
+ call_count = --(local->call_count);
+ if (op_ret != -1) {
+ /* Increment the number of successful acks *
+ * received for the operation. *
+ */
+ (local->successful_acks)++;
+ local->successful_op_ret = op_ret;
+ }
+ gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
+ op_ret, op_errno, local->successful_acks);
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ /*
+ * If the fop is a locking fop, then check quorum. If quorum *
+ * is met, send successful ack to the client. If quorum is *
+ * not met, then rollback locking on followers, followed by *
+ * rollback of locking on leader, and then sending -ve ack *
+ * to the client. *
+ * *
+ * If the fop is a unlocking fop, then call stub. *
+ */
+ if (flock->l_type == F_UNLCK) {
+ call_resume(local->stub);
+ } else {
+ /*
+ * Remove from queue for locking fops, for unlocking *
+ * fops, it is taken care of in jbr_lk_complete *
+ */
+ ret = jbr_remove_from_queue (frame, this);
+ if (ret)
+ goto out;
+
+ fd_unref(local->fd);
+
+ result = fop_quorum_check (this,
+ (double)priv->n_children,
+ (double)local->successful_acks);
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ J_MSG_QUORUM_NOT_MET,
+ "Didn't receive enough acks to meet "
+ "quorum. Failing the locking "
+ "operation and initiating rollback on "
+ "followers and the leader "
+ "respectively.");
+
+ /* TODO: PERFORM ROLLBACK OF LOCKING ON
+ * FOLLOWERS, FOLLOWED BY ROLLBACK ON
+ * LEADER.
+ */
+
+ STACK_UNWIND_STRICT (lk, frame, -1, EROFS,
+ flock, xdata);
+ } else {
+ STACK_UNWIND_STRICT (lk, frame, 0, 0,
+ flock, xdata);
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Called from leader for locking fop, being writen as a separate *
+ * function so as to support queues. *
+ */
+int32_t
+jbr_perform_lk_on_leader (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, flock, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ STACK_WIND (frame, jbr_lk_complete,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+ fd, cmd, flock, xdata);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+jbr_lk_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ jbr_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, op_errno, out);
+ GF_VALIDATE_OR_GOTO (this->name, flock, out);
+
+ /*
+ * Check if the fop is a locking fop or unlocking fop, and
+ * handle it accordingly. If it is a locking fop, take the
+ * lock on leader first, and then send it to the followers.
+ * If it is a unlocking fop, unlock the followers first,
+ * and then on meeting quorum perform the unlock on the leader.
+ */
+ if (flock->l_type == F_UNLCK) {
+ ret = jbr_lk_call_dispatch (frame, this, op_errno,
+ fd, cmd, flock, xdata);
+ if (ret)
+ goto out;
+ } else {
+ jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
+
+ if (!ictx) {
+ *op_errno = EIO;
+ goto out;
+ }
+
+ LOCK(&ictx->lock);
+ if (ictx->active) {
+ gf_msg_debug (this->name, 0,
+ "queuing request due to conflict");
+
+ local->qstub = fop_lk_stub (frame,
+ jbr_perform_lk_on_leader,
+ fd, cmd, flock, xdata);
+ if (!local->qstub) {
+ UNLOCK(&ictx->lock);
+ goto out;
+ }
+ list_add_tail(&local->qlinks, &ictx->pqueue);
+ ++(ictx->pending);
+ UNLOCK(&ictx->lock);
+ ret = 0;
+ goto out;
+ } else {
+ list_add_tail(&local->qlinks, &ictx->aqueue);
+ ++(ictx->active);
+ }
+ UNLOCK(&ictx->lock);
+ ret = jbr_perform_lk_on_leader (frame, this, fd, cmd,
+ flock, xdata);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+jbr_lk_continue (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t ret = -1;
+ jbr_local_t *local = NULL;
+ jbr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("jbr", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ priv = this->private;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+ GF_VALIDATE_OR_GOTO (this->name, flock, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, xdata, out);
+
+ /*
+ * If it's a locking fop, then call dispatch to followers *
+ * If it's a unlock fop, then perform the unlock operation *
+ */
+ if (flock->l_type == F_UNLCK) {
+ STACK_WIND (frame, jbr_lk_complete,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+ fd, cmd, flock, xdata);
+ } else {
+ /*
+ * Directly call jbr_lk_dispatch instead of appending *
+ * in queue, which is done at jbr_lk_perform_local_op *
+ * for locking fops *
+ */
+ ret = jbr_lk_dispatch (frame, this, fd, cmd,
+ flock, xdata);
+ if (ret) {
+ STACK_UNWIND_STRICT (lk, frame, -1, 0,
+ flock, xdata);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
#pragma generate
uint8_t