diff options
Diffstat (limited to 'xlators/experimental')
-rw-r--r-- | xlators/experimental/jbr-client/src/jbr-messages.h | 8 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 41 | ||||
-rwxr-xr-x | xlators/experimental/jbr-server/src/gen-fops.py | 4 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/jbr.c | 407 |
4 files changed, 421 insertions, 39 deletions
diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h index 61fa725d56a..626c4fd3eaa 100644 --- a/xlators/experimental/jbr-client/src/jbr-messages.h +++ b/xlators/experimental/jbr-client/src/jbr-messages.h @@ -102,4 +102,12 @@ */ #define J_MSG_QUORUM_NOT_MET (JBR_COMP_BASE + 9) +/*! + * @messageid + * @diagnosis + * @recommendedaction + */ +#define J_MSG_LOCK_FAILURE (JBR_COMP_BASE + 10) + + #endif /* _JBR_MESSAGES_H_ */ diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c index adae2431157..7314701029c 100644 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ b/xlators/experimental/jbr-server/src/all-templates.c @@ -351,6 +351,7 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @LONG_ARGS@) { + int32_t ret = -1; gf_boolean_t result = _gf_false; jbr_private_t *priv = NULL; jbr_local_t *local = NULL; @@ -371,43 +372,9 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, UNLOCK(&frame->lock); #if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx; - jbr_local_t *next; - - if (local->qlinks.next != &local->qlinks) { - list_del(&local->qlinks); - ictx = jbr_get_inode_ctx(this, local->fd->inode); - if (ictx) { - LOCK(&ictx->lock); - if (ictx->pending) { - /* - * TBD: dequeue *all* non-conflicting - * reqs - * - * With the stub implementation there - * can only be one request active at a - * time (zero here) so it's not an - * issue. In a real implementation - * there might still be other active - * requests to check against, and - * multiple pending requests that could - * continue. - */ - gf_msg_debug (this->name, 0, - "unblocking next request"); - --(ictx->pending); - next = list_entry (ictx->pqueue.next, - jbr_local_t, qlinks); - list_del(&next->qlinks); - list_add_tail(&next->qlinks, - &ictx->aqueue); - call_resume(next->qstub); - } else { - --(ictx->active); - } - UNLOCK(&ictx->lock); - } - } + ret = jbr_remove_from_queue (frame, this); + if (ret) + goto err; #endif #if defined(JBR_CG_FSYNC) diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py index 36bf1e35d27..8a2b47c5345 100755 --- a/xlators/experimental/jbr-server/src/gen-fops.py +++ b/xlators/experimental/jbr-server/src/gen-fops.py @@ -78,7 +78,7 @@ fop_table = { "getxattr": "read", # "inodelk": "read", "link": "write", -# "lk": "write", + "lk": "write,queue", # "lookup": "read", "mkdir": "write", "mknod": "write", @@ -107,7 +107,7 @@ fop_table = { # only a few common functions will be generated, and mention those # functions. Rest of the functions can be customized selective_generate = { -# "lk": "fop,dispatch,call_dispatch", + "lk": "fop,dispatch,call_dispatch", } # Stolen from gen_fdl.py diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c index a342d3b83d5..d27d8ab5140 100644 --- a/xlators/experimental/jbr-server/src/jbr.c +++ b/xlators/experimental/jbr-server/src/jbr.c @@ -38,6 +38,20 @@ enum { JBR_SERVER_NEXT_ENTRY }; +/* + * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and * + * jbr_lk_perform_local_op call it, before code is generated. * + */ +int32_t +jbr_lk_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, + fd_t *fd, int32_t cmd, struct gf_flock *lock, + dict_t *xdata); + +int32_t +jbr_lk_dispatch (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct gf_flock *lock, + dict_t *xdata); + /* Used to check the quorum of acks received after the fop * confirming the status of the fop on all the brick processes * for this particular subvolume @@ -312,6 +326,399 @@ out: return ret; } +int32_t +jbr_remove_from_queue (call_frame_t *frame, xlator_t *this) +{ + int32_t ret = -1; + jbr_inode_ctx_t *ictx = NULL; + jbr_local_t *local = NULL; + jbr_local_t *next = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + if (local->qlinks.next != &local->qlinks) { + list_del(&local->qlinks); + ictx = jbr_get_inode_ctx(this, local->fd->inode); + if (ictx) { + LOCK(&ictx->lock); + if (ictx->pending) { + /* + * TBD: dequeue *all* non-conflicting + * reqs + * + * With the stub implementation there + * can only be one request active at a + * time (zero here) so it's not an + * issue. In a real implementation + * there might still be other active + * requests to check against, and + * multiple pending requests that could + * continue. + */ + gf_msg_debug (this->name, 0, + "unblocking next request"); + --(ictx->pending); + next = list_entry (ictx->pqueue.next, + jbr_local_t, qlinks); + list_del(&next->qlinks); + list_add_tail(&next->qlinks, + &ictx->aqueue); + call_resume(next->qstub); + } else { + --(ictx->active); + } + UNLOCK(&ictx->lock); + } + } + + ret = 0; + +out: + return ret; +} + +int32_t +jbr_lk_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct gf_flock *flock, dict_t *xdata) +{ + int32_t ret = -1; + jbr_private_t *priv = NULL; + jbr_local_t *local = NULL; + gf_boolean_t result = _gf_false; + + GF_VALIDATE_OR_GOTO ("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, err); + GF_VALIDATE_OR_GOTO (this->name, frame, err); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, err); + GF_VALIDATE_OR_GOTO (this->name, flock, err); + GF_VALIDATE_OR_GOTO (this->name, xdata, err); + + /* + * Remove from queue for unlock operation only * + * For lock operation, it will be done in fan-in * + */ + if (flock->l_type == F_UNLCK) { + ret = jbr_remove_from_queue (frame, this); + if (ret) + goto err; + } + + /* + * On a follower, unwind with the op_ret and op_errno. On a * + * leader, if the fop is a locking fop, and its a failure, * + * send fail, else call stub which will dispatch the fop to * + * the followers. * + * * + * If the fop is a unlocking fop, check quorum. If quorum * + * is met, then send success. Else Rollback on leader, * + * followed by followers, and then send -ve ack to client. * + */ + if (priv->leader) { + + /* Increase the successful acks if it's a success. */ + LOCK(&frame->lock); + if (op_ret != -1) + (local->successful_acks)++; + UNLOCK(&frame->lock); + + if (flock->l_type == F_UNLCK) { + result = fop_quorum_check (this, + (double)priv->n_children, + (double)local->successful_acks); + if (result == _gf_false) { + op_ret = -1; + op_errno = EROFS; + gf_msg (this->name, GF_LOG_ERROR, EROFS, + J_MSG_QUORUM_NOT_MET, + "Quorum is not met. " + "The operation has failed."); + + /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER * + * FOLLOWED BY FOLLOWERS. */ + } else { + op_ret = 0; + op_errno = 0; + } + + fd_unref(local->fd); + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, + flock, xdata); + } else { + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_LOCK_FAILURE, + "The lock operation failed on " + "the leader."); + + fd_unref(local->fd); + STACK_UNWIND_STRICT (lk, frame, op_ret, + op_errno, flock, xdata); + } else { + if (!local->stub) { + goto err; + } + + call_resume(local->stub); + } + } + } else { + fd_unref(local->fd); + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, + flock, xdata); + } + + return 0; + +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); + } + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); + } + STACK_UNWIND_STRICT (lk, frame, -1, op_errno, + flock, xdata); + return 0; +} + +int32_t +jbr_lk_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *flock, + dict_t *xdata) +{ + uint8_t call_count = -1; + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", + op_ret, op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + if (op_ret != -1) { + /* Increment the number of successful acks * + * received for the operation. * + */ + (local->successful_acks)++; + local->successful_op_ret = op_ret; + } + gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", + op_ret, op_errno, local->successful_acks); + UNLOCK(&frame->lock); + + if (call_count == 0) { + /* + * If the fop is a locking fop, then check quorum. If quorum * + * is met, send successful ack to the client. If quorum is * + * not met, then rollback locking on followers, followed by * + * rollback of locking on leader, and then sending -ve ack * + * to the client. * + * * + * If the fop is a unlocking fop, then call stub. * + */ + if (flock->l_type == F_UNLCK) { + call_resume(local->stub); + } else { + /* + * Remove from queue for locking fops, for unlocking * + * fops, it is taken care of in jbr_lk_complete * + */ + ret = jbr_remove_from_queue (frame, this); + if (ret) + goto out; + + fd_unref(local->fd); + + result = fop_quorum_check (this, + (double)priv->n_children, + (double)local->successful_acks); + if (result == _gf_false) { + gf_msg (this->name, GF_LOG_ERROR, EROFS, + J_MSG_QUORUM_NOT_MET, + "Didn't receive enough acks to meet " + "quorum. Failing the locking " + "operation and initiating rollback on " + "followers and the leader " + "respectively."); + + /* TODO: PERFORM ROLLBACK OF LOCKING ON + * FOLLOWERS, FOLLOWED BY ROLLBACK ON + * LEADER. + */ + + STACK_UNWIND_STRICT (lk, frame, -1, EROFS, + flock, xdata); + } else { + STACK_UNWIND_STRICT (lk, frame, 0, 0, + flock, xdata); + } + } + } + + ret = 0; +out: + return ret; +} + +/* + * Called from leader for locking fop, being writen as a separate * + * function so as to support queues. * + */ +int32_t +jbr_perform_lk_on_leader (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + GF_VALIDATE_OR_GOTO (this->name, flock, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + STACK_WIND (frame, jbr_lk_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, + fd, cmd, flock, xdata); + + ret = 0; +out: + return ret; +} + +int32_t +jbr_lk_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, + fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) +{ + int32_t ret = -1; + jbr_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO (this->name, op_errno, out); + GF_VALIDATE_OR_GOTO (this->name, flock, out); + + /* + * Check if the fop is a locking fop or unlocking fop, and + * handle it accordingly. If it is a locking fop, take the + * lock on leader first, and then send it to the followers. + * If it is a unlocking fop, unlock the followers first, + * and then on meeting quorum perform the unlock on the leader. + */ + if (flock->l_type == F_UNLCK) { + ret = jbr_lk_call_dispatch (frame, this, op_errno, + fd, cmd, flock, xdata); + if (ret) + goto out; + } else { + jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); + + if (!ictx) { + *op_errno = EIO; + goto out; + } + + LOCK(&ictx->lock); + if (ictx->active) { + gf_msg_debug (this->name, 0, + "queuing request due to conflict"); + + local->qstub = fop_lk_stub (frame, + jbr_perform_lk_on_leader, + fd, cmd, flock, xdata); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto out; + } + list_add_tail(&local->qlinks, &ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + ret = 0; + goto out; + } else { + list_add_tail(&local->qlinks, &ictx->aqueue); + ++(ictx->active); + } + UNLOCK(&ictx->lock); + ret = jbr_perform_lk_on_leader (frame, this, fd, cmd, + flock, xdata); + } + + ret = 0; +out: + return ret; +} + +int32_t +jbr_lk_continue (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) +{ + int32_t ret = -1; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO (this->name, flock, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO (this->name, xdata, out); + + /* + * If it's a locking fop, then call dispatch to followers * + * If it's a unlock fop, then perform the unlock operation * + */ + if (flock->l_type == F_UNLCK) { + STACK_WIND (frame, jbr_lk_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, + fd, cmd, flock, xdata); + } else { + /* + * Directly call jbr_lk_dispatch instead of appending * + * in queue, which is done at jbr_lk_perform_local_op * + * for locking fops * + */ + ret = jbr_lk_dispatch (frame, this, fd, cmd, + flock, xdata); + if (ret) { + STACK_UNWIND_STRICT (lk, frame, -1, 0, + flock, xdata); + goto out; + } + } + + ret = 0; +out: + return ret; +} + #pragma generate uint8_t |