diff options
author | Avra Sengupta <asengupt@redhat.com> | 2016-06-23 12:15:22 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-11-08 11:25:25 -0800 |
commit | 3e50e09723e024cd451c5f48a153fef0fe4857c7 (patch) | |
tree | de1ef8f66ff17eb2791fb406e122486da8cfe463 /xlators/experimental/jbr-server/src/all-templates.c | |
parent | 3e980c5eff495725e7c01793451bc81fd6f94ad5 (diff) |
jbr: Sending rollback from failed fop to fdl
In case of a failed fop, the failure is detected
by the leader in the jbr-server in two places. First
during a quorum check of +ve responses when it
receives responses from all the followers. At this
point if the fop hasn't been successfully journaled
at a quorum of followers (as in there is no merit in
trying the fop in the leader as the quorum will never
be met), then we fail the fop.
Also if this quorum is met, then the fop is tried on
the leader, and after the leader completes the fop
a quorum check similar to the previous one is done
again, this time including the leader's outcome. If
quorum is not met, then we fail the fop.
In both these cases, when the fop fails we send a -ve
ack to the client. With this patch, now we will also
send a rollback through a GF_FOP_IPC to all the followers (and
also to the leader in the second case of failure). This
rollback will contain the index and term number of the
fop which failed. This will be recorded in the respective
journals of the bricks and will be used to rollback the
fop on that brick later.
A subsequent write and its respective rollback would
look something like the following in the journal.
The trusted.jbr.term and trusted.jbr.index present in the
dict of both journal entries relate them to each other. The presence of
"rollback-fop" in the dict of the IPC indicates that it is a
rollback fop, and the value 13 (which stands for GF_FOP_WRITE)
indicates which kind of fop is being rolled back.
=== GF_FOP_WRITE
fd = <gfid 77f12ea2-ca56-40e3-a46e-ba2308baa035>
vector = <158 bytes>
offset = 0 (0x0)
flags = 32769 (0x8001)
xdata = dict {
trusted.jbr.term = 0 <2 bytes>
trusted.jbr.index = 4 <2 bytes>
}
=== GF_FOP_IPC
xdata = dict {
trusted.jbr.term = 0 <2 bytes>
trusted.jbr.index = 4 <2 bytes>
rollback-fop = 13 <3 bytes>
}
Change-Id: I70b6a143d20697153d58e2f719e34ecd1ed160a5
BUG: 1349385
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/14783
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/experimental/jbr-server/src/all-templates.c')
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 124 |
1 files changed, 118 insertions, 6 deletions
diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c index 0fb96ac0436..9720442e63f 100644 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ b/xlators/experimental/jbr-server/src/all-templates.c @@ -105,6 +105,7 @@ jbr_@NAME@ (call_frame_t *frame, xlator_t *this, if (ret) goto err; + local->xdata = dict_ref(xdata); local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, @SHORT_ARGS@); if (!local->stub) { @@ -248,7 +249,6 @@ jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, */ local->call_count = priv->n_children - 1; - local->successful_acks = 0; for (trav = this->children->next; trav; trav = trav->next) { STACK_WIND (frame, jbr_@NAME@_fan_in, trav->xlator, trav->xlator->fops->@NAME@, @@ -307,9 +307,12 @@ int32_t jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, @LONG_ARGS@) { - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + jbr_private_t *priv = NULL; + int32_t op_errno = 0; GF_VALIDATE_OR_GOTO ("jbr", this, out); GF_VALIDATE_OR_GOTO (this->name, frame, out); @@ -330,6 +333,58 @@ jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " "to meet quorum. 
Failing the operation without trying " "it on the leader."); + +#if defined(JBR_CG_QUEUE) + /* + * In case of a fop failure, before unwinding need to * + * remove it from queue * + */ + ret = jbr_remove_from_queue (frame, this); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_GENERIC, "Failed to remove from queue."); + } +#endif + + /* + * In this case, the quorum is not met on the followers * + * So the operation will not be performed on the leader * + * and a rollback will be sent via GF_FOP_IPC to all the * + * followers, where this particular fop's term and index * + * numbers will be journaled, and later used to rollback * + */ + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + jbr_ipc_call_dispatch (new_frame, + this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local for new_frame"); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } + STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, @ERROR_ARGS@); } else { @@ -348,12 +403,11 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @LONG_ARGS@) { -#if defined(JBR_CG_QUEUE) int32_t ret = -1; -#endif gf_boolean_t result = _gf_false; jbr_private_t *priv = NULL; jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; GF_VALIDATE_OR_GOTO ("jbr", this, err); GF_VALIDATE_OR_GOTO (this->name, frame, err); @@ -404,6 +458,59 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, gf_msg (this->name, GF_LOG_ERROR, 
EROFS, J_MSG_QUORUM_NOT_MET, "Quorum is not met. " "The operation has failed."); + /* + * In this case, the quorum is not met after the * + * operation is performed on the leader. Hence a * + * rollback will be sent via GF_FOP_IPC to the leader * + * where this particular fop's term and index numbers * + * will be journaled, and later used to rollback. * + * The same will be done on all the followers * + */ + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, "op = %d", + new_frame->op); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, + GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set " + "rollback-fop"); + } else { + new_local->xdata = dict_ref (local->xdata); + new_frame->local = new_local; + /* + * Calling STACK_WIND instead * + * of jbr_ipc as it will not * + * unwind to the previous * + * translators like it will * + * in case of jbr_ipc. * + */ + STACK_WIND (new_frame, + jbr_ipc_complete, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local " + "for new_frame"); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } } else { #if defined(JBR_CG_NEED_FD) op_ret = local->successful_op_ret; @@ -416,6 +523,11 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, } } + /* + * Unrefing the reference taken in jbr_@NAME@ () * + */ + dict_unref (local->xdata); + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); |