diff options
Diffstat (limited to 'xlators/cluster/nsr-server/src/all-templates.c')
-rw-r--r-- | xlators/cluster/nsr-server/src/all-templates.c | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/xlators/cluster/nsr-server/src/all-templates.c b/xlators/cluster/nsr-server/src/all-templates.c new file mode 100644 index 000000000..fa29de7b2 --- /dev/null +++ b/xlators/cluster/nsr-server/src/all-templates.c @@ -0,0 +1,345 @@ +/* + * You can put anything here - it doesn't even have to be a comment - and it + * will be ignored until we reach the first template-name comment. + */ + + +// template-name read-fop +$TYPE$ +nsr_$NAME$ (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_private_t *priv = this->private; + gf_boolean_t in_recon = _gf_false; + int32_t recon_term, recon_index; + + // allow reads during reconciliation + // TBD: allow "dirty" reads on non-leaders + if (xdata && + (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && + (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { + in_recon = _gf_true; + } + + if ((!priv->leader) && (in_recon == _gf_false)) { + goto err; + } + + STACK_WIND (frame, default_$NAME$_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; + +err: + STACK_UNWIND_STRICT ($NAME$, frame, -1, EREMOTE, + $DEFAULTS$); + return 0; +} + +// template-name read-dispatch +/* No "dispatch" function needed for $NAME$ */ + +// template-name read-fan-in +/* No "fan-in" function needed for $NAME$ */ + +// template-name read-continue +/* No "continue" function needed for $NAME$ */ + +// template-name read-complete +/* No "complete" function needed for $NAME$ */ + +// template-name write-fop +$TYPE$ +nsr_$NAME$ (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_local_t *local = NULL; + nsr_private_t *priv = this->private; + int op_errno = ENOMEM; + int from_leader; + int from_recon; + uint32_t ti = 0; + double must_be_up; + double are_up; + + /* + * Our first goal here is to avoid "split brain surprise" for users who + * specify exactly 50% with two- or three-way replication. That means + * either a more-than check against half the total replicas or an + * at-least check against half of our peers (one less). Of the two, + * only an at-least check supports the intuitive use of 100% to mean + * all replicas must be present, because "more than 100%" will never + * succeed regardless of which count we use. This leaves us with a + * slightly non-traditional definition of quorum ("at least X% of peers + * not including ourselves") but one that's useful enough to be worth + * it. + * + * Note that n_children and up_children *do* include the local + * subvolume, so we need to subtract one in each case. + */ + must_be_up = ((double)(priv->n_children - 1)) * priv->quorum_pct; + are_up = ((double)(priv->up_children - 1)) * 100.0; + if (are_up < must_be_up) { + /* Emulate the AFR client-side-quorum behavior. */ + op_errno = EROFS; + goto err; + } + + local = mem_get0(this->local_pool); + if (!local) { + goto err; + } +#if defined(NSR_CG_NEED_FD) + local->fd = fd_ref(fd); +#else + local->fd = NULL; +#endif + INIT_LIST_HEAD(&local->qlinks); + frame->local = local; + + if (xdata) { + from_leader = !!dict_get(xdata,NSR_TERM_XATTR); + from_recon = !!dict_get(xdata,RECON_TERM_XATTR) + && !!dict_get(xdata,RECON_INDEX_XATTR); + } + else { + from_leader = from_recon = _gf_false; + } + + // follower/recon path + // just send it to local node + if (from_leader || from_recon) { + atomic_inc(&priv->ops_in_flight); + STACK_WIND (frame, nsr_$NAME$_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; + } + + + if (!priv->leader/* || priv->fence_io*/) { + op_errno = EREMOTE; + goto err; + } + + + if (!xdata) { + xdata = dict_new(); + if (!xdata) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate xdata"); + goto err; + } + } + + if (dict_set_int32(xdata,NSR_TERM_XATTR,priv->current_term) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set nsr-term"); + goto err; + } + + LOCK(&priv->index_lock); + ti = ++(priv->index); + UNLOCK(&priv->index_lock); + if (dict_set_int32(xdata,NSR_INDEX_XATTR,ti) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set index"); + goto err; + } + + local->stub = fop_$NAME$_stub (frame,nsr_$NAME$_continue, + $ARGS_SHORT$); + if (!local->stub) { + goto err; + } + + +#if defined(NSR_CG_QUEUE) + nsr_inode_ctx_t *ictx = nsr_get_inode_ctx(this,fd->inode); + if (!ictx) { + op_errno = EIO; + goto err; + } + LOCK(&ictx->lock); + if (ictx->active) { + gf_log (this->name, GF_LOG_DEBUG, + "queuing request due to conflict"); + /* + * TBD: enqueue only for real conflict + * + * Currently we just act like all writes are in + * conflict with one another. What we should really do + * is check the active/pending queues and defer only if + * there's a conflict there. + * + * It's important to check the pending queue because we + * might have an active request X which conflicts with + * a pending request Y, and this request Z might + * conflict with Y but not X. If we checked only the + * active queue then Z could jump ahead of Y, which + * would be incorrect. + */ + local->qstub = fop_$NAME$_stub (frame, + nsr_$NAME$_dispatch, + $ARGS_SHORT$); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto err; + } + list_add_tail(&local->qlinks,&ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + return 0; + } + else { + list_add_tail(&local->qlinks,&ictx->aqueue); + ++(ictx->active); + } + UNLOCK(&ictx->lock); +#endif + + return nsr_$NAME$_dispatch (frame, this, $ARGS_SHORT$); + +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); + } + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); + } + STACK_UNWIND_STRICT ($NAME$, frame, -1, op_errno, + $DEFAULTS$); + return 0; +} + +// template-name write-dispatch +$TYPE$ +nsr_$NAME$_dispatch (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_local_t *local = frame->local; + nsr_private_t *priv = this->private; + xlator_list_t *trav; + + atomic_inc(&priv->ops_in_flight); + + /* + * TBD: unblock pending request(s) if we fail after this point but + * before we get to nsr_$NAME$_complete (where that code currently + * resides). + */ + + local->call_count = priv->n_children - 1; + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND (frame, nsr_$NAME$_fan_in, + trav->xlator, trav->xlator->fops->$NAME$, + $ARGS_SHORT$); + } + + // TBD: variable Issue count + return 0; +} + +// template-name write-fan-in +$TYPE$ +nsr_$NAME$_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + $ARGS_LONG$) +{ + nsr_local_t *local = frame->local; + uint8_t call_count; + + gf_log (this->name, GF_LOG_TRACE, + "op_ret = %d, op_errno = %d\n", op_ret, op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + UNLOCK(&frame->lock); + + // TBD: variable Completion count + if (call_count == 0) { + call_resume(local->stub); + } + + return 0; +} + +// template-name write-continue +$TYPE$ +nsr_$NAME$_continue (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + STACK_WIND (frame, nsr_$NAME$_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; +} + +// template-name write-complete +$TYPE$ +nsr_$NAME$_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + $ARGS_LONG$) +{ + nsr_private_t *priv = this->private; +#if defined(NSR_CG_NEED_FD) + nsr_local_t *local = frame->local; +#endif + +#if defined(NSR_CG_QUEUE) + nsr_inode_ctx_t *ictx; + nsr_local_t *next; + if (local->qlinks.next != &local->qlinks) { + list_del(&local->qlinks); + ictx = nsr_get_inode_ctx(this,local->fd->inode); + if (ictx) { + LOCK(&ictx->lock); + if (ictx->pending) { + /* + * TBD: dequeue *all* non-conflicting reqs + * + * With the stub implementation there can only + * be one request active at a time (zero here) + * so it's not an issue. In a real + * implementation there might still be other + * active requests to check against, and + * multiple pending requests that could + * continue. + */ + gf_log (this->name, GF_LOG_DEBUG, + "unblocking next request"); + --(ictx->pending); + next = list_entry (ictx->pqueue.next, + nsr_local_t, qlinks); + list_del(&next->qlinks); + list_add_tail(&next->qlinks,&ictx->aqueue); + call_resume(next->qstub); + } + else { + --(ictx->active); + } + UNLOCK(&ictx->lock); + } + } +#endif + +#if defined(NSR_CG_FSYNC) + nsr_mark_fd_dirty(this,local); +#endif + +#if defined(NSR_CG_NEED_FD) + fd_unref(local->fd); +#endif + + STACK_UNWIND_STRICT ($NAME$, frame, op_ret, op_errno, + $ARGS_SHORT$); + atomic_dec(&priv->ops_in_flight); + return 0; + +} |