From 46d333783a968ab39e0beade9c7a1eec8035f8b1 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Mon, 31 Mar 2014 18:37:38 +0000 Subject: nsr: add quorum enforcement Change-Id: I0241f8c1ac97c80ae438e3d9f1ac492d63da9347 Signed-off-by: Jeff Darcy --- xlators/cluster/nsr-server/src/all-templates.c | 25 +++++++++++++++++++++++++ xlators/cluster/nsr-server/src/nsr-internal.h | 1 + xlators/cluster/nsr-server/src/nsr.c | 8 ++++++++ xlators/cluster/nsr-server/src/recon_notify.c | 10 ++++++++-- 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/nsr-server/src/all-templates.c b/xlators/cluster/nsr-server/src/all-templates.c index 2f0509a6c..fa29de7b2 100644 --- a/xlators/cluster/nsr-server/src/all-templates.c +++ b/xlators/cluster/nsr-server/src/all-templates.c @@ -59,6 +59,31 @@ nsr_$NAME$ (call_frame_t *frame, xlator_t *this, int from_leader; int from_recon; uint32_t ti = 0; + double must_be_up; + double are_up; + + /* + * Our first goal here is to avoid "split brain surprise" for users who + * specify exactly 50% with two- or three-way replication. That means + * either a more-than check against half the total replicas or an + * at-least check against half of our peers (one less). Of the two, + * only an at-least check supports the intuitive use of 100% to mean + * all replicas must be present, because "more than 100%" will never + * succeed regardless of which count we use. This leaves us with a + * slightly non-traditional definition of quorum ("at least X% of peers + * not including ourselves") but one that's useful enough to be worth + * it. + * + * Note that n_children and up_children *do* include the local + * subvolume, so we need to subtract one in each case. + */ + must_be_up = ((double)(priv->n_children - 1)) * priv->quorum_pct; + are_up = ((double)(priv->up_children - 1)) * 100.0; + if (are_up < must_be_up) { + /* Emulate the AFR client-side-quorum behavior. 
*/ + op_errno = EROFS; + goto err; + } local = mem_get0(this->local_pool); if (!local) { diff --git a/xlators/cluster/nsr-server/src/nsr-internal.h b/xlators/cluster/nsr-server/src/nsr-internal.h index fc612c136..72b61bfa5 100644 --- a/xlators/cluster/nsr-server/src/nsr-internal.h +++ b/xlators/cluster/nsr-server/src/nsr-internal.h @@ -59,6 +59,7 @@ typedef struct { volatile uint32_t ops_in_flight; uint32_t index; gf_lock_t index_lock; + double quorum_pct; } nsr_private_t; typedef struct { diff --git a/xlators/cluster/nsr-server/src/nsr.c b/xlators/cluster/nsr-server/src/nsr.c index eda9e555a..85eba09b5 100644 --- a/xlators/cluster/nsr-server/src/nsr.c +++ b/xlators/cluster/nsr-server/src/nsr.c @@ -591,6 +591,9 @@ nsr_init (xlator_t *this) goto err; } + + GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); + GF_OPTION_INIT ("subvol-uuid", priv->subvol_uuid, str, err); gf_log (this->name, GF_LOG_INFO, "subvol_uuid = %s", priv->subvol_uuid); if (gf_asprintf(&priv->leader_key,"%s:leader",priv->subvol_uuid) <= 0) { @@ -800,5 +803,10 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_STR, .description = "UUID for this NSR (sub)volume" }, + { .key = {"quorum-percent"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "50.0", + .description = "percentage of rep_count-1 that must be up" + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c index 24f7cf2de..1c50de234 100644 --- a/xlators/cluster/nsr-server/src/recon_notify.c +++ b/xlators/cluster/nsr-server/src/recon_notify.c @@ -91,8 +91,14 @@ nsr_recon_set_leader (xlator_t *this) if (ctx->last_reconciled_term == priv->current_term) return; - // No majority as of yet - if (priv->up_children <= (priv->n_children / 2)) + /* + * Quorum for reconciliation is not the same as quorum for I/O. Here, + * we require a true majority. No +1 adjustment is needed because + * n_children and up_children already include the local subvolume. 
+ * + * TBD: re-evaluate when to reconcile (including partial) + */ + if (priv->up_children <= (priv->n_children / 2)) return; gf_log (this->name, GF_LOG_INFO, -- cgit