diff options
author | Ravishankar N <ravishankar@redhat.com> | 2015-01-09 14:43:22 +0000 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2015-01-15 01:28:37 -0800 |
commit | 8beaf169e39b262416e2274a028292379d39b310 (patch) | |
tree | e5cfd6da9af293ba7625c057914583a03bbeadab /xlators/cluster/afr/src/afr-self-heal-common.c | |
parent | 6da85222e5e49bcb15c4c8998f26c8dffb6a5b34 (diff) |
cluster/afr: split-brain resolution CLI
Extend the AFR heal command to include automated split-brain resolution.
This patch [3/3] is the final patch for afr automated split-brain resolution
implementation.
"gluster volume heal <VOLNAME> [full | statistics [heal-count [replica
<HOSTNAME:BRICKNAME>]] |info [healed | heal-failed | split-brain]| split-brain
{bigger-file <FILE> |source-brick <HOSTNAME:BRICKNAME> [<FILE>]}]"
The new additions being:
1. gluster volume heal <VOLNAME> split-brain bigger-file <FILE>
Locates the replica containing <FILE>, selects the bigger file as the source and
completes the heal.
2. gluster volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME>
<FILE>
Selects <FILE> present in <HOSTNAME:BRICKNAME> as the source and completes the heal.
3. gluster volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME>
Selects all split-brained files in <HOSTNAME:BRICKNAME> as the source and completes
the heal.
Note: <FILE> can be either the full file name as seen from the root of the
volume or the gfid-string representation of the file, which is sometimes
displayed in the output of the heal info command.
Entry/gfid split-brain resolution is not supported.
An example can be found in the test case.
Change-Id: I4649733922d406f14f28ee9033a5cb627b9538b3
BUG: 1136769
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/9377
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-common.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 191 |
1 files changed, 190 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 6198d4cf72c..e9d853c4ecd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -17,7 +17,7 @@
 #include "afr.h"
 #include "afr-self-heal.h"
 #include "byte-order.h"
-
+#include "protocol-common.h"
 
 int
 afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -287,6 +287,39 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
         return 0;
 }
 
+/*
+ * If by chance there are multiple sources with differing sizes, select
+ * the largest file as the source.
+ *
+ * This can happen if data was directly modified in the backend or for snapshots
+ */
+void
+afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
+                                 struct afr_reply *replies)
+{
+        int i = 0;
+        afr_private_t *priv = NULL;
+        uint64_t size = 0; /* largest poststat.ia_size seen among sources */
+
+        /* Find source with biggest file size */
+        priv = this->private;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sources[i])
+                        continue; /* only current sources are considered */
+                if (size <= replies[i].poststat.ia_size) {
+                        size = replies[i].poststat.ia_size;
+                }
+        }
+
+        /* Mark sources with less size as not source */
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sources[i])
+                        continue;
+                if (size > replies[i].poststat.ia_size) /* strictly smaller only */
+                        sources[i] = 0; /* sources equal to the maximum stay set */
+        }
+}
+
 void
 afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
                        unsigned char *locked_on, unsigned char *sinks)
@@ -304,6 +337,154 @@ afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
 }
 
 gf_boolean_t
+afr_dict_contains_heal_op (call_frame_t *frame)
+{
+        afr_local_t *local = NULL;
+        dict_t *xdata_req = NULL;
+        int ret = 0;
+        int heal_op = -1;
+
+        local = frame->local;
+        xdata_req = local->xdata_req;
+        ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
+        if (ret)
+                return _gf_false; /* "heal-op" lookup returned non-zero */
+        if (local->xdata_rsp == NULL) {
+                local->xdata_rsp = dict_new();
+                if (!local->xdata_rsp)
+                        return _gf_true; /* no response dict: message is not recorded */
+        }
+        ret = dict_set_str (local->xdata_rsp, "sh-fail-msg", /* NOTE(review): ret is never checked */
+                            "File not in split-brain");
+
+        return _gf_true;
+}
+
+/* Return a source depending on the type of heal_op, and set sources[source],
+ * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
+ * only if the following condition is met:
+ * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
+ * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
+ * sinks[node] are 1. This should be the case if the file is in split-brain.
+ */
+int
+afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
+                                   unsigned char *sources,
+                                   unsigned char *sinks,
+                                   unsigned char *healed_sinks,
+                                   unsigned char *locked_on,
+                                   struct afr_reply *replies,
+                                   afr_transaction_type type)
+{
+        afr_local_t *local = NULL;
+        afr_private_t *priv = NULL;
+        dict_t *xdata_req = NULL;
+        dict_t *xdata_rsp = NULL;
+        int ret = 0;
+        int heal_op = -1;
+        int i = 0;
+        char *name = NULL;
+        int source = -1; /* index of the chosen source child; returned on success */
+
+        local = frame->local;
+        priv = this->private;
+        xdata_req = local->xdata_req;
+        ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
+        if (ret)
+                goto out; /* returns dict_get_int32's non-zero result unchanged */
+        for (i = 0; i < priv->child_count; i++) {
+                if (locked_on[i])
+                        if (sources[i] || !sinks[i] || !healed_sinks[i]) {
+                                ret = -1; /* precondition in the header comment not met */
+                                goto out;
+                        }
+        }
+        if (local->xdata_rsp == NULL) {
+                local->xdata_rsp = dict_new();
+                if (!local->xdata_rsp) {
+                        ret = -1;
+                        goto out;
+                }
+        }
+        xdata_rsp = local->xdata_rsp;
+
+        switch (heal_op) {
+        case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+                if (type == AFR_METADATA_TRANSACTION) {
+                        ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+                                            "Use source-brick option to"
+                                            " heal metadata split-brain");
+                        if (!ret)
+                                ret = -1; /* message stored; still report failure */
+                        goto out;
+                }
+                for (i = 0 ; i < priv->child_count; i++)
+                        if (locked_on[i])
+                                sources[i] = 1; /* every locked child starts as a candidate */
+                afr_mark_largest_file_as_source (this, sources, replies);
+                if (AFR_COUNT (sources, priv->child_count) != 1) { /* NOTE(review): sources[] stays modified on this failure path */
+                        ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+                                            "No bigger file");
+                        if (!ret)
+                                ret = -1;
+                        goto out;
+                }
+                for (i = 0 ; i < priv->child_count; i++)
+                        if (sources[i])
+                                source = i;
+                sinks[source] = 0;
+                healed_sinks[source] = 0;
+                break;
+        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+                ret = dict_get_str (xdata_req, "child-name", &name);
+                if (ret)
+                        goto out;
+                source = afr_get_child_index_from_name (this, name);
+                if (source < 0) { /* -1 means no child has this name */
+                        ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+                                            "Invalid brick name");
+                        if (!ret)
+                                ret = -1;
+                        goto out;
+                }
+                if (locked_on[source] != 1) {
+                        ret = dict_set_str (xdata_rsp, "sh-fail-msg",
+                                            "Brick is not up");
+                        if (!ret)
+                                ret = -1;
+                        goto out;
+                }
+                sources[source] = 1;
+                sinks[source] = 0;
+                healed_sinks[source] = 0;
+                break;
+        default:
+                ret = -1; /* heal_op is neither of the two handled values */
+                goto out;
+        }
+        ret = source; /* success: return the selected child index */
+out:
+        return ret;
+
+}
+
+int
+afr_get_child_index_from_name (xlator_t *this, char *name)
+{
+        afr_private_t *priv = this->private;
+        int index = -1;
+
+        for (index = 0; index < priv->child_count; index++) {
+                if (!strcmp (priv->children[index]->name, name))
+                        goto out; /* exact name match: return this index */
+        }
+        index = -1; /* no child name matched */
+out:
+        return index;
+}
+
+
+gf_boolean_t
 afr_does_witness_exist (xlator_t *this, uint64_t *witness)
 {
         int i = 0;
@@ -427,6 +608,14 @@ afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
                 }
         }
 
+        /* If no sources, all locked nodes are sinks - split brain */
+        if (AFR_COUNT (sources, priv->child_count) == 0) {
+                for (i = 0; i < priv->child_count; i++) {
+                        if (locked_on[i])
+                                sinks[i] = 1;
+                }
+        }
+
         /* In afr-v1 if a file is self-accused but didn't have any pending
          * operations on others then it is similar to 'dirty' in afr-v2.
          * Consider such cases as witness. |