summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-self-heal-common.c
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-05-02 18:45:44 +0530
committerJeff Darcy <jdarcy@redhat.com>2016-05-25 11:55:08 -0700
commit2f29065ae4715c9c4a9d20c4d15311bebd3ddb0e (patch)
tree163497880075bfcc2b97f92be2d2fb86ed95ab51 /xlators/cluster/afr/src/afr-self-heal-common.c
parentf8f16595d8dd8c8a869630bb77b7fd1b42b97e08 (diff)
afr: Automagic unsplit-brain by [ctime|mtime|size|majority]
Introduce cluster.favorite-child-policy which when enabled with [ctime|mtime|size|majority], automatically heals files that are in split-brian. The majority policy will not pick a source if there is no majority. The other three policies pick the first brick with a valid reply and non-zero ctime/mtime/size as source. Change-Id: I3c099a0404082213860f74f2c9b4d207cfaedb76 BUG: 1328224 Original-author: Richard Wareing <rwareing@fb.com> Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/14026 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anuradha Talur <atalur@redhat.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c292
1 files changed, 277 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 0b92f616030..a4c0e89e434 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -466,28 +466,20 @@ afr_dict_contains_heal_op (call_frame_t *frame)
return _gf_true;
}
-/* Return a source depending on the type of heal_op, and set sources[source],
- * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
- * only if the following condition is met:
- * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
- * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
- * sinks[node] are 1. This should be the case if the file is in split-brain.
- */
int
-afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
- unsigned char *sources,
+afr_mark_split_brain_source_sinks_by_heal_op (call_frame_t *frame,
+ xlator_t *this, unsigned char *sources,
unsigned char *sinks,
unsigned char *healed_sinks,
unsigned char *locked_on,
struct afr_reply *replies,
- afr_transaction_type type)
+ afr_transaction_type type, int heal_op)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
dict_t *xdata_req = NULL;
dict_t *xdata_rsp = NULL;
int ret = 0;
- int heal_op = -1;
int i = 0;
char *name = NULL;
int source = -1;
@@ -496,10 +488,6 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
priv = this->private;
xdata_req = local->xdata_req;
- ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
- if (ret)
- goto out;
-
for (i = 0; i < priv->child_count; i++) {
if (locked_on[i])
if (sources[i] || !sinks[i] || !healed_sinks[i]) {
@@ -598,6 +586,280 @@ out:
}
+int
+afr_sh_fav_by_majority (xlator_t *this, struct afr_reply *replies,
+ inode_t *inode)
+{
+ afr_private_t *priv;
+ int vote_count = -1;
+ int fav_child = -1;
+ int i = 0;
+ int k = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "mtime_sec = %d, size = %lu for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_size,
+ uuid_utoa (inode->gfid));
+ vote_count = 0;
+ for (k = 0; k < priv->child_count; k++) {
+ if ((replies[k].poststat.ia_mtime ==
+ replies[i].poststat.ia_mtime) &&
+ (replies[k].poststat.ia_size ==
+ replies[i].poststat.ia_size)
+ ) {
+ vote_count++;
+ }
+ }
+ if (vote_count > priv->child_count/2) {
+ fav_child = i;
+ break;
+ }
+ }
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_mtime: Choose favorite child by mtime.
+ */
+int
+afr_sh_fav_by_mtime (xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_mtime = 0;
+ uint32_t cmp_mtime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "mtime = %d, mtime_nsec = %d for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_mtime_nsec,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_mtime > cmp_mtime) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec =
+ replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_mtime == cmp_mtime)
+ && (replies[i].poststat.ia_mtime_nsec >
+ cmp_mtime_nsec)) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec =
+ replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_ctime: Choose favorite child by ctime.
+ */
+int
+afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_ctime = 0;
+ uint32_t cmp_ctime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "ctime = %d, ctime_nsec = %d for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_ctime,
+ replies[i].poststat.ia_ctime_nsec,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_ctime > cmp_ctime) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec =
+ replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_ctime == cmp_ctime)
+ && (replies[i].poststat.ia_ctime_nsec >
+ cmp_ctime_nsec)) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec =
+ replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
+
+/*
+ * afr_sh_fav_by_size: Choose favorite child by size.
+ */
+int
+afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint64_t cmp_sz = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug (this->name, 0, "Child:%s "
+ "file size = %lu for gfid %s",
+ priv->children[i]->name,
+ replies[i].poststat.ia_size,
+ uuid_utoa (inode->gfid));
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
+
+
+int
+afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
+ xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+ char mtime_str[256];
+ char ctime_str[256];
+ char *policy_str = NULL;
+ struct tm *tm_ptr;
+ time_t time;
+
+ priv = this->private;
+ if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) {
+ fav_child = afr_sh_fav_by_majority (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "MAJORITY";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) {
+ fav_child = afr_sh_fav_by_mtime (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "MTIME";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) {
+ fav_child = afr_sh_fav_by_ctime (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "CTIME";
+ } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) {
+ fav_child = afr_sh_fav_by_size (this, replies, inode);
+ if (fav_child >= 0)
+ policy_str = "SIZE";
+ }
+
+ if (fav_child > priv->child_count - 1) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) "
+ "selected by policy %s.", fav_child, policy_str);
+ } else if (fav_child >= 0) {
+ time = replies[fav_child].poststat.ia_mtime;
+ tm_ptr = localtime (&time);
+ strftime (mtime_str, sizeof (mtime_str), "%Y-%m-%d %H:%M:%S",
+ tm_ptr);
+ time = replies[fav_child].poststat.ia_ctime;
+ tm_ptr = localtime (&time);
+ strftime (ctime_str, sizeof (ctime_str), "%Y-%m-%d %H:%M:%S",
+ tm_ptr);
+
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Source %s "
+ "selected as authentic to resolve conflicting "
+ "data in file (gfid:%s) by %s (%lu bytes @ %s mtime, "
+ "%s ctime).",
+ priv->children[fav_child]->name,
+ uuid_utoa (inode->gfid),
+ policy_str,
+ replies[fav_child].poststat.ia_size,
+ mtime_str,
+ ctime_str);
+
+ sources[fav_child] = 1;
+ sinks[fav_child] = 0;
+ healed_sinks[fav_child] = 0;
+ }
+ return fav_child;
+}
+
+/* Return a source depending on the type of heal_op, and set sources[source],
+ * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
+ * only if the following condition is met:
+ * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
+ * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
+ * sinks[node] are 1. This should be the case if the file is in split-brain.
+ */
+int
+afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ int heal_op = -1;
+ int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
+ if (ret)
+ goto autoheal;
+
+ ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this,
+ sources, sinks,
+ healed_sinks,
+ locked_on, replies,
+ type, heal_op);
+ return ret;
+
+autoheal:
+ /* Automatically heal if fav_child_policy is set. */
+ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
+ ret = afr_mark_split_brain_source_sinks_by_policy (frame, this,
+ inode,
+ sources,
+ sinks,
+ healed_sinks,
+ locked_on,
+ replies,
+ type);
+ }
+
+ return ret;
+}
+
gf_boolean_t
afr_does_witness_exist (xlator_t *this, uint64_t *witness)
{