summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@fb.com> 2017-09-27 13:18:25 -0700
committer Jeff Darcy <jdarcy@fb.com> 2017-09-27 16:03:35 -0700
commit d9f9be442808ed13135f560698a2f95fe66282a5 (patch)
tree 2a18c75ca3c3ab59bf8f42381e48911a7744f9d6
parent e3e7c56323a21467e7e0a0d8f2adc5b05ce40a54 (diff)
self-heal: fix automatic split-brain resolution options
Differential Revision: https://phabricator.intern.facebook.com/D5927193
Change-Id: Ife04c8738b9ee721e7be9bc843b2f6d54bbb468e
-rw-r--r-- tests/basic/afr/gfid-unsplit-type-mismatch.t | 5
-rw-r--r-- tests/basic/afr/gfid-unsplit.t | 5
-rw-r--r-- tests/basic/afr/shd-autofix-nogfid.t | 5
-rw-r--r-- tests/basic/afr/shd-force-inspect.t | 5
-rw-r--r-- tests/basic/afr/shd-pgfid-heal.t | 5
-rwxr-xr-x tests/bugs/fb2506544_ctime.t | 8
-rwxr-xr-x tests/bugs/fb2506544_majority.t | 9
-rwxr-xr-x tests/bugs/fb2506544_mtime.t | 8
-rwxr-xr-x tests/bugs/fb2506544_size.t | 8
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 65
-rw-r--r-- xlators/cluster/afr/src/afr.c | 122
-rw-r--r-- xlators/cluster/afr/src/afr.h | 21
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 24
13 files changed, 206 insertions, 84 deletions
diff --git a/tests/basic/afr/gfid-unsplit-type-mismatch.t b/tests/basic/afr/gfid-unsplit-type-mismatch.t
index 9e205021a0d..172645d3fae 100644
--- a/tests/basic/afr/gfid-unsplit-type-mismatch.t
+++ b/tests/basic/afr/gfid-unsplit-type-mismatch.t
@@ -15,9 +15,8 @@ TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 cluster.quorum-type none
-TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
diff --git a/tests/basic/afr/gfid-unsplit.t b/tests/basic/afr/gfid-unsplit.t
index 0b883ab658f..d6cb7e74a8a 100644
--- a/tests/basic/afr/gfid-unsplit.t
+++ b/tests/basic/afr/gfid-unsplit.t
@@ -17,9 +17,8 @@ TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.quorum-type none
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 nfs.disable off
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
diff --git a/tests/basic/afr/shd-autofix-nogfid.t b/tests/basic/afr/shd-autofix-nogfid.t
index 8bc2c965640..f54a8eb8600 100644
--- a/tests/basic/afr/shd-autofix-nogfid.t
+++ b/tests/basic/afr/shd-autofix-nogfid.t
@@ -15,9 +15,8 @@ TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.self-heal-daemon on
TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 cluster.quorum-type auto
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
diff --git a/tests/basic/afr/shd-force-inspect.t b/tests/basic/afr/shd-force-inspect.t
index caceb841322..c8b027c8933 100644
--- a/tests/basic/afr/shd-force-inspect.t
+++ b/tests/basic/afr/shd-force-inspect.t
@@ -15,9 +15,8 @@ TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.self-heal-daemon on
TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 cluster.quorum-type none
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
diff --git a/tests/basic/afr/shd-pgfid-heal.t b/tests/basic/afr/shd-pgfid-heal.t
index 6213e4c6374..d723fe436a6 100644
--- a/tests/basic/afr/shd-pgfid-heal.t
+++ b/tests/basic/afr/shd-pgfid-heal.t
@@ -15,10 +15,9 @@ TEST $CLI volume set $V0 cluster.choose-local off
TEST $CLI volume set $V0 cluster.self-heal-daemon on
TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 cluster.quorum-type none
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.pgfid-self-heal on
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
TEST $CLI volume set $V0 storage.build-pgfid on
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
diff --git a/tests/bugs/fb2506544_ctime.t b/tests/bugs/fb2506544_ctime.t
index 8c7ab02cc8e..f59525db590 100755
--- a/tests/bugs/fb2506544_ctime.t
+++ b/tests/bugs/fb2506544_ctime.t
@@ -16,7 +16,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
-#EST $CLI volume set $V0 cluster.favorite-child-by-ctime off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -61,8 +60,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# The MD5 should be of the file which was created first.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-ctime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy ctime
+TEST $CLI volume set $V0 cluster.favorite-child-by-ctime on
sleep 1
# Mount the volume
TEST glusterfs --log-level DEBUG --volfile-id=/$V0 --volfile-server=$H0 $M0 \
@@ -85,7 +83,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
-#EST $CLI volume set $V0 cluster.favorite-child-by-ctime off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -131,8 +128,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# The MD5 should be of the file which was created first.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-ctime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy ctime
+TEST $CLI volume set $V0 cluster.favorite-child-by-ctime on
TEST $CLI volume set $V0 cluster.self-heal-daemon on
sleep 1
/etc/init.d/glusterd restart_shd
diff --git a/tests/bugs/fb2506544_majority.t b/tests/bugs/fb2506544_majority.t
index c38a6d59947..a2c489a4063 100755
--- a/tests/bugs/fb2506544_majority.t
+++ b/tests/bugs/fb2506544_majority.t
@@ -16,8 +16,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority off
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
# This would normally be a toxic combination because it allows us to create a
# split brain by writing to 1/3 replicas ... but for testing that's exactly
# what we want.
@@ -67,8 +65,7 @@ sleep 1
# Compare MD5's, the healed file should be that of the file which is
# on 2/3 bricks.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
sleep 1
# Mount the volume
TEST glusterfs --log-level DEBUG --volfile-id=/$V0 --volfile-server=$H0 $M0 \
@@ -91,7 +88,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -139,8 +135,7 @@ sleep 1
# Compare MD5's, the healed file should be that of the file which is
# on 2/3 bricks.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-majority on
-TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume set $V0 cluster.favorite-child-by-majority on
TEST $CLI volume set $V0 cluster.self-heal-daemon on
sleep 1
/etc/init.d/glusterd restart_shd
diff --git a/tests/bugs/fb2506544_mtime.t b/tests/bugs/fb2506544_mtime.t
index b908fdaddd5..b68c6b2e089 100755
--- a/tests/bugs/fb2506544_mtime.t
+++ b/tests/bugs/fb2506544_mtime.t
@@ -16,7 +16,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -62,8 +61,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# Compare MD5's, the MD5 should be of the file we modified last.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
sleep 1
# Mount the volume
TEST glusterfs --log-level DEBUG --volfile-id=/$V0 --volfile-server=$H0 $M0 \
@@ -86,7 +84,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -134,8 +131,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# Compare MD5's, the MD5 should be of the file we modified last.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-mtime on
-TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume set $V0 cluster.favorite-child-by-mtime on
TEST $CLI volume set $V0 cluster.self-heal-daemon on
sleep 1
/etc/init.d/glusterd restart_shd
diff --git a/tests/bugs/fb2506544_size.t b/tests/bugs/fb2506544_size.t
index 593c1053853..731ffd13f56 100755
--- a/tests/bugs/fb2506544_size.t
+++ b/tests/bugs/fb2506544_size.t
@@ -16,7 +16,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
-#EST $CLI volume set $V0 cluster.favorite-child-by-size off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -61,8 +60,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# Compare MD5's, the MD5 should be of the file that is the largest.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-size on
-TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume set $V0 cluster.favorite-child-by-size on
sleep 1
# Mount the volume
TEST glusterfs --log-level DEBUG --volfile-id=/$V0 --volfile-server=$H0 $M0 \
@@ -88,7 +86,6 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
-#EST $CLI volume set $V0 cluster.favorite-child-by-size off
TEST $CLI volume set $V0 cluster.quorum-type fixed
TEST $CLI volume set $V0 cluster.quorum-count 1
TEST $CLI volume start $V0
@@ -135,8 +132,7 @@ sleep 1
# Ok now turn the favorite-child option and we should be able to read it.
# Compare MD5's, the MD5 should be of the file that is the largest.
umount $M0
-#EST $CLI volume set $V0 cluster.favorite-child-by-size on
-TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume set $V0 cluster.favorite-child-by-size on
TEST $CLI volume set $V0 cluster.self-heal-daemon on
sleep 1
/etc/init.d/glusterd restart_shd
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5a9ab795a94..6d123bf407f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -867,12 +867,30 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
return fav_child;
}
+
+typedef struct {
+ int (*func) (xlator_t *this,
+ struct afr_reply *replies,
+ inode_t *inode);
+ char *name;
+} _policy_pair;
+
+static _policy_pair afr_sh_fav_child_policies[AFR_FAV_CHILD_POLICY_MAX] = {
+ [AFR_FAV_CHILD_BY_MAJORITY] = { afr_sh_fav_by_majority,
+ "MAJORITY" },
+ [AFR_FAV_CHILD_BY_MTIME] = { afr_sh_fav_by_mtime, "MTIME" },
+ [AFR_FAV_CHILD_BY_CTIME] = { afr_sh_fav_by_ctime, "CTIME" },
+ [AFR_FAV_CHILD_BY_SIZE] = { afr_sh_fav_by_size, "SIZE" },
+};
+
int
afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
inode_t *inode, char **policy_str)
{
afr_private_t *priv = NULL;
- int fav_child = -1;
+ int fav_child;
+ int pol_index;
+ _policy_pair *policy;
priv = this->private;
if (!afr_can_decide_split_brain_source_sinks (replies,
@@ -880,37 +898,26 @@ afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
return -1;
}
- switch (priv->fav_child_policy) {
- case AFR_FAV_CHILD_BY_SIZE:
- fav_child = afr_sh_fav_by_size (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "SIZE";
- }
- break;
- case AFR_FAV_CHILD_BY_CTIME:
- fav_child = afr_sh_fav_by_ctime (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "CTIME";
- }
- break;
- case AFR_FAV_CHILD_BY_MTIME:
- fav_child = afr_sh_fav_by_mtime (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "MTIME";
- }
- break;
- case AFR_FAV_CHILD_BY_MAJORITY:
- fav_child = afr_sh_fav_by_majority (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "MAJORITY";
+ pol_index = AFR_FAV_CHILD_NONE + 1;
+ while (pol_index < AFR_FAV_CHILD_POLICY_MAX) {
+ if (priv->fav_child_policy & (1 << pol_index)) {
+ policy = &afr_sh_fav_child_policies[pol_index];
+ gf_log (this->name, GF_LOG_TRACE,
+ "trying policy %s", policy->name);
+ fav_child = policy->func (this, replies, inode);
+ if (fav_child >= 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "policy %s WORKED", policy->name);
+ if (policy_str) {
+ *policy_str = policy->name;
+ }
+ return fav_child;
+ }
}
- break;
- case AFR_FAV_CHILD_NONE:
- default:
- break;
+ ++pol_index;
}
- return fav_child;
+ return -1;
}
int
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f291626fff9..a917efbbcb6 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -112,14 +112,29 @@ fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype,
int
afr_set_favorite_child_policy (afr_private_t *priv, char *policy)
{
- int index = -1;
-
- index = gf_get_index_by_elem (afr_favorite_child_policies, policy);
- if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
- return -1;
-
- priv->fav_child_policy = index;
+ char *token;
+ char *saveptr;
+ uint32_t retval = 0;
+
+ if (strcasecmp (policy, "none") != 0) {
+ token = strtok_r (policy, ",", &saveptr);
+ while (token) {
+ if (strcasecmp (token, "majority") == 0) {
+ retval |= (1 << AFR_FAV_CHILD_BY_MAJORITY);
+ } else if (strcasecmp (token, "mtime") == 0) {
+ retval |= (1 << AFR_FAV_CHILD_BY_MTIME);
+ } else if (strcasecmp (token, "ctime") == 0) {
+ retval |= (1 << AFR_FAV_CHILD_BY_CTIME);
+ } else if (strcasecmp (token, "size") == 0) {
+ retval |= (1 << AFR_FAV_CHILD_BY_SIZE);
+ } else {
+ return -1;
+ }
+ token = strtok_r (NULL, ",", &saveptr);
+ }
+ }
+ priv->fav_child_policy = retval;
return 0;
}
int
@@ -132,6 +147,7 @@ reconfigure (xlator_t *this, dict_t *options)
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ gf_boolean_t policy_flag;
priv = this->private;
@@ -302,6 +318,33 @@ reconfigure (xlator_t *this, dict_t *options)
if (afr_set_favorite_child_policy (priv, fav_child_policy) == -1)
goto out;
+ GF_OPTION_RECONF ("favorite-child-by-majority", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MAJORITY);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-mtime", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MTIME);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-ctime", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_CTIME);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-size", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_SIZE);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "fav_child policy = 0x%x",
+ priv->fav_child_policy);
+
priv->did_local_discovery = _gf_false;
priv->did_discovery = _gf_false;
@@ -335,6 +378,7 @@ init (xlator_t *this)
xlator_t *fav_child = NULL;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ gf_boolean_t policy_flag;
if (!this->children) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -421,6 +465,29 @@ init (xlator_t *this)
if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
goto out;
+ GF_OPTION_INIT ("favorite-child-by-majority", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MAJORITY);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-mtime", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MTIME);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-ctime", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_CTIME);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-size", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_SIZE);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "fav_child policy = 0x%x",
+ priv->fav_child_policy);
+
GF_OPTION_INIT ("shd-max-threads", priv->shd.max_threads,
uint32, out);
@@ -1102,7 +1169,6 @@ struct volume_options options[] = {
},
{ .key = {"favorite-child-policy"},
.type = GF_OPTION_TYPE_STR,
- .value = {"none", "size", "ctime", "mtime", "majority"},
.default_value = "none",
.description = "This option can be used to automatically resolve "
"split-brains using various policies without user "
@@ -1111,7 +1177,45 @@ struct volume_options options[] = {
"pick the file with the latest ctime and mtime "
"respectively as the source. \"majority\" picks a file"
" with identical mtime and size in more than half the "
- "number of bricks in the replica.",
+ "number of bricks in the replica. More than one "
+ "policy can be specified, separated by commas. The "
+ "order of attempted application (regardless of the "
+ "specification order) is: majority, mtime, ctime, "
+ "size. The value set here can be modified by the "
+ "favorite-child-by-xxx options."
+ },
+ { .key = {"favorite-child-by-majority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "majority (more than half of the copies with same "
+ "mtime and size). This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-mtime"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "latest mtime. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-ctime"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "latest ctime. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-size"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "greatest size. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
},
{ .key = {"pgfid-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3314f865781..f6b8fa1b8b8 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -68,12 +68,16 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
uuid_utoa (local->inode->gfid)); \
} while (0)
+/*
+ * These *must* be defined in order of decreasing precedence in order for
+ * afr_sh_get_fav_by_policy to work correctly.
+ */
typedef enum {
- AFR_FAV_CHILD_NONE,
- AFR_FAV_CHILD_BY_SIZE,
- AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_NONE = 0,
+ AFR_FAV_CHILD_BY_MAJORITY, /* Highest precedence. */
AFR_FAV_CHILD_BY_MTIME,
- AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_SIZE, /* Lowest precedence. */
AFR_FAV_CHILD_POLICY_MAX,
} afr_favorite_child_policy;
@@ -136,8 +140,13 @@ typedef struct _afr_private {
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
- afr_favorite_child_policy fav_child_policy;/*Policy to use for automatic
- resolution of split-brains.*/
+ /*
+ * Policy to use for automatic resolution of split-brains. This needs
+ * to be a bit-field so that we can iterate over multiple policies when
+ * earlier ones yield ties. The actual bits used are (1 << X) where X
+ * is one of the enum values from afr_favorite_child_policy.
+ */
+ uint32_t fav_child_policy;
gf_boolean_t inodelk_trace;
gf_boolean_t entrylk_trace;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 24f4f62ce59..cd42cf75756 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1313,6 +1313,30 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_12,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "cluster.favorite-child-by-majority",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.favorite-child-by-mtime",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.favorite-child-by-ctime",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.favorite-child-by-size",
+ .voltype = "cluster/replicate",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_7_12,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "cluster.pgfid-self-heal",
.voltype = "cluster/replicate",
.op_version = 2,