summary refs log tree commit diff stats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 65
-rw-r--r-- xlators/cluster/afr/src/afr.c | 122
-rw-r--r-- xlators/cluster/afr/src/afr.h | 21
3 files changed, 164 insertions, 44 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5a9ab795a94..6d123bf407f 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -867,12 +867,30 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode)
return fav_child;
}
+
+typedef struct {
+ int (*func) (xlator_t *this,
+ struct afr_reply *replies,
+ inode_t *inode);
+ char *name;
+} _policy_pair;
+
+static _policy_pair afr_sh_fav_child_policies[AFR_FAV_CHILD_POLICY_MAX] = {
+ [AFR_FAV_CHILD_BY_MAJORITY] = { afr_sh_fav_by_majority,
+ "MAJORITY" },
+ [AFR_FAV_CHILD_BY_MTIME] = { afr_sh_fav_by_mtime, "MTIME" },
+ [AFR_FAV_CHILD_BY_CTIME] = { afr_sh_fav_by_ctime, "CTIME" },
+ [AFR_FAV_CHILD_BY_SIZE] = { afr_sh_fav_by_size, "SIZE" },
+};
+
int
afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
inode_t *inode, char **policy_str)
{
afr_private_t *priv = NULL;
- int fav_child = -1;
+ int fav_child;
+ int pol_index;
+ _policy_pair *policy;
priv = this->private;
if (!afr_can_decide_split_brain_source_sinks (replies,
@@ -880,37 +898,26 @@ afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies,
return -1;
}
- switch (priv->fav_child_policy) {
- case AFR_FAV_CHILD_BY_SIZE:
- fav_child = afr_sh_fav_by_size (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "SIZE";
- }
- break;
- case AFR_FAV_CHILD_BY_CTIME:
- fav_child = afr_sh_fav_by_ctime (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "CTIME";
- }
- break;
- case AFR_FAV_CHILD_BY_MTIME:
- fav_child = afr_sh_fav_by_mtime (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "MTIME";
- }
- break;
- case AFR_FAV_CHILD_BY_MAJORITY:
- fav_child = afr_sh_fav_by_majority (this, replies, inode);
- if (policy_str && fav_child >= 0) {
- *policy_str = "MAJORITY";
+ pol_index = AFR_FAV_CHILD_NONE + 1;
+ while (pol_index < AFR_FAV_CHILD_POLICY_MAX) {
+ if (priv->fav_child_policy & (1 << pol_index)) {
+ policy = &afr_sh_fav_child_policies[pol_index];
+ gf_log (this->name, GF_LOG_TRACE,
+ "trying policy %s", policy->name);
+ fav_child = policy->func (this, replies, inode);
+ if (fav_child >= 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "policy %s WORKED", policy->name);
+ if (policy_str) {
+ *policy_str = policy->name;
+ }
+ return fav_child;
+ }
}
- break;
- case AFR_FAV_CHILD_NONE:
- default:
- break;
+ ++pol_index;
}
- return fav_child;
+ return -1;
}
int
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f291626fff9..a917efbbcb6 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -112,14 +112,63 @@ fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype,
+/*
+ * Translate the "favorite-child-policy" option string into the bit-mask
+ * stored in priv->fav_child_policy (bit X is 1 << AFR_FAV_CHILD_BY_X).
+ *
+ * @priv:   private data; fav_child_policy is written only on success.
+ * @policy: "none", or a comma-separated list of "majority", "mtime",
+ *          "ctime" and "size".  Case is ignored and empty items (such as
+ *          the one in "mtime,,size") are skipped.
+ *
+ * The string is scanned in place and never written to.  It is the caller's
+ * buffer, and a destructive tokenizer such as strtok_r() would overwrite its
+ * commas with NULs, leaving the caller with only the first item.
+ *
+ * Returns 0 on success, -1 if any item is not a known policy name.
+ */
 int
 afr_set_favorite_child_policy (afr_private_t *priv, char *policy)
 {
-        int index = -1;
-
-        index = gf_get_index_by_elem (afr_favorite_child_policies, policy);
-        if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
-                return -1;
-
-        priv->fav_child_policy = index;
+        static const char *const names[AFR_FAV_CHILD_POLICY_MAX] = {
+                [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+                [AFR_FAV_CHILD_BY_MTIME]    = "mtime",
+                [AFR_FAV_CHILD_BY_CTIME]    = "ctime",
+                [AFR_FAV_CHILD_BY_SIZE]     = "size",
+        };
+        const char *item   = policy;
+        uint32_t    retval = 0;
+        size_t      len    = 0;
+        int         i      = 0;
+
+        if (strcasecmp (policy, "none") == 0) {
+                priv->fav_child_policy = 0;
+                return 0;
+        }
+
+        while (*item != '\0') {
+                len = strcspn (item, ",");
+                if (len == 0) {
+                        /* Empty item: step over the separator. */
+                        ++item;
+                        continue;
+                }
+
+                /* Whole-item match, so "mtimes" or "m" are rejected. */
+                for (i = AFR_FAV_CHILD_NONE + 1;
+                     i < AFR_FAV_CHILD_POLICY_MAX; ++i) {
+                        if ((strlen (names[i]) == len) &&
+                            (strncasecmp (item, names[i], len) == 0)) {
+                                break;
+                        }
+                }
+                if (i == AFR_FAV_CHILD_POLICY_MAX) {
+                        return -1;
+                }
+
+                retval |= ((uint32_t) 1 << i);
+                item += len;
+        }
+
+        priv->fav_child_policy = retval;
         return 0;
 }
int
@@ -132,6 +147,7 @@ reconfigure (xlator_t *this, dict_t *options)
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ gf_boolean_t policy_flag;
priv = this->private;
@@ -302,6 +318,33 @@ reconfigure (xlator_t *this, dict_t *options)
if (afr_set_favorite_child_policy (priv, fav_child_policy) == -1)
goto out;
+ GF_OPTION_RECONF ("favorite-child-by-majority", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MAJORITY);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-mtime", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MTIME);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-ctime", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_CTIME);
+ }
+
+ GF_OPTION_RECONF ("favorite-child-by-size", policy_flag, options,
+ bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_SIZE);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "fav_child policy = 0x%x",
+ priv->fav_child_policy);
+
priv->did_local_discovery = _gf_false;
priv->did_discovery = _gf_false;
@@ -335,6 +378,7 @@ init (xlator_t *this)
xlator_t *fav_child = NULL;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ gf_boolean_t policy_flag;
if (!this->children) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -421,6 +465,29 @@ init (xlator_t *this)
if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
goto out;
+ GF_OPTION_INIT ("favorite-child-by-majority", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MAJORITY);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-mtime", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_MTIME);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-ctime", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_CTIME);
+ }
+
+ GF_OPTION_INIT ("favorite-child-by-size", policy_flag, bool, out);
+ if (policy_flag) {
+ priv->fav_child_policy |= (1 << AFR_FAV_CHILD_BY_SIZE);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "fav_child policy = 0x%x",
+ priv->fav_child_policy);
+
GF_OPTION_INIT ("shd-max-threads", priv->shd.max_threads,
uint32, out);
@@ -1102,7 +1169,6 @@ struct volume_options options[] = {
},
{ .key = {"favorite-child-policy"},
.type = GF_OPTION_TYPE_STR,
- .value = {"none", "size", "ctime", "mtime", "majority"},
.default_value = "none",
.description = "This option can be used to automatically resolve "
"split-brains using various policies without user "
@@ -1111,7 +1177,45 @@ struct volume_options options[] = {
"pick the file with the latest ctime and mtime "
"respectively as the source. \"majority\" picks a file"
" with identical mtime and size in more than half the "
- "number of bricks in the replica.",
+ "number of bricks in the replica. More than one "
+ "policy can be specified, separated by commas. The "
+ "order of attempted application (regardless of the "
+ "specification order) is: majority, mtime, ctime, "
+ "size. The value set here can be modified by the "
+ "favorite-child-by-xxx options."
+ },
+ { .key = {"favorite-child-by-majority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "majority (more than half of the copies with same "
+ "mtime and size). This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-mtime"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "latest mtime. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-ctime"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "latest ctime. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
+ },
+ { .key = {"favorite-child-by-size"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Allow automatic resolution of split-brains by "
+ "greatest size. This can be combined with other "
+ "favorite-child-by-xxx options, and can modify the "
+ "value set by favorite-child-policy."
},
{ .key = {"pgfid-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3314f865781..f6b8fa1b8b8 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -68,12 +68,16 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
uuid_utoa (local->inode->gfid)); \
} while (0)
+/*
+ * These *must* be defined in order of decreasing precedence in order for
+ * afr_sh_get_fav_by_policy to work correctly.
+ */
typedef enum {
- AFR_FAV_CHILD_NONE,
- AFR_FAV_CHILD_BY_SIZE,
- AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_NONE = 0,
+ AFR_FAV_CHILD_BY_MAJORITY, /* Highest precedence. */
AFR_FAV_CHILD_BY_MTIME,
- AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_SIZE, /* Lowest precedence. */
AFR_FAV_CHILD_POLICY_MAX,
} afr_favorite_child_policy;
@@ -136,8 +140,13 @@ typedef struct _afr_private {
int favorite_child; /* subvolume to be preferred in resolving
split-brain cases */
- afr_favorite_child_policy fav_child_policy;/*Policy to use for automatic
- resolution of split-brains.*/
+ /*
+ * Policy to use for automatic resolution of split-brains. This needs
+ * to be a bit-field so that we can iterate over multiple policies when
+ * earlier ones yield ties. The actual bits used are (1 << X) where X
+ * is one of the enum values from afr_favorite_child_policy.
+ */
+ uint32_t fav_child_policy;
gf_boolean_t inodelk_trace;
gf_boolean_t entrylk_trace;