summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2014-08-20 21:50:06 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2014-10-01 03:29:12 -0700
commit443e48abf9b373bb1a8c56d016aad3b974554b80 (patch)
tree4a7124ff1c959d11c275dec9a0906519694e37ed /xlators/cluster/afr/src
parentf95a25c35e1ced6a3c05030d34555b757b14e1c9 (diff)
cluster/afr: Add afr-v1 xattr compatibility
This patch handles all the special cases that afr-v1 handles, as well as the self-accusing pending changelog from the v1 pre-op.
Change-Id: Ie10f71633fb20276f01ecafbd728f20483e7029c
BUG: 1128721
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/8536
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c139
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c187
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c21
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c31
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c20
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h15
6 files changed, 330 insertions, 83 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index ddccc7f38ed..14a514beffa 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -287,7 +287,36 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
return 0;
}
+/*
+ * Rebuild sinks[] from scratch: after the call sinks[i] is 1 exactly for
+ * those children that are not marked in sources[] and are marked in
+ * locked_on[]; every other entry is 0.
+ */
+void
+afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
+                       unsigned char *locked_on, unsigned char *sinks)
+{
+        afr_private_t *conf  = this->private;
+        int            child = 0;
+
+        /* Start from an all-clear array, then flag the qualifying children */
+        memset (sinks, 0, sizeof (*sinks) * conf->child_count);
+
+        for (child = 0; child < conf->child_count; child++) {
+                if (sources[child] || !locked_on[child])
+                        continue;
+                sinks[child] = 1;
+        }
+}
+/*
+ * Return _gf_true if at least one child has a non-zero entry in
+ * witness[], _gf_false otherwise.
+ */
+gf_boolean_t
+afr_does_witness_exist (xlator_t *this, uint64_t *witness)
+{
+        afr_private_t *conf  = this->private;
+        gf_boolean_t   found = _gf_false;
+        int            child = 0;
+
+        /* Stop scanning as soon as one witness is seen */
+        for (child = 0; child < conf->child_count && !found; child++) {
+                if (witness[child] != 0)
+                        found = _gf_true;
+        }
+
+        return found;
+}
/*
* This function determines if a self-heal is required for a given inode,
@@ -309,22 +338,29 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
*/
int
-afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,
- afr_transaction_type type, unsigned char *locked_on,
- unsigned char *sources, unsigned char *sinks)
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, uint64_t *witness)
{
- afr_private_t *priv = NULL;
- int i = 0;
- int j = 0;
- int *dirty = NULL;
- int **matrix = NULL;
- char *accused = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL; /* Denotes if dirty xattr is set */
+ int **matrix = NULL;/* Changelog matrix */
+ char *accused = NULL;/* Accused others without any self-accusal */
+ char *pending = NULL;/* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
priv = this->private;
dirty = alloca0 (priv->child_count * sizeof (int));
accused = alloca0 (priv->child_count);
+ pending = alloca0 (priv->child_count);
+ self_accused = alloca0 (priv->child_count);
matrix = ALLOC_MATRIX(priv->child_count, int);
+ memset (witness, 0, sizeof (*witness) * priv->child_count);
if (afr_success_count (replies,
priv->child_count) < AFR_SH_MIN_PARTICIPANTS) {
@@ -335,11 +371,23 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,
/* First construct the pending matrix for further analysis */
afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
+ /* short list all self-accused */
+ for (i = 0; i < priv->child_count; i++) {
+ if (matrix[i][i])
+ self_accused[i] = 1;
+ }
+
/* Next short list all accused to exclude them from being sources */
+ /* Self-accused can't accuse others as they are FOOLs */
for (i = 0; i < priv->child_count; i++) {
for (j = 0; j < priv->child_count; j++) {
- if (matrix[i][j])
- accused[j] = 1;
+ if (matrix[i][j]) {
+ if (!self_accused[i])
+ accused[j] = 1;
+
+ if (i != j)
+ pending[i] = 1;
+ }
}
}
@@ -350,38 +398,47 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,
sources[i] = 1;
}
- /* Everyone accused by sources are sinks */
- memset (sinks, 0, priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- for (j = 0; j < priv->child_count; j++) {
- if (matrix[i][j])
- sinks[j] = 1;
- }
- }
+ /* Everyone accused by non-self-accused sources are sinks */
+ memset (sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
+ }
+ }
- /* If any source has 'dirty' bit, pick first
- 'dirty' source and make everybody else sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] && dirty[i]) {
- for (j = 0; j < priv->child_count; j++) {
- if (j != i) {
- sources[j] = 0;
- sinks[j] = 1;
- }
- }
- break;
- }
- }
+ /* To break ties, record the number of fops each child has witnessed */
- /* If no sources, all locked nodes are sinks - split brain */
- if (AFR_COUNT (sources, priv->child_count) == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (locked_on[i])
- sinks[i] = 1;
- }
- }
+ /*
+ * count the pending fops witnessed from itself to others when it is
+ * self-accused
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (i == j)
+ continue;
+ witness[i] += matrix[i][j];
+ }
+ }
+
+ /* In afr-v1 if a file is self-accused but didn't have any pending
+ * operations on others then it is similar to 'dirty' in afr-v2.
+ * Consider such cases as witness.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i] && !pending[i])
+ witness[i] += matrix[i][i];
+ }
+
+ /* count the number of dirty fops witnessed */
+ for (i = 0; i < priv->child_count; i++)
+ witness[i] += dirty[i];
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 74088f4bf6d..f7503faa719 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -372,21 +372,160 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,
return 0;
}
+/*
+ * Return _gf_true if any child that is marked in sources[] also has a
+ * non-zero entry in witness[], _gf_false otherwise.
+ */
+gf_boolean_t
+afr_has_source_witnesses (xlator_t *this, unsigned char *sources,
+                          uint64_t *witness)
+{
+        afr_private_t *conf  = this->private;
+        gf_boolean_t   found = _gf_false;
+        int            child = 0;
+
+        /* Stop scanning at the first source that carries a witness count */
+        for (child = 0; child < conf->child_count && !found; child++) {
+                if (sources[child] && witness[child])
+                        found = _gf_true;
+        }
+
+        return found;
+}
+
+/*
+ * Report whether the children marked in sources[] disagree on file size.
+ *
+ * Only children that are sources and whose reply is valid and successful
+ * (op_ret >= 0) take part in the comparison. Returns _gf_true when the
+ * smallest and largest poststat.ia_size among them differ, and _gf_false
+ * when they all agree or when fewer than one such child exists.
+ */
+static gf_boolean_t
+afr_does_size_mismatch (xlator_t *this, unsigned char *sources,
+                        struct afr_reply *replies)
+{
+        int            i    = 0;
+        afr_private_t *priv = NULL;
+        struct iatt   *min  = NULL;
+        struct iatt   *max  = NULL;
+
+        priv = this->private;
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* Non-sources are about to be healed, so their size is
+                 * irrelevant here; only a disagreement among sources
+                 * calls for a tie-break. */
+                if (!sources[i])
+                        continue;
+
+                if (!replies[i].valid)
+                        continue;
+
+                if (replies[i].op_ret < 0)
+                        continue;
+
+                if (!min || replies[i].poststat.ia_size < min->ia_size)
+                        min = &replies[i].poststat;
+
+                if (!max || replies[i].poststat.ia_size > max->ia_size)
+                        max = &replies[i].poststat;
+        }
+
+        if (min && max && min->ia_size != max->ia_size)
+                return _gf_true;
+
+        return _gf_false;
+}
/*
* If by chance there are multiple sources with differing sizes, select
* the largest file as the source.
*
- * This can only happen if data was directly modified in the backend.
+ * This can happen if data was directly modified in the backend or for snapshots
*/
+
+/*
+ * Keep only the largest file(s) as sources: every child marked in
+ * sources[] whose poststat.ia_size is smaller than the largest size seen
+ * among the sources is unmarked. Sources tied for the largest size all
+ * stay marked.
+ */
+static void
+afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources,
+                                 struct afr_reply *replies)
+{
+        afr_private_t *conf    = this->private;
+        uint64_t       largest = 0;
+        int            child   = 0;
+
+        /* Pass 1: largest size reported by any current source */
+        for (child = 0; child < conf->child_count; child++) {
+                if (sources[child] &&
+                    replies[child].poststat.ia_size > largest)
+                        largest = replies[child].poststat.ia_size;
+        }
+
+        /* Pass 2: drop every source that is smaller than that */
+        for (child = 0; child < conf->child_count; child++) {
+                if (sources[child] &&
+                    replies[child].poststat.ia_size < largest)
+                        sources[child] = 0;
+        }
+}
+
+/*
+ * Keep only the source(s) with the highest witness count: every child
+ * marked in sources[] whose witness[] value is below the maximum found
+ * among the sources is unmarked. Sources tied for the maximum all stay
+ * marked.
+ */
+static void
+afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources,
+                                    uint64_t *witness)
+{
+        afr_private_t *conf  = this->private;
+        uint64_t       top   = 0;
+        int            child = 0;
+
+        /* Pass 1: highest witness count held by any current source */
+        for (child = 0; child < conf->child_count; child++) {
+                if (sources[child] && witness[child] > top)
+                        top = witness[child];
+        }
+
+        /* Pass 2: drop every source that falls short of it */
+        for (child = 0; child < conf->child_count; child++) {
+                if (sources[child] && witness[child] < top)
+                        sources[child] = 0;
+        }
+}
+
+/*
+ * Tie breaker: leave exactly one child marked in sources[], namely the
+ * source whose reply carries the latest poststat.ia_ctime. When several
+ * sources share that ctime, the highest-indexed one is kept. If no child
+ * is marked as a source on entry, sources[] is left untouched.
+ */
+static void
+afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources,
+                                struct afr_reply *replies)
+{
+        int            i         = 0;
+        afr_private_t *priv      = NULL;
+        int            source    = -1;
+        uint32_t       max_ctime = 0;
+
+        priv = this->private;
+        /* Find source with latest ctime */
+        for (i = 0; i < priv->child_count; i++) {
+                if (!sources[i])
+                        continue;
+
+                if (max_ctime <= replies[i].poststat.ia_ctime) {
+                        source = i;
+                        max_ctime = replies[i].poststat.ia_ctime;
+                }
+        }
+
+        /* Nothing to choose from: indexing sources[] with -1 would write
+         * before the start of the array. */
+        if (source < 0)
+                return;
+
+        /* Only mark one of the files as source to break ties */
+        memset (sources, 0, sizeof (*sources) * priv->child_count);
+        sources[source] = 1;
+}
+
static int
__afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,
unsigned char *healed_sinks,
unsigned char *locked_on,
- struct afr_reply *replies)
+ struct afr_reply *replies,
+ uint64_t *witness)
{
int i = 0;
afr_private_t *priv = NULL;
- uint64_t size = 0;
int source = -1;
int sources_count = 0;
@@ -400,24 +539,24 @@ __afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,
return -EIO;
}
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (size <= replies[i].poststat.ia_size) {
- size = replies[i].poststat.ia_size;
- source = i;
- }
- }
+ /* If there are no witnesses/size-mismatches on sources we are done*/
+ if (!afr_does_size_mismatch (this, sources, replies) &&
+ !afr_has_source_witnesses (this, sources, witness))
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (!sources[i])
- continue;
- if (replies[i].poststat.ia_size < size) {
- sources[i] = 0;
- healed_sinks[i] = 1;
- }
- }
+ afr_mark_largest_file_as_source (this, sources, replies);
+ afr_mark_biggest_witness_as_source (this, sources, witness);
+ afr_mark_newest_file_as_source (this, sources, replies);
+
+out:
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
return source;
}
@@ -439,6 +578,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int source = -1;
afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
priv = this->private;
@@ -447,15 +587,16 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (ret)
return ret;
- ret = afr_selfheal_find_direction (this, replies, AFR_DATA_TRANSACTION,
- locked_on, sources, sinks);
+ witness = alloca0(priv->child_count * sizeof (*witness));
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_DATA_TRANSACTION,
+ locked_on, sources, sinks, witness);
if (ret)
return ret;
/* Initialize the healed_sinks[] array optimistically to
the intersection of to-be-healed (i.e sinks[]) and
the list of servers which are up (i.e locked_on[]).
-
As we encounter failures in the healing process, we
will unmark the respective servers in the healed_sinks[]
array.
@@ -464,7 +605,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
source = __afr_selfheal_data_finalize_source (this, sources,
healed_sinks, locked_on,
- replies);
+ replies, witness);
if (source < 0)
return -EIO;
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index df6dfaaf396..3ea30a6a9d0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -326,7 +326,9 @@ __afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
static int
__afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
unsigned char *healed_sinks,
- unsigned char *locked_on)
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
int i = 0;
afr_private_t *priv = NULL;
@@ -338,7 +340,10 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,
sources_count = AFR_COUNT (sources, priv->child_count);
if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count) {
+ || !sources_count || afr_does_witness_exist (this, witness)) {
+
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
return -1;
}
@@ -362,6 +367,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int source = -1;
afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
priv = this->private;
@@ -370,8 +376,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (ret)
return ret;
- ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION,
- locked_on, sources, sinks);
+ witness = alloca0 (sizeof (*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks, witness);
if (ret)
return ret;
@@ -386,7 +394,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,
AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
source = __afr_selfheal_entry_finalize_source (this, sources,
- healed_sinks, locked_on);
+ healed_sinks,
+ locked_on, replies,
+ witness);
+
if (source < 0) {
/* If source is < 0 (typically split-brain), we perform a
conservative merge of entries rather than erroring out */
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index b4714fe9e05..96b3262e471 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -169,7 +169,6 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
return source;
}
-
static int
__afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,
unsigned char *locked_on, unsigned char *sources,
@@ -179,6 +178,8 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
int ret = -1;
int source = -1;
afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t *witness = NULL;
priv = this->private;
@@ -187,9 +188,10 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
if (ret)
return ret;
- ret = afr_selfheal_find_direction (this, replies,
- AFR_METADATA_TRANSACTION,
- locked_on, sources, sinks);
+ witness = alloca0 (sizeof (*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_METADATA_TRANSACTION,
+ locked_on, sources, sinks, witness);
if (ret)
return ret;
@@ -203,9 +205,28 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i
*/
AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
+ /* If any source has witness, pick first
+ * witness source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ if (source != -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && sources[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ }
+
source = __afr_selfheal_metadata_finalize_source (frame, this, sources,
healed_sinks,
- locked_on, replies);
+ locked_on, replies);
+
if (source < 0)
return -EIO;
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index c5d126185c7..af635f06d52 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -457,7 +457,9 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
int
__afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
unsigned char *healed_sinks,
- unsigned char *locked_on)
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
int i = 0;
afr_private_t *priv = NULL;
@@ -469,7 +471,9 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
sources_count = AFR_COUNT (sources, priv->child_count);
if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
- || !sources_count) {
+ || !sources_count || afr_does_witness_exist (this, witness)) {
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
return -1;
}
@@ -483,7 +487,6 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,
return source;
}
-
int
__afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,
uuid_t pargfid, unsigned char *locked_on,
@@ -494,6 +497,7 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren
int source = -1;
afr_private_t *priv = NULL;
struct afr_reply *replies = NULL;
+ uint64_t *witness = NULL;
priv = this->private;
@@ -503,8 +507,10 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren
if (ret)
goto out;
- ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION,
- locked_on, sources, sinks);
+ witness = alloca0 (sizeof (*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction (frame, this, replies,
+ AFR_ENTRY_TRANSACTION,
+ locked_on, sources, sinks, witness);
if (ret)
goto out;
@@ -519,7 +525,9 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren
AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);
source = __afr_selfheal_name_finalize_source (this, sources,
- healed_sinks, locked_on);
+ healed_sinks,
+ locked_on, replies,
+ witness);
if (source < 0) {
/* If source is < 0 (typically split-brain), we perform a
conservative merge of entries rather than erroring out */
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index c32ec120a50..f208e6bc813 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -136,9 +136,11 @@ afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,
unsigned char *lookup_on, dict_t *xattr);
int
-afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,
- afr_transaction_type type, unsigned char *locked_on,
- unsigned char *sources, unsigned char *sinks);
+afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, uint64_t *witness);
int
afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,
@@ -189,4 +191,11 @@ afr_success_count (struct afr_reply *replies, unsigned int count);
void
afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,
int source, unsigned char *healed_sinks);
+
+void
+afr_mark_active_sinks (xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks);
+
+gf_boolean_t
+afr_does_witness_exist (xlator_t *this, uint64_t *witness);
#endif /* !_AFR_SELFHEAL_H */