diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2014-08-20 21:50:06 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2014-10-22 03:03:51 -0700 | 
| commit | d7e56dde3c19a11647eae9cf6a868c191c641489 (patch) | |
| tree | 978d388b34c1e773b753936bfe71ec8f432e49e1 | |
| parent | 4522acc20bdd1ca17c053969ef7edce1bb6ede76 (diff) | |
cluster/afr: Add afr-v1 xattr compatibility
        Backport of http://review.gluster.org/8536
This patch handles all the special cases that afr-v1 handles, and it
also handles the self-accusing pending changelog from the afr-v1
pre-op.
BUG: 1155017
Change-Id: I86cf6b80492be5c1f240c74f91a0e1b0dd9b58b2
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/8956
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 139 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 187 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 21 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 31 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 20 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 15 | 
6 files changed, 330 insertions, 83 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index ddccc7f38ed..14a514beffa 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -287,7 +287,36 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,  	return 0;  } +void +afr_mark_active_sinks (xlator_t *this, unsigned char *sources, +                       unsigned char *locked_on, unsigned char *sinks) +{ +        int i = 0; +        afr_private_t *priv = NULL; + +        priv = this->private; + +        memset (sinks, 0, sizeof (*sinks) * priv->child_count); +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i] && locked_on[i]) +                        sinks[i] = 1; +        } +} +gf_boolean_t +afr_does_witness_exist (xlator_t *this, uint64_t *witness) +{ +        int i = 0; +        afr_private_t *priv = NULL; + +        priv = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                if (witness[i]) +                        return _gf_true; +        } +        return _gf_false; +}  /*   * This function determines if a self-heal is required for a given inode, @@ -309,22 +338,29 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies,   */  int -afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, -			     afr_transaction_type type, unsigned char *locked_on, -			     unsigned char *sources, unsigned char *sinks) +afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, +                             struct afr_reply *replies, +                             afr_transaction_type type, +                             unsigned char *locked_on, unsigned char *sources, +                             unsigned char *sinks, uint64_t *witness)  { -	afr_private_t *priv = NULL; -	int i = 0; -	int j = 0; -	int *dirty = NULL; -	int **matrix = NULL; -	char *accused = NULL; +        
afr_private_t *priv = NULL; +        int i = 0; +        int j = 0; +        int *dirty = NULL; /* Denotes if dirty xattr is set */ +        int **matrix = NULL;/* Changelog matrix */ +        char *accused = NULL;/* Accused others without any self-accusal */ +        char *pending = NULL;/* Have pending operations on others */ +        char *self_accused = NULL; /* Accused itself */  	priv = this->private;  	dirty = alloca0 (priv->child_count * sizeof (int));  	accused = alloca0 (priv->child_count); +        pending = alloca0 (priv->child_count); +        self_accused = alloca0 (priv->child_count);  	matrix = ALLOC_MATRIX(priv->child_count, int); +        memset (witness, 0, sizeof (*witness) * priv->child_count);          if (afr_success_count (replies,                                 priv->child_count) < AFR_SH_MIN_PARTICIPANTS) { @@ -335,11 +371,23 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,  	/* First construct the pending matrix for further analysis */  	afr_selfheal_extract_xattr (this, replies, type, dirty, matrix); +        /* short list all self-accused */ +        for (i = 0; i < priv->child_count; i++) { +                if (matrix[i][i]) +                        self_accused[i] = 1; +        } +  	/* Next short list all accused to exclude them from being sources */ +        /* Self-accused can't accuse others as they are FOOLs */  	for (i = 0; i < priv->child_count; i++) {  		for (j = 0; j < priv->child_count; j++) { -			if (matrix[i][j]) -				accused[j] = 1; +                        if (matrix[i][j]) { +                                 if (!self_accused[i]) +                                         accused[j] = 1; + +                                 if (i != j) +                                         pending[i] = 1; +                         }  		}  	} @@ -350,38 +398,47 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,  			sources[i] = 1;  	} -	/* Everyone accused by sources are sinks */ -	
memset (sinks, 0, priv->child_count); -	for (i = 0; i < priv->child_count; i++) { -		if (!sources[i]) -			continue; -		for (j = 0; j < priv->child_count; j++) { -			if (matrix[i][j]) -				sinks[j] = 1; -		} -	} +        /* Everyone accused by non-self-accused sources are sinks */ +        memset (sinks, 0, priv->child_count); +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; +                if (self_accused[i]) +                        continue; +                for (j = 0; j < priv->child_count; j++) { +                        if (matrix[i][j]) +                                sinks[j] = 1; +                } +        } -	/* If any source has 'dirty' bit, pick first -	   'dirty' source and make everybody else sinks */ -	for (i = 0; i < priv->child_count; i++) { -		if (sources[i] && dirty[i]) { -			for (j = 0; j < priv->child_count; j++) { -				if (j != i) { -					sources[j] = 0; -					sinks[j] = 1; -				} -			} -			break; -		} -	} +        /* For breaking ties provide with number of fops they witnessed */ -	/* If no sources, all locked nodes are sinks - split brain */ -	if (AFR_COUNT (sources, priv->child_count) == 0) { -		for (i = 0; i < priv->child_count; i++) { -			if (locked_on[i]) -				sinks[i] = 1; -		} -	} +        /* +         * count the pending fops witnessed from itself to others when it is +         * self-accused +         */ +        for (i = 0; i < priv->child_count; i++) { +                if (!self_accused[i]) +                        continue; +                for (j = 0; j < priv->child_count; j++) { +                        if (i == j) +                                continue; +                        witness[i] += matrix[i][j]; +                } +        } + +        /* In afr-v1 if a file is self-accused but didn't have any pending +         * operations on others then it is similar to 'dirty' in afr-v2. +         * Consider such cases as witness. 
+         */ +        for (i = 0; i < priv->child_count; i++) { +                if (self_accused[i] && !pending[i]) +                        witness[i] += matrix[i][i]; +        } + +        /* count the number of dirty fops witnessed */ +        for (i = 0; i < priv->child_count; i++) +                witness[i] += dirty[i];  	return 0;  } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 74088f4bf6d..f7503faa719 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -372,21 +372,160 @@ __afr_selfheal_truncate_sinks (call_frame_t *frame, xlator_t *this,  	return 0;  } +gf_boolean_t +afr_has_source_witnesses (xlator_t *this, unsigned char *sources, +                          uint64_t *witness) +{ +        int i = 0; +        afr_private_t *priv = NULL; + +        priv = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                if (sources[i] && witness[i]) +                        return _gf_true; +        } +        return _gf_false; +} + +static gf_boolean_t +afr_does_size_mismatch (xlator_t *this, unsigned char *sources, +                        struct afr_reply *replies) +{ +        int     i = 0; +        afr_private_t *priv = NULL; +        struct iatt *min = NULL; +        struct iatt *max = NULL; + +        priv = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                if (!replies[i].valid) +                        continue; + +                if (replies[i].op_ret < 0) +                        continue; + +                if (!min) +                        min = &replies[i].poststat; + +                if (!max) +                        max = &replies[i].poststat; + +                if (min->ia_size > replies[i].poststat.ia_size) +                        min = &replies[i].poststat; + +                if (max->ia_size < replies[i].poststat.ia_size) +                        max = 
&replies[i].poststat; +        } + +        if (min && max) { +                if (min->ia_size != max->ia_size) +                        return _gf_true; +        } + +        return _gf_false; +}  /*   * If by chance there are multiple sources with differing sizes, select   * the largest file as the source.   * - * This can only happen if data was directly modified in the backend. + * This can happen if data was directly modified in the backend or for snapshots   */ + +static void +afr_mark_largest_file_as_source (xlator_t *this, unsigned char *sources, +                                 struct afr_reply *replies) +{ +        int i = 0; +        afr_private_t *priv = NULL; +        uint64_t size = 0; + +        /* Find source with biggest file size */ +        priv = this->private; +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; +                if (size <= replies[i].poststat.ia_size) { +                        size = replies[i].poststat.ia_size; +                } +        } + +        /* Mark sources with less size as not source */ +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; +                if (size > replies[i].poststat.ia_size) +                        sources[i] = 0; +        } + +        return; +} + +static void +afr_mark_biggest_witness_as_source (xlator_t *this, unsigned char *sources, +                                    uint64_t *witness) +{ +        int i = 0; +        afr_private_t *priv = NULL; +        uint64_t biggest_witness = 0; + +        priv = this->private; +        /* Find source with biggest witness count */ +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; +                if (biggest_witness < witness[i]) +                        biggest_witness = witness[i]; +        } + +        /* Mark files with less witness count as 
not source */ +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; +                if (witness[i] < biggest_witness) +                        sources[i] = 0; +        } + +        return; +} + +/* This is a tie breaker function. Only one source be assigned here */ +static void +afr_mark_newest_file_as_source (xlator_t *this, unsigned char *sources, +                                struct afr_reply *replies) +{ +        int i = 0; +        afr_private_t *priv = NULL; +        int source = -1; +        uint32_t max_ctime = 0; + +        priv = this->private; +        /* Find source with latest ctime */ +        for (i = 0; i < priv->child_count; i++) { +                if (!sources[i]) +                        continue; + +                if (max_ctime <= replies[i].poststat.ia_ctime) { +                        source = i; +                        max_ctime = replies[i].poststat.ia_ctime; +                } +        } + +        /* Only mark one of the files as source to break ties */ +        memset (sources, 0, sizeof (*sources) * priv->child_count); +        sources[source] = 1; +} +  static int  __afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,  				     unsigned char *healed_sinks,  				     unsigned char *locked_on, -				     struct afr_reply *replies) +				     struct afr_reply *replies, +                                     uint64_t *witness)  {  	int i = 0;  	afr_private_t *priv = NULL; -	uint64_t size = 0;  	int source = -1;  	int sources_count = 0; @@ -400,24 +539,24 @@ __afr_selfheal_data_finalize_source (xlator_t *this, unsigned char *sources,  		return -EIO;  	} -	for (i = 0; i < priv->child_count; i++) { -		if (!sources[i]) -			continue; -		if (size <= replies[i].poststat.ia_size) { -			size = replies[i].poststat.ia_size; -			source = i; -		} -	} +        /* If there are no witnesses/size-mismatches on sources we are done*/ +        if 
(!afr_does_size_mismatch (this, sources, replies) && +            !afr_has_source_witnesses (this, sources, witness)) +                goto out; -	for (i = 0; i < priv->child_count; i++) { -		if (!sources[i]) -			continue; -		if (replies[i].poststat.ia_size < size) { -			sources[i] = 0; -			healed_sinks[i] = 1; -		} -	} +        afr_mark_largest_file_as_source (this, sources, replies); +        afr_mark_biggest_witness_as_source (this, sources, witness); +        afr_mark_newest_file_as_source (this, sources, replies); + +out: +        afr_mark_active_sinks (this, sources, locked_on, healed_sinks); +        for (i = 0; i < priv->child_count; i++) { +                if (sources[i]) { +                        source = i; +                        break; +                } +        }  	return source;  } @@ -439,6 +578,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,  	int ret = -1;  	int source = -1;  	afr_private_t *priv = NULL; +        uint64_t *witness = NULL;  	priv = this->private; @@ -447,15 +587,16 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,  	if (ret)  		return ret; -	ret = afr_selfheal_find_direction (this, replies, AFR_DATA_TRANSACTION, -					   locked_on, sources, sinks); +        witness = alloca0(priv->child_count * sizeof (*witness)); +	ret = afr_selfheal_find_direction (frame, this, replies, +					   AFR_DATA_TRANSACTION, +					   locked_on, sources, sinks, witness);  	if (ret)  		return ret;          /* Initialize the healed_sinks[] array optimistically to             the intersection of to-be-healed (i.e sinks[]) and             the list of servers which are up (i.e locked_on[]). -             As we encounter failures in the healing process, we             will unmark the respective servers in the healed_sinks[]             array. 
@@ -464,7 +605,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,  	source = __afr_selfheal_data_finalize_source (this, sources,                                                        healed_sinks, locked_on, -                                                      replies); +                                                      replies, witness);  	if (source < 0)  		return -EIO; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index df6dfaaf396..3ea30a6a9d0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -326,7 +326,9 @@ __afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,  static int  __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,  				      unsigned char *healed_sinks, -				      unsigned char *locked_on) +                                      unsigned char *locked_on, +                                      struct afr_reply *replies, +                                      uint64_t *witness)  {  	int i = 0;  	afr_private_t *priv = NULL; @@ -338,7 +340,10 @@ __afr_selfheal_entry_finalize_source (xlator_t *this, unsigned char *sources,  	sources_count = AFR_COUNT (sources, priv->child_count);  	if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) -            || !sources_count) { +            || !sources_count || afr_does_witness_exist (this, witness)) { + +                memset (sources, 0, sizeof (*sources) * priv->child_count); +                afr_mark_active_sinks (this, sources, locked_on, healed_sinks);  		return -1;  	} @@ -362,6 +367,7 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,  	int ret = -1;  	int source = -1;  	afr_private_t *priv = NULL; +        uint64_t *witness = NULL;  	priv = this->private; @@ -370,8 +376,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,  	if 
(ret)  		return ret; -	ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION, -					   locked_on, sources, sinks); +        witness = alloca0 (sizeof (*witness) * priv->child_count); +	ret = afr_selfheal_find_direction (frame, this, replies, +					   AFR_ENTRY_TRANSACTION, +					   locked_on, sources, sinks, witness);  	if (ret)  		return ret; @@ -386,7 +394,10 @@ __afr_selfheal_entry_prepare (call_frame_t *frame, xlator_t *this, fd_t *fd,          AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);  	source = __afr_selfheal_entry_finalize_source (this, sources, -                                                       healed_sinks, locked_on); +                                                       healed_sinks, +						       locked_on, replies, +                                                       witness); +  	if (source < 0) {  		/* If source is < 0 (typically split-brain), we perform a  		   conservative merge of entries rather than erroring out */ diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index b4714fe9e05..96b3262e471 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -169,7 +169,6 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,  	return source;  } -  static int  __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode,  				 unsigned char *locked_on, unsigned char *sources, @@ -179,6 +178,8 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i  	int ret = -1;  	int source = -1;  	afr_private_t *priv = NULL; +	int i = 0; +        uint64_t *witness = NULL;  	priv = this->private; @@ -187,9 +188,10 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i  	if (ret)  		return ret; -	ret = afr_selfheal_find_direction (this, replies, -                                           
AFR_METADATA_TRANSACTION, -					   locked_on, sources, sinks); +        witness = alloca0 (sizeof (*witness) * priv->child_count); +	ret = afr_selfheal_find_direction (frame, this, replies, +					   AFR_METADATA_TRANSACTION, +					   locked_on, sources, sinks, witness);  	if (ret)  		return ret; @@ -203,9 +205,28 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i          */          AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count); +        /* If any source has witness, pick first +         * witness source and make everybody else sinks */ +        for (i = 0; i < priv->child_count; i++) { +                if (sources[i] && witness[i]) { +                        source = i; +                        break; +                } +        } + +        if (source != -1) { +                for (i = 0; i < priv->child_count; i++) { +                        if (i != source && sources[i]) { +                                sources[i] = 0; +                                healed_sinks[i] = 1; +                        } +                } +        } +  	source = __afr_selfheal_metadata_finalize_source (frame, this, sources,                                                            healed_sinks, -							  locked_on, replies); +                                                          locked_on, replies); +  	if (source < 0)  		return -EIO; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index c5d126185c7..af635f06d52 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -457,7 +457,9 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,  int  __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,  				     unsigned char *healed_sinks, -                                     unsigned char *locked_on) +				     unsigned char *locked_on, +				     struct afr_reply 
*replies, +                                     uint64_t *witness)  {  	int i = 0;  	afr_private_t *priv = NULL; @@ -469,7 +471,9 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,  	sources_count = AFR_COUNT (sources, priv->child_count);  	if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0) -            || !sources_count) { +            || !sources_count || afr_does_witness_exist (this, witness)) { +                memset (sources, 0, sizeof (*sources) * priv->child_count); +                afr_mark_active_sinks (this, sources, locked_on, healed_sinks);  		return -1;  	} @@ -483,7 +487,6 @@ __afr_selfheal_name_finalize_source (xlator_t *this, unsigned char *sources,  	return source;  } -  int  __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *parent,  			     uuid_t pargfid, unsigned char *locked_on, @@ -494,6 +497,7 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren  	int source = -1;  	afr_private_t *priv = NULL;          struct afr_reply *replies = NULL; +        uint64_t *witness = NULL;  	priv = this->private; @@ -503,8 +507,10 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren  	if (ret)  		goto out; -	ret = afr_selfheal_find_direction (this, replies, AFR_ENTRY_TRANSACTION, -					   locked_on, sources, sinks); +        witness = alloca0 (sizeof (*witness) * priv->child_count); +	ret = afr_selfheal_find_direction (frame, this, replies, +					   AFR_ENTRY_TRANSACTION, +					   locked_on, sources, sinks, witness);  	if (ret)  		goto out; @@ -519,7 +525,9 @@ __afr_selfheal_name_prepare (call_frame_t *frame, xlator_t *this, inode_t *paren          AFR_INTERSECT (healed_sinks, sinks, locked_on, priv->child_count);  	source = __afr_selfheal_name_finalize_source (this, sources, -                                                      healed_sinks, locked_on); +                                                      healed_sinks, +						     
 locked_on, replies, +                                                      witness);  	if (source < 0) {  		/* If source is < 0 (typically split-brain), we perform a  		   conservative merge of entries rather than erroring out */ diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index c32ec120a50..f208e6bc813 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -136,9 +136,11 @@ afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent,  				 unsigned char *lookup_on, dict_t *xattr);  int -afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies, -			     afr_transaction_type type, unsigned char *locked_on, -			     unsigned char *sources, unsigned char *sinks); +afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, +                             struct afr_reply *replies, +                             afr_transaction_type type, +                             unsigned char *locked_on, unsigned char *sources, +                             unsigned char *sinks, uint64_t *witness);  int  afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, @@ -189,4 +191,11 @@ afr_success_count (struct afr_reply *replies, unsigned int count);  void  afr_log_selfheal (uuid_t gfid, xlator_t *this, int ret, char *type,                    int source, unsigned char *healed_sinks); + +void +afr_mark_active_sinks (xlator_t *this, unsigned char *sources, +                       unsigned char *locked_on, unsigned char *sinks); + +gf_boolean_t +afr_does_witness_exist (xlator_t *this, uint64_t *witness);  #endif /* !_AFR_SELFHEAL_H */  | 
