diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 9 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 11 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 239 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 20 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 27 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 25 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 35 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 132 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 52 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 53 | 
11 files changed, 322 insertions, 287 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 2a3d1548a8c..e357c7b1788 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -297,7 +297,6 @@ afr_create (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.basename = AFR_BASENAME (loc->path); -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -521,7 +520,6 @@ afr_mknod (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.basename = AFR_BASENAME (loc->path); -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -745,7 +743,6 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.basename = AFR_BASENAME (loc->path); -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -973,7 +970,6 @@ afr_link (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame   = frame;  	local->transaction.basename     = AFR_BASENAME (oldloc->path);  	local->transaction.new_basename = AFR_BASENAME (newloc->path); -	local->transaction.pending      = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1199,7 +1195,6 @@ afr_symlink (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame   = frame;  	local->transaction.basename     = AFR_BASENAME (loc->path); -	local->transaction.pending      = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1396,7 +1391,6 @@ afr_rename (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame   = frame;  	local->transaction.basename     = AFR_BASENAME (oldloc->path);  	local->transaction.new_basename = AFR_BASENAME (newloc->path); -	local->transaction.pending      = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION); @@ -1583,7 +1577,6 @@ afr_unlink (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.basename = AFR_BASENAME (loc->path); -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1770,7 +1763,6 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.basename = AFR_BASENAME (loc->path); -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION); @@ -1915,7 +1907,6 @@ afr_setdents (call_frame_t *frame, xlator_t *this,  	local->transaction.done = afr_setdents_done;  	local->transaction.basename = NULL; -	local->transaction.pending  = AFR_ENTRY_PENDING;  	afr_transaction (frame, this, AFR_ENTRY_TRANSACTION); diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index e8d843b3ce8..8b568a0eaef 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -226,7 +226,6 @@ afr_chmod (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -421,7 +420,6 @@ afr_fchmod (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -618,7 +616,6 @@ afr_chown (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -816,7 +813,6 @@ afr_fchown (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -1028,8 +1024,6 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,  		local->transaction.len     = iov_length (vector, count);  	} -	local->transaction.pending = AFR_DATA_PENDING; -  	afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);  	op_ret = 0; @@ -1226,7 +1220,6 @@ afr_truncate (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = offset; -	local->transaction.pending = AFR_DATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); @@ -1425,7 +1418,6 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = offset; -	local->transaction.pending = AFR_DATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); @@ -1626,7 +1618,6 @@ afr_utimens (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -1817,7 +1808,6 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); @@ -2007,7 +1997,6 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,  	local->transaction.main_frame = frame;  	local->transaction.start   = 0;  	local->transaction.len     = 0; -	local->transaction.pending = AFR_METADATA_PENDING;  	afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 7c2e403c72a..b74035be8d5 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -88,64 +88,6 @@ afr_sh_supress_errenous_children (int sources[], int child_errno[],  } -int -afr_sh_supress_empty_children (int sources[], dict_t *xattr[], -			       struct stat *buf, -			       int child_count, const char *key) -{ -	int      i = 0; -	int32_t *pending = NULL; -	int      ret = 0; -	int      all_xattr_missing = 1; - -	/* if the file was created by afr with xattrs */ -	for (i = 0; i < child_count; i++) { -		if (!xattr[i]) -			continue; - -		ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); -		if (ret != 0) { -			continue; -		} - -		all_xattr_missing = 0; -		break; -	} - -	if (all_xattr_missing) { -		/* supress 0byte files.. this avoids empty file created -		   by dir selfheal to overwrite the 'good' file */ -		for (i = 0; i < child_count; i++) { -			if (!buf[i].st_size) -				sources[i] = 0; -		} -		goto out; -	} - - -	for (i = 0; i < child_count; i++) { -		if (!xattr[i]) { -			sources[i] = 0; -			continue; -		} - -		ret = dict_get_ptr (xattr[i], (char *)key, VOID(&pending)); -		if (ret != 0) { -			sources[i] = 0; -			continue; -		} - -		if (!pending) { -			sources[i] = 0; -			continue; -		} -	} - -out: -	return 0; -} - -  void  afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)  { @@ -175,11 +117,12 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)  void -afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], -			     int child_count, const char *key) +afr_sh_build_pending_matrix (afr_private_t *priv, +                             int32_t *pending_matrix[], dict_t *xattr[], +			     int child_count, afr_transaction_type type)  { -	int i = 0; -	int j = 0; +	int i, j, k; +  	int32_t *pending = NULL;  	int ret = -1; @@ -200,22 +143,25 @@ afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[],  		pending = NULL; -		ret = dict_get_ptr (xattr[i], (char *) key, -				    VOID(&pending)); -		if (ret != 0) { -                        /* -                         * There is no xattr present. This means this -                         * subvolume should be considered an 'ignorant' -                         * subvolume. -                         */ +                for (j = 0; j < child_count; j++) { +                        ret = dict_get_ptr (xattr[i], priv->pending_key[j], +                                            VOID(&pending)); +                         +                        if (ret != 0) { +                                /* +                                 * There is no xattr present. This means this +                                 * subvolume should be considered an 'ignorant' +                                 * subvolume. +                                 */ + +                                ignorant_subvols[i] = 1; +                                continue; +                        } -                        ignorant_subvols[i] = 1; -			continue; +                        k = afr_index_for_transaction_type (type); +                         +                        pending_matrix[i][j] = ntoh32 (pending[k]);                  } - -		for (j = 0; j < child_count; j++) { -			pending_matrix[i][j] = ntoh32 (pending[j]); -		}  	}          /* @@ -563,12 +509,13 @@ out:  void -afr_sh_pending_to_delta (dict_t **xattr, char *key, +afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,                           int32_t *delta_matrix[], int success[], -                         int child_count) +                         int child_count, afr_transaction_type type)  {  	int i = 0;  	int j = 0; +        int k = 0;          int32_t * pending = NULL;          int       ret     = 0; @@ -583,29 +530,34 @@ afr_sh_pending_to_delta (dict_t **xattr, char *key,  	for (i = 0; i < child_count; i++) {                  pending = NULL; -                ret = dict_get_ptr (xattr[i], (char *) key, -                                    VOID (&pending)); - -		for (j = 0; j < child_count; j++) { -			if (!success[j]) -				continue; - +                for (j = 0; j < child_count; j++) { +                        ret = dict_get_ptr (xattr[i], priv->pending_key[j], +                                            VOID(&pending)); +                         +                        if (!success[j]) +                                continue; + +                        k = afr_index_for_transaction_type (type); +                                                  if (pending) { -                                delta_matrix[i][j] = -(ntoh32 (pending[j])); +                                delta_matrix[i][j] = -(ntoh32 (pending[k]));                          } else { -                                delta_matrix[i][j] = 0; +                                delta_matrix[i][j]  = 0;                          } -		} + +                }  	}  }  int -afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], -		       int child_count, const char *key) +afr_sh_delta_to_xattr (afr_private_t *priv, +                       int32_t *delta_matrix[], dict_t *xattr[], +		       int child_count, afr_transaction_type type)  {  	int i = 0;  	int j = 0; +        int k = 0;  	int ret = 0; @@ -615,13 +567,18 @@ afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[],  		if (!xattr[i])  			continue; -		pending = CALLOC (sizeof (int32_t), child_count);  		for (j = 0; j < child_count; j++) { -			pending[j] = hton32 (delta_matrix[i][j]); -		} +                        pending = CALLOC (sizeof (int32_t), 3); +                        /* 3 = data+metadata+entry */ + +                        k = afr_index_for_transaction_type (type); + +			pending[k] = hton32 (delta_matrix[i][j]); -		ret = dict_set_bin (xattr[i], (char *) key, pending, -				    child_count * sizeof (int32_t)); +                        ret = dict_set_bin (xattr[i], priv->pending_key[j],  +                                            pending, +                                            3 * sizeof (int32_t)); +		}  	}  	return 0; @@ -637,21 +594,24 @@ afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)  	int           ret = -1;  	int            i  = 0; +        int            j  = 0;  	priv = this->private; -	ret = dict_get_ptr (xattr, AFR_METADATA_PENDING, &tmp_pending); +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_get_ptr (xattr, priv->pending_key[i], +                                    &tmp_pending); -	if (ret != 0) -		return 0; +                if (ret != 0) +                        return 0; +                 +                pending = tmp_pending; -	pending = tmp_pending; -	for (i = 0; i < priv->child_count; i++) { -		if (i == child_count) -			continue; -		if (pending[i]) -			return 1; -	} +                j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION); + +                if (pending[j]) +                        return 1; +        }  	return 0;  } @@ -664,23 +624,26 @@ afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)  	int32_t       *pending = NULL;  	void          *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ -	int          ret = -1; -	int            i = 0; +	int           ret = -1; +	int            i  = 0; +        int            j  = 0;  	priv = this->private; -	ret = dict_get_ptr (xattr, AFR_DATA_PENDING, &tmp_pending); +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_get_ptr (xattr, priv->pending_key[i], +                                    &tmp_pending); -	if (ret != 0) -		return 0; +                if (ret != 0) +                        return 0; +                 +                pending = tmp_pending; -	pending = tmp_pending; -	for (i = 0; i < priv->child_count; i++) { -		if (i == child_count) -			continue; -		if (pending[i]) -			return 1; -	} +                j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION); + +                if (pending[j]) +                        return 1; +        }  	return 0;  } @@ -689,33 +652,35 @@ afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)  int  afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)  { -	afr_private_t *priv = NULL; +        afr_private_t *priv = NULL;  	int32_t       *pending = NULL;  	void          *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */ -	 -	int          ret = -1; -	int            i = 0; + +	int           ret = -1; +	int            i  = 0; +        int            j  = 0;  	priv = this->private; -	ret = dict_get_ptr (xattr, AFR_ENTRY_PENDING, &tmp_pending); +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_get_ptr (xattr, priv->pending_key[i], +                                    &tmp_pending); -	if (ret != 0) -		return 0; +                if (ret != 0) +                        return 0; +                 +                pending = tmp_pending; -	pending = tmp_pending; -	for (i = 0; i < priv->child_count; i++) { -		if (i == child_count) -			continue; -		if (pending[i]) -			return 1; -	} +                j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION); + +                if (pending[j]) +                        return 1; +        }  	return 0;  } -  /**   * is_matrix_zero - return true if pending matrix is all zeroes   */ @@ -1229,9 +1194,13 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)  	xattr_req = dict_new(); -	if (xattr_req) -		ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, -				       priv->child_count * sizeof(int32_t)); +	if (xattr_req) { +                for (i = 0; i < priv->child_count; i++) { +                        ret = dict_set_uint64 (xattr_req,  +                                               priv->pending_key[i], +                                               3 * sizeof(int32_t)); +                } +        }  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 86f155b6897..a311cdf5ee7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -41,29 +41,27 @@ int  afr_sh_supress_errenous_children (int sources[], int child_errno[],  				  int child_count); -int -afr_sh_supress_empty_children (int sources[], dict_t *xattr[], -			       struct stat *buf, -			       int child_count, const char *key); -  void  afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);  void -afr_sh_build_pending_matrix (int32_t *pending_matrix[], dict_t *xattr[], -			     int child_count, const char *key); +afr_sh_build_pending_matrix (afr_private_t *priv, +                             int32_t *pending_matrix[], dict_t *xattr[], +			     int child_count, afr_transaction_type type);  void -afr_sh_pending_to_delta (dict_t **xattr, char *key, int32_t *delta_matrix[], -			 int32_t success[], int child_count); +afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr, +                         int32_t *delta_matrix[], int success[], +                         int child_count, afr_transaction_type type);  int  afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,                       afr_self_heal_type type);  int -afr_sh_delta_to_xattr (int32_t *delta_matrix[], dict_t *xattr[], -		       int child_count, const char *key); +afr_sh_delta_to_xattr (afr_private_t *priv, +                       int32_t *delta_matrix[], dict_t *xattr[], +		       int child_count, afr_transaction_type type);  int  afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 0f06e45584e..a7a3d44f7ad 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -301,10 +301,8 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; - -	afr_sh_pending_to_delta (sh->xattr, AFR_DATA_PENDING, -                                 sh->delta_matrix, sh->success, -                                 priv->child_count); +	afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success, +                                 priv->child_count, AFR_DATA_TRANSACTION);  	erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -317,8 +315,8 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)  		}  	} -	afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, -			       priv->child_count, AFR_DATA_PENDING); +	afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, +			       priv->child_count, AFR_DATA_TRANSACTION);  	local->call_count = call_count;  	for (i = 0; i < priv->child_count; i++) { @@ -773,18 +771,14 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr,  -				     priv->child_count, AFR_DATA_PENDING); +	afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,  +				     priv->child_count, AFR_DATA_TRANSACTION);  	afr_sh_print_pending_matrix (sh->pending_matrix, this); -  	nsources = afr_sh_mark_sources (sh, priv->child_count,                                          AFR_SELF_HEAL_DATA); -	afr_sh_supress_empty_children (sh->sources, sh->xattr, sh->buf, -				       priv->child_count, AFR_DATA_PENDING); -  	afr_sh_supress_errenous_children (sh->sources, sh->child_errno,  					  priv->child_count); @@ -900,9 +894,12 @@ afr_sh_data_lookup (call_frame_t *frame, xlator_t *this)  	local->call_count = call_count;  	xattr_req = dict_new(); -	if (xattr_req) -		ret = dict_set_uint64 (xattr_req, AFR_DATA_PENDING, -				       priv->child_count * sizeof(int32_t)); +	if (xattr_req) { +                for (i = 0; i < priv->child_count; i++) { +                        ret = dict_set_uint64 (xattr_req, priv->pending_key[i], +                                               3 * sizeof(int32_t)); +                } +        }  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 3906b707e0c..70edd5babad 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -217,10 +217,8 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; - -	afr_sh_pending_to_delta (sh->xattr, AFR_ENTRY_PENDING, -                                 sh->delta_matrix, sh->success, -                                 priv->child_count); +	afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success, +                                 priv->child_count, AFR_ENTRY_TRANSACTION);  	erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -233,8 +231,8 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)  		}  	} -	afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, -			       priv->child_count, AFR_ENTRY_PENDING); +	afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, +			       priv->child_count, AFR_ENTRY_TRANSACTION);  	local->call_count = call_count;  	for (i = 0; i < priv->child_count; i++) { @@ -1824,12 +1822,11 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr,  -				     priv->child_count, AFR_ENTRY_PENDING); +	afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,  +				     priv->child_count, AFR_ENTRY_TRANSACTION);  	afr_sh_print_pending_matrix (sh->pending_matrix, this); -  	afr_sh_mark_sources (sh, priv->child_count,  			     AFR_SELF_HEAL_ENTRY); @@ -1902,9 +1899,13 @@ afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)  	local->call_count = call_count;  	xattr_req = dict_new(); -	if (xattr_req) -		ret = dict_set_uint64 (xattr_req, AFR_ENTRY_PENDING, -				       priv->child_count * sizeof(int32_t)); +	if (xattr_req) { +                for (i = 0; i < priv->child_count; i++) { +                        ret = dict_set_uint64 (xattr_req,  +                                               priv->pending_key[i], +                                               3 * sizeof(int32_t)); +                } +        }  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 6dce5d3afb0..8e832698fca 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -213,10 +213,9 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; - -	afr_sh_pending_to_delta (sh->xattr, AFR_METADATA_PENDING, -                                 sh->delta_matrix, sh->success, -                                 priv->child_count); +	afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,  +                                 sh->success, priv->child_count, +                                 AFR_METADATA_TRANSACTION);  	erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count); @@ -229,8 +228,8 @@ afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)  		}  	} -	afr_sh_delta_to_xattr (sh->delta_matrix, erase_xattr, -			       priv->child_count, AFR_METADATA_PENDING); +	afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr, +			       priv->child_count, AFR_METADATA_TRANSACTION);  	local->call_count = call_count; @@ -431,6 +430,8 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,  	afr_private_t   *priv = NULL;  	int              source = 0; +        int i; +  	local = frame->local;  	sh = &local->self_heal;  	priv = this->private; @@ -445,9 +446,10 @@ afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,  		afr_sh_metadata_sync (frame, this, NULL);  	} else { -		dict_del (xattr, AFR_DATA_PENDING); -		dict_del (xattr, AFR_METADATA_PENDING); -		dict_del (xattr, AFR_ENTRY_PENDING); +                for (i = 0; i < priv->child_count; i++) { +                        dict_del (xattr, priv->pending_key[i]); +                } +  		afr_sh_metadata_sync (frame, this, xattr);  	} @@ -515,8 +517,9 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	afr_sh_build_pending_matrix (sh->pending_matrix, sh->xattr,  -				     priv->child_count, AFR_METADATA_PENDING); +	afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,  +				     priv->child_count,  +                                     AFR_METADATA_TRANSACTION);  	afr_sh_print_pending_matrix (sh->pending_matrix, this); @@ -656,9 +659,13 @@ afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)  	xattr_req = dict_new(); -	if (xattr_req) -		ret = dict_set_uint64 (xattr_req, AFR_METADATA_PENDING, -				       priv->child_count * sizeof(int32_t)); +	if (xattr_req) { +                for (i = 0; i < priv->child_count; i++) { +                        ret = dict_set_uint64 (xattr_req,  +                                               priv->pending_key[i], +                                               3 * sizeof(int32_t)); +                } +        }  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ba66332827b..7245ee1acbc 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -27,19 +27,28 @@  static void -__mark_all_pending (int32_t *pending, int child_count) +__mark_all_pending (int32_t *pending[], int child_count, +                    afr_transaction_type type)  {	  	int i; -	 -	for (i = 0; i < child_count; i++) -		pending[i] = hton32 (1); +        int j; + +        for (i = 0; i < child_count; i++) { +                j = afr_index_for_transaction_type (type); +		pending[i][j] = hton32 (1); +        }  }  static void -__mark_child_dead (int32_t *pending, int child_count, int child) +__mark_child_dead (int32_t *pending[], int child_count, int child, +                   afr_transaction_type type)  { -	pending[child] = 0; +        int j; + +        j = afr_index_for_transaction_type (type); +         +	pending[child][j] = 0;  } @@ -66,14 +75,15 @@ out:  static void -__mark_failed_children (int32_t *pending, int child_count,  -                        xlator_t *this, fd_t *fd) +__mark_failed_children (int32_t *pending[], int child_count,  +                        xlator_t *this, fd_t *fd, afr_transaction_type type)  {          uint64_t       ctx;          afr_fd_ctx_t * fd_ctx = NULL;          int ret = 0;          int i   = 0; +        int j   = 0;          ret = fd_ctx_get (fd, this, &ctx); @@ -83,8 +93,10 @@ __mark_failed_children (int32_t *pending, int child_count,          fd_ctx = (afr_fd_ctx_t *)(long) ctx;          for (i = 0; i < child_count; i++) { +                j = afr_index_for_transaction_type (type); +                  if (fd_ctx->child_failed[i]) -                        pending[i] = 0; +                        pending[i][j] = 0;          }  out: @@ -93,23 +105,32 @@ out:  static void -__mark_down_children (int32_t *pending, int child_count, unsigned char *child_up) +__mark_down_children (int32_t *pending[], int child_count,  +                      unsigned char *child_up, afr_transaction_type type)  {  	int i; -	 -	for (i = 0; i < child_count; i++) +	int j; + +	for (i = 0; i < child_count; i++) { +                j = afr_index_for_transaction_type (type); +  		if (!child_up[i]) -			pending[i] = 0; +			pending[i][j] = 0; +        }  }  static void -__mark_all_success (int32_t *pending, int child_count) +__mark_all_success (int32_t *pending[], int child_count, +                    afr_transaction_type type)  {  	int i; -	 -	for (i = 0; i < child_count; i++) -		pending[i] = hton32 (-1); +        int j; + +	for (i = 0; i < child_count; i++) { +                j = afr_index_for_transaction_type (type); +		pending[i][j] = hton32 (-1); +        }  } @@ -287,6 +308,26 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)  static int +afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending) +{ +        int i; +        int ret = 0; + +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_set_static_bin (xattr, priv->pending_key[i], +                                           pending[i], 3 * sizeof (int32_t)); +                /* 3 = data+metadata+entry */ +                 +                if (ret < 0) +                        goto out; +        } + +out: +        return ret; +} + + +static int  afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)  {  	int ret = 0; @@ -481,12 +522,13 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  	local = frame->local; -	__mark_down_children (local->pending_array, priv->child_count,  -                              local->child_up); +	__mark_down_children (local->pending, priv->child_count,  +                              local->child_up, local->transaction.type);          if (local->op == GF_FOP_FLUSH) { -                __mark_failed_children (local->pending_array, priv->child_count, -                                        this, local->fd); +                __mark_failed_children (local->pending, priv->child_count, +                                        this, local->fd, +                                        local->transaction.type);          }  	call_count = afr_up_children_count (priv->child_count, local->child_up);  @@ -504,11 +546,11 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  		return 0;  	} -	for (i = 0; i < priv->child_count; i++) {					 +	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { -			ret = dict_set_static_bin (xattr, local->transaction.pending,  -						   local->pending_array,  -						   priv->child_count * sizeof (int32_t)); +                        ret = afr_set_pending_dict (priv, xattr,  +                                                    local->pending); +  			if (ret < 0)  				gf_log (this->name, GF_LOG_ERROR,   					"failed to set pending entry"); @@ -553,9 +595,10 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)  			   used the dict as placeholder for return  			   value  			*/ -			ret = dict_set_static_bin (xattr, local->transaction.pending,  -						   local->pending_array,  -						   priv->child_count * sizeof (int32_t)); +                         +			ret = afr_set_pending_dict (priv, xattr,  +                                                    local->pending); +  			if (ret < 0)  				gf_log (this->name, GF_LOG_ERROR,   					"failed to set pending entry"); @@ -632,8 +675,8 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		    (local->op_errno == ENOTSUP)) {  			local->transaction.resume (frame, this);  		} else { -                        __mark_all_success (local->pending_array, -                                            priv->child_count); +                        __mark_all_success (local->pending, priv->child_count, +                                            local->transaction.type);  			local->transaction.fop (frame, this);  		} @@ -675,15 +718,14 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  	local->call_count = call_count;		 -	__mark_all_pending (local->pending_array, priv->child_count); +	__mark_all_pending (local->pending, priv->child_count, +                            local->transaction.type);  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { -			ret = dict_set_static_bin (xattr,  -						   local->transaction.pending,  -						   local->pending_array,  -						   (priv->child_count *  -						    sizeof (int32_t))); +			ret = afr_set_pending_dict (priv, xattr,  +                                                    local->pending); +  			if (ret < 0)  				gf_log (this->name, GF_LOG_ERROR,   					"failed to set pending entry"); @@ -735,9 +777,9 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  			   value  			*/ -			ret = dict_set_static_bin (xattr, local->transaction.pending,  -						   local->pending_array,  -						   priv->child_count * sizeof (int32_t)); +			ret = afr_set_pending_dict (priv, xattr,  +                                                    local->pending); +  			if (ret < 0)  				gf_log (this->name, GF_LOG_ERROR,   					"failed to set pending entry"); @@ -902,8 +944,8 @@ int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)  		if (__changelog_needed_pre_op (frame, this)) {  			afr_changelog_pre_op (frame, this);  		} else { -                        __mark_all_success (local->pending_array, -                                            priv->child_count); +                        __mark_all_success (local->pending, priv->child_count, +                                            local->transaction.type);  			local->transaction.fop (frame, this);  		} @@ -1054,8 +1096,8 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index                  __mark_fop_failed_on_fd (local->fd, this, child_index);                  break;          default: -                __mark_child_dead (local->pending_array, priv->child_count, -                                   child_index); +                __mark_child_dead (local->pending, priv->child_count, +                                   child_index, local->transaction.type);                  break;          }  } @@ -1079,8 +1121,8 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)  		if (__changelog_needed_pre_op (frame, this)) {  			afr_changelog_pre_op (frame, this);  		} else { -                        __mark_all_success (local->pending_array, -                                            priv->child_count); +                        __mark_all_success (local->pending, priv->child_count, +                                            local->transaction.type);  			local->transaction.fop (frame, this);  		} diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 95e29773902..c7a6490e764 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -20,12 +20,6 @@  #ifndef __TRANSACTION_H__  #define __TRANSACTION_H__ -#define AFR_METADATA_PENDING "trusted.glusterfs.afr.metadata-pending" - -#define AFR_DATA_PENDING "trusted.glusterfs.afr.data-pending" - -#define AFR_ENTRY_PENDING "trusted.glusterfs.afr.entry-pending" -  void  afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,  			    int child_index); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 215a80b21bd..270364ff999 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -212,13 +212,24 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)  void   afr_local_cleanup (afr_local_t *local, xlator_t *this)  { +        int i; +        afr_private_t * priv = NULL; +  	if (!local)  		return;  	afr_local_sh_cleanup (local, this);  	FREE (local->child_errno); -	FREE (local->pending_array); + +        priv = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                if (local->pending && local->pending[i]) +                        FREE (local->pending[i]); +        } + +        FREE (local->pending);  	loc_wipe (&local->loc);  	loc_wipe (&local->newloc); @@ -665,28 +676,17 @@ afr_lookup (call_frame_t *frame, xlator_t *this,  	/* By default assume ENOTCONN. On success it will be set to 0. */  	local->op_errno = ENOTCONN; -	if ((xattr_req == NULL) -	    && (priv->metadata_self_heal -		|| priv->data_self_heal -		|| priv->entry_self_heal)) +	if (xattr_req == NULL)  		local->xattr_req = dict_new ();  	else  		local->xattr_req = dict_ref (xattr_req); -	if (priv->metadata_self_heal) { -		ret = dict_set_uint64 (local->xattr_req, AFR_METADATA_PENDING, -				       priv->child_count * sizeof(int32_t)); -	} -	 -	if (priv->data_self_heal) { -		ret = dict_set_uint64 (local->xattr_req, AFR_DATA_PENDING, -				       priv->child_count * sizeof(int32_t)); -	} -	 -	if (priv->entry_self_heal) { -		ret = dict_set_uint64 (local->xattr_req, AFR_ENTRY_PENDING, -				       priv->child_count * sizeof(int32_t)); -	} +        for (i = 0; i < priv->child_count; i++) { +		ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], +				       3 * sizeof(int32_t)); +                 +                /* 3 = data+metadata+entry */ +        }  	ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0); @@ -1024,8 +1024,6 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)          local->transaction.start  = 0;          local->transaction.len    = 0; -        local->transaction.pending = AFR_DATA_PENDING; -          afr_transaction (frame, this, AFR_FLUSH_TRANSACTION);  	op_ret = 0; @@ -2362,7 +2360,6 @@ init (xlator_t *this)  		priv->entry_lock_server_count = lock_server_count;  	} -  	trav = this->children;  	while (trav) {  		if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) { @@ -2407,11 +2404,22 @@ init (xlator_t *this)  		goto out;  	} +        priv->pending_key = CALLOC (sizeof (*priv->pending_key), child_count); +        if (!priv->pending_key) { +                gf_log (this->name, GF_LOG_ERROR, +                        "out of memory :("); +                op_errno = ENOMEM; +                goto out; +        } +  	trav = this->children;  	i = 0;  	while (i < child_count) {  		priv->children[i] = trav->xlator; +                asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, +                          trav->xlator->name); +  		trav = trav->next;  		i++;  	} diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c15ed85f9c6..32484fd0a53 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -30,6 +30,7 @@  #include "call-stub.h"  #include "compat-errno.h" +#define AFR_XATTR_PREFIX "trusted.afr"  typedef struct _afr_private {  	gf_lock_t lock;               /* to guard access to child_count, etc */ @@ -42,11 +43,12 @@ typedef struct _afr_private {  	unsigned char *child_up; +        char **pending_key; +  	gf_boolean_t data_self_heal;       /* on/off */  	gf_boolean_t metadata_self_heal;   /* on/off */  	gf_boolean_t entry_self_heal;      /* on/off */ -  	gf_boolean_t data_change_log;       /* on/off */  	gf_boolean_t metadata_change_log;   /* on/off */  	gf_boolean_t entry_change_log;      /* on/off */ @@ -103,6 +105,35 @@ typedef enum {  	AFR_FLUSH_TRANSACTION,         /* flush */  } afr_transaction_type; + +/* +  xattr format: trusted.afr.volume = [x y z] +  x - data pending +  y - metadata pending +  z - entry pending +*/ + +static inline int +afr_index_for_transaction_type (afr_transaction_type type) +{ +        switch (type) { +                 +        case AFR_DATA_TRANSACTION: +        case AFR_FLUSH_TRANSACTION: +                return 0; + +        case AFR_METADATA_TRANSACTION: +                return 1; + +        case AFR_ENTRY_TRANSACTION: +        case AFR_ENTRY_RENAME_TRANSACTION: +                return 2; +        } + +        return -1;  /* make gcc happy */ +} + +  typedef struct _afr_local {  	unsigned int call_count;  	unsigned int success_count; @@ -118,7 +149,7 @@ typedef struct _afr_local {  	int32_t op_ret;  	int32_t op_errno; -	int32_t *pending_array; +	int32_t **pending;  	loc_t loc;  	loc_t newloc; @@ -365,8 +396,6 @@ typedef struct _afr_local {  		const char *basename;  		const char *new_basename; -		char *pending; -  		loc_t parent_loc;  		loc_t new_parent_loc; @@ -521,18 +550,28 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)  static inline int  afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)  { +        int i; +  	local->child_errno = CALLOC (sizeof (*local->child_errno),  				     priv->child_count);  	if (!local->child_errno) {  		return -ENOMEM;  	} -	local->pending_array = CALLOC (sizeof (*local->pending_array), -				       priv->child_count); -	if (!local->pending_array) { +	local->pending = CALLOC (sizeof (*local->pending), +                                 priv->child_count); +         +	if (!local->pending) {  		return -ENOMEM;  	} +        for (i = 0; i < priv->child_count; i++) { +                local->pending[i] = CALLOC (sizeof (*local->pending[i]), +                                            3); /* data + metadata + entry */ +                if (!local->pending[i]) +                        return -ENOMEM; +        } +          	local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes),  						  priv->child_count);  | 
