diff options
| author | Anand Avati <avati@gluster.com> | 2009-10-13 06:42:15 +0000 | 
|---|---|---|
| committer | Anand V. Avati <avati@dev.gluster.com> | 2009-10-13 05:46:47 -0700 | 
| commit | f085beebd03b2c8be2fa57039ad3cbcb6eaa66d3 (patch) | |
| tree | 509511c56cf3c9f96b59f959f29817c86841643a /xlators | |
| parent | b20cee457232a88517af44ae4505361dd3a4de15 (diff) | |
prevent spurious unlocks from afr selfheal
AFR self-heal now remembers all the nodes on which locks were successfully
acquired and sends unlocks only to those nodes.
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 112 (parallel deletion of files mounted by different clients on the same back-end hangs and/or does not completely delete)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=112
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 34 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 28 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 26 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 21 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 | 
6 files changed, 90 insertions, 23 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 07d6a18b695..b42801cede3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -713,7 +713,11 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)  //	memset (sh->child_errno, 0, sizeof (int) * priv->child_count);  	memset (sh->buf, 0, sizeof (struct stat) * priv->child_count); -	 + +        for (i = 0; i < priv->child_count; i++) { +                sh->locked_nodes[i] = 0; +        } +  	for (i = 0; i < priv->child_count; i++) {  		if (sh->xattr[i])  			dict_unref (sh->xattr[i]); @@ -780,12 +784,20 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	call_count = local->child_count; +        for (i = 0; i < priv->child_count; i++) { +                if (sh->locked_nodes[i]) +                        call_count++; +        } + +        if (call_count == 0) { +                afr_sh_missing_entries_done (frame, this); +                return 0; +        }  	local->call_count = call_count;  	for (i = 0; i < priv->child_count; i++) { -		if (local->child_up[i]) { +		if (sh->locked_nodes[i]) {  			gf_log (this->name, GF_LOG_TRACE,  				"unlocking %"PRId64"/%s on subvolume %s",  				sh->parent_loc.inode->ino, local->loc.name, @@ -1284,11 +1296,13 @@ sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		if (op_ret == -1) {  			sh->op_failed = 1; +                        sh->locked_nodes[child_index] = 0;  			gf_log (this->name, GF_LOG_DEBUG,  				"locking inode of %s on child %d failed: %s",  				local->loc.path, child_index,  				strerror (op_errno));  		} else { +                        sh->locked_nodes[child_index] = 1;  			gf_log (this->name, GF_LOG_TRACE,  				"inode of %s on child %d locked",  				local->loc.path, child_index); @@ -1337,12 +1351,13 @@ afr_self_heal_missing_entries 
(call_frame_t *frame, xlator_t *this)  	for (i = 0; i < priv->child_count; i++) {  		if (local->child_up[i]) { -			STACK_WIND (frame, sh_missing_entries_lk_cbk, -				    priv->children[i], -				    priv->children[i]->fops->entrylk, -                                    this->name, -				    &sh->parent_loc, local->loc.name, -				    ENTRYLK_LOCK_NB, ENTRYLK_WRLCK); +			STACK_WIND_COOKIE (frame, sh_missing_entries_lk_cbk, +                                           (void *) (long) i, +                                           priv->children[i], +                                           priv->children[i]->fops->entrylk, +                                           this->name, +                                           &sh->parent_loc, local->loc.name, +                                           ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);  			if (!--call_count)  				break;  		} @@ -1380,6 +1395,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this,  	sh->success = CALLOC (priv->child_count, sizeof (int));  	sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *));  	sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count); +	sh->locked_nodes = CALLOC (sizeof (*sh->locked_nodes), priv->child_count);  	sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count);  	for (i = 0; i < priv->child_count; i++) { diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 620ad1a2c87..d2224ec9263 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -56,6 +56,7 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)  	afr_local_t     *local = NULL;  	afr_self_heal_t *sh = NULL;  	afr_private_t   *priv = NULL; +        int              i = 0;  	local = frame->local;  	sh = &local->self_heal; @@ -65,6 +66,14 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this)  	   TODO: cleanup sh->*   	 */ +        if (sh->healing_fd) { +		fd_unref (sh->healing_fd); +		
sh->healing_fd = NULL; +        } + +        for (i = 0; i < priv->child_count; i++) +                sh->locked_nodes[i] = 0; +  	gf_log (this->name, GF_LOG_TRACE,  		"self heal of %s completed",  		local->loc.path); @@ -96,8 +105,6 @@ afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	call_count = afr_frame_return (frame);  	if (call_count == 0) { -		fd_unref (sh->healing_fd); -		sh->healing_fd = NULL;  		afr_sh_data_done (frame, this);  	} @@ -261,7 +268,15 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	call_count = local->child_count; +        for (i = 0; i < priv->child_count; i++) { +                if (sh->locked_nodes[i]) +                        call_count++; +        } + +        if (call_count == 0) { +                afr_sh_data_close (frame, this); +                return 0; +        }  	local->call_count = call_count;		 @@ -270,7 +285,7 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)  	flock.l_type  = F_UNLCK;  	for (i = 0; i < priv->child_count; i++) { -		if (local->child_up[i]) { +		if (sh->locked_nodes[i]) {  			gf_log (this->name, GF_LOG_TRACE,  				"unlocking %s on subvolume %s",  				local->loc.path, priv->children[i]->name); @@ -861,12 +876,13 @@ afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		if (op_ret == -1) {  			sh->op_failed = 1; -			gf_log (this->name, -                                GF_LOG_DEBUG, +                        sh->locked_nodes[child_index] = 0; +			gf_log (this->name, GF_LOG_DEBUG,  				"locking of %s on child %d failed: %s",  				local->loc.path, child_index,  				strerror (op_errno));  		} else { +                        sh->locked_nodes[child_index] = 1;  			gf_log (this->name, GF_LOG_TRACE,  				"inode of %s on child %d locked",  				local->loc.path, child_index); diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 
b44418dd68e..5953fce15ea 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -56,6 +56,7 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)  	afr_local_t     *local = NULL;  	afr_self_heal_t *sh = NULL;  	afr_private_t   *priv = NULL; +        int              i = 0;  	local = frame->local;  	sh = &local->self_heal; @@ -65,6 +66,14 @@ afr_sh_entry_done (call_frame_t *frame, xlator_t *this)  	   TODO: cleanup sh->*   	*/ +        if (sh->healing_fd) +                fd_unref (sh->healing_fd); +        sh->healing_fd = NULL; + +        for (i = 0; i < priv->child_count; i++) { +                sh->locked_nodes[i] = 0; +        } +  	gf_log (this->name, GF_LOG_TRACE,  		"self heal of %s completed",  		local->loc.path); @@ -107,9 +116,6 @@ afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	call_count = afr_frame_return (frame);  	if (call_count == 0) { -		if (sh->healing_fd) -			fd_unref (sh->healing_fd); -		sh->healing_fd = NULL;  		afr_sh_entry_done (frame, this);  	} @@ -132,12 +138,20 @@ afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	call_count = local->child_count; +        for (i = 0; i < priv->child_count; i++) { +                if (sh->locked_nodes[i]) +                        call_count++; +        } + +        if (call_count == 0) { +                afr_sh_entry_done (frame, this); +                return 0; +        }  	local->call_count = call_count;		  	for (i = 0; i < priv->child_count; i++) { -		if (local->child_up[i]) { +		if (sh->locked_nodes[i]) {  			gf_log (this->name, GF_LOG_TRACE,  				"unlocking %s on subvolume %s",  				local->loc.path, priv->children[i]->name); @@ -1977,11 +1991,13 @@ afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		if (op_ret == -1) {  			sh->op_failed = 1; +                        sh->locked_nodes[child_index] = 0;  			gf_log (this->name, 
GF_LOG_DEBUG,  				"locking inode of %s on child %d failed: %s",  				local->loc.path, child_index,  				strerror (op_errno));  		} else { +                        sh->locked_nodes[child_index] = 1;  			gf_log (this->name, GF_LOG_TRACE,  				"inode of %s on child %d locked",  				local->loc.path, child_index); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 9e8e995b4a3..9842902e6e0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -64,7 +64,11 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)  //	memset (sh->child_errno, 0, sizeof (int) * priv->child_count);  	memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);  	memset (sh->success, 0, sizeof (int) * priv->child_count); -	 + +        for (i = 0; i < priv->child_count; i++) { +                sh->locked_nodes[i] = 1; +        } +  	for (i = 0; i < priv->child_count; i++) {  		if (sh->xattr[i])  			dict_unref (sh->xattr[i]); @@ -137,7 +141,16 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)  	sh = &local->self_heal;  	priv = this->private; -	call_count = local->child_count; +        for (i = 0; i < priv->child_count; i++) { +                if (sh->locked_nodes[i]) +                        call_count++; +        } + +        if (call_count == 0) { +                afr_sh_metadata_done (frame, this); +                return 0; +        } +  	local->call_count = call_count;  	for (i = 0; i < priv->child_count; i++) { @@ -145,7 +158,7 @@ afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)  		flock.l_len     = 0;  		flock.l_type    = F_UNLCK; -		if (local->child_up[i]) { +		if (sh->locked_nodes[i]) {  			gf_log (this->name, GF_LOG_TRACE,  				"unlocking %s on subvolume %s",  				local->loc.path, priv->children[i]->name); @@ -712,11 +725,13 @@ afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		if (op_ret == 
-1) {  			sh->op_failed = 1; +                        sh->locked_nodes[child_index] = 0;  			gf_log (this->name, GF_LOG_DEBUG,  				"locking of %s on child %d failed: %s",  				local->loc.path, child_index,  				strerror (op_errno));  		} else { +                        sh->locked_nodes[child_index] = 1;  			gf_log (this->name, GF_LOG_TRACE,  				"inode of %s on child %d locked",  				local->loc.path, child_index); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 429ab97c5c4..eea030ad112 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -216,6 +216,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)  	if (sh->success)  		FREE (sh->success); +	if (sh->locked_nodes) +		FREE (sh->locked_nodes); +  	if (sh->healing_fd) {  		fd_unref (sh->healing_fd);  		sh->healing_fd = NULL; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 61f630b5498..4cbb1b19cb6 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -84,6 +84,7 @@ typedef struct {  	int active_source;  	int active_sinks;  	int *success; +	int *locked_nodes;  	fd_t *healing_fd;  	int   op_failed;  | 
