diff options
| author | Poornima G <pgurusid@redhat.com> | 2016-07-19 15:20:09 +0530 | 
|---|---|---|
| committer | Kaushal M <kaushal@redhat.com> | 2016-08-24 21:42:50 -0700 | 
| commit | 9df752213e6f8a1cc9a5e875cf68ca8ef32f61db (patch) | |
| tree | 1872f4e487433ff55ef001b7f680634001f461ec /api | |
| parent | 9cd5066226770cf3c06a21757b963d315b8fe32b (diff) | |
gfapi: Fix IO error caused when there is consecutive graph switches
Backport of http://review.gluster.org/#/c/14722/
This is part 2 of the fix, the part 1 can be found at:
http://review.gluster.org/#/c/14656/
Problem:
=======
Consider a race between, __glfs_active_subvol() and graph_setup().
Lets say @TIME T1:
fs->active_subvol = A
fs->next_subvol = B
__glfs_active_subvol()                //under lock fs->mutex
{
  ....
  new_subvol = fs->next_subvol       //which is B
  ....                               //Start migration from A to B
  __glfs_first_lookup(){
     ....
     unlock fs->mutex                //@TIME T2
     network fop
     lock fs->mutex
     ....
  }
  ....                                //migration continue on B
  fs->active_subvol = fs->next_subvol //which is C (explained below)
  ....
}
@Time T2, lets say in another thread, graph_setup() is called with C,
note that at T2, fs->mutex is unlocked.
graph_stup(C...)
{
  lock fs->mutex
  ....
  if (fs->next_subvol)                // which is B
      destroy subvol (fs->next_subvol)
  ....
  fs->next_subvol = C
  ....
  unlock fs->mutex
}
Thus at the end of this,
fs->old_subvol = A;
fs->active_subvol = C;
fs->next_subvol = NULL;
which is wrong, as B completed migration, but was destroyed by
graph_setup, and C never was migrated.
Solution:
=========
Any new graph can be in one of the 2 states:
- Picked for migration, migration in progress (fs->mip_subvol)
- Not picked so far for migration (fs->next_subvol)
graph_setup() updates fs->next_subvol only, __glfs_active_subvol()
moves fs->next_subvol to fs->mip_subvol and fs->next_subvol = NULL
atomically, and then once the migration is complete, make that the
fs->active_subvol
> Reviewed-on: http://review.gluster.org/14722
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
> Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
> Reviewed-by: Niels de Vos <ndevos@redhat.com>
BUG: 1367294
Change-Id: Ib6ff0565105c5eedb912a43da4017cd413243612
Signed-off-by: Poornima G <pgurusid@redhat.com>
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Reviewed-on: http://review.gluster.org/15167
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kaushal M <kaushal@redhat.com>
Diffstat (limited to 'api')
| -rw-r--r-- | api/src/glfs-internal.h | 12 | ||||
| -rw-r--r-- | api/src/glfs-master.c | 3 | ||||
| -rw-r--r-- | api/src/glfs-resolve.c | 14 | 
3 files changed, 23 insertions, 6 deletions
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h index e1b8c8ac5f2..fe59a3b8eb9 100644 --- a/api/src/glfs-internal.h +++ b/api/src/glfs-internal.h @@ -187,8 +187,16 @@ struct glfs {  	int                 ret;  	int                 err; -	xlator_t           *active_subvol; -	xlator_t           *next_subvol; +	xlator_t           *active_subvol; /* active graph */ +        xlator_t           *mip_subvol;    /* graph for which migration is in +                                            * progress */ +	xlator_t           *next_subvol;   /* Any new graph is put to +                                            * next_subvol, the graph in +                                            * next_subvol can either be move to +                                            * mip_subvol (if any IO picks it up +                                            * for migration), or be detroyed (if +                                            * there is a new graph, and this was +                                            * never picked for migration) */  	xlator_t           *old_subvol;  	char               *oldvolfile; diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c index b49ce2c8447..fd1f45d715f 100644 --- a/api/src/glfs-master.c +++ b/api/src/glfs-master.c @@ -45,7 +45,8 @@ graph_setup (struct glfs *fs, glusterfs_graph_t *graph)  	{  		if (new_subvol->switched ||  		    new_subvol == fs->active_subvol || -		    new_subvol == fs->next_subvol) { +		    new_subvol == fs->next_subvol || +                    new_subvol == fs->mip_subvol) {  			/* Spurious CHILD_UP event on old graph */  			ret = 0;  			goto unlock; diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c index 4c44b4f7ac8..b31507a949b 100644 --- a/api/src/glfs-resolve.c +++ b/api/src/glfs-resolve.c @@ -825,6 +825,13 @@ __glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol)  } +/* Note that though it appears that this function executes under fs->mutex, + * it is not fully executed under fs->mutex. i.e. there are functions like + * __glfs_first_lookup, __glfs_refresh_inode, __glfs_migrate_openfds which + * unlocks fs->mutex before sending any network fop, and reacquire fs->mutex + * once the fop is complete. Hence the variable read from fs at the start of the + * function need not have the same value by the end of the function. + */  xlator_t *  __glfs_active_subvol (struct glfs *fs)  { @@ -835,7 +842,8 @@ __glfs_active_subvol (struct glfs *fs)  	if (!fs->next_subvol)  		return fs->active_subvol; -	new_subvol = fs->next_subvol; +	new_subvol = fs->mip_subvol = fs->next_subvol; +        fs->next_subvol = NULL;  	ret = __glfs_first_lookup (fs, new_subvol);  	if (ret) { @@ -869,8 +877,8 @@ __glfs_active_subvol (struct glfs *fs)  	   should be atomic  	*/  	fs->old_subvol = fs->active_subvol; -	fs->active_subvol = fs->next_subvol; -	fs->next_subvol = NULL; +	fs->active_subvol = fs->mip_subvol; +	fs->mip_subvol = NULL;  	if (new_cwd) {  		__glfs_cwd_set (fs, new_cwd);  | 
