diff options
| author | Amar Tumballi <amarts@redhat.com> | 2013-09-10 15:21:45 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-09-10 09:05:18 -0700 | 
| commit | 9d4ea7a870668d9af9bbd415ff9ad3aace59e170 (patch) | |
| tree | e08a44da52b178b37d8a9063857ea9776ce0ef82 | |
| parent | f43a223ad1e53041f46b351aa260203ea0685613 (diff) | |
cluster/dht: Ignore subvols with error in min-free-disk/inodes
Currently, when selecting an alternative subvolume because the hashed
subvol has exceeded min-free-disk/inodes, we do not check whether the
candidate subvolumes have layout errors (including decommissioning).
This leads to data being written to those subvolumes and, in the case
of decommissioning, to data loss.
BUG: 982919
> Original-Author: shishir gowda <sgowda@redhat.com>
> Reviewed-on: http://review.gluster.org/5299
Change-Id: If301a86cf3ca5fad6529bd2e61382f9901663ba0
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Reviewed-on: http://review.gluster.org/5888
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 10 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 75 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/nufa.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/switch.c | 6 | 
5 files changed, 86 insertions, 17 deletions
| diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index adfd0fd84..7bfe12718 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3583,7 +3583,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this,                                     subvol, subvol->fops->mknod, loc, mode,                                     rdev, umask, params);          } else { -                avail_subvol = dht_free_disk_available_subvol (this, subvol); + +                avail_subvol = dht_free_disk_available_subvol (this, subvol, +                                                               local);                  if (avail_subvol != subvol) {                          /* Choose the minimum filled volume, and create the                             files there */ @@ -4004,7 +4006,7 @@ dht_create (call_frame_t *frame, xlator_t *this,          }          /* Choose the minimum filled volume, and create the             files there */ -        avail_subvol = dht_free_disk_available_subvol (this, subvol); +        avail_subvol = dht_free_disk_available_subvol (this, subvol, local);          if (avail_subvol != subvol) {                  local->params = dict_ref (params);                  local->flags = flags; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 8f266ef8e..980a385b1 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -441,7 +441,8 @@ dht_layout_sort_volname (dht_layout_t *layout);  int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);  gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); -xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, +                                          dht_local_t *layout);  int       dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);  int 
dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); @@ -729,9 +730,12 @@ dht_dir_has_layout (dict_t *xattr);  gf_boolean_t  dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);  xlator_t * -dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol); +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, +                                   dht_layout_t *layout); +  xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol); +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout);  int  dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);  #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0c87f4a64..fe3955ecb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -251,25 +251,45 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  /*Get the best subvolume to create the file in*/  xlator_t * -dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, +                                dht_local_t *local)  {  	xlator_t   *avail_subvol = NULL;  	dht_conf_t *conf = NULL; +        dht_layout_t *layout = NULL; +        loc_t      *loc = NULL;  	conf = this->private; +        if (!local) +                goto out; +        loc = &local->loc; +        if (!local->layout) { +                layout = dht_layout_get (this, loc->parent); + +                if (!layout) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "layout missing path=%s parent=%s", +                                loc->path, uuid_utoa (loc->parent->gfid)); +                        goto out; +                } +        } else { +                layout = dht_layout_ref (this, local->layout); +        } -	LOCK 
(&conf->subvolume_lock); +        LOCK (&conf->subvolume_lock);  	{ -                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol); +                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, +                                                                 layout);                  if(!avail_subvol)                  {                          avail_subvol = dht_subvol_maxspace_nonzeroinode(this, -                                                                        subvol); +                                                                        subvol, +                                                                        layout);                  }  	}  	UNLOCK (&conf->subvolume_lock); - +out:  	if (!avail_subvol) {  		gf_log (this->name,                          GF_LOG_DEBUG, @@ -278,17 +298,42 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)                  avail_subvol = subvol;  	} - +        if (layout) +                dht_layout_unref (this, layout);  	return avail_subvol;  } +static inline +int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout) +{ +        int ret = -1; +        int i   = 0; + +        if (!this || !layout) +                goto out; + +        /* check if subvol has layout errors, before selecting it */ +        for (i = 0; i < layout->cnt; i++) { +                if (!strcmp (layout->list[i].xlator->name, this->name) && +                     (layout->list[i].err != 0)) { +                        ret = -1; +                        goto out; +                } +        } +        ret = 0; +out: +        return ret; +} +  /*Get subvolume which has both space and inodes more than the min criteria*/  xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout)  {          int i = 0;          double max = 0;          double 
max_inodes = 0; +        int    ignore_subvol = 0;          xlator_t *avail_subvol = NULL;          dht_conf_t *conf = NULL; @@ -296,6 +341,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)          conf = this->private;          for(i=0; i < conf->subvolume_cnt; i++) { +                /* check if subvol has layout errors, before selecting it */ +                ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], +                                                    layout); +                if (ignore_subvol) +                        continue; +                  if ((conf->disk_unit == 'p') &&                      (conf->du_stats[i].avail_percent > conf->min_free_disk) &&                      (conf->du_stats[i].avail_inodes  > conf->min_free_inodes)) { @@ -325,10 +376,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)  /* Get subvol which has atleast one inode and maximum space */  xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout)  {          int         i = 0;          double      max = 0; +        int         ignore_subvol = 0;          xlator_t   *avail_subvol = NULL;          dht_conf_t *conf = NULL; @@ -336,6 +389,12 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)          conf = this->private;          for (i = 0; i < conf->subvolume_cnt; i++) { +                /* check if subvol has layout errors, before selecting it */ +                ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], +                                                    layout); +                if (ignore_subvol) +                        continue; +                  if (conf->disk_unit == 'p') {                          if ((conf->du_stats[i].avail_percent > max)                              && (conf->du_stats[i].avail_inodes > 0 )) { diff --git 
a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 076ba3bba..20a9c5ade 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -320,7 +320,8 @@ nufa_create (call_frame_t *frame, xlator_t *this,          if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {                  avail_subvol =                          dht_free_disk_available_subvol (this, -                                                        (xlator_t *)conf->private); +                                                        (xlator_t *)conf->private, +                                                        local);          }          if (subvol != avail_subvol) { @@ -425,7 +426,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,          if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {                  avail_subvol =                          dht_free_disk_available_subvol (this, -                                                        (xlator_t *)conf->private); +                                                        (xlator_t *)conf->private, +                                                        local);          }          if (avail_subvol != subvol) { diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index 6ec343102..e05d6ee67 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -434,7 +434,8 @@ switch_create (call_frame_t *frame, xlator_t *this,          avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);          if (dht_is_subvol_filled (this, avail_subvol)) {                  avail_subvol = -                        dht_free_disk_available_subvol (this, avail_subvol); +                        dht_free_disk_available_subvol (this, avail_subvol, +                                                        local);          }          if (subvol != avail_subvol) { @@ -534,7 +535,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, 
mode_t mode,          avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);          if (dht_is_subvol_filled (this, avail_subvol)) {                  avail_subvol = -                        dht_free_disk_available_subvol (this, avail_subvol); +                        dht_free_disk_available_subvol (this, avail_subvol, +                                                        local);          }          if (avail_subvol != subvol) { | 
