diff options
| author | shishir gowda <sgowda@redhat.com> | 2013-07-08 18:48:55 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-07-10 01:56:38 -0700 | 
| commit | 63ed610617458ac4fd85cb83471df2222380f28a (patch) | |
| tree | 1bdfea58f4fd7e0560c73358d19cf4e887f8ce84 | |
| parent | 03780d066ae7c78b969e2316dbde85e4ca0fcb85 (diff) | |
cluster/dht: Ignore subvols with error in min-free-disk/inodes
Currently, when selecting an alternative subvolume because the hashed
subvol has exceeded min-free-disk/inodes, we do not check if
layouts have errors (including decommissioning). This leads
to data being written to those subvolumes and, in the case of
decommissioning, to data loss.
Change-Id: Ie0c6cf4a29d7c53d8a6d8a8c1bd595cf58a0012a
BUG: 982919
Signed-off-by: shishir gowda <sgowda@redhat.com>
Reviewed-on: http://review.gluster.org/5299
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 75 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/nufa.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/switch.c | 6 | 
5 files changed, 85 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ec54e438e..e55c25438 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3567,7 +3567,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this,                                     subvol, subvol->fops->mknod, loc, mode,                                     rdev, umask, params);          } else { -                avail_subvol = dht_free_disk_available_subvol (this, subvol); + +                avail_subvol = dht_free_disk_available_subvol (this, subvol, +                                                               local);                  if (avail_subvol != subvol) {                          /* Choose the minimum filled volume, and create the                             files there */ @@ -3988,7 +3990,7 @@ dht_create (call_frame_t *frame, xlator_t *this,          }          /* Choose the minimum filled volume, and create the             files there */ -        avail_subvol = dht_free_disk_available_subvol (this, subvol); +        avail_subvol = dht_free_disk_available_subvol (this, subvol, local);          if (avail_subvol != subvol) {                  local->params = dict_ref (params);                  local->flags = flags; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index fb90e48cc..d00d56864 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -469,7 +469,8 @@ dht_layout_sort_volname (dht_layout_t *layout);  int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);  gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); -xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, +                                          dht_local_t *layout);  int       dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);  int 
dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); @@ -764,9 +765,11 @@ dht_dir_has_layout (dict_t *xattr, char *name);  gf_boolean_t  dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);  xlator_t * -dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol); +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, +                                   dht_layout_t *layout);  xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol); +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout);  int  dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this); diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0c87f4a64..fe3955ecb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -251,25 +251,45 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  /*Get the best subvolume to create the file in*/  xlator_t * -dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, +                                dht_local_t *local)  {  	xlator_t   *avail_subvol = NULL;  	dht_conf_t *conf = NULL; +        dht_layout_t *layout = NULL; +        loc_t      *loc = NULL;  	conf = this->private; +        if (!local) +                goto out; +        loc = &local->loc; +        if (!local->layout) { +                layout = dht_layout_get (this, loc->parent); + +                if (!layout) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "layout missing path=%s parent=%s", +                                loc->path, uuid_utoa (loc->parent->gfid)); +                        goto out; +                } +        } else { +                layout = dht_layout_ref (this, local->layout); +        } -	LOCK 
(&conf->subvolume_lock); +        LOCK (&conf->subvolume_lock);  	{ -                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol); +                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, +                                                                 layout);                  if(!avail_subvol)                  {                          avail_subvol = dht_subvol_maxspace_nonzeroinode(this, -                                                                        subvol); +                                                                        subvol, +                                                                        layout);                  }  	}  	UNLOCK (&conf->subvolume_lock); - +out:  	if (!avail_subvol) {  		gf_log (this->name,                          GF_LOG_DEBUG, @@ -278,17 +298,42 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)                  avail_subvol = subvol;  	} - +        if (layout) +                dht_layout_unref (this, layout);  	return avail_subvol;  } +static inline +int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout) +{ +        int ret = -1; +        int i   = 0; + +        if (!this || !layout) +                goto out; + +        /* check if subvol has layout errors, before selecting it */ +        for (i = 0; i < layout->cnt; i++) { +                if (!strcmp (layout->list[i].xlator->name, this->name) && +                     (layout->list[i].err != 0)) { +                        ret = -1; +                        goto out; +                } +        } +        ret = 0; +out: +        return ret; +} +  /*Get subvolume which has both space and inodes more than the min criteria*/  xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout)  {          int i = 0;          double max = 0;          double 
max_inodes = 0; +        int    ignore_subvol = 0;          xlator_t *avail_subvol = NULL;          dht_conf_t *conf = NULL; @@ -296,6 +341,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)          conf = this->private;          for(i=0; i < conf->subvolume_cnt; i++) { +                /* check if subvol has layout errors, before selecting it */ +                ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], +                                                    layout); +                if (ignore_subvol) +                        continue; +                  if ((conf->disk_unit == 'p') &&                      (conf->du_stats[i].avail_percent > conf->min_free_disk) &&                      (conf->du_stats[i].avail_inodes  > conf->min_free_inodes)) { @@ -325,10 +376,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)  /* Get subvol which has atleast one inode and maximum space */  xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, +                                  dht_layout_t *layout)  {          int         i = 0;          double      max = 0; +        int         ignore_subvol = 0;          xlator_t   *avail_subvol = NULL;          dht_conf_t *conf = NULL; @@ -336,6 +389,12 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)          conf = this->private;          for (i = 0; i < conf->subvolume_cnt; i++) { +                /* check if subvol has layout errors, before selecting it */ +                ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], +                                                    layout); +                if (ignore_subvol) +                        continue; +                  if (conf->disk_unit == 'p') {                          if ((conf->du_stats[i].avail_percent > max)                              && (conf->du_stats[i].avail_inodes > 0 )) { diff --git 
a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 9352ca55a..5fae52626 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -323,7 +323,8 @@ nufa_create (call_frame_t *frame, xlator_t *this,          if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {                  avail_subvol =                          dht_free_disk_available_subvol (this, -                                                        (xlator_t *)conf->private); +                                                        (xlator_t *)conf->private, +                                                        local);          }          if (subvol != avail_subvol) { @@ -427,7 +428,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,          if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {                  avail_subvol =                          dht_free_disk_available_subvol (this, -                                                        (xlator_t *)conf->private); +                                                        (xlator_t *)conf->private, +                                                        local);          }          if (avail_subvol != subvol) { diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index 861012247..d3ea90ba8 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -437,7 +437,8 @@ switch_create (call_frame_t *frame, xlator_t *this,          avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);          if (dht_is_subvol_filled (this, avail_subvol)) {                  avail_subvol = -                        dht_free_disk_available_subvol (this, avail_subvol); +                        dht_free_disk_available_subvol (this, avail_subvol, +                                                        local);          }          if (subvol != avail_subvol) { @@ -536,7 +537,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, 
mode_t mode,          avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);          if (dht_is_subvol_filled (this, avail_subvol)) {                  avail_subvol = -                        dht_free_disk_available_subvol (this, avail_subvol); +                        dht_free_disk_available_subvol (this, avail_subvol, +                                                        local);          }          if (avail_subvol != subvol) {  | 
