diff options
author | Amar Tumballi <amarts@redhat.com> | 2013-09-10 15:21:45 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-09-10 09:05:18 -0700 |
commit | 9d4ea7a870668d9af9bbd415ff9ad3aace59e170 (patch) | |
tree | e08a44da52b178b37d8a9063857ea9776ce0ef82 | |
parent | f43a223ad1e53041f46b351aa260203ea0685613 (diff) |
cluster/dht: Ignore subvols with error in min-free-disk/inodes
Currently when selecting a alternative subvolume when hashed
subvol has exceeded min-free-disk/inodes, we do not check if
layouts have errors (including decommissioning). This leads
to data being written to those subvolumes, and in case of
decommissioning, will lead to data loss.
BUG: 982919
> Original-Author: shishir gowda <sgowda@redhat.com>
> Reviewed-on: http://review.gluster.org/5299
Change-Id: If301a86cf3ca5fad6529bd2e61382f9901663ba0
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Reviewed-on: http://review.gluster.org/5888
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 10 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 75 | ||||
-rw-r--r-- | xlators/cluster/dht/src/nufa.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/switch.c | 6 |
5 files changed, 86 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index adfd0fd849f..7bfe127189a 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3583,7 +3583,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this, subvol, subvol->fops->mknod, loc, mode, rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol); + + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); if (avail_subvol != subvol) { /* Choose the minimum filled volume, and create the files there */ @@ -4004,7 +4006,7 @@ dht_create (call_frame_t *frame, xlator_t *this, } /* Choose the minimum filled volume, and create the files there */ - avail_subvol = dht_free_disk_available_subvol (this, subvol); + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); if (avail_subvol != subvol) { local->params = dict_ref (params); local->flags = flags; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 8f266ef8ef9..980a385b19b 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -441,7 +441,8 @@ dht_layout_sort_volname (dht_layout_t *layout); int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); -xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, + dht_local_t *layout); int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); @@ -729,9 +730,12 @@ dht_dir_has_layout (dict_t *xattr); gf_boolean_t dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator); xlator_t * -dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol); +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout); + xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol); +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout); int dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this); #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0c87f4a647c..fe3955ecbb7 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -251,25 +251,45 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) /*Get the best subvolume to create the file in*/ xlator_t * -dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, + dht_local_t *local) { xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + loc_t *loc = NULL; conf = this->private; + if (!local) + goto out; + loc = &local->loc; + if (!local->layout) { + layout = dht_layout_get (this, loc->parent); + + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "layout missing path=%s parent=%s", + loc->path, uuid_utoa (loc->parent->gfid)); + goto out; + } + } else { + layout = dht_layout_ref (this, local->layout); + } - LOCK (&conf->subvolume_lock); + LOCK (&conf->subvolume_lock); { - avail_subvol = dht_subvol_with_free_space_inodes(this, subvol); + avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, + layout); if(!avail_subvol) { avail_subvol = dht_subvol_maxspace_nonzeroinode(this, - subvol); + subvol, + layout); } } UNLOCK (&conf->subvolume_lock); - +out: if (!avail_subvol) { gf_log (this->name, GF_LOG_DEBUG, @@ -278,17 +298,42 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) avail_subvol = subvol; } - + if (layout) + dht_layout_unref (this, layout); return avail_subvol; } +static inline +int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout) +{ + int ret = -1; + int i = 0; + + if (!this || !layout) + goto out; + + /* check if subvol has layout errors, before selecting it */ + for (i = 0; i < layout->cnt; i++) { + if (!strcmp (layout->list[i].xlator->name, this->name) && + (layout->list[i].err != 0)) { + ret = -1; + goto out; + } + } + ret = 0; +out: + return ret; +} + /*Get subvolume which has both space and inodes more than the min criteria*/ xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, + dht_layout_t *layout) { int i = 0; double max = 0; double max_inodes = 0; + int ignore_subvol = 0; xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; @@ -296,6 +341,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) conf = this->private; for(i=0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors, before selecting it */ + ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], + layout); + if (ignore_subvol) + continue; + if ((conf->disk_unit == 'p') && (conf->du_stats[i].avail_percent > conf->min_free_disk) && (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { @@ -325,10 +376,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) /* Get subvol which has atleast one inode and maximum space */ xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout) { int i = 0; double max = 0; + int ignore_subvol = 0; xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; @@ -336,6 +389,12 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) conf = this->private; for (i = 0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors, before selecting it */ + ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], + layout); + if (ignore_subvol) + continue; + if (conf->disk_unit == 'p') { if ((conf->du_stats[i].avail_percent > max) && (conf->du_stats[i].avail_inodes > 0 )) { diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 076ba3bbabf..20a9c5adec3 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -320,7 +320,8 @@ nufa_create (call_frame_t *frame, xlator_t *this, if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { avail_subvol = dht_free_disk_available_subvol (this, - (xlator_t *)conf->private); + (xlator_t *)conf->private, + local); } if (subvol != avail_subvol) { @@ -425,7 +426,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { avail_subvol = dht_free_disk_available_subvol (this, - (xlator_t *)conf->private); + (xlator_t *)conf->private, + local); } if (avail_subvol != subvol) { diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index 6ec343102b6..e05d6ee6749 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -434,7 +434,8 @@ switch_create (call_frame_t *frame, xlator_t *this, avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); if (dht_is_subvol_filled (this, avail_subvol)) { avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol); + dht_free_disk_available_subvol (this, avail_subvol, + local); } if (subvol != avail_subvol) { @@ -534,7 +535,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); if (dht_is_subvol_filled (this, avail_subvol)) { avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol); + dht_free_disk_available_subvol (this, avail_subvol, + local); } if (avail_subvol != subvol) { |