diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-layout.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 543 | 
1 files changed, 543 insertions, 0 deletions
| diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c new file mode 100644 index 00000000000..08b4a2746f8 --- /dev/null +++ b/xlators/cluster/dht/src/dht-layout.c @@ -0,0 +1,543 @@ +/* +   Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "byte-order.h" + +#define layout_base_size (sizeof (dht_layout_t)) + +#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0]) + +#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size)) + + +dht_layout_t * +dht_layout_new (xlator_t *this, int cnt) +{ +	dht_layout_t *layout = NULL; + + +	layout = CALLOC (1, layout_size (cnt)); +	if (!layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	layout->cnt = cnt; + +out: +	return layout; +} + + +dht_layout_t * +dht_layout_get (xlator_t *this, inode_t *inode) +{ +        uint64_t layout = 0; +        int      ret    = -1; + +        ret = inode_ctx_get (inode, this, &layout); + +        return (dht_layout_t *)(long)layout; +} + + +xlator_t * +dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) +{ +	uint32_t   hash = 0; +        xlator_t  *subvol = NULL; +	int        i = 0; +	int        ret = 0; + + +	ret = dht_hash_compute (layout->type, name, &hash); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"hash computation failed for type=%d name=%s", +			layout->type, name); +		goto out; +	} + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].start <= hash +		    && layout->list[i].stop >= hash) { +			subvol = layout->list[i].xlator; +			break; +		} +	} + +	if (!subvol) { +		gf_log (this->name, GF_LOG_DEBUG, +			"no subvolume for hash (value) = %u", hash); +	} + +out: +	return subvol; +} + + +dht_layout_t * +dht_layout_for_subvol (xlator_t *this, xlator_t *subvol) +{ +	dht_conf_t   *conf = NULL; +	dht_layout_t *layout = NULL; +	int           i = 0; + + +	conf = this->private; + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		if (conf->subvolumes[i] == subvol) { +			layout = conf->file_layouts[i]; +			break; +		} +	} + +	return layout; +} + + +int +dht_layouts_init (xlator_t *this, dht_conf_t *conf) +{ +	dht_layout_t *layout = NULL; +	int           i = 0; +	int           ret = -1; +	 + +	conf->file_layouts = CALLOC (conf->subvolume_cnt, +				     sizeof (dht_layout_t *)); +	if (!conf->file_layouts) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	for (i = 0; i < conf->subvolume_cnt; i++) { +		layout = dht_layout_new (this, 1); + +		if (!layout) { +			goto out; +		} + +		layout->preset = 1; + +		layout->list[0].xlator = conf->subvolumes[i]; + +		conf->file_layouts[i] = layout; +	} + +	ret = 0; +out: +	return ret; +} + + +int +dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, +			 int pos, int32_t **disk_layout_p) +{ +	int      ret = -1; +	int32_t *disk_layout = NULL; + +	disk_layout = CALLOC (5, sizeof (int)); +	if (!disk_layout) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		goto out; +	} + +	disk_layout[0] = hton32 (1); +	disk_layout[1] = hton32 (layout->type); +	disk_layout[2] = hton32 (layout->list[pos].start); +	disk_layout[3] = hton32 (layout->list[pos].stop); + +	if (disk_layout_p) +		*disk_layout_p = disk_layout; +	ret = 0; + +out: +	return ret; +} + + +int +dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, +		       int pos, int32_t *disk_layout) +{ +	int      cnt = 0; +	int      type = 0; +	int      start_off = 0; +	int      stop_off = 0; + + +	/* TODO: assert disk_layout_ptr is of required length */ + +	cnt  = ntoh32 (disk_layout[0]); +	if (cnt != 1) { +		gf_log (this->name, GF_LOG_ERROR, +			"disk layout has invalid count %d", cnt); +		return -1; +	} + +	/* TODO: assert type is compatible */ +	type      = ntoh32 (disk_layout[1]); +	start_off = ntoh32 (disk_layout[2]); +	stop_off  = ntoh32 (disk_layout[3]); + +	layout->list[pos].start = start_off; +	layout->list[pos].stop  = stop_off; + +	gf_log (this->name, GF_LOG_DEBUG, +		"merged to layout: %u - %u (type %d) from %s", +		start_off, stop_off, type, +		layout->list[pos].xlator->name); + +	return 0; +} + + +int +dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, +		  int op_ret, int op_errno, dict_t *xattr) +{ +	int      i     = 0; +	int      ret   = -1; +	int      err   = -1; +	int32_t *disk_layout = NULL; + + +	if (op_ret != 0) { +		err = op_errno; +	} + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].xlator == NULL) { +			layout->list[i].err    = err; +			layout->list[i].xlator = subvol; +			break; +		} +	} + +	if (op_ret != 0) { +		ret = 0; +		goto out; +	} + +	if (xattr) { +		/* during lookup and not mkdir */ +		ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", +				    VOID(&disk_layout)); +	} + +	if (ret != 0) { +		layout->list[i].err = -1; +		gf_log (this->name, GF_LOG_DEBUG, +			"missing disk layout on %s. err = %d", +			subvol->name, err); +		ret = 0; +		goto out; +	} + +	ret = dht_disk_layout_merge (this, layout, i, disk_layout); +	if (ret != 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"layout merge from subvolume %s failed", +			subvol->name); +		goto out; +	} +	layout->list[i].err = 0; + +out: +	return ret; +} + + +void +dht_layout_entry_swap (dht_layout_t *layout, int i, int j) +{ +	uint32_t  start_swap = 0; +	uint32_t  stop_swap = 0; +	xlator_t *xlator_swap = 0; +	int       err_swap = 0; + + +	start_swap  = layout->list[i].start; +	stop_swap   = layout->list[i].stop; +	xlator_swap = layout->list[i].xlator; +	err_swap    = layout->list[i].err; + +	layout->list[i].start  = layout->list[j].start; +	layout->list[i].stop   = layout->list[j].stop; +	layout->list[i].xlator = layout->list[j].xlator; +	layout->list[i].err    = layout->list[j].err; + +	layout->list[j].start  = start_swap; +	layout->list[j].stop   = stop_swap; +	layout->list[j].xlator = xlator_swap; +	layout->list[j].err    = err_swap; +} + + +int64_t +dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) +{ +	int64_t diff = 0; + +	if (layout->list[i].err || layout->list[j].err) +		diff = layout->list[i].err - layout->list[j].err; +	else +		diff = (int64_t) layout->list[i].start +			- (int64_t) layout->list[j].start; + +	return diff; +} + + +int +dht_layout_sort (dht_layout_t *layout) +{ +	int       i = 0; +	int       j = 0; +	int64_t   ret = 0; + +	/* TODO: O(n^2) -- bad bad */ + +	for (i = 0; i < layout->cnt - 1; i++) { +		for (j = i + 1; j < layout->cnt; j++) { +			ret = dht_layout_entry_cmp (layout, i, j); +			if (ret > 0) +				dht_layout_entry_swap (layout, i, j); +		} +	} + +	return 0; +} + + +int +dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, +		      uint32_t *holes_p, uint32_t *overlaps_p, +		      uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) +{ +	dht_conf_t *conf = NULL; +	uint32_t    holes    = 0; +	uint32_t    overlaps = 0; +	uint32_t    missing  = 0; +	uint32_t    down     = 0; +	uint32_t    misc     = 0; +	uint32_t    hole_cnt = 0; +	uint32_t    overlap_cnt = 0; +	int         i = 0; +	int         ret = 0; +	uint32_t    prev_stop = 0; +	uint32_t    last_stop = 0; +	char        is_virgin = 1; + + +	conf = this->private; + +	/* TODO: explain WTF is happening */ + +	last_stop = layout->list[0].start - 1; +	prev_stop = last_stop; + +	for (i = 0; i < layout->cnt; i++) { +		if (layout->list[i].err) { +			switch (layout->list[i].err) { +			case -1: +			case ENOENT: +				missing++; +				break; +			case ENOTCONN: +				down++; +				break; +			default: +				misc++; +			} +			continue; +		} + +		is_virgin = 0; + +		if ((prev_stop + 1) < layout->list[i].start) { +			hole_cnt++; +			holes += (layout->list[i].start - (prev_stop + 1)); +		} + +		if ((prev_stop + 1) > layout->list[i].start) { +			overlap_cnt++; +			overlaps += ((prev_stop + 1) - layout->list[i].start); +		} +		prev_stop = layout->list[i].stop; +	} + +	if ((last_stop - prev_stop) || is_virgin) +	    hole_cnt++; +	holes += (last_stop - prev_stop); + +	if (holes_p) +		*holes_p = hole_cnt; + +	if (overlaps_p) +		*overlaps_p = overlap_cnt; + +	if (missing_p) +		*missing_p = missing; + +	if (down_p) +		*down_p = down; + +	if (misc_p) +		*misc_p = misc; + +	return ret; +} + + +int +dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) +{ +	int          ret   = 0; +	uint32_t     holes = 0; +	uint32_t     overlaps = 0; +	uint32_t     missing = 0; +	uint32_t     down = 0; +	uint32_t     misc = 0; + + +	ret = dht_layout_sort (layout); +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"sort failed?! how the ...."); +		goto out; +	} + +	ret = dht_layout_anomalies (this, loc, layout, +				    &holes, &overlaps, +				    &missing, &down, &misc); +	if (ret == -1) { +		gf_log (this->name, GF_LOG_ERROR, +			"error while finding anomalies in %s -- not good news", +			loc->path); +		goto out; +	} + +	if (holes || overlaps) { +		if (missing == layout->cnt) { +			gf_log (this->name, GF_LOG_WARNING, +				"directory %s looked up first time", +				loc->path); +		} else { +			gf_log (this->name, GF_LOG_ERROR, +				"found anomalies in %s. holes=%d overlaps=%d", +				loc->path, holes, overlaps); +		} +		ret = 1; +	} + +out: +	return ret; +} + + +int +dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, +			 loc_t *loc, dict_t *xattr) +{ +	int       idx = 0; +	int       pos = -1; +	int       ret = -1; +	int32_t  *disk_layout = NULL; +	int32_t   count = -1; +	uint32_t  start_off = -1; +	uint32_t  stop_off = -1; + + +	for (idx = 0; idx < layout->cnt; idx++) { +		if (layout->list[idx].xlator == subvol) { +			pos = idx; +			break; +		} +	} +	 +	if (pos == -1) { +		gf_log (this->name, GF_LOG_DEBUG, +			"%s - no layout info for subvolume %s", +			loc->path, subvol->name); +		ret = 1; +		goto out; +	} +	 +	if (xattr == NULL) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - xattr dictionary is NULL", +			loc->path); +		ret = -1; +		goto out; +	} + +	ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", +			    VOID(&disk_layout)); +	 +	if (ret < 0) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - disk layout missing", loc->path); +		ret = -1; +		goto out; +	}  + +	count  = ntoh32 (disk_layout[0]); +	if (count != 1) { +		gf_log (this->name, GF_LOG_ERROR, +			"%s - disk layout has invalid count %d", +			loc->path, count); +		ret = -1; +		goto out; +	} + +	start_off = ntoh32 (disk_layout[2]); +	stop_off  = ntoh32 (disk_layout[3]); +	 +	if ((layout->list[pos].start != start_off) +	    || (layout->list[pos].stop != stop_off)) { +		gf_log (this->name, GF_LOG_DEBUG, +			"subvol: %s; inode layout - %"PRId32" - %"PRId32"; " +			"disk layout - %"PRId32" - %"PRId32, +			layout->list[pos].xlator->name, +			layout->list[pos].start, layout->list[pos].stop, +			start_off, stop_off); +		ret = 1; +	} else { +		ret = 0; +	} +out: +	return ret; +} + | 
