diff options
Diffstat (limited to 'scheduler/rr')
| -rw-r--r-- | scheduler/rr/Makefile.am | 3 | ||||
| -rw-r--r-- | scheduler/rr/src/Makefile.am | 13 | ||||
| -rw-r--r-- | scheduler/rr/src/rr-options.c | 256 | ||||
| -rw-r--r-- | scheduler/rr/src/rr-options.h | 34 | ||||
| -rw-r--r-- | scheduler/rr/src/rr.c | 565 | ||||
| -rw-r--r-- | scheduler/rr/src/rr.h | 70 | 
6 files changed, 941 insertions, 0 deletions
diff --git a/scheduler/rr/Makefile.am b/scheduler/rr/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/scheduler/rr/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES =  diff --git a/scheduler/rr/src/Makefile.am b/scheduler/rr/src/Makefile.am new file mode 100644 index 00000000000..7e911c0eda8 --- /dev/null +++ b/scheduler/rr/src/Makefile.am @@ -0,0 +1,13 @@ +sched_LTLIBRARIES = rr.la +scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler + +rr_la_LDFLAGS = -module -avoidversion + +rr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +rr_la_SOURCES = rr.c rr-options.c +noinst_HEADERS = rr.h rr-options.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ +	-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES =  diff --git a/scheduler/rr/src/rr-options.c b/scheduler/rr/src/rr-options.c new file mode 100644 index 00000000000..3f0ffcaf2e9 --- /dev/null +++ b/scheduler/rr/src/rr-options.c @@ -0,0 +1,256 @@ +/* +  Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#include "scheduler.h" +#include "rr-options.h" + +#define RR_LIMITS_MIN_FREE_DISK_OPTION_STRING  "scheduler.limits.min-free-disk" +#define RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT  15 +#define RR_LIMITS_MIN_FREE_DISK_VALUE_MIN      0 +#define RR_LIMITS_MIN_FREE_DISK_VALUE_MAX      100 + +#define RR_REFRESH_INTERVAL_OPTION_STRING      "scheduler.refresh-interval" +#define RR_REFRESH_INTERVAL_VALUE_DEFAULT      10 + +#define RR_READ_ONLY_SUBVOLUMES_OPTION_STRING  "scheduler.read-only-subvolumes" + +#define LOG_ERROR(args...)      gf_log ("rr-options", GF_LOG_ERROR, ##args) +#define LOG_WARNING(args...)    gf_log ("rr-options", GF_LOG_WARNING, ##args) + +static int  +_rr_options_min_free_disk_validate (const char *value_string, uint32_t *n) +{ +	uint32_t value = 0; +   +	if (value_string == NULL) +	{ +		return -1; +	} +   +	if (gf_string2percent (value_string, &value) != 0) +	{ +		gf_log ("rr",  +			GF_LOG_ERROR,  +			"invalid number format [%s] of option [%s]",  +			value_string,  +			RR_LIMITS_MIN_FREE_DISK_OPTION_STRING); +		return -1; +	} +   +	if ((value <= RR_LIMITS_MIN_FREE_DISK_VALUE_MIN) ||  +	    (value >= RR_LIMITS_MIN_FREE_DISK_VALUE_MAX)) +	{ +		gf_log ("rr",  +			GF_LOG_ERROR,  +			"out of range [%d] of option [%s].  Allowed range is 0 to 100.",  +			value,  +			RR_LIMITS_MIN_FREE_DISK_OPTION_STRING); +		return -1; +	} +   +	*n = value; +   +	return 0; +} + +static int  +_rr_options_refresh_interval_validate (const char *value_string, uint32_t *n) +{ +	uint32_t value = 0; +   +	if (value_string == NULL) +	{ +		return -1; +	} +   +	if (gf_string2time (value_string, &value) != 0) +	{ +		gf_log ("rr",  +			GF_LOG_ERROR,  +			"invalid number format [%s] of option [%s]",  +			value_string,  +			RR_REFRESH_INTERVAL_OPTION_STRING); +		return -1; +	} +   +	*n = value; +   +	return 0; +} + +static int  +_rr_options_read_only_subvolumes_validate (const char *value_string,  +					   char ***volume_list,  +					   uint64_t *volume_count) +{ +	char **vlist = NULL; +	int vcount = 0; +	int i = 0; +   +	if (value_string == NULL || volume_list == NULL || volume_count) +	{ +		return -1; +	} +   +	if (gf_strsplit (value_string,  +			 ", ",  +			 &vlist,  +			 &vcount) != 0) +	{ +		gf_log ("rr",  +			GF_LOG_ERROR,  +			"invalid subvolume list [%s] of option [%s]",  +			value_string,  +			RR_READ_ONLY_SUBVOLUMES_OPTION_STRING); +		return -1; +	} +   +	for (i = 0; i < vcount; i++) +	{ +		if (gf_volume_name_validate (vlist[i]) != 0) +		{ +			gf_log ("rr",  +				GF_LOG_ERROR,  +				"invalid subvolume name [%s] in [%s] of option [%s]",  +				vlist[i],  +				value_string,  +				RR_READ_ONLY_SUBVOLUMES_OPTION_STRING); +			goto free_exit; +		} +	} +   +	*volume_list = vlist; +	*volume_count = vcount; +   +	return 0; +   + free_exit: +	for (i = 0; i < vcount; i++) +	{ +		free (vlist[i]); +	} +	free (vlist); +   +	return -1; +} + +int  +rr_options_validate (dict_t *options, rr_options_t *rr_options) +{ +	char *value_string = NULL; +   +	if (options == NULL || rr_options == NULL) +	{ +		return -1; +	} +   +	if (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING)) +		if (data_to_str (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING))) +			value_string = data_to_str (dict_get (options,  +							      RR_LIMITS_MIN_FREE_DISK_OPTION_STRING)); +	if (value_string != NULL) +	{ +		if (_rr_options_min_free_disk_validate (value_string,  +							&rr_options->min_free_disk) != 0) +		{ +			return -1; +		} +       +		gf_log ("rr",  +			GF_LOG_WARNING,  +			"using %s = %d",  +			RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,  +			rr_options->min_free_disk); +	} +	else  +	{ +		rr_options->min_free_disk = RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT; +       +		gf_log ("rr", GF_LOG_DEBUG,  +			"using %s = %d [default]",  +			RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,  +			rr_options->min_free_disk); +	} +   +	value_string = NULL; +	if (dict_get (options, RR_REFRESH_INTERVAL_OPTION_STRING)) +		value_string = data_to_str (dict_get (options,  +						      RR_REFRESH_INTERVAL_OPTION_STRING)); +	if (value_string != NULL) +	{ +		if (_rr_options_refresh_interval_validate (value_string,  +							   &rr_options->refresh_interval) != 0) +		{ +			return -1; +		} +       +		gf_log ("rr",  +			GF_LOG_WARNING,  +			"using %s = %d",  +			RR_REFRESH_INTERVAL_OPTION_STRING,  +			rr_options->refresh_interval); +	} +	else  +	{ +		rr_options->refresh_interval = RR_REFRESH_INTERVAL_VALUE_DEFAULT; +       +		gf_log ("rr", GF_LOG_DEBUG,  +			"using %s = %d [default]",  +			RR_REFRESH_INTERVAL_OPTION_STRING,  +			rr_options->refresh_interval); +	} +   +	value_string = NULL; +	if (dict_get (options, RR_READ_ONLY_SUBVOLUMES_OPTION_STRING)) +		value_string = data_to_str (dict_get (options,  +						      RR_READ_ONLY_SUBVOLUMES_OPTION_STRING)); +	if (value_string != NULL) +	{ +		if (_rr_options_read_only_subvolumes_validate (value_string,  +							       &rr_options->read_only_subvolume_list,  +							       &rr_options->read_only_subvolume_count) != 0) +		{ +			return -1; +		} +       +		gf_log ("rr",  +			GF_LOG_WARNING,  +			"using %s = [%s]",  +			RR_READ_ONLY_SUBVOLUMES_OPTION_STRING,  +			value_string); +	} +   +	return 0; +} + +struct volume_options options[] = { +	{ .key   = { "scheduler.refresh-interval",  +		     "rr.refresh-interval" },   +	  .type  = GF_OPTION_TYPE_TIME +	}, +	{ .key   = { "scheduler.limits.min-free-disk",  +		     "rr.limits.min-free-disk" },   +	  .type  = GF_OPTION_TYPE_PERCENT +	}, +	{ .key   = { "scheduler.read-only-subvolumes",  +		     "rr.read-only-subvolumes" },   +	  .type  = GF_OPTION_TYPE_ANY +	}, +	{ .key = {NULL} } +}; diff --git a/scheduler/rr/src/rr-options.h b/scheduler/rr/src/rr-options.h new file mode 100644 index 00000000000..4818c7d491c --- /dev/null +++ b/scheduler/rr/src/rr-options.h @@ -0,0 +1,34 @@ +/* +   Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _RR_OPTIONS_H +#define _RR_OPTIONS_H + +struct rr_options +{ +  uint32_t min_free_disk; +  uint32_t refresh_interval; +  char     **read_only_subvolume_list; +  uint64_t read_only_subvolume_count; +}; +typedef struct rr_options rr_options_t; + +int rr_options_validate (dict_t *options, rr_options_t *rr_options); + +#endif diff --git a/scheduler/rr/src/rr.c b/scheduler/rr/src/rr.c new file mode 100644 index 00000000000..3e54ff5e1b6 --- /dev/null +++ b/scheduler/rr/src/rr.c @@ -0,0 +1,565 @@ +/* +  Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/time.h> +#include <stdlib.h> + +#include <stdint.h> + +#include "scheduler.h" + +#include "rr-options.h" +#include "rr.h" + +#define RR_MIN_FREE_DISK_NOT_REACHED    0 +#define RR_MIN_FREE_DISK_REACHED        1 + +#define RR_SUBVOLUME_OFFLINE    0 +#define RR_SUBVOLUME_ONLINE     1 + +#define LOG_ERROR(args...)      gf_log ("rr", GF_LOG_ERROR, ##args) +#define LOG_WARNING(args...)    gf_log ("rr", GF_LOG_WARNING, ##args) +#define LOG_CRITICAL(args...)    gf_log ("rr", GF_LOG_CRITICAL, ##args) + +#define ROUND_ROBIN(index, count)    ((index + 1) % count) + +static int  +_cleanup_rr (rr_t *rr) +{ +	int i; +   +	if (rr == NULL) +	{ +		return -1; +	} +   +	if (rr->options.read_only_subvolume_list != NULL) +	{ +		for (i = 0; i < rr->options.read_only_subvolume_count; i++) +		{ +			free (rr->options.read_only_subvolume_list[i]); +		} +		free (rr->options.read_only_subvolume_list); +	} +   +	free (rr->subvolume_list); +   +	free (rr); +   +	return 0; +} + +int  +rr_init (xlator_t *this_xl) +{ +	rr_t *rr = NULL; +	dict_t *options = NULL; +	xlator_list_t *children = NULL; +	uint64_t children_count = 0; +	int i = 0; +	int j = 0; +   +	if (this_xl == NULL) +	{ +		return -1; +	} +   +	if ((options = this_xl->options) == NULL) +	{ +		return -1; +	} +   +	if ((children = this_xl->children) == NULL) +	{ +		return -1; +	} +   +	if ((rr = CALLOC (1, sizeof (rr_t))) == NULL) +	{ +		return -1; +	} +   +	if (rr_options_validate (options, &rr->options) != 0) +	{ +		free (rr); +		return -1; +	} +   +	for (i = 0; i < rr->options.read_only_subvolume_count; i++) +	{ +		char found = 0; +       +		for (children = this_xl->children;  +		     children != NULL;  +		     children = children->next) +		{ +			if (strcmp (rr->options.read_only_subvolume_list[i],  +				    children->xlator->name) == 0) +			{ +				found = 1; +				break; +			} +		} +       +		if (!found) +		{ +			LOG_ERROR ("read-only subvolume [%s] not found in volume list",  +				   rr->options.read_only_subvolume_list[i]); +			_cleanup_rr (rr); +			return -1; +		} +	} +   +	for (children = this_xl->children;  +	     children != NULL;  +	     children = children->next) +	{ +		children_count++; +	} +   +	/* bala: excluding read_only_subvolumes */ +	if ((rr->subvolume_count = children_count -  +	     rr->options.read_only_subvolume_count) == 0) +	{ +		LOG_ERROR ("no writable volumes found for scheduling"); +		_cleanup_rr (rr); +		return -1; +	} +   +	if ((rr->subvolume_list = CALLOC (rr->subvolume_count,  +					  sizeof (rr_subvolume_t))) == NULL) +	{ +		_cleanup_rr (rr); +		return -1; +	} +   +	i = 0; +	j = 0; +	for (children = this_xl->children;  +	     children != NULL;  +	     children = children->next) +	{ +		char found = 0; +       +		for (j = 0; j < rr->options.read_only_subvolume_count; j++) +		{ +			if (strcmp (rr->options.read_only_subvolume_list[i],  +				    children->xlator->name) == 0) +			{ +				found = 1; +				break; +			} +		} +       +		if (!found) +		{ +			rr_subvolume_t *subvolume = NULL; +	   +			subvolume = &rr->subvolume_list[i]; +	   +			subvolume->xl = children->xlator; +			subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED; +			subvolume->status = RR_SUBVOLUME_ONLINE; +	   +			i++; +		} +	} +   +	rr->schedule_index = UINT64_MAX; +	rr->last_stat_fetched_time.tv_sec = 0; +	rr->last_stat_fetched_time.tv_usec = 0; +	pthread_mutex_init (&rr->mutex, NULL); +   +	*((long *)this_xl->private) = (long)rr; +   +	return 0; +} + +void  +rr_fini (xlator_t *this_xl) +{ +	rr_t *rr = NULL; +   +	if (this_xl == NULL) +	{ +		return; +	} +   +	if ((rr = (rr_t *) *((long *)this_xl->private)) != NULL) +	{ +		pthread_mutex_destroy (&rr->mutex); +		_cleanup_rr (rr); +		this_xl->private = NULL; +	} +   +	return; +} + +xlator_t * +rr_schedule (xlator_t *this_xl, const void *path) +{ +	rr_t *rr = NULL; +	uint64_t next_schedule_index = 0; +	int i = 0; +   +	if (this_xl == NULL || path == NULL) +	{ +		return NULL; +	} +   +	rr = (rr_t *) *((long *)this_xl->private); +	next_schedule_index = ROUND_ROBIN (rr->schedule_index,  +					   rr->subvolume_count); +   +	rr_update (this_xl); +   +	for (i = next_schedule_index; i < rr->subvolume_count; i++) +	{ +		if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&  +		    rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED) +		{ +			pthread_mutex_lock (&rr->mutex); +			rr->schedule_index = i; +			pthread_mutex_unlock (&rr->mutex); +			return rr->subvolume_list[i].xl; +		} +	} +   +	for (i = 0; i < next_schedule_index; i++) +	{ +		if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&  +		    rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED) +		{ +			pthread_mutex_lock (&rr->mutex); +			rr->schedule_index = i; +			pthread_mutex_unlock (&rr->mutex); +			return rr->subvolume_list[i].xl; +		} +	} +   +	for (i = next_schedule_index; i < rr->subvolume_count; i++) +	{ +		if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE) +		{ +			pthread_mutex_lock (&rr->mutex); +			rr->schedule_index = i; +			pthread_mutex_unlock (&rr->mutex); +			return rr->subvolume_list[i].xl; +		} +	} +   +	for (i = 0; i < next_schedule_index; i++) +	{ +		if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE) +		{ +			pthread_mutex_lock (&rr->mutex); +			rr->schedule_index = i; +			pthread_mutex_unlock (&rr->mutex); +			return rr->subvolume_list[i].xl; +		} +	} +   +	return NULL; +} + +void +rr_update (xlator_t *this_xl) +{ +	rr_t *rr = NULL; +	struct timeval ctime = {0, 0}; +	int i = 0; +   +	if (this_xl == NULL) +	{ +		return ; +	} +   +	if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) +	{ +		return ; +	} +   +	if (gettimeofday (&ctime, NULL) != 0) +	{ +		return ; +	} +   +	if (ctime.tv_sec > (rr->options.refresh_interval +  +			    rr->last_stat_fetched_time.tv_sec)) +	{ +		pthread_mutex_lock (&rr->mutex); +		rr->last_stat_fetched_time = ctime; +		pthread_mutex_unlock (&rr->mutex); +       +		for (i = 0; i < rr->subvolume_count; i++) +		{ +			xlator_t *subvolume_xl = NULL; +			call_frame_t *frame = NULL; +			call_pool_t *pool = NULL; +	   +			subvolume_xl = rr->subvolume_list[i].xl; +	   +			pool = this_xl->ctx->pool; +	   +			frame = create_frame (this_xl, pool); + +			STACK_WIND_COOKIE (frame, +					   rr_update_cbk,  +					   subvolume_xl->name,  +					   subvolume_xl,  +					   subvolume_xl->mops->stats,  +					   0); +		} +	} +   +	return ; +} + +int  +rr_update_cbk (call_frame_t *frame,  +	       void *cookie,  +	       xlator_t *this_xl,  +	       int32_t op_ret,  +	       int32_t op_errno,  +	       struct xlator_stats *stats) +{ +	rr_t *rr = NULL; +	rr_subvolume_t *subvolume = NULL; +	uint8_t free_disk_percent = 0; +	int i = 0; +   +	if (frame == NULL) +	{ +		return -1; +	} +   +	if (cookie == NULL || this_xl == NULL) +	{ +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	if (op_ret == 0 && stats == NULL) +	{ +		LOG_CRITICAL ("fatal! op_ret is 0 and stats is NULL.  " +			      "Please report this to <gluster-devel@nongnu.org>"); +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) +	{ +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	for (i = 0; i < rr->subvolume_count; i++) +	{ +		if (rr->subvolume_list[i].xl->name == (char *) cookie) +		{ +			subvolume = &rr->subvolume_list[i]; +			break; +		} +	} +   +	if (subvolume == NULL) +	{ +		LOG_ERROR ("unknown cookie [%s]", (char *) cookie); +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	if (op_ret == 0) +	{ +		free_disk_percent = (stats->free_disk * 100) / stats->total_disk_size; +		if (free_disk_percent > rr->options.min_free_disk) +		{ +			if (subvolume->free_disk_status != RR_MIN_FREE_DISK_NOT_REACHED) +			{ +				pthread_mutex_lock (&rr->mutex); +				subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED; +				pthread_mutex_unlock (&rr->mutex); +				LOG_WARNING ("subvolume [%s] is available with free space for scheduling",  +					     subvolume->xl->name); +			} +		} +		else +		{ +			if (subvolume->free_disk_status != RR_MIN_FREE_DISK_REACHED) +			{ +				pthread_mutex_lock (&rr->mutex); +				subvolume->free_disk_status = RR_MIN_FREE_DISK_REACHED; +				pthread_mutex_unlock (&rr->mutex); +				LOG_WARNING ("subvolume [%s] reached minimum disk space requirement",  +					     subvolume->xl->name); +			} +		} +	} +	else  +	{ +		pthread_mutex_lock (&rr->mutex); +		subvolume->status = RR_SUBVOLUME_OFFLINE; +		pthread_mutex_unlock (&rr->mutex); +		LOG_ERROR ("unable to get subvolume [%s] status information and " +			   "scheduling is disabled",  +			   subvolume->xl->name); +	} +   +	STACK_DESTROY (frame->root); +	return 0; +} + +void +rr_notify (xlator_t *this_xl, int32_t event, void *data) +{ +	rr_t *rr = NULL; +	rr_subvolume_t *subvolume = NULL; +	xlator_t *subvolume_xl = NULL; +	int i = 0, ret = 0; +	call_frame_t *frame = NULL; +	call_pool_t *pool = NULL; +	dict_t *xattr = get_new_dict (); +	int32_t version[1] = {1}; + +	if (this_xl == NULL || data == NULL) { +		return ; +	} +   +	if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) { +		return ; +	} +   +	subvolume_xl = (xlator_t *) data; +   +	for (i = 0; i < rr->subvolume_count; i++) { +		if (rr->subvolume_list[i].xl == subvolume_xl) { +			subvolume = &rr->subvolume_list[i]; +			break; +		} +	} +   +	switch (event) { +	case GF_EVENT_CHILD_UP: +		/* Seeding, to be done only once */ +		if (rr->first_time && (i == rr->subvolume_count)) { +			loc_t loc = {0,}; +			xlator_t *trav = NULL; + +			pool = this_xl->ctx->pool; +			frame = create_frame (this_xl, pool); +			ret = dict_set_bin (xattr, "trusted.glusterfs.scheduler.rr", +					    version, sizeof (int32_t)); +			if (-1 == ret) { +				gf_log (this_xl->name, GF_LOG_ERROR, "rr seed setting failed"); +			} +			if (xattr) +				dict_ref (xattr); +			 +			loc.path = strdup ("/"); +			for (trav = this_xl->parents->xlator; trav; trav = trav->parents->xlator) { +				if (trav->itable) { +					loc.inode = trav->itable->root; +					break; +				} +			} +			STACK_WIND (frame, +				    rr_notify_cbk, +				    (xlator_t *)data, +				    ((xlator_t *)data)->fops->xattrop, +				    &loc, +				    GF_XATTROP_ADD_ARRAY, +				    xattr); +	   +			if (xattr) +				dict_unref (xattr); + +			rr->first_time = 0; +		} +		if (subvolume) { +			pthread_mutex_lock (&rr->mutex); +			subvolume->status = RR_SUBVOLUME_ONLINE; +			pthread_mutex_unlock (&rr->mutex); +		} +		break; +	case GF_EVENT_CHILD_DOWN: +		if (subvolume) { +			pthread_mutex_lock (&rr->mutex); +			subvolume->status = RR_SUBVOLUME_OFFLINE; +			pthread_mutex_unlock (&rr->mutex); +		} +		break; +	} +   +	return ; +} + +int  +rr_notify_cbk (call_frame_t *frame,  +	       void *cookie,  +	       xlator_t *this_xl,  +	       int32_t op_ret,  +	       int32_t op_errno, +	       dict_t *xattr) +{ +	rr_t *rr = NULL; +	int32_t *index = NULL; +	int32_t ret = -1; +	void *tmp_index_ptr = NULL; + +	if (frame == NULL)  +	{ +		return -1; +	} +   +	if ((this_xl == NULL) || (op_ret == -1)) +	{ +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) +	{ +		STACK_DESTROY (frame->root); +		return -1; +	} +   +	ret = dict_get_bin (xattr, "trusted.glusterfs.scheduler.rr", &tmp_index_ptr); +	index = tmp_index_ptr; +	if (ret == 0) +		rr->schedule_index = (index[0] % rr->subvolume_count); +	else +		rr->schedule_index = 0; + +	STACK_DESTROY (frame->root); +	return 0; +} + +struct sched_ops sched = { +	.init     = rr_init, +	.fini     = rr_fini, +	.update   = rr_update, +	.schedule = rr_schedule, +	.notify   = rr_notify +}; + diff --git a/scheduler/rr/src/rr.h b/scheduler/rr/src/rr.h new file mode 100644 index 00000000000..baa471209e8 --- /dev/null +++ b/scheduler/rr/src/rr.h @@ -0,0 +1,70 @@ +/* +   Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef _RR_H +#define _RR_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "scheduler.h" +#include <stdint.h> +#include <sys/time.h> + +struct rr_subvolume +{ +  xlator_t  *xl; +  uint8_t   free_disk_status; +  uint8_t   status; +}; +typedef struct rr_subvolume rr_subvolume_t; + +struct rr +{ +  rr_options_t    options; +  rr_subvolume_t  *subvolume_list; +  uint64_t        subvolume_count; +  uint64_t        schedule_index; +  struct timeval  last_stat_fetched_time; +  pthread_mutex_t mutex; +  char            first_time; +}; +typedef struct rr rr_t; + +int rr_init (xlator_t *this_xl); +void rr_fini (xlator_t *this_xl); +xlator_t *rr_schedule (xlator_t *this_xl, const void *path); +void rr_update (xlator_t *this_xl); +int rr_update_cbk (call_frame_t *frame,  +		   void *cookie,  +		   xlator_t *this_xl,  +		   int32_t op_ret,  +		   int32_t op_errno,  +		   struct xlator_stats *stats); +void rr_notify (xlator_t *this_xl, int32_t event, void *data); +int rr_notify_cbk (call_frame_t *frame,  +		   void *cookie,  +		   xlator_t *this_xl,  +		   int32_t op_ret,  +		   int32_t op_errno, +		   dict_t *xattr); + +#endif /* _RR_H */  | 
