From 86b01a278b55b19adefe9625e586252a5c75b474 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Mon, 26 Nov 2012 16:36:42 -0500 Subject: dht: support auto-NUFA option Many people have asked for behavior like the old NUFA, which builds and seems to run but was previously impossible to enable/configure in a standard way. This change allows NUFA to be enabled instead of DHT from the command line, with automatic selection of the local subvolume on each host. Change-Id: I0065938db3922361fd450a6c1919a4cbbf6f202e BUG: 882278 Signed-off-by: Jeff Darcy Reviewed-on: http://review.gluster.org/4234 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- tests/bugs/bug-882278.t | 72 +++++++++++++++++++++++++++ tests/include.rc | 3 ++ xlators/cluster/dht/src/nufa.c | 76 +++++++++++++++++++++++++---- xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 ++++- 4 files changed, 154 insertions(+), 10 deletions(-) create mode 100755 tests/bugs/bug-882278.t diff --git a/tests/bugs/bug-882278.t b/tests/bugs/bug-882278.t new file mode 100755 index 000000000..7933e1863 --- /dev/null +++ b/tests/bugs/bug-882278.t @@ -0,0 +1,72 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +cleanup + +# Is there a good reason to require --fqdn elsewhere? It's worse than useless +# here. +H0=$(hostname -s) + +function recreate { + # The rm is necessary so we don't get fooled by leftovers from old runs. 
+ rm -rf $1 && mkdir -p $1 +} + +function count_lines { + grep "$1" $2/* | wc -l +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +## Start and create a volume +TEST recreate ${B0}/${V0}-0 +TEST recreate ${B0}/${V0}-1 +TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1} +TEST $CLI volume set $V0 cluster.nufa on + +function volinfo_field() +{ + local vol=$1; + local field=$2; + + $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +} + + +## Verify volume is created +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; + +## Start volume and verify +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount native +special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1" +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0 + +## Create a bunch of test files. +for i in $(seq 0 99); do + echo hello > $(printf $M0/file%02d $i) +done + +## Make sure the files went to the right place. There might be link files in +## the other brick, but they won't have any contents. +EXPECT "0" count_lines hello ${B0}/${V0}-0 +EXPECT "100" count_lines hello ${B0}/${V0}-1 + +if [ "$EXIT_EARLY" = "1" ]; then + exit 0; +fi + +## Finish up +TEST umount $M0; +TEST $CLI volume stop $V0; +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +TEST $CLI volume delete $V0; +TEST ! 
$CLI volume info $V0; + +cleanup; diff --git a/tests/include.rc b/tests/include.rc index cfa9390aa..b572fbc63 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -39,6 +39,9 @@ function test_footer() echo "ok $t"; else echo "not ok $t"; + if [ "$EXIT_EARLY" = "1" ]; then + exit $RET + fi fi dbg "RESULT $t: $RET"; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index a2a97ab3c..701d7ae8d 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -491,6 +491,26 @@ fini (xlator_t *this) return; } +gf_boolean_t +same_first_part (char *str1, char term1, char *str2, char term2) +{ + gf_boolean_t ended1; + gf_boolean_t ended2; + + for (;;) { + ended1 = ((*str1 == '\0') || (*str1 == term1)); + ended2 = ((*str2 == '\0') || (*str2 == term2)); + if (ended1 && ended2) { + return _gf_true; + } + if (ended1 || ended2 || (*str1 != *str2)) { + return _gf_false; + } + ++str1; + ++str2; + } +} + int init (xlator_t *this) { @@ -504,6 +524,9 @@ init (xlator_t *this) char my_hostname[256]; double temp_free_disk = 0; uint64_t size = 0; + xlator_t *local_subvol = NULL; + char *brick_host = NULL; + xlator_t *kid = NULL; if (!this->children) { gf_log (this->name, GF_LOG_CRITICAL, @@ -562,23 +585,58 @@ init (xlator_t *this) local_volname = data->data; } - trav = this->children; - while (trav) { + for (trav = this->children; trav; trav = trav->next) { if (strcmp (trav->xlator->name, local_volname) == 0) break; - trav = trav->next; + if (local_subvol) { + continue; + } + kid = trav->xlator; + for (;;) { + if (dict_get_str(kid->options,"remote-host", + &brick_host) == 0) { + /* Found it on the xlator we walked down to (kid), not only on the top-level child. */ + break; + } + if (!kid->children) { + /* Nowhere further to look. */ + gf_log (this->name, GF_LOG_ERROR, + "could not get remote-host"); + goto err; + } + if (kid->children->next) { + /* Multiple choices, can't/shouldn't decide. 
*/ + gf_log (this->name, GF_LOG_ERROR, + "NUFA found fan-out (type %s) volume", + kid->type); + goto err; + } + /* One-to-one xlators are OK, try the next one. */ + kid = kid->children->xlator; + } + if (same_first_part(my_hostname,'.',brick_host,'.')) { + local_subvol = trav->xlator; + } } - if (!trav) { + if (trav) { + gf_log (this->name, GF_LOG_INFO, + "Using specified subvol %s", local_volname); + conf->private = trav->xlator; + } + else if (local_subvol) { + gf_log (this->name, GF_LOG_INFO, + "Using first local subvol %s", local_subvol->name); + conf->private = local_subvol; + } + else { gf_log (this->name, GF_LOG_ERROR, - "Could not find subvolume named '%s'. " - "Please define volume with the name as the hostname " - "or override it with 'option local-volume-name'", - local_volname); + "Could not find specified or local subvol"); goto err; + } + /* The volume specified exists */ - conf->private = trav->xlator; conf->min_free_disk = 10; conf->disk_unit = 'p'; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index da1c2a4ef..40b2e32a2 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -108,6 +108,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.rebalance-stats", "cluster/distribute", NULL, NULL, NO_DOC, 0, 2}, {"cluster.subvols-per-directory", "cluster/distribute", "directory-layout-spread", NULL, NO_DOC, 0, 2}, {"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0, 2}, + {"cluster.nufa", "cluster/distribute", "!nufa", NULL, NO_DOC, 0, 2}, /* AFR xlator options */ {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0, 1}, @@ -2399,9 +2400,19 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph, int ret = -1; char *decommissioned_children = NULL; xlator_t *dht = NULL; + char *optstr = NULL; + gf_boolean_t use_nufa = _gf_false; + if (dict_get_str(volinfo->dict,"cluster.nufa",&optstr) == 
0) { + /* Keep static analyzers quiet by "using" the value. */ + ret = gf_string2boolean(optstr,&use_nufa); + } + clusters = volgen_graph_build_clusters (graph, volinfo, - "cluster/distribute", "%s-dht", + use_nufa + ? "cluster/nufa" + : "cluster/distribute", + "%s-dht", child_count, child_count); if (clusters < 0) goto out; -- cgit