diff options
Diffstat (limited to 'xlators/mgmt')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 151 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 13 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 2 |
3 files changed, 157 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index c760b947551..2c52cf72a3f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -115,7 +115,6 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg) return NULL; } -#ifdef __not_used_as_of_now_ static xlator_t * xlator_instantiate (const char *type, const char *format, ...) { @@ -128,7 +127,6 @@ xlator_instantiate (const char *type, const char *format, ...) return xl; } -#endif static int volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) @@ -1825,6 +1823,107 @@ out: return ret; } +xlator_t * +add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer, + char *volname, uint16_t index) +{ + xlator_t *kid; + + kid = volgen_graph_add_nolink (graph, "protocol/client", + "%s-client-%u", volname, + index++); + if (!kid) { + return NULL; + } + + /* TBD: figure out where to get the proper transport list */ + if (xlator_set_option(kid, "transport-type", "socket")) { + return NULL; + } + if (xlator_set_option(kid, "remote-host", peer->hostname)) { + return NULL; + } + if (xlator_set_option(kid, "remote-subvolume", peer->path)) { + return NULL; + } + /* TBD: deal with RDMA, SSL */ + + return kid; +} + +int +add_nsr_stuff (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + xlator_t *me; + glusterd_brickinfo_t *peer; + glusterd_brickinfo_t *prev_peer; + char *leader_opt; + uint16_t index = 0; + xlator_t *kid; + + /* Create the NSR xlator, but defer linkage for now. */ + me = xlator_instantiate ("experimental/nsr", "%s-nsr", + volinfo->volname); + if (!me || volgen_xlator_link(me, first_of(graph))) { + return -1; + } + + /* Figure out if we should start as leader, mark appropriately. */ + peer = list_prev (brickinfo, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + leader_opt = (!peer || (peer->group != brickinfo->group)) ? "yes" + : "no"; + if (xlator_set_option(me, "leader", leader_opt)) { + /* + * TBD: fix memory leak ("me" and associated dictionary) + * There seems to be no function already to clean up a + * just-allocated translator object if something else fails. + * Apparently the convention elsewhere in this file is to return + * without freeing anything, but we can't keep being that sloppy + * forever. + */ + return -1; + } + + /* + * Make sure we're at the beginning of the list of bricks in this + * replica set. This way all bricks' volfiles have peers in a + * consistent order. + */ + peer = brickinfo; + for (;;) { + prev_peer = list_prev (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + if (!prev_peer || (prev_peer->group != brickinfo->group)) { + break; + } + peer = prev_peer; + } + + /* Actually add the peers. */ + do { + if (peer != brickinfo) { + gf_log ("glusterd", GF_LOG_INFO, + "%s:%s needs client for %s:%s", + brickinfo->hostname, brickinfo->path, + peer->hostname, peer->path); + kid = add_one_peer (graph, peer, + volinfo->volname, index++); + if (!kid || volgen_xlator_link(me, kid)) { + return -1; + } + } + peer = list_next (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + } while (peer && (peer->group == brickinfo->group)); + + /* Finish linkage to client file. */ + glusterfs_graph_set_first(&graph->graph, me); + + return 0; +} + static int brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -1837,6 +1936,11 @@ brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (!graph || !volinfo || !brickinfo || !set_dict) goto out; + /* For NSR we don't need/want index. */ + if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) { + return add_nsr_stuff (graph, volinfo, brickinfo); + } + xl = volgen_graph_add (graph, "features/index", volinfo->volname); if (!xl) goto out; @@ -3379,12 +3483,18 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph, int i = 0; int ret = 0; int clusters = 0; - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; + char *replicate_type = NULL; + char *replicate_name = "%s-replicate-%d"; xlator_t *afr = NULL; char option[32] = {0}; int start_count = 0; + if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) { + replicate_type = "experimental/nsrc"; + } else { + replicate_type = "cluster/replicate"; + } + if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE) start_count = volinfo->tier_info.cold_brick_count / volinfo->tier_info.cold_replica_count; @@ -3392,16 +3502,16 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph, if (volinfo->tier_info.cur_tier_hot) clusters = volgen_link_bricks_from_list_head_start (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_name, volinfo->brick_count, volinfo->replica_count, start_count); else clusters = volgen_link_bricks_from_list_tail (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_name, volinfo->brick_count, volinfo->replica_count); @@ -5139,6 +5249,27 @@ get_parent_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo) PATH_MAX - strlen(filename) - 1); } +void +assign_groups (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + uint16_t group_num = 0; + int in_group = 0; + uuid_t tmp_uuid; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (in_group == 0) { + gf_uuid_generate(tmp_uuid); + } + brickinfo->group = group_num; + gf_uuid_copy(brickinfo->nsr_uuid, tmp_uuid); + if (++in_group >= volinfo->replica_count) { + in_group = 0; + ++group_num; + } + } +} + int generate_brick_volfiles (glusterd_volinfo_t *volinfo) { @@ -5207,6 +5338,10 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo) } } + if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) { + assign_groups(volinfo); + } + ret = glusterd_volume_brick_for_each (volinfo, NULL, glusterd_generate_brick_volfile); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 112980c4661..ec7a19070ab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2644,6 +2644,19 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_7_6, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "cluster.nsr", + .voltype = "experimental/nsr", + .option = "!nsr", + .op_version = GD_OP_VERSION_4_0_0, + .description = "enable NSR instead of AFR for replication", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "cluster.nsr.quorum-percent", + .voltype = "experimental/nsr", + .option = "quorum-percent", + .op_version = GD_OP_VERSION_4_0_0, + .description = "percent of rep_count-1 bricks that must be up" + }, { .key = NULL } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 0af7be07300..0a313918856 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -215,7 +215,7 @@ struct glusterd_brickinfo { * a replica 3 volume with arbiter enabled. */ uint16_t group; - + uuid_t nsr_uuid; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; |