diff options
author | Ravishankar N <ravishankar@redhat.com> | 2015-04-15 22:53:21 +0530 |
---|---|---|
committer | Krishnan Parthasarathi <kparthas@redhat.com> | 2015-04-27 22:40:31 -0700 |
commit | d4889b2cfd29e6ecc911d2b29d1f85d516a66eaf (patch) | |
tree | a702c6323d43561d79d2236b8cb7087e5207b2b7 /xlators | |
parent | 70a729e9751e45e266f7462443dcf2b6be3cecbe (diff) |
arbiter: load arbiter xlator on every 3rd brick of a replica 3 AFR subvol
Logic for adding the 'glusterd_brickinfo->group' member and using it to
find the brick position has been taken from http://review.gluster.org/#/c/9919.
Thanks to Jeff Darcy for that.
This patch is a part of the arbiter logic implementation for 3-way AFR,
details of which can be found at http://review.gluster.org/#/c/9656/
Change-Id: Idbfe4f29ee8e098e0102def8f38b32314316b188
BUG: 1199985
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/10257
Tested-by: NetBSD Build System
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 86 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 |
7 files changed, 97 insertions, 14 deletions
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index f962fb6494e..21575fed2de 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -274,6 +274,8 @@ init (xlator_t *this) priv->read_child = -1; + GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out); + GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out); GF_OPTION_INIT ("metadata-splitbrain-forced-heal", @@ -794,5 +796,9 @@ struct volume_options options[] = { "attributes from the same subvol as long as it holds " " a good copy of the file/dir.", }, + { .key = {"arbiter-count"}, + .type = GF_OPTION_TYPE_INT, + .description = "subset of child_count. Has to be 0 or 1." + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index de000e765ea..f7bc6ea0f94 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -55,6 +55,8 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this); typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ + unsigned int arbiter_count; /*subset of child_count. 
+ Has to be 0 or 1.*/ xlator_t **children; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e23d2a35fe8..2103fd62e03 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6028,6 +6028,7 @@ glusterd_recreate_volfiles (glusterd_conf_t *conf) int op_ret = 0; GF_ASSERT (conf); + cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) { ret = generate_brick_volfiles (volinfo); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index faaf5d59a48..a149e9916df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1502,12 +1502,26 @@ brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { xlator_t *xl = NULL; + glusterd_brickinfo_t *next = NULL; + glusterd_brickinfo_t *last = NULL; int ret = -1; if (volinfo->arbiter_count != 1) return 0; - /*TODO: Parse brickinfo and add the arbiter xlator only if brick is the - * last brick (i.e. 3rd brick) of the replcia pair.*/ + + /* Find the last brick in the same group. 
*/ + last = brickinfo; + for (;;) { + next = list_next (last, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + if (!next || (next->group != brickinfo->group)) { + break; + } + last = next; + } + if (last != brickinfo) + return 0; + xl = volgen_graph_add (graph, "features/arbiter", volinfo->volname); if (!xl) goto out; @@ -1571,6 +1585,22 @@ out: return ret; } +void +assign_brick_groups (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + uint16_t group_num = 0; + int in_group = 0; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brickinfo->group = group_num; + if (++in_group >= volinfo->replica_count) { + in_group = 0; + ++group_num; + } + } +} + static int brick_graph_add_changelog (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -3087,6 +3117,43 @@ out: } static int +volgen_graph_build_afr_clusters (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) +{ + int i = 0; + int ret = 0; + int clusters = 0; + char *replicate_args[] = {"cluster/replicate", + "%s-replicate-%d"}; + xlator_t *afr = NULL; + char option[32] = {0}; + + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, + replicate_args[0], + replicate_args[1], + volinfo->brick_count, + volinfo->replica_count); + if (clusters < 0) + goto out; + + if (!volinfo->arbiter_count) + goto out; + + afr = first_of (graph); + sprintf(option, "%d", volinfo->arbiter_count); + for (i = 0; i < clusters; i++) { + ret = xlator_set_option (afr, "arbiter-count", option); + if (ret) { + clusters = -1; + goto out; + } + afr = afr->next; + } +out: + return clusters; +} + +static int volume_volgen_graph_build_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, gf_boolean_t is_quotad) @@ -3116,13 +3183,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: - clusters = 
volgen_link_bricks_from_list_tail - (graph, volinfo, - replicate_args[0], - replicate_args[1], - volinfo->brick_count, - volinfo->replica_count); - + clusters = volgen_graph_build_afr_clusters (graph, volinfo); if (clusters < 0) goto out; break; @@ -3146,11 +3207,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* Replicate after the clients, then stripe */ if (volinfo->replica_count == 0) goto out; - clusters = volgen_link_bricks_from_list_tail (graph, volinfo, - replicate_args[0], - replicate_args[1], - volinfo->brick_count, - volinfo->replica_count); + clusters = volgen_graph_build_afr_clusters (graph, volinfo); if (clusters < 0) goto out; @@ -4473,6 +4530,7 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo) if (ret == -1) return -1; + assign_brick_groups (volinfo); get_vol_tstamp_file (tstamp_file, volinfo); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 02f8df0cf7d..4575049ada9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -218,6 +218,9 @@ glusterd_check_voloption_flags (char *key, int32_t flags); gf_boolean_t glusterd_is_valid_volfpath (char *volname, char *brick); +void +assign_brick_groups (glusterd_volinfo_t *volinfo); + int generate_brick_volfiles (glusterd_volinfo_t *volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index a42f08c1600..de3045ffde3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1996,6 +1996,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) "replica count for volume %s", volname); goto out; } + ret = dict_get_int32 (dict, "arbiter-count", + &volinfo->arbiter_count); } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) { ret = dict_get_int32 (dict, "stripe-count", &volinfo->stripe_count); @@ -2019,6 +2021,8 @@ 
glusterd_op_create_volume (dict_t *dict, char **op_errstr) "replica count for volume %s", volname); goto out; } + ret = dict_get_int32 (dict, "arbiter-count", + &volinfo->arbiter_count); } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { ret = dict_get_int32 (dict, "disperse-count", &volinfo->disperse_count); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ff63cce2234..60c3ebdf9bb 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -202,6 +202,15 @@ struct glusterd_brickinfo { char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ int32_t snap_status; + /* + * The group is used to identify which bricks are part of the same + * replica set during brick-volfile generation, so that NSR volfiles + * can "cross-connect" the bricks to one another. It is also used by + * AFR to load the arbiter xlator in the appropriate brick in case of + * a replica 3 volume with arbiter enabled. + */ + uint16_t group; + }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; |