diff options
-rw-r--r-- | libglusterfs/src/list.h | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 86 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 |
8 files changed, 111 insertions, 14 deletions
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
index a860275a91e..875594136a2 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/list.h
@@ -256,4 +256,18 @@ static inline void list_replace_init(struct list_head *old,
              &pos->member != (head); \
              pos = n, n = list_entry(n->member.prev, typeof(*n), member))
 
+/*
+ * This list implementation cannot use NULL to mark the head or tail, so
+ * the address of the head has to be an argument for these macros. They
+ * yield NULL when ptr's neighbour is the head, else the adjacent entry.
+ */
+
+#define list_next(ptr, head, type, member) \
+        (((ptr)->member.next == (head)) ? NULL \
+                                 : list_entry((ptr)->member.next, type, member))
+
+#define list_prev(ptr, head, type, member) \
+        (((ptr)->member.prev == (head)) ? NULL \
+                                 : list_entry((ptr)->member.prev, type, member))
+
 #endif /* _LLIST_H */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f962fb6494e..21575fed2de 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -274,6 +274,8 @@ init (xlator_t *this)
 
         priv->read_child = -1;
 
+        GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
+
         GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
 
         GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
@@ -794,5 +796,9 @@ struct volume_options options[] = {
                           "attributes from the same subvol as long as it holds "
                           " a good copy of the file/dir.",
         },
+        { .key = {"arbiter-count"},
+          .type = GF_OPTION_TYPE_INT,
+          .description = "subset of child_count. Has to be 0 or 1."
+ }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index de000e765ea..f7bc6ea0f94 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -55,6 +55,8 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this); typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ + unsigned int arbiter_count; /*subset of child_count. + Has to be 0 or 1.*/ xlator_t **children; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e23d2a35fe8..2103fd62e03 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6028,6 +6028,7 @@ glusterd_recreate_volfiles (glusterd_conf_t *conf) int op_ret = 0; GF_ASSERT (conf); + cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) { ret = generate_brick_volfiles (volinfo); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index faaf5d59a48..a149e9916df 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1502,12 +1502,26 @@ brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { xlator_t *xl = NULL; + glusterd_brickinfo_t *next = NULL; + glusterd_brickinfo_t *last = NULL; int ret = -1; if (volinfo->arbiter_count != 1) return 0; - /*TODO: Parse brickinfo and add the arbiter xlator only if brick is the - * last brick (i.e. 3rd brick) of the replcia pair.*/ + + /* Find the last brick in the same group. 
*/
+        last = brickinfo;
+        for (;;) {
+                next = list_next (last, &volinfo->bricks,
+                                  glusterd_brickinfo_t, brick_list);
+                if (!next || (next->group != brickinfo->group)) {
+                        break;
+                }
+                last = next;
+        }
+        if (last != brickinfo)
+                return 0;
+
         xl = volgen_graph_add (graph, "features/arbiter", volinfo->volname);
         if (!xl)
                 goto out;
@@ -1571,6 +1585,22 @@ out:
         return ret;
 }
 
+void
+assign_brick_groups (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t    *brickinfo      = NULL;
+        uint16_t                group_num       = 0;
+        int                     in_group        = 0;
+        /* Walk bricks in list order; bump the group every replica_count. */
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                brickinfo->group = group_num;
+                if (++in_group >= volinfo->replica_count) {
+                        in_group = 0;
+                        ++group_num;
+                }
+        }
+}
+
 static int
 brick_graph_add_changelog (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
                            dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -3087,6 +3117,43 @@ out:
 }
 
 static int
+volgen_graph_build_afr_clusters (volgen_graph_t *graph,
+                                 glusterd_volinfo_t *volinfo)
+{
+        int             i                    = 0;
+        int             ret                  = 0;
+        int             clusters             = 0;
+        char            *replicate_args[]    = {"cluster/replicate",
+                                                "%s-replicate-%d"};
+        xlator_t        *afr                 = NULL;
+        char            option[32]           = {0};
+
+        clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
+                                                replicate_args[0],
+                                                replicate_args[1],
+                                                volinfo->brick_count,
+                                                volinfo->replica_count);
+        if (clusters < 0)
+                goto out;
+
+        if (!volinfo->arbiter_count)
+                goto out;
+        /* Tag each of the `clusters` xlators from first_of() onward. */
+        afr = first_of (graph);
+        snprintf (option, sizeof (option), "%d", volinfo->arbiter_count);
+        for (i = 0; i < clusters; i++) {
+                ret = xlator_set_option (afr, "arbiter-count", option);
+                if (ret) {
+                        clusters = -1;
+                        goto out;
+                }
+                afr = afr->next;
+        }
+out:
+        return clusters;
+}
+
+static int
 volume_volgen_graph_build_clusters (volgen_graph_t *graph,
                                     glusterd_volinfo_t *volinfo,
                                     gf_boolean_t is_quotad)
@@ -3116,13 +3183,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
         /* All other cases, it will have one or the other cluster type */
         switch (volinfo->type) {
         case GF_CLUSTER_TYPE_REPLICATE:
-                clusters = 
volgen_link_bricks_from_list_tail - (graph, volinfo, - replicate_args[0], - replicate_args[1], - volinfo->brick_count, - volinfo->replica_count); - + clusters = volgen_graph_build_afr_clusters (graph, volinfo); if (clusters < 0) goto out; break; @@ -3146,11 +3207,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* Replicate after the clients, then stripe */ if (volinfo->replica_count == 0) goto out; - clusters = volgen_link_bricks_from_list_tail (graph, volinfo, - replicate_args[0], - replicate_args[1], - volinfo->brick_count, - volinfo->replica_count); + clusters = volgen_graph_build_afr_clusters (graph, volinfo); if (clusters < 0) goto out; @@ -4473,6 +4530,7 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo) if (ret == -1) return -1; + assign_brick_groups (volinfo); get_vol_tstamp_file (tstamp_file, volinfo); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 02f8df0cf7d..4575049ada9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -218,6 +218,9 @@ glusterd_check_voloption_flags (char *key, int32_t flags); gf_boolean_t glusterd_is_valid_volfpath (char *volname, char *brick); +void +assign_brick_groups (glusterd_volinfo_t *volinfo); + int generate_brick_volfiles (glusterd_volinfo_t *volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index a42f08c1600..de3045ffde3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1996,6 +1996,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) "replica count for volume %s", volname); goto out; } + ret = dict_get_int32 (dict, "arbiter-count", + &volinfo->arbiter_count); } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) { ret = dict_get_int32 (dict, "stripe-count", &volinfo->stripe_count); @@ -2019,6 +2021,8 @@ 
glusterd_op_create_volume (dict_t *dict, char **op_errstr) "replica count for volume %s", volname); goto out; } + ret = dict_get_int32 (dict, "arbiter-count", + &volinfo->arbiter_count); } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { ret = dict_get_int32 (dict, "disperse-count", &volinfo->disperse_count); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ff63cce2234..60c3ebdf9bb 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -202,6 +202,15 @@ struct glusterd_brickinfo { char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ int32_t snap_status; + /* + * The group is used to identify which bricks are part of the same + * replica set during brick-volfile generation, so that NSR volfiles + * can "cross-connect" the bricks to one another. It is also used by + * AFR to load the arbiter xlator in the appropriate brick in case of + * a replica 3 volume with arbiter enabled. + */ + uint16_t group; + }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; |