summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libglusterfs/src/list.h14
-rw-r--r--xlators/cluster/afr/src/afr.c6
-rw-r--r--xlators/cluster/afr/src/afr.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c86
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h9
8 files changed, 111 insertions, 14 deletions
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
index a860275a91e..875594136a2 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/list.h
@@ -256,4 +256,18 @@ static inline void list_replace_init(struct list_head *old,
&pos->member != (head); \
pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+/*
+ * This list implementation has some advantages, but one disadvantage: you
+ * can't use NULL to check whether you're at the head or tail. Thus, the
+ * address of the head has to be an argument for these macros.
+ *
+ * list_next/list_prev evaluate to the entry (of the given 'type', linked
+ * through 'member') that follows/precedes 'ptr', or to NULL when the
+ * neighbouring node is the list head itself. 'ptr' may be evaluated twice,
+ * so it must not have side effects.
+ */
+
+#define list_next(ptr, head, type, member)                      \
+        (((ptr)->member.next == (head)) ? NULL                  \
+         : list_entry((ptr)->member.next, type, member))
+
+#define list_prev(ptr, head, type, member)                      \
+        (((ptr)->member.prev == (head)) ? NULL                  \
+         : list_entry((ptr)->member.prev, type, member))
+
#endif /* _LLIST_H */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index f962fb6494e..21575fed2de 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -274,6 +274,8 @@ init (xlator_t *this)
priv->read_child = -1;
+ GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out);
+
GF_OPTION_INIT ("afr-dirty-xattr", priv->afr_dirty, str, out);
GF_OPTION_INIT ("metadata-splitbrain-forced-heal",
@@ -794,5 +796,9 @@ struct volume_options options[] = {
"attributes from the same subvol as long as it holds "
" a good copy of the file/dir.",
},
+ { .key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index de000e765ea..f7bc6ea0f94 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -55,6 +55,8 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
+ unsigned int arbiter_count; /*subset of child_count.
+ Has to be 0 or 1.*/
xlator_t **children;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index e23d2a35fe8..2103fd62e03 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -6028,6 +6028,7 @@ glusterd_recreate_volfiles (glusterd_conf_t *conf)
int op_ret = 0;
GF_ASSERT (conf);
+
cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
ret = generate_brick_volfiles (volinfo);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index faaf5d59a48..a149e9916df 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1502,12 +1502,26 @@ brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
{
xlator_t *xl = NULL;
+ glusterd_brickinfo_t *next = NULL;
+ glusterd_brickinfo_t *last = NULL;
int ret = -1;
if (volinfo->arbiter_count != 1)
return 0;
- /*TODO: Parse brickinfo and add the arbiter xlator only if brick is the
- * last brick (i.e. 3rd brick) of the replcia pair.*/
+
+ /* Find the last brick in the same group. */
+ last = brickinfo;
+ for (;;) {
+ next = list_next (last, &volinfo->bricks,
+ glusterd_brickinfo_t, brick_list);
+ if (!next || (next->group != brickinfo->group)) {
+ break;
+ }
+ last = next;
+ }
+ if (last != brickinfo)
+ return 0;
+
xl = volgen_graph_add (graph, "features/arbiter", volinfo->volname);
if (!xl)
goto out;
@@ -1571,6 +1585,22 @@ out:
return ret;
}
+/*
+ * Walk volinfo->bricks in list order and stamp every brick with a group
+ * number. The number advances each time replica_count bricks have been
+ * stamped, so consecutive bricks that share a number form one group.
+ */
+void
+assign_brick_groups (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t    *brick      = NULL;
+        uint16_t                 cur_group  = 0;
+        int                      filled     = 0;
+
+        list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                brick->group = cur_group;
+                filled++;
+                if (filled >= volinfo->replica_count) {
+                        /* Current group is complete; open the next one. */
+                        cur_group++;
+                        filled = 0;
+                }
+        }
+}
+
static int
brick_graph_add_changelog (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -3087,6 +3117,43 @@ out:
}
+/*
+ * Build the cluster/replicate layer through
+ * volgen_link_bricks_from_list_tail and, when volinfo->arbiter_count is
+ * non-zero, set the "arbiter-count" option on 'clusters' consecutive
+ * xlators starting at first_of (graph).
+ * NOTE(review): this presumably relies on the replicate xlators just
+ * created being at the front of the graph - confirm against
+ * volgen_link_bricks_from_list_tail.
+ *
+ * Returns the value reported by volgen_link_bricks_from_list_tail (a
+ * negative value is passed through unchanged), or -1 if setting the
+ * option fails.
+ */
static int
+volgen_graph_build_afr_clusters (volgen_graph_t *graph,
+                                 glusterd_volinfo_t *volinfo)
+{
+        int             i                = 0;
+        int             ret              = 0;
+        int             clusters         = 0;
+        char           *replicate_args[] = {"cluster/replicate",
+                                            "%s-replicate-%d"};
+        xlator_t       *afr              = NULL;
+        char            option[32]       = {0};
+
+        clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
+                                                      replicate_args[0],
+                                                      replicate_args[1],
+                                                      volinfo->brick_count,
+                                                      volinfo->replica_count);
+        if (clusters < 0)
+                goto out;
+
+        /* Nothing more to configure when the volume has no arbiter. */
+        if (!volinfo->arbiter_count)
+                goto out;
+
+        afr = first_of (graph);
+        /* Bounded formatting: never write past the end of 'option'. */
+        snprintf (option, sizeof (option), "%d", volinfo->arbiter_count);
+        for (i = 0; i < clusters; i++) {
+                ret = xlator_set_option (afr, "arbiter-count", option);
+                if (ret) {
+                        /* Report the failure through the return value. */
+                        clusters = -1;
+                        goto out;
+                }
+                afr = afr->next;
+        }
+out:
+        return clusters;
+}
+
+static int
volume_volgen_graph_build_clusters (volgen_graph_t *graph,
glusterd_volinfo_t *volinfo,
gf_boolean_t is_quotad)
@@ -3116,13 +3183,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* All other cases, it will have one or the other cluster type */
switch (volinfo->type) {
case GF_CLUSTER_TYPE_REPLICATE:
- clusters = volgen_link_bricks_from_list_tail
- (graph, volinfo,
- replicate_args[0],
- replicate_args[1],
- volinfo->brick_count,
- volinfo->replica_count);
-
+ clusters = volgen_graph_build_afr_clusters (graph, volinfo);
if (clusters < 0)
goto out;
break;
@@ -3146,11 +3207,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* Replicate after the clients, then stripe */
if (volinfo->replica_count == 0)
goto out;
- clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
- replicate_args[0],
- replicate_args[1],
- volinfo->brick_count,
- volinfo->replica_count);
+ clusters = volgen_graph_build_afr_clusters (graph, volinfo);
if (clusters < 0)
goto out;
@@ -4473,6 +4530,7 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo)
if (ret == -1)
return -1;
+ assign_brick_groups (volinfo);
get_vol_tstamp_file (tstamp_file, volinfo);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
index 02f8df0cf7d..4575049ada9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
@@ -218,6 +218,9 @@ glusterd_check_voloption_flags (char *key, int32_t flags);
gf_boolean_t
glusterd_is_valid_volfpath (char *volname, char *brick);
+void
+assign_brick_groups (glusterd_volinfo_t *volinfo);
+
int
generate_brick_volfiles (glusterd_volinfo_t *volinfo);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index a42f08c1600..de3045ffde3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1996,6 +1996,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
"replica count for volume %s", volname);
goto out;
}
+ ret = dict_get_int32 (dict, "arbiter-count",
+ &volinfo->arbiter_count);
} else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) {
ret = dict_get_int32 (dict, "stripe-count",
&volinfo->stripe_count);
@@ -2019,6 +2021,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
"replica count for volume %s", volname);
goto out;
}
+ ret = dict_get_int32 (dict, "arbiter-count",
+ &volinfo->arbiter_count);
} else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
ret = dict_get_int32 (dict, "disperse-count",
&volinfo->disperse_count);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ff63cce2234..60c3ebdf9bb 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -202,6 +202,15 @@ struct glusterd_brickinfo {
char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */
int caps; /* Capability */
int32_t snap_status;
+ /*
+ * The group is used to identify which bricks are part of the same
+ * replica set during brick-volfile generation, so that NSR volfiles
+ * can "cross-connect" the bricks to one another. It is also used by
+ * AFR to load the arbiter xlator in the appropriate brick in case of
+ * a replica 3 volume with arbiter enabled.
+ */
+ uint16_t group;
+
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;