diff options
author | Ravishankar N <ravishankar@redhat.com> | 2015-11-25 09:49:19 +0530 |
---|---|---|
committer | Atin Mukherjee <amukherj@redhat.com> | 2015-12-15 22:17:14 -0800 |
commit | 6e635284a4411b816d4d860a28262c9e6dc4bd6a (patch) | |
tree | 26cfc4129b2632ce1aa3891e31ac7031a53c54b4 /xlators | |
parent | 55f4e8a74e89d61c97e79474c4488ba0bf40a3c1 (diff) |
glusterd/afr: store afr pending xattrs as a volume option
Problem:
When AFR xlator initialises, it uses the name of the client xlators
below it for storing the pending changelogs (xattrs). This can be a
problem when some other xlator is loaded in between AFR and the client.
Though that is a trivial 'traverse-graph-till-the-client-and-use-the-name'
fix in AFR's init(), there are other issues, such as there being no client
xlator at all when, say, AFR is moved to the server side.
Fix:
The client xlator names are currently unique and stored as
brickinfo->brick_ids. So persist these ids as comma separated values in
AFR's volume_options and use them as xattr values during init().
Change-Id: Ie761ffeb3373a4c4d85ad05c84a768c4188aa90d
BUG: 1285152
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/12738
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 32 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 104 |
2 files changed, 120 insertions, 16 deletions
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index fc8940d107b..a3a4490d063 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -244,6 +244,8 @@ init (xlator_t *this) int read_subvol_index = -1; xlator_t *fav_child = NULL; char *qtype = NULL; + char *xattrs_list = NULL; + char *ptr = NULL; if (!this->children) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -397,6 +399,7 @@ init (xlator_t *this) goto out; } + GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out); priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key), child_count, gf_afr_mt_char); @@ -404,20 +407,25 @@ init (xlator_t *this) ret = -ENOMEM; goto out; } - - trav = this->children; - i = 0; - while (i < child_count) { - priv->children[i] = trav->xlator; - + ptr = gf_strdup (xattrs_list); + if (!ptr) { + ret = -ENOMEM; + goto out; + } + for (i = 0, ptr = strtok (ptr, ","); ptr; ptr = strtok (NULL, ",")) { ret = gf_asprintf (&priv->pending_key[i], "%s.%s", - AFR_XATTR_PREFIX, - trav->xlator->name); - if (-1 == ret) { + AFR_XATTR_PREFIX, ptr); + if (ret == -1) { ret = -ENOMEM; goto out; } + i++; + } + trav = this->children; + i = 0; + while (i < child_count) { + priv->children[i] = trav->xlator; trav = trav->next; i++; } @@ -453,6 +461,7 @@ init (xlator_t *this) ret = 0; out: + GF_FREE (ptr); return ret; } @@ -776,6 +785,11 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_STR, .default_value = AFR_DIRTY_DEFAULT, }, + { .key = {"afr-pending-xattr"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma seperated list of xattrs that are used to " + "capture information on pending heals." 
+ }, { .key = {"metadata-splitbrain-forced-heal"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index ebe696a1cf7..c760b947551 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3299,6 +3299,80 @@ out: } static int +set_afr_pending_xattrs_option (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + xlator_t **afr_xlators_list = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brick = NULL; + char *ptr = NULL; + int i = 0; + int index = -1; + int ret = 0; + char *afr_xattrs_list = NULL; + int list_size = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO ("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO (this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_3_7_7) + return ret; + + /* (brick_id x rep.count) + (rep.count-1 commas) + NULL*/ + list_size = (1024 * volinfo->replica_count) + + (volinfo->replica_count - 1) + 1; + afr_xattrs_list = GF_CALLOC (1, list_size, gf_common_mt_char); + if (!afr_xattrs_list) + goto out; + + ptr = afr_xattrs_list; + afr_xlators_list = GF_CALLOC (clusters, sizeof (xlator_t *), + gf_common_mt_xlator_t); + if (!afr_xlators_list) + goto out; + + xlator = first_of (graph); + + for (i = 0, index = clusters - 1; i < clusters; i++) { + afr_xlators_list[index--] = xlator; + xlator = xlator->next; + } + + i = 1; + index = 0; + + cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) { + if (index == clusters) + break; + strncat (ptr, brick->brick_id, strlen(brick->brick_id)); + if (i == volinfo->replica_count) { + ret = xlator_set_option (afr_xlators_list[index++], + "afr-pending-xattr", + afr_xattrs_list); + if (ret) + return ret; + memset (afr_xattrs_list, 0, list_size); + ptr = afr_xattrs_list; + i = 1; + continue; + } + ptr[strlen(brick->brick_id)] = ','; 
+ ptr += strlen (brick->brick_id) + 1; + i++; + } + +out: + GF_FREE (afr_xattrs_list); + GF_FREE (afr_xlators_list); + return ret; +} + +static int volgen_graph_build_afr_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3309,7 +3383,7 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph, "%s-replicate-%d"}; xlator_t *afr = NULL; char option[32] = {0}; - int start_count = 0; + int start_count = 0; if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE) start_count = volinfo->tier_info.cold_brick_count / @@ -3334,6 +3408,11 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph, if (clusters < 0) goto out; + ret = set_afr_pending_xattrs_option (graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } if (!volinfo->arbiter_count) goto out; @@ -3356,8 +3435,6 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, gf_boolean_t is_quotad) { - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; char *tier_args[] = {"cluster/tier", "%s-tier-%d"}; char *stripe_args[] = {"cluster/stripe", @@ -3549,7 +3626,6 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, int ret = -1; xlator_t *root = NULL; xlator_t *xl, *hxl, *cxl; - glusterd_brickinfo_t *brick = NULL; char *rule = NULL; int st_brick_count = 0; int st_replica_count = 0; @@ -3559,6 +3635,8 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, int dist_count = 0; int start_count = 0; char *decommissioned_children = NULL; + glusterd_volinfo_t *dup_volinfo = NULL; + gf_boolean_t is_hot_tier = _gf_false; st_brick_count = volinfo->brick_count; st_replica_count = volinfo->replica_count; @@ -3573,8 +3651,13 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count; volinfo->type = volinfo->tier_info.cold_type; volinfo->tier_info.cur_tier_hot = 0; + ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo, + 
is_hot_tier, volinfo->volname); + if (ret) + goto out; - ret = volume_volgen_graph_build_clusters (graph, volinfo, is_quotad); + ret = volume_volgen_graph_build_clusters (graph, dup_volinfo, + is_quotad); if (ret) goto out; cxl = first_of(graph); @@ -3601,12 +3684,17 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, volinfo->brick_count, volinfo->replica_count, start_count); - if (ret != -1) - volgen_link_bricks_from_list_tail (graph, volinfo, + if (ret != -1) { + ret = set_afr_pending_xattrs_option (graph, volinfo, + ret); + if (ret) + goto out; + volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/distribute", "%s-hot-dht", dist_count, dist_count); + } } else { ret = volgen_link_bricks_from_list_head (graph, volinfo, "cluster/distribute", @@ -3666,6 +3754,8 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, volinfo->dist_leaf_count = st_dist_leaf_count; volinfo->tier_info.cur_tier_hot = 0; + if (dup_volinfo) + glusterd_volinfo_delete (dup_volinfo); GF_FREE (rule); return ret; } |