From 0baa65b651036ada96d9fc190232e4f100dc12e8 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Fri, 23 Mar 2012 16:44:38 -0400 Subject: replicate: default read_child to a local brick if there is one. Controlled by the "choose-local" option (on by default). Change-Id: I560f27c81703f2c9c62fdb51532c8eb763826df7 BUG: 806462 Signed-off-by: Jeff Darcy Reviewed-on: http://review.gluster.com/3005 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-common.c | 88 +++++++++++++++++++++++++++- xlators/cluster/afr/src/afr-self-heald.c | 56 ++++++++++++++++++ xlators/cluster/afr/src/afr-self-heald.h | 7 +++ xlators/cluster/afr/src/afr.c | 11 +++- xlators/cluster/afr/src/afr.h | 3 + xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 + xlators/protocol/client/src/client3_1-fops.c | 4 +- 7 files changed, 165 insertions(+), 5 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 21a2be3dd..f24bd8b7d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2035,12 +2035,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this, afr_set_root_inode_on_first_lookup (local, this, inode); } +static int32_t +afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int ret = 0; + char *pathinfo = NULL; + gf_boolean_t is_local = _gf_false; + afr_private_t *priv = NULL; + int32_t child_index = -1; + + if (op_ret != 0) { + goto out; + } + + ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret != 0) { + goto out; + } + + ret = afr_local_pathinfo (pathinfo, &is_local); + if (ret) { + goto out; + } + + priv = this->private; + /* + * Note that one local subvolume will override another here. The only + * way to avoid that would be to retain extra information about whether + * the previous read_child is local, and it's just not worth it. Even + * the slowest local subvolume is far preferable to a remote one. + */ + if (is_local) { + child_index = (int32_t)(long)cookie; + gf_log (this->name, GF_LOG_INFO, + "selecting local read_child %s", + priv->children[child_index]->name); + priv->read_child = child_index; + } + +out: + STACK_DESTROY(frame->root); + return 0; +} + +static void +afr_attempt_local_discovery (xlator_t *this, int32_t child_index) +{ + call_frame_t *newframe = NULL; + loc_t tmploc = {0,}; + afr_private_t *priv = this->private; + + newframe = create_frame(this,this->ctx->pool); + if (!newframe) { + return; + } + + tmploc.gfid[sizeof(tmploc.gfid)-1] = 1; + STACK_WIND_COOKIE (newframe, afr_discovery_cbk, + (void *)(long)child_index, + priv->children[child_index], + priv->children[child_index]->fops->getxattr, + &tmploc, GF_XATTR_PATHINFO_KEY, NULL); +} + static void afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xattr, struct iatt *postparent) { + afr_private_t *priv = this->private; + if (local->success_count == 0) { if (local->op_errno != ESTALE) { local->op_ret = op_ret; @@ -2053,6 +2120,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind afr_lookup_cache_args (local, child_index, xattr, buf, postparent); + + if (local->do_discovery && (priv->read_child == (-1))) { + afr_attempt_local_discovery(this,child_index); + } + local->cont.lookup.success_children[local->success_count] = child_index; local->success_count++; } @@ -2214,8 +2286,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. */ local->op_errno = ENOTCONN; - local->call_count = afr_up_children_count (local->child_up, - priv->child_count); ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc, &gfid_req); if (ret) { @@ -2225,6 +2295,12 @@ afr_lookup (call_frame_t *frame, xlator_t *this, afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req, &local->loc); local->fop = GF_FOP_LOOKUP; + if (priv->choose_local && !priv->did_discovery) { + if (__is_root_gfid(gfid_req)) { + local->do_discovery = _gf_true; + priv->did_discovery = _gf_true; + } + } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -3626,6 +3702,14 @@ afr_notify (xlator_t *this, int32_t event, if (!priv) return 0; + /* + * We need to reset this in case children come up in "staggered" + * fashion, so that we discover a late-arriving local subvolume. Note + * that we could end up issuing N lookups to the first subvolume, and + * O(N^2) overall, but N is small for AFR so it shouldn't be an issue. + */ + priv->did_discovery = _gf_false; + had_heard_from_all = 1; for (i = 0; i < priv->child_count; i++) { if (!priv->last_event[i]) { diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 575bf4361..c5deb18b8 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -671,6 +671,62 @@ out: return; } +static int +get_pathinfo_host (char *pathinfo, char *hostname, size_t size) +{ + char *start = NULL; + char *end = NULL; + int ret = -1; + int i = 0; + + if (!pathinfo) + goto out; + + start = strchr (pathinfo, ':'); + if (!start) + goto out; + end = strrchr (pathinfo, ':'); + if (start == end) + goto out; + + memset (hostname, 0, size); + i = 0; + while (++start != end) + hostname[i++] = *start; + ret = 0; +out: + return ret; +} + +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +{ + int ret = 0; + char pathinfohost[1024] = {0}; + char localhost[1024] = {0}; + xlator_t *this = THIS; + + *local = _gf_false; + ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", + pathinfo); + goto out; + } + + ret = gethostname (localhost, sizeof (localhost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " + "reason: %s", strerror (errno)); + goto out; + } + + if (!strcmp (localhost, pathinfohost)) + *local = _gf_true; +out: + return ret; +} + int afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, loc_t *dirloc) diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 8e6084596..32a8aaca5 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -42,4 +42,11 @@ afr_proactive_self_heal (void *data); int afr_xl_op (xlator_t *this, dict_t *input, dict_t *output); + +/* + * In addition to its self-heal use, this is used to find a local default + * read_child. + */ +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local); #endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index b7ba26197..4f7bf2de0 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -170,6 +170,9 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options, int32, out); + /* Reset this so we re-discover in case the topology changed. */ + priv->did_discovery = _gf_false; + ret = 0; out: return ret; @@ -227,7 +230,6 @@ init (xlator_t *this) priv->child_count = child_count; - priv->read_child = -1; GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out); @@ -239,6 +241,7 @@ init (xlator_t *this) goto out; } } + GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out); GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out); @@ -508,6 +511,12 @@ struct volume_options options[] = { "1 = hash by GFID (all clients use same subvolume), " "2 = hash by GFID and client PID", }, + { .key = {"choose-local" }, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Choose a local subvolume to read from if " + "read-subvolume is not explicitly set.", + }, { .key = {"favorite-child"}, .type = GF_OPTION_TYPE_XLATOR }, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a1a30562b..c8e01fcb8 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -156,6 +156,8 @@ typedef struct _afr_private { char vol_uuid[UUID_SIZE + 1]; int32_t *last_event; afr_self_heald_t shd; + gf_boolean_t choose_local; + gf_boolean_t did_discovery; } afr_private_t; typedef struct { @@ -697,6 +699,7 @@ typedef struct _afr_local { mode_t umask; int xflag; + gf_boolean_t do_discovery; } afr_local_t; typedef enum { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 515bff359..219352c61 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -136,6 +136,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.eager-lock", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.quorum-type", "cluster/replicate", "quorum-type", NULL, NO_DOC, 0}, {"cluster.quorum-count", "cluster/replicate", "quorum-count", NULL, NO_DOC, 0}, + {"cluster.choose-local", "cluster/replicate", NULL, NULL, DOC, 0}, {"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC, 0}, diff --git a/xlators/protocol/client/src/client3_1-fops.c b/xlators/protocol/client/src/client3_1-fops.c index 266f84a1d..d44eb86c4 100644 --- a/xlators/protocol/client/src/client3_1-fops.c +++ b/xlators/protocol/client/src/client3_1-fops.c @@ -4688,7 +4688,7 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, } args = data; - if (!(args->loc && args->loc->inode)) { + if (!args->loc) { op_errno = EINVAL; goto unwind; } @@ -4729,7 +4729,7 @@ client3_1_getxattr (call_frame_t *frame, xlator_t *this, rsp_iobuf = NULL; rsp_iobref = NULL; - if (!uuid_is_null (args->loc->inode->gfid)) + if (args->loc->inode && !uuid_is_null (args->loc->inode->gfid)) memcpy (req.gfid, args->loc->inode->gfid, 16); else memcpy (req.gfid, args->loc->gfid, 16); -- cgit