summary refs log tree commit diff stats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 88
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 56
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.h 7
-rw-r--r-- xlators/cluster/afr/src/afr.c 11
-rw-r--r-- xlators/cluster/afr/src/afr.h 3
5 files changed, 162 insertions, 3 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 21a2be3dd6f..f24bd8b7d7f 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2035,12 +2035,79 @@ afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
afr_set_root_inode_on_first_lookup (local, this, inode);
}
+/*
+ * Callback for the pathinfo getxattr wound by afr_attempt_local_discovery().
+ *
+ * @cookie carries the index of the child that replied.  When the reply
+ * succeeded, the pathinfo string can be fetched from @dict, and
+ * afr_local_pathinfo() reports it as local, that child index is stored in
+ * priv->read_child.  Any failure along the way is ignored.
+ *
+ * The call stack of @frame is destroyed on every path; always returns 0.
+ */
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *dict,
+                   dict_t *xdata)
+{
+        afr_private_t  *priv         = NULL;
+        char           *brick_info   = NULL;
+        gf_boolean_t    on_this_host = _gf_false;
+        int32_t         idx          = -1;
+
+        /* Each step is attempted only if the previous one succeeded. */
+        if ((op_ret == 0) &&
+            (dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &brick_info) == 0) &&
+            (afr_local_pathinfo (brick_info, &on_this_host) == 0) &&
+            on_this_host) {
+                /*
+                 * Note that one local subvolume will override another
+                 * here.  The only way to avoid that would be to retain
+                 * extra information about whether the previous read_child
+                 * is local, and it's just not worth it.  Even the slowest
+                 * local subvolume is far preferable to a remote one.
+                 */
+                priv = this->private;
+                idx = (int32_t)(long)cookie;
+                gf_log (this->name, GF_LOG_INFO,
+                        "selecting local read_child %s",
+                        priv->children[idx]->name);
+                priv->read_child = idx;
+        }
+
+        STACK_DESTROY (frame->root);
+        return 0;
+}
+
+/*
+ * Send a getxattr for GF_XATTR_PATHINFO_KEY to the child at @child_index,
+ * with afr_discovery_cbk() as the callback and the child index as cookie.
+ *
+ * A new frame is created from this->ctx->pool for the request; if the frame
+ * cannot be created the attempt is silently dropped.
+ */
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+        afr_private_t *priv           = this->private;
+        call_frame_t  *discover_frame = NULL;
+        loc_t          target         = {0,};
+
+        discover_frame = create_frame (this, this->ctx->pool);
+        if (discover_frame == NULL)
+                return;
+
+        /*
+         * All-zero gfid except for a 1 in the last byte.
+         * NOTE(review): presumably the root gfid -- confirm.
+         */
+        target.gfid[sizeof (target.gfid) - 1] = 1;
+
+        STACK_WIND_COOKIE (discover_frame, afr_discovery_cbk,
+                           (void *)(long)child_index,
+                           priv->children[child_index],
+                           priv->children[child_index]->fops->getxattr,
+                           &target, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
static void
afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
struct iatt *postparent)
{
+ afr_private_t *priv = this->private;
+
if (local->success_count == 0) {
if (local->op_errno != ESTALE) {
local->op_ret = op_ret;
@@ -2053,6 +2120,11 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind
afr_lookup_cache_args (local, child_index, xattr,
buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
local->cont.lookup.success_children[local->success_count] = child_index;
local->success_count++;
}
@@ -2214,8 +2286,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
/* By default assume ENOTCONN. On success it will be set to 0. */
local->op_errno = ENOTCONN;
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
&gfid_req);
if (ret) {
@@ -2225,6 +2295,12 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
&local->loc);
local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (__is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
STACK_WIND_COOKIE (frame, afr_lookup_cbk,
@@ -3626,6 +3702,14 @@ afr_notify (xlator_t *this, int32_t event,
if (!priv)
return 0;
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
had_heard_from_all = 1;
for (i = 0; i < priv->child_count; i++) {
if (!priv->last_event[i]) {
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 575bf4361d2..c5deb18b8af 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -671,6 +671,62 @@ out:
return;
}
+/*
+ * Copy the text found between the first and the last ':' of @pathinfo into
+ * @hostname, a buffer of @size bytes.  On success the whole buffer is
+ * zero-filled first, so the result is always NUL-terminated.
+ *
+ * Returns 0 on success.  Returns -1 and leaves @hostname untouched when:
+ *   - @pathinfo or @hostname is NULL, or @size is 0;
+ *   - @pathinfo contains fewer than two ':' characters;
+ *   - the extracted text plus its terminating NUL does not fit in @size.
+ */
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+        char   *start = NULL;
+        char   *end   = NULL;
+        size_t  len   = 0;
+        int     ret   = -1;
+
+        if (!pathinfo || !hostname || size == 0)
+                goto out;
+
+        start = strchr (pathinfo, ':');
+        if (!start)
+                goto out;
+        end = strrchr (pathinfo, ':');
+        if (start == end)
+                goto out;
+
+        /* Number of bytes strictly between the two colons (may be 0). */
+        len = (size_t)(end - start) - 1;
+
+        /*
+         * Refuse rather than truncate: writing past @size would corrupt
+         * the caller's buffer, and a shortened name could compare equal
+         * to an unrelated host.
+         */
+        if (len >= size)
+                goto out;
+
+        memset (hostname, 0, size);
+        memcpy (hostname, start + 1, len);
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Decide whether @pathinfo names this machine.
+ *
+ * The host field is extracted with get_pathinfo_host() and compared, as an
+ * exact string, with the name returned by gethostname().  *@local is set to
+ * _gf_true only when both lookups succeed and the two names are identical;
+ * in every other case it is _gf_false.
+ *
+ * Returns 0 when the comparison was carried out, otherwise the non-zero
+ * result of whichever step failed (the failure is logged).
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+        xlator_t *this             = THIS;
+        char      brick_host[1024] = {0};
+        char      my_host[1024]    = {0};
+        int       rc               = 0;
+
+        /* Pessimistic default, kept on every error path. */
+        *local = _gf_false;
+
+        rc = get_pathinfo_host (pathinfo, brick_host, sizeof (brick_host));
+        if (rc != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+                        pathinfo);
+                return rc;
+        }
+
+        rc = gethostname (my_host, sizeof (my_host));
+        if (rc != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+                        "reason: %s", strerror (errno));
+                return rc;
+        }
+
+        *local = (strcmp (my_host, brick_host) == 0) ? _gf_true : _gf_false;
+        return rc;
+}
+
int
afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
loc_t *dirloc)
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 8e608459684..32a8aaca50c 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -42,4 +42,11 @@ afr_proactive_self_heal (void *data);
int
afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+/*
+ * In addition to its self-heal use, this is used to find a local default
+ * read_child.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index b7ba2619711..4f7bf2de004 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -170,6 +170,9 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
int32, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ priv->did_discovery = _gf_false;
+
ret = 0;
out:
return ret;
@@ -227,7 +230,6 @@ init (xlator_t *this)
priv->child_count = child_count;
-
priv->read_child = -1;
GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
@@ -239,6 +241,7 @@ init (xlator_t *this)
goto out;
}
}
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
@@ -508,6 +511,12 @@ struct volume_options options[] = {
"1 = hash by GFID (all clients use same subvolume), "
"2 = hash by GFID and client PID",
},
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume to read from if "
+ "read-subvolume is not explicitly set.",
+ },
{ .key = {"favorite-child"},
.type = GF_OPTION_TYPE_XLATOR
},
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index a1a30562bf1..c8e01fcb841 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -156,6 +156,8 @@ typedef struct _afr_private {
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
afr_self_heald_t shd;
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
} afr_private_t;
typedef struct {
@@ -697,6 +699,7 @@ typedef struct _afr_local {
mode_t umask;
int xflag;
+ gf_boolean_t do_discovery;
} afr_local_t;
typedef enum {