summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src
diff options
context:
space:
mode:
authorKevin Vigor <kvigor@fb.com>2017-03-21 08:23:25 -0700
committerPranith Kumar Karampuri <pkarampu@redhat.com>2017-05-02 10:23:53 +0000
commit07cc8679cdf3b29680f4f105d0222da168d8bfc1 (patch)
treeda838a4f8afc045253300604e2536f97c533e41d /libglusterfs/src
parent9374338f9c2f126c6608625f750d5ea1f7ef6a06 (diff)
Halo Replication feature for AFR translator
Summary: Halo Geo-replication is a feature which allows Gluster or NFS clients to write locally to their region (as defined by a latency "halo" or threshold if you like), and have their writes asynchronously propagate from their origin to the rest of the cluster. Clients can also write synchronously to the cluster simply by specifying a halo-latency which is very large (e.g. 10seconds) which will include all bricks. In other words, it allows clients to decide at mount time if they desire synchronous or asynchronous IO into a cluster and the cluster can support both of these modes to any number of clients simultaneously. There are a few new volume options due to this feature: halo-shd-latency: The threshold below which self-heal daemons will consider children (bricks) connected. halo-nfsd-latency: The threshold below which NFS daemons will consider children (bricks) connected. halo-latency: The threshold below which all other clients will consider children (bricks) connected. halo-min-replicas: The minimum number of replicas which are to be enforced regardless of latency specified in the above 3 options. If the number of children falls below this threshold the next best (chosen by latency) shall be swapped in. New FUSE mount options: halo-latency & halo-min-replicas: As descripted above. This feature combined with multi-threaded SHD support (D1271745) results in some pretty cool geo-replication possibilities. Operational Notes: - Global consistency is gaurenteed for synchronous clients, this is provided by the existing entry-locking mechanism. - Asynchronous clients on the other hand and merely consistent to their region. Writes & deletes will be protected via entry-locks as usual preventing concurrent writes into files which are undergoing replication. Read operations on the other hand should never block. - Writes are allowed from _any_ region and propagated from the origin to all other regions. The take away from this is care should be taken to ensure multiple writers do not write the same files resulting in a gfid split-brain which will require resolution via split-brain policies (majority, mtime & size). Recommended method for preventing this is using the nfs-auth feature to define which region for each share has RW permissions, tiers not in the origin region should have RO perms. TODO: - Synchronous clients (including the SHD) should choose clients from their own region as preferred sources for reads. Most of the plumbing is in place for this via the child_latency array. - Better GFID split brain handling & better dent type split brain handling (i.e. create a trash can and move the offending files into it). - Tagging in addition to latency as a means of defining which children you wish to synchronously write to Test Plan: - The usual suspects, clang, gcc w/ address sanitizer & valgrind - Prove tests Reviewers: jackl, dph, cjh, meyering Reviewed By: meyering Subscribers: ethanr Differential Revision: https://phabricator.fb.com/D1272053 Tasks: 4117827 Change-Id: I694a9ab429722da538da171ec528406e77b5e6d1 BUG: 1428061 Signed-off-by: Kevin Vigor <kvigor@fb.com> Reviewed-on: http://review.gluster.org/16099 Reviewed-on: https://review.gluster.org/16177 Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'libglusterfs/src')
-rw-r--r--libglusterfs/src/common-utils.c28
-rw-r--r--libglusterfs/src/compat.h4
-rw-r--r--libglusterfs/src/defaults-tmpl.c12
-rw-r--r--libglusterfs/src/latency.c12
-rw-r--r--libglusterfs/src/mem-types.h1
-rw-r--r--libglusterfs/src/timespec.c12
-rw-r--r--libglusterfs/src/timespec.h3
-rw-r--r--libglusterfs/src/xlator.h1
8 files changed, 57 insertions, 16 deletions
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 56f545f28de..2acd83f36cf 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -194,26 +194,16 @@ gf_rev_dns_lookup (const char *ip)
{
char *fqdn = NULL;
int ret = 0;
- struct sockaddr_in sa = {0};
- char host_addr[256] = {0, };
GF_VALIDATE_OR_GOTO ("resolver", ip, out);
- sa.sin_family = AF_INET;
- inet_pton (AF_INET, ip, &sa.sin_addr);
- ret = getnameinfo ((struct sockaddr *)&sa, sizeof (sa), host_addr,
- sizeof (host_addr), NULL, 0, 0);
-
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip ((char *)ip, &fqdn);
if (ret != 0) {
gf_msg ("resolver", GF_LOG_INFO, errno,
LG_MSG_RESOLVE_HOSTNAME_FAILED, "could not resolve "
"hostname for %s", ip);
- goto out;
}
-
- /* Get the FQDN */
- fqdn = gf_strdup (host_addr);
-
out:
return fqdn;
}
@@ -3127,11 +3117,13 @@ gf_get_hostname_from_ip (char *client_ip, char **hostname)
char *client_ip_copy = NULL;
char *tmp = NULL;
char *ip = NULL;
+ size_t addr_sz = 0;
/* if ipv4, reverse lookup the hostname to
* allow FQDN based rpc authentication
*/
- if (valid_ipv4_address (client_ip, strlen (client_ip), 0) == _gf_false) {
+ if (!valid_ipv6_address (client_ip, strlen (client_ip), 0) &&
+ !valid_ipv4_address (client_ip, strlen (client_ip), 0)) {
/* most times, we get a.b.c.d:port form, so check that */
client_ip_copy = gf_strdup (client_ip);
if (!client_ip_copy)
@@ -3144,12 +3136,14 @@ gf_get_hostname_from_ip (char *client_ip, char **hostname)
if (valid_ipv4_address (ip, strlen (ip), 0) == _gf_true) {
client_sockaddr = (struct sockaddr *)&client_sock_in;
+ addr_sz = sizeof (client_sock_in);
client_sock_in.sin_family = AF_INET;
ret = inet_pton (AF_INET, ip,
(void *)&client_sock_in.sin_addr.s_addr);
} else if (valid_ipv6_address (ip, strlen (ip), 0) == _gf_true) {
client_sockaddr = (struct sockaddr *) &client_sock_in6;
+ addr_sz = sizeof (client_sock_in6);
client_sock_in6.sin6_family = AF_INET6;
ret = inet_pton (AF_INET6, ip,
@@ -3163,8 +3157,14 @@ gf_get_hostname_from_ip (char *client_ip, char **hostname)
goto out;
}
+ /* You cannot just use sizeof (*client_sockaddr), as per the man page
+ * the (getnameinfo) size must be the size of the underlying sockaddr
+ * struct e.g. sockaddr_in6 or sockaddr_in. Failure to do so will
+ * break IPv6 hostname resolution (IPv4 will work only because
+ * the sockaddr_in struct happens to be of the correct size).
+ */
ret = getnameinfo (client_sockaddr,
- sizeof (*client_sockaddr),
+ addr_sz,
client_hostname, sizeof (client_hostname),
NULL, 0, 0);
if (ret) {
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
index fbaac76b9ee..0f61aaae55e 100644
--- a/libglusterfs/src/compat.h
+++ b/libglusterfs/src/compat.h
@@ -479,6 +479,8 @@ int gf_mkostemp (char *tmpl, int suffixlen, int flags);
#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0);
#endif
+#ifndef IPV6_DEFAULT
+
#ifndef IXDR_GET_LONG
#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf))
#endif
@@ -495,6 +497,8 @@ int gf_mkostemp (char *tmpl, int suffixlen, int flags);
#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v))
#endif
+#endif /* IPV6_DEFAULT */
+
#if defined(__GNUC__) && !defined(RELAX_POISONING)
/* Use run API, see run.h */
#include <stdlib.h> /* system(), mkostemp() */
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index 5b7578b7c57..0889e07caa7 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -170,6 +170,18 @@ default_notify (xlator_t *this, int32_t event, void *data, ...)
}
}
break;
+ case GF_EVENT_CHILD_PING:
+ {
+ xlator_list_t *parent = this->parents;
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify (parent->xlator, event,
+ this, data);
+ parent = parent->next;
+ }
+ }
+ break;
default:
{
xlator_list_t *parent = this->parents;
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 5025de6c8cf..1d75f5b98ce 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -21,6 +21,7 @@
#include "statedump.h"
#include "libglusterfs-messages.h"
+static int gf_set_fop_from_fn_pointer_warning;
void
gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void *fn)
{
@@ -108,8 +109,15 @@ gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void
fop = GF_FOP_READDIRP;
else if (fops->getspec == *(fop_getspec_t *)&fn)
fop = GF_FOP_GETSPEC;
- else
- fop = -1;
+ else if (fops->ipc == *(fop_ipc_t *)&fn)
+ fop = GF_FOP_IPC;
+ else {
+ fop = GF_FOP_NULL;
+ GF_LOG_OCCASIONALLY(gf_set_fop_from_fn_pointer_warning,
+ "latency",
+ GF_LOG_WARNING,
+ "Unknown FOP type");
+ }
frame->op = fop;
}
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index 59c7e77d548..d5b2fcd64bd 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -173,6 +173,7 @@ enum gf_common_mem_types_ {
gf_common_mt_tbf_bucket_t,
gf_common_mt_tbf_throttle_t,
gf_common_mt_pthread_t,
+ gf_common_ping_local_t,
gf_common_mt_end
};
#endif
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
index f7b2bea2f30..903303d1380 100644
--- a/libglusterfs/src/timespec.c
+++ b/libglusterfs/src/timespec.c
@@ -60,3 +60,15 @@ void timespec_adjust_delta (struct timespec *ts, struct timespec delta)
ts->tv_sec += ((ts->tv_nsec + delta.tv_nsec) / 1000000000);
ts->tv_sec += delta.tv_sec;
}
+
+void timespec_sub (const struct timespec *begin, const struct timespec *end,
+ struct timespec *res)
+{
+ if (end->tv_nsec < begin->tv_nsec) {
+ res->tv_sec = end->tv_sec - begin->tv_sec - 1;
+ res->tv_nsec = end->tv_nsec + 1000000000 - begin->tv_nsec;
+ } else {
+ res->tv_sec = end->tv_sec - begin->tv_sec;
+ res->tv_nsec = end->tv_nsec - begin->tv_nsec;
+ }
+}
diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/timespec.h
index f37194b97cf..9c393ee7166 100644
--- a/libglusterfs/src/timespec.h
+++ b/libglusterfs/src/timespec.h
@@ -20,5 +20,8 @@
void timespec_now (struct timespec *ts);
void timespec_adjust_delta (struct timespec *ts, struct timespec delta);
+void timespec_sub (const struct timespec *begin,
+ const struct timespec *end,
+ struct timespec *res);
#endif /* __INCLUDE_TIMESPEC_H__ */
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index c2959efbd95..e0f82a70764 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -933,6 +933,7 @@ struct _xlator {
gf_loglevel_t loglevel; /* Log level for translator */
+ int64_t client_latency;
/* for latency measurement */
fop_latency_t latencies[GF_FOP_MAXVALUE];