summaryrefslogtreecommitdiffstats
path: root/xlators/features/quiesce/src
diff options
context:
space:
mode:
authorPoornima G <pgurusid@redhat.com>2018-01-26 15:34:43 +0530
committerAmar Tumballi <amarts@redhat.com>2018-01-30 05:00:52 +0000
commitd25b6065469eb978d40450b5aebcf5711fb50205 (patch)
treeae8cbb6dff9b7f1a0887b05adc57fa6ec96fa5e7 /xlators/features/quiesce/src
parent9bbee1c2bc91d194d7470dfe2351b7d8c7abe102 (diff)
quiesce, gfproxy: Implement failover across multiple gfproxy nodes
Updates: #242 Change-Id: I767e574a26e922760a7130bd209c178d74e8cf69 Signed-off-by: Poornima G <pgurusid@redhat.com>
Diffstat (limited to 'xlators/features/quiesce/src')
-rw-r--r--xlators/features/quiesce/src/Makefile.am2
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h1
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h31
-rw-r--r--xlators/features/quiesce/src/quiesce.c237
-rw-r--r--xlators/features/quiesce/src/quiesce.h9
5 files changed, 246 insertions, 34 deletions
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
index a6cabb3012d..74ea999c045 100644
--- a/xlators/features/quiesce/src/Makefile.am
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -6,7 +6,7 @@ quiesce_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quiesce_la_SOURCES = quiesce.c
quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quiesce.h quiesce-mem-types.h
+noinst_HEADERS = quiesce.h quiesce-mem-types.h quiesce-messages.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 6e582f424ea..31346c1a794 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -15,6 +15,7 @@
/* Memory accounting types used by the quiesce translator's allocations. */
enum gf_quiesce_mem_types_ {
gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
+ gf_quiesce_mt_failover_hosts, /* quiesce_failover_hosts_t list entries */
gf_quiesce_mt_end
};
#endif
diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h
new file mode 100644
index 00000000000..8af3b10b06e
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce-messages.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __QUIESCE_MESSAGES_H__
+#define __QUIESCE_MESSAGES_H__
+
+#include "glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+/* Message IDs logged by the quiesce translator. */
+GLFS_MSGID(QUIESCE,
+ QUIESCE_MSG_INVAL_HOST, /* a failover-hosts token failed address validation */
+ QUIESCE_MSG_FAILOVER_FAILED /* the failover request callback saw op_ret < 0 */
+);
+
+#endif /* __QUIESCE_MESSAGES_H__ */
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index 59e57d284a0..95c0c8b0829 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -14,6 +14,9 @@
/* TODO: */
/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */
+/* Forward declaration: __gf_quiesce_start_timer () registers this function as
+ * its timer callback, and it is defined further down in this file. */
+void
+gf_quiesce_timeout (void *data);
+
/* Quiesce Specific Functions */
void
@@ -37,6 +40,173 @@ gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
mem_put (local);
}
+/*
+ * Arm the quiesce timer unless one is already pending.
+ *
+ * @this: quiesce translator instance; handed to the timer as callback data.
+ * @priv: private state holding the timer handle and the timeout (seconds).
+ *
+ * When priv->timer is NULL a timer is registered to call
+ * gf_quiesce_timeout (this) after priv->timeout seconds, and the handle is
+ * stored in priv->timer. A registration failure is only logged.
+ *
+ * NOTE(review): the leading "__" suggests callers are expected to hold
+ * priv->lock - confirm; not every call site in this file takes it.
+ */
+void
+__gf_quiesce_start_timer (xlator_t *this, quiesce_priv_t *priv)
+{
+        struct timespec delay = {0,};
+
+        /* A timer is already outstanding: never arm a second one. */
+        if (priv->timer)
+                return;
+
+        delay.tv_sec = priv->timeout;
+        delay.tv_nsec = 0;
+
+        priv->timer = gf_timer_call_after (this->ctx, delay,
+                                           gf_quiesce_timeout,
+                                           (void *) this);
+        if (!priv->timer) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Cannot create timer");
+        }
+}
+
+/*
+ * Release every entry of priv->failover_list: each node is unlinked and both
+ * its address string and the node itself are freed.
+ *
+ * @this: unused.
+ * @priv: private state owning the failover list.
+ *
+ * NOTE(review): the only caller in view invokes this with priv->lock held.
+ */
+static void
+__gf_quiesce_cleanup_failover_hosts (xlator_t *this, quiesce_priv_t *priv)
+{
+        quiesce_failover_hosts_t *entry = NULL;
+        quiesce_failover_hosts_t *next  = NULL;
+
+        /* The _safe iterator keeps 'next' valid while 'entry' is destroyed. */
+        list_for_each_entry_safe (entry, next, &priv->failover_list, list) {
+                list_del (&entry->list);
+                GF_FREE (entry->addr);
+                GF_FREE (entry);
+        }
+}
+
+/*
+ * Rebuild priv->failover_list from a comma separated list of host addresses.
+ *
+ * @this:  quiesce translator instance (used for logging).
+ * @priv:  private state owning failover_list; priv->lock is taken here.
+ * @value: comma separated hostnames/IP addresses. NULL leaves the current
+ *         list untouched.
+ *
+ * Any existing entries are discarded first. Tokens rejected by
+ * valid_internet_address () are logged and skipped; the remaining ones are
+ * added with list_add (). If memory runs out while adding an entry, parsing
+ * stops and the hosts added so far are kept.
+ */
+void
+gf_quiesce_populate_failover_hosts (xlator_t *this, quiesce_priv_t *priv,
+                                    const char *value)
+{
+        char                     *dup_val       = NULL;
+        char                     *addr_tok      = NULL;
+        char                     *save_ptr      = NULL;
+        quiesce_failover_hosts_t *failover_host = NULL;
+
+        if (!value)
+                goto out;
+
+        /* strtok_r () modifies its input, so tokenize a private copy. */
+        dup_val = gf_strdup (value);
+        if (!dup_val)
+                goto out;
+
+        LOCK (&priv->lock);
+        {
+                if (!list_empty (&priv->failover_list))
+                        __gf_quiesce_cleanup_failover_hosts (this, priv);
+
+                /* The advance lives in the for-header so that 'continue' on
+                 * an invalid token still moves on to the next token instead
+                 * of re-examining the same one forever. */
+                for (addr_tok = strtok_r (dup_val, ",", &save_ptr);
+                     addr_tok != NULL;
+                     addr_tok = strtok_r (NULL, ",", &save_ptr)) {
+                        if (!valid_internet_address (addr_tok, _gf_true)) {
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        QUIESCE_MSG_INVAL_HOST, "Specified "
+                                        "invalid internet address:%s",
+                                        addr_tok);
+                                continue;
+                        }
+
+                        failover_host = GF_CALLOC (1, sizeof (*failover_host),
+                                                gf_quiesce_mt_failover_hosts);
+                        if (!failover_host) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Out of memory while adding failover "
+                                        "host:%s", addr_tok);
+                                break;
+                        }
+
+                        failover_host->addr = gf_strdup (addr_tok);
+                        if (!failover_host->addr) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Out of memory while adding failover "
+                                        "host:%s", addr_tok);
+                                GF_FREE (failover_host);
+                                break;
+                        }
+
+                        INIT_LIST_HEAD (&failover_host->list);
+                        list_add (&failover_host->list, &priv->failover_list);
+                }
+        }
+        UNLOCK (&priv->lock);
+        GF_FREE (dup_val);
+out:
+        return;
+}
+
+/*
+ * Completion callback for the failover request wound to the first child.
+ *
+ * @frame:    frame created for the request; its stack is destroyed here.
+ * @cookie:   treated as a heap-allocated host string: it is printed with %s
+ *            on failure and released with GF_FREE (). It may be NULL when the
+ *            wind site supplies no cookie.
+ * @op_ret:   a negative value means submitting the failover command failed.
+ * @op_errno: errno reported together with the failure.
+ * @xdata:    unused.
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): __gf_quiesce_start_timer () is called here without taking
+ * priv->lock - confirm that this is intended.
+ */
+int32_t
+gf_quiesce_failover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        quiesce_priv_t *priv      = this->private;
+        char           *host_addr = cookie;
+
+        /* A negative op_ret does not tell whether the failover to the other
+         * host worked; that is learnt from the CHILD_UP/CHILD_DOWN events.
+         * It only indicates that submitting the failover command went wrong,
+         * which is logged here. */
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_INFO, op_errno,
+                        QUIESCE_MSG_FAILOVER_FAILED,
+                        "Initiating failover to host:%s failed:", host_addr);
+        }
+
+        GF_FREE (host_addr);
+        STACK_DESTROY (frame->root);
+
+        /* Whatever the outcome, make sure the quiesce timer is armed again. */
+        __gf_quiesce_start_timer (this, priv);
+
+        return 0;
+}
+
+/*
+ * Ask the first child to connect to the next failover host that has not been
+ * tried yet.
+ *
+ * @this: quiesce translator instance.
+ *
+ * Returns 0 when the child is already up (nothing to do) or when a failover
+ * request was wound; gf_quiesce_failover_cbk () then owns the frame and the
+ * cookie. Returns -1 when every host has already been tried or when the
+ * request could not be set up. -1 is used for the set-up failure as well
+ * because no callback will run in that case, so returning 0 would leave
+ * nobody to re-arm the quiesce timer.
+ *
+ * NOTE(review): the leading "__" suggests the caller is expected to hold
+ * priv->lock - confirm.
+ */
+int
+__gf_quiesce_perform_failover (xlator_t *this)
+{
+        int                       ret           = 0;
+        call_frame_t             *frame         = NULL;
+        dict_t                   *dict          = NULL;
+        char                     *addr          = NULL;
+        char                     *cookie        = NULL;
+        quiesce_priv_t           *priv          = NULL;
+        quiesce_failover_hosts_t *failover_host = NULL;
+        quiesce_failover_hosts_t *host          = NULL;
+
+        priv = this->private;
+
+        if (priv->pass_through) {
+                gf_msg_trace (this->name, 0, "child is up, hence not "
+                              "performing any failover");
+                goto out;
+        }
+
+        /* Pick the first host that was not attempted yet and mark it. */
+        list_for_each_entry (failover_host, &priv->failover_list, list) {
+                if (failover_host->tried == 0) {
+                        host = failover_host;
+                        failover_host->tried = 1;
+                        break;
+                }
+        }
+        if (!host) {
+                /* TODO: Keep trying until any of the gfproxy comes back up,
+                 * e.g. by resetting 'tried' on every host at this point.
+                 * Currently failover is attempted once for each host; if
+                 * none succeeds an error is returned to the mount point. */
+                gf_msg_debug (this->name, 0, "all the failover hosts have "
+                              "been tried and looks like didn't succeed");
+                ret = -1;
+                goto out;
+        }
+
+        frame = create_frame (this, this->ctx->pool);
+        dict = dict_new ();
+        /* Two copies of the address: one becomes the dict value, the other
+         * is the cookie that the callback logs and frees. */
+        addr = gf_strdup (host->addr);
+        cookie = gf_strdup (host->addr);
+        if (!frame || !dict || !addr || !cookie) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set up failover request to:%s",
+                        host->addr);
+                ret = -1;
+                goto cleanup;
+        }
+
+        ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, addr);
+        /* NOTE(review): on success the dict owns 'addr'. Who owns it after a
+         * failure is not visible from here, so it is deliberately not freed
+         * (a possible small leak is preferred over a double free). */
+        addr = NULL;
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set up failover request to:%s",
+                        host->addr);
+                ret = -1;
+                goto cleanup;
+        }
+
+        gf_msg_trace (this->name, 0, "Initiating failover to:%s",
+                      host->addr);
+
+        STACK_WIND_COOKIE (frame, gf_quiesce_failover_cbk, cookie,
+                           FIRST_CHILD (this),
+                           FIRST_CHILD (this)->fops->setxattr,
+                           NULL, dict, 0, NULL);
+        /* The callback destroys the frame and frees the cookie. */
+        frame = NULL;
+        cookie = NULL;
+
+cleanup:
+        if (frame)
+                STACK_DESTROY (frame->root);
+        GF_FREE (cookie);
+        GF_FREE (addr);
+out:
+
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
call_stub_t *
gf_quiesce_dequeue (xlator_t *this)
{
@@ -86,6 +256,7 @@ gf_quiesce_timeout (void *data)
{
xlator_t *this = NULL;
quiesce_priv_t *priv = NULL;
+ int ret = -1;
this = data;
priv = this->private;
@@ -93,12 +264,21 @@ gf_quiesce_timeout (void *data)
LOCK (&priv->lock);
{
- priv->pass_through = _gf_true;
+ priv->timer = NULL;
+ if (priv->pass_through) {
+ UNLOCK (&priv->lock);
+ goto out;
+ }
+ ret = __gf_quiesce_perform_failover (THIS);
}
UNLOCK (&priv->lock);
- gf_quiesce_dequeue_start (this);
+ if (ret < 0) {
+ priv->pass_through = _gf_true;
+ gf_quiesce_dequeue_start (this);
+ }
+out:
return;
}
@@ -106,7 +286,6 @@ void
gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
{
quiesce_priv_t *priv = NULL;
- struct timespec timeout = {0,};
priv = this->private;
if (!priv) {
@@ -119,19 +298,10 @@ gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
{
list_add_tail (&stub->list, &priv->req);
priv->queue_size++;
+ __gf_quiesce_start_timer (this, priv);
}
UNLOCK (&priv->lock);
- if (!priv->timer) {
- timeout.tv_sec = priv->timeout;
- timeout.tv_nsec = 0;
-
- priv->timer = gf_timer_call_after (this->ctx,
- timeout,
- gf_quiesce_timeout,
- (void *) this);
- }
-
return;
}
@@ -2553,6 +2723,10 @@ reconfigure (xlator_t *this, dict_t *options)
priv = this->private;
GF_OPTION_RECONF("timeout", priv->timeout, options, time, out);
+ GF_OPTION_RECONF ("failover-hosts", priv->failover_hosts, options,
+ str, out);
+ gf_quiesce_populate_failover_hosts (this, priv, priv->failover_hosts);
+
ret = 0;
out:
return ret;
@@ -2579,7 +2753,11 @@ init (xlator_t *this)
if (!priv)
goto out;
+ INIT_LIST_HEAD (&priv->failover_list);
+
GF_OPTION_INIT ("timeout", priv->timeout, time, out);
+ GF_OPTION_INIT ("failover-hosts", priv->failover_hosts, str, out);
+ gf_quiesce_populate_failover_hosts (this, priv, priv->failover_hosts);
priv->local_pool = mem_pool_new (quiesce_local_t,
GF_FOPS_EXPECTED_IN_PARALLEL);
@@ -2617,7 +2795,6 @@ notify (xlator_t *this, int event, void *data, ...)
{
int ret = 0;
quiesce_priv_t *priv = NULL;
- struct timespec timeout = {0,};
priv = this->private;
if (!priv)
@@ -2645,24 +2822,10 @@ notify (xlator_t *this, int event, void *data, ...)
LOCK (&priv->lock);
{
priv->pass_through = _gf_false;
- }
- UNLOCK (&priv->lock);
+ __gf_quiesce_start_timer (this, priv);
- if (priv->timer)
- break;
- timeout.tv_sec = priv->timeout;
- timeout.tv_nsec = 0;
-
- priv->timer = gf_timer_call_after (this->ctx,
- timeout,
- gf_quiesce_timeout,
- (void *) this);
-
- if (priv->timer == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot create timer");
}
-
+ UNLOCK (&priv->lock);
break;
default:
break;
@@ -2735,14 +2898,22 @@ struct xlator_cbks cbks;
struct volume_options options[] = {
{ .key = {"timeout"},
.type = GF_OPTION_TYPE_TIME,
- .default_value = "20",
+ .default_value = "45",
.description = "After 'timeout' seconds since the time 'quiesce' "
"option was set to \"!pass-through\", acknowledgements to file "
"operations are no longer quiesced and previously "
"quiesced acknowledgements are sent to the application",
- .tags = {"debug", "diagnose"},
.op_version = { GD_OP_VERSION_4_0_0 },
- .flags = OPT_FLAG_CLIENT_OPT,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+        /* Hosts running gfproxy daemons that a thin client may fail over to. */
+        { .key = {"failover-hosts"},
+          .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+          .op_version = { GD_OP_VERSION_4_0_0 },
+          .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+          .description = "It is a comma separated list of hostname/IP "
+                         "addresses. It specifies the list of hosts where "
+                         "the gfproxy daemons are running, to which the "
+                         "thin clients can failover."
+        },
{ .key = {NULL} },
};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index e76523c602b..c084801c6c6 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -12,12 +12,19 @@
#define __QUIESCE_H__
#include "quiesce-mem-types.h"
+#include "quiesce-messages.h"
#include "xlator.h"
#include "timer.h"
#define GF_FOPS_EXPECTED_IN_PARALLEL 512
+/* One candidate host for failover; entries are linked into
+ * quiesce_priv_t.failover_list. */
typedef struct {
+ struct list_head list; /* linkage in the failover list */
+ char *addr; /* host address string (gf_strdup'ed, freed on cleanup) */
+ gf_boolean_t tried; /* indicates attempted connecting */
+} quiesce_failover_hosts_t;
+
+typedef struct {
gf_timer_t *timer;
gf_boolean_t pass_through;
gf_lock_t lock;
@@ -26,6 +33,8 @@ typedef struct {
pthread_t thr;
struct mem_pool *local_pool;
uint32_t timeout;
+ char *failover_hosts;
+ struct list_head failover_list;
} quiesce_priv_t;
typedef struct {