diff options
author | Shreyas Siravara <sshreyas@fb.com> | 2015-06-19 13:48:19 -0700 |
---|---|---|
committer | Shreyas Siravara <sshreyas@fb.com> | 2016-12-09 09:10:01 -0800 |
commit | 416e177d50d89a6f050e19acdb28a3ee5d352d32 (patch) | |
tree | 99c09ea30b50649335ccf346e15b75c7b5ed1717 | |
parent | fb959a185d2d95313e8d6c9135f40f344fb72793 (diff) |
glusterd: Retry volfile server multiple times
Summary:
- Retry the volfile server when the initial connection fails. The
default connect attempts is currently 200.
- This is a port D2174716 & D3792748 to 3.8.
Test Plan: Tested retry functionality on devserver.
Reviewed By: rwareing
Signed-off-by: Shreyas Siravara <sshreyas@fb.com>
Change-Id: I22810d52b43107cc156483649fc160612677858a
Reviewed-on: http://review.gluster.org/16077
Tested-by: Shreyas Siravara <sshreyas@fb.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kevin Vigor <kvigor@fb.com>
-rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 60 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 10 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 1 | ||||
-rw-r--r-- | tests/basic/glusterd/volfile_server_switch.t | 3 |
5 files changed, 47 insertions, 29 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index d8b3ee7d9b3..9376990df7f 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -1897,9 +1897,14 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, switch (event) { case RPC_CLNT_DISCONNECT: + ctx->cmd_args.connect_attempts++; + gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "failed to connect with remote-host: %s (%s)", - ctx->cmd_args.volfile_server, strerror (errno)); + "Connect attempt with remote-host: %s (%u/%d)", + ctx->cmd_args.volfile_server, + ctx->cmd_args.connect_attempts, + ctx->cmd_args.max_connect_attempts); + if (!rpc->disabled) { /* * Check if dnscache is exhausted for current server @@ -1910,8 +1915,14 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, break; } } + + /* If we run out of servers, AND we attempted to connect + * max connect times, then we should return ENOTCONN + */ server = ctx->cmd_args.curr_server; - if (server->list.next == &ctx->cmd_args.volfile_servers) { + if ((ctx->cmd_args.connect_attempts >= + ctx->cmd_args.max_connect_attempts) && + server->list.next == &ctx->cmd_args.volfile_servers) { if (!ctx->active) need_term = 1; emval = ENOTCONN; @@ -1919,24 +1930,33 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, "Exhausted all volfile servers"); break; } - server = list_entry (server->list.next, typeof(*server), list); - ctx->cmd_args.curr_server = server; - ctx->cmd_args.volfile_server = server->volfile_server; - - ret = dict_set_str (rpc_trans->options, "remote-host", - server->volfile_server); - if (ret != 0) { - gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "failed to set remote-host: %s", + + /* If we exceed the # of connect attempts, we should + * move onto the next server + */ + if (ctx->cmd_args.connect_attempts >= + ctx->cmd_args.max_connect_attempts || !server) { + server = list_entry (server->list.next, + typeof(*server), list); + ctx->cmd_args.curr_server = server; + ctx->cmd_args.volfile_server = server->volfile_server; + + ret = dict_set_str (rpc_trans->options, "remote-host", + server->volfile_server); + if (ret != 0) { + gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, + "failed to set remote-host: %s", + server->volfile_server); + if (!ctx->active) + need_term = 1; + emval = ENOTCONN; + break; + } + ctx->cmd_args.connect_attempts = 0; + gf_log ("glusterfsd-mgmt", GF_LOG_INFO, + "connecting to next volfile server %s", server->volfile_server); - if (!ctx->active) - need_term = 1; - emval = ENOTCONN; - break; } - gf_log ("glusterfsd-mgmt", GF_LOG_INFO, - "connecting to next volfile server %s", - server->volfile_server); break; case RPC_CLNT_CONNECT: rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn); @@ -1953,7 +1973,7 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, } } - + ctx->cmd_args.connect_attempts = 0; if (is_mgmt_rpc_reconnect) glusterfs_mgmt_pmap_signin (ctx); diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 6c7a7c883fa..5022cfc22da 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -986,7 +986,7 @@ parse_opts (int key, char *arg, struct argp_state *state) cmd_args->debug_mode = ENABLE_DEBUG_MODE; break; case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS: - cmd_args->max_connect_attempts = 1; + cmd_args->max_connect_attempts = DEFAULT_MAX_CONNECT_ATTEMPTS; break; case ARGP_DIRECT_IO_MODE_KEY: @@ -1955,13 +1955,7 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx) } } - /* - This option was made obsolete but parsing it for backward - compatibility with third party applications - */ - if (cmd_args->max_connect_attempts) { - gf_msg ("glusterfs", GF_LOG_WARNING, 0, glusterfsd_msg_33); - } + cmd_args->max_connect_attempts = DEFAULT_MAX_CONNECT_ATTEMPTS; #ifdef GF_DARWIN_HOST_OS if (cmd_args->mount_point) diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index e442bede5db..b5c6b27b534 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -16,7 +16,7 @@ #define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol" #define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol" #define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol" - +#define DEFAULT_MAX_CONNECT_ATTEMPTS 200 #define DEFAULT_EVENT_POOL_SIZE 16384 #define ARGP_LOG_LEVEL_NONE_OPTION "NONE" diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 6e2d370605b..399d695665b 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -330,6 +330,7 @@ struct _cmd_args { uint32_t log_buf_size; uint32_t log_flush_timeout; int32_t max_connect_attempts; + unsigned int connect_attempts; char *print_exports; char *print_netgroups; /* advanced options */ diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t index 0b0e6470244..0b01398215c 100644 --- a/tests/basic/glusterd/volfile_server_switch.t +++ b/tests/basic/glusterd/volfile_server_switch.t @@ -1,5 +1,8 @@ #!/bin/bash +#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000 +#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 + . $(dirname $0)/../../include.rc . $(dirname $0)/../../volume.rc . $(dirname $0)/../../cluster.rc |