diff options
author | Prasanna Kumar Kalever <prasanna.kalever@redhat.com> | 2016-03-17 13:50:31 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-04-12 05:14:22 -0700 |
commit | 05bc8bfd2a11d280fe0aaac6c7ae86ea5ff08164 (patch) | |
tree | b3a10412f2726c081b59cd8d232bf8286d5c7ba7 | |
parent | 0facb11220aea20a6573b656785922219c9650cf (diff) |
glusterd-client: switch volfile server incase existing connection breaks
Problem:
Currently, say we have 10 Node gluster volume, and mounted it using
Node 1 (N1) as volfile server and the rest as backup volfile servers
$ mount -t glusterfs -obackup-volfile-servers=<N2>:<N3>:...:<N10> <N1>:/vol /mnt
if N1 goes down we still be able to access the same mount point,
but the problem is that if we add or remove bricks to the volume
whoes volfile server is down in our case N1, that info will not be
passed to client, because connection between glusterfs and glusterd (of N1)
will be disconnected due to which we cannot store files to the newly
added bricks until N1 comes back
Solution:
If N1 goes down iterate through the nodes specified in
backup-volfile-servers list and try to establish the connection between
glusterfs and glsuterd, hence we don't really have to wait
until N1 comes back to store files in newly added bricks that are
successfully added when N1 was down
Change-Id: I653c9f081a84667630608091bc243ffc3859d5cd
BUG: 1289916
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Reviewed-on: http://review.gluster.org/13002
Tested-by: Prasanna Kumar Kalever <pkalever@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Poornima G <pgurusid@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 73 | ||||
-rw-r--r-- | tests/basic/glusterd/volfile_server_switch.t | 48 |
2 files changed, 83 insertions, 38 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 7af15eba92e..61309f9fc08 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -1897,49 +1897,46 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, switch (event) { case RPC_CLNT_DISCONNECT: - if (!ctx->active) { - gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "failed to connect with remote-host: %s (%s)", - ctx->cmd_args.volfile_server, - strerror (errno)); - if (!rpc->disabled) { - /* - * Check if dnscache is exhausted for current server - * and continue until cache is exhausted - */ - dnscache = rpc_trans->dnscache; - if (dnscache && dnscache->next) { - break; - } - } - server = ctx->cmd_args.curr_server; - if (server->list.next == &ctx->cmd_args.volfile_servers) { - need_term = 1; - emval = ENOTCONN; - gf_log("glusterfsd-mgmt", GF_LOG_INFO, - "Exhausted all volfile servers"); + gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, + "failed to connect with remote-host: %s (%s)", + ctx->cmd_args.volfile_server, strerror (errno)); + if (!rpc->disabled) { + /* + * Check if dnscache is exhausted for current server + * and continue until cache is exhausted + */ + dnscache = rpc_trans->dnscache; + if (dnscache && dnscache->next) { break; } - server = list_entry (server->list.next, typeof(*server), - list); - ctx->cmd_args.curr_server = server; - ctx->cmd_args.volfile_server = server->volfile_server; - - ret = dict_set_str (rpc_trans->options, - "remote-host", - server->volfile_server); - if (ret != 0) { - gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "failed to set remote-host: %s", - server->volfile_server); + } + server = ctx->cmd_args.curr_server; + if (server->list.next == &ctx->cmd_args.volfile_servers) { + if (!ctx->active) need_term = 1; - emval = ENOTCONN; - break; - } - gf_log ("glusterfsd-mgmt", GF_LOG_INFO, - "connecting to next volfile server %s", + emval = ENOTCONN; + gf_log("glusterfsd-mgmt", GF_LOG_INFO, + "Exhausted all volfile servers"); + break; + } + server = list_entry (server->list.next, typeof(*server), list); + ctx->cmd_args.curr_server = server; + ctx->cmd_args.volfile_server = server->volfile_server; + + ret = dict_set_str (rpc_trans->options, "remote-host", + server->volfile_server); + if (ret != 0) { + gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, + "failed to set remote-host: %s", server->volfile_server); + if (!ctx->active) + need_term = 1; + emval = ENOTCONN; + break; } + gf_log ("glusterfsd-mgmt", GF_LOG_INFO, + "connecting to next volfile server %s", + server->volfile_server); break; case RPC_CLNT_CONNECT: rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn); diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t new file mode 100644 index 00000000000..0b0e6470244 --- /dev/null +++ b/tests/basic/glusterd/volfile_server_switch.t @@ -0,0 +1,48 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../cluster.rc + + +cleanup; + +# * How this test works ? +# 1. create a 3 node cluster +# 2. add them to trusted pool +# 3. create a volume and start +# 4. mount the volume with all 3 backup-volfile servers +# 5. kill glusterd in node 1 +# 6. make changes to volume using node 2, using 'volume set' here +# 7. check whether those notifications are received by client + +TEST launch_cluster 3; + +TEST $CLI_1 peer probe $H1; + +TEST $CLI_1 peer probe $H2; + +TEST $CLI_1 peer probe $H3; + +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 + +TEST $CLI_1 volume start $V0 + +TEST $CLI_1 volume status $V0; + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --volfile-server=$H3 $M0 + +TEST kill_glusterd 1 + +TEST $CLI_2 volume set $V0 performance.io-cache off + +# make sure by this time directory will be created +# TODO: suggest ideal time to wait +sleep 5 + +count=$(find $M0/.meta/graphs/* -maxdepth 0 -type d -iname "*" | wc -l) +TEST [ "$count" -gt "1" ] + +cleanup; |