glusterd-client: switch volfile server in case the existing connection breaks

Problem: Currently, say we have a 10-node gluster volume, mounted using Node 1 (N1) as the volfile server and the rest as backup volfile servers: $ mount -t glusterfs -obackup-volfile-servers=<N2>:<N3>:...:<N10> <N1>:/vol /mnt If N1 goes down we will still be able to access the same mount point, but if we add or remove bricks on the volume while its volfile server (N1 in our case) is down, that information will not be passed to the client, because the connection between glusterfs and glusterd (of N1) is disconnected; as a result we cannot store files on the newly added bricks until N1 comes back. Solution: If N1 goes down, iterate through the nodes specified in the backup-volfile-servers list and try to establish the connection between glusterfs and glusterd, so we don't have to wait until N1 comes back to store files on the bricks that were successfully added while N1 was down. Change-Id: I653c9f081a84667630608091bc243ffc3859d5cd BUG: 1289916 Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> Reviewed-on: http://review.gluster.org/13002 Tested-by: Prasanna Kumar Kalever <pkalever@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Poornima G <pgurusid@redhat.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> 2016-03-17 13:50:31 +0530
committer: Jeff Darcy <jdarcy@redhat.com> 2016-04-12 05:14:22 -0700
commit: 05bc8bfd2a11d280fe0aaac6c7ae86ea5ff08164 (patch)
tree: b3a10412f2726c081b59cd8d232bf8286d5c7ba7
parent: 0facb11220aea20a6573b656785922219c9650cf (diff)
2 files changed, 83 insertions, 38 deletions
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 7af15eba92e..61309f9fc08 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1897,49 +1897,46 @@ mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
 
         switch (event) {
         case RPC_CLNT_DISCONNECT:
-                if (!ctx->active) {
-                        gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
-                                "failed to connect with remote-host: %s (%s)",
-                                ctx->cmd_args.volfile_server,
-                                strerror (errno));
-                        if (!rpc->disabled) {
-                                /*
-                                 * Check if dnscache is exhausted for current server
-                                 * and continue until cache is exhausted
-                                 */
-                                dnscache = rpc_trans->dnscache;
-                                if (dnscache && dnscache->next) {
-                                        break;
-                                }
-                        }
-                        server = ctx->cmd_args.curr_server;
-                        if (server->list.next == &ctx->cmd_args.volfile_servers) {
-                                need_term = 1;
-                                emval = ENOTCONN;
-                                gf_log("glusterfsd-mgmt", GF_LOG_INFO,
-                                       "Exhausted all volfile servers");
+                gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+                        "failed to connect with remote-host: %s (%s)",
+                        ctx->cmd_args.volfile_server, strerror (errno));
+                if (!rpc->disabled) {
+                        /*
+                         * Check if dnscache is exhausted for current server
+                         * and continue until cache is exhausted
+                         */
+                        dnscache = rpc_trans->dnscache;
+                        if (dnscache && dnscache->next) {
                                 break;
                         }
-                        server = list_entry (server->list.next, typeof(*server),
-                                             list);
-                        ctx->cmd_args.curr_server = server;
-                        ctx->cmd_args.volfile_server = server->volfile_server;
-
-                        ret = dict_set_str (rpc_trans->options,
-                                            "remote-host",
-                                            server->volfile_server);
-                        if (ret != 0) {
-                                gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
-                                        "failed to set remote-host: %s",
-                                        server->volfile_server);
+                }
+                server = ctx->cmd_args.curr_server;
+                if (server->list.next == &ctx->cmd_args.volfile_servers) {
+                        if (!ctx->active)
                                 need_term = 1;
-                                emval = ENOTCONN;
-                                break;
-                        }
-                        gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
-                                "connecting to next volfile server %s",
+                        emval = ENOTCONN;
+                        gf_log("glusterfsd-mgmt", GF_LOG_INFO,
+                               "Exhausted all volfile servers");
+                        break;
+                }
+                server = list_entry (server->list.next, typeof(*server), list);
+                ctx->cmd_args.curr_server = server;
+                ctx->cmd_args.volfile_server = server->volfile_server;
+
+                ret = dict_set_str (rpc_trans->options, "remote-host",
+                                    server->volfile_server);
+                if (ret != 0) {
+                        gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+                                "failed to set remote-host: %s",
                                 server->volfile_server);
+                        if (!ctx->active)
+                                need_term = 1;
+                        emval = ENOTCONN;
+                        break;
                 }
+                gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
+                        "connecting to next volfile server %s",
+                        server->volfile_server);
                 break;
         case RPC_CLNT_CONNECT:
                 rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
new file mode 100644
index 00000000000..0b0e6470244
--- /dev/null
+++ b/tests/basic/glusterd/volfile_server_switch.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+cleanup;
+
+# How this test works:
+# 1. create a 3 node cluster
+# 2. add them to trusted pool
+# 3. create a volume and start it
+# 4. mount the volume, listing all 3 nodes as volfile servers
+# 5. kill glusterd on node 1
+# 6. make changes to the volume using node 2 ('volume set' here)
+# 7. check whether those notifications are received by the client
+
+TEST launch_cluster 3;
+
+TEST $CLI_1 peer probe $H1;
+
+TEST $CLI_1 peer probe $H2;
+
+TEST $CLI_1 peer probe $H3;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume status $V0;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --volfile-server=$H3  $M0
+
+TEST kill_glusterd 1
+
+TEST $CLI_2 volume set $V0 performance.io-cache off
+
+# wait so that the new graph directory under $M0/.meta/graphs exists by now
+# TODO: determine an ideal time to wait instead of a fixed sleep
+sleep 5
+
+count=$(find $M0/.meta/graphs/* -maxdepth 0 -type d -iname "*" | wc -l)
+TEST [ "$count" -gt "1" ]
+
+cleanup;
author	Prasanna Kumar Kalever <prasanna.kalever@redhat.com>	2016-03-17 13:50:31 +0530
committer	Jeff Darcy <jdarcy@redhat.com>	2016-04-12 05:14:22 -0700
commit	05bc8bfd2a11d280fe0aaac6c7ae86ea5ff08164 (patch)
tree	b3a10412f2726c081b59cd8d232bf8286d5c7ba7
parent	0facb11220aea20a6573b656785922219c9650cf (diff)