diff options
author | Poornima G <pgurusid@redhat.com> | 2016-06-06 06:29:40 -0400 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-06-16 04:57:42 -0700 |
commit | b8ac20e888fbacad9d90cd8f1c6ff8579a5cefe9 (patch) | |
tree | f7befa6b0e065afb87d9876f731e963d065acd40 | |
parent | c04df79dc453ef5cb7b3a0ca8ba14598da6189ac (diff) |
gfapi: Fix IO error caused when there is consecutive graph switches
Issue:
Consider a simple situation, where glfs_init() is done, i.e. initial
graph is up. Now perform 2 volume sets that result in 2 client side
graph changes. After this perform some IO, the IO fails with ENOTCONN.
The only way to recover this client is, I guess, another graph switch
or restart.
What actually is happening from code perspective:
Initial graph, let's say A, followed by 2 consecutive graph switches
to B and C without any IO between those two switches.
- graph_setup (A) as a result of GF_EVENT_CHILD_UP, and
fs->next_subvol = A
- glfs_init() results in fs->active_subvol = A, fs->next_subvol = NULL
- graph_setup (B) as a result of GF_EVENT_CHILD_UP, and
fs->next_subvol = B
- graph_setup (C) as a result of GF_EVENT_CHILD_UP, and
fs->next_subvol = C. It also sees that the previous graph B was never
set as fs->active_subvol, i.e. no IO or anything happened on B, so
can safely send GF_EVENT_PARENT_DOWN (by calling glfs_subvol_done(B)).
This parent down on B, results in child_down(B), which is fine.
But child_down also triggers graph_setup(B).
- graph_setup(B) as a result of GF_EVENT_CHILD_DOWN, and
fs->next_subvol = B, and GF_EVENT_PARENT_DOWN on C as explained
above. This again leads to GF_EVENT_CHILD_DOWN on C.
- graph_setup(C) as a result of GF_EVENT_CHILD_DOWN, and
fs->next_subvol = C, and GF_EVENT_PARENT_DOWN on B as explained
above.
Thus both the graphs B and C are disconnected, and hence the ENOTCONN.
Solution:
Remove the call to graph_setup() when the event is GF_EVENT_CHILD_DOWN.
I don't see any reason why graph_setup should be called when there is
child_down. Not sure what the original reason was, to have graph_setup
in child_down. git history shows the first patch itself had this call.
Change-Id: I9de86555f66cc94a05649ac863b40ed3426ffd4b
BUG: 1343038
Signed-off-by: Poornima G <pgurusid@redhat.com>
Reviewed-on: http://review.gluster.org/14656
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r-- | api/src/glfs-master.c | 1 | ||||
-rw-r--r-- | tests/bugs/libgfapi/bug-1093594.c (renamed from tests/bugs/gfapi/bug-1093594.c) | 0 | ||||
-rwxr-xr-x | tests/bugs/libgfapi/bug-1093594.sh (renamed from tests/bugs/gfapi/bug-1093594.sh) | 0 | ||||
-rw-r--r-- | tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c | 165 | ||||
-rwxr-xr-x | tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh | 20 |
5 files changed, 185 insertions, 1 deletions
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c index ff8f68f452b..9f11a6a0c9c 100644 --- a/api/src/glfs-master.c +++ b/api/src/glfs-master.c @@ -105,7 +105,6 @@ notify (xlator_t *this, int event, void *data, ...) pthread_cond_broadcast (&fs->child_down_cond); } pthread_mutex_unlock (&fs->mutex); - graph_setup (fs, graph); glfs_init_done (fs, 1); break; case GF_EVENT_CHILD_CONNECTING: diff --git a/tests/bugs/gfapi/bug-1093594.c b/tests/bugs/libgfapi/bug-1093594.c index 8f5aa9be66c..8f5aa9be66c 100644 --- a/tests/bugs/gfapi/bug-1093594.c +++ b/tests/bugs/libgfapi/bug-1093594.c diff --git a/tests/bugs/gfapi/bug-1093594.sh b/tests/bugs/libgfapi/bug-1093594.sh index 444319b8e63..444319b8e63 100755 --- a/tests/bugs/gfapi/bug-1093594.sh +++ b/tests/bugs/libgfapi/bug-1093594.sh diff --git a/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c new file mode 100644 index 00000000000..4cf849484a6 --- /dev/null +++ b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.c @@ -0,0 +1,165 @@ +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define WRITE_SIZE (128) + +glfs_t * +setup_new_client(char *volname, char *log_fileile) +{ + int ret = 0; + glfs_t *fs = NULL; + + fs = glfs_new (volname); + if (!fs) { + fprintf (stderr, "\nglfs_new: returned NULL (%s)\n", + strerror (errno)); + goto error; + } + + ret = glfs_set_volfile_server (fs, "tcp", "localhost", 24007); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", + ret, strerror (errno)); + goto error; + } + + ret = glfs_set_logging (fs, log_fileile, 7); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + + ret = glfs_init (fs); + if (ret < 0) { + fprintf (stderr, "\nglfs_init failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + return fs; +error: + return 
NULL; +} + +int +write_something (glfs_t *fs) +{ + glfs_fd_t *fd = NULL; + char *buf = NULL; + int ret = 0; + int j = 0; + + fd = glfs_creat (fs, "filename", O_RDWR, 0644); + if (!fd) { + fprintf (stderr, "%s: (%p) %s\n", "filename", fd, + strerror (errno)); + return -1; + } + + buf = (char *) malloc (WRITE_SIZE); + memset (buf, '-', WRITE_SIZE); + + for (j = 0; j < 4; j++) { + ret = glfs_write (fd, buf, WRITE_SIZE, 0); + if (ret < 0) { + fprintf (stderr, "Write(%s): %d (%s)\n", "filename", ret, + strerror (errno)); + return ret; + } + glfs_lseek (fd, 0, SEEK_SET); + } + return 0; +} + +static int +volfile_change (const char *volname) { + int ret = 0; + char *cmd = NULL, *cmd1 = NULL; + + ret = asprintf (&cmd, "gluster volume set %s quick-read on", + volname); + if (ret < 0) { + fprintf (stderr, "cannot construct cli command string (%s)", + strerror (errno)); + return ret; + } + + ret = asprintf (&cmd1, "gluster volume set %s quick-read off", + volname); + if (ret < 0) { + fprintf (stderr, "cannot construct cli command string (%s)", + strerror (errno)); + return ret; + } + + ret = system (cmd); + if (ret < 0) { + fprintf (stderr, "quick-read off on (%s) failed", volname); + return ret; + } + + ret = system (cmd1); + if (ret < 0) { + fprintf (stderr, "quick-read on on (%s) failed", volname); + return ret; + } + + ret = system (cmd); + if (ret < 0) { + fprintf (stderr, "quick-read off on (%s) failed", volname); + return ret; + } + + free (cmd); + free (cmd1); + return ret; +} + +int +main (int argc, char *argv[]) +{ + int ret = 0; + glfs_t *fs = NULL; + char buf[100]; + glfs_fd_t *fd = NULL; + + if (argc != 3) { + fprintf (stderr, + "Expect following args %s <Vol> <log file location>\n" + , argv[0]); + return -1; + } + + fs = setup_new_client (argv[1], argv[2]); + if (!fs) + goto error; + + ret = volfile_change (argv[1]); + if (ret < 0) + goto error; + + /* This is required as volfile change takes a while to reach this + * gfapi client and precess the graph change. 
Without this the issue + * cannot be reproduced as in cannot be tested. + */ + sleep (10); + + ret = write_something (fs); + if (ret < 0) + goto error; + + ret = glfs_fini (fs); + if (ret < 0) { + fprintf (stderr, "glfs_fini failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + + return 0; +error: + return -1; +} diff --git a/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh new file mode 100755 index 00000000000..43cad2b15ee --- /dev/null +++ b/tests/bugs/libgfapi/glfs_vol_set_IO_ERR.sh @@ -0,0 +1,20 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +## Start and create a volume +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume info; + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; +TEST $CLI volume start $V0; +logdir=`gluster --print-logdir` + +build_tester $(dirname $0)/glfs_vol_set_IO_ERR.c -lgfapi +TEST $(dirname $0)/glfs_vol_set_IO_ERR $V0 $logdir/glfs_vol_set_IO_ERR.log + +cleanup_tester $(dirname $0)/glfs_vol_set_IO_ERR +cleanup; |