diff options
author | Poornima G <pgurusid@redhat.com> | 2016-11-21 19:57:08 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-11-23 05:11:07 -0800 |
commit | 114c50c1a10d649a8b640627f09fd5872828d4ec (patch) | |
tree | c5d608a87f64076508006e8afa15c15de08abc22 /tests | |
parent | 63a4f5da0feb19e47d16aa2d6aa78efaef1c93ca (diff) |
io-cache: Fix a read hang
Issue:
=====
In certain cases, there was no unwind of read
from read-ahead xlator, thus resulting in hang.
RCA:
====
In certain cases, ioc_readv() issues STACK_WIND_TAIL() instead
of STACK_WIND(). One such case is when inode_ctx for that file
is not present (can happen if readdirp was called, and populates
md-cache and serves all the lookups from cache).
Consider the following graph:
...
io-cache (parent)
|
readdir-ahead
|
read-ahead
...
Below is the code snippet of ioc_readv calling STACK_WIND_TAIL:
ioc_readv()
{
...
if (!inode_ctx)
STACK_WIND_TAIL (frame, FIRST_CHILD (frame->this),
FIRST_CHILD (frame->this)->fops->readv, fd,
size, offset, flags, xdata);
/* Ideally, this stack_wind should wind to readdir-ahead:readv()
but it winds to read-ahead:readv(). See below for
explaination.
*/
...
}
STACK_WIND_TAIL (frame, obj, fn, ...)
{
frame->this = obj;
/* for the above mentioned graph, frame->this will be readdir-ahead
* frame->this = FIRST_CHILD (frame->this) i.e. readdir-ahead, which
* is as expected
*/
...
THIS = obj;
/* THIS will be read-ahead instead of readdir-ahead!, as obj expands
* to "FIRST_CHILD (frame->this)" and frame->this was pointing
* to readdir-ahead in the previous statement.
*/
...
fn (frame, obj, params);
/* fn will call read-ahead:readv() instead of readdir-ahead:readv()!
* as fn expands to "FIRST_CHILD (frame->this)->fops->readv" and
* frame->this was pointing ro readdir-ahead in the first statement
*/
...
}
Thus, the readdir-ahead's readv() implementation will be skipped, and
ra_readv() will be called with frame->this = "readdir-ahead" and
this = "read-ahead". This can lead to corruption / hang / other problems.
But in this perticular case, when 'frame->this' and 'this' passed
to ra_readv() doesn't match, it causes ra_readv() to call ra_readv()
again!. Thus the logic of read-ahead readv() falls apart and leads to
hang.
Solution:
=========
Ideally, STACK_WIND_TAIL() should be modified as:
STACK_WIND_TAIL (frame, obj, fn, ...)
{
next_xl = obj /* resolve obj as the variables passed in obj macro
can be overwritten in the further instrucions */
next_xl_fn = fn /* resolve fn and store in a tmp variable, before
modifying any variables */
frame->this = next_xl;
...
THIS = next_xl;
...
next_xl_fn (frame, next_xl, params);
...
}
But for this solution, knowing the type of variable 'next_xl_fn' is
a challenge and is not easy. Hence just modifying all the existing
callers to pass "FIRST_CHILD (this)" as obj, instead of
"FIRST_CHILD (frame->this)".
Change-Id: I179ffe3d1f154bc5a1935fd2ee44e912eb0fbb61
BUG: 1388292
Signed-off-by: Poornima G <pgurusid@redhat.com>
Reviewed-on: http://review.gluster.org/15901
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/bugs/io-cache/bug-read-hang.c | 125 | ||||
-rwxr-xr-x | tests/bugs/io-cache/bug-read-hang.t | 32 |
2 files changed, 157 insertions, 0 deletions
diff --git a/tests/bugs/io-cache/bug-read-hang.c b/tests/bugs/io-cache/bug-read-hang.c new file mode 100644 index 00000000000..74dfddd7a6e --- /dev/null +++ b/tests/bugs/io-cache/bug-read-hang.c @@ -0,0 +1,125 @@ +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NO_INIT 1 + +int count = 0; +void +read_cbk (glfs_fd_t *fd, ssize_t ret, void *data) { +count++; +} + +glfs_t * +setup_new_client(char *hostname, char *volname, char *log_file, int flag) +{ + int ret = 0; + glfs_t *fs = NULL; + + fs = glfs_new (volname); + if (!fs) { + fprintf (stderr, "\nglfs_new: returned NULL (%s)\n", + strerror (errno)); + goto error; + } + + ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", + ret, strerror (errno)); + goto error; + } + + ret = glfs_set_logging (fs, log_file, 7); + if (ret < 0) { + fprintf (stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + + if (flag == NO_INIT) + goto out; + + ret = glfs_init (fs); + if (ret < 0) { + fprintf (stderr, "\nglfs_init failed with ret: %d (%s)\n", + ret, strerror (errno)); + goto error; + } + +out: + return fs; +error: + return NULL; +} + +int +main (int argc, char *argv[]) +{ + int ret = 0; + glfs_t *fs = NULL; + struct glfs_fd *fd = NULL; + char *volname = NULL; + char *log_file = NULL; + char *hostname = NULL; + char *buf = NULL; + struct stat stat; + + if (argc != 4) { + fprintf (stderr, + "Expect following args %s <hostname> <Vol> <log file location>\n" + , argv[0]); + return -1; + } + + hostname = argv[1]; + volname = argv[2]; + log_file = argv[3]; + + fs = setup_new_client (hostname, volname, log_file, 0); + if (!fs) { + fprintf (stderr, "\nsetup_new_client: returned NULL (%s)\n", + strerror (errno)); + goto error; + } + + fd = glfs_opendir (fs, "/"); + if (!fd) { + fprintf (stderr, "/: %s\n", strerror (errno)); + return -1; + } + + glfs_readdirplus (fd, &stat); + + fd = glfs_open (fs, "/test", O_RDWR); + if (fd == NULL) { + fprintf (stderr, "glfs_open: returned NULL\n"); + goto error; + } + + buf = (char *) malloc (5); + + ret = glfs_pread (fd, buf, 5, 0, 0); + if (ret < 0) { + fprintf (stderr, "Read(%s): %d (%s)\n", "test", ret, + strerror (errno)); + return ret; + } + + free (buf); + glfs_close (fd); + + ret = glfs_fini (fs); + if (ret < 0) { + fprintf (stderr, "glfs_fini failed with ret: %d (%s)\n", + ret, strerror (errno)); + return -1; + } + + return 0; +error: + return -1; +} diff --git a/tests/bugs/io-cache/bug-read-hang.t b/tests/bugs/io-cache/bug-read-hang.t new file mode 100755 index 00000000000..1242dbf3ee9 --- /dev/null +++ b/tests/bugs/io-cache/bug-read-hang.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +#. $(dirname $0)/../../volume.rc + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2}; + +TEST $CLI volume set $V0 features.cache-invalidation on +TEST $CLI volume set $V0 features.cache-invalidation-timeout 600 +TEST $CLI volume set $V0 performance.cache-invalidation on +TEST $CLI volume set $V0 performance.md-cache-timeout 600 +TEST $CLI volume set $V0 performance.cache-samba-metadata on +TEST $CLI volume set $V0 open-behind off + +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +echo "Hello" > $M0/test + +TEST build_tester $(dirname $0)/bug-read-hang.c -lgfapi +TEST $(dirname $0)/bug-read-hang $H0 $V0 $logdir/bug-read-hang.log + +cleanup_tester $(dirname $0)/bug-read-hang + +cleanup; |