From 8d55c25f158921b508bff0e7f25158991913f922 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 11 Dec 2013 09:33:53 +0530 Subject: syncop: Change return value of syncop Problem: We found a day-1 bug when syncop_xxx() infra is used inside a synctask with compilation optimization (CFLAGS -O2). Detailed explanation of the Root cause: We found the bug in 'gf_defrag_migrate_data' in rebalance operation: Lets look at interesting parts of the function: int gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *migrate_data) { ..... code section - [ Loop ] while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) != 0) { ..... code section - [ ERRNO-1 ] (errno of readdirp is stored in readdir_operrno by a thread) /* Need to keep track of ENOENT errno, that means, there is no need to send more readdirp() */ readdir_operrno = errno; ..... code section - [ SYNCOP-1 ] (syncop_getxattr is called by a thread) ret = syncop_getxattr (this, &entry_loc, &dict, GF_XATTR_LINKINFO_KEY); code section - [ ERRNO-2] (checking for failures of syncop_getxattr(). This may not always be executed in same thread which executed [SYNCOP-1]) if (ret < 0) { if (errno != ENODATA) { loglevel = GF_LOG_ERROR; defrag->total_failures += 1; ..... } the function above could be executed by thread(t1) till [SYNCOP-1] and code from [ERRNO-2] can be executed by a different thread(t2) because of the way syncop-infra schedules the tasks. when the code is compiled with -O2 optimization this is the assembly code that is generated: [ERRNO-1] 1165 readdir_operrno = errno; <<---- errno gets expanded as *(__errno_location()) 0x00007fd149d48b60 <+496>: callq 0x7fd149d410c0 0x00007fd149d48b72 <+514>: mov %rax,0x50(%rsp) <<------ Address returned by __errno_location() is stored in a special location in stack for later use. 0x00007fd149d48b77 <+519>: mov (%rax),%eax 0x00007fd149d48b79 <+521>: mov %eax,0x78(%rsp) .... [ERRNO-2] 1281 if (errno != ENODATA) { 0x00007fd149d492ae <+2366>: mov 0x50(%rsp),%rax <<----- Because it already stored the address returned by __errno_location(), it just dereferences the address to get the errno value. BUT THIS CODE NEED NOT BE EXECUTED BY SAME THREAD!!! 0x00007fd149d492b3 <+2371>: mov $0x9,%ebp 0x00007fd149d492b8 <+2376>: mov (%rax),%edi 0x00007fd149d492ba <+2378>: cmp $0x3d,%edi The problem is that __errno_location() value of t1 and t2 are different. So [ERRNO-2] ends up reading errno of t1 instead of errno of t2 even though t2 is executing [ERRNO-2] code section. When code is compiled without any optimization for [ERRNO-2]: 1281 if (errno != ENODATA) { 0x00007fd58e7a326f <+2237>: callq 0x7fd58e797300 <<--- As it is calling __errno_location() again it gets the location from t2 so it works as intended. 0x00007fd58e7a3274 <+2242>: mov (%rax),%eax 0x00007fd58e7a3276 <+2244>: cmp $0x3d,%eax 0x00007fd58e7a3279 <+2247>: je 0x7fd58e7a32a1 Fix: Make syncop_xxx() return (-errno) value as the return value in case of errors and all the functions which make syncop_xxx() will need to use (-ret) to figure out the reason for failure in case of syncop_xxx() failures. Change-Id: I314d20dabe55d3e62ff66f3b4adb1cac2eaebb57 BUG: 1040356 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6475 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- api/src/glfs-fops.c | 36 ++++++++++++++++++++++++++++++++++++ api/src/glfs-handleops.c | 15 +++++++++++++++ api/src/glfs-internal.h | 20 ++++++++++++++++++++ api/src/glfs-resolve.c | 10 ++++++++++ 4 files changed, 81 insertions(+) (limited to 'api') diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 5ea54567d..67d5616fe 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -124,6 +124,7 @@ retry: } ret = syncop_open (subvol, &loc, flags, glfd->fd); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); out: @@ -169,6 +170,7 @@ glfs_close (struct glfs_fd *glfd) } ret = syncop_flush (subvol, fd); + DECODE_SYNCOP_ERR (ret); out: fs = glfd->fs; glfs_fd_destroy (glfd); @@ -273,6 +275,7 @@ glfs_fstat (struct glfs_fd *glfd, struct stat *stat) } ret = syncop_fstat (subvol, fd, &iatt); + DECODE_SYNCOP_ERR (ret); if (ret == 0 && stat) glfs_iatt_to_stat (glfd->fs, &iatt, stat); @@ -392,9 +395,11 @@ retry: if (ret == 0) { ret = syncop_open (subvol, &loc, flags, glfd->fd); + DECODE_SYNCOP_ERR (ret); } else { ret = syncop_create (subvol, &loc, flags, mode, glfd->fd, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); } ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -484,6 +489,7 @@ glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, size = iov_length (iovec, iovcnt); ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref); + DECODE_SYNCOP_ERR (ret); if (ret <= 0) goto out; @@ -833,6 +839,7 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, iov.iov_len = size; ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags); + DECODE_SYNCOP_ERR (ret); iobuf_unref (iobuf); iobref_unref (iobref); @@ -1005,6 +1012,7 @@ glfs_fsync (struct glfs_fd *glfd) } ret = syncop_fsync (subvol, fd, 0); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref (fd); @@ -1079,6 +1087,7 @@ glfs_fdatasync (struct glfs_fd *glfd) } ret = syncop_fsync (subvol, fd, 1); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref (fd); @@ -1120,6 +1129,7 @@ glfs_ftruncate (struct glfs_fd *glfd, off_t offset) } ret = syncop_ftruncate (subvol, fd, offset); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref (fd); @@ -1188,6 +1198,7 @@ retry: goto out; ret = syncop_access (subvol, &loc, mode); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); out: @@ -1263,6 +1274,7 @@ retry: } ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1313,6 +1325,7 @@ retry: } ret = syncop_readlink (subvol, &loc, &linkval, bufsiz); + DECODE_SYNCOP_ERR (ret); if (ret > 0) { memcpy (buf, linkval, ret); GF_FREE (linkval); @@ -1392,6 +1405,7 @@ retry: } ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1473,6 +1487,7 @@ retry: } ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1522,6 +1537,7 @@ retry: } ret = syncop_unlink (subvol, &loc); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1568,6 +1584,7 @@ retry: } ret = syncop_rmdir (subvol, &loc, 0); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1631,6 +1648,7 @@ retrynew: /* TODO: check if new or old is a prefix of the other, and fail EINVAL */ ret = syncop_rename (subvol, &oldloc, &newloc); + DECODE_SYNCOP_ERR (ret); if (ret == -1 && errno == ESTALE) { if (reval < DEFAULT_REVAL_COUNT) { @@ -1708,6 +1726,7 @@ retrynew: newloc.inode = inode_ref (oldloc.inode); ret = syncop_link (subvol, &oldloc, &newloc); + DECODE_SYNCOP_ERR (ret); if (ret == -1 && errno == ESTALE) { loc_wipe (&oldloc); @@ -1782,6 +1801,7 @@ retry: } ret = syncop_opendir (subvol, &loc, glfd->fd); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); out: @@ -1972,6 +1992,7 @@ glfd_entry_refresh (struct glfs_fd *glfd, int plus) else ret = syncop_readdir (subvol, fd, 131072, glfd->offset, &entries); + DECODE_SYNCOP_ERR (ret); if (ret >= 0) { if (plus) gf_link_inodes_from_dirent (THIS, fd->inode, &entries); @@ -2150,6 +2171,7 @@ retry: goto out; ret = syncop_statfs (subvol, &loc, buf); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); out: @@ -2191,6 +2213,7 @@ retry: goto out; ret = syncop_setattr (subvol, &loc, iatt, valid, 0, 0); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); out: @@ -2226,6 +2249,7 @@ glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid) } ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref (fd); @@ -2442,6 +2466,7 @@ retry: goto out; ret = syncop_getxattr (subvol, &loc, &xattr, name); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2500,6 +2525,7 @@ glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value, } ret = syncop_fgetxattr (subvol, fd, &xattr, name); + DECODE_SYNCOP_ERR (ret); if (ret) goto out; @@ -2570,6 +2596,7 @@ retry: goto out; ret = syncop_getxattr (subvol, &loc, &xattr, NULL); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2625,6 +2652,7 @@ glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size) } ret = syncop_fgetxattr (subvol, fd, &xattr, NULL); + DECODE_SYNCOP_ERR (ret); if (ret) goto out; @@ -2697,6 +2725,7 @@ retry: } ret = syncop_setxattr (subvol, &loc, xattr, flags); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2760,6 +2789,7 @@ glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, } ret = syncop_fsetxattr (subvol, fd, xattr, flags); + DECODE_SYNCOP_ERR (ret); out: if (xattr) dict_unref (xattr); @@ -2803,6 +2833,7 @@ retry: goto out; ret = syncop_removexattr (subvol, &loc, name); + DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2853,6 +2884,7 @@ glfs_fremovexattr (struct glfs_fd *glfd, const char *name) } ret = syncop_fremovexattr (subvol, fd, name); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref (fd); @@ -2887,6 +2919,7 @@ glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len) } ret = syncop_fallocate (subvol, fd, keep_size, offset, len); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref(fd); @@ -2921,6 +2954,7 @@ glfs_discard (struct glfs_fd *glfd, off_t offset, size_t len) } ret = syncop_discard (subvol, fd, offset, len); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref(fd); @@ -2952,6 +2986,7 @@ glfs_zerofill (struct glfs_fd *glfd, off_t offset, off_t len) } ret = syncop_zerofill (subvol, fd, offset, len); + DECODE_SYNCOP_ERR (ret); out: if (fd) fd_unref(fd); @@ -3200,6 +3235,7 @@ glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock) gf_flock_from_flock (&gf_flock, flock); gf_flock_from_flock (&saved_flock, flock); ret = syncop_lk (subvol, fd, cmd, &gf_flock); + DECODE_SYNCOP_ERR (ret); gf_flock_to_flock (&gf_flock, flock); if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW)) diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c index 0f996d3a2..5bba38b5f 100644 --- a/api/src/glfs-handleops.c +++ b/api/src/glfs-handleops.c @@ -152,6 +152,7 @@ glfs_h_stat (struct glfs *fs, struct glfs_object *object, struct stat *stat) /* fop/op */ ret = syncop_stat (subvol, &loc, &iatt); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (!ret && stat) { @@ -258,6 +259,7 @@ glfs_h_setattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat, /* fop/op */ ret = syncop_setattr (subvol, &loc, &iatt, glvalid, 0, 0); + DECODE_SYNCOP_ERR (ret); out: loc_wipe (&loc); @@ -331,6 +333,7 @@ glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) /* fop/op */ ret = syncop_open (subvol, &loc, flags, glfd->fd); + DECODE_SYNCOP_ERR (ret); out: loc_wipe (&loc); @@ -420,6 +423,7 @@ glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, /* fop/op */ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (ret == 0) { @@ -518,6 +522,7 @@ glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path, /* fop/op */ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if ( ret == 0 ) { @@ -606,6 +611,7 @@ glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path, /* fop/op */ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (ret == 0) { @@ -676,11 +682,13 @@ glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path) if (!IA_ISDIR(loc.inode->ia_type)) { ret = syncop_unlink (subvol, &loc); + DECODE_SYNCOP_ERR (ret); if (ret != 0) { goto out; } } else { ret = syncop_rmdir (subvol, &loc, 0); + DECODE_SYNCOP_ERR (ret); if (ret != 0) { goto out; } @@ -755,6 +763,7 @@ glfs_h_opendir (struct glfs *fs, struct glfs_object *object) /* fop/op */ ret = syncop_opendir (subvol, &loc, glfd->fd); + DECODE_SYNCOP_ERR (ret); out: loc_wipe (&loc); @@ -846,6 +855,7 @@ glfs_h_create_from_handle (struct glfs *fs, unsigned char *handle, int len, } ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0); + DECODE_SYNCOP_ERR (ret); if (ret) { gf_log (subvol->name, GF_LOG_WARNING, "inode refresh of %s failed: %s", @@ -934,6 +944,7 @@ glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset) /* fop/op */ ret = syncop_truncate (subvol, &loc, (off_t)offset); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (ret == 0) @@ -1006,6 +1017,7 @@ glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name, /* fop/op */ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (ret == 0) { @@ -1081,6 +1093,7 @@ glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf, /* fop/op */ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz); + DECODE_SYNCOP_ERR (ret); /* populate out args */ if (ret > 0) @@ -1166,6 +1179,7 @@ glfs_h_link (struct glfs *fs, struct glfs_object *linksrc, /* fop/op */ ret = syncop_link (subvol, &oldloc, &newloc); + DECODE_SYNCOP_ERR (ret); if (ret == 0) /* TODO: No iatt to pass as there has been no lookup */ @@ -1256,6 +1270,7 @@ glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname, /* TODO: check if new or old is a prefix of the other, and fail EINVAL */ ret = syncop_rename (subvol, &oldloc, &newloc); + DECODE_SYNCOP_ERR (ret); if (ret == 0) inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name, diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h index ec1d5579d..f04557323 100644 --- a/api/src/glfs-internal.h +++ b/api/src/glfs-internal.h @@ -18,6 +18,26 @@ #define DEFAULT_REVAL_COUNT 1 +/* + * syncop_xxx() calls are executed in two ways, one is inside a synctask where + * the executing function will do 'swapcontext' and the other is without + * synctask where the executing thread is made to wait using pthread_cond_wait. + * Executing thread may change when syncop_xxx() is executed inside a synctask. + * This leads to errno_location change i.e. errno may give errno of + * non-executing thread. So errno is not touched inside a synctask execution. + * All gfapi calls are executed using the second way of executing syncop_xxx() + * where the executing thread waits using pthread_cond_wait so it is ok to set + * errno in these cases. The following macro makes syncop_xxx() behave just + * like a system call, where -1 is returned and errno is set when a failure + * occurs. + */ +#define DECODE_SYNCOP_ERR(ret) do { \ + if (ret < 0) { \ + errno = -ret; \ + ret = -1; \ + } \ + } while (0) + #define ESTALE_RETRY(ret,errno,reval,loc,label) do { \ if (ret == -1 && errno == ESTALE) { \ if (reval < DEFAULT_REVAL_COUNT) { \ diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c index 4ca2eb6fc..c88330aa4 100644 --- a/api/src/glfs-resolve.c +++ b/api/src/glfs-resolve.c @@ -48,6 +48,7 @@ glfs_first_lookup_safe (xlator_t *subvol) loc.name = ""; ret = syncop_lookup (subvol, &loc, 0, 0, 0, 0); + DECODE_SYNCOP_ERR (ret); gf_log (subvol->name, GF_LOG_DEBUG, "first lookup complete %d", ret); @@ -98,6 +99,7 @@ glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode) return NULL; ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0); + DECODE_SYNCOP_ERR (ret); if (ret) { gf_log (subvol->name, GF_LOG_WARNING, @@ -181,6 +183,7 @@ glfs_resolve_symlink (struct glfs *fs, xlator_t *subvol, inode_t *inode, loc.path = rpath; ret = syncop_readlink (subvol, &loc, &path, 4096); + DECODE_SYNCOP_ERR (ret); if (ret < 0) goto out; @@ -210,6 +213,7 @@ glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode, goto out; ret = syncop_lookup (subvol, &loc, NULL, iatt, NULL, NULL); + DECODE_SYNCOP_ERR (ret); out: loc_wipe (&loc); @@ -268,6 +272,7 @@ glfs_resolve_component (struct glfs *fs, xlator_t *subvol, inode_t *parent, } ret = syncop_lookup (subvol, &loc, NULL, &ciatt, NULL, NULL); + DECODE_SYNCOP_ERR (ret); if (ret && reval) { inode_unref (loc.inode); loc.inode = inode_new (parent->table); @@ -292,6 +297,7 @@ glfs_resolve_component (struct glfs *fs, xlator_t *subvol, inode_t *parent, ret = syncop_lookup (subvol, &loc, xattr_req, &ciatt, NULL, NULL); + DECODE_SYNCOP_ERR (ret); } if (ret) goto out; @@ -515,6 +521,7 @@ glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd, ret = syncop_fgetxattr (oldsubvol, oldfd, &lockinfo, GF_XATTR_LOCKINFO_KEY); + DECODE_SYNCOP_ERR (ret); if (ret < 0) { gf_log (fs->volname, GF_LOG_WARNING, "fgetxattr (%s) failed (%s) on graph %s (%d)", @@ -533,6 +540,7 @@ glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd, } ret = syncop_fsetxattr (newsubvol, newfd, lockinfo, 0); + DECODE_SYNCOP_ERR (ret); if (ret < 0) { gf_log (fs->volname, GF_LOG_WARNING, "fsetxattr (%s) failed (%s) on graph %s (%d)", @@ -568,6 +576,7 @@ glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) if (!oldsubvol->switched) { ret = syncop_fsync (oldsubvol, oldfd, 0); + DECODE_SYNCOP_ERR (ret); if (ret) { gf_log (fs->volname, GF_LOG_WARNING, "fsync() failed (%s) on %s graph %s (%d)", @@ -614,6 +623,7 @@ glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) ret = syncop_open (newsubvol, &loc, oldfd->flags & ~(O_TRUNC|O_EXCL|O_CREAT), newfd); + DECODE_SYNCOP_ERR (ret); loc_wipe (&loc); if (ret) { -- cgit