From 1a5e8638a177ac90177ae1b749b89c984930c875 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 21 May 2013 16:27:09 -0700 Subject: gfapi: handle graph switch (cwd, fds, locks) - Migrate open fds to new graph - Migrate locks held in open fd to new fd - Refresh CWD, so resolution of relative paths happens on refreshed inode (on new graph). Change-Id: I4b18083b9b290107ebda1f917fc85b635ab72fb4 BUG: 953694 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/5059 Tested-by: Gluster Build System Reviewed-by: Raghavendra G --- api/src/glfs-fops.c | 405 ++++++++++++++++++++++++++++++++------- api/src/glfs-internal.h | 57 +++++- api/src/glfs-master.c | 45 ++++- api/src/glfs-resolve.c | 494 +++++++++++++++++++++++++++++++++++++++++++++++- api/src/glfs.c | 75 +++----- 5 files changed, 944 insertions(+), 132 deletions(-) (limited to 'api') diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index dadaf4b4..231db481 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -113,6 +113,9 @@ retry: } if (glfd->fd) { + /* Retry. Safe to touch glfd->fd as we + still have not glfs_fd_bind() yet. + */ fd_unref (glfd->fd); glfd->fd = NULL; } @@ -133,8 +136,13 @@ out: if (ret && glfd) { glfs_fd_destroy (glfd); glfd = NULL; + } else { + fd_bind (glfd->fd); + glfs_fd_bind (glfd); } + glfs_subvol_done (fs, subvol); + return glfd; } @@ -144,15 +152,30 @@ glfs_close (struct glfs_fd *glfd) { xlator_t *subvol = NULL; int ret = -1; + fd_t *fd = NULL; + struct glfs *fs = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); - ret = syncop_flush (subvol, glfd->fd); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + ret = syncop_flush (subvol, fd); +out: + fs = glfd->fs; glfs_fd_destroy (glfd); + if (fd) + fd_unref (fd); + + glfs_subvol_done (fs, subvol); + return ret; } @@ -184,6 +207,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -215,6 +240,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -225,21 +252,34 @@ glfs_fstat (struct glfs_fd *glfd, struct stat *stat) int ret = -1; xlator_t *subvol = NULL; struct iatt iatt = {0, }; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fstat (subvol, glfd->fd, &iatt); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fstat (subvol, fd, &iatt); if (ret == 0 && stat) glfs_iatt_to_stat (glfd->fs, &iatt, stat); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -334,6 +374,9 @@ retry: } if (glfd->fd) { + /* Retry. Safe to touch glfd->fd as we + still have not glfs_fd_bind() yet. + */ fd_unref (glfd->fd); glfd->fd = NULL; } @@ -361,8 +404,13 @@ out: if (ret && glfd) { glfs_fd_destroy (glfd); glfd = NULL; + } else { + fd_bind (glfd->fd); + glfs_fd_bind (glfd); } + glfs_subvol_done (fs, subvol); + return glfd; } @@ -408,17 +456,29 @@ glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, struct iovec *iov = NULL; int cnt = 0; struct iobref *iobref = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } size = iov_length (iovec, iovcnt); - ret = syncop_readv (subvol, glfd->fd, size, offset, - 0, &iov, &cnt, &iobref); + ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref); if (ret <= 0) - return ret; + goto out; size = iov_copy (iovec, iovcnt, iov, cnt); /* FIXME!!! */ @@ -429,6 +489,12 @@ glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, if (iobref) iobref_unref (iobref); +out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return size; } @@ -628,24 +694,39 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, struct iobref *iobref = NULL; struct iobuf *iobuf = NULL; struct iovec iov = {0, }; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } size = iov_length (iovec, iovcnt); iobuf = iobuf_get2 (subvol->ctx->iobuf_pool, size); if (!iobuf) { + ret = -1; errno = ENOMEM; - return -1; + goto out; } iobref = iobref_new (); if (!iobref) { iobuf_unref (iobuf); errno = ENOMEM; - return -1; + ret = -1; + goto out; } ret = iobref_add (iobref, iobuf); @@ -653,7 +734,8 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, iobuf_unref (iobuf); iobref_unref (iobref); errno = ENOMEM; - return -1; + ret = -1; + goto out; } iov_unload (iobuf_ptr (iobuf), iovec, iovcnt); /* FIXME!!! */ @@ -661,17 +743,22 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, iov.iov_base = iobuf_ptr (iobuf); iov.iov_len = size; - ret = syncop_writev (subvol, glfd->fd, &iov, 1, offset, - iobref, flags); + ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags); iobuf_unref (iobuf); iobref_unref (iobref); if (ret <= 0) - return ret; + goto out; glfd->offset = (offset + size); +out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -810,18 +897,31 @@ glfs_fsync (struct glfs_fd *glfd) { int ret = -1; xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fsync (subvol, glfd->fd, 0); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fsync (subvol, fd, 0); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -871,18 +971,31 @@ glfs_fdatasync (struct glfs_fd *glfd) { int ret = -1; xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fsync (subvol, glfd->fd, 1); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fsync (subvol, fd, 1); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -899,18 +1012,31 @@ glfs_ftruncate (struct glfs_fd *glfd, off_t offset) { int ret = -1; xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_ftruncate (subvol, glfd->fd, offset); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_ftruncate (subvol, fd, offset); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -978,6 +1104,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1057,6 +1185,8 @@ out: if (xattr_req) dict_unref (xattr_req); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1098,6 +1228,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1177,6 +1309,8 @@ out: if (xattr_req) dict_unref (xattr_req); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1256,6 +1390,8 @@ out: if (xattr_req) dict_unref (xattr_req); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1300,6 +1436,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1344,6 +1482,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1412,6 +1552,8 @@ out: loc_wipe (&oldloc); loc_wipe (&newloc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1474,6 +1616,8 @@ out: loc_wipe (&oldloc); loc_wipe (&newloc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1517,6 +1661,9 @@ retry: } if (glfd->fd) { + /* Retry. Safe to touch glfd->fd as we + still have not glfs_fd_bind() yet. + */ fd_unref (glfd->fd); glfd->fd = NULL; } @@ -1537,8 +1684,13 @@ out: if (ret && glfd) { glfs_fd_destroy (glfd); glfd = NULL; + } else { + fd_bind (glfd->fd); + glfs_fd_bind (glfd); } + glfs_subvol_done (fs, subvol); + return glfd; } @@ -1619,38 +1771,57 @@ glfd_entry_refresh (struct glfs_fd *glfd, int plus) gf_dirent_t entries; gf_dirent_t old; int ret = -1; + fd_t *fd = NULL; - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { + ret = -1; errno = EIO; - return -1; + goto out; + } + + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + if (fd->inode->ia_type != IA_IFDIR) { + ret = -1; + errno = EBADF; + goto out; } INIT_LIST_HEAD (&entries.list); INIT_LIST_HEAD (&old.list); if (plus) - ret = syncop_readdirp (subvol, glfd->fd, 131072, glfd->offset, + ret = syncop_readdirp (subvol, fd, 131072, glfd->offset, NULL, &entries); else - ret = syncop_readdir (subvol, glfd->fd, 131072, glfd->offset, + ret = syncop_readdir (subvol, fd, 131072, glfd->offset, &entries); if (ret >= 0) { - /* spurious errno is dangerous for glfd_entry_next() */ - errno = 0; - if (plus) - gf_link_inodes_from_dirent (THIS, glfd->fd->inode, - &entries); + gf_link_inodes_from_dirent (THIS, fd->inode, &entries); list_splice_init (&glfd->entries, &old.list); list_splice_init (&entries.list, &glfd->entries); + + /* spurious errno is dangerous for glfd_entry_next() */ + errno = 0; } if (ret > 0) glfd->next = list_entry (glfd->entries.next, gf_dirent_t, list); gf_dirent_free (&old); +out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); return ret; } @@ -1692,12 +1863,6 @@ glfs_readdirplus_r (struct glfs_fd *glfd, struct stat *stat, struct dirent *buf, __glfs_entry_fd (glfd); - if (glfd->fd->inode->ia_type != IA_IFDIR) { - ret = -1; - errno = EBADF; - goto out; - } - errno = 0; entry = glfd_entry_next (glfd, !!stat); if (errno) @@ -1715,7 +1880,7 @@ glfs_readdirplus_r (struct glfs_fd *glfd, struct stat *stat, struct dirent *buf, if (stat) glfs_iatt_to_stat (glfd->fs, &entry->d_stat, stat); } -out: + return ret; } @@ -1758,6 +1923,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1797,6 +1964,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -1806,18 +1975,31 @@ glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid) { int ret = -1; xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fsetattr (subvol, glfd->fd, iatt, valid, 0, 0); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -2038,6 +2220,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -2065,22 +2249,35 @@ glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value, int ret = -1; xlator_t *subvol = NULL; dict_t *xattr = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fgetxattr (subvol, glfd->fd, &xattr, name); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fgetxattr (subvol, fd, &xattr, name); if (ret) goto out; ret = glfs_getxattr_process (value, size, xattr, name); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -2151,6 +2348,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -2175,22 +2374,35 @@ glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size) int ret = -1; xlator_t *subvol = NULL; dict_t *xattr = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fgetxattr (subvol, glfd->fd, &xattr, NULL); + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fgetxattr (subvol, fd, &xattr, NULL); if (ret) goto out; ret = glfs_listxattr_process (value, size, xattr); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -2261,6 +2473,8 @@ out: if (xattr) dict_unref (xattr); + glfs_subvol_done (fs, subvol); + return ret; } @@ -2288,16 +2502,24 @@ glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, int ret = -1; xlator_t *subvol = NULL; dict_t *xattr = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + xattr = dict_for_key_value (name, value, size); if (!xattr) { ret = -1; @@ -2305,11 +2527,16 @@ glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, goto out; } - ret = syncop_fsetxattr (subvol, glfd->fd, xattr, flags); + ret = syncop_fsetxattr (subvol, fd, xattr, flags); out: if (xattr) dict_unref (xattr); + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -2350,6 +2577,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -2373,31 +2602,32 @@ glfs_fremovexattr (struct glfs_fd *glfd, const char *name) { int ret = -1; xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } - ret = syncop_fremovexattr (subvol, glfd->fd, name); -out: - return ret; -} + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + ret = syncop_fremovexattr (subvol, fd, name); +out: + if (fd) + fd_unref (fd); -void -glfs_cwd_set (struct glfs *fs, inode_t *inode) -{ - if (fs->cwd) { - inode_unref (fs->cwd); - fs->cwd = NULL; - } + glfs_subvol_done (glfd->fs, subvol); - fs->cwd = inode_ref (inode); + return ret; } @@ -2437,6 +2667,8 @@ retry: out: loc_wipe (&loc); + glfs_subvol_done (fs, subvol); + return ret; } @@ -2444,12 +2676,28 @@ out: int glfs_fchdir (struct glfs_fd *glfd) { - int ret = -1; - inode_t *inode = NULL; + int ret = -1; + inode_t *inode = NULL; + xlator_t *subvol = NULL; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - inode = glfd->fd->inode; + subvol = glfs_active_subvol (glfd->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + inode = fd->inode; if (!IA_ISDIR (inode->ia_type)) { ret = -1; @@ -2460,6 +2708,11 @@ glfs_fchdir (struct glfs_fd *glfd) glfs_cwd_set (glfd->fs, inode); ret = 0; out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } @@ -2516,6 +2769,8 @@ out: retpath = NULL; } + glfs_subvol_done (fs, subvol); + return retpath; } @@ -2535,7 +2790,8 @@ glfs_getcwd (struct glfs *fs, char *buf, size_t n) goto out; } - inode = fs->cwd; + inode = glfs_cwd_get (fs); + if (!inode) { strncpy (buf, "/", n); ret = 0; @@ -2554,6 +2810,9 @@ glfs_getcwd (struct glfs *fs, char *buf, size_t n) out: GF_FREE (path); + if (inode) + inode_unref (inode); + if (ret < 0) return NULL; @@ -2589,19 +2848,37 @@ glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock) int ret = -1; xlator_t *subvol = NULL; struct gf_flock gf_flock = {0, }; + struct gf_flock saved_flock = {0, }; + fd_t *fd = NULL; __glfs_entry_fd (glfd); - subvol = glfs_fd_subvol (glfd); + subvol = glfs_active_subvol (glfd->fs); if (!subvol) { ret = -1; errno = EIO; goto out; } + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + gf_flock_from_flock (&gf_flock, flock); - ret = syncop_lk (subvol, glfd->fd, cmd, &gf_flock); + gf_flock_from_flock (&saved_flock, flock); + ret = syncop_lk (subvol, fd, cmd, &gf_flock); gf_flock_to_flock (&gf_flock, flock); + + if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW)) + fd_lk_insert_and_merge (fd, cmd, &saved_flock); out: + if (fd) + fd_unref (fd); + + glfs_subvol_done (glfd->fs, subvol); + return ret; } diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h index d658ce2a..30ff599f 100644 --- a/api/src/glfs-internal.h +++ b/api/src/glfs-internal.h @@ -33,6 +33,8 @@ struct glfs { int err; xlator_t *active_subvol; + xlator_t *next_subvol; + xlator_t *old_subvol; char *oldvolfile; ssize_t oldvollen; @@ -40,12 +42,17 @@ struct glfs { inode_t *cwd; uint32_t dev_id; /* Used to fill st_dev in struct stat */ + + struct list_head openfds; + + gf_boolean_t migration_in_progress; }; struct glfs_fd { + struct list_head openfds; struct glfs *fs; off_t offset; - fd_t *fd; + fd_t *fd; /* Currently guared by @fs->mutex. TODO: per-glfd lock */ struct list_head entries; gf_dirent_t *next; }; @@ -62,7 +69,11 @@ int glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *lo struct iatt *iatt, int reval); int glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc, struct iatt *iatt, int reval); -void glfs_first_lookup (xlator_t *subvol); +fd_t *glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd); + +fd_t *__glfs_migrate_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd); + +int glfs_first_lookup (xlator_t *subvol); static inline void __glfs_entry_fs (struct glfs *fs) @@ -78,12 +89,50 @@ __glfs_entry_fd (struct glfs_fd *fd) } +/* + By default all lock attempts from user context must + use glfs_lock() and glfs_unlock(). This allows + for a safe implementation of graph migration where + we can give up the mutex during syncop calls so + that bottom up calls (particularly CHILD_UP notify) + can do a mutex_lock() on @glfs without deadlocking + the filesystem +*/ +static inline int +glfs_lock (struct glfs *fs) +{ + pthread_mutex_lock (&fs->mutex); + + while (!fs->init) + pthread_cond_wait (&fs->cond, &fs->mutex); + + while (fs->migration_in_progress) + pthread_cond_wait (&fs->cond, &fs->mutex); + + return 0; +} + + +static inline void +glfs_unlock (struct glfs *fs) +{ + pthread_mutex_unlock (&fs->mutex); +} + + void glfs_fd_destroy (struct glfs_fd *glfd); struct glfs_fd *glfs_fd_new (struct glfs *fs); - -xlator_t * glfs_fd_subvol (struct glfs_fd *glfd); +void glfs_fd_bind (struct glfs_fd *glfd); xlator_t * glfs_active_subvol (struct glfs *fs); +xlator_t * __glfs_active_subvol (struct glfs *fs); +void glfs_subvol_done (struct glfs *fs, xlator_t *subvol); + +inode_t * glfs_refresh_inode (xlator_t *subvol, inode_t *inode); + +inode_t *glfs_cwd_get (struct glfs *fs); +int glfs_cwd_set (struct glfs *fs, inode_t *inode); +int __glfs_cwd_set (struct glfs *fs, inode_t *inode); #endif /* !_GLFS_INTERNAL_H */ diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c index e6c46e36..09a92b7f 100644 --- a/api/src/glfs-master.c +++ b/api/src/glfs-master.c @@ -22,26 +22,57 @@ #include "xlator.h" #include "glusterfs.h" + #include "glfs-internal.h" int glfs_graph_setup (struct glfs *fs, glusterfs_graph_t *graph) { - if (fs->active_subvol == graph->top) - return 0; + xlator_t *new_subvol = NULL; + xlator_t *old_subvol = NULL; + inode_table_t *itable = NULL; + int ret = -1; + + new_subvol = graph->top; + /* This is called in a bottom-up context, it should specifically + NOT be glfs_lock() + */ pthread_mutex_lock (&fs->mutex); { - fs->active_subvol = graph->top; - pthread_cond_broadcast (&fs->cond); + if (new_subvol->switched || + new_subvol == fs->active_subvol || + new_subvol == fs->next_subvol) { + /* Spurious CHILD_UP event on old graph */ + ret = 0; + goto unlock; + } + + if (!new_subvol->itable) { + itable = inode_table_new (131072, new_subvol); + if (!itable) { + errno = ENOMEM; + ret = -1; + goto unlock; + } + + new_subvol->itable = itable; + } + + old_subvol = fs->next_subvol; + fs->next_subvol = new_subvol; + fs->next_subvol->winds++; /* first ref */ + ret = 0; } +unlock: pthread_mutex_unlock (&fs->mutex); - gf_log ("glfs-master", GF_LOG_INFO, "switched to graph %s (%d)", - uuid_utoa ((unsigned char *)graph->graph_uuid), graph->id); + if (old_subvol) + /* wasn't picked up so far, skip */ + glfs_subvol_done (fs, old_subvol); - return 0; + return ret; } diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c index 07fc1b7c..4a634df6 100644 --- a/api/src/glfs-resolve.c +++ b/api/src/glfs-resolve.c @@ -32,9 +32,11 @@ #include "glfs-internal.h" +#define graphid_str(subvol) (uuid_utoa((unsigned char *)subvol->graph->graph_uuid)) -void -glfs_first_lookup (xlator_t *subvol) + +int +glfs_first_lookup_safe (xlator_t *subvol) { loc_t loc = {0, }; int ret = -1; @@ -49,10 +51,87 @@ glfs_first_lookup (xlator_t *subvol) gf_log (subvol->name, GF_LOG_DEBUG, "first lookup complete %d", ret); - return; + return ret; } +int +__glfs_first_lookup (struct glfs *fs, xlator_t *subvol) +{ + int ret = -1; + + fs->migration_in_progress = 1; + pthread_mutex_unlock (&fs->mutex); + { + ret = glfs_first_lookup_safe (subvol); + } + pthread_mutex_lock (&fs->mutex); + fs->migration_in_progress = 0; + + return ret; +} + + +inode_t * +glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode) +{ + loc_t loc = {0, }; + int ret = -1; + struct iatt iatt = {0, }; + inode_t *newinode = NULL; + + + if (!oldinode) + return NULL; + + if (oldinode->table->xl == subvol) + return inode_ref (oldinode); + + newinode = inode_find (subvol->itable, oldinode->gfid); + if (newinode) + return newinode; + + uuid_copy (loc.gfid, oldinode->gfid); + loc.inode = inode_new (subvol->itable); + if (!loc.inode) + return NULL; + + ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0); + + if (ret) { + gf_log (subvol->name, GF_LOG_WARNING, + "inode refresh of %s failed: %s", + uuid_utoa (oldinode->gfid), strerror (errno)); + loc_wipe (&loc); + return NULL; + } + + newinode = inode_link (loc.inode, 0, 0, &iatt); + if (newinode) + inode_lookup (newinode); + + loc_wipe (&loc); + + return newinode; +} + + +inode_t * +__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode) +{ + inode_t *newinode = NULL; + + fs->migration_in_progress = 1; + pthread_mutex_unlock (&fs->mutex); + { + newinode = glfs_refresh_inode_safe (subvol, inode); + } + pthread_mutex_lock (&fs->mutex); + fs->migration_in_progress = 0; + + return newinode; +} + int glfs_loc_touchup (loc_t *loc) { @@ -364,13 +443,18 @@ glfs_resolve_path (struct glfs *fs, xlator_t *subvol, const char *origpath, loc_t *loc, struct iatt *iatt, int follow, int reval) { int ret = -1; + inode_t *cwd = NULL; if (origpath[0] == '/') - ret = glfs_resolve_at (fs, subvol, NULL, origpath, loc, iatt, - follow, reval); - else - ret = glfs_resolve_at (fs, subvol, fs->cwd, origpath, loc, iatt, - follow, reval); + return glfs_resolve_at (fs, subvol, NULL, origpath, loc, iatt, + follow, reval); + + cwd = glfs_cwd_get (fs); + + ret = glfs_resolve_at (fs, subvol, cwd, origpath, loc, iatt, + follow, reval); + if (cwd) + inode_unref (cwd); return ret; } @@ -399,3 +483,397 @@ glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *origpath, return ret; } + +int +glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd, + xlator_t *newsubvol, fd_t *newfd) +{ + dict_t *lockinfo = NULL; + int ret = 0; + char uuid1[64]; + + if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx)) + return 0; + + newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx); + + ret = syncop_fgetxattr (oldsubvol, oldfd, &lockinfo, + GF_XATTR_LOCKINFO_KEY); + if (ret < 0) { + gf_log (fs->volname, GF_LOG_WARNING, + "fgetxattr (%s) failed (%s) on graph %s (%d)", + uuid_utoa_r (oldfd->inode->gfid, uuid1), + strerror (errno), + graphid_str (oldsubvol), oldsubvol->graph->id); + goto out; + } + + if (!dict_get (lockinfo, GF_XATTR_LOCKINFO_KEY)) { + gf_log (fs->volname, GF_LOG_WARNING, + "missing lokinfo key (%s) on graph %s (%d)", + uuid_utoa_r (oldfd->inode->gfid, uuid1), + graphid_str (oldsubvol), oldsubvol->graph->id); + goto out; + } + + ret = syncop_fsetxattr (newsubvol, newfd, lockinfo, 0); + if (ret < 0) { + gf_log (fs->volname, GF_LOG_WARNING, + "fsetxattr (%s) failed (%s) on graph %s (%d)", + uuid_utoa_r (newfd->inode->gfid, uuid1), + strerror (errno), + graphid_str (newsubvol), newsubvol->graph->id); + goto out; + } +out: + if (lockinfo) + dict_unref (lockinfo); + return ret; +} + + +fd_t * +glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) +{ + fd_t *newfd = NULL; + inode_t *oldinode = NULL; + inode_t *newinode = NULL; + xlator_t *oldsubvol = NULL; + int ret = -1; + loc_t loc = {0, }; + char uuid1[64]; + + + oldinode = oldfd->inode; + oldsubvol = oldinode->table->xl; + + if (oldsubvol == newsubvol) + return fd_ref (oldfd); + + if (!oldsubvol->switched) { + ret = syncop_fsync (oldsubvol, oldfd, 0); + if (ret) { + gf_log (fs->volname, GF_LOG_WARNING, + "fsync() failed (%s) on %s graph %s (%d)", + strerror (errno), + uuid_utoa_r (oldfd->inode->gfid, uuid1), + graphid_str (oldsubvol), oldsubvol->graph->id); + } + } + + newinode = glfs_refresh_inode_safe (newsubvol, oldinode); + if (!newinode) { + gf_log (fs->volname, GF_LOG_WARNING, + "inode (%s) refresh failed (%s) on graph %s (%d)", + uuid_utoa_r (oldinode->gfid, uuid1), + strerror (errno), + graphid_str (newsubvol), newsubvol->graph->id); + goto out; + } + + newfd = fd_create (newinode, getpid()); + if (!newfd) { + gf_log (fs->volname, GF_LOG_WARNING, + "fd_create (%s) failed (%s) on graph %s (%d)", + uuid_utoa_r (newinode->gfid, uuid1), + strerror (errno), + graphid_str (newsubvol), newsubvol->graph->id); + goto out; + } + + loc.inode = inode_ref (newinode); + + if (IA_ISDIR (oldinode->ia_type)) + ret = syncop_opendir (newsubvol, &loc, newfd); + else + ret = syncop_open (newsubvol, &loc, + oldfd->flags & ~(O_TRUNC|O_EXCL|O_CREAT), + newfd); + loc_wipe (&loc); + + if (ret) { + gf_log (fs->volname, GF_LOG_WARNING, + "syncop_open%s (%s) failed (%s) on graph %s (%d)", + IA_ISDIR (oldinode->ia_type) ? "dir" : "", + uuid_utoa_r (newinode->gfid, uuid1), + strerror (errno), + graphid_str (newsubvol), newsubvol->graph->id); + goto out; + } + + ret = glfs_migrate_fd_locks_safe (fs, oldsubvol, oldfd, newsubvol, + newfd); + + if (ret) { + gf_log (fs->volname, GF_LOG_WARNING, + "lock migration (%s) failed (%s) on graph %s (%d)", + uuid_utoa_r (newinode->gfid, uuid1), + strerror (errno), + graphid_str (newsubvol), newsubvol->graph->id); + goto out; + } + + fd_bind (newfd); +out: + if (newinode) + inode_unref (newinode); + + if (ret) { + fd_unref (newfd); + newfd = NULL; + } + + return newfd; +} + + +fd_t * +__glfs_migrate_fd (struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd) +{ + fd_t *oldfd = NULL; + fd_t *newfd = NULL; + + oldfd = glfd->fd; + + fs->migration_in_progress = 1; + pthread_mutex_unlock (&fs->mutex); + { + newfd = glfs_migrate_fd_safe (fs, newsubvol, oldfd); + } + pthread_mutex_lock (&fs->mutex); + fs->migration_in_progress = 0; + + return newfd; +} + + +fd_t * +__glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd) +{ + fd_t *fd = NULL; + + if (glfd->fd->inode->table->xl == subvol) + return fd_ref (glfd->fd); + + fd = __glfs_migrate_fd (fs, subvol, glfd); + if (!fd) + return NULL; + + if (subvol == fs->active_subvol) { + fd_unref (glfd->fd); + glfd->fd = fd_ref (fd); + } + + return fd; +} + + +fd_t * +glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd) +{ + fd_t *fd = NULL; + + glfs_lock (fs); + { + fd = __glfs_resolve_fd (fs, subvol, glfd); + } + glfs_unlock (fs); + + return fd; +} + + +void +__glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol) +{ + struct glfs_fd *glfd = NULL; + fd_t *fd = NULL; + + list_for_each_entry (glfd, &fs->openfds, openfds) { + if (uuid_is_null (glfd->fd->inode->gfid)) { + gf_log (fs->volname, GF_LOG_INFO, + "skipping openfd %p/%p in graph %s (%d)", + glfd, glfd->fd, graphid_str(subvol), + subvol->graph->id); + /* create in progress, defer */ + continue; + } + + fd = __glfs_migrate_fd (fs, subvol, glfd); + if (fd) { + fd_unref (glfd->fd); + glfd->fd = fd; + } + } +} + + +xlator_t * +__glfs_active_subvol (struct glfs *fs) +{ + xlator_t *new_subvol = NULL; + int ret = -1; + inode_t *new_cwd = NULL; + + if (!fs->next_subvol) + return fs->active_subvol; + + new_subvol = fs->next_subvol; + + ret = __glfs_first_lookup (fs, new_subvol); + if (ret) { + gf_log (fs->volname, GF_LOG_INFO, + "first lookup on graph %s (%d) failed (%s)", + graphid_str (new_subvol), new_subvol->graph->id, + strerror (errno)); + return NULL; + } + + if (fs->cwd) { + new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd); + + if (!new_cwd) { + char buf1[64]; + gf_log (fs->volname, GF_LOG_INFO, + "cwd refresh of %s graph %s (%d) failed (%s)", + uuid_utoa_r (fs->cwd->gfid, buf1), + graphid_str (new_subvol), + new_subvol->graph->id, strerror (errno)); + return NULL; + } + } + + __glfs_migrate_openfds (fs, new_subvol); + + /* switching @active_subvol and @cwd + should be atomic + */ + fs->old_subvol = fs->active_subvol; + fs->active_subvol = fs->next_subvol; + fs->next_subvol = NULL; + + if (new_cwd) { + __glfs_cwd_set (fs, new_cwd); + inode_unref (new_cwd); + } + + gf_log (fs->volname, GF_LOG_INFO, "switched to graph %s (%d)", + graphid_str (new_subvol), new_subvol->graph->id); + + return new_subvol; +} + +xlator_t * +glfs_active_subvol (struct glfs *fs) +{ + xlator_t *subvol = NULL; + xlator_t *old_subvol = NULL; + + glfs_lock (fs); + { + subvol = __glfs_active_subvol (fs); + + if (subvol) + subvol->winds++; + + if (fs->old_subvol) { + old_subvol = fs->old_subvol; + fs->old_subvol = NULL; + old_subvol->switched = 1; + } + } + glfs_unlock (fs); + + if (old_subvol) + glfs_subvol_done (fs, old_subvol); + + return subvol; +} + + +void +glfs_subvol_done (struct glfs *fs, xlator_t *subvol) +{ + int ref = 0; + xlator_t *active_subvol = NULL; + + glfs_lock (fs); + { + ref = (--subvol->winds); + active_subvol = fs->active_subvol; + } + glfs_unlock (fs); + + if (ref == 0) { + assert (subvol != active_subvol); + xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, NULL); + } +} + + +int +__glfs_cwd_set (struct glfs *fs, inode_t *inode) +{ + if (inode->table->xl != fs->active_subvol) { + inode = __glfs_refresh_inode (fs, fs->active_subvol, inode); + if (!inode) + return -1; + } else { + inode_ref (inode); + } + + if (fs->cwd) + inode_unref (fs->cwd); + + fs->cwd = inode; + + return 0; +} + + +int +glfs_cwd_set (struct glfs *fs, inode_t *inode) +{ + int ret = 0; + + glfs_lock (fs); + { + ret = __glfs_cwd_set (fs, inode); + } + glfs_unlock (fs); + + return ret; +} + + +inode_t * +__glfs_cwd_get (struct glfs *fs) +{ + inode_t *cwd = NULL; + + if (!fs->cwd) + return NULL; + + if (fs->cwd->table->xl == fs->active_subvol) { + cwd = inode_ref (fs->cwd); + return cwd; + } + + cwd = __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd); + + return cwd; +} + +inode_t * +glfs_cwd_get (struct glfs *fs) +{ + inode_t *cwd = NULL; + + glfs_lock (fs); + { + cwd = __glfs_cwd_get (fs); + } + glfs_unlock (fs); + + return cwd; +} diff --git a/api/src/glfs.c b/api/src/glfs.c index 82873fe7..efda6b67 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -12,11 +12,9 @@ /* TODO: - merge locks in glfs_posix_lock for lock self-healing - - refresh fs->cwd inode on graph switch - set proper pid/lk_owner to call frames (currently buried in syncop) - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and reach it via THIS. - - fd migration on graph switch. - update syncop functions to accept/return xdata. ??? - protocol/client to reconnect immediately after portmap disconnect. - handle SEEK_END failure in _lseek() @@ -338,66 +336,41 @@ glfs_fd_new (struct glfs *fs) glfd->fs = fs; + INIT_LIST_HEAD (&glfd->openfds); + return glfd; } void -glfs_fd_destroy (struct glfs_fd *glfd) +glfs_fd_bind (struct glfs_fd *glfd) { - if (!glfd) - return; - if (glfd->fd) - fd_unref (glfd->fd); - GF_FREE (glfd); -} + struct glfs *fs = NULL; + fs = glfd->fs; -xlator_t * -glfs_fd_subvol (struct glfs_fd *glfd) -{ - xlator_t *subvol = NULL; - - if (!glfd) - return NULL; - - subvol = glfd->fd->inode->table->xl; - - return subvol; + glfs_lock (fs); + { + list_add_tail (&glfd->openfds, &fs->openfds); + } + glfs_unlock (fs); } - -xlator_t * -glfs_active_subvol (struct glfs *fs) +void +glfs_fd_destroy (struct glfs_fd *glfd) { - xlator_t *subvol = NULL; - inode_table_t *itable = NULL; + if (!glfd) + return; - pthread_mutex_lock (&fs->mutex); + glfs_lock (glfd->fs); { - while (!fs->init) - pthread_cond_wait (&fs->cond, &fs->mutex); - - subvol = fs->active_subvol; - } - pthread_mutex_unlock (&fs->mutex); - - if (!subvol) - return NULL; - - if (!subvol->itable) { - itable = inode_table_new (0, subvol); - if (!itable) { - errno = ENOMEM; - return NULL; - } - - subvol->itable = itable; - - glfs_first_lookup (subvol); + list_del_init (&glfd->openfds); } + glfs_unlock (glfd->fs); - return subvol; + if (glfd->fd) + fd_unref (glfd->fd); + GF_FREE (glfd); } @@ -456,6 +429,8 @@ glfs_new (const char *volname) pthread_mutex_init (&fs->mutex, NULL); pthread_cond_init (&fs->cond, NULL); + INIT_LIST_HEAD (&fs->openfds); + return fs; } @@ -519,7 +494,8 @@ glfs_init_wait (struct glfs *fs) { int ret = -1; - pthread_mutex_lock (&fs->mutex); + /* Always a top-down call, use glfs_lock() */ + glfs_lock (fs); { while (!fs->init) pthread_cond_wait (&fs->cond, @@ -527,7 +503,7 @@ glfs_init_wait (struct glfs *fs) ret = fs->ret; errno = fs->err; } - pthread_mutex_unlock (&fs->mutex); + glfs_unlock (fs); return ret; } @@ -546,6 +522,7 @@ glfs_init_done (struct glfs *fs, int ret) init_cbk = fs->init_cbk; + /* Always a bottom-up call, use mutex_lock() */ pthread_mutex_lock (&fs->mutex); { fs->init = 1; -- cgit