diff options
103 files changed, 20618 insertions, 878 deletions
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 4bf33b859..37e8d22d8 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -146,7 +146,7 @@ out: int -glfs_close (struct glfs_fd *glfd) +glfs_close_with_xdata (struct glfs_fd *glfd, dict_t *dict) { xlator_t *subvol = NULL; int ret = -1; @@ -169,7 +169,7 @@ glfs_close (struct glfs_fd *glfd) goto out; } - ret = syncop_flush (subvol, fd); + ret = syncop_flush_with_xdata (subvol, fd, dict); DECODE_SYNCOP_ERR (ret); out: fs = glfd->fs; @@ -183,6 +183,11 @@ out: return ret; } +int +glfs_close (struct glfs_fd *glfd) +{ + return(glfs_close_with_xdata(glfd, NULL)); +} int glfs_lstat (struct glfs *fs, const char *path, struct stat *stat) @@ -251,7 +256,7 @@ out: int -glfs_fstat (struct glfs_fd *glfd, struct stat *stat) +glfs_fstat_with_xdata (struct glfs_fd *glfd, struct stat *stat, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -274,7 +279,7 @@ glfs_fstat (struct glfs_fd *glfd, struct stat *stat) goto out; } - ret = syncop_fstat (subvol, fd, &iatt); + ret = syncop_fstat_with_xdata (subvol, fd, &iatt, dict); DECODE_SYNCOP_ERR (ret); if (ret == 0 && stat) @@ -288,17 +293,21 @@ out: return ret; } +int +glfs_fstat (struct glfs_fd *glfd, struct stat *stat) +{ + return(glfs_fstat_with_xdata(glfd, stat, NULL)); +} + struct glfs_fd * -glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode) +glfs_creat_with_xdata (struct glfs *fs, const char *path, int flags, mode_t mode, uuid_t gfid, dict_t *xattr_req) { int ret = -1; struct glfs_fd *glfd = NULL; xlator_t *subvol = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; int reval = 0; __glfs_entry_fs (fs); @@ -310,14 +319,6 @@ glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode) goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = 
-1; @@ -409,8 +410,6 @@ retry: out: loc_wipe (&loc); - if (xattr_req) - dict_unref (xattr_req); if (ret && glfd) { glfs_fd_destroy (glfd); @@ -426,9 +425,28 @@ out: return glfd; } +struct glfs_fd * +glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode) +{ + dict_t *xattr_req = NULL; + uuid_t gfid; + struct glfs_fd *fd = NULL; + + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return NULL; + } + uuid_generate (gfid); + fd = glfs_creat_with_xdata (fs, path, flags, mode, gfid, xattr_req); + if (xattr_req) + dict_unref (xattr_req); + return (fd); +} off_t -glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence) +glfs_lseek_with_xdata (struct glfs_fd *glfd, off_t offset, int whence, dict_t *dict) { struct stat sb = {0, }; int ret = -1; @@ -443,7 +461,7 @@ glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence) glfd->offset += offset; break; case SEEK_END: - ret = glfs_fstat (glfd, &sb); + ret = glfs_fstat_with_xdata (glfd, &sb, dict); if (ret) { /* seek cannot fail :O */ break; @@ -455,12 +473,17 @@ glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence) return glfd->offset; } +off_t +glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence) +{ + return(glfs_lseek_with_xdata(glfd, offset, whence, NULL)); +} ////////////// ssize_t -glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, - off_t offset, int flags) +glfs_preadv_with_xdata (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + off_t offset, int flags, dict_t *dict) { xlator_t *subvol = NULL; ssize_t ret = -1; @@ -488,7 +511,7 @@ glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, size = iov_length (iovec, iovcnt); - ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref); + ret = syncop_readv_with_xdata (subvol, fd, size, offset, 0, &iov, &cnt, &iobref, dict); DECODE_SYNCOP_ERR (ret); if (ret <= 0) goto out; @@ -512,6 +535,12 @@ out: return ret; } +ssize_t +glfs_preadv (struct glfs_fd *glfd, 
const struct iovec *iovec, int iovcnt, + off_t offset, int flags) +{ + return(glfs_preadv_with_xdata(glfd, iovec, iovcnt, offset, flags, NULL)); +} ssize_t glfs_read (struct glfs_fd *glfd, void *buf, size_t count, int flags) @@ -527,6 +556,19 @@ glfs_read (struct glfs_fd *glfd, void *buf, size_t count, int flags) return ret; } +ssize_t +glfs_read_with_xdata (struct glfs_fd *glfd, void *buf, size_t count, int flags, dict_t *dict) +{ + struct iovec iov = {0, }; + ssize_t ret = 0; + + iov.iov_base = buf; + iov.iov_len = count; + + ret = glfs_preadv_with_xdata (glfd, &iov, 1, glfd->offset, flags, dict); + + return ret; +} ssize_t glfs_pread (struct glfs_fd *glfd, void *buf, size_t count, off_t offset, @@ -783,6 +825,12 @@ ssize_t glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, off_t offset, int flags) { + return(glfs_pwritev_with_xdata(glfd, iovec, iovcnt, offset, flags, NULL)); +} +ssize_t +glfs_pwritev_with_xdata (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + off_t offset, int flags, dict_t *dict) +{ xlator_t *subvol = NULL; int ret = -1; size_t size = -1; @@ -838,7 +886,7 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, iov.iov_base = iobuf_ptr (iobuf); iov.iov_len = size; - ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags); + ret = syncop_writev_with_xdata (subvol, fd, &iov, 1, offset, iobref, flags, dict); DECODE_SYNCOP_ERR (ret); iobuf_unref (iobuf); @@ -873,6 +921,20 @@ glfs_write (struct glfs_fd *glfd, const void *buf, size_t count, int flags) return ret; } +ssize_t +glfs_write_with_xdata (struct glfs_fd *glfd, const void *buf, size_t count, int flags, dict_t *dict) +{ + struct iovec iov = {0, }; + ssize_t ret = 0; + + iov.iov_base = (void *) buf; + iov.iov_len = count; + + ret = glfs_pwritev_with_xdata (glfd, &iov, 1, glfd->offset, flags, dict); + + return ret; +} + ssize_t @@ -886,6 +948,16 @@ glfs_writev (struct glfs_fd *glfd, const struct iovec *iov, int count, return 
ret; } +ssize_t +glfs_writev_with_xdata (struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags, dict_t *dict) +{ + ssize_t ret = 0; + + ret = glfs_pwritev_with_xdata (glfd, iov, count, glfd->offset, flags, dict); + + return ret; +} ssize_t glfs_pwrite (struct glfs_fd *glfd, const void *buf, size_t count, off_t offset, @@ -989,7 +1061,7 @@ glfs_writev_async (struct glfs_fd *glfd, const struct iovec *iov, int count, int -glfs_fsync (struct glfs_fd *glfd) +glfs_fsync_with_xdata (struct glfs_fd *glfd, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1011,7 +1083,7 @@ glfs_fsync (struct glfs_fd *glfd) goto out; } - ret = syncop_fsync (subvol, fd, 0); + ret = syncop_fsync_with_xdata (subvol, fd, 0, dict); DECODE_SYNCOP_ERR (ret); out: if (fd) @@ -1022,6 +1094,11 @@ out: return ret; } +int +glfs_fsync (struct glfs_fd *glfd) +{ + return(glfs_fsync_with_xdata(glfd, NULL)); +} static int glfs_fsync_async_common (struct glfs_fd *glfd, glfs_io_cbk fn, void *data, @@ -1106,7 +1183,7 @@ glfs_fdatasync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data) int -glfs_ftruncate (struct glfs_fd *glfd, off_t offset) +glfs_ftruncate_with_xdata (struct glfs_fd *glfd, off_t offset, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1128,7 +1205,7 @@ glfs_ftruncate (struct glfs_fd *glfd, off_t offset) goto out; } - ret = syncop_ftruncate (subvol, fd, offset); + ret = syncop_ftruncate_with_xdata (subvol, fd, offset, dict); DECODE_SYNCOP_ERR (ret); out: if (fd) @@ -1139,6 +1216,11 @@ out: return ret; } +int +glfs_ftruncate (struct glfs_fd *glfd, off_t offset) +{ + return(glfs_ftruncate_with_xdata(glfd, offset, NULL)); +} int glfs_ftruncate_async (struct glfs_fd *glfd, off_t offset, @@ -1211,14 +1293,12 @@ out: int -glfs_symlink (struct glfs *fs, const char *data, const char *path) +glfs_symlink_with_xdata (struct glfs *fs, const char *data, const char *path, uuid_t gfid, dict_t *xattr_req) { int ret = -1; xlator_t *subvol = NULL; loc_t loc = {0, }; 
struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; int reval = 0; __glfs_entry_fs (fs); @@ -1230,14 +1310,6 @@ glfs_symlink (struct glfs *fs, const char *data, const char *path) goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -1283,14 +1355,30 @@ retry: out: loc_wipe (&loc); - if (xattr_req) - dict_unref (xattr_req); - glfs_subvol_done (fs, subvol); return ret; } +int +glfs_symlink (struct glfs *fs, const char *data, const char *path) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + int ret = -1; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return -1 ; + } + + uuid_generate (gfid); + ret = glfs_symlink_with_xdata(fs, data, path, gfid, xattr_req); + + dict_unref (xattr_req); + return ret; +} int glfs_readlink (struct glfs *fs, const char *path, char *buf, size_t bufsiz) @@ -1342,14 +1430,12 @@ out: int -glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev) +glfs_mknod_with_xdata (struct glfs *fs, const char *path, mode_t mode, dev_t dev, uuid_t gfid, dict_t *xattr_req) { int ret = -1; xlator_t *subvol = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; int reval = 0; __glfs_entry_fs (fs); @@ -1361,14 +1447,7 @@ glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev) goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -1414,24 +1493,38 @@ retry: out: loc_wipe (&loc); - if (xattr_req) - dict_unref (xattr_req); - glfs_subvol_done (fs, subvol); return ret; } +int +glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev) +{ + dict_t *xattr_req = NULL; + uuid_t gfid; + int ret; + + xattr_req = dict_new (); + if 
(!xattr_req) { + errno = ENOMEM; + return -1; + } + + uuid_generate (gfid); + ret = glfs_mknod_with_xdata(fs, path, mode, dev, gfid, xattr_req); + + dict_unref (xattr_req); + return (ret); +} int -glfs_mkdir (struct glfs *fs, const char *path, mode_t mode) +glfs_mkdir_with_xdata (struct glfs *fs, const char *path, mode_t mode, uuid_t gfid, dict_t *xattr_req) { int ret = -1; xlator_t *subvol = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; int reval = 0; __glfs_entry_fs (fs); @@ -1443,14 +1536,6 @@ glfs_mkdir (struct glfs *fs, const char *path, mode_t mode) goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -1496,17 +1581,33 @@ retry: out: loc_wipe (&loc); - if (xattr_req) - dict_unref (xattr_req); glfs_subvol_done (fs, subvol); return ret; } +int +glfs_mkdir (struct glfs *fs, const char *path, mode_t mode) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + int ret; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return -1; + } + + uuid_generate (gfid); + ret = glfs_mkdir_with_xdata(fs, path, mode, gfid, xattr_req); + dict_unref (xattr_req); + return ret; +} int -glfs_unlink (struct glfs *fs, const char *path) +glfs_unlink_with_xdata (struct glfs *fs, const char *path, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1536,7 +1637,7 @@ retry: goto out; } - ret = syncop_unlink (subvol, &loc); + ret = syncop_unlink_with_xdata (subvol, &loc, dict); DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1551,9 +1652,14 @@ out: return ret; } +int +glfs_unlink (struct glfs *fs, const char *path) +{ + return(glfs_unlink_with_xdata(fs, path, NULL)); +} int -glfs_rmdir (struct glfs *fs, const char *path) +glfs_rmdir_with_xdata (struct glfs *fs, const char *path, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ 
-1583,7 +1689,7 @@ retry: goto out; } - ret = syncop_rmdir (subvol, &loc, 0); + ret = syncop_rmdir_with_xdata (subvol, &loc, 0, dict); DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -1598,9 +1704,14 @@ out: return ret; } +int +glfs_rmdir (struct glfs *fs, const char *path) +{ + return (glfs_rmdir_with_xdata(fs, path, NULL)); +} int -glfs_rename (struct glfs *fs, const char *oldpath, const char *newpath) +glfs_rename_with_xdata (struct glfs *fs, const char *oldpath, const char *newpath, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1647,7 +1758,7 @@ retrynew: /* TODO: check if new or old is a prefix of the other, and fail EINVAL */ - ret = syncop_rename (subvol, &oldloc, &newloc); + ret = syncop_rename_with_xdata (subvol, &oldloc, &newloc, dict); DECODE_SYNCOP_ERR (ret); if (ret == -1 && errno == ESTALE) { @@ -1674,7 +1785,13 @@ out: int -glfs_link (struct glfs *fs, const char *oldpath, const char *newpath) +glfs_rename (struct glfs *fs, const char *oldpath, const char *newpath) +{ + return(glfs_rename_with_xdata(fs, oldpath, newpath, NULL)); +} + +int +glfs_link_with_xdata (struct glfs *fs, const char *oldpath, const char *newpath, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1725,7 +1842,7 @@ retrynew: } newloc.inode = inode_ref (oldloc.inode); - ret = syncop_link (subvol, &oldloc, &newloc); + ret = syncop_link_with_xdata (subvol, &oldloc, &newloc, dict); DECODE_SYNCOP_ERR (ret); if (ret == -1 && errno == ESTALE) { @@ -1746,6 +1863,11 @@ out: return ret; } +int +glfs_link (struct glfs *fs, const char *oldpath, const char *newpath) +{ + return(glfs_link_with_xdata(fs, oldpath, newpath, NULL)); +} struct glfs_fd * glfs_opendir (struct glfs *fs, const char *path) @@ -2184,8 +2306,8 @@ out: int -glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt, - int valid, int follow) +glfs_setattr_with_xdata (struct glfs *fs, const char *path, struct iatt *iatt, + int valid, int follow, dict_t *dict) { int ret 
= -1; xlator_t *subvol = NULL; @@ -2212,7 +2334,7 @@ retry: if (ret) goto out; - ret = syncop_setattr (subvol, &loc, iatt, valid, 0, 0); + ret = syncop_setattr_with_xdata (subvol, &loc, iatt, valid, 0, 0, dict); DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2224,9 +2346,15 @@ out: return ret; } +int +glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt, + int valid, int follow) +{ + return(glfs_setattr_with_xdata(fs, path, iatt, valid, follow, NULL)); +} int -glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid) +glfs_fsetattr_with_xdata (struct glfs_fd *glfd, struct iatt *iatt, int valid, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2248,7 +2376,7 @@ glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid) goto out; } - ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0); + ret = syncop_fsetattr_with_xdata (subvol, fd, iatt, valid, 0, 0, dict); DECODE_SYNCOP_ERR (ret); out: if (fd) @@ -2259,6 +2387,11 @@ out: return ret; } +int +glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid) +{ + return(glfs_fsetattr_with_xdata(glfd, iatt, valid, NULL)); +} int glfs_chmod (struct glfs *fs, const char *path, mode_t mode) @@ -2500,8 +2633,8 @@ glfs_lgetxattr (struct glfs *fs, const char *path, const char *name, ssize_t -glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value, - size_t size) +glfs_fgetxattr_with_xdata (struct glfs_fd *glfd, const char *name, void *value, + size_t size, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2524,7 +2657,7 @@ glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value, goto out; } - ret = syncop_fgetxattr (subvol, fd, &xattr, name); + ret = syncop_fgetxattr_with_xdata (subvol, fd, &xattr, name, dict); DECODE_SYNCOP_ERR (ret); if (ret) goto out; @@ -2539,6 +2672,12 @@ out: return ret; } +ssize_t +glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value, + size_t size) +{ + 
return(glfs_fgetxattr_with_xdata(glfd, name, value, size, NULL)); +} int glfs_listxattr_process (void *value, size_t size, dict_t *xattr) @@ -2628,7 +2767,8 @@ glfs_llistxattr (struct glfs *fs, const char *path, void *value, size_t size) ssize_t -glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size) +glfs_flistxattr_with_xdata (struct glfs_fd *glfd, void *value, size_t size, + dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2651,7 +2791,7 @@ glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size) goto out; } - ret = syncop_fgetxattr (subvol, fd, &xattr, NULL); + ret = syncop_fgetxattr_with_xdata (subvol, fd, &xattr, NULL, dict); DECODE_SYNCOP_ERR (ret); if (ret) goto out; @@ -2667,6 +2807,12 @@ out: } +ssize_t +glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size) +{ + return(glfs_flistxattr_with_xdata(glfd, value, size, NULL)); +} + dict_t * dict_for_key_value (const char *name, const char *value, size_t size) { @@ -2689,7 +2835,7 @@ dict_for_key_value (const char *name, const char *value, size_t size) int glfs_setxattr_common (struct glfs *fs, const char *path, const char *name, - const void *value, size_t size, int flags, int follow) + const void *value, size_t size, int flags, int follow, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2724,7 +2870,7 @@ retry: goto out; } - ret = syncop_setxattr (subvol, &loc, xattr, flags); + ret = syncop_setxattr_with_xdata (subvol, &loc, xattr, flags, dict); DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2744,21 +2890,27 @@ int glfs_setxattr (struct glfs *fs, const char *path, const char *name, const void *value, size_t size, int flags) { - return glfs_setxattr_common (fs, path, name, value, size, flags, 1); + return glfs_setxattr_common (fs, path, name, value, size, flags, 1, NULL); } +int +glfs_setxattr_with_xdata (struct glfs *fs, const char *path, const char *name, + const void *value, size_t size, int flags, dict_t * dict) +{ + return 
glfs_setxattr_common (fs, path, name, value, size, flags, 1, dict); +} int glfs_lsetxattr (struct glfs *fs, const char *path, const char *name, const void *value, size_t size, int flags) { - return glfs_setxattr_common (fs, path, name, value, size, flags, 0); + return glfs_setxattr_common (fs, path, name, value, size, flags, 0, NULL); } int -glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, - size_t size, int flags) +glfs_fsetxattr_with_xdata (struct glfs_fd *glfd, const char *name, const void *value, + size_t size, int flags, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2788,7 +2940,7 @@ glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, goto out; } - ret = syncop_fsetxattr (subvol, fd, xattr, flags); + ret = syncop_fsetxattr_with_xdata (subvol, fd, xattr, flags, dict); DECODE_SYNCOP_ERR (ret); out: if (xattr) @@ -2802,10 +2954,16 @@ out: return ret; } +int +glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value, + size_t size, int flags) +{ + return(glfs_fsetxattr_with_xdata(glfd, name, value, size, flags, NULL)); +} int glfs_removexattr_common (struct glfs *fs, const char *path, const char *name, - int follow) + int follow, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2832,7 +2990,7 @@ retry: if (ret) goto out; - ret = syncop_removexattr (subvol, &loc, name, 0); + ret = syncop_removexattr_with_xdata (subvol, &loc, name, dict); DECODE_SYNCOP_ERR (ret); ESTALE_RETRY (ret, errno, reval, &loc, retry); @@ -2849,19 +3007,25 @@ out: int glfs_removexattr (struct glfs *fs, const char *path, const char *name) { - return glfs_removexattr_common (fs, path, name, 1); + return glfs_removexattr_common (fs, path, name, 1, NULL); } int glfs_lremovexattr (struct glfs *fs, const char *path, const char *name) { - return glfs_removexattr_common (fs, path, name, 0); + return glfs_removexattr_common (fs, path, name, 0, NULL); +} + +int +glfs_removexattr_with_xdata (struct glfs *fs, const 
char *path, const char *name, dict_t *dict) +{ + return glfs_removexattr_common (fs, path, name, 1, dict); } int -glfs_fremovexattr (struct glfs_fd *glfd, const char *name) +glfs_fremovexattr_with_xdata (struct glfs_fd *glfd, const char *name, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -2883,7 +3047,7 @@ glfs_fremovexattr (struct glfs_fd *glfd, const char *name) goto out; } - ret = syncop_fremovexattr (subvol, fd, name, 0); + ret = syncop_fremovexattr_with_xdata (subvol, fd, name, dict); DECODE_SYNCOP_ERR (ret); out: if (fd) @@ -2894,6 +3058,11 @@ out: return ret; } +int +glfs_fremovexattr (struct glfs_fd *glfd, const char *name) +{ + return(glfs_fremovexattr_with_xdata(glfd, name, NULL)); +} int glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len) @@ -2997,6 +3166,29 @@ out: } int +glfs_ipc (struct glfs *fs, int32_t op) +{ + int ret = -1; + xlator_t *subvol = NULL; + + __glfs_entry_fs (fs); + + subvol = glfs_active_subvol (fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + ret = syncop_ipc (subvol, op); + DECODE_SYNCOP_ERR (ret); + +out: + glfs_subvol_done (fs, subvol); + return ret; +} + +int glfs_chdir (struct glfs *fs, const char *path) { int ret = -1; @@ -3139,7 +3331,6 @@ out: return retpath; } - char * glfs_getcwd (struct glfs *fs, char *buf, size_t n) { diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c index 7fb202973..b97590239 100644 --- a/api/src/glfs-handleops.c +++ b/api/src/glfs-handleops.c @@ -432,7 +432,7 @@ out: } struct glfs_fd * -glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) +glfs_h_open_with_xdata (struct glfs *fs, struct glfs_object *object, int flags, dict_t * dict) { int ret = -1; struct glfs_fd *glfd = NULL; @@ -441,7 +441,7 @@ glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) loc_t loc = {0, }; - /* validate in args */ + /* validate in args; dict is optional because glfs_h_open() passes NULL */ if ((fs == NULL) || (object == NULL)) { errno = 
EINVAL; return NULL; } @@ -492,7 +492,7 @@ glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) GLFS_LOC_FILL_INODE (inode, loc, out); /* fop/op */ - ret = syncop_open (subvol, &loc, flags, glfd->fd); + ret = syncop_open_with_xdata (subvol, &loc, flags, glfd->fd, dict); DECODE_SYNCOP_ERR (ret); out: @@ -515,9 +515,16 @@ out: return glfd; } +struct glfs_fd * +glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) +{ + return(glfs_h_open_with_xdata(fs, object, flags, NULL)); +} + struct glfs_object * -glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, - int flags, mode_t mode, struct stat *stat) +glfs_h_creat_with_xdata (struct glfs *fs, struct glfs_object *parent, const char *path, + int flags, mode_t mode, struct stat *stat, + uuid_t gfid, dict_t * xattr_req) { int ret = -1; struct glfs_fd *glfd = NULL; @@ -525,12 +532,10 @@ glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, inode_t *inode = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; struct glfs_object *object = NULL; /* validate in args */ - if ((fs == NULL) || (parent == NULL) || (path == NULL)) { + if ((fs == NULL) || (parent == NULL) || (path == NULL) || (xattr_req == NULL)) { errno = EINVAL; return NULL; } @@ -552,14 +557,6 @@ glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -628,20 +625,34 @@ out: } struct glfs_object * -glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path, - mode_t mode, struct stat *stat) +glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, + int flags, mode_t mode, struct stat *stat) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if 
(!xattr_req) { + errno = ENOMEM; + return NULL; + } + uuid_generate (gfid); + return(glfs_h_creat_with_xdata(fs, parent, path, flags, mode, stat, gfid, xattr_req)); +} + +struct glfs_object * +glfs_h_mkdir_with_xdata (struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat, uuid_t gfid, dict_t *xattr_req) { int ret = -1; xlator_t *subvol = NULL; inode_t *inode = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; struct glfs_object *object = NULL; /* validate in args */ - if ((fs == NULL) || (parent == NULL) || (path == NULL)) { + if ((fs == NULL) || (parent == NULL) || (path == NULL) || (xattr_req == NULL)) { errno = EINVAL; return NULL; } @@ -663,14 +674,6 @@ glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path, goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -717,20 +720,36 @@ out: } struct glfs_object * -glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path, - mode_t mode, dev_t dev, struct stat *stat) +glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return NULL; + } + + uuid_generate (gfid); + return(glfs_h_mkdir_with_xdata(fs, parent, path, mode, stat, gfid, xattr_req)); +} + +struct glfs_object * +glfs_h_mknod_with_xdata (struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, dev_t dev, struct stat *stat, + uuid_t gfid, dict_t * xattr_req) { int ret = -1; xlator_t *subvol = NULL; inode_t *inode = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; struct glfs_object *object = NULL; /* validate in args */ - if ((fs == NULL) || 
(parent == NULL) || (path == NULL)) { + if ((fs == NULL) || (parent == NULL) || (path == NULL) || (xattr_req == NULL)) { errno = EINVAL; return NULL; } @@ -752,14 +771,6 @@ glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path, goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -804,8 +815,26 @@ out: return object; } +struct glfs_object * +glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, dev_t dev, struct stat *stat) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return NULL; + } + + uuid_generate (gfid); + + return(glfs_h_mknod_with_xdata(fs, parent, path, mode, dev, stat, gfid, xattr_req)); +} + int -glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path) +glfs_h_unlink_with_xdata (struct glfs *fs, struct glfs_object *parent, const char *path, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -841,13 +870,13 @@ glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path) } if (!IA_ISDIR(loc.inode->ia_type)) { - ret = syncop_unlink (subvol, &loc); + ret = syncop_unlink_with_xdata (subvol, &loc, dict); DECODE_SYNCOP_ERR (ret); if (ret != 0) { goto out; } } else { - ret = syncop_rmdir (subvol, &loc, 0); + ret = syncop_rmdir_with_xdata (subvol, &loc, 0, dict); DECODE_SYNCOP_ERR (ret); if (ret != 0) { goto out; @@ -868,8 +897,14 @@ out: return ret; } +int +glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path) +{ + return(glfs_h_unlink_with_xdata(fs, parent, path, NULL)); +} + struct glfs_fd * -glfs_h_opendir (struct glfs *fs, struct glfs_object *object) +glfs_h_opendir_with_xdata (struct glfs *fs, struct glfs_object *object, dict_t *dict) { int ret = -1; struct glfs_fd *glfd = NULL; @@ -922,7 +957,7 @@ 
glfs_h_opendir (struct glfs *fs, struct glfs_object *object) GLFS_LOC_FILL_INODE (inode, loc, out); /* fop/op */ - ret = syncop_opendir (subvol, &loc, glfd->fd); + ret = syncop_opendir_with_xdata (subvol, &loc, glfd->fd, dict); DECODE_SYNCOP_ERR (ret); out: @@ -944,6 +979,12 @@ out: return glfd; } +struct glfs_fd * +glfs_h_opendir (struct glfs *fs, struct glfs_object *object) +{ + return(glfs_h_opendir_with_xdata(fs, object, NULL)); +} + ssize_t glfs_h_extract_handle (struct glfs_object *object, unsigned char *handle, int len) @@ -1122,21 +1163,19 @@ out: } struct glfs_object * -glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name, - const char *data, struct stat *stat) +glfs_h_symlink_with_xdata (struct glfs *fs, struct glfs_object *parent, const char *name, + const char *data, struct stat *stat, uuid_t gfid, dict_t * xattr_req) { int ret = -1; xlator_t *subvol = NULL; inode_t *inode = NULL; loc_t loc = {0, }; struct iatt iatt = {0, }; - uuid_t gfid; - dict_t *xattr_req = NULL; struct glfs_object *object = NULL; /* validate in args */ if ((fs == NULL) || (parent == NULL) || (name == NULL) || - (data == NULL)) { + (data == NULL) || (xattr_req == NULL)) { errno = EINVAL; return NULL; } @@ -1158,14 +1197,6 @@ glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name, goto out; } - xattr_req = dict_new (); - if (!xattr_req) { - ret = -1; - errno = ENOMEM; - goto out; - } - - uuid_generate (gfid); ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16); if (ret) { ret = -1; @@ -1216,6 +1247,23 @@ out: return object; } +struct glfs_object * +glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name, + const char *data, struct stat *stat) +{ + uuid_t gfid; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + return NULL; + } + + uuid_generate (gfid); + return(glfs_h_symlink_with_xdata(fs, parent, name, data, stat, gfid, xattr_req)); +} + int glfs_h_readlink 
(struct glfs *fs, struct glfs_object *object, char *buf, size_t bufsiz) @@ -1274,8 +1322,8 @@ out: } int -glfs_h_link (struct glfs *fs, struct glfs_object *linksrc, - struct glfs_object *parent, const char *name) +glfs_h_link_with_xdata (struct glfs *fs, struct glfs_object *linksrc, + struct glfs_object *parent, const char *name, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1338,7 +1386,7 @@ glfs_h_link (struct glfs *fs, struct glfs_object *linksrc, newloc.inode = inode_ref (inode); /* fop/op */ - ret = syncop_link (subvol, &oldloc, &newloc); + ret = syncop_link_with_xdata (subvol, &oldloc, &newloc, dict); DECODE_SYNCOP_ERR (ret); if (ret == 0) @@ -1360,8 +1408,14 @@ out: } int -glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname, - struct glfs_object *newdir, const char *newname) +glfs_h_link (struct glfs *fs, struct glfs_object *linksrc, + struct glfs_object *parent, const char *name) +{ + return(glfs_h_link_with_xdata(fs, linksrc, parent, name, NULL)); +} +int +glfs_h_rename_with_xdata (struct glfs *fs, struct glfs_object *olddir, const char *oldname, + struct glfs_object *newdir, const char *newname, dict_t *dict) { int ret = -1; xlator_t *subvol = NULL; @@ -1429,7 +1483,7 @@ glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname, /* TODO: check if new or old is a prefix of the other, and fail EINVAL */ - ret = syncop_rename (subvol, &oldloc, &newloc); + ret = syncop_rename_with_xdata (subvol, &oldloc, &newloc, dict); DECODE_SYNCOP_ERR (ret); if (ret == 0) @@ -1451,3 +1505,10 @@ out: return ret; } + +int +glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname, + struct glfs_object *newdir, const char *newname) +{ + return(glfs_h_rename_with_xdata(fs, olddir, oldname, newdir, newname, NULL)); +} diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h index 027089760..277b20a3d 100644 --- a/api/src/glfs-handles.h +++ b/api/src/glfs-handles.h @@ -84,21 +84,42 @@ struct 
glfs_object *glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path, int flags, mode_t mode, struct stat *sb) __THROW; +struct glfs_object *glfs_h_creat_with_xdata (struct glfs *fs, struct glfs_object *parent, + const char *path, int flags, mode_t mode, + struct stat *sb, uuid_t gfid, dict_t * xattr_req) __THROW; + struct glfs_object *glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path, mode_t flags, struct stat *sb) __THROW; +struct glfs_object *glfs_h_mkdir_with_xdata (struct glfs *fs, struct glfs_object *parent, + const char *path, mode_t flags, + struct stat *sb, uuid_t gfid, dict_t * xattr_req) __THROW; + struct glfs_object *glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path, mode_t mode, dev_t dev, struct stat *sb) __THROW; +struct glfs_object *glfs_h_mknod_with_xdata (struct glfs *fs, struct glfs_object *parent, + const char *path, mode_t mode, dev_t dev, + struct stat *sb, uuid_t gfid, dict_t * xattr_req) __THROW; + struct glfs_object *glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name, const char *data, struct stat *stat) __THROW; +struct glfs_object *glfs_h_symlink_with_xdata (struct glfs *fs, + struct glfs_object *parent, + const char *name, + const char *data, + struct stat *stat, + uuid_t gfid, + dict_t * xattr_req) __THROW; /* Operations on the actual objects */ int glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path) __THROW; +int glfs_h_unlink_with_xdata (struct glfs *fs, struct glfs_object *parent, + const char *path, dict_t *dict) __THROW; int glfs_h_close (struct glfs_object *object) __THROW; @@ -130,10 +151,16 @@ int glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf, int glfs_h_link (struct glfs *fs, struct glfs_object *linktgt, struct glfs_object *parent, const char *name) __THROW; +int glfs_h_link_with_xdata (struct glfs *fs, struct glfs_object *linktgt, + struct glfs_object *parent, const char *name, + dict_t *dict) __THROW; 
int glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname, struct glfs_object *newdir, const char *newname) __THROW; +int glfs_h_rename_with_xdata (struct glfs *fs, struct glfs_object *olddir, + const char *oldname, struct glfs_object *newdir, + const char *newname, dict_t *dict) __THROW; int glfs_h_removexattrs (struct glfs *fs, struct glfs_object *object, const char *name) __THROW; @@ -147,11 +174,17 @@ struct glfs_object *glfs_h_create_from_handle (struct glfs *fs, struct stat *stat) __THROW; /* Operations enabling object handles to fd transitions */ -struct glfs_fd *glfs_h_opendir (struct glfs *fs, - struct glfs_object *object) __THROW; +struct glfs_fd *glfs_h_opendir (struct glfs *fs, struct glfs_object *object) + __THROW; +struct glfs_fd *glfs_h_opendir_with_xdata (struct glfs *fs, + struct glfs_object *object, + dict_t *dict) __THROW; struct glfs_fd *glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags) __THROW; +struct glfs_fd *glfs_h_open_with_xdata (struct glfs *fs, + struct glfs_object *object, int flags, + dict_t *dict) __THROW; __END_DECLS diff --git a/api/src/glfs.h b/api/src/glfs.h index 4344df24d..1ebb8f507 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -387,8 +387,11 @@ glfs_fd_t *glfs_open (glfs_t *fs, const char *path, int flags) __THROW; glfs_fd_t *glfs_creat (glfs_t *fs, const char *path, int flags, mode_t mode) __THROW; +glfs_fd_t *glfs_creat_with_xdata (glfs_t *fs, const char *path, int flags, + mode_t mode, uuid_t gfid, dict_t *dict) __THROW; int glfs_close (glfs_fd_t *fd) __THROW; +int glfs_close_with_xdata (glfs_fd_t *fd, dict_t *dict) __THROW; glfs_t *glfs_from_glfd (glfs_fd_t *fd) __THROW; @@ -422,10 +425,13 @@ typedef void (*glfs_io_cbk) (glfs_fd_t *fd, ssize_t ret, void *data); // glfs_{read,write}[_async] -ssize_t glfs_read (glfs_fd_t *fd, void *buf, - size_t count, int flags) __THROW; -ssize_t glfs_write (glfs_fd_t *fd, const void *buf, - size_t count, int flags) __THROW; +ssize_t 
glfs_read (glfs_fd_t *fd, void *buf, size_t count, int flags) __THROW; +ssize_t glfs_read_with_xdata (struct glfs_fd *glfd, void *buf, size_t count, + int flags, dict_t *dict) __THROW; +ssize_t glfs_write (glfs_fd_t *fd, const void *buf, size_t count, int flags) + __THROW; +ssize_t glfs_write_with_xdata (glfs_fd_t *fd, const void *buf, size_t count, + int flags, dict_t *dict) __THROW; int glfs_read_async (glfs_fd_t *fd, void *buf, size_t count, int flags, glfs_io_cbk fn, void *data) __THROW; int glfs_write_async (glfs_fd_t *fd, const void *buf, size_t count, int flags, @@ -437,6 +443,8 @@ ssize_t glfs_readv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt, int flags) __THROW; ssize_t glfs_writev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt, int flags) __THROW; +ssize_t glfs_writev_with_xdata (glfs_fd_t *fd, const struct iovec *iov, + int iovcnt, int flags, dict_t *dict) __THROW; int glfs_readv_async (glfs_fd_t *fd, const struct iovec *iov, int count, int flags, glfs_io_cbk fn, void *data) __THROW; int glfs_writev_async (glfs_fd_t *fd, const struct iovec *iov, int count, @@ -457,29 +465,42 @@ int glfs_pwrite_async (glfs_fd_t *fd, const void *buf, int count, off_t offset, ssize_t glfs_preadv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) __THROW; +ssize_t glfs_preadv_with_xdata (glfs_fd_t *fd, const struct iovec *iov, + int iovcnt, off_t offset, int flags, + dict_t *dict) __THROW; ssize_t glfs_pwritev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset, int flags) __THROW; -int glfs_preadv_async (glfs_fd_t *fd, const struct iovec *iov, - int count, off_t offset, int flags, - glfs_io_cbk fn, void *data) __THROW; -int glfs_pwritev_async (glfs_fd_t *fd, const struct iovec *iov, - int count, off_t offset, int flags, - glfs_io_cbk fn, void *data) __THROW; +ssize_t glfs_pwritev_with_xdata (glfs_fd_t *fd, const struct iovec *iov, + int iovcnt, off_t offset, int flags, + dict_t *dict) __THROW; +int glfs_preadv_async 
(glfs_fd_t *fd, const struct iovec *iov, int count, + off_t offset, int flags, glfs_io_cbk fn, void *data) + __THROW; +int glfs_pwritev_async (glfs_fd_t *fd, const struct iovec *iov, int count, + off_t offset, int flags, glfs_io_cbk fn, void *data) + __THROW; off_t glfs_lseek (glfs_fd_t *fd, off_t offset, int whence) __THROW; +off_t glfs_lseek_with_xdata (glfs_fd_t *fd, off_t offset, int whence, + dict_t *dict) __THROW; int glfs_truncate (glfs_t *fs, const char *path, off_t length) __THROW; int glfs_ftruncate (glfs_fd_t *fd, off_t length) __THROW; +int glfs_ftruncate_with_xdata (glfs_fd_t *fd, off_t length, dict_t *dict) + __THROW; int glfs_ftruncate_async (glfs_fd_t *fd, off_t length, glfs_io_cbk fn, void *data) __THROW; int glfs_lstat (glfs_t *fs, const char *path, struct stat *buf) __THROW; int glfs_stat (glfs_t *fs, const char *path, struct stat *buf) __THROW; int glfs_fstat (glfs_fd_t *fd, struct stat *buf) __THROW; +int glfs_fstat_with_xdata (glfs_fd_t *fd, struct stat *buf, dict_t *dict) + __THROW; int glfs_fsync (glfs_fd_t *fd) __THROW; +int glfs_fsync_with_xdata (glfs_fd_t *fd, dict_t *dict) __THROW; int glfs_fsync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW; int glfs_fdatasync (glfs_fd_t *fd) __THROW; @@ -487,22 +508,35 @@ int glfs_fdatasync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW; int glfs_access (glfs_t *fs, const char *path, int mode) __THROW; -int glfs_symlink (glfs_t *fs, const char *oldpath, const char *newpath) __THROW; +int glfs_symlink (glfs_t *fs, const char *oldpath, const char *newpath) + __THROW; +int glfs_symlink_with_xdata (glfs_t *fs, const char *oldpath, + const char *newpath, uuid_t gfid, dict_t *dict) + __THROW; int glfs_readlink (glfs_t *fs, const char *path, char *buf, size_t bufsiz) __THROW; int glfs_mknod (glfs_t *fs, const char *path, mode_t mode, dev_t dev) __THROW; +int glfs_mknod_with_xdata (glfs_t *fs, const char *path, mode_t mode, + dev_t dev, uuid_t gfid, dict_t *dict) __THROW; int glfs_mkdir 
(glfs_t *fs, const char *path, mode_t mode) __THROW; +int glfs_mkdir_with_xdata (glfs_t *fs, const char *path, mode_t mode, + uuid_t gfid, dict_t *dict) __THROW; int glfs_unlink (glfs_t *fs, const char *path) __THROW; int glfs_rmdir (glfs_t *fs, const char *path) __THROW; +int glfs_rmdir_with_xdata (glfs_t *fs, const char *path, dict_t *dict) __THROW; int glfs_rename (glfs_t *fs, const char *oldpath, const char *newpath) __THROW; +int glfs_rename_with_xdata (glfs_t *fs, const char *oldpath, + const char *newpath, dict_t *dict) __THROW; int glfs_link (glfs_t *fs, const char *oldpath, const char *newpath) __THROW; +int glfs_link_with_xdata (glfs_t *fs, const char *oldpath, const char *newpath, + dict_t *dict) __THROW; glfs_fd_t *glfs_opendir (glfs_t *fs, const char *path) __THROW; @@ -565,6 +599,9 @@ ssize_t glfs_lgetxattr (glfs_t *fs, const char *path, const char *name, ssize_t glfs_fgetxattr (glfs_fd_t *fd, const char *name, void *value, size_t size) __THROW; +ssize_t glfs_fgetxattr_with_xdata (glfs_fd_t *fd, const char *name, + void *value, size_t size, dict_t *dict) + __THROW; ssize_t glfs_listxattr (glfs_t *fs, const char *path, void *value, size_t size) __THROW; @@ -573,21 +610,34 @@ ssize_t glfs_llistxattr (glfs_t *fs, const char *path, void *value, size_t size) __THROW; ssize_t glfs_flistxattr (glfs_fd_t *fd, void *value, size_t size) __THROW; +ssize_t glfs_flistxattr_with_xdata (glfs_fd_t *fd, void *value, size_t size, + dict_t *dict) __THROW; int glfs_setxattr (glfs_t *fs, const char *path, const char *name, const void *value, size_t size, int flags) __THROW; +int glfs_setxattr_with_xdata (glfs_t *fs, const char *path, const char *name, + const void *value, size_t size, int flags, dict_t *dict); + int glfs_lsetxattr (glfs_t *fs, const char *path, const char *name, const void *value, size_t size, int flags) __THROW; int glfs_fsetxattr (glfs_fd_t *fd, const char *name, const void *value, size_t size, int flags) __THROW; +int glfs_fsetxattr_with_xdata 
(glfs_fd_t *fd, const char *name, + const void *value, size_t size, int flags, + dict_t *dict) __THROW; + int glfs_removexattr (glfs_t *fs, const char *path, const char *name) __THROW; +int glfs_removexattr_with_xdata (glfs_t *fs, const char *path, + const char *name, dict_t *dict) __THROW; int glfs_lremovexattr (glfs_t *fs, const char *path, const char *name) __THROW; int glfs_fremovexattr (glfs_fd_t *fd, const char *name) __THROW; +int glfs_fremovexattr_with_xdata (glfs_fd_t *fd, const char *name, + dict_t *dict) __THROW; int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len) __THROW; @@ -611,6 +661,22 @@ int glfs_fchdir (glfs_fd_t *fd) __THROW; char *glfs_realpath (glfs_t *fs, const char *path, char *resolved_path) __THROW; +int +glfs_setattr_with_xdata (struct glfs *fs, const char *path, struct iatt *iatt, + int valid, int follow, dict_t *dict); +int +glfs_fsetattr_with_xdata (struct glfs_fd *glfd, struct iatt *iatt, int valid, dict_t *dict); +int +glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt, + int valid, int follow); +int +glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid); + +int glfs_ipc (struct glfs *fs, int32_t op) __THROW; + + + + /* * @cmd and @flock are as specified in man fcntl(2). 
*/ diff --git a/configure.ac b/configure.ac index ae0bc78c4..500b1e93b 100644 --- a/configure.ac +++ b/configure.ac @@ -66,6 +66,12 @@ AC_CONFIG_FILES([Makefile xlators/cluster/Makefile xlators/cluster/afr/Makefile xlators/cluster/afr/src/Makefile + xlators/cluster/nsr-server/Makefile + xlators/cluster/nsr-server/src/Makefile + xlators/cluster/nsr-recon/Makefile + xlators/cluster/nsr-recon/src/Makefile + xlators/cluster/nsr-client/Makefile + xlators/cluster/nsr-client/src/Makefile xlators/cluster/stripe/Makefile xlators/cluster/stripe/src/Makefile xlators/cluster/dht/Makefile @@ -289,6 +295,7 @@ fi AC_CHECK_TOOL([LD],[ld]) AC_CHECK_LIB([crypto], [MD5], , AC_MSG_ERROR([OpenSSL crypto library is required to build glusterfs])) +AC_CHECK_LIB([curl], curl_version, ,AC_MSG_ERROR([libcurl library is required to build glusterfs])) AC_CHECK_LIB([pthread], [pthread_mutex_init], , AC_MSG_ERROR([Posix threads library is required to build glusterfs])) @@ -844,6 +851,19 @@ fi AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes]) #end syslog section + +#etcd section +AC_CHECK_PROG(ETCD,etcd,yes) + +ETCD_SIM=yes +if test "x${ETCD}" = "xyes"; then + ETCD_SIM=no + AC_DEFINE(HAVE_ETCD, 1, [define if found etcd]) +fi +AM_CONDITIONAL([ENABLE_ETCD_SIM], [test x$ETCD_SIM = xyes]) +#end etcd section + + BUILD_READLINE=no AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"]) AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"]) diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 21913a095..79804f538 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -188,6 +188,7 @@ BuildRequires: bison flex BuildRequires: gcc make automake libtool BuildRequires: ncurses-devel readline-devel BuildRequires: libxml2-devel openssl-devel +BuildRequires: libcurl-devel BuildRequires: libaio-devel BuildRequires: python-devel BuildRequires: python-ctypes @@ -828,6 +829,8 @@ fi %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* # Glupy 
files are in the -extra-xlators package %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/glupy* +%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/nsr.so +%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/nsr_recon.so # sample xlators not generally used or usable %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13* %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/mac-compat* @@ -961,6 +964,8 @@ fi %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt* %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* %{_sharedstatedir}/glusterd +%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/nsr.so +%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/nsr_recon.so %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options # This is really ugly, but I have no idea how to mark these directories in diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 7e94ee3c0..ee2e7c933 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -2297,7 +2297,53 @@ out: } -static void +call_stub_t * +fop_ipc_cbk_stub (call_frame_t *frame, fop_ipc_cbk_t fn, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO ("call-stub", frame, out); + + stub = stub_new (frame, 0, GF_FOP_IPC); + GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + + stub->fn_cbk.ipc = fn; + + stub->args_cbk.op_ret = op_ret; + stub->args_cbk.op_errno = op_errno; + + if (xdata) + stub->args_cbk.xdata = dict_ref (xdata); +out: + return stub; +} + +call_stub_t * +fop_ipc_stub (call_frame_t *frame, fop_ipc_t fn, + int32_t op, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO ("call-stub", frame, out); + GF_VALIDATE_OR_GOTO ("call-stub", fn, out); + + stub = stub_new (frame, 1, GF_FOP_IPC); + 
GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + + stub->fn.ipc = fn; + + stub->args.cmd = op; + + if (xdata) + stub->args.xdata = dict_ref (xdata); +out: + return stub; + +} + + +void call_resume_wind (call_stub_t *stub) { GF_VALIDATE_OR_GOTO ("call-stub", stub, out); @@ -2529,6 +2575,10 @@ call_resume_wind (call_stub_t *stub) stub->args.fd, stub->args.offset, stub->args.size, stub->args.xdata); break; + case GF_FOP_IPC: + stub->fn.ipc (stub->frame, stub->frame->this, + stub->args.cmd, stub->args.xdata); + break; default: gf_log_callingfn ("call-stub", GF_LOG_ERROR, @@ -2736,6 +2786,9 @@ call_resume_unwind (call_stub_t *stub) STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat, &stub->args_cbk.poststat, stub->args_cbk.xdata); break; + case GF_FOP_IPC: + STUB_UNWIND (stub, ipc, stub->args_cbk.xdata); + break; default: gf_log_callingfn ("call-stub", GF_LOG_ERROR, diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h index 0f6c108ee..e404c8dda 100644 --- a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/call-stub.h @@ -72,6 +72,7 @@ typedef struct { fop_fallocate_t fallocate; fop_discard_t discard; fop_zerofill_t zerofill; + fop_ipc_t ipc; } fn; union { @@ -119,6 +120,7 @@ typedef struct { fop_fallocate_cbk_t fallocate; fop_discard_cbk_t discard; fop_zerofill_cbk_t zerofill; + fop_ipc_cbk_t ipc; } fn_cbk; struct { @@ -761,7 +763,20 @@ fop_zerofill_cbk_stub(call_frame_t *frame, struct iatt *statpre, struct iatt *statpost, dict_t *xdata); +call_stub_t * +fop_ipc_stub (call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata); + +call_stub_t * +fop_ipc_cbk_stub (call_frame_t *frame, fop_ipc_cbk_t fn, + int32_t op_ret, int32_t op_errno, dict_t *xdata); + void call_resume (call_stub_t *stub); void call_stub_destroy (call_stub_t *stub); void call_unwind_error (call_stub_t *stub, int op_ret, int op_errno); + +/* + * Sometimes we might want to call just this, perhaps repeatedly, without + * having (or being able) to destroy and recreate it. 
+ */ +void call_resume_wind (call_stub_t *stub); #endif diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c index 599f9477d..8e0e56a74 100644 --- a/libglusterfs/src/defaults.c +++ b/libglusterfs/src/defaults.c @@ -1295,6 +1295,16 @@ default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } + +int32_t +default_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT (ipc, frame, op_ret, op_errno, xdata); + return 0; +} + + /* RESUME */ int32_t @@ -1726,6 +1736,17 @@ default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, } +int32_t +default_ipc_resume (call_frame_t *frame, xlator_t *this, int32_t op, + dict_t *xdata) +{ + STACK_WIND (frame, default_ipc_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->ipc, + op, xdata); + return 0; +} + + /* FOPS */ int32_t @@ -2162,6 +2183,16 @@ default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t +default_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->ipc, + op, xdata); + return 0; +} + + +int32_t default_forget (xlator_t *this, inode_t *inode) { gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not " diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h index 9bd5eb842..e29d62edf 100644 --- a/libglusterfs/src/defaults.h +++ b/libglusterfs/src/defaults.h @@ -263,6 +263,9 @@ int32_t default_zerofill(call_frame_t *frame, off_t offset, off_t len, dict_t *xdata); +int32_t default_ipc (call_frame_t *frame, xlator_t *this, int32_t op, + dict_t *xdata); + /* Resume */ int32_t default_getspec_resume (call_frame_t *frame, @@ -492,6 +495,9 @@ int32_t default_zerofill_resume(call_frame_t *frame, off_t offset, off_t len, dict_t *xdata); +int32_t default_ipc_resume (call_frame_t *frame, xlator_t *this, + int32_t op, dict_t *xdata); + /* _cbk_resume */ @@ -985,6 
+991,9 @@ int32_t default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *pre, struct iatt *post, dict_t *xdata); +int32_t default_ipc_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); + int32_t default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, char *spec_data); diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 259c5c885..0bb5ea9f4 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -70,7 +70,8 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_FREMOVEXATTR]= "FREMOVEXATTR", [GF_FOP_FALLOCATE] = "FALLOCATE", [GF_FOP_DISCARD] = "DISCARD", - [GF_FOP_ZEROFILL] = "ZEROFILL", + [GF_FOP_ZEROFILL] = "ZEROFILL", + [GF_FOP_IPC] = "IPC", }; /* THIS */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 96a203770..2bb3558e9 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -205,7 +205,7 @@ typedef enum { GF_FOP_WRITE, GF_FOP_STATFS, GF_FOP_FLUSH, - GF_FOP_FSYNC, /* 15 */ + GF_FOP_FSYNC, /* 16 */ GF_FOP_SETXATTR, GF_FOP_GETXATTR, GF_FOP_REMOVEXATTR, @@ -238,6 +238,7 @@ typedef enum { GF_FOP_FALLOCATE, GF_FOP_DISCARD, GF_FOP_ZEROFILL, + GF_FOP_IPC, GF_FOP_MAXVALUE, } glusterfs_fop_t; diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h index 04b404712..3bb991fac 100644 --- a/libglusterfs/src/list.h +++ b/libglusterfs/src/list.h @@ -212,4 +212,18 @@ list_append_init (struct list_head *list, struct list_head *head) &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) +/* + * This list implementation has some advantages, but one disadvantage: you + * can't use NULL to check whether you're at the head or tail. Thus, the + * address of the head has to be an argument for these macros. 
+ */ + +#define list_next(ptr,head,type,member) \ + (((ptr)->member.next == (head)) ? NULL \ + : list_entry((ptr)->member.next,type,member)) + +#define list_prev(ptr,head,type,member) \ + (((ptr)->member.prev == (head)) ? NULL \ + : list_entry((ptr)->member.prev,type,member)) + #endif /* _LLIST_H */ diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 9705a7d54..a46a6603f 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -1203,6 +1203,22 @@ syncop_opendir (xlator_t *subvol, } int +syncop_opendir_with_xdata (xlator_t *subvol, + loc_t *loc, + fd_t *fd, + dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir, + loc, fd, dict); + + if (args.op_ret < 0) /* failure is reported as -op_errno, as in syncop_removexattr_with_xdata */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_fsyncdir_cbk (call_frame_t *frame, void* cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -1250,10 +1266,16 @@ syncop_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name, dict_t *xdata) { + return(syncop_removexattr_with_xdata(subvol, loc, name, xdata)); /* pass the caller's xdata through instead of dropping it */ +} + +int +syncop_removexattr_with_xdata (xlator_t *subvol, loc_t *loc, const char *name, dict_t *dict) +{ struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr, - loc, name, xdata); + loc, name, dict); if (args.op_ret < 0) return -args.op_errno; @@ -1290,6 +1312,17 @@ syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name, dict_t *xdata } int +syncop_fremovexattr_with_xdata (xlator_t *subvol, fd_t *fd, const char *name, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fremovexattr_cbk, + subvol->fops->fremovexattr, fd, name, dict); + if (args.op_ret < 0) /* failure is reported as -op_errno, as in syncop_removexattr_with_xdata */ + return -args.op_errno; + return args.op_ret; +} +int syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { @@ -1305,14 +1338,19
@@ syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags) { + return (syncop_setxattr_with_xdata(subvol, loc, dict, flags, NULL)); +} + +int +syncop_setxattr_with_xdata (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags, dict_t *extra) +{ struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr, - loc, dict, flags, NULL); + loc, dict, flags, extra); if (args.op_ret < 0) return -args.op_errno; @@ -1350,6 +1388,18 @@ syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags) } int +syncop_fsetxattr_with_xdata (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags, dict_t *extra) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr, + fd, dict, flags, extra); + if (args.op_ret < 0) /* failure is reported as -op_errno, as in syncop_setxattr_with_xdata */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *dict, dict_t *xdata) { @@ -1404,12 +1454,12 @@ syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key) } int -syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key) +syncop_fgetxattr_with_xdata (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key, dict_t *extra) { struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr, - fd, key, NULL); + fd, key, extra); if (dict) *dict = args.xattr; @@ -1422,6 +1472,12 @@ syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key) } int +syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key) +{ + return(syncop_fgetxattr_with_xdata(subvol, fd, dict, key, NULL)); +} + +int syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata) @@ -1485,13 +1541,13 @@
syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int -syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, - struct iatt *preop, struct iatt *postop) +syncop_setattr_with_xdata (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, + struct iatt *preop, struct iatt *postop, dict_t *dict) { struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr, - loc, iatt, valid, NULL); + loc, iatt, valid, dict); if (preop) *preop = args.iatt1; @@ -1503,15 +1559,21 @@ syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, return args.op_ret; } +int +syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, + struct iatt *preop, struct iatt *postop) +{ + return(syncop_setattr_with_xdata(subvol, loc, iatt, valid, preop, postop, NULL)); +} int -syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, - struct iatt *preop, struct iatt *postop) +syncop_fsetattr_with_xdata (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, + struct iatt *preop, struct iatt *postop, dict_t *dict) { struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr, - fd, iatt, valid, NULL); + fd, iatt, valid, dict); if (preop) *preop = args.iatt1; @@ -1523,6 +1585,12 @@ syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, return args.op_ret; } +int +syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, + struct iatt *preop, struct iatt *postop) +{ + return(syncop_fsetattr_with_xdata(subvol, fd, iatt, valid, preop, postop, NULL)); +} int32_t syncop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1554,6 +1622,19 @@ syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd) } +int +syncop_open_with_xdata (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_open_cbk, 
subvol->fops->open, + loc, flags, fd, dict); + + if (args.op_ret < 0) /* failure is reported as -op_errno, as in syncop_writev_with_xdata */ + return -args.op_errno; + return args.op_ret; +} + int32_t syncop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1584,14 +1665,14 @@ syncop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } int -syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off, +syncop_readv_with_xdata (xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, struct iovec **vector, int *count, - struct iobref **iobref) + struct iobref **iobref, dict_t *dict) { struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_readv_cbk, subvol->fops->readv, - fd, size, off, flags, NULL); + fd, size, off, flags, dict); if (args.op_ret < 0) goto out; @@ -1618,6 +1699,14 @@ out: } int +syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off, + uint32_t flags, struct iovec **vector, int *count, + struct iobref **iobref) +{ + return(syncop_readv_with_xdata(subvol, fd, size, off, flags, vector, count, iobref, NULL)); +} + +int syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) @@ -1635,21 +1724,29 @@ syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } int -syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector, +syncop_writev_with_xdata (xlator_t *subvol, fd_t *fd, const struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, - uint32_t flags) + uint32_t flags, dict_t *dict) { struct syncargs args = {0, }; SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd, (struct iovec *) vector, count, offset, flags, iobref, - NULL); + dict); if (args.op_ret < 0) return -args.op_errno; return args.op_ret; } +int +syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector, + int32_t count, off_t offset, struct iobref *iobref, + uint32_t flags) +{ + return(syncop_writev_with_xdata(subvol, fd, vector, count, offset, iobref,
flags, NULL)); +} + int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size, off_t offset, struct iobref *iobref, uint32_t flags) { @@ -1746,6 +1843,18 @@ syncop_unlink (xlator_t *subvol, loc_t *loc) } int +syncop_unlink_with_xdata (xlator_t *subvol, loc_t *loc, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc, + 0, dict); + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) @@ -1777,6 +1886,18 @@ syncop_rmdir (xlator_t *subvol, loc_t *loc, int flags) int +syncop_rmdir_with_xdata (xlator_t *subvol, loc_t *loc, int flags, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_rmdir_cbk, subvol->fops->rmdir, loc, + flags, dict); + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, @@ -1809,6 +1930,18 @@ syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc) return args.op_ret; } +int +syncop_link_with_xdata (xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_link_cbk, subvol->fops->link, + oldloc, newloc, dict); + + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} int syncop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1844,6 +1977,19 @@ syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc) return args.op_ret; } +int +syncop_rename_with_xdata (xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_rename_cbk, subvol->fops->rename, + oldloc, newloc, dict); + + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} + int
syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1876,6 +2022,18 @@ syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset) } int +syncop_ftruncate_with_xdata (xlator_t *subvol, fd_t *fd, off_t offset, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate, + fd, offset, dict); + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset) { struct syncargs args = {0, }; @@ -1920,6 +2078,19 @@ syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly) } +int +syncop_fsync_with_xdata (xlator_t *subvol, fd_t *fd, int dataonly, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, + fd, dataonly, dict); + + if (args.op_ret < 0) /* failure is reported as -op_errno */ + return -args.op_errno; + return args.op_ret; +} + int syncop_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1953,6 +2124,19 @@ syncop_flush (xlator_t *subvol, fd_t *fd) } int +syncop_flush_with_xdata (xlator_t *subvol, fd_t *fd, dict_t *dict) +{ + struct syncargs args = {0}; + + SYNCOP (subvol, (&args), syncop_flush_cbk, subvol->fops->flush, + fd, dict); + + if (args.op_ret < 0) /* -op_errno: glfs_close_with_xdata decodes the result with DECODE_SYNCOP_ERR */ + return -args.op_errno; + return args.op_ret; +} + +int syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *stbuf, dict_t *xdata) { @@ -1987,6 +2171,21 @@ syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf) return args.op_ret; } +int +syncop_fstat_with_xdata (xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *dict) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat, + fd, dict); + + if (stbuf) + *stbuf = args.iatt1; + + if (args.op_ret < 0) /* -op_errno: glfs_fstat_with_xdata decodes the result with DECODE_SYNCOP_ERR */ + return -args.op_errno; + return args.op_ret; +} int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf) @@ -2282,6 +2481,36 @@ syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len) int +syncop_ipc_cbk
(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + __wake (args); + + return 0; +} + +int +syncop_ipc (xlator_t *subvol, int32_t op) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_ipc_cbk, subvol->fops->ipc, + op, NULL); + + if (args.op_ret < 0) + return -args.op_errno; + return args.op_ret; +} + + +int syncop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata) diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index 16f3833ba..8760f4b68 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -351,51 +351,81 @@ int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off, gf_dirent_t *entries); int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd); +int syncop_opendir_with_xdata (xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *dict); int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, /* out */ struct iatt *preop, struct iatt *postop); +int syncop_setattr_with_xdata (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid, + /* out */ + struct iatt *preop, struct iatt *postop, dict_t *dict); + int syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, /* out */ struct iatt *preop, struct iatt *postop); +int syncop_fsetattr_with_xdata (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid, + /* out */ + struct iatt *preop, struct iatt *postop, dict_t *dict); + int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf); int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags); +int syncop_setxattr_with_xdata (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags, dict_t *extra); int syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags); +int 
syncop_fsetxattr_with_xdata (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags, dict_t *extra); int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict); int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key); int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key); +int syncop_fgetxattr_with_xdata (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key, dict_t *extra); int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name, dict_t *xdata); +int syncop_removexattr_with_xdata (xlator_t *subvol, loc_t *loc, const char *name, dict_t *dict); int syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name, dict_t *xdata); +int syncop_fremovexattr_with_xdata (xlator_t *subvol, fd_t *fd, const char *name, dict_t *dict); int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode, fd_t *fd, dict_t *dict, struct iatt *iatt); int syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd); +int syncop_open_with_xdata (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd, dict_t *dict); int syncop_close (fd_t *fd); +int syncop_close_with_xdata (fd_t *fd, dict_t *dict); int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size, off_t offset, struct iobref *iobref, uint32_t flags); int syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, uint32_t flags); +int syncop_writev_with_xdata (xlator_t *subvol, fd_t *fd, const struct iovec *vector, + int32_t count, off_t offset, struct iobref *iobref, + uint32_t flags, dict_t *dict); int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags, /* out */ struct iovec **vector, int *count, struct iobref **iobref); +int syncop_readv_with_xdata (xlator_t *subvol, fd_t *fd, size_t size, off_t off, + uint32_t flags, + /* out */ + struct iovec **vector, int *count, struct iobref **iobref, dict_t *dict); int syncop_ftruncate 
(xlator_t *subvol, fd_t *fd, off_t offset); +int syncop_ftruncate_with_xdata (xlator_t *subvol, fd_t *fd, off_t offset, dict_t *dict); int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset); int syncop_unlink (xlator_t *subvol, loc_t *loc); +int syncop_unlink_with_xdata (xlator_t *subvol, loc_t *loc, dict_t *dict); + int syncop_rmdir (xlator_t *subvol, loc_t *loc, int flags); +int syncop_rmdir_with_xdata (xlator_t *subvol, loc_t *loc, int flags, dict_t *dict); int syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly); +int syncop_fsync_with_xdata (xlator_t *subvol, fd_t *fd, int dataonly, dict_t *dict); int syncop_flush (xlator_t *subvol, fd_t *fd); +int syncop_flush_with_xdata (xlator_t *subvol, fd_t *fd, dict_t *dict); int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf); +int syncop_fstat_with_xdata (xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *dict); int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf); int syncop_symlink (xlator_t *subvol, loc_t *loc, const char *newpath, @@ -406,6 +436,7 @@ int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev, int syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict, struct iatt *iatt); int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc); +int syncop_link_with_xdata (xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *dict); int syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync); int syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask); int syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset, @@ -415,7 +446,10 @@ int syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len); int syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len); int syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc); +int syncop_rename_with_xdata (xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *dict); int syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock 
*flock); +int syncop_ipc (xlator_t *subvol, int op); + #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 1bded6d3d..55e946795 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -82,6 +82,7 @@ fill_defaults (xlator_t *xl) SET_DEFAULT_FOP (fallocate); SET_DEFAULT_FOP (discard); SET_DEFAULT_FOP (zerofill); + SET_DEFAULT_FOP (ipc); SET_DEFAULT_FOP (getspec); @@ -880,4 +881,3 @@ glusterd_check_log_level (const char *value) return log_level; } - diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 1daa06ec2..ec5c0c5c1 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -442,6 +442,10 @@ typedef int32_t (*fop_zerofill_cbk_t) (call_frame_t *frame, struct iatt *preop_stbuf, struct iatt *postop_stbuf, dict_t *xdata); +typedef int32_t (*fop_ipc_cbk_t) (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata); + typedef int32_t (*fop_lookup_t) (call_frame_t *frame, xlator_t *this, loc_t *loc, @@ -673,6 +677,7 @@ typedef int32_t (*fop_discard_t) (call_frame_t *frame, off_t offset, size_t len, dict_t *xdata); + typedef int32_t (*fop_zerofill_t) (call_frame_t *frame, xlator_t *this, fd_t *fd, @@ -680,6 +685,9 @@ typedef int32_t (*fop_zerofill_t) (call_frame_t *frame, off_t len, dict_t *xdata); +typedef int32_t (*fop_ipc_t) (call_frame_t *frame, xlator_t *this, int32_t op, + dict_t *xdata); + struct xlator_fops { fop_lookup_t lookup; fop_stat_t stat; @@ -726,6 +734,7 @@ struct xlator_fops { fop_fallocate_t fallocate; fop_discard_t discard; fop_zerofill_t zerofill; + fop_ipc_t ipc; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ fop_lookup_cbk_t lookup_cbk; @@ -773,6 +782,7 @@ struct xlator_fops { fop_fallocate_cbk_t fallocate_cbk; fop_discard_cbk_t discard_cbk; fop_zerofill_cbk_t zerofill_cbk; + fop_ipc_cbk_t ipc_cbk; }; typedef int32_t (*cbk_forget_t) (xlator_t *this, diff --git 
a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index e4f7fbf3a..99d5a6259 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -59,6 +59,7 @@ enum gf_fop_procnum { GFS3_OP_FALLOCATE, GFS3_OP_DISCARD, GFS3_OP_ZEROFILL, + GFS3_OP_IPC, GFS3_OP_MAXVALUE, } ; diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c new file mode 100644 index 000000000..6b59f4040 --- /dev/null +++ b/rpc/xdr/src/glusterfs3-xdr.c @@ -0,0 +1,2068 @@ +/* + Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "xdr-common.h" +#include "compat.h" + +#if defined(__GNUC__) +#if __GNUC__ >= 4 +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +#endif + +/* + * Please do not edit this file. + * It was generated using rpcgen. 
+ */ + +#include "glusterfs3-xdr.h" + +bool_t +xdr_gf_statfs (XDR *xdrs, gf_statfs *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_u_quad_t (xdrs, &objp->bsize)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->frsize)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->blocks)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->bfree)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->bavail)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->files)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ffree)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->favail)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->fsid)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->flag)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->namemax)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gf_proto_flock (XDR *xdrs, gf_proto_flock *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_u_int (xdrs, &objp->type)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->whence)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->start)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->len)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->pid)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gf_iatt (XDR *xdrs, gf_iatt *objp) +{ + register int32_t *buf; + buf = NULL; + + + if (xdrs->x_op == XDR_ENCODE) { + if (!xdr_opaque (xdrs, objp->ia_gfid, 16)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_ino)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_dev)) + return FALSE; + buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_u_int (xdrs, &objp->mode)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_nlink)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_uid)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_gid)) + return FALSE; + + } else { + IXDR_PUT_U_LONG(buf, objp->mode); + IXDR_PUT_U_LONG(buf, 
objp->ia_nlink); + IXDR_PUT_U_LONG(buf, objp->ia_uid); + IXDR_PUT_U_LONG(buf, objp->ia_gid); + } + if (!xdr_u_quad_t (xdrs, &objp->ia_rdev)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_size)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_blksize)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_blocks)) + return FALSE; + buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_u_int (xdrs, &objp->ia_atime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_atime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec)) + return FALSE; + } else { + IXDR_PUT_U_LONG(buf, objp->ia_atime); + IXDR_PUT_U_LONG(buf, objp->ia_atime_nsec); + IXDR_PUT_U_LONG(buf, objp->ia_mtime); + IXDR_PUT_U_LONG(buf, objp->ia_mtime_nsec); + IXDR_PUT_U_LONG(buf, objp->ia_ctime); + IXDR_PUT_U_LONG(buf, objp->ia_ctime_nsec); + } + return TRUE; + } else if (xdrs->x_op == XDR_DECODE) { + if (!xdr_opaque (xdrs, objp->ia_gfid, 16)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_ino)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_dev)) + return FALSE; + buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_u_int (xdrs, &objp->mode)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_nlink)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_uid)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_gid)) + return FALSE; + + } else { + objp->mode = IXDR_GET_U_LONG(buf); + objp->ia_nlink = IXDR_GET_U_LONG(buf); + objp->ia_uid = IXDR_GET_U_LONG(buf); + objp->ia_gid = IXDR_GET_U_LONG(buf); + } + if (!xdr_u_quad_t (xdrs, &objp->ia_rdev)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_size)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_blksize)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_blocks)) + return FALSE; + buf = 
XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT); + if (buf == NULL) { + if (!xdr_u_int (xdrs, &objp->ia_atime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_atime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec)) + return FALSE; + } else { + objp->ia_atime = IXDR_GET_U_LONG(buf); + objp->ia_atime_nsec = IXDR_GET_U_LONG(buf); + objp->ia_mtime = IXDR_GET_U_LONG(buf); + objp->ia_mtime_nsec = IXDR_GET_U_LONG(buf); + objp->ia_ctime = IXDR_GET_U_LONG(buf); + objp->ia_ctime_nsec = IXDR_GET_U_LONG(buf); + } + return TRUE; + } + + if (!xdr_opaque (xdrs, objp->ia_gfid, 16)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_ino)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_dev)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->mode)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_nlink)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_uid)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_gid)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_rdev)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_size)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_blksize)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->ia_blocks)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_atime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_atime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_stat_req (XDR *xdrs, gfs3_stat_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) 
&objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_stat_rsp (XDR *xdrs, gfs3_stat_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_readlink_req (XDR *xdrs, gfs3_readlink_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->size)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_readlink_rsp (XDR *xdrs, gfs3_readlink_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->buf)) + return FALSE; + if (!xdr_string (xdrs, &objp->path, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_mknod_req (XDR *xdrs, gfs3_mknod_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->dev)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->mode)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->umask)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_mknod_rsp (XDR *xdrs, gfs3_mknod_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, 
&objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_mkdir_req (XDR *xdrs, gfs3_mkdir_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->mode)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->umask)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_mkdir_rsp (XDR *xdrs, gfs3_mkdir_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_unlink_req (XDR *xdrs, gfs3_unlink_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->xflags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_unlink_rsp (XDR *xdrs, gfs3_unlink_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt 
(xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_rmdir_req (XDR *xdrs, gfs3_rmdir_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_int (xdrs, &objp->xflags)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_rmdir_rsp (XDR *xdrs, gfs3_rmdir_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_symlink_req (XDR *xdrs, gfs3_symlink_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->umask)) + return FALSE; + if (!xdr_string (xdrs, &objp->linkname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_symlink_rsp (XDR *xdrs, gfs3_symlink_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; 
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_rename_req (XDR *xdrs, gfs3_rename_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->oldgfid, 16)) + return FALSE; + if (!xdr_opaque (xdrs, objp->newgfid, 16)) + return FALSE; + if (!xdr_string (xdrs, &objp->oldbname, ~0)) + return FALSE; + if (!xdr_string (xdrs, &objp->newbname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_rename_rsp (XDR *xdrs, gfs3_rename_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->preoldparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postoldparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->prenewparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postnewparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_link_req (XDR *xdrs, gfs3_link_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->oldgfid, 16)) + return FALSE; + if (!xdr_opaque (xdrs, objp->newgfid, 16)) + return FALSE; + if (!xdr_string (xdrs, &objp->newbname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_link_rsp (XDR *xdrs, gfs3_link_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt 
(xdrs, &objp->preparent)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_truncate_req (XDR *xdrs, gfs3_truncate_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->offset)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_truncate_rsp (XDR *xdrs, gfs3_truncate_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->prestat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->poststat)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_open_req (XDR *xdrs, gfs3_open_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_open_rsp (XDR *xdrs, gfs3_open_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_read_req (XDR *xdrs, gfs3_read_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t 
(xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->offset)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->size)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flag)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_read_rsp (XDR *xdrs, gfs3_read_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->size)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_lookup_req (XDR *xdrs, gfs3_lookup_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_opaque (xdrs, objp->pargfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_string (xdrs, &objp->bname, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_lookup_rsp (XDR *xdrs, gfs3_lookup_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->stat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->postparent)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_write_req (XDR *xdrs, gfs3_write_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->offset)) + 
return FALSE; + if (!xdr_u_int (xdrs, &objp->size)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flag)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_write_rsp (XDR *xdrs, gfs3_write_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->prestat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->poststat)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_statfs_req (XDR *xdrs, gfs3_statfs_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_statfs_rsp (XDR *xdrs, gfs3_statfs_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_statfs (xdrs, &objp->statfs)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_lk_req (XDR *xdrs, gfs3_lk_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->cmd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->type)) + return FALSE; + if (!xdr_gf_proto_flock (xdrs, &objp->flock)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_lk_rsp (XDR *xdrs, gfs3_lk_rsp *objp) 
+{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_proto_flock (xdrs, &objp->flock)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_inodelk_req (XDR *xdrs, gfs3_inodelk_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->cmd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->type)) + return FALSE; + if (!xdr_gf_proto_flock (xdrs, &objp->flock)) + return FALSE; + if (!xdr_string (xdrs, &objp->volume, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_finodelk_req (XDR *xdrs, gfs3_finodelk_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->cmd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->type)) + return FALSE; + if (!xdr_gf_proto_flock (xdrs, &objp->flock)) + return FALSE; + if (!xdr_string (xdrs, &objp->volume, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_flush_req (XDR *xdrs, gfs3_flush_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fsync_req (XDR *xdrs, gfs3_fsync_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t 
(xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->data)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fsync_rsp (XDR *xdrs, gfs3_fsync_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->prestat)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->poststat)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_setxattr_req (XDR *xdrs, gfs3_setxattr_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fsetxattr_req (XDR *xdrs, gfs3_fsetxattr_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_xattrop_req (XDR *xdrs, gfs3_xattrop_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return 
FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_xattrop_rsp (XDR *xdrs, gfs3_xattrop_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fxattrop_req (XDR *xdrs, gfs3_fxattrop_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fxattrop_rsp (XDR *xdrs, gfs3_fxattrop_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_getxattr_req (XDR *xdrs, gfs3_getxattr_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->namelen)) + return FALSE; + if (!xdr_string (xdrs, &objp->name, ~0)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + 
+/*
+ * XDR routines for gfs3_* / gf_* message bodies.
+ *
+ * Unless a routine says otherwise, it hands each member of its struct to
+ * the matching XDR primitive in the order shown and returns FALSE at the
+ * first primitive that fails (later members are then not processed);
+ * TRUE means every primitive succeeded.  dict, xdata and similar members
+ * are counted byte buffers (pointer plus length) passed to xdr_bytes with
+ * a size limit of ~0; strings go through xdr_string with the same limit.
+ */
+
+/* gfs3_getxattr_rsp: status pair, dict, xdata. */
+bool_t
+xdr_gfs3_getxattr_rsp (XDR *xdrs, gfs3_getxattr_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fgetxattr_req: gfid, fd, namelen, name, xdata. */
+bool_t
+xdr_gfs3_fgetxattr_req (XDR *xdrs, gfs3_fgetxattr_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_int (xdrs, &objp->namelen)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fgetxattr_rsp: status pair, dict, xdata. */
+bool_t
+xdr_gfs3_fgetxattr_rsp (XDR *xdrs, gfs3_fgetxattr_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_removexattr_req: gfid, name, xdata. */
+bool_t
+xdr_gfs3_removexattr_req (XDR *xdrs, gfs3_removexattr_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fremovexattr_req: gfid, fd, name, xdata. */
+bool_t
+xdr_gfs3_fremovexattr_req (XDR *xdrs, gfs3_fremovexattr_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_opendir_req: gfid, xdata. */
+bool_t
+xdr_gfs3_opendir_req (XDR *xdrs, gfs3_opendir_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_opendir_rsp: status pair, fd, xdata. */
+bool_t
+xdr_gfs3_opendir_rsp (XDR *xdrs, gfs3_opendir_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fsyncdir_req: gfid, fd, data (processed as a signed int), xdata. */
+bool_t
+xdr_gfs3_fsyncdir_req (XDR *xdrs, gfs3_fsyncdir_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_int (xdrs, &objp->data)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_readdir_req: gfid, fd, offset, size, xdata. */
+bool_t
+xdr_gfs3_readdir_req (XDR *xdrs, gfs3_readdir_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_int (xdrs, &objp->size)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_readdirp_req: gfid, fd, offset, size, dict (this body has no xdata). */
+bool_t
+xdr_gfs3_readdirp_req (XDR *xdrs, gfs3_readdirp_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_int (xdrs, &objp->size)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_access_req: gfid, mask, xdata. */
+bool_t
+xdr_gfs3_access_req (XDR *xdrs, gfs3_access_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_u_int (xdrs, &objp->mask)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_create_req: pargfid, then flags/mode/umask, then bname and xdata.
+ * When encoding or decoding, the three u_int members are moved through a
+ * 3-unit inline buffer if XDR_INLINE supplies one; if it returns NULL, or
+ * for any other x_op, they go through xdr_u_int one at a time.
+ */
+bool_t
+xdr_gfs3_create_req (XDR *xdrs, gfs3_create_req *objp)
+{
+        int32_t *buf = NULL;
+
+        if (!xdr_opaque (xdrs, objp->pargfid, 16))
+                return FALSE;
+
+        /* The inline buffer is requested only for encode and decode. */
+        if (xdrs->x_op == XDR_ENCODE || xdrs->x_op == XDR_DECODE)
+                buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+
+        if (buf == NULL) {
+                if (!xdr_u_int (xdrs, &objp->flags)
+                    || !xdr_u_int (xdrs, &objp->mode)
+                    || !xdr_u_int (xdrs, &objp->umask))
+                        return FALSE;
+        } else if (xdrs->x_op == XDR_ENCODE) {
+                IXDR_PUT_U_LONG(buf, objp->flags);
+                IXDR_PUT_U_LONG(buf, objp->mode);
+                IXDR_PUT_U_LONG(buf, objp->umask);
+        } else {
+                objp->flags = IXDR_GET_U_LONG(buf);
+                objp->mode = IXDR_GET_U_LONG(buf);
+                objp->umask = IXDR_GET_U_LONG(buf);
+        }
+
+        if (!xdr_string (xdrs, &objp->bname, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_create_rsp: status pair, stat, fd, pre/post parent attributes, xdata.
+ * fd is processed with xdr_u_quad_t here (xdr_gfs3_opendir_rsp uses
+ * xdr_quad_t for its fd).
+ */
+bool_t
+xdr_gfs3_create_rsp (XDR *xdrs, gfs3_create_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->stat)
+            || !xdr_u_quad_t (xdrs, &objp->fd)
+            || !xdr_gf_iatt (xdrs, &objp->preparent)
+            || !xdr_gf_iatt (xdrs, &objp->postparent)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_ftruncate_req: gfid, fd, offset, xdata. */
+bool_t
+xdr_gfs3_ftruncate_req (XDR *xdrs, gfs3_ftruncate_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_ftruncate_rsp: status pair, pre/post attributes, xdata. */
+bool_t
+xdr_gfs3_ftruncate_rsp (XDR *xdrs, gfs3_ftruncate_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->prestat)
+            || !xdr_gf_iatt (xdrs, &objp->poststat)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fstat_req: gfid, fd, xdata. */
+bool_t
+xdr_gfs3_fstat_req (XDR *xdrs, gfs3_fstat_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fstat_rsp: status pair, stat, xdata. */
+bool_t
+xdr_gfs3_fstat_rsp (XDR *xdrs, gfs3_fstat_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->stat)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_entrylk_req: gfid, cmd, type, namelen, name, volume, xdata.
+ * namelen is processed as an unsigned 64-bit value (xdr_u_quad_t).
+ */
+bool_t
+xdr_gfs3_entrylk_req (XDR *xdrs, gfs3_entrylk_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_u_int (xdrs, &objp->cmd)
+            || !xdr_u_int (xdrs, &objp->type)
+            || !xdr_u_quad_t (xdrs, &objp->namelen)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_string (xdrs, &objp->volume, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fentrylk_req: gfid, fd, cmd, type, namelen (u_quad), name, volume, xdata. */
+bool_t
+xdr_gfs3_fentrylk_req (XDR *xdrs, gfs3_fentrylk_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_int (xdrs, &objp->cmd)
+            || !xdr_u_int (xdrs, &objp->type)
+            || !xdr_u_quad_t (xdrs, &objp->namelen)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_string (xdrs, &objp->volume, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_setattr_req: gfid, stbuf, valid, xdata. */
+bool_t
+xdr_gfs3_setattr_req (XDR *xdrs, gfs3_setattr_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_gf_iatt (xdrs, &objp->stbuf)
+            || !xdr_int (xdrs, &objp->valid)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_setattr_rsp: status pair, statpre, statpost, xdata. */
+bool_t
+xdr_gfs3_setattr_rsp (XDR *xdrs, gfs3_setattr_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->statpre)
+            || !xdr_gf_iatt (xdrs, &objp->statpost)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fsetattr_req: fd, stbuf, valid, xdata (no gfid member is processed). */
+bool_t
+xdr_gfs3_fsetattr_req (XDR *xdrs, gfs3_fsetattr_req *objp)
+{
+        if (!xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_gf_iatt (xdrs, &objp->stbuf)
+            || !xdr_int (xdrs, &objp->valid)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fsetattr_rsp: status pair, statpre, statpost, xdata. */
+bool_t
+xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->statpre)
+            || !xdr_gf_iatt (xdrs, &objp->statpost)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fallocate_req: gfid, fd, flags, offset, size, xdata. */
+bool_t
+xdr_gfs3_fallocate_req (XDR *xdrs, gfs3_fallocate_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_int (xdrs, &objp->flags)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_quad_t (xdrs, &objp->size)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_fallocate_rsp: status pair, statpre, statpost, xdata. */
+bool_t
+xdr_gfs3_fallocate_rsp (XDR *xdrs, gfs3_fallocate_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->statpre)
+            || !xdr_gf_iatt (xdrs, &objp->statpost)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_discard_req: gfid, fd, offset, size, xdata. */
+bool_t
+xdr_gfs3_discard_req (XDR *xdrs, gfs3_discard_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_quad_t (xdrs, &objp->size)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_discard_rsp: status pair, statpre, statpost, xdata. */
+bool_t
+xdr_gfs3_discard_rsp (XDR *xdrs, gfs3_discard_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->statpre)
+            || !xdr_gf_iatt (xdrs, &objp->statpost)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_zerofill_req: gfid, fd, offset, size, xdata. */
+bool_t
+xdr_gfs3_zerofill_req (XDR *xdrs, gfs3_zerofill_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_quad_t (xdrs, &objp->size)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_zerofill_rsp: status pair, statpre, statpost, xdata. */
+bool_t
+xdr_gfs3_zerofill_rsp (XDR *xdrs, gfs3_zerofill_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_gf_iatt (xdrs, &objp->statpre)
+            || !xdr_gf_iatt (xdrs, &objp->statpost)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_rchecksum_req: fd, offset, len, xdata (no gfid member is processed). */
+bool_t
+xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
+{
+        if (!xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_u_quad_t (xdrs, &objp->offset)
+            || !xdr_u_int (xdrs, &objp->len)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_rchecksum_rsp: op_ret, op_errno, weak_checksum, then the
+ * strong_checksum byte buffer and xdata.  The three leading integers use
+ * the XDR_INLINE buffer when encoding or decoding and one is available;
+ * otherwise they go through xdr_int / xdr_u_int.
+ */
+bool_t
+xdr_gfs3_rchecksum_rsp (XDR *xdrs, gfs3_rchecksum_rsp *objp)
+{
+        int32_t *buf = NULL;
+
+        /* The inline buffer is requested only for encode and decode. */
+        if (xdrs->x_op == XDR_ENCODE || xdrs->x_op == XDR_DECODE)
+                buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+
+        if (buf == NULL) {
+                if (!xdr_int (xdrs, &objp->op_ret)
+                    || !xdr_int (xdrs, &objp->op_errno)
+                    || !xdr_u_int (xdrs, &objp->weak_checksum))
+                        return FALSE;
+        } else if (xdrs->x_op == XDR_ENCODE) {
+                IXDR_PUT_LONG(buf, objp->op_ret);
+                IXDR_PUT_LONG(buf, objp->op_errno);
+                IXDR_PUT_U_LONG(buf, objp->weak_checksum);
+        } else {
+                objp->op_ret = IXDR_GET_LONG(buf);
+                objp->op_errno = IXDR_GET_LONG(buf);
+                objp->weak_checksum = IXDR_GET_U_LONG(buf);
+        }
+
+        if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_ipc_req: op, xdata. */
+bool_t
+xdr_gfs3_ipc_req (XDR *xdrs, gfs3_ipc_req *objp)
+{
+        if (!xdr_int (xdrs, &objp->op)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_setvolume_req: dict only. */
+bool_t
+xdr_gf_setvolume_req (XDR *xdrs, gf_setvolume_req *objp)
+{
+        if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_setvolume_rsp: status pair, dict. */
+bool_t
+xdr_gf_setvolume_rsp (XDR *xdrs, gf_setvolume_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_getspec_req: flags, key, xdata. */
+bool_t
+xdr_gf_getspec_req (XDR *xdrs, gf_getspec_req *objp)
+{
+        if (!xdr_u_int (xdrs, &objp->flags)
+            || !xdr_string (xdrs, &objp->key, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_getspec_rsp: status pair, spec string, xdata. */
+bool_t
+xdr_gf_getspec_rsp (XDR *xdrs, gf_getspec_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_string (xdrs, &objp->spec, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_mgmt_hndsk_req: hndsk byte buffer only. */
+bool_t
+xdr_gf_mgmt_hndsk_req (XDR *xdrs, gf_mgmt_hndsk_req *objp)
+{
+        if (!xdr_bytes (xdrs, (char **)&objp->hndsk.hndsk_val, (u_int *) &objp->hndsk.hndsk_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_mgmt_hndsk_rsp: status pair, hndsk byte buffer. */
+bool_t
+xdr_gf_mgmt_hndsk_rsp (XDR *xdrs, gf_mgmt_hndsk_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->hndsk.hndsk_val, (u_int *) &objp->hndsk.hndsk_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_log_req: msg byte buffer only. */
+bool_t
+xdr_gf_log_req (XDR *xdrs, gf_log_req *objp)
+{
+        if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_notify_req: flags, buf string, xdata. */
+bool_t
+xdr_gf_notify_req (XDR *xdrs, gf_notify_req *objp)
+{
+        if (!xdr_u_int (xdrs, &objp->flags)
+            || !xdr_string (xdrs, &objp->buf, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gf_notify_rsp: op_ret, op_errno, flags, then the buf string and xdata.
+ * The three leading integers use the XDR_INLINE buffer when encoding or
+ * decoding and one is available; otherwise they go through the primitives.
+ * The local inline pointer is a different object from the objp->buf member.
+ */
+bool_t
+xdr_gf_notify_rsp (XDR *xdrs, gf_notify_rsp *objp)
+{
+        int32_t *buf = NULL;
+
+        /* The inline buffer is requested only for encode and decode. */
+        if (xdrs->x_op == XDR_ENCODE || xdrs->x_op == XDR_DECODE)
+                buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+
+        if (buf == NULL) {
+                if (!xdr_int (xdrs, &objp->op_ret)
+                    || !xdr_int (xdrs, &objp->op_errno)
+                    || !xdr_u_int (xdrs, &objp->flags))
+                        return FALSE;
+        } else if (xdrs->x_op == XDR_ENCODE) {
+                IXDR_PUT_LONG(buf, objp->op_ret);
+                IXDR_PUT_LONG(buf, objp->op_errno);
+                IXDR_PUT_U_LONG(buf, objp->flags);
+        } else {
+                objp->op_ret = IXDR_GET_LONG(buf);
+                objp->op_errno = IXDR_GET_LONG(buf);
+                objp->flags = IXDR_GET_U_LONG(buf);
+        }
+
+        if (!xdr_string (xdrs, &objp->buf, ~0)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_releasedir_req: gfid, fd, xdata. */
+bool_t
+xdr_gfs3_releasedir_req (XDR *xdrs, gfs3_releasedir_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_release_req: gfid, fd, xdata. */
+bool_t
+xdr_gfs3_release_req (XDR *xdrs, gfs3_release_req *objp)
+{
+        if (!xdr_opaque (xdrs, objp->gfid, 16)
+            || !xdr_quad_t (xdrs, &objp->fd)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_common_rsp: status pair, xdata. */
+bool_t
+xdr_gf_common_rsp (XDR *xdrs, gf_common_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_dirlist: d_ino, d_off, d_len, d_type, name, then nextentry.
+ * nextentry goes through xdr_pointer with this same routine as its
+ * element procedure, so one call walks the whole chain of entries.
+ */
+bool_t
+xdr_gfs3_dirlist (XDR *xdrs, gfs3_dirlist *objp)
+{
+        if (!xdr_u_quad_t (xdrs, &objp->d_ino)
+            || !xdr_u_quad_t (xdrs, &objp->d_off)
+            || !xdr_u_int (xdrs, &objp->d_len)
+            || !xdr_u_int (xdrs, &objp->d_type)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_readdir_rsp: status pair, reply (pointer to the first gfs3_dirlist), xdata. */
+bool_t
+xdr_gfs3_readdir_rsp (XDR *xdrs, gfs3_readdir_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/*
+ * gfs3_dirplist: the gfs3_dirlist members plus a gf_iatt stat and a dict
+ * buffer per entry; nextentry again goes through xdr_pointer with this
+ * routine as its element procedure.
+ */
+bool_t
+xdr_gfs3_dirplist (XDR *xdrs, gfs3_dirplist *objp)
+{
+        if (!xdr_u_quad_t (xdrs, &objp->d_ino)
+            || !xdr_u_quad_t (xdrs, &objp->d_off)
+            || !xdr_u_int (xdrs, &objp->d_len)
+            || !xdr_u_int (xdrs, &objp->d_type)
+            || !xdr_string (xdrs, &objp->name, ~0)
+            || !xdr_gf_iatt (xdrs, &objp->stat)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)
+            || !xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
+                return FALSE;
+        return TRUE;
+}
+
+/* gfs3_readdirp_rsp: status pair, reply (pointer to the first gfs3_dirplist), xdata. */
+bool_t
+xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist)
+            || !xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_set_lk_ver_rsp: status pair, lk_ver. */
+bool_t
+xdr_gf_set_lk_ver_rsp (XDR *xdrs, gf_set_lk_ver_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_int (xdrs, &objp->lk_ver))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_set_lk_ver_req: uid string, lk_ver. */
+bool_t
+xdr_gf_set_lk_ver_req (XDR *xdrs, gf_set_lk_ver_req *objp)
+{
+        if (!xdr_string (xdrs, &objp->uid, ~0)
+            || !xdr_int (xdrs, &objp->lk_ver))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_event_notify_req: op, dict. */
+bool_t
+xdr_gf_event_notify_req (XDR *xdrs, gf_event_notify_req *objp)
+{
+        if (!xdr_int (xdrs, &objp->op)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+/* gf_event_notify_rsp: status pair, dict. */
+bool_t
+xdr_gf_event_notify_rsp (XDR *xdrs, gf_event_notify_rsp *objp)
+{
+        if (!xdr_int (xdrs, &objp->op_ret)
+            || !xdr_int (xdrs, &objp->op_errno)
+            || !xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+                return FALSE;
+        return TRUE;
+}
diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h
new file mode 100644
index 000000000..c44a48680
--- /dev/null
+++ b/rpc/xdr/src/glusterfs3-xdr.h
@@ -0,0 +1,1404 
@@
+/*
+  Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _GLUSTERFS3_XDR_H_RPCGEN
+#define _GLUSTERFS3_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Message structs.  Conventions visible in the declarations below:
+ *  - a nested struct { u_int X_len; char *X_val; } X is a counted byte
+ *    buffer (xdata, dict, lk_owner);
+ *  - every *_rsp struct starts with op_ret and op_errno;
+ *  - gfid / pargfid / oldgfid / newgfid are fixed 16-byte arrays.
+ * NOTE(review): *_req / *_rsp presumably pair up as the request and reply
+ * of one operation -- confirm against the protocol definition.
+ */
+
+/* Eleven unsigned 64-bit values (names resemble the statvfs fields). */
+struct gf_statfs {
+        u_quad_t bsize;
+        u_quad_t frsize;
+        u_quad_t blocks;
+        u_quad_t bfree;
+        u_quad_t bavail;
+        u_quad_t files;
+        u_quad_t ffree;
+        u_quad_t favail;
+        u_quad_t fsid;
+        u_quad_t flag;
+        u_quad_t namemax;
+};
+typedef struct gf_statfs gf_statfs;
+
+/* Lock description: type, whence, start, len, pid and a counted lk_owner buffer. */
+struct gf_proto_flock {
+        u_int type;
+        u_int whence;
+        u_quad_t start;
+        u_quad_t len;
+        u_int pid;
+        struct {
+                u_int lk_owner_len;
+                char *lk_owner_val;
+        } lk_owner;
+};
+typedef struct gf_proto_flock gf_proto_flock;
+
+/*
+ * Attribute record: 16-byte ia_gfid, ino/dev/rdev/size/blocks as 64-bit
+ * values, mode/nlink/uid/gid/blksize as u_int, and a seconds + nsec pair
+ * for each of atime, mtime and ctime.
+ */
+struct gf_iatt {
+        char ia_gfid[16];
+        u_quad_t ia_ino;
+        u_quad_t ia_dev;
+        u_int mode;
+        u_int ia_nlink;
+        u_int ia_uid;
+        u_int ia_gid;
+        u_quad_t ia_rdev;
+        u_quad_t ia_size;
+        u_int ia_blksize;
+        u_quad_t ia_blocks;
+        u_int ia_atime;
+        u_int ia_atime_nsec;
+        u_int ia_mtime;
+        u_int ia_mtime_nsec;
+        u_int ia_ctime;
+        u_int ia_ctime_nsec;
+};
+typedef struct gf_iatt gf_iatt;
+
+/* stat_req: gfid, xdata. */
+struct gfs3_stat_req {
+        char gfid[16];
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_stat_req gfs3_stat_req;
+
+/* stat_rsp: status pair, one gf_iatt, xdata. */
+struct gfs3_stat_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_stat_rsp gfs3_stat_rsp;
+
+/* readlink_req: gfid, size, xdata. */
+struct gfs3_readlink_req {
+        char gfid[16];
+        u_int size;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_readlink_req gfs3_readlink_req;
+
+/* readlink_rsp: status pair, gf_iatt buf, path string, xdata. */
+struct gfs3_readlink_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt buf;
+        char *path;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_readlink_rsp gfs3_readlink_rsp;
+
+/* mknod_req: pargfid, dev, mode, umask, bname, xdata. */
+struct gfs3_mknod_req {
+        char pargfid[16];
+        u_quad_t dev;
+        u_int mode;
+        u_int umask;
+        char *bname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_mknod_req gfs3_mknod_req;
+
+/* mknod_rsp: status pair, stat, preparent, postparent, xdata. */
+struct gfs3_mknod_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_mknod_rsp gfs3_mknod_rsp;
+
+/* mkdir_req: pargfid, mode, umask, bname, xdata. */
+struct gfs3_mkdir_req {
+        char pargfid[16];
+        u_int mode;
+        u_int umask;
+        char *bname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_mkdir_req gfs3_mkdir_req;
+
+/* mkdir_rsp: status pair, stat, preparent, postparent, xdata. */
+struct gfs3_mkdir_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_mkdir_rsp gfs3_mkdir_rsp;
+
+/* unlink_req: pargfid, bname, xflags (u_int, after bname), xdata. */
+struct gfs3_unlink_req {
+        char pargfid[16];
+        char *bname;
+        u_int xflags;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_unlink_req gfs3_unlink_req;
+
+/* unlink_rsp: status pair, preparent, postparent, xdata. */
+struct gfs3_unlink_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_unlink_rsp gfs3_unlink_rsp;
+
+/* rmdir_req: pargfid, xflags (int, before bname), bname, xdata. */
+struct gfs3_rmdir_req {
+        char pargfid[16];
+        int xflags;
+        char *bname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_rmdir_req gfs3_rmdir_req;
+
+/* rmdir_rsp: status pair, preparent, postparent, xdata. */
+struct gfs3_rmdir_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_rmdir_rsp gfs3_rmdir_rsp;
+
+/* symlink_req: pargfid, bname, umask, linkname, xdata. */
+struct gfs3_symlink_req {
+        char pargfid[16];
+        char *bname;
+        u_int umask;
+        char *linkname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_symlink_req gfs3_symlink_req;
+
+/* symlink_rsp: status pair, stat, preparent, postparent, xdata. */
+struct gfs3_symlink_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_symlink_rsp gfs3_symlink_rsp;
+
+/* rename_req: oldgfid, newgfid, oldbname, newbname, xdata. */
+struct gfs3_rename_req {
+        char oldgfid[16];
+        char newgfid[16];
+        char *oldbname;
+        char *newbname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_rename_req gfs3_rename_req;
+
+/* rename_rsp: status pair, stat, pre/post attributes of both parents, xdata. */
+struct gfs3_rename_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt preoldparent;
+        struct gf_iatt postoldparent;
+        struct gf_iatt prenewparent;
+        struct gf_iatt postnewparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_rename_rsp gfs3_rename_rsp;
+
+/* link_req: oldgfid, newgfid, newbname, xdata. */
+struct gfs3_link_req {
+        char oldgfid[16];
+        char newgfid[16];
+        char *newbname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_link_req gfs3_link_req;
+
+/* link_rsp: status pair, stat, preparent, postparent, xdata. */
+struct gfs3_link_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt preparent;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_link_rsp gfs3_link_rsp;
+
+/* truncate_req: gfid, offset, xdata. */
+struct gfs3_truncate_req {
+        char gfid[16];
+        u_quad_t offset;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_truncate_req gfs3_truncate_req;
+
+/* truncate_rsp: status pair, prestat, poststat, xdata. */
+struct gfs3_truncate_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt prestat;
+        struct gf_iatt poststat;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_truncate_rsp gfs3_truncate_rsp;
+
+/* open_req: gfid, flags, xdata. */
+struct gfs3_open_req {
+        char gfid[16];
+        u_int flags;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_open_req gfs3_open_req;
+
+/* open_rsp: status pair, fd (signed 64-bit), xdata. */
+struct gfs3_open_rsp {
+        int op_ret;
+        int op_errno;
+        quad_t fd;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_open_rsp gfs3_open_rsp;
+
+/* read_req: gfid, fd, offset, size, flag, xdata. */
+struct gfs3_read_req {
+        char gfid[16];
+        quad_t fd;
+        u_quad_t offset;
+        u_int size;
+        u_int flag;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_read_req gfs3_read_req;
+
+/* read_rsp: status pair, stat, size, xdata (no data buffer member here). */
+struct gfs3_read_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        u_int size;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_read_rsp gfs3_read_rsp;
+
+/* lookup_req: gfid, pargfid, flags, bname, xdata. */
+struct gfs3_lookup_req {
+        char gfid[16];
+        char pargfid[16];
+        u_int flags;
+        char *bname;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_lookup_req gfs3_lookup_req;
+
+/* lookup_rsp: status pair, stat, postparent, xdata. */
+struct gfs3_lookup_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt stat;
+        struct gf_iatt postparent;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_lookup_rsp gfs3_lookup_rsp;
+
+/* write_req: gfid, fd, offset, size, flag, xdata (no data buffer member here). */
+struct gfs3_write_req {
+        char gfid[16];
+        quad_t fd;
+        u_quad_t offset;
+        u_int size;
+        u_int flag;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_write_req gfs3_write_req;
+
+/* write_rsp: status pair, prestat, poststat, xdata. */
+struct gfs3_write_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt prestat;
+        struct gf_iatt poststat;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_write_rsp gfs3_write_rsp;
+
+/* statfs_req: gfid, xdata. */
+struct gfs3_statfs_req {
+        char gfid[16];
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_statfs_req gfs3_statfs_req;
+
+/* statfs_rsp: status pair, one gf_statfs, xdata. */
+struct gfs3_statfs_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_statfs statfs;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_statfs_rsp gfs3_statfs_rsp;
+
+/* lk_req: gfid, fd, cmd, type, flock, xdata. */
+struct gfs3_lk_req {
+        char gfid[16];
+        quad_t fd;
+        u_int cmd;
+        u_int type;
+        struct gf_proto_flock flock;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_lk_req gfs3_lk_req;
+
+/* lk_rsp: status pair, flock, xdata. */
+struct gfs3_lk_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_proto_flock flock;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_lk_rsp gfs3_lk_rsp;
+
+/* inodelk_req: gfid, cmd, type, flock, volume, xdata. */
+struct gfs3_inodelk_req {
+        char gfid[16];
+        u_int cmd;
+        u_int type;
+        struct gf_proto_flock flock;
+        char *volume;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_inodelk_req gfs3_inodelk_req;
+
+/* finodelk_req: gfid, fd, cmd, type, flock, volume, xdata. */
+struct gfs3_finodelk_req {
+        char gfid[16];
+        quad_t fd;
+        u_int cmd;
+        u_int type;
+        struct gf_proto_flock flock;
+        char *volume;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_finodelk_req gfs3_finodelk_req;
+
+/* flush_req: gfid, fd, xdata. */
+struct gfs3_flush_req {
+        char gfid[16];
+        quad_t fd;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_flush_req gfs3_flush_req;
+
+/* fsync_req: gfid, fd, data (u_int), xdata. */
+struct gfs3_fsync_req {
+        char gfid[16];
+        quad_t fd;
+        u_int data;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_fsync_req gfs3_fsync_req;
+
+/* fsync_rsp: status pair, prestat, poststat, xdata. */
+struct gfs3_fsync_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt prestat;
+        struct gf_iatt poststat;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_fsync_rsp gfs3_fsync_rsp;
+
+/* setxattr_req: gfid, flags, dict, xdata. */
+struct gfs3_setxattr_req {
+        char gfid[16];
+        u_int flags;
+        struct {
+                u_int dict_len;
+                char *dict_val;
+        } dict;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_setxattr_req gfs3_setxattr_req;
+
+/* fsetxattr_req: gfid, fd, flags, dict, xdata. */
+struct gfs3_fsetxattr_req {
+        char gfid[16];
+        quad_t fd;
+        u_int flags;
+        struct {
+                u_int dict_len;
+                char *dict_val;
+        } dict;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_fsetxattr_req gfs3_fsetxattr_req;
+
+struct gfs3_xattrop_req {
+        char gfid[16];
+        u_int flags;
+        struct {
+ 
u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_xattrop_req gfs3_xattrop_req; + +struct gfs3_xattrop_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_xattrop_rsp gfs3_xattrop_rsp; + +struct gfs3_fxattrop_req { + char gfid[16]; + quad_t fd; + u_int flags; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fxattrop_req gfs3_fxattrop_req; + +struct gfs3_fxattrop_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fxattrop_rsp gfs3_fxattrop_rsp; + +struct gfs3_getxattr_req { + char gfid[16]; + u_int namelen; + char *name; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_getxattr_req gfs3_getxattr_req; + +struct gfs3_getxattr_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_getxattr_rsp gfs3_getxattr_rsp; + +struct gfs3_fgetxattr_req { + char gfid[16]; + quad_t fd; + u_int namelen; + char *name; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fgetxattr_req gfs3_fgetxattr_req; + +struct gfs3_fgetxattr_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fgetxattr_rsp gfs3_fgetxattr_rsp; + +struct gfs3_removexattr_req { + char gfid[16]; + char *name; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_removexattr_req gfs3_removexattr_req; + +struct gfs3_fremovexattr_req { + char gfid[16]; + quad_t 
fd; + char *name; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fremovexattr_req gfs3_fremovexattr_req; + +struct gfs3_opendir_req { + char gfid[16]; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_opendir_req gfs3_opendir_req; + +struct gfs3_opendir_rsp { + int op_ret; + int op_errno; + quad_t fd; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_opendir_rsp gfs3_opendir_rsp; + +struct gfs3_fsyncdir_req { + char gfid[16]; + quad_t fd; + int data; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fsyncdir_req gfs3_fsyncdir_req; + +struct gfs3_readdir_req { + char gfid[16]; + quad_t fd; + u_quad_t offset; + u_int size; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_readdir_req gfs3_readdir_req; + +struct gfs3_readdirp_req { + char gfid[16]; + quad_t fd; + u_quad_t offset; + u_int size; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gfs3_readdirp_req gfs3_readdirp_req; + +struct gfs3_access_req { + char gfid[16]; + u_int mask; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_access_req gfs3_access_req; + +struct gfs3_create_req { + char pargfid[16]; + u_int flags; + u_int mode; + u_int umask; + char *bname; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_create_req gfs3_create_req; + +struct gfs3_create_rsp { + int op_ret; + int op_errno; + struct gf_iatt stat; + u_quad_t fd; + struct gf_iatt preparent; + struct gf_iatt postparent; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_create_rsp gfs3_create_rsp; + +struct gfs3_ftruncate_req { + char gfid[16]; + quad_t fd; + u_quad_t offset; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_ftruncate_req gfs3_ftruncate_req; + +struct gfs3_ftruncate_rsp { + int op_ret; + 
int op_errno; + struct gf_iatt prestat; + struct gf_iatt poststat; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_ftruncate_rsp gfs3_ftruncate_rsp; + +struct gfs3_fstat_req { + char gfid[16]; + quad_t fd; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fstat_req gfs3_fstat_req; + +struct gfs3_fstat_rsp { + int op_ret; + int op_errno; + struct gf_iatt stat; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fstat_rsp gfs3_fstat_rsp; + +struct gfs3_entrylk_req { + char gfid[16]; + u_int cmd; + u_int type; + u_quad_t namelen; + char *name; + char *volume; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_entrylk_req gfs3_entrylk_req; + +struct gfs3_fentrylk_req { + char gfid[16]; + quad_t fd; + u_int cmd; + u_int type; + u_quad_t namelen; + char *name; + char *volume; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fentrylk_req gfs3_fentrylk_req; + +struct gfs3_setattr_req { + char gfid[16]; + struct gf_iatt stbuf; + int valid; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_setattr_req gfs3_setattr_req; + +struct gfs3_setattr_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_setattr_rsp gfs3_setattr_rsp; + +struct gfs3_fsetattr_req { + quad_t fd; + struct gf_iatt stbuf; + int valid; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fsetattr_req gfs3_fsetattr_req; + +struct gfs3_fsetattr_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp; + +struct gfs3_fallocate_req { + char gfid[16]; + quad_t fd; + u_int flags; + u_quad_t offset; + u_quad_t 
size; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fallocate_req gfs3_fallocate_req; + +struct gfs3_fallocate_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fallocate_rsp gfs3_fallocate_rsp; + +struct gfs3_discard_req { + char gfid[16]; + quad_t fd; + u_quad_t offset; + u_quad_t size; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_discard_req gfs3_discard_req; + +struct gfs3_discard_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_discard_rsp gfs3_discard_rsp; + +struct gfs3_zerofill_req { + char gfid[16]; + quad_t fd; + u_quad_t offset; + u_quad_t size; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_zerofill_req gfs3_zerofill_req; + +struct gfs3_zerofill_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_zerofill_rsp gfs3_zerofill_rsp; + +struct gfs3_rchecksum_req { + quad_t fd; + u_quad_t offset; + u_int len; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_rchecksum_req gfs3_rchecksum_req; + +struct gfs3_rchecksum_rsp { + int op_ret; + int op_errno; + u_int weak_checksum; + struct { + u_int strong_checksum_len; + char *strong_checksum_val; + } strong_checksum; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_rchecksum_rsp gfs3_rchecksum_rsp; + +struct gfs3_ipc_req { + int op; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_ipc_req gfs3_ipc_req; + +struct gf_setvolume_req { + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf_setvolume_req 
gf_setvolume_req; + +struct gf_setvolume_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf_setvolume_rsp gf_setvolume_rsp; + +struct gf_getspec_req { + u_int flags; + char *key; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gf_getspec_req gf_getspec_req; + +struct gf_getspec_rsp { + int op_ret; + int op_errno; + char *spec; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gf_getspec_rsp gf_getspec_rsp; + +struct gf_mgmt_hndsk_req { + struct { + u_int hndsk_len; + char *hndsk_val; + } hndsk; +}; +typedef struct gf_mgmt_hndsk_req gf_mgmt_hndsk_req; + +struct gf_mgmt_hndsk_rsp { + int op_ret; + int op_errno; + struct { + u_int hndsk_len; + char *hndsk_val; + } hndsk; +}; +typedef struct gf_mgmt_hndsk_rsp gf_mgmt_hndsk_rsp; + +struct gf_log_req { + struct { + u_int msg_len; + char *msg_val; + } msg; +}; +typedef struct gf_log_req gf_log_req; + +struct gf_notify_req { + u_int flags; + char *buf; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gf_notify_req gf_notify_req; + +struct gf_notify_rsp { + int op_ret; + int op_errno; + u_int flags; + char *buf; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gf_notify_rsp gf_notify_rsp; + +struct gfs3_releasedir_req { + char gfid[16]; + quad_t fd; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_releasedir_req gfs3_releasedir_req; + +struct gfs3_release_req { + char gfid[16]; + quad_t fd; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_release_req gfs3_release_req; + +struct gf_common_rsp { + int op_ret; + int op_errno; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gf_common_rsp gf_common_rsp; + +struct gfs3_dirlist { + u_quad_t d_ino; + u_quad_t d_off; + u_int d_len; + u_int d_type; + char *name; + struct gfs3_dirlist *nextentry; 
+}; +typedef struct gfs3_dirlist gfs3_dirlist; + +struct gfs3_readdir_rsp { + int op_ret; + int op_errno; + struct gfs3_dirlist *reply; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_readdir_rsp gfs3_readdir_rsp; + +struct gfs3_dirplist { + u_quad_t d_ino; + u_quad_t d_off; + u_int d_len; + u_int d_type; + char *name; + struct gf_iatt stat; + struct { + u_int dict_len; + char *dict_val; + } dict; + struct gfs3_dirplist *nextentry; +}; +typedef struct gfs3_dirplist gfs3_dirplist; + +struct gfs3_readdirp_rsp { + int op_ret; + int op_errno; + struct gfs3_dirplist *reply; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp; + +struct gf_set_lk_ver_rsp { + int op_ret; + int op_errno; + int lk_ver; +}; +typedef struct gf_set_lk_ver_rsp gf_set_lk_ver_rsp; + +struct gf_set_lk_ver_req { + char *uid; + int lk_ver; +}; +typedef struct gf_set_lk_ver_req gf_set_lk_ver_req; + +struct gf_event_notify_req { + int op; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf_event_notify_req gf_event_notify_req; + +struct gf_event_notify_rsp { + int op_ret; + int op_errno; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gf_event_notify_rsp gf_event_notify_rsp; + +/* the xdr functions */ + +#if defined(__STDC__) || defined(__cplusplus) +extern bool_t xdr_gf_statfs (XDR *, gf_statfs*); +extern bool_t xdr_gf_proto_flock (XDR *, gf_proto_flock*); +extern bool_t xdr_gf_iatt (XDR *, gf_iatt*); +extern bool_t xdr_gfs3_stat_req (XDR *, gfs3_stat_req*); +extern bool_t xdr_gfs3_stat_rsp (XDR *, gfs3_stat_rsp*); +extern bool_t xdr_gfs3_readlink_req (XDR *, gfs3_readlink_req*); +extern bool_t xdr_gfs3_readlink_rsp (XDR *, gfs3_readlink_rsp*); +extern bool_t xdr_gfs3_mknod_req (XDR *, gfs3_mknod_req*); +extern bool_t xdr_gfs3_mknod_rsp (XDR *, gfs3_mknod_rsp*); +extern bool_t xdr_gfs3_mkdir_req (XDR *, gfs3_mkdir_req*); +extern bool_t 
xdr_gfs3_mkdir_rsp (XDR *, gfs3_mkdir_rsp*); +extern bool_t xdr_gfs3_unlink_req (XDR *, gfs3_unlink_req*); +extern bool_t xdr_gfs3_unlink_rsp (XDR *, gfs3_unlink_rsp*); +extern bool_t xdr_gfs3_rmdir_req (XDR *, gfs3_rmdir_req*); +extern bool_t xdr_gfs3_rmdir_rsp (XDR *, gfs3_rmdir_rsp*); +extern bool_t xdr_gfs3_symlink_req (XDR *, gfs3_symlink_req*); +extern bool_t xdr_gfs3_symlink_rsp (XDR *, gfs3_symlink_rsp*); +extern bool_t xdr_gfs3_rename_req (XDR *, gfs3_rename_req*); +extern bool_t xdr_gfs3_rename_rsp (XDR *, gfs3_rename_rsp*); +extern bool_t xdr_gfs3_link_req (XDR *, gfs3_link_req*); +extern bool_t xdr_gfs3_link_rsp (XDR *, gfs3_link_rsp*); +extern bool_t xdr_gfs3_truncate_req (XDR *, gfs3_truncate_req*); +extern bool_t xdr_gfs3_truncate_rsp (XDR *, gfs3_truncate_rsp*); +extern bool_t xdr_gfs3_open_req (XDR *, gfs3_open_req*); +extern bool_t xdr_gfs3_open_rsp (XDR *, gfs3_open_rsp*); +extern bool_t xdr_gfs3_read_req (XDR *, gfs3_read_req*); +extern bool_t xdr_gfs3_read_rsp (XDR *, gfs3_read_rsp*); +extern bool_t xdr_gfs3_lookup_req (XDR *, gfs3_lookup_req*); +extern bool_t xdr_gfs3_lookup_rsp (XDR *, gfs3_lookup_rsp*); +extern bool_t xdr_gfs3_write_req (XDR *, gfs3_write_req*); +extern bool_t xdr_gfs3_write_rsp (XDR *, gfs3_write_rsp*); +extern bool_t xdr_gfs3_statfs_req (XDR *, gfs3_statfs_req*); +extern bool_t xdr_gfs3_statfs_rsp (XDR *, gfs3_statfs_rsp*); +extern bool_t xdr_gfs3_lk_req (XDR *, gfs3_lk_req*); +extern bool_t xdr_gfs3_lk_rsp (XDR *, gfs3_lk_rsp*); +extern bool_t xdr_gfs3_inodelk_req (XDR *, gfs3_inodelk_req*); +extern bool_t xdr_gfs3_finodelk_req (XDR *, gfs3_finodelk_req*); +extern bool_t xdr_gfs3_flush_req (XDR *, gfs3_flush_req*); +extern bool_t xdr_gfs3_fsync_req (XDR *, gfs3_fsync_req*); +extern bool_t xdr_gfs3_fsync_rsp (XDR *, gfs3_fsync_rsp*); +extern bool_t xdr_gfs3_setxattr_req (XDR *, gfs3_setxattr_req*); +extern bool_t xdr_gfs3_fsetxattr_req (XDR *, gfs3_fsetxattr_req*); +extern bool_t xdr_gfs3_xattrop_req (XDR *, 
gfs3_xattrop_req*); +extern bool_t xdr_gfs3_xattrop_rsp (XDR *, gfs3_xattrop_rsp*); +extern bool_t xdr_gfs3_fxattrop_req (XDR *, gfs3_fxattrop_req*); +extern bool_t xdr_gfs3_fxattrop_rsp (XDR *, gfs3_fxattrop_rsp*); +extern bool_t xdr_gfs3_getxattr_req (XDR *, gfs3_getxattr_req*); +extern bool_t xdr_gfs3_getxattr_rsp (XDR *, gfs3_getxattr_rsp*); +extern bool_t xdr_gfs3_fgetxattr_req (XDR *, gfs3_fgetxattr_req*); +extern bool_t xdr_gfs3_fgetxattr_rsp (XDR *, gfs3_fgetxattr_rsp*); +extern bool_t xdr_gfs3_removexattr_req (XDR *, gfs3_removexattr_req*); +extern bool_t xdr_gfs3_fremovexattr_req (XDR *, gfs3_fremovexattr_req*); +extern bool_t xdr_gfs3_opendir_req (XDR *, gfs3_opendir_req*); +extern bool_t xdr_gfs3_opendir_rsp (XDR *, gfs3_opendir_rsp*); +extern bool_t xdr_gfs3_fsyncdir_req (XDR *, gfs3_fsyncdir_req*); +extern bool_t xdr_gfs3_readdir_req (XDR *, gfs3_readdir_req*); +extern bool_t xdr_gfs3_readdirp_req (XDR *, gfs3_readdirp_req*); +extern bool_t xdr_gfs3_access_req (XDR *, gfs3_access_req*); +extern bool_t xdr_gfs3_create_req (XDR *, gfs3_create_req*); +extern bool_t xdr_gfs3_create_rsp (XDR *, gfs3_create_rsp*); +extern bool_t xdr_gfs3_ftruncate_req (XDR *, gfs3_ftruncate_req*); +extern bool_t xdr_gfs3_ftruncate_rsp (XDR *, gfs3_ftruncate_rsp*); +extern bool_t xdr_gfs3_fstat_req (XDR *, gfs3_fstat_req*); +extern bool_t xdr_gfs3_fstat_rsp (XDR *, gfs3_fstat_rsp*); +extern bool_t xdr_gfs3_entrylk_req (XDR *, gfs3_entrylk_req*); +extern bool_t xdr_gfs3_fentrylk_req (XDR *, gfs3_fentrylk_req*); +extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*); +extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*); +extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*); +extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*); +extern bool_t xdr_gfs3_fallocate_req (XDR *, gfs3_fallocate_req*); +extern bool_t xdr_gfs3_fallocate_rsp (XDR *, gfs3_fallocate_rsp*); +extern bool_t xdr_gfs3_discard_req (XDR *, gfs3_discard_req*); +extern 
bool_t xdr_gfs3_discard_rsp (XDR *, gfs3_discard_rsp*); +extern bool_t xdr_gfs3_zerofill_req (XDR *, gfs3_zerofill_req*); +extern bool_t xdr_gfs3_zerofill_rsp (XDR *, gfs3_zerofill_rsp*); +extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*); +extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*); +extern bool_t xdr_gfs3_ipc_req (XDR *, gfs3_ipc_req*); +extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*); +extern bool_t xdr_gf_setvolume_rsp (XDR *, gf_setvolume_rsp*); +extern bool_t xdr_gf_getspec_req (XDR *, gf_getspec_req*); +extern bool_t xdr_gf_getspec_rsp (XDR *, gf_getspec_rsp*); +extern bool_t xdr_gf_mgmt_hndsk_req (XDR *, gf_mgmt_hndsk_req*); +extern bool_t xdr_gf_mgmt_hndsk_rsp (XDR *, gf_mgmt_hndsk_rsp*); +extern bool_t xdr_gf_log_req (XDR *, gf_log_req*); +extern bool_t xdr_gf_notify_req (XDR *, gf_notify_req*); +extern bool_t xdr_gf_notify_rsp (XDR *, gf_notify_rsp*); +extern bool_t xdr_gfs3_releasedir_req (XDR *, gfs3_releasedir_req*); +extern bool_t xdr_gfs3_release_req (XDR *, gfs3_release_req*); +extern bool_t xdr_gf_common_rsp (XDR *, gf_common_rsp*); +extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*); +extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*); +extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*); +extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*); +extern bool_t xdr_gf_set_lk_ver_rsp (XDR *, gf_set_lk_ver_rsp*); +extern bool_t xdr_gf_set_lk_ver_req (XDR *, gf_set_lk_ver_req*); +extern bool_t xdr_gf_event_notify_req (XDR *, gf_event_notify_req*); +extern bool_t xdr_gf_event_notify_rsp (XDR *, gf_event_notify_rsp*); + +#else /* K&R C */ +extern bool_t xdr_gf_statfs (); +extern bool_t xdr_gf_proto_flock (); +extern bool_t xdr_gf_iatt (); +extern bool_t xdr_gfs3_stat_req (); +extern bool_t xdr_gfs3_stat_rsp (); +extern bool_t xdr_gfs3_readlink_req (); +extern bool_t xdr_gfs3_readlink_rsp (); +extern bool_t xdr_gfs3_mknod_req (); +extern bool_t xdr_gfs3_mknod_rsp (); +extern 
bool_t xdr_gfs3_mkdir_req (); +extern bool_t xdr_gfs3_mkdir_rsp (); +extern bool_t xdr_gfs3_unlink_req (); +extern bool_t xdr_gfs3_unlink_rsp (); +extern bool_t xdr_gfs3_rmdir_req (); +extern bool_t xdr_gfs3_rmdir_rsp (); +extern bool_t xdr_gfs3_symlink_req (); +extern bool_t xdr_gfs3_symlink_rsp (); +extern bool_t xdr_gfs3_rename_req (); +extern bool_t xdr_gfs3_rename_rsp (); +extern bool_t xdr_gfs3_link_req (); +extern bool_t xdr_gfs3_link_rsp (); +extern bool_t xdr_gfs3_truncate_req (); +extern bool_t xdr_gfs3_truncate_rsp (); +extern bool_t xdr_gfs3_open_req (); +extern bool_t xdr_gfs3_open_rsp (); +extern bool_t xdr_gfs3_read_req (); +extern bool_t xdr_gfs3_read_rsp (); +extern bool_t xdr_gfs3_lookup_req (); +extern bool_t xdr_gfs3_lookup_rsp (); +extern bool_t xdr_gfs3_write_req (); +extern bool_t xdr_gfs3_write_rsp (); +extern bool_t xdr_gfs3_statfs_req (); +extern bool_t xdr_gfs3_statfs_rsp (); +extern bool_t xdr_gfs3_lk_req (); +extern bool_t xdr_gfs3_lk_rsp (); +extern bool_t xdr_gfs3_inodelk_req (); +extern bool_t xdr_gfs3_finodelk_req (); +extern bool_t xdr_gfs3_flush_req (); +extern bool_t xdr_gfs3_fsync_req (); +extern bool_t xdr_gfs3_fsync_rsp (); +extern bool_t xdr_gfs3_setxattr_req (); +extern bool_t xdr_gfs3_fsetxattr_req (); +extern bool_t xdr_gfs3_xattrop_req (); +extern bool_t xdr_gfs3_xattrop_rsp (); +extern bool_t xdr_gfs3_fxattrop_req (); +extern bool_t xdr_gfs3_fxattrop_rsp (); +extern bool_t xdr_gfs3_getxattr_req (); +extern bool_t xdr_gfs3_getxattr_rsp (); +extern bool_t xdr_gfs3_fgetxattr_req (); +extern bool_t xdr_gfs3_fgetxattr_rsp (); +extern bool_t xdr_gfs3_removexattr_req (); +extern bool_t xdr_gfs3_fremovexattr_req (); +extern bool_t xdr_gfs3_opendir_req (); +extern bool_t xdr_gfs3_opendir_rsp (); +extern bool_t xdr_gfs3_fsyncdir_req (); +extern bool_t xdr_gfs3_readdir_req (); +extern bool_t xdr_gfs3_readdirp_req (); +extern bool_t xdr_gfs3_access_req (); +extern bool_t xdr_gfs3_create_req (); +extern bool_t xdr_gfs3_create_rsp (); 
+extern bool_t xdr_gfs3_ftruncate_req (); +extern bool_t xdr_gfs3_ftruncate_rsp (); +extern bool_t xdr_gfs3_fstat_req (); +extern bool_t xdr_gfs3_fstat_rsp (); +extern bool_t xdr_gfs3_entrylk_req (); +extern bool_t xdr_gfs3_fentrylk_req (); +extern bool_t xdr_gfs3_setattr_req (); +extern bool_t xdr_gfs3_setattr_rsp (); +extern bool_t xdr_gfs3_fsetattr_req (); +extern bool_t xdr_gfs3_fsetattr_rsp (); +extern bool_t xdr_gfs3_fallocate_req (); +extern bool_t xdr_gfs3_fallocate_rsp (); +extern bool_t xdr_gfs3_discard_req (); +extern bool_t xdr_gfs3_discard_rsp (); +extern bool_t xdr_gfs3_zerofill_req (); +extern bool_t xdr_gfs3_zerofill_rsp (); +extern bool_t xdr_gfs3_rchecksum_req (); +extern bool_t xdr_gfs3_rchecksum_rsp (); +extern bool_t xdr_gfs3_ipc_req (); +extern bool_t xdr_gf_setvolume_req (); +extern bool_t xdr_gf_setvolume_rsp (); +extern bool_t xdr_gf_getspec_req (); +extern bool_t xdr_gf_getspec_rsp (); +extern bool_t xdr_gf_mgmt_hndsk_req (); +extern bool_t xdr_gf_mgmt_hndsk_rsp (); +extern bool_t xdr_gf_log_req (); +extern bool_t xdr_gf_notify_req (); +extern bool_t xdr_gf_notify_rsp (); +extern bool_t xdr_gfs3_releasedir_req (); +extern bool_t xdr_gfs3_release_req (); +extern bool_t xdr_gf_common_rsp (); +extern bool_t xdr_gfs3_dirlist (); +extern bool_t xdr_gfs3_readdir_rsp (); +extern bool_t xdr_gfs3_dirplist (); +extern bool_t xdr_gfs3_readdirp_rsp (); +extern bool_t xdr_gf_set_lk_ver_rsp (); +extern bool_t xdr_gf_set_lk_ver_req (); +extern bool_t xdr_gf_event_notify_req (); +extern bool_t xdr_gf_event_notify_rsp (); + +#endif /* K&R C */ + +#ifdef __cplusplus +} +#endif + +#endif /* !_GLUSTERFS3_XDR_H_RPCGEN */ diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x index feef7e264..aabb14f26 100644 --- a/rpc/xdr/src/glusterfs3-xdr.x +++ b/rpc/xdr/src/glusterfs3-xdr.x @@ -630,6 +630,11 @@ struct gfs3_fstat_req { opaque xdata<>; /* Extra data */ } ; +struct gfs3_ipc_req { + int op; + opaque xdata<>; +}; + struct gf_setvolume_req { 
opaque dict<>; @@ -744,6 +749,7 @@ struct gf_set_lk_ver_req { int lk_ver; }; + struct gf_event_notify_req { int op; opaque dict<>; diff --git a/tests/basic/four-brick.t b/tests/basic/four-brick.t new file mode 100755 index 000000000..a8d9cd400 --- /dev/null +++ b/tests/basic/four-brick.t @@ -0,0 +1,85 @@ +#!/bin/bash + +# Test NSR on a four-brick (2x2) volume - write while one brick of each pair +# is down, then check that reconciliation restores the second copy. + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_rep_count { + v=$(getfattr --only-values -e text -n trusted.nsr.rep-count $1 2> /dev/null) + #echo $v > /dev/tty + echo $v +} + +function ping_file { + dd if=/dev/urandom of=$1 bs=4k count=100 2> /dev/null +} + +function kill_brick { + bpid=$(cat /var/lib/glusterd/vols/${V0}/run/*-${V0}${1}.pid) + rpid=$(cat /var/lib/glusterd/vols/${V0}/run/*-${V0}${1}-recon.pid) + echo "brick PID = $bpid" > /dev/tty + echo "recon PID = $rpid" > /dev/tty + kill -9 $bpid $rpid +} + +function count_matches { + n=0 + for f in $B0/$V0[1234]/$1; do + cmp $M0/$1 $f 2> /dev/null + if [ $? = 0 ]; then + n=$((n+1)) + fi + done + echo $n +} + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST mkdir -p $B0/${V0}{1,2,3,4} +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '4' brick_count $V0 + +TEST $CLI volume set $V0 cluster.nsr on +TEST $CLI volume set $V0 cluster.nsr.recon on + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +## Mount FUSE with caching disabled +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Give the bricks a chance to connect to each other. +EXPECT_WITHIN 10 "2" get_rep_count $M0 + +TEST ping_file $M0/probe +TEST cmp ${M0}/probe ${B0}/${V0}1/probe +TEST cmp ${M0}/probe ${B0}/${V0}2/probe + +# Kill one brick from each pair. 
+TEST kill_brick 1 +TEST kill_brick 3 +sleep 10 + +# Make sure only one copy makes it while degraded. +TEST ping_file $M0/probe2 +TEST [ $(count_matches probe2) = 1 ] + +# Restart the brick and give reconciliation a chance to run. +# TBD: figure out why reconciliation takes so $#@! long to run +TEST $CLI volume start $V0 force +sleep 20 + +# Make sure *both* copies are valid after reconciliation. +TEST [ $(count_matches probe2) = 2 ] + +cleanup +#killall -9 etcd diff --git a/tests/basic/nsr.t b/tests/basic/nsr.t new file mode 100755 index 000000000..5dd97f2bf --- /dev/null +++ b/tests/basic/nsr.t @@ -0,0 +1,47 @@ +#!/bin/bash + +# Test *very basic* NSR functionality - startup, mount, simplest possible file +# write. + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_rep_count { + v=$(getfattr --only-values -e text -n trusted.nsr.rep-count $1 2> /dev/null) + #echo $v > /dev/tty + echo $v +} + +function ping_file { + dd if=/dev/urandom of=$1 bs=4k count=100 2> /dev/null +} + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '2' brick_count $V0 + +TEST $CLI volume set $V0 cluster.nsr on + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +## Mount FUSE with caching disabled (read-only) +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Give the bricks a chance to connect to each other. 
+EXPECT_WITHIN 10 "2" get_rep_count $M0 + +TEST ping_file $M0/probe +TEST cmp ${M0}/probe ${B0}/${V0}1/probe +TEST cmp ${M0}/probe ${B0}/${V0}2/probe + +cleanup +killall -9 etcd diff --git a/tests/basic/quorum.t b/tests/basic/quorum.t new file mode 100644 index 000000000..b8fc9cf3a --- /dev/null +++ b/tests/basic/quorum.t @@ -0,0 +1,64 @@ +#!/bin/bash + +# Test NSR quorum enforcement - with one of the two bricks killed, a write +# through the mount must fail with 'Read-only file system'. + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_rep_count { + v=$(getfattr --only-values -e text -n trusted.nsr.rep-count $1 2> /dev/null) + #echo $v > /dev/tty + echo $v +} + +function kill_a_brick { + for r in /var/lib/glusterd/vols/${V0}/run/*-recon.pid; do + rpid=$(cat $r) + #echo "recon PID = $rpid" > /dev/tty + b=$(echo $r | sed '/\(.*\):\(.*\)-recon.pid/s//\1\2.pid/') + bpid=$(cat $b) + #echo "brick PID = $bpid" > /dev/tty + kill -9 $bpid $rpid + return 0 + done + + # No bricks?!? + return 1 +} + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '2' brick_count $V0 + +TEST $CLI volume set $V0 cluster.nsr on +TEST $CLI volume set $V0 cluster.nsr.recon on + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +## Mount FUSE with caching disabled +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Give the bricks a chance to connect to each other. +EXPECT_WITHIN 10 "2" get_rep_count $M0 + +TEST kill_a_brick +EXPECT_WITHIN 10 "1" get_rep_count $M0 + +# Make sure writes fail while degraded. +tmpfile=$(mktemp) +trap "rm $tmpfile" EXIT +dd if=/dev/urandom of=$M0/probe bs=4k count=100 status=none 2> $tmpfile +TEST [ x"$?" 
!= x"0" ] +TEST grep -qs 'Read-only file system' $tmpfile + +cleanup diff --git a/tests/basic/recon.t b/tests/basic/recon.t new file mode 100755 index 000000000..e0fbea749 --- /dev/null +++ b/tests/basic/recon.t @@ -0,0 +1,190 @@ +#!/bin/bash + +# Test NSR reconciliation - issue a mix of operations while one brick is down, +# then check that both bricks agree after it has been restarted. + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function my_getfattr { + getfattr --only-values -e text $* 2> /dev/null +} + +function get_rep_count { + v=$(my_getfattr -n trusted.nsr.rep-count $1) + #echo $v > /dev/tty + echo $v +} + +function create_file { + dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null +} + +function kill_a_brick { + for r in /var/lib/glusterd/vols/${V0}/run/*-recon.pid; do + rpid=$(cat $r) + echo "recon PID = $rpid" > /dev/tty + b=$(echo $r | sed '/\(.*\):\(.*\)-recon.pid/s//\1\2.pid/') + bpid=$(cat $b) + echo "brick PID = $bpid" > /dev/tty + kill -9 $bpid $rpid + return 0 + done + + # No bricks?!? + return 1 +} + +# Functions to check reconciliation for specific operation types. + +function check_create_write { + for b in $*; do + cmp $tmpdir/create-write $b/create-write || return 1 + done + return 0 +} + +function check_truncate { + truncate --size=8192 $tmpdir/truncate + for b in $*; do + cmp $tmpdir/truncate $b/truncate || return 1 + done + return 0 +} + +function check_hard_link { + for b in $*; do + inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1) + inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1) + [ "$inum1" = "$inum2" ] || return 1 + done + return 0 +} + +function check_soft_link { + for b in $*; do + [ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1 + done + return 0 +} + +function check_unlink { + for b in $*; do + [ ! -e $b/unlink ] || return 1 + done + return 0 +} + +function check_mkdir { + for b in $*; do + [ -d $b/mkdir ] || return 1 + done + return 0 +} + +function check_rmdir { + for b in $*; do + [ ! 
-e $b/rmdir ] || return 1 + done +} + +function check_setxattr { + for b in $*; do + v=$(my_getfattr -n user.foo $b/setxattr) + [ "$v" = "ash_nazg_durbatuluk" ] || return 1 + done + return 0 +} + +function check_removexattr { + for b in $*; do + my_getfattr -n user.bar $b/removexattr 2> /dev/null + [ $? = 0 ] && return 1 + done + return 0 +} + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '2' brick_count $V0 + +TEST $CLI volume set $V0 cluster.nsr on +TEST $CLI volume set $V0 cluster.nsr.recon on + +# This would normally be a terrible idea, but it's handy for issuing ops that +# will have to be reconciled later. +TEST $CLI volume set $V0 cluster.nsr.quorum-percent 0 + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +# Mount FUSE with caching disabled +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Give the bricks a chance to connect to each other. +EXPECT_WITHIN 10 "2" get_rep_count $M0 + +# Create local files for comparisons etc. +tmpdir=$(mktemp -d) +trap "rm -rf $tmpdir" EXIT +TEST create_file $tmpdir/create-write 10 +TEST create_file $tmpdir/truncate 10 + +# Prepare files and directories we'll need later. +TEST cp $tmpdir/truncate $M0/ +TEST touch $M0/hard-link-1 +TEST touch $M0/unlink +TEST mkdir $M0/rmdir +TEST touch $M0/setxattr +TEST touch $M0/removexattr +TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr + +# Kill a brick and wait for a new leader to take over. +TEST kill_a_brick +sleep 10 + +# Test create+write +TEST cp $tmpdir/create-write $M0/ +# Test truncate +TEST truncate --size=8192 $M0/truncate +# Test hard link +TEST ln $M0/hard-link-1 $M0/hard-link-2 +# Test soft link +# NOTE(review): formerly marked disabled (it failed and crashed the recon daemon), but the TEST that follows is enabled - confirm which is intended. 
+TEST ln -s soft-link-tgt $M0/soft-link +# Test unlink +TEST rm $M0/unlink +# Test mkdir +TEST mkdir $M0/mkdir +# Test rmdir +TEST rmdir $M0/rmdir +# Test setxattr +TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr +# Test removexattr +TEST setfattr -x user.bar $M0/removexattr + +# Restart the brick and give reconciliation a chance to run. +TEST $CLI volume start $V0 force +sleep 20 + +# Make sure everything is as it should be. +TEST check_create_write $B0/${V0}{1,2} +TEST check_truncate $B0/${V0}{1,2} +TEST check_hard_link $B0/${V0}{1,2} +TEST check_soft_link $B0/${V0}{1,2} +TEST check_unlink $B0/${V0}{1,2} +TEST check_mkdir $B0/${V0}{1,2} +TEST check_rmdir $B0/${V0}{1,2} +#EST check_setxattr $B0/${V0}{1,2} +#EST check_removexattr $B0/${V0}{1,2} + +cleanup +#killall -9 etcd diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am index 0990822a7..6e883e565 100644 --- a/xlators/cluster/Makefile.am +++ b/xlators/cluster/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = stripe afr dht +SUBDIRS = stripe afr dht nsr-server nsr-recon nsr-client CLEANFILES = diff --git a/xlators/cluster/nsr-client/Makefile.am b/xlators/cluster/nsr-client/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/nsr-client/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/nsr-client/src/Makefile.am b/xlators/cluster/nsr-client/src/Makefile.am new file mode 100644 index 000000000..4541ea01a --- /dev/null +++ b/xlators/cluster/nsr-client/src/Makefile.am @@ -0,0 +1,33 @@ +noinst_PYTHON = gen-fops.py + +xlator_LTLIBRARIES = nsrc.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +nsrc_la_LDFLAGS = -module -avoid-version +nsrc_la_SOURCES = nsrc.c + +nsrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = fop-template.c \ + $(top_srcdir)/xlators/lib/src/libxlator.h \ + $(top_srcdir)/glusterfsd/src/glusterfsd.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + 
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \ + -I$(top_srcdir)/rpc/rpc-lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +XLATOR_HEADER = $(top_srcdir)/libglusterfs/src/xlator.h + +CLEANFILES = nsrc-cg.c + +CODEGEN_DIR = ../../nsr-server/src/codegen.py + +nsrc-cg.c: gen-fops.py $(CODEGEN) $(XLATOR_HEADER) fop-template.c + $(PYTHON) ./gen-fops.py $(XLATOR_HEADER) fop-template.c > $@ + +nsrc.lo: nsrc-cg.c + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/nsr.so diff --git a/xlators/cluster/nsr-client/src/fop-template.c b/xlators/cluster/nsr-client/src/fop-template.c new file mode 100644 index 000000000..699b07d40 --- /dev/null +++ b/xlators/cluster/nsr-client/src/fop-template.c @@ -0,0 +1,113 @@ +// template-name fop +$TYPE$ +nsrc_$NAME$ (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsrc_local_t *local = NULL; + xlator_t *target_xl = ACTIVE_CHILD(this); + + local = mem_get(this->local_pool); + if (!local) { + goto err; + } + + local->stub = fop_$NAME$_stub (frame, nsrc_$NAME$_continue, + $ARGS_SHORT$); + if (!local->stub) { + goto err; + } + local->curr_xl = target_xl; + local->scars = 0; + + frame->local = local; + STACK_WIND_COOKIE (frame, nsrc_$NAME$_cbk, target_xl, + target_xl, target_xl->fops->$NAME$, + $ARGS_SHORT$); + return 0; + +err: + if (local) { + mem_put(local); + } + STACK_UNWIND_STRICT ($NAME$, frame, -1, ENOMEM, + $DEFAULTS$); + return 0; +} + +// template-name cbk +$TYPE$ +nsrc_$NAME$_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + $ARGS_LONG$) +{ + nsrc_local_t *local = frame->local; + xlator_t *last_xl = cookie; + xlator_t *next_xl; + nsrc_private_t *priv = this->private; + struct timespec spec; + + if (op_ret != (-1)) { + if (local->scars) { + gf_log (this->name, GF_LOG_INFO, + HILITE("retried %p OK"), frame->local); + } + priv->active = last_xl; + goto unwind; + } + if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { + goto unwind; + } + + /* TBD: get leader ID from 
xdata? */ + next_xl = next_xlator(this,last_xl); + /* + * We can't just give up after we've tried all bricks, because it's + * quite likely that a new leader election just hasn't finished yet. + * We also shouldn't retry endlessly, and especially not at a high + * rate, but that's good enough while we work on other things. + * + * TBD: implement slow/finite retry via a worker thread + */ + if (!next_xl || (local->scars >= SCAR_LIMIT)) { + gf_log (this->name, GF_LOG_DEBUG, + HILITE("ran out of retries for %p"), frame->local); + goto unwind; + } + + local->curr_xl = next_xl; + local->scars += 1; + spec.tv_sec = 1; + spec.tv_nsec = 0; + /* + * WARNING + * + * Just calling gf_timer_call_after like this leaves open the + * possibility that writes will get reordered, if a first write is + * rescheduled and then a second comes along to find an updated + * priv->active before the first actually executes. We might need to + * implement a stricter (and more complicated) queuing mechanism to + * ensure absolute consistency in this case. + */ + if (gf_timer_call_after(this->ctx,spec,nsrc_retry_cb,local)) { + return 0; + } + +unwind: + call_stub_destroy(local->stub); + STACK_UNWIND_STRICT ($NAME$, frame, op_ret, op_errno, + $ARGS_SHORT$); + return 0; +} + +// template-name cont-func +$TYPE$ +nsrc_$NAME$_continue (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsrc_local_t *local = frame->local; + + STACK_WIND_COOKIE (frame, nsrc_$NAME$_cbk, local->curr_xl, + local->curr_xl, local->curr_xl->fops->$NAME$, + $ARGS_SHORT$); + return 0; +} diff --git a/xlators/cluster/nsr-client/src/gen-fops.py b/xlators/cluster/nsr-client/src/gen-fops.py new file mode 100755 index 000000000..b07b3c5b1 --- /dev/null +++ b/xlators/cluster/nsr-client/src/gen-fops.py @@ -0,0 +1,57 @@ +#!/usr/bin/python + +# This script generates the boilerplate versions of most fops in the client, +# mostly so that we can use STACK_WIND instead of STACK_WIND_TAIL (see +# fop-template.c for the details). 
The problem we're solving is that we sit +# under DHT, which makes assumptions about getting callbacks only from its +# direct children. If we didn't define our own versions of these fops, the +# default versions would use STACK_WIND_TAIL and the callbacks would come from +# DHT's grandchildren. The code-generation approach allows us to handle this +# with a minimum of code, and also keep up with any changes to the fop table. + +import sys +sys.path.append("../../nsr-server/src") # Blech. +import codegen + +type_re = "([a-z_0-9]+)" +name_re = "\(\*fop_([a-z0-9]+)_t\)" +full_re = type_re + " *" + name_re +fop_cg = codegen.CodeGenerator() +fop_cg.skip = 2 +fop_cg.parse_decls(sys.argv[1],full_re) +fop_cg.load_templates(sys.argv[2]) + +# Use the multi-template feature to generate multiple callbacks from the same +# parsed declarations. +type_re = "([a-z_0-9]+)" +name_re = "\(\*fop_([a-z0-9]+)_cbk_t\)" +full_re = type_re + " *" + name_re +cbk_cg = codegen.CodeGenerator() +cbk_cg.skip = 5 +cbk_cg.parse_decls(sys.argv[1],full_re) +cbk_cg.load_templates(sys.argv[2]) + +# This is a nasty little trick to handle the case where a generated fop needs +# a set of default arguments for the corresponding callback. +# +# Yes, it's ironic that I'm copying and pasting the generator code. +fop_cg.make_defaults = cbk_cg.make_defaults + +# Sorry, getspec, you're not a real fop until someone writes a stub function +# for you. 
+del fop_cg.decls["getspec"] +del cbk_cg.decls["getspec"] + +# cbk is used by both fop and continue, so emit first +for f_name in cbk_cg.decls.keys(): + cbk_cg.emit(f_name,"cbk") + print("") + +# continue is used by fop, so emit next +for f_name in fop_cg.decls.keys(): + fop_cg.emit(f_name,"cont-func") + print("") + +for f_name in fop_cg.decls.keys(): + fop_cg.emit(f_name,"fop") + print("") diff --git a/xlators/cluster/nsr-client/src/nsrc.c b/xlators/cluster/nsr-client/src/nsrc.c new file mode 100644 index 000000000..4551a1432 --- /dev/null +++ b/xlators/cluster/nsr-client/src/nsrc.c @@ -0,0 +1,243 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "call-stub.h" +#include "defaults.h" +#include "timer.h" +#include "xlator.h" + +#define SCAR_LIMIT 20 +#define HILITE(x) ("[1;33m"x"[0m") + +/* + * The fops are actually generated by gen-fops.py; the rest was mostly copied + * from defaults.c (commit cd253754 on 27 August 2013). + */ + +enum gf_dht_mem_types_ { + gf_mt_nsrc_private_t = gf_common_mt_end + 1, + gf_mt_nsrc_end +}; + +typedef struct { + xlator_t *active; +} nsrc_private_t; + +typedef struct { + call_stub_t *stub; + xlator_t *curr_xl; + uint16_t scars; +} nsrc_local_t; + +char *NSRC_XATTR = "user.nsr.active"; + +static inline +xlator_t * +ACTIVE_CHILD (xlator_t *parent) +{ + nsrc_private_t *priv = parent->private; + + return priv ? priv->active : FIRST_CHILD(parent); +} + +xlator_t * +next_xlator (xlator_t *this, xlator_t *prev) +{ + xlator_list_t *trav; + + for (trav = this->children; trav; trav = trav->next) { + if (trav->xlator == prev) { + return trav->next ? 
trav->next->xlator + : this->children->xlator; + } + } + + return NULL; +} + +void +nsrc_retry_cb (void *cb_arg) +{ + nsrc_local_t *local = cb_arg; + + gf_log (__func__, GF_LOG_INFO, HILITE("retrying %p"), local); + call_resume_wind(local->stub); +} + +#include "nsrc-cg.c" + +int32_t +nsrc_forget (xlator_t *this, inode_t *inode) +{ + gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not " + "implement forget_cbk"); + return 0; +} + + +int32_t +nsrc_releasedir (xlator_t *this, fd_t *fd) +{ + gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not " + "implement releasedir_cbk"); + return 0; +} + +int32_t +nsrc_release (xlator_t *this, fd_t *fd) +{ + gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not " + "implement release_cbk"); + return 0; +} + +struct xlator_fops fops = { + .lookup = nsrc_lookup, + .stat = nsrc_stat, + .fstat = nsrc_fstat, + .truncate = nsrc_truncate, + .ftruncate = nsrc_ftruncate, + .access = nsrc_access, + .readlink = nsrc_readlink, + .mknod = nsrc_mknod, + .mkdir = nsrc_mkdir, + .unlink = nsrc_unlink, + .rmdir = nsrc_rmdir, + .symlink = nsrc_symlink, + .rename = nsrc_rename, + .link = nsrc_link, + .create = nsrc_create, + .open = nsrc_open, + .readv = nsrc_readv, + .writev = nsrc_writev, + .flush = nsrc_flush, + .fsync = nsrc_fsync, + .opendir = nsrc_opendir, + .readdir = nsrc_readdir, + .readdirp = nsrc_readdirp, + .fsyncdir = nsrc_fsyncdir, + .statfs = nsrc_statfs, + .setxattr = nsrc_setxattr, + .getxattr = nsrc_getxattr, + .fsetxattr = nsrc_fsetxattr, + .fgetxattr = nsrc_fgetxattr, + .removexattr = nsrc_removexattr, + .fremovexattr = nsrc_fremovexattr, + .lk = nsrc_lk, + .inodelk = nsrc_inodelk, + .finodelk = nsrc_finodelk, + .entrylk = nsrc_entrylk, + .fentrylk = nsrc_fentrylk, + .rchecksum = nsrc_rchecksum, + .xattrop = nsrc_xattrop, + .fxattrop = nsrc_fxattrop, + .setattr = nsrc_setattr, + .fsetattr = nsrc_fsetattr, + .fallocate = nsrc_fallocate, + .discard = nsrc_discard, +}; + +struct xlator_cbks cbks = { +}; 
+ + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("nsrc", this, out); + + ret = xlator_mem_acct_init (this, gf_mt_nsrc_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Memory accounting init" "failed"); + return ret; + } +out: + return ret; +} + + +int32_t +nsrc_init (xlator_t *this) +{ + nsrc_private_t *priv = NULL; + + this->local_pool = mem_pool_new (nsrc_local_t, 128); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create nsrc_local_t pool"); + goto err; + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_mt_nsrc_private_t); + if (!priv) { + goto err; + } + + priv->active = FIRST_CHILD(this); + this->private = priv; + return 0; + +err: + if (priv) { + GF_FREE(priv); + } + return -1; +} + +void +nsrc_fini (xlator_t *this) +{ + GF_FREE(this->private); +} + +int32_t +nsrc_notify (xlator_t *this, int32_t event, void *data, ...) +{ + int32_t ret = 0; + + switch (event) { + case GF_EVENT_CHILD_DOWN: + /* + * TBD: handle this properly + * + * What we really should do is propagate this only if it caused + * us to lose quorum, and likewise for GF_EVENT_CHILD_UP only + * if it caused us to gain quorum. However, that requires + * tracking child states and for now it's easier to swallow + * these unconditionally. The consequence of failing to do + * this is that DHT sees the first GF_EVENT_CHILD_DOWN and gets + * confused, so it doesn't call us and doesn't get up-to-date + * directory listings etc. 
+ */ + break; + default: + ret = default_notify (this, event, data); + } + + return ret; +} + +class_methods_t class_methods = { + .init = nsrc_init, + .fini = nsrc_fini, + .notify = nsrc_notify, +}; + +struct volume_options options[] = { + { .key = {NULL} }, +}; diff --git a/xlators/cluster/nsr-recon/Makefile.am b/xlators/cluster/nsr-recon/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/nsr-recon/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/nsr-recon/src/Makefile.am b/xlators/cluster/nsr-recon/src/Makefile.am new file mode 100644 index 000000000..e639e4437 --- /dev/null +++ b/xlators/cluster/nsr-recon/src/Makefile.am @@ -0,0 +1,23 @@ +xlator_LTLIBRARIES = nsr_recon.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +nsr_recon_la_LDFLAGS = -module -avoid-version +nsr_recon_la_SOURCES = recon_driver.c recon_xlator.c + +nsr_recon_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/api/src/libgfapi.la + +noinst_HEADERS = recon_driver.h recon_xlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \ + -I$(top_srcdir)/rpc/rpc-lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +XLATOR_HEADER = $(top_srcdir)/libglusterfs/src/xlator.h + +CLEANFILES = + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/nsr.so diff --git a/xlators/cluster/nsr-recon/src/recon_driver.c b/xlators/cluster/nsr-recon/src/recon_driver.c new file mode 100644 index 000000000..8c7622a02 --- /dev/null +++ b/xlators/cluster/nsr-recon/src/recon_driver.c @@ -0,0 +1,3130 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <fnmatch.h> + + +#include "call-stub.h" +#include "defaults.h" +#include "xlator.h" + + +#include "recon_driver.h" +#include "recon_xlator.h" +#include "api/src/glfs-internal.h" +#include "api/src/glfs-handles.h" + +/* TBD: move declarations here and nsr.c into a common place */ +#define NSR_TERM_XATTR "trusted.nsr.term" +#define RECON_TERM_XATTR "trusted.nsr.recon-term" +#define RECON_INDEX_XATTR "trusted.nsr.recon-index" + +/* + * Execution architecture for the NSR reconciliation driver. The driver runs + * as a seperate process in each node where the brick is. The main function of + * the driver is nsr_reconciliation_driver() (last function below) The driver + * just sits in a tight loop waiting for state changes. When a brick becomes a + * replica leader, it fences IO, contacts this process and waits for + * reconciliation to finish. + * + * The replica leader talks to other bricks in replica group which are alive + * and gets the last term info using which it decides which has the latest + * data. That brick is referred to as the "reconciliator"; leader sends a + * message to reconciliator to freeze its data (by reading any incomplete data + * from other nodes from that term if required) + * + * Once that is done leader sends a message to all nodes except the + * reconciliator to sync themselves with the reconciliator. This process is + * referred to as "resolution". + * + * Hence the reconciliation processes need to talk to each other to get a given + * term info. 
This is implemented using the recon translator IOs which + * implements a bare bone RPC by exposing a file interface to which + * reads/writes are done to pass control messages. This is referred to as the + * "control plane". This implementation allows the control plane to be + * implemented as a bunch of threads for each of the nodes. + * + * The reconciliation process also needs to talk to the brick process on that + * node to actually write the data as part of reconciliation/resolution. This + * is referred to as the "data plane". Again there are a bunch of threads that + * do this work. + * + * The way the worker threads are organised is that main driver context has a + * pointer to contexts for each of these thread contexts. The thread context at + * index 0 always refers to talking with local recon process/brick. So the + * control worker at index 0 will get the local changelog info and data worker + * at index 0 will talk to local brick. + * + * All the ops from the control/data planes are implemented using the glfs + * APIs. + */ + +#if defined(NSR_DEBUG) + +/* This lets us change on the fly even if NSR_DEBUG is defined. */ +int nsr_debug_level = GF_LOG_TRACE; + +FILE * +recon_create_log (char *member, char *module) +{ + char *dpath = NULL; + char *p; + char *fpath = NULL; + FILE *fp = NULL; + int fd = -1; + + (void)mkdir(NSR_LOG_DIR,0777); + (void)asprintf(&dpath,NSR_LOG_DIR"/%s",member); + if (dpath) { + for (p = dpath + strlen(NSR_LOG_DIR) + 1; *p; ++p) { + if (*p == '/') { + *p = '-'; + } + } + (void)mkdir(dpath,0777); + (void)asprintf(&fpath,"%s/%s",dpath,module); + if (fpath) { + fd = open(fpath,O_WRONLY|O_CREAT|O_APPEND|O_SYNC,0666); + if (fd >= 0) { + fp = fdopen(fd,"a"); + if (!fp) { + close(fd); + } + } + if (fp) { + if (setvbuf (fp, NULL, _IONBF, 0)) { + /* + * Might as well take advantage of it + * to log the error. 
+ */ + fprintf (fp, + "setvbuf failed for log\n"); + fprintf (fp, + "log output may be async\n"); + fflush(fp); + } + } + free(fpath); + } + free(dpath); + } + + return fp; +} + +void +_nsr_driver_log (const char *func, int line, char *member, FILE *fp, + char *fmt, ...) +{ + va_list ap; + char *buf = NULL; + int retval; + + if (!fp) { + fp = recon_create_log(member,"nsr-driver-log"); + if (!fp) { + return; + } + } + + va_start(ap,fmt); + retval = vasprintf(&buf,fmt,ap); + if (buf) { + fprintf(fp,"[%s:%d] %.*s\n",func,line,retval,buf); + free(buf); + } + va_end(ap); +} + +void +_nsr_worker_log (const char *func, int line, char *member, + char *type, uint32_t index, FILE *fp, + char *fmt, ...) +{ + va_list ap; + char *buf = NULL; + int retval; + + if (!fp) { + char *name; + if (asprintf(&name,"%s-%u",type,index) < 1) { + return; + } + fp = recon_create_log (member, name); + if (!fp) { + return; + } + } + + va_start(ap,fmt); + retval = vasprintf(&buf,fmt,ap); + if (buf) { + fprintf(fp,"[%s:%d] %.*s\n",func,line,retval,buf); + free(buf); + } + va_end(ap); +} + +#endif + +/* + * Recon Driver Calloc + * + * We need this because all of this messing about with gfapi from within a + * translator keeps scrambling THIS (only one reason it's a terrible idea) and + * we need THIS to have a value that represents our initialization with our + * memory types. + * + * Note that the macro requires "this" to be defined in the current scope. + */ + +#define RD_CALLOC(x,y,z) ({THIS = this; GF_CALLOC(x,y,z); }) + +/* + * This function gets the size of all the extended attributes for a file. + * This is used so that caller knows how much to allocate for key-value storage. + * + * Input Arguments: + * fd - the file opened using glfs API. + * dict - passed so that NSR translator can get this from the required brick + * + * Output Arguments: + * b - pointer to the buffer where the attributes are filled up. 
+ * key_size - the size of all keys + * val_size - the size of all values + * num - number of key/values + */ +static int32_t +get_xattr_total_size( struct glfs_fd *fd, + char **b, + uint32_t *key_size, + uint32_t *val_size, + uint32_t* num, + dict_t *dict) +{ + int32_t s = -1, ret = -1; + char *c = NULL; + + *key_size = 0; + *val_size = 0; + *num = 0; + + // First get the size of the keys + s = glfs_flistxattr_with_xdata(fd, NULL,0, dict); + if (s == -1) { + goto out; + } + *key_size = s; + + // TBD - use the regular calloc + (*b) = c = calloc(s+1,1); + + // get the keys themselves + if (glfs_flistxattr_with_xdata(fd, c, s+1, dict) == -1) { + goto out; + } + do { + int32_t r; + uint32_t len = 0; + // for each key get the size of the value + r = glfs_fgetxattr_with_xdata(fd, c, NULL, 0, dict); + if (r == -1) + goto out; + (*val_size) += r; + len = strlen(c) + 1; + c += len; + s -= len; + (*num)++; + } while(s); + ret = 0; +out: + return ret; +} + +/* + * This function gets bunch of xattr values given set of keys. + * + * Input Arguments: + * fd - the file opened using glfs API. 
+ * keys - the bunch of keys + * size - size of values + * num - number of keys + * dict - passed so that NSR translator can get this from the required brick + * + * Output Arguments: + * buf - where the values are written one after the other (NULL seperated) + */ +static int32_t +get_xattr(struct glfs_fd *fd, + char *keys, + char *buf, + uint32_t size, + uint32_t num, + dict_t *dict) +{ + while(num--) { + int32_t r; + uint32_t len = 0; + + // copy the key + strcpy(buf, keys); + len = strlen(keys); + len++; + buf += len; + + // get the value and copy the value after incrementing buf after the key + r = glfs_fgetxattr_with_xdata(fd, keys, buf, size, dict); + + // TBD - handle error + if (r == -1) + return -1; + + // increment the key to next value + keys += len; + + // increment buf to hold the next key + buf += strlen(buf) + 1; + } + return 0; +} + +/* + * Function deletes a bunch of key values in extended attributes of a file. + * Input Arguments: + * fd - the file opened using glfs API. + * dict - passed so that NSR translator can do this from the required brick + * keys - bunch of NULL seperated key names + * num - number of keys + */ +static int32_t delete_xattr(struct glfs_fd *fd, + dict_t *dict_t, + char *keys, + uint32_t num) +{ + while(num--) { + // get the value and copy the value + // TBD - handle failure cases when calling glfs_fremovexattr_with_xdata() + if (glfs_fremovexattr_with_xdata(fd, keys, dict_t) == -1) + return -1; + keys += strlen(keys) +1; + } + return 0; +} + +/* + * Given a bunch of key value pairs, fill them as xattrs for a file + * + * Input Arguments: + * fd - the file opened using glfs API. + * dict - passed so that NSR translator can do this from the required brick + * buf - buffer containing the keys-values pairs. The key value are NULL seperated. + * Each of the key-value is seperated by NULL in turn. + * num - Number of such key value pairs. 
+ */ +static int32_t +fill_xattr(struct glfs_fd *fd, + dict_t *dict, + char *buf, + uint32_t num) +{ + char *k = buf, *val = NULL; + + while(num--) { + int32_t r; + + val = k + strlen(k) + 1; + + // TBD - handle failure cases when calling glfs_fsetxattr_with_xdata() + r = glfs_fsetxattr_with_xdata(fd, k, val, strlen(val), 0, dict); + if (r == -1) + return -1; + k = val + strlen(val) + 1; + } + return 0; +} + +/* + * This function gets a file that can be used for doing glfs_init later. + * The control file is used by control thread(function) to talk to peer reconciliation process. + * The data file is used by the data thread(function) to talk to the bricks. + * The control file is of name such as con:gfs1:-mnt-a1 where "gfs1" is name of host + * and the brick path is "/mnt/a1". + * The data file is of name such as data:gfs1:-mnt-a1. + * + * Input Arguments: + * vol - name of the volume. This is used to build the full path of the control and data file + * such as /var/lib/glusterd/vols/test/bricks/gfs2:-mnt-test1-nsr-recon.vol. + * In above example the volume name is test and brick on gfs2 is on path /mnt/test1 + * + * worker - The worker for a given node. This worker has 2 threads - one on the data plane + * and one on the control plane. The worker->name is already filled with hostname:brickname + * in the function nsr_reconciliation_driver(). Use that to build the volume file. + * So if worker->name has gfs1:/mnt/a1, control file is con:gfs1:-mnt-a1 + * and data file is data:gfs1:-mnt-a1. + * All these files are under the bricks directory. TBD - move this to a NSR recon directory later. 
+ */ +static void +nsr_recon_get_file(char *vol, nsr_replica_worker_t *worker) +{ + char *ptr; + char tr[256]; + + // Replace the "/" to - + strcpy(tr, worker->name); + ptr = strchr (tr, '/'); + while (ptr) { + *ptr = '-'; + ptr = strchr (tr, '/'); + } + + // Build the base directory such as "/var/lib/glusterd/vols/test/bricks/" + sprintf(worker->control_worker->vol_file, + "/%s/%s/%s/%s/", + GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_VOLUME_DIR_PREFIX, + vol, + GLUSTERD_BRICK_INFO_DIR); + + strcat(worker->control_worker->vol_file, "con:"); + strcat(worker->control_worker->vol_file, tr); + + sprintf(worker->data_worker->vol_file, + "/%s/%s/%s/%s/", + GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_VOLUME_DIR_PREFIX, + vol, + GLUSTERD_BRICK_INFO_DIR); + strcat(worker->data_worker->vol_file, "data:"); + strcat(worker->data_worker->vol_file, tr); +} + +/* + * This function does all the glfs initialisation + * so that reconciliation process can talk to other recon processes/bricks + * for the control/data messages. + * This will be done everytime a worker needs to be kicked off to talk + * across any plane. 
+ * + * Input arguments: + * ctx - The per worker based context + * control - set to true if this worker is for the control plane + */ +static int32_t +nsr_recon_start_work(nsr_per_node_worker_t *ctx, + gf_boolean_t control) +{ + glfs_t *fs = NULL; + xlator_t *this = ctx->driver_ctx->this; + int32_t ret = 0; + glfs_fd_t *aux_fd = NULL; // fd of auxilary log + char lf[256]; + nsr_recon_private_t *priv = NULL; + char *my_name = NULL; + char *morph_name = NULL, *ptr = NULL; + + priv = this->private; + my_name = RD_CALLOC (1, + strlen (priv->replica_group_members[0]) + 1, + gf_mt_recon_member_name_t); + strcpy (my_name, priv->replica_group_members[0]); + + nsr_worker_log(this->name, GF_LOG_INFO, + "starting work with volfile %s\n", + ctx->vol_file); + + fs = glfs_new(ctx->id); + if (!fs) { + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_ERROR, + "cannot create gfls context for thread %s\n",ctx->id); + return -1; + } + + // For some vague reason, glfs init APIs seem to be clobbering "this". hence resetting it. + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_INFO, + "init done. setting volfile %s\n", + ctx->vol_file); + + ret = glfs_set_volfile(fs, ctx->vol_file); + if (ret != 0) { + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_ERROR, + "cannot set volfile %s for thread %s\n",ctx->vol_file, ctx->id); + return -1; + } + + morph_name = RD_CALLOC (1, strlen (my_name) + 1, + gf_mt_recon_member_name_t); + strcpy (morph_name, my_name); + + ptr = strchr (morph_name, '/'); + while (ptr) + { + *ptr = '-'; + ptr = strchr (morph_name, '/'); + } + // TBD - convert this to right /usr/local/var/log based log files. 
+ + sprintf(lf, NSR_LOG_DIR"/%s/%s-%"PRIu32, morph_name, + (control == _gf_true)?"glfs-con":"glfs-data", ctx->index); + ret = glfs_set_logging (fs, lf, 7); + if (ret) { + glusterfs_this_set(this); + gf_log (this->name, GF_LOG_ERROR, "glfs logging set failed (%s)", + strerror (errno)); + return -1; + } + + ret = glfs_init (fs); + if (ret != 0) { + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_ERROR, "cannot do init for thread %s with volfile %s\n",ctx->id, ctx->vol_file); + return -1; + } + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_INFO, + "setting volfile %s done\n", + ctx->vol_file); + + // If it is control thread, open the "/" as the aux_fd. + // All IOs happening via the fd will do the RPCs across the reconciliation + // processes. For some vague reason, the root seems to be open'able like a file. + // TBD - try to clean this up. (implement a virtual file???) + if (control == _gf_true) { + nsr_worker_log(this->name, GF_LOG_INFO, + "doing open for / \n"); + aux_fd = glfs_open (fs, "/", O_RDWR); + // TBD - proper error handling. Stall reconciliation if such a thing happens? + if (aux_fd == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "cannot open aux log file for thread %s\n",ctx->id); + return -1; + } else { + nsr_worker_log(this->name, GF_LOG_ERROR, + "---opened aux log file for thread %s\n",ctx->id); + } + ctx->aux_fd = aux_fd; + } + glusterfs_this_set(this); + ctx->fs = fs; + return 0; +} + +/* + * + * This function does the cleanup after reconciliation is done + * or before we start a new reconciliation. 
+ * + * Input arguments: + * ctx - The per worker based context + * control - set to true if this worker is for the control plane + */ +static int32_t +nsr_recon_end_work(nsr_per_node_worker_t *ctx, + gf_boolean_t control) +{ + int32_t ret = 0; + xlator_t *this = ctx->driver_ctx->this; + + nsr_worker_log(this->name, GF_LOG_INFO, + "doing fini for recon worker\n"); + + ret = glfs_fini(ctx->fs); + if (ret != 0) { + glusterfs_this_set(this); + nsr_worker_log(this->name, GF_LOG_ERROR, "cannot do fini for thread %s with volfile %s\n",ctx->id, ctx->vol_file); + return -1; + } + glusterfs_this_set(this); + ctx->fs = NULL; + if (control == _gf_true) { + glfs_close (ctx->aux_fd); + ctx->aux_fd = NULL; + } + return 0; +} + +//called in case all worker functions run as sepeerate threads +static void +init_worker(nsr_per_node_worker_t *ctx, gf_boolean_t control) +{ + pthread_mutex_init(&(ctx->mutex), NULL); + pthread_cond_init(&(ctx->cv), NULL); + INIT_LIST_HEAD(&(ctx->head.list)); +} + + +/* + * Control worker funct for getting changelog info on this node. + * calls directly functions to parse the changelog. + * + * Input arguments: + * ctx - The per worker based context + * control - set to true if this worker is for the control plane + */ +static void +control_worker_func_0(nsr_per_node_worker_t *ctx, + nsr_recon_work_t *work) +{ + unsigned int index = ctx->index; + nsr_replica_worker_t *rw = &(ctx->driver_ctx->workers[index]); + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + xlator_t *this = dr->this; + nsr_recon_private_t *priv = this->private; + + ctx->is_control = _gf_true; + + switch (work->req_id) { + case NSR_WORK_ID_INI: + { + break; + } + case NSR_WORK_ID_FINI: + { + break; + } + case NSR_WORK_ID_GET_LAST_TERM_INFO: + { + nsr_recon_last_term_info_t lt; + nsr_reconciliator_info_t *recon_info = rw->recon_info; + // term is stuffed inside work->index. overloading. 
+ int32_t term = work->index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get last term info for node %d with current term %d\n",index, term); + + // TBD - handle errors + // This is called by the leader after it gets the current term. + // Makes searching easier. + nsr_recon_libchangelog_get_last_term_info(this, priv->changelog_base_path, term, <); + recon_info->last_term = lt.last_term; + recon_info->commited_ops = lt.commited_ops; + recon_info->last_index = lt.last_index; + recon_info->first_index = lt.first_index; + + + nsr_worker_log(this->name, GF_LOG_INFO, + "out of get last term info with current term %d. got ops %d with first %d and last %d \n", + recon_info->last_term, recon_info->commited_ops, + recon_info->first_index, recon_info->last_index); + break; + } + case NSR_WORK_ID_GET_GIVEN_TERM_INFO: + { + nsr_recon_last_term_info_t lt; + nsr_reconciliator_info_t *recon_info = rw->recon_info; + // term is stuffed inside work->index. overloading. + int32_t term = work->index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get term info for node %d for term %d\n",index, term); + + // TBD - handle errors + nsr_recon_libchangelog_get_this_term_info(this,priv->changelog_base_path, term, <); + + recon_info->last_term = lt.last_term; + recon_info->commited_ops = lt.commited_ops; + recon_info->last_index = lt.last_index; + recon_info->first_index = lt.first_index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "out of get term info for term %d. got ops %d with first %d and last %d \n", + recon_info->last_term, recon_info->commited_ops, + recon_info->first_index, recon_info->last_index); + + break; + } + case NSR_WORK_ID_RECONCILIATOR_DO_WORK: + { + // For local resolution, the main driver thread does it. + // SO there is no way we can have this message for this node. 
+ + nsr_worker_log(this->name, GF_LOG_INFO, + "this message should not be sent \n"); + break; + } + case NSR_WORK_ID_RESOLUTION_DO_WORK: + { + + nsr_worker_log(this->name, GF_LOG_INFO, + "this message should not be sent \n"); + ctx->result = -1; + break; + } + case NSR_WORK_ID_GET_RECONCILATION_WINDOW: + { + nsr_reconciliator_info_t *recon_info = rw->recon_info; + // first_index and last_index at 0 indicates empty log. + // For non empty log, the first_index always starts at 1. + uint32_t num = (dr->workers[index].recon_info->last_index - + dr->workers[index].recon_info->first_index + 1); + nsr_recon_record_details_t *rd; + uint32_t i=0; + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get reconciliation window records for node %d for term %d with first %d last %d\n", + index, recon_info->last_term, recon_info->first_index, recon_info->last_index); + + + // TBD - handle buffer allocation errors + rd = RD_CALLOC(num, + sizeof(nsr_recon_record_details_t), + gf_mt_recon_record_details_t); + if (rd == NULL) { + ctx->result = -1; + return; + } + + recon_info->records = RD_CALLOC(num, + sizeof(nsr_reconciliation_record_t), + gf_mt_recon_record_t); + if (recon_info->records == NULL) { + ctx->result = -1; + return; + } + + // TBD - handle errors + if (nsr_recon_libchangelog_get_records(this, priv->changelog_base_path, + recon_info->last_term, + recon_info->first_index, + recon_info->last_index, + rd) == _gf_false) { + ctx->result = -1; + return; + } + + // The above function writes into rd from 0 to (num -1) + // We need to take care of this whenever we deal with records + for (i=0; i < num; i++) { + ENDIAN_CONVERSION_RD(rd[i], _gf_true); //ntohl + memcpy(&(recon_info->records[i].rec), + &(rd[i]), + sizeof(nsr_recon_record_details_t)); + } + + GF_FREE(rd); + + nsr_worker_log(this->name, GF_LOG_INFO, + "got reconciliation window records for node %d for term %d \n", + index, recon_info->last_term); + break; + } + + default: + nsr_worker_log (this->name, GF_LOG_ERROR, 
+ "bad req id %u", work->req_id); + } + + return; +} + +// Control worker thread +static void* +control_worker_main_0(nsr_per_node_worker_t *ctx) +{ + + ctx->is_control = _gf_true; + nsr_worker_log(this->name, GF_LOG_INFO, + "starting control worker func 0\n"); + + init_worker(ctx, 1); + + while(1) + { + nsr_recon_work_t *work = NULL; + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + + nsr_worker_log(this->name, GF_LOG_INFO, + "waiting for work\n"); + + pthread_mutex_lock(&ctx->mutex); + while (list_empty(&(ctx->head.list))) { + pthread_cond_wait(&ctx->cv, &ctx->mutex); + } + pthread_mutex_unlock(&ctx->mutex); + + + list_for_each_entry(work, &(ctx->head.list), list) { + nsr_worker_log(this->name, GF_LOG_INFO, + "got work with id %d\n", work->req_id); + work->in_use = _gf_false; + + // Call the main function. + control_worker_func_0(ctx, work); + + atomic_dec(&(dr->outstanding)); + break; + } + + nsr_worker_log(this->name, GF_LOG_INFO,"deleting work item\n"); + list_del_init (&work->list); + GF_FREE(work); + nsr_worker_log(this->name, GF_LOG_INFO,"finished deleting work item\n"); + } + + return NULL; +} + +static void +control_worker_do_reconciliation (nsr_per_node_worker_t *ctx, + nsr_recon_work_t *work) +{ + unsigned int index = ctx->index; + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + nsr_recon_role_t rr; + uint32_t i=0; + uint32_t num=0; + uint32_t idx = dr->reconciliator_index; + uint32_t term = dr->workers[idx].recon_info->last_term; + + GF_ASSERT(idx == index); + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to make this index %d as reconciliator for term %d\n", index, term); + + // TBD - error handling for all the glfs APIs + if (glfs_lseek(ctx->aux_fd, + nsr_recon_xlator_sector_1, + SEEK_SET) == -1) { + ctx->result = -1; + return; + } + + // We have all the info for all other nodes. + // Fill all that info when sending data to that process. 
+ for (i=0; i < dr->replica_group_size; i++) { + if ( dr->workers[i].in_use && + (dr->workers[i].recon_info->last_term == term)) { + rr.info[num].last_term = + dr->workers[i].recon_info->last_term; + rr.info[num].commited_ops = + dr->workers[i].recon_info->commited_ops; + rr.info[num].last_index = + dr->workers[i].recon_info->last_index; + rr.info[num].first_index = + dr->workers[i].recon_info->first_index; + strcpy(rr.info[num].name, + dr->workers[i].name); + } + num++; + } + rr.num = num; + rr.role = reconciliator; + ENDIAN_CONVERSION_RR(rr, _gf_false); //htonl + if (glfs_write(ctx->aux_fd, &rr, sizeof(rr), 0) == -1) { + ctx->result = -1; + // Put the errno only for this case since we are bothered about + // retrying only for this case. For rest of the cases we will + // just return EIO in errno. + ctx->op_errno = errno; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "sent reconciliator info for term %d with node count as %d\n", term, num); +} + +static void +control_worker_do_resolution (nsr_per_node_worker_t *ctx, + nsr_recon_work_t *work) +{ + unsigned int index = ctx->index; + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + nsr_recon_role_t rr; + unsigned int i=0, j=0; + unsigned int rec = dr->reconciliator_index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to make this index %d as resolutor with reconciliator as %d\n",index, rec); + + // TBD - error handling for all the glfs APIs + if (glfs_lseek(ctx->aux_fd, + nsr_recon_xlator_sector_1, + SEEK_SET) == -1) { + ctx->result = -1; + return; + } + + rr.num = 2; + + // Fill in info[0] as info for the node for which we are seeking + // resolution. Fill in info[1] as info of the reconciliator node. The + // function nsr_recon_driver_get_role() that will be called when this + // message reaches the node will look at index 1 for term information + // related to the reconciliator. + for (i=0; i < 2; i++) { + (i == 0) ? 
(j = index) : (j = rec); + rr.info[i].last_term = + dr->workers[j].recon_info->last_term; + rr.info[i].commited_ops = + dr->workers[j].recon_info->commited_ops; + rr.info[i].last_index = + dr->workers[j].recon_info->last_index; + rr.info[i].first_index = + dr->workers[j].recon_info->first_index; + // The name is used as the key to convert indices since the + // reconciliator index could be different across the nodes. + strcpy(rr.info[i].name, + dr->workers[j].name); + if (i == 0) { + nsr_worker_log(this->name, GF_LOG_INFO, + "this node info term=%d, ops=%d, first=%d, last=%d\n", + rr.info[i].last_term, rr.info[i].commited_ops, + rr.info[i].first_index,rr.info[i].last_index); + } else { + nsr_worker_log(this->name, GF_LOG_INFO, + "reconciliator node info term=%d, ops=%d, first=%d, last=%d\n", + rr.info[i].last_term, rr.info[i].commited_ops, + rr.info[i].first_index,rr.info[i].last_index); + } + } + rr.role = resolutor; + ENDIAN_CONVERSION_RR(rr, _gf_false); //htonl + if (glfs_write(ctx->aux_fd, &rr, sizeof(rr), 0) == -1) { + ctx->result = -1; + // Put the errno only for this case since we are bothered about + // retrying only for this case. For rest of the cases we will + // just return EIO in errno. 
+ ctx->op_errno = errno; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "sent message to this node %d resolutor with reconciliator as %d\n", index, rec); +} + +static void +control_worker_get_window (nsr_per_node_worker_t *ctx, nsr_recon_work_t *work) +{ + unsigned int index = ctx->index; + nsr_replica_worker_t *rw = &(ctx->driver_ctx->workers[index]); + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + xlator_t *this = dr->this; + nsr_recon_log_info_t li; + nsr_reconciliator_info_t *recon_info = rw->recon_info; + uint32_t i = 0; + uint32_t num = (dr->workers[index].recon_info->last_index - + dr->workers[index].recon_info->first_index +1); + nsr_recon_record_details_t *rd; + + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get reconciliation window records for node %d for term %d with first %d last %d\n", + index, recon_info->last_term, recon_info->first_index, recon_info->last_index); + + // TBD - error handling for all the glfs APIs + if (glfs_lseek(ctx->aux_fd, nsr_recon_xlator_sector_2, SEEK_SET) == -1) { + ctx->result = -1; + return; + } + + // write to node what term & indices we are interested + li.term = recon_info->last_term; + li.first_index = recon_info->first_index; + li.last_index = recon_info->last_index; + ENDIAN_CONVERSION_LI(li, _gf_false); //htonl + if (glfs_write(ctx->aux_fd, &li, sizeof(li), 0) == -1) { + ctx->result = -1; + return; + } + + // then read + rd = RD_CALLOC(num, + sizeof(nsr_recon_record_details_t), + gf_mt_recon_private_t); + if (rd == NULL) { + ctx->result = -1; + return; + } + recon_info->records = RD_CALLOC(num, + sizeof(nsr_reconciliation_record_t), + gf_mt_recon_private_t); + if (recon_info->records == NULL) { + ctx->result = -1; + goto err; + } + + if (glfs_read(ctx->aux_fd, rd, num * sizeof(nsr_recon_record_details_t), 0) == -1) { + ctx->result = -1; + goto err; + } + + for (i=0; i < num; i++) { + ENDIAN_CONVERSION_RD(rd[i], _gf_true); //ntohl + memcpy (&(recon_info->records[i].rec), &(rd[i]), + 
sizeof(nsr_recon_record_details_t)); + nsr_worker_log(this->name, GF_LOG_INFO, + "get_reconcilaition_window:Got %d at index %d\n", + recon_info->records[i].rec.type, + i + recon_info->first_index); + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "got reconciliation window records for node %d for term %d \n", + index, recon_info->last_term); + +err: + GF_FREE(rd); +} + +/* + * Control worker funct for getting changelog info on some other node. + * calls glfs functions to seek/read/write on aux_fd. + * + * Input arguments: + * ctx - The per worker based context + * control - set to true if this worker is for the control plane + */ +static void +control_worker_func(nsr_per_node_worker_t *ctx, + nsr_recon_work_t *work) +{ + unsigned int index = ctx->index; + nsr_replica_worker_t *rw = &(ctx->driver_ctx->workers[index]); + nsr_recon_last_term_info_t lt; + nsr_reconciliator_info_t *recon_info = rw->recon_info; + int32_t term = htonl(work->index); // overloading it + + ctx->is_control = _gf_true; + + switch (work->req_id){ + + case NSR_WORK_ID_INI: + nsr_worker_log(this->name, GF_LOG_INFO, + "calling nsr_recon_start_work\n"); + + // TBD - handle error in case nsr_recon_start_work gives error + if (nsr_recon_start_work(ctx, _gf_true) != 0) { + ctx->result = -1; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "finished nsr_recon_start_work\n"); + break; + + case NSR_WORK_ID_FINI: + nsr_worker_log(this->name, GF_LOG_INFO, + "calling nsr_recon_end_work\n"); + + // TBD - handle error in case nsr_recon_end_work gives error + if (nsr_recon_end_work(ctx, _gf_true) != 0) { + ctx->result = -1; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "finished nsr_recon_end_work\n"); + break; + + case NSR_WORK_ID_GET_LAST_TERM_INFO: + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get last term info for node %d with current term %d\n",index, work->index); + + // first write the current term term number + // TBD - error handling for all the glfs APIs + if 
(glfs_lseek(ctx->aux_fd, nsr_recon_xlator_sector_4, SEEK_SET) == -1) { + ctx->result = -1; + return; + } + if (glfs_write(ctx->aux_fd, &term, sizeof(term), 0) == -1) { + ctx->result = -1; + return; + } + if (glfs_read(ctx->aux_fd, <, sizeof(lt), 0) == -1) { + ctx->result = -1; + return; + } + ENDIAN_CONVERSION_LT(lt, _gf_true); //ntohl + recon_info->last_term = lt.last_term; + recon_info->commited_ops = lt.commited_ops; + recon_info->last_index = lt.last_index; + recon_info->first_index = lt.first_index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "out of get last term info with current term %d. got ops %d with first %d and last %d \n", + recon_info->last_term, recon_info->commited_ops, + recon_info->first_index, recon_info->last_index); + + break; + + case NSR_WORK_ID_GET_GIVEN_TERM_INFO: + nsr_worker_log(this->name, GF_LOG_INFO, + "trying to get term info for node %d for term %d\n",index, work->index); + + // first write the term number + // TBD - error handling for all the glfs APIs + if (glfs_lseek(ctx->aux_fd, nsr_recon_xlator_sector_3, SEEK_SET) == -1) { + ctx->result = -1; + return; + } + if (glfs_write(ctx->aux_fd, &term, sizeof(term), 0) == -1) { + ctx->result = -1; + return; + } + if (glfs_read(ctx->aux_fd, <, sizeof(lt), 0) == -1) { + ctx->result = -1; + return; + } + ENDIAN_CONVERSION_LT(lt, _gf_true); //ntohl + recon_info->last_term = lt.last_term; + recon_info->commited_ops = lt.commited_ops; + recon_info->last_index = lt.last_index; + recon_info->first_index = lt.first_index; + + nsr_worker_log(this->name, GF_LOG_INFO, + "out of get term info for term %d. 
got ops %d with first %d and last %d \n", + recon_info->last_term, recon_info->commited_ops, + recon_info->first_index, recon_info->last_index); + + break; + + case NSR_WORK_ID_RECONCILIATOR_DO_WORK: + control_worker_do_reconciliation(ctx,work); + break; + + case NSR_WORK_ID_RESOLUTION_DO_WORK: + control_worker_do_resolution(ctx,work); + break; + + case NSR_WORK_ID_GET_RECONCILATION_WINDOW: + control_worker_get_window(ctx,work); + break; + + default: + nsr_worker_log (this->name, GF_LOG_ERROR, + "bad work type %d", work->req_id); + } + + return; +} + +// Control worker thread +static void* +control_worker_main(nsr_per_node_worker_t *ctx) +{ + unsigned int index = ctx->index; + + ctx->is_control = _gf_true; + nsr_worker_log(this->name, GF_LOG_INFO, + "starting control worker func\n"); + + // if this is for local processing, call the changelog parsing calls directly + if (index == 0) { + control_worker_main_0(ctx); + return NULL; + } + + init_worker(ctx, 1); + + + while(1) + { + nsr_recon_work_t *work = NULL; + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + + nsr_worker_log(this->name, GF_LOG_INFO, + "waiting for work\n"); + + pthread_mutex_lock(&ctx->mutex); + while (list_empty(&(ctx->head.list))) { + pthread_cond_wait(&ctx->cv, &ctx->mutex); + } + pthread_mutex_unlock(&ctx->mutex); + + + list_for_each_entry(work, &(ctx->head.list), list) { + nsr_worker_log(this->name, GF_LOG_INFO, + "got work with id %d\n", work->req_id); + work->in_use = _gf_false; + control_worker_func(ctx,work); + atomic_dec(&(dr->outstanding)); + break; + } + nsr_worker_log(this->name, GF_LOG_INFO,"deleting work item\n"); + list_del_init (&work->list); + GF_FREE(work); + nsr_worker_log(this->name, GF_LOG_INFO,"finished deleting work item\n"); + } + + return NULL; +} + +/* + * This function gets called if this process is chosen as the reconciliator + * for this replica group. 
It would have already got the records for the last term + * for the indices that are required (from the first HOLE to last index) from + * all other nodes that also witnessed that term. COmpare all the records and + * compute the work required. + * + * Input arguments + * ctx - driver context. All recon work is stored in workers[0].recon_info + */ +static void +compute_reconciliation_work(nsr_recon_driver_ctx_t *ctx) +{ + uint32_t i=0, j=0; + nsr_reconciliator_info_t *my_recon = ctx->workers[0].recon_info; + uint32_t num = (my_recon->last_index - my_recon->first_index + 1); + + for (i=0; i < num; i++) { + nsr_log_type_t orig, new; + unsigned int src = 0; + orig = new = my_recon->records[i].rec.type; + nsr_recon_work_type_t tw = NSR_RECON_WORK_NONE; + // index 0 means this node. Look at all other nodes. + for (j=1; j < ctx->replica_group_size; j++) { + if (ctx->workers[j].in_use) { + nsr_log_type_t pr = ctx->workers[j].recon_info->records[i].work.type; + if ((new != pr) && (pr > new)) { + src = j; + new = (new | pr); + } + } + } + // TBD - compare data if new and orig are all FILLs. (can detect changelog corruption) + // Right now we compare if both orig and new are psuedo holes since + // only that is of interest to us. 
+ if (orig != new) { + if ((orig == NSR_LOG_HOLE) && (new == NSR_LOG_PSEUDO_HOLE)) + tw = NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE; + else if ((orig == NSR_LOG_HOLE) && (new == NSR_LOG_FILL)) + tw = NSR_RECON_WORK_HOLE_TO_FILL; + else if ((orig == NSR_LOG_PSEUDO_HOLE) && (new == NSR_LOG_PSEUDO_HOLE)) + tw = NSR_RECON_WORK_COMPARE_PSEUDO_HOLE; + else if ((orig == NSR_LOG_PSEUDO_HOLE) && (new == NSR_LOG_FILL)) + tw = NSR_RECON_WORK_HOLE_TO_FILL; + } + if (tw != NSR_RECON_WORK_NONE) { + my_recon->records[i].work.type = tw; + my_recon->records[i].work.source = src; + // Overwrite the record + memcpy(&(my_recon->records[i].rec), + &(ctx->workers[src].recon_info->records[i].rec), + sizeof(nsr_recon_record_details_t)); + } + } + return; +} + +static int32_t +nsr_recon_in_use(nsr_recon_driver_ctx_t *ctx, + uint32_t i, + gf_boolean_t in_use); + +/* + * Write the role and associated information to the node. + * This gets called from recon xlator indicating node is either + * leader, reconciliator or should do resolution. + */ +gf_boolean_t +nsr_recon_driver_set_role(nsr_recon_driver_ctx_t *ctx, + nsr_recon_role_t *rr, + uint32_t term) +{ + nsr_role_work_t *rw; + xlator_t *this = ctx->this; + + nsr_driver_log(this->name, GF_LOG_INFO, "set role called \n"); + rw = RD_CALLOC(1, sizeof (nsr_role_work_t), gf_mt_recon_role_work_t); + memcpy(&rw->role, rr, sizeof(nsr_recon_role_t)); + rw->term = term; + INIT_LIST_HEAD(&(rw->list)); + pthread_mutex_lock(&(ctx->mutex)); + list_add_tail(&rw->list, &ctx->role_head.list); + pthread_cond_signal(&(ctx->cv)); + pthread_mutex_unlock(&(ctx->mutex)); + nsr_driver_log(this->name, GF_LOG_INFO, "set role returns \n"); + return _gf_true; +} + +/* + * First we undo the last role to make sure we clean up. + * + * Input arguments + * ctx - driver context. + * rr - Role information. + * If leader, the thread now sends the list of all nodes that are part of + * the current replica group. Use that to find out the activate the + * required worker threads. 
+ * If reconciliator, the leader node would have sent information about + * all nodes which saw last term as the reconciliator. + * If resolution to be done, then rr.info[0] will have this node's info + * which the leader would have got earlier. rr[1].info will have the + * info regarding the reconciliator. + * term - leader's term that is causing this role + */ +nsr_recon_driver_state_t +nsr_recon_driver_get_role(int32_t *status, + nsr_recon_driver_ctx_t *ctx, + nsr_role_work_t *rw) +{ + uint8_t i=0, j=0; + nsr_recon_role_t *rr = &(rw->role); + nsr_reconciliator_info_t *tmp; + xlator_t *this = ctx->this; + + // First make all the threads uninitialise + for (i = 0; i < ctx->replica_group_size; i++) { + if (nsr_recon_in_use(ctx, i, _gf_false) == -1) { + *status = -1; + return 0; + } + } + + switch (rr->role) { + case leader: + case joiner: + + // First set info this node + tmp = RD_CALLOC (1, sizeof (nsr_reconciliator_info_t), + gf_mt_recon_reconciliator_info_t); + if (!tmp) { + *status = -1; + return 0; + } + ctx->workers[0].recon_info = tmp; + if (nsr_recon_in_use(ctx, 0, _gf_true) == -1) { + *status = -1; + return 0; + } + ctx->current_term = rr->current_term; + + // Find rest of the nodes + for (i=1; i < ctx->replica_group_size; i++) { + for (j=0 ; /* nothing */; j++) { + if (j >= rr->num) { + nsr_driver_log (this->name, GF_LOG_ERROR, + "failed to find %s", + ctx->workers[i].name); + break; + } + if (strcmp(ctx->workers[i].name, + rr->info[j].name)) { + continue; + } + nsr_driver_log (this->name, GF_LOG_INFO, + "nsr_recon_driver_get_role: this as %s. found other server %s\n", + (rr->role == leader) ? "leader" + : "joiner", + ctx->workers[i].name); + + // Allocate this here. 
This will get later + // filled when the leader tries to get last term + // information from all the nodes + tmp = RD_CALLOC (1, + sizeof (nsr_reconciliator_info_t), + gf_mt_recon_reconciliator_info_t); + if (!tmp) { + *status = -1; + return 0; + } + ctx->workers[i].recon_info = tmp; + if (nsr_recon_in_use(ctx, i, _gf_true) == -1) { + *status = -1; + return 0; + } + break; + } + } + // If leader, reconciliator has to be chosen. + // If joiner, we are the reconciliator. + if (rr->role == leader) + ctx->reconciliator_index = -1; + else + ctx->reconciliator_index = 0; + break; + + case reconciliator: + ctx->reconciliator_index = 0; + // Copy information about all the other members which had the + // same term + for (i=0; i < rr->num; i++) { + for (j=0; /* nothing */; j++) { + if (j >= ctx->replica_group_size) { + nsr_driver_log (this->name, GF_LOG_ERROR, + "failed to find %s", + rr->info[i].name); + break; + } + if (strcmp(rr->info[i].name, + ctx->workers[j].name)) { + continue; + } + nsr_driver_log(this->name, GF_LOG_INFO, + "nsr_recon_driver_get_role: this as reconciliator. 
found other server %s\n", + ctx->workers[j].name); + tmp = RD_CALLOC (1, + sizeof (nsr_reconciliator_info_t), + gf_mt_recon_reconciliator_info_t); + if (!tmp) { + *status = -1; + return 0; + } + tmp->last_term = rr->info[i].last_term; + tmp->commited_ops = rr->info[i].commited_ops; + tmp->last_index = rr->info[i].last_index; + tmp->first_index = rr->info[i].first_index; + ctx->workers[j].recon_info = tmp; + if (nsr_recon_in_use(ctx, j, _gf_true) == -1) { + *status = -1; + return 0; + } + break; + } + } + break; + + case resolutor: + for (j=0; /* nothing */; j++) { + // info[1] has the information regarding the + // reconciliator + if (j >= ctx->replica_group_size) { + nsr_driver_log (this->name, GF_LOG_ERROR, + "failed to find %s", + rr->info[1].name); + break; + } + if (strcmp(rr->info[1].name, + ctx->workers[j].name)) { + continue; + } + nsr_driver_log(this->name, GF_LOG_INFO, + "nsr_recon_driver_get_role: this as resolutor. found other server %s as reconciliator\n", + ctx->workers[j].name); + tmp = RD_CALLOC (1, + sizeof (nsr_reconciliator_info_t), + gf_mt_recon_reconciliator_info_t); + if (!tmp) { + *status = -1; + return 0; + } + tmp->last_term = rr->info[1].last_term; + tmp->commited_ops = rr->info[1].commited_ops; + tmp->last_index = rr->info[1].last_index; + tmp->first_index = rr->info[1].first_index; + ctx->reconciliator_index = j; + ctx->workers[j].recon_info = tmp; + if (nsr_recon_in_use(ctx, j, _gf_true) == -1) { + *status = -1; + return 0; + } + GF_ASSERT(ctx->reconciliator_index != 0); + break; + } + tmp = RD_CALLOC (1, + sizeof (nsr_reconciliator_info_t), + gf_mt_recon_reconciliator_info_t); + if (!tmp) { + *status = -1; + return 0; + } + // info[0] has all info for this node + tmp->last_term = rr->info[0].last_term; + tmp->commited_ops = rr->info[0].commited_ops; + tmp->last_index = rr->info[0].last_index; + tmp->first_index = rr->info[0].first_index; + ctx->workers[0].recon_info = tmp; + if (nsr_recon_in_use(ctx, 0, _gf_true) == -1) { + *status = 
-1; + return 0; + } + } + + ctx->term = rw->term; + + *status = 0; + return rr->role; +} + + +/* + * This function gets called if this process is chosen to sync itself with + * the reconciliator. + * + * Input arguments + * ctx - driver context. + * my_info - local changelog info that has all the local records for indices that require work + * his_info - reconciliator's info that has all the golden copies + * invalidate - if set to true, then do not consult local records + */ + +static void +compute_resolution_work(nsr_recon_driver_ctx_t *ctx, + nsr_reconciliator_info_t *my_info, + nsr_reconciliator_info_t *his_info, + gf_boolean_t invalidate) +{ + uint32_t i=0; + uint32_t num = (my_info->last_index - my_info->first_index + 1); + xlator_t *this = ctx->this; + + if (invalidate) { + if (my_info->records) { + GF_FREE(my_info->records); + } + my_info->records = RD_CALLOC(num, + sizeof(nsr_reconciliation_record_t), + gf_mt_recon_record_t); + } + + for (i=0; i < num; i++) { + nsr_log_type_t orig, new; + nsr_recon_work_type_t tw = NSR_RECON_WORK_NONE; + orig = my_info->records[i].rec.type; + if (invalidate) + orig = NSR_LOG_HOLE; + new = his_info->records[i].rec.type; + // TBD - we can never have PSUEDO_HOLE in reconciliator's info + // We should have taken care of that during reconciliation. + // Put an assert to validate that. 
+ if (new != orig) { + if ((orig != NSR_LOG_FILL) && (new == NSR_LOG_FILL)) + tw = NSR_RECON_WORK_HOLE_TO_FILL; + else if ((orig != NSR_LOG_HOLE) && (new == NSR_LOG_HOLE)) + tw = NSR_RECON_WORK_UNDO_FILL; + } + // copy the records anyway + my_info->records[i].work.type = tw; + my_info->records[i].work.source = ctx->reconciliator_index; + memcpy(&(my_info->records[i].rec), + &(his_info->records[i].rec), + sizeof(nsr_recon_record_details_t)); + } + return; +} + + +// Create an glfs object +static struct glfs_object * +create_obj(nsr_per_node_worker_t *ctx, char *gfid_str) +{ + struct glfs_object *obj = NULL; + uuid_t gfid; + + uuid_parse(gfid_str, gfid); + + obj = glfs_h_create_from_handle(ctx->fs, gfid, GFAPI_HANDLE_LENGTH, NULL); + if (obj == NULL) { + GF_ASSERT(obj != NULL); + nsr_worker_log(this->name, GF_LOG_ERROR, + "creating of handle failed\n"); + return NULL; + } + return obj; +} + +/* + * Function to apply the actual record onto the local brick. + * prior to this we should have read all the data from the + * brick that has the data. + * + * Input parameters: + * ctx - per node worker context that has the fs for communicating to brick + * ri - Reconciliation record that needs fixup + * dict - So that NSR server translator on brick applis fixup only on this brick + * and the changelog translator consumes term and index. + */ + +static gf_boolean_t +apply_record(nsr_per_node_worker_t *ctx, + nsr_reconciliation_record_t *ri, + dict_t * dict) +{ + struct glfs_fd *fd = NULL; + struct glfs_object *obj = NULL; + struct glfs_object *to_obj = NULL; + gf_boolean_t retval = _gf_false; + + if (ri->rec.op == GF_FOP_WRITE) { + + nsr_worker_log(this->name, GF_LOG_INFO, + "DOing write for file %s @offset %d for len %d\n", + ri->rec.gfid, ri->rec.offset, ri->rec.len); + + // The file has got deleted on the source. Hence just ignore + // this. + // TBD - get a way to just stuff the log entry without writing + // the data so that changelogs remain identical. 
+ if (ri->work.data == NULL) { + return _gf_true; + } + + if ((obj = create_obj(ctx,ri->rec.gfid)) == NULL) + goto err; + + fd = glfs_h_open_with_xdata(ctx->fs, obj, O_RDWR, dict); + if (fd == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "open for file %s failed\n", + ri->rec.gfid); + goto err; + } + if (glfs_lseek_with_xdata(fd, ri->rec.offset, SEEK_SET, dict) != ri->rec.offset) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "lseek for file %s failed at offset %d\n", + ri->rec.gfid, ri->rec.offset); + goto err; + } + if (glfs_write_with_xdata(fd, ri->work.data, ri->rec.len, 0, dict) != ri->rec.len) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "write for file %s failed for bytes %d\n", + ri->rec.gfid, ri->rec.len); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished DOing write for gfid %s @offset %d for len %d\n", + ri->rec.gfid, ri->rec.offset, ri->rec.len); + + } else if (ri->rec.op == GF_FOP_FTRUNCATE) { + + nsr_worker_log(this->name, GF_LOG_INFO, + "DOing truncate for file %s @offset %d \n", + ri->rec.gfid, ri->rec.offset); + + if ((obj = create_obj(ctx, ri->rec.gfid)) == NULL) { + goto err; + } + + fd = glfs_h_open_with_xdata(ctx->fs, obj, O_RDWR, dict); + if (fd == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "open for file %s failed\n", + ri->rec.gfid); + goto err; + } + if (glfs_ftruncate_with_xdata(fd, ri->rec.offset, dict) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "trunctae for file %s failed @offset %d\n", + ri->rec.gfid,ri->rec.offset ); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished DOing truncate for gfid %s @offset %d \n", + ri->rec.gfid, ri->rec.offset); + + } else if ((ri->rec.op == GF_FOP_FREMOVEXATTR) || + (ri->rec.op == GF_FOP_REMOVEXATTR) || + (ri->rec.op == GF_FOP_SETXATTR) || + (ri->rec.op == GF_FOP_FSETXATTR)) { + + uint32_t k_s = 0, v_s = 0; + char *t_b= NULL; + uint32_t num = 0; + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing set extended attr for file %s \n", 
+ ri->rec.gfid); + + // The file has got deleted on the source. Hence just ignore + // this. TBD - get a way to just stuff the log entry without + // writing the data so that changelogs remain identical. + if (ri->work.data == NULL) { + return _gf_true; + } + + if ((obj = create_obj(ctx, ri->rec.gfid)) == NULL) { + goto err; + } + + if (obj->inode->ia_type == IA_IFDIR) + fd = glfs_h_opendir_with_xdata(ctx->fs, obj, dict); + else + fd = glfs_h_open_with_xdata(ctx->fs, obj, O_RDWR, dict); + if (fd == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "open for file %s failed\n", + ri->rec.gfid); + goto err; + } + + if(get_xattr_total_size(fd, &t_b, &k_s, &v_s, &num, dict) == -1) { + if (t_b) free(t_b); + nsr_worker_log(this->name, GF_LOG_ERROR, + "list of xattr of %s failed\n", ri->rec.gfid); + goto err; + } + + if (delete_xattr(fd, dict, t_b, num) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "deleting xattrs failed\n"); + goto err; + } + + // Set one special dict flag to indicate the opcode so that + // the opcode gets set to this + if (dict_set_int32(dict,"recon-xattr-opcode",ri->rec.op)) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "setting opcode to %d failed\n",ri->rec.op); + goto err; + } + + if (fill_xattr(fd, dict, ri->work.data, ri->work.num) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "filling xattrs failed\n"); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finsihed Doing set extended attr for %s \n", + ri->rec.gfid); + + } else if (ri->rec.op == GF_FOP_CREATE) { + + uuid_t gfid; + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing create for file %s \n", + ri->rec.gfid); + + // TBD - add mode and flags later + uuid_parse(ri->rec.gfid, gfid); + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + + nsr_worker_log (this->name, GF_LOG_INFO, + "creating with mode 0%o", ri->rec.mode); + if (glfs_h_creat_with_xdata(ctx->fs, obj, ri->rec.entry, O_RDWR, ri->rec.mode, NULL, gfid, dict) == NULL) { + 
nsr_worker_log(this->name, GF_LOG_ERROR, + "Failure for Doing create for file %s\n", + ri->rec.entry); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished Doing create for file %s \n", + ri->rec.entry); + + } else if (ri->rec.op == GF_FOP_MKNOD) { + + uuid_t gfid; + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing mknod for file %s \n", + ri->rec.entry); + + // TBD - add mode and flags later + uuid_parse(ri->rec.gfid, gfid); + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + + if (glfs_h_mknod_with_xdata(ctx->fs, obj, ri->rec.entry, O_RDWR, 0777, NULL, gfid, dict) == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failure for Doing mknod for file %s\n", + ri->rec.entry); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished Doing mknod for file %s \n", + ri->rec.entry); + + } else if (ri->rec.op == GF_FOP_MKDIR) { + + uuid_t gfid; + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing mkdir for dir %s \n", + ri->rec.gfid); + + // TBD - add mode and flags later + uuid_parse(ri->rec.gfid, gfid); + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + + if (glfs_h_mkdir_with_xdata(ctx->fs, obj, ri->rec.entry, 0777, NULL, gfid, dict) != 0) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failure for Doing mkdir for file %s\n", + ri->rec.entry); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished Doing mkdir for file %s \n", + ri->rec.entry); + + } else if ((ri->rec.op == GF_FOP_RMDIR) || (ri->rec.op == GF_FOP_UNLINK)) { + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing rmdir/ublink for dir %s \n", + ri->rec.entry); + + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + if (glfs_h_unlink_with_xdata(ctx->fs, obj, ri->rec.entry, dict) != 0) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failure for Doing rmdir/unlink for file %s\n", + ri->rec.entry); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished Doing rmdir/unlink 
for file %s \n", + ri->rec.entry); + + } else if (ri->rec.op == GF_FOP_SYMLINK) { + + uuid_t gfid; + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing symlink for file %s to file %s \n", + ri->rec.entry, ri->rec.link_path); + + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + uuid_parse(ri->rec.gfid, gfid); + + if (glfs_h_symlink_with_xdata(ctx->fs, obj, ri->rec.entry, ri->rec.link_path, NULL, gfid, dict) == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failed to Doing symlink for file %s to file %s \n", + ri->rec.entry, ri->rec.link_path); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finished Doing symlink for file %s to file %s \n", + ri->rec.entry, ri->rec.link_path); + + } else if (ri->rec.op == GF_FOP_LINK) { + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing hard link for file %s to file %s \n", + ri->rec.entry, ri->rec.gfid); + + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + if ((to_obj = create_obj(ctx, ri->rec.gfid)) == NULL) { + goto err; + } + + if (glfs_h_link_with_xdata(ctx->fs, to_obj, obj, ri->rec.entry, dict) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failed to Doing hard link for file %s to file %s \n", + ri->rec.entry, ri->rec.gfid); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Finsihed doing hard link for file %s to file %s \n", + ri->rec.entry, ri->rec.gfid); + + } else if (ri->rec.op == GF_FOP_RENAME) { + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing rename for file %s to file %s \n", + ri->rec.entry, ri->rec.newloc); + + if ((obj = create_obj(ctx, ri->rec.pargfid)) == NULL) { + goto err; + } + if ((to_obj = create_obj(ctx, ri->rec.gfid)) == NULL) { + goto err; + } + + if (glfs_h_rename_with_xdata(ctx->fs, obj, ri->rec.entry, to_obj, ri->rec.newloc, dict) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "Failed to Doing rename for file %s to file %s \n", + ri->rec.entry, ri->rec.newloc); + goto err; + } + + 
nsr_worker_log(this->name, GF_LOG_INFO, + "Finsihed doing renam for file %s to file %s \n", + ri->rec.entry, ri->rec.newloc); + + + } else if ((ri->rec.op == GF_FOP_SETATTR) || (ri->rec.op == GF_FOP_FSETATTR)) { + + struct iatt iatt = {0, }; + int valid = 0; + int ret = -1; + + // TBD - do the actual settings once we do that + // right now we just set the mode so that changelog gets filled + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing attr for file %s \n", + ri->rec.gfid); + + if ((obj = create_obj(ctx, ri->rec.gfid)) == NULL) { + goto err; + } + + if (obj->inode->ia_type == IA_IFDIR) + fd = glfs_h_opendir_with_xdata(ctx->fs, obj, dict); + else + fd = glfs_h_open_with_xdata(ctx->fs, obj, O_RDWR, dict); + if (fd == NULL) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "open for file %s failed\n", + ri->rec.gfid); + goto err; + } + + iatt.ia_prot = ia_prot_from_st_mode(777); + valid = GF_SET_ATTR_MODE; + + + // Set one special dict flag to indicate the opcode so that + // the opcode gets set to this + if (dict_set_int32(dict,"recon-attr-opcode",ri->rec.op)) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "setting opcode to %d failed\n",ri->rec.op); + goto err; + } + + ret = glfs_fsetattr_with_xdata(fd, &iatt, valid, dict); + if (ret == -1) { + nsr_worker_log(this->name, GF_LOG_INFO, + "failed Doing attr for file %s \n", + ri->rec.gfid); + goto err; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "Doing attr for file %s \n", + ri->rec.gfid); + + } + + retval = _gf_true; + +err: + if (fd) { + /* + * It's not clear that we should be passing the same dict to + * glfs_close that was passed to us for glfs_open, but that's + * the prior behavior so let's preserve it for now. + */ + if (glfs_close_with_xdata(fd, dict) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "close failed\n"); + } + } + if (obj) { + /* + * AFAICT fd operations do not borrow this reference, so we + * still need to drop it ourselves. 
+ */ + glfs_h_close(obj); + } + if (to_obj) { + /* + * AFAICT fd operations do not borrow this reference, so we + * still need to drop it ourselves. + */ + glfs_h_close(to_obj); + } + return retval; +} + +//return back opcodes that requires reading from source +static gf_boolean_t +recon_check_changelog(nsr_recon_record_details_t *rd) +{ + return((rd->op == GF_FOP_WRITE) || + (rd->op == GF_FOP_FSETATTR) || + (rd-> op == GF_FOP_SETATTR) || + (rd->op == GF_FOP_FREMOVEXATTR) || + (rd->op == GF_FOP_SETXATTR) || + (rd->op == GF_FOP_FSETXATTR) || + (rd->op == GF_FOP_SYMLINK)); + +} + +// TBD +static gf_boolean_t +recon_compute_undo(nsr_recon_record_details_t *rd) +{ + return(_gf_false); +} + + +/* + * Function that talks to the brick for data tranfer. + * + * Input arguments: + * ctx - worker context + * work - pointer to work object + */ +static void +data_worker_func(nsr_per_node_worker_t *ctx, + nsr_recon_work_t *work) +{ + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + xlator_t *this = dr->this; + nsr_reconciliation_record_t *ri = NULL; + nsr_recon_record_details_t *rd = NULL; + int wip = 0; + dict_t * dict = NULL; + struct glfs_fd *fd = NULL; + struct glfs_object *obj = NULL; + uuid_t gfid; + uint32_t k_s = 0, v_s = 0; + char *t_b= NULL; + uint32_t num=0; + + switch (work->req_id){ + case NSR_WORK_ID_INI: + nsr_worker_log(this->name, GF_LOG_INFO, + "started data ini \n"); + + if (nsr_recon_start_work(ctx, _gf_false) != 0) { + ctx->result = -1; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "finished data ini \n"); + break; + case NSR_WORK_ID_FINI: + nsr_worker_log(this->name, GF_LOG_INFO, + "started data fini \n"); + + if (nsr_recon_end_work(ctx, _gf_false) != 0) { + ctx->result = -1; + return; + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "finished data fini \n"); + break; + case NSR_WORK_ID_SINGLE_RECONCILIATION_READ: + // first_index always starts with 1 but records starts at 0. 
+ wip = work->index - (dr->workers[0].recon_info->first_index); + ri = &(dr->workers[0].recon_info->records[wip]); + rd = &(ri->rec); + + dict = dict_new (); + if (!dict) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "failed allocating for dictionary\n"); + break; + } + if (dict_set_int32(dict,RECON_TERM_XATTR,ri->work.term)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + if (dict_set_int32(dict,RECON_INDEX_XATTR,ri->work.index)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + + switch (rd->op) { + case GF_FOP_WRITE: + + // record already copied. + // copy data to this node's info. + + uuid_parse(ri->rec.gfid, gfid); + + nsr_worker_log(this->name, GF_LOG_INFO, + "started recon read for file %s at offset %d at len %d\n", + ri->rec.gfid, rd->offset, rd->len); + + obj = glfs_h_create_from_handle (ctx->fs, gfid, + GFAPI_HANDLE_LENGTH, + NULL); + if (obj == NULL) { + GF_ASSERT(obj != NULL); + nsr_worker_log(this->name, GF_LOG_ERROR, + "creating of handle failed\n"); + break; + } + + // The file has probably got deleted. 
+ fd = glfs_h_open_with_xdata (ctx->fs, obj, O_RDONLY, + dict); + if (fd == NULL) { + GF_ASSERT(fd != NULL); + nsr_worker_log(this->name, GF_LOG_ERROR, + "opening of file failed\n"); + break; + } + + if (glfs_lseek_with_xdata (fd, rd->offset, SEEK_SET, + dict) != rd->offset) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "lseek of file failed to offset %d\n", + rd->offset); + break; + } + + ri->work.data = RD_CALLOC (rd->len , sizeof(char), + gf_mt_recon_work_data_t); + if (glfs_read_with_xdata (fd, ri->work.data, rd->len, + 0, dict) != rd->len) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "read of file failed to offset %d for bytes %d\n", + rd->offset, rd->len); + break; + } + break; + + case GF_FOP_FTRUNCATE: + case GF_FOP_SYMLINK: + case GF_FOP_RMDIR: + case GF_FOP_UNLINK: + case GF_FOP_MKNOD: + case GF_FOP_CREATE: + case GF_FOP_LINK: + case GF_FOP_MKDIR: + case GF_FOP_RENAME: + nsr_worker_log (this->name, GF_LOG_ERROR, + "unimplemented fop %u\n", rd->op); + break; + + case GF_FOP_FREMOVEXATTR: + case GF_FOP_REMOVEXATTR: + case GF_FOP_SETXATTR: + case GF_FOP_FSETXATTR: + + uuid_parse(ri->rec.gfid, gfid); + + + // This is for all the set attribute/extended + // attributes commands. Get all the attributes from + // the source and fill it in the buffer as a NULL + // seperated key and value which are in turn seperated + // by NULL. 
+ + nsr_worker_log(this->name, GF_LOG_INFO, + "doing getattr for gfid %s \n", + ri->rec.gfid); + + obj = glfs_h_create_from_handle (ctx->fs, gfid, + GFAPI_HANDLE_LENGTH, + NULL); + if (obj == NULL) { + GF_ASSERT(fd != NULL); + nsr_worker_log(this->name, GF_LOG_ERROR, + "creating of handle failed\n"); + break; + } + + if (obj->inode->ia_type == IA_IFDIR) + fd = glfs_h_opendir_with_xdata (ctx->fs, obj, + dict); + else + fd = glfs_h_open_with_xdata (ctx->fs, obj, + O_RDONLY, dict); + + if (fd == NULL) { + GF_ASSERT(fd != NULL); + nsr_worker_log(this->name, GF_LOG_ERROR, + "opening of file failed\n"); + break; + } + + if (get_xattr_total_size (fd, &t_b, &k_s, &v_s, &num, + dict) == -1) { + if (t_b) free(t_b); + nsr_worker_log(this->name, GF_LOG_ERROR, + "list of xattr of gfid %s failed\n", + rd->gfid); + break; + } + ri->work.data = RD_CALLOC ((k_s + v_s) , sizeof(char), + gf_mt_recon_work_data_t); + if (get_xattr(fd, t_b, ri->work.data, v_s, num, dict) == -1) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "get xattr of gfid %s failed\n", rd->gfid); + break; + } + ri->work.num = num; + nsr_worker_log(this->name, GF_LOG_INFO, + "finished getattr for gfid %s \n", + ri->rec.gfid); + free(t_b); + break; + + case GF_FOP_FSETATTR: + case GF_FOP_SETATTR: + //TBD - to get the actual attrbutes and fill + // mode, uid, gid, size, atime, mtime + nsr_worker_log (this->name, GF_LOG_ERROR, + "unimplemented fop %u\n", rd->op); + break; + default: + nsr_worker_log (this->name, GF_LOG_ERROR, + "unrecognized fop %u\n", rd->op); + + } + nsr_worker_log(this->name, GF_LOG_INFO, + "finished recon read for gfid %s at offset %d for %d bytes \n", + rd->gfid, rd->offset, rd->len); + break; + + case NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT: + // first_index always starts with 1 but records starts at 0. 
+ wip = work->index - (dr->workers[0].recon_info->first_index); + ri = &(dr->workers[0].recon_info->records[wip]); + rd = &(ri->rec); + + nsr_worker_log(this->name, GF_LOG_INFO, + "got recon commit for index %d that has gfid %s \n", + wip, rd->gfid); + dict = dict_new (); + if (!dict) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "failed allocating for dictionary\n"); + break; + } + if (dict_set_int32(dict,RECON_TERM_XATTR,ri->work.term)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + if (dict_set_int32(dict,RECON_INDEX_XATTR,ri->work.index)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + if (apply_record(ctx, ri, dict) == _gf_false) { + nsr_worker_log(this->name, GF_LOG_ERROR, + "apply_record fails\n"); + } + + nsr_worker_log(this->name, GF_LOG_INFO, + "finished recon commit for gfid %s \n", + rd->gfid); + break; + + case NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH: + dict = dict_new (); + if (!dict) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "failed allocating for dictionary\n"); + break; + } + if (dict_set_int32(dict,RECON_TERM_XATTR,ri->work.term)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + if (dict_set_int32(dict,RECON_INDEX_XATTR,ri->work.index)) { + ctx->result = -1; + nsr_worker_log(this->name, GF_LOG_ERROR, + "error setting term in dict\n"); + break; + } + + // Increment work index with the start index + wip = work->index - (dr->workers[0].recon_info->first_index); + ri = &(dr->workers[0].recon_info->records[wip]); + rd = &(ri->rec); + + glfs_fsync_with_xdata(fd, dict); + break; + + default: + nsr_worker_log (this->name, GF_LOG_ERROR, + "unrecognized request id %u\n", work->req_id); + } + + if (fd) { + glfs_close_with_xdata(fd, dict); + } + if (obj) { + glfs_h_close(obj); + } + if (dict) { + dict_unref(dict); + 
} +} + +// thread for doing data work +static void * +data_worker_main(nsr_per_node_worker_t *ctx) +{ + nsr_worker_log(this->name, GF_LOG_INFO, + "starting data worker func\n"); + init_worker(ctx, 0); + + while(1) { + nsr_recon_work_t *work = NULL; + nsr_recon_driver_ctx_t *dr = ctx->driver_ctx; + + nsr_worker_log(this->name, GF_LOG_INFO, + "waiting for work\n"); + + pthread_mutex_lock(&(ctx->mutex)); + while (list_empty(&(ctx->head.list))) { + pthread_cond_wait(&(ctx->cv), &(ctx->mutex)); + } + pthread_mutex_unlock(&(ctx->mutex)); + list_for_each_entry(work, &(ctx->head.list), list) { + nsr_worker_log(this->name, GF_LOG_INFO, + "got work with id %d\n",work->req_id); + work->in_use = _gf_false; + data_worker_func(ctx, work); + atomic_dec(&(dr->outstanding)); + break; + } + nsr_worker_log(this->name, GF_LOG_INFO,"deleting work item\n"); + list_del_init (&work->list); + GF_FREE(work); + nsr_worker_log(this->name, GF_LOG_INFO,"finished deleting work item\n"); + } + + return NULL; +} + + +//make recon work +static void +recon_make_work(nsr_recon_driver_ctx_t *ctx, + nsr_recon_work_t **work, + nsr_recon_work_req_id_t req_id, + int32_t i) +{ + xlator_t *this = ctx->this; + + // TBD - change this to get from a static pool + // This cannot fail + (*work) = RD_CALLOC (1, sizeof (nsr_recon_work_t), gf_mt_recon_work_t); + (*work)->req_id = req_id; + (*work)->index = i; + (*work)->in_use = _gf_true; + INIT_LIST_HEAD(&((*work)->list)); + return; +} + +// Schedule a work object to a worker thread. 
+static void +recon_queue_to_worker(nsr_recon_driver_ctx_t *ctx, + nsr_recon_work_t *work, + unsigned int id, + nsr_recon_queue_type_t type) +{ + nsr_per_node_worker_t *worker; + if (type == NSR_RECON_QUEUE_TO_CONTROL) { + worker = ctx->workers[id].control_worker; + nsr_driver_log(this->name, GF_LOG_INFO, + "queueing work to control index %d\n",id); + } else { + worker= ctx->workers[id].data_worker; + nsr_driver_log(this->name, GF_LOG_INFO, + "queueing work to data index %d\n",id); + } + pthread_mutex_lock(&worker->mutex); + list_add_tail(&work->list, &worker->head.list); + pthread_cond_signal(&worker->cv); + pthread_mutex_unlock(&worker->mutex); + return; +} + +typedef void * (*F_t) (void *); + +// In case mode is set to NSR_USE_THREADS, create worker threads. +static gf_boolean_t +create_worker_threads(nsr_recon_private_t *priv, + nsr_recon_driver_ctx_t *ctx, + nsr_per_node_worker_t *w, + gf_boolean_t control_or_data, + F_t f, + uint32_t num) +{ + uint32_t i; + nsr_per_node_worker_t *worker = w; + xlator_t *this = ctx->this; + + for (i=0; i < num; i++) { + worker->id = RD_CALLOC(1, 10, gf_mt_recon_id_t); + if (!worker->id) { + nsr_driver_log (priv->this->name, GF_LOG_ERROR, "memory allocation error \n"); + return _gf_false; + } + sprintf(worker->id,"recon_%d", i); + worker->driver_ctx = ctx ; + + if (ctx->mode == NSR_USE_THREADS) { + if (pthread_create(&worker->thread_id, NULL, f, worker)) { + nsr_driver_log (ctx->this->name, GF_LOG_ERROR, "control work thread creation error \n"); + return _gf_false; + } + } + worker->index = i; + worker++; + } + return _gf_true; +} + +/* + * In case of thread, send the work item; else call the function directly. + * + * Input arguments: + * bm - bitmap containing indices of nodes we want to send work + * num - number of such indices + * ctx - driver context from where we derive per worker context + * id - request ID + * q - control or data + * misc - used to overload such as index. 
+ */ +static void +send_and_wait(int32_t *result, + int32_t *op_errno, + int32_t bm, + uint32_t num, + nsr_recon_driver_ctx_t *ctx, + nsr_recon_work_req_id_t id, + nsr_recon_queue_type_t q, + int32_t misc) +{ + uint32_t i = 0; + nsr_recon_work_t *work; + +#define CONTROL_WORKER(i) ctx->workers[i].control_worker +#define DATA_WORKER(i) ctx->workers[i].data_worker +#define WORKER(i) ((q == NSR_RECON_QUEUE_TO_CONTROL) ? (CONTROL_WORKER(i)) : (DATA_WORKER(i))) + + *result = *op_errno = 0; + + for (i=0; i < num; i++) { + if ((bm & (1 << i)) && ctx->workers[i].in_use) { + WORKER(i)->result = 0; + WORKER(i)->op_errno = 0; + } + } + if (ctx->mode == NSR_SEQ) { + for (i=0; i < num; i++) { + if ((bm & (1 << i)) && ctx->workers[i].in_use) { + recon_make_work(ctx,&work, id, misc); + if (q == NSR_RECON_QUEUE_TO_CONTROL) { + if (i == 0) + control_worker_func_0(ctx->workers[0].control_worker, work); + else + control_worker_func(ctx->workers[i].control_worker, work); + } else { + data_worker_func(ctx->workers[i].data_worker, work); + } + GF_FREE(work); + } + } + goto out; + } + + for (i=0; i < num; i++) { + if ((bm & (1 << i)) && ctx->workers[i].in_use) { + recon_make_work(ctx,&work, id, misc); + atomic_inc(&(ctx->outstanding)); + recon_queue_to_worker(ctx, work, i, q); + } + } + + nsr_driver_log(this->name, GF_LOG_INFO, "send_and_wait: waiting\n"); + while (ctx->outstanding) { + pthread_yield(); + } +out: + for (i=0; i < num; i++) { + if ((bm & (1 << i)) && ctx->workers[i].in_use) { + if (WORKER(i)->result == -1) { + *result = -1; + } + } + } + if (*result == -1) { + for (i=0; i < num; i++) { + if ((bm & (1 << i)) && ctx->workers[i].in_use) { + if (WORKER(i)->op_errno == EAGAIN) { + *op_errno = EAGAIN; + break; + } else { + *op_errno = EIO; + } + } + } + } + + nsr_driver_log(this->name, GF_LOG_INFO, "send_and_wait: all workers have returned with result: %d errno:%d\n", *result, *op_errno); + return; +} + +// send INI or FINI +static int32_t 
+nsr_recon_in_use(nsr_recon_driver_ctx_t *ctx, + uint32_t i, + gf_boolean_t in_use) +{ + uint32_t bm = 1 << i; + gf_boolean_t send = _gf_false; + int32_t status =0, op_errno = 0; + + send = (ctx->workers[i].in_use != in_use); + ctx->workers[i].in_use = in_use; + + if (!send) { + return 0; + } + nsr_driver_log (this->name, GF_LOG_INFO, + "sending %s to index %d\n",in_use?"INI":"FINI",i); + + send_and_wait(&status, &op_errno, bm, ctx->replica_group_size, ctx, + (in_use == _gf_true) ? NSR_WORK_ID_INI : NSR_WORK_ID_FINI, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + goto err; + + send_and_wait(&status, &op_errno, bm, ctx->replica_group_size, ctx, + (in_use == _gf_true) ? NSR_WORK_ID_INI : NSR_WORK_ID_FINI, + NSR_RECON_QUEUE_TO_DATA, -1); + if (status == -1) + goto err; + + /* + * We really need better error recovery. To activate a worker, we + * allocate memory and send two messages. If any of those actions + * fail, we should undo the others. It would probably be good to + * collapse the two messages into one, because it's pretty trivial to + * allocate a temporary structure and either link it in or free it + * depending on the result here. + */ + + if (in_use == _gf_false) { + GF_FREE(ctx->workers[i].recon_info); + } + + return 0; + +err: + GF_FREE(ctx->workers[i].recon_info); + ctx->workers[i].recon_info = NULL; + return -1; +} + +gf_boolean_t +nsr_recon_driver_reconciliator (nsr_recon_private_t *priv) +{ + uint32_t replica_group_size = priv->replica_group_size; + uint32_t i; + nsr_recon_driver_ctx_t *ctx = priv->driver_thread_context; + int32_t bm; + int32_t status = 0; + int32_t op_errno = 0; + gf_boolean_t do_recon = _gf_false; + uint32_t start_index = ctx->workers[0].recon_info->first_index; + uint32_t end_index = ctx->workers[0].recon_info->last_index; + uint32_t num = ((start_index == 0) && (end_index == 0)) ? 
0 : (end_index - start_index + 1); + + nsr_driver_log (this->name, GF_LOG_INFO, + "starting reconciliation work as reconciliator \n"); + + // nothing to be done? signal back to the recon translator that this + // phase done. + bm = 1; + for (i=1; i < replica_group_size; i++) { + if (ctx->workers[i].in_use && + (ctx->workers[0].recon_info->last_term == ctx->workers[i].recon_info->last_term)) { + ctx->workers[i].recon_info->last_index = end_index; + ctx->workers[i].recon_info->first_index = start_index; + bm |= (1 << i); + do_recon = _gf_true; + } + } + + if (!do_recon || !num) { + nsr_driver_log (this->name, GF_LOG_INFO, + "nothing needs to be done as resolutor \n"); + return _gf_true; + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "getting reconciliation window for term %d from %dto %d \n", + ctx->workers[0].recon_info->last_term, + start_index, end_index); + // We have set the bm in the above for loop where we go thru all nodes + // including this node that have seen the last term. 
+ send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_GET_RECONCILATION_WINDOW, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished getting reconciliation window for term %d from %dto %d \n", + ctx->workers[0].recon_info->last_term, + start_index, end_index); + + + // from the changelogs, calculate the entries that need action and the + // source for each of these entries + compute_reconciliation_work(ctx); + + // for each of the entries that need fixup, issue IO + for (i=start_index; i < (start_index + num); i++) { + nsr_reconciliator_info_t *my_recon_info = + ctx->workers[0].recon_info; + nsr_reconciliation_record_t *record = + &(my_recon_info->records[i - start_index]); + + record->work.term = ctx->workers[0].recon_info->last_term; + record->work.index = i; + + nsr_driver_log (this->name, GF_LOG_INFO, + "fixing index %d\n",i); + if ((record->work.type == NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE) || + (record->work.type == NSR_RECON_WORK_HOLE_TO_FILL)) { + // 1st case (RECON_WORK_HOLE_TO_PSEUDO_HOLE): If there + // are only pseudo_holes in others, it is best effort. + // Just pick from the first node that has it and + // proceed. + // 2nd case (RECON_WORK_HOLE_TO_FILL): this node has + // either a HOLE or PSUEDO_HOLE and some one else has a + // FILL(source). 
analyse the changelog to check if data + // needs to be read or if the log has all the data + // required + + if (recon_check_changelog(&record->rec)) { + bm = (1 << record->work.source); + nsr_driver_log (this->name, GF_LOG_INFO, + "reading data from source %d\n",record->work.source); + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_SINGLE_RECONCILIATION_READ, + NSR_RECON_QUEUE_TO_DATA, + i); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "got data from source %d\n",record->work.source); + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "fixing local data as part of reconciliation\n"); + + bm = 1; + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT, + NSR_RECON_QUEUE_TO_DATA, + i); + if (status == -1) + return _gf_false; + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished fixing local data as part of reconciliation\n"); + + } else if (record->work.type == NSR_RECON_WORK_COMPARE_PSEUDO_HOLE) { + // this node has a pseudo_hole and some others have just + // that too. Just convert this to FILL. let others + // blindly pick it from here. 
+ nsr_driver_log (this->name, GF_LOG_INFO, + "fixing this record as a fill\n"); + bm = 1; + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH, + NSR_RECON_QUEUE_TO_DATA, + i); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished fixing this record as a fill\n"); + } + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished reconciliation work as reconciliator \n"); + + // tbd - mark this term golden in the reconciliator + return _gf_true; +} + +gf_boolean_t +nsr_recon_driver_resolutor (nsr_recon_private_t *priv) +{ + uint32_t replica_group_size = priv->replica_group_size; + uint32_t i; + nsr_recon_driver_ctx_t *ctx = priv->driver_thread_context; + int32_t bm; + int32_t status = 0; + int32_t op_errno = 0; + // This node's last term is filled when it gets a message from the + // leader to act as a reconciliator. + uint32_t recon_index = ctx->reconciliator_index; + nsr_reconciliator_info_t *my_info = + ctx->workers[0].recon_info; + nsr_reconciliator_info_t *his_info = + ctx->workers[recon_index].recon_info; + uint32_t my_last_term = my_info->last_term; + uint32_t to_do_term = his_info->last_term; + uint32_t my_start_index = 1, my_end_index = 1; + uint32_t his_start_index = 1, his_end_index = 1; + uint32_t num = 0; + gf_boolean_t fl = _gf_true; + + nsr_driver_log (this->name, GF_LOG_INFO, + "starting resolutor work with reconciliator as %d from term %d to term %d \n", + recon_index, my_last_term, to_do_term); + + do { + + if (!fl) { + (his_info->last_term)++; + (my_info->last_term)++; + } else { + his_info->last_term = my_last_term; + } + + nsr_driver_log (this->name, GF_LOG_INFO, "resolving term %d \n", my_info->last_term); + + // Get reconciliator's term information for that term + nsr_driver_log (this->name, GF_LOG_INFO, + "getting info from reconciliator for term %d \n", my_info->last_term); + bm = (1 << recon_index); + send_and_wait(&status, &op_errno, bm, + 
replica_group_size, + ctx, + NSR_WORK_ID_GET_GIVEN_TERM_INFO, + NSR_RECON_QUEUE_TO_CONTROL, his_info->last_term); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished getting info from reconciliator for term %d \n", my_info->last_term); + + + // empty term + if (!his_info->commited_ops) { + nsr_driver_log (this->name, GF_LOG_INFO, + "reconciliator for term %d is empty. moving to next term. \n", my_info->last_term); + // TBD - mark the term golden + fl = _gf_false; + continue; + } + + // calculate the resolution window boundary. for the last term + // this node saw, we compare the resolution window of this and + // reconciliator. for the rest of the nodes, we just accept the + // reconciliator info. + if (fl) { + my_start_index = my_info->first_index; + my_end_index = my_info->last_index; + his_start_index = his_info->first_index; + his_end_index = his_info->last_index; + my_info->first_index = (my_start_index < his_start_index) ? my_start_index : his_start_index; + my_info->last_index = (my_end_index > his_end_index) ? 
my_end_index : his_end_index; + } else { + my_info->first_index = his_info->first_index; + my_info->last_index = his_info->last_index; + my_info->commited_ops = his_info->commited_ops; + } + if (my_info->first_index == 0) + my_info->first_index = 1; + num = (my_info->last_index - my_info->first_index) + 1; + + + // Get the logs from the reconciliator (and this node for this + // term) + if (fl) + bm = ((1 << recon_index) | 1); + else + bm = (1 << recon_index); + + nsr_driver_log (this->name, GF_LOG_INFO, + "getting reconciliation window for term %d from %d to %d \n", + my_info->last_term, + my_info->first_index, my_info->last_index); + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_GET_RECONCILATION_WINDOW, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished getting reconciliation window for term %d from %d to %d \n", + my_info->last_term, + my_info->first_index, my_info->last_index); + + // from the changelogs, calculate the entries that need action + compute_resolution_work(ctx, my_info, his_info, !fl); + + + // for each of the entries that need fixup, issue IO + for (i=my_info->first_index; i < (my_info->first_index + num); i++) { + nsr_reconciliation_record_t *record = + &(my_info->records[i - my_info->first_index]); + + record->work.term = my_info->last_term; + record->work.index = i; + + nsr_driver_log (this->name, GF_LOG_INFO, + "fixing index %d\n",i); + if ((record->work.type == NSR_RECON_WORK_HOLE_TO_FILL) || + (record->work.type == NSR_RECON_WORK_UNDO_FILL)) { + if (((record->work.type == NSR_RECON_WORK_HOLE_TO_FILL) && + recon_check_changelog(&record->rec)) || + ((record->work.type == NSR_RECON_WORK_UNDO_FILL) && + recon_compute_undo(&record->rec))) { + nsr_driver_log (this->name, GF_LOG_INFO, + "reading data from source %d\n",recon_index); + bm = (1 << recon_index); + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + 
NSR_WORK_ID_SINGLE_RECONCILIATION_READ, + NSR_RECON_QUEUE_TO_DATA, + i); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished reading data from source %d\n",recon_index); + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "fixing local data as part of resolutor\n"); + + bm = 1; + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT, + NSR_RECON_QUEUE_TO_DATA, + i); + if (status == -1) + return _gf_false; + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished fixing local data as part of resolutor\n"); + } + } + fl = _gf_false; + + // tbd - mark this term golden in the reconciliator + } while (my_last_term++ != to_do_term); + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished resolutor work \n"); + return _gf_true; +} + +gf_boolean_t +nsr_recon_driver_leader (nsr_recon_private_t *priv) +{ + uint32_t replica_group_size = priv->replica_group_size; + uint32_t i; + nsr_recon_driver_ctx_t *ctx = priv->driver_thread_context; + int32_t bm; + int32_t status = 0; + int32_t op_errno = 0; + int32_t chosen = -1; + int32_t last_term = -1, last_ops = -1; + + nsr_driver_log (this->name, GF_LOG_INFO, + "getting last term info from all members of this group\n"); + // Get last term info from all members for this group + send_and_wait(&status, &op_errno, -1, + replica_group_size, + ctx, + NSR_WORK_ID_GET_LAST_TERM_INFO, + NSR_RECON_QUEUE_TO_CONTROL, ctx->current_term); + if (status == -1) + return _gf_false; + + + // compare all the info received and choose the reconciliator First + // choose all with latest term + for (i=0; i < replica_group_size; i++) { + if (ctx->workers[i].in_use) { + if (ctx->workers[i].recon_info->last_term > last_term) { + last_term = ctx->workers[i].recon_info->last_term; + } + } + } + // First choose all with latest term and highest ops + for (i=0; i < replica_group_size; i++) { + if ((ctx->workers[i].in_use) && (last_term == 
ctx->workers[i].recon_info->last_term)) { + if (ctx->workers[i].recon_info->commited_ops > last_ops) { + last_ops = ctx->workers[i].recon_info->commited_ops; + } + } + } + // choose the first among the lot + for (i=0; i < replica_group_size; i++) { + if ((ctx->workers[i].in_use) && + (last_term == ctx->workers[i].recon_info->last_term) && + (last_ops == ctx->workers[i].recon_info->commited_ops)) { + chosen = i; + break; + } + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "reconciliator chosen is %d\n", chosen); + ctx->reconciliator_index = chosen; + GF_ASSERT(chosen != -1); + if (chosen == -1) { + nsr_driver_log (this->name, GF_LOG_INFO, + "no reconciliatior chosen\n"); + return _gf_false; + } + + // send the message to reconciliator to do reconciliation with list of + // nodes that are part of this quorum + if (chosen != 0) { + nsr_driver_log (this->name, GF_LOG_INFO, + "sending reconciliation work to %d\n", chosen); + bm = 1 << ctx->reconciliator_index; + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_RECONCILIATOR_DO_WORK, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + return _gf_false; + nsr_driver_log (this->name, GF_LOG_INFO, + "finished reconciliation work to %d\n", chosen); + } else { + nsr_driver_log (this->name, GF_LOG_INFO, + "local node is reconciliator. before set jmp\n"); + nsr_recon_driver_reconciliator(priv); + } + + // send message to all other nodes to sync up with the reconciliator + // including itself if required + // requires optimisation - TBD + if (chosen != 0) { + nsr_driver_log (this->name, GF_LOG_INFO, + "local node resolution needs to be done. 
before set jmp\n"); + nsr_recon_driver_resolutor(priv); + } + + nsr_driver_log (this->name, GF_LOG_INFO, + "sending resolution work to all nodes except this node and reconciliator\n"); + bm = ~((1 << ctx->reconciliator_index) || 1); + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_RESOLUTION_DO_WORK, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + return _gf_false; + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished reconciliation work as leader \n"); + return _gf_true; +} + +// main recon driver thread +void * +nsr_reconciliation_driver(void *arg) +{ + nsr_recon_private_t *priv = (nsr_recon_private_t *) arg; + uint32_t replica_group_size = priv->replica_group_size; + uint32_t i; + nsr_per_node_worker_t *control_s, *data_s; + nsr_recon_driver_ctx_t **driver_ctx, *ctx; + int32_t bm; + xlator_t *this = priv->this; + char *con_name, *data_name; + int32_t status = 0; + int32_t op_errno = 0; + + driver_ctx = &priv->driver_thread_context; + (*driver_ctx) = GF_CALLOC (1, + sizeof (nsr_recon_driver_ctx_t), + gf_mt_recon_driver_ctx_t); + if (!driver_ctx) { + gf_log (this->name, GF_LOG_ERROR, "memory allocation error \n"); + return NULL; + } + ctx = *driver_ctx; + ctx->this = priv->this; + ctx->replica_group_size = replica_group_size; + + ctx->fp = recon_create_log (priv->replica_group_members[0], "nsr-driver-log"); + if (!ctx->fp) + return NULL; + + if ((pthread_mutex_init(&(ctx->mutex), NULL)) || + (pthread_cond_init(&(ctx->cv), NULL))){ + nsr_driver_log (this->name, GF_LOG_ERROR, "mutex init error \n"); + return NULL; + } + INIT_LIST_HEAD(&(ctx->role_head.list)); + + ctx->workers = RD_CALLOC (replica_group_size, + sizeof(nsr_replica_worker_t), + gf_mt_recon_worker_t); + if (!ctx->workers) { + nsr_driver_log (this->name, GF_LOG_ERROR, "memory allocation error \n"); + return NULL; + } + for (i=0; i < replica_group_size; i++) { + strcpy(ctx->workers[i].name, priv->replica_group_members[i]); + } + + control_s = RD_CALLOC 
(replica_group_size, + sizeof(nsr_per_node_worker_t), + gf_mt_recon_per_node_worker_t); + if (!control_s) { + nsr_driver_log (this->name, GF_LOG_ERROR, "memory allocation error \n"); + return NULL; + } + + data_s = RD_CALLOC (replica_group_size, + sizeof(nsr_per_node_worker_t), + gf_mt_recon_per_node_worker_t); + if (!data_s) { + nsr_driver_log (this->name, GF_LOG_ERROR, "memory allocation error \n"); + return NULL; + } + for (i=0; i < replica_group_size; i++) { + ctx->workers[i].control_worker = &control_s[i]; + if (asprintf(&con_name,"recon-con-%u",i) < 1) { + return NULL; + } + ctx->workers[i].control_worker->fp = recon_create_log + (priv->replica_group_members[0], con_name); + if (!ctx->workers[i].control_worker->fp) + return NULL; + ctx->workers[i].data_worker = &data_s[i]; + if (asprintf (&data_name,"recon-data-%u",i) <1) { + return NULL; + } + ctx->workers[i].data_worker->fp = recon_create_log + (priv->replica_group_members[0], data_name); + if (!ctx->workers[i].data_worker->fp) + return NULL; + } + + nsr_driver_log (this->name, GF_LOG_INFO, "creating threads \n"); + // Create the worker threads + // For every brick including itself there will be 2 worker threads: + // one for data and one for control + if (!create_worker_threads(priv, ctx, control_s, _gf_true, + (F_t) control_worker_main, replica_group_size) || + !create_worker_threads(priv, ctx, data_s, _gf_false, + (F_t) data_worker_main, replica_group_size)) { + return NULL; + } + + for (i=0; i < replica_group_size; i++) { + nsr_recon_get_file(priv->volname, &(ctx->workers[i])); + } + + while (1) { + + nsr_role_work_t *rr; + + nsr_driver_log (this->name, GF_LOG_INFO, "waiting for role to be queued \n"); + pthread_mutex_lock(&(ctx->mutex)); + while (list_empty(&(ctx->role_head.list))) { + pthread_cond_wait(&(ctx->cv), &(ctx->mutex)); + } + pthread_mutex_unlock(&(ctx->mutex)); + + list_for_each_entry(rr, &(ctx->role_head.list), list) { + nsr_recon_driver_state_t state; + state = 
nsr_recon_driver_get_role(&status, ctx, rr); + + if (status == -1) { + op_errno = EIO; + goto out; + } + + switch (state) { + + case leader: + if (!nsr_recon_driver_leader(priv)) { + goto out; + } + break; + case reconciliator: + if (!nsr_recon_driver_reconciliator(priv)) { + goto out; + } + break; + case resolutor: + if (!nsr_recon_driver_resolutor(priv)) { + goto out; + } + break; + + case joiner: + + nsr_driver_log (this->name, GF_LOG_INFO, "getting last term info from all members of this group\n"); + // Get last term info from all members for this group + // which will be the leader(this node) and the node that wants to join. + send_and_wait(&status, &op_errno, -1, + replica_group_size, + ctx, + NSR_WORK_ID_GET_LAST_TERM_INFO, + NSR_RECON_QUEUE_TO_CONTROL, ctx->current_term); + if (status == -1) + goto out; + + + // send message to other node that just joined to sync up with this node which is also the leader + nsr_driver_log (this->name, GF_LOG_INFO, "sending resolution work to all nodes except this\n"); + bm = ~(1); + send_and_wait(&status, &op_errno, bm, + replica_group_size, + ctx, + NSR_WORK_ID_RESOLUTION_DO_WORK, + NSR_RECON_QUEUE_TO_CONTROL, -1); + if (status == -1) + goto out; + + nsr_driver_log (this->name, GF_LOG_INFO, + "finished recon work as joiner \n"); + break; + + default: + nsr_driver_log (this->name, GF_LOG_ERROR, + "bad state %d", state); + } + + + // free the asasociated recon_info contexts created as part of this role + +out: + nsr_driver_log (this->name, GF_LOG_INFO, + "sending end of reconciliation message \n"); + nsr_recon_return_back(priv, ctx->term, status, op_errno); + nsr_driver_log (this->name, GF_LOG_INFO, + "finished sending end of reconciliation message \n"); + } + list_del_init (&rr->list); + } + + return NULL; +} diff --git a/xlators/cluster/nsr-recon/src/recon_driver.h b/xlators/cluster/nsr-recon/src/recon_driver.h new file mode 100644 index 000000000..3efb26269 --- /dev/null +++ b/xlators/cluster/nsr-recon/src/recon_driver.h 
@@ -0,0 +1,325 @@
+/*
+  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RECON_DRIVER_H__
+#define __RECON_DRIVER_H__
+
+
+#include "api/src/glfs.h"
+
+/*
+ * Fixed limits.  MAX_HOSTNAME_LEN sizes nsr_recon_role_t.info[].name and
+ * MAXIMUM_REPLICA_STRENGTH sizes nsr_recon_role_t.info[].
+ */
+#define MAX_HOSTNAME_LEN 32
+#define MAXIMUM_REPLICA_STRENGTH 8
+#define MAX_RECONCILIATION_WINDOW_SIZE 10000
+
+#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
+#define GLUSTERD_VOLUME_DIR_PREFIX "vols"
+#define GLUSTERD_BRICK_INFO_DIR "bricks"
+
+/*
+ * Even with the names fixed, the non-NSR_DEBUG definitions of nsr_*_log don't
+ * work because many callers don't have "this" defined.
+ *
+ * TBD: use gf_log, fix "this" problem, eliminate extra fields and newlines.
+ */
+#define NSR_DEBUG
+
+/* Work request identifiers; the values are consecutive, starting at 0. */
+typedef enum nsr_recon_work_req_id_t {
+        NSR_WORK_ID_GET_NONE = 0,
+        NSR_WORK_ID_GET_LAST_TERM_INFO = NSR_WORK_ID_GET_NONE + 1,
+        NSR_WORK_ID_GET_GIVEN_TERM_INFO = NSR_WORK_ID_GET_LAST_TERM_INFO + 1,
+        NSR_WORK_ID_RECONCILIATOR_DO_WORK = NSR_WORK_ID_GET_GIVEN_TERM_INFO + 1,
+        NSR_WORK_ID_RESOLUTION_DO_WORK = NSR_WORK_ID_RECONCILIATOR_DO_WORK + 1,
+        NSR_WORK_ID_GET_RECONCILATION_WINDOW = NSR_WORK_ID_RESOLUTION_DO_WORK + 1,
+        NSR_WORK_ID_SINGLE_RECONCILIATION_READ = NSR_WORK_ID_GET_RECONCILATION_WINDOW + 1,
+        NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT = NSR_WORK_ID_SINGLE_RECONCILIATION_READ + 1,
+        NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH = NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT + 1,
+        NSR_WORK_ID_GET_RESOLUTION_WINDOW = NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH + 1,
+        NSR_WORK_ID_END_RECONCILIATION = NSR_WORK_ID_GET_RESOLUTION_WINDOW + 1,
+        NSR_WORK_ID_INI = NSR_WORK_ID_END_RECONCILIATION + 1,
+        NSR_WORK_ID_FINI = NSR_WORK_ID_INI + 1
+} nsr_recon_work_req_id_t;
+
+/* Which per-node worker queue (control or data) a request is sent to. */
+typedef enum nsr_recon_queue_type_t {
+        NSR_RECON_QUEUE_TO_CONTROL = 0,
+        NSR_RECON_QUEUE_TO_DATA = NSR_RECON_QUEUE_TO_CONTROL + 1,
+} nsr_recon_queue_type_t;
+
+/*
+ * Changelog record kinds.  Written as hex rather than the original 0b...
+ * literals (a compiler extension); the numeric values 0, 1 and 3 are unchanged.
+ */
+typedef enum nsr_log_type_t {
+        NSR_LOG_HOLE = 0x0,
+        NSR_LOG_PSEUDO_HOLE = 0x1,
+        NSR_LOG_FILL = 0x3
+} nsr_log_type_t;
+
+typedef enum nsr_mode_t {
+        NSR_SEQ = 0,
+        NSR_USE_THREADS = 1,
+        NSR_ASYNC = 2
+} nsr_mode_t;
+
+typedef enum nsr_recon_work_type_t {
+        NSR_RECON_WORK_NONE = 0,
+        NSR_RECON_WORK_HOLE_TO_NOOP = NSR_RECON_WORK_NONE + 1,
+        NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE = NSR_RECON_WORK_HOLE_TO_NOOP + 1,
+        NSR_RECON_WORK_COMPARE_PSEUDO_HOLE = NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE + 1,
+        NSR_RECON_WORK_HOLE_TO_FILL = NSR_RECON_WORK_COMPARE_PSEUDO_HOLE + 1,
+        NSR_RECON_WORK_UNDO_FILL = NSR_RECON_WORK_HOLE_TO_FILL + 1,
+} nsr_recon_work_type_t;
+
+/* Role a node plays in one reconciliation round (nsr_recon_role_t.role). */
+typedef enum nsr_recon_driver_state_t {
+        none = 0,
+        leader = 1,
+        reconciliator = 2,
+        resolutor = 3,
+        joiner = 4,
+} nsr_recon_driver_state_t;
+
+// role structure
+#pragma pack(push, 1)
+typedef struct _nsr_recon_role_s {
+        uint32_t role; // leader, reconciliator, resolutor
+        uint32_t num; // required in case state is reconciliator
+        uint32_t current_term; // current term used in case of leader
+        // In case this is reconciliator, num is set to nodes that were part
+        // of previous term.
+        // In case this is resolutor, num is set to 2.
+        // info[0] - information for this node.
+        // info[1] - information of the reconciliator.
+        // In case this is leader, num is set to this term's membership list
+        // set info.name to all members including the leader
+        struct {
+                int32_t last_term;
+                int32_t commited_ops;
+                uint32_t last_index;
+                uint32_t first_index;
+                char name[MAX_HOSTNAME_LEN];
+        } info[MAXIMUM_REPLICA_STRENGTH];
+} nsr_recon_role_t;
+#pragma pack(pop)
+
+/*
+ * Byte-swap an nsr_recon_role_t in place.  is_true == _gf_true applies ntohl
+ * (wire to host); anything else applies htonl (host to wire).  num is
+ * converted first on the way in and last on the way out, so the loop always
+ * sees it in host order.
+ *
+ * The loop is additionally capped at MAXIMUM_REPLICA_STRENGTH: num arrives
+ * from the peer and was previously trusted as an index bound for info[].
+ *
+ * NOTE(review): role and info[].name are not converted by this macro; role
+ * therefore travels in the sender's byte order.  Changing that would alter the
+ * wire format, so it is left as is.
+ */
+#define ENDIAN_CONVERSION_RR(rr, is_true) \
+{ \
+        uint32_t i=0; \
+        uint32_t (*f)(uint32_t) = (((is_true) == _gf_true) ? ntohl : htonl); \
+        if ((is_true) == _gf_true) (rr).num = f((rr).num); \
+        (rr).current_term = f((rr).current_term); \
+        for (i=0; (i < (rr).num) && (i < MAXIMUM_REPLICA_STRENGTH); i++) { \
+                (rr).info[i].last_term = f((rr).info[i].last_term); \
+                (rr).info[i].commited_ops = f((rr).info[i].commited_ops); \
+                (rr).info[i].last_index = f((rr).info[i].last_index); \
+                (rr).info[i].first_index = f((rr).info[i].first_index); \
+        } \
+        if ((is_true) == _gf_false) (rr).num = f((rr).num); \
+}
+
+// last term info structure
+#pragma pack(push, 1)
+typedef struct _nsr_recon_last_term_info_s {
+        int32_t last_term;
+        int32_t commited_ops;
+        uint32_t last_index;
+        uint32_t first_index;
+} nsr_recon_last_term_info_t;
+#pragma pack(pop)
+
+/* Byte-swap every field of an nsr_recon_last_term_info_t in place. */
+#define ENDIAN_CONVERSION_LT(lt, is_true) \
+{ \
+        uint32_t (*f)(uint32_t) = (((is_true) == _gf_true) ? ntohl : htonl); \
+        (lt).last_term = f((lt).last_term); \
+        (lt).commited_ops = f((lt).commited_ops); \
+        (lt).last_index = f((lt).last_index); \
+        (lt).first_index = f((lt).first_index); \
+}
+
+// log information
+#pragma pack(push, 1)
+typedef struct _nsr_recon_log_info_s {
+        uint32_t term;
+        uint32_t first_index;
+        uint32_t last_index;
+} nsr_recon_log_info_t;
+#pragma pack(pop)
+
+/* Byte-swap every field of an nsr_recon_log_info_t in place. */
+#define ENDIAN_CONVERSION_LI(li, is_true) \
+{ \
+        uint32_t (*f)(uint32_t) = (((is_true) == _gf_true) ? ntohl : htonl); \
+        (li).term = f((li).term); \
+        (li).first_index = f((li).first_index); \
+        (li).last_index = f((li).last_index); \
+}
+
+#pragma pack(push, 1)
+typedef struct nsr_recon_record_details_s {
+        uint32_t type;
+        uint32_t op;
+        char gfid[36+1];
+        char pargfid[36+1];
+        char link_path[256]; // should it be PATH_MAX?
+        uint32_t offset;
+        uint32_t len;
+        char entry[128];
+        char newloc[128]; // for rename. can you overload link_path for this? TBD
+        mode_t mode;
+} nsr_recon_record_details_t;
+#pragma pack(pop)
+
+/*
+ * Byte-swap type, op, offset and len of an nsr_recon_record_details_t in
+ * place.  NOTE(review): mode is not converted here.
+ */
+#define ENDIAN_CONVERSION_RD(rd, is_true) \
+{ \
+        uint32_t (*f)(uint32_t) = (((is_true) == _gf_true) ? ntohl : htonl); \
+        (rd).type = f((rd).type); \
+        (rd).op = f((rd).op); \
+        (rd).offset = f((rd).offset); \
+        (rd).len = f((rd).len); \
+}
+
+typedef struct _nsr_role_work_s {
+        nsr_recon_role_t role;
+        uint32_t term;
+        struct list_head list;
+} nsr_role_work_t;
+
+typedef struct _nsr_recon_work_s {
+        gf_boolean_t in_use;
+        uint32_t index;
+        uint32_t req_id;
+        struct list_head list;
+} nsr_recon_work_t;
+
+typedef struct _nsr_reconciliation_work_s {
+        uint32_t term;
+        uint32_t index;
+        uint32_t type;
+        uint32_t source;
+        void *data;
+
+        uint32_t num; // used for xattr
+
+} nsr_reconciliation_work_t;
+
+typedef struct _nsr_reconciliation_record_s {
+        nsr_reconciliation_work_t work; // will store the computed work
+        nsr_recon_record_details_t rec;
+} nsr_reconciliation_record_t;
+
+typedef struct _nsr_reconciliator_info {
+        uint32_t reconcilator_index;
+        int32_t last_term;
+        int32_t commited_ops;
+        uint32_t last_index;
+        uint32_t first_index;
+        //nsr_reconciliation_record_t records[MAX_RECONCILIATION_WINDOW_SIZE];
+        nsr_reconciliation_record_t *records;
+} nsr_reconciliator_info_t;
+
+typedef struct _nsr_per_node_worker_s {
+        char *id; // identifier
+        char vol_file[256]; //volfile that will be used by this thread
+        glfs_t *fs;
+        glfs_fd_t *aux_fd;
+        uint32_t index; // index into array of workers
+        pthread_t thread_id; // thread id
+        void * context; // thread context
+        struct _nsr_recon_driver_ctxt *driver_ctx;
+        char local; // local data worker
+        //struct list_head list; //list of work items
+        nsr_recon_work_t head;
+        pthread_mutex_t mutex; //mutex to guard the state
+        pthread_cond_t cv; //condition variable for signaling the worker thread
+        gf_boolean_t is_control;
+#if defined(NSR_DEBUG)
+        FILE *fp;
+#endif
+        int32_t result; // result of latest work
+        int32_t op_errno; // errno
+} nsr_per_node_worker_t;
+
+typedef struct _nsr_replica_worker_s {
+        char name[256];
+        nsr_per_node_worker_t *control_worker;
+        nsr_per_node_worker_t *data_worker;
+        gf_boolean_t in_use;
+        nsr_reconciliator_info_t *recon_info; // Bunch of infos kept for this reconciliation
+} nsr_replica_worker_t;
+
+typedef struct _nsr_recon_driver_ctxt {
+        xlator_t *this;
+        uint32_t replica_group_size; // number of static members of replica group
+        nsr_replica_worker_t *workers; // worker info
+        int32_t reconciliator;
+        pthread_mutex_t mutex;
+        pthread_cond_t cv;
+        nsr_role_work_t role_head;
+        volatile int32_t outstanding;
+        uint32_t reconciliator_index;
+        uint32_t term;
+        uint32_t current_term;
+        nsr_mode_t mode; // default set to seq
+#if defined(NSR_DEBUG)
+        FILE *fp;
+#endif
+} nsr_recon_driver_ctx_t;
+
+void *
+nsr_reconciliation_driver(void *);
+
+gf_boolean_t
+nsr_recon_driver_set_role(nsr_recon_driver_ctx_t *ctx, nsr_recon_role_t *rr, uint32_t term);
+
+#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
+#define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1))
+#define atomic_fetch_and __sync_fetch_and_and
+#define atomic_fetch_or __sync_fetch_and_or
+
+#if defined(NSR_DEBUG)
+
+#define NSR_LOG_DIR "/var/log/nsr-logs"
+
+extern int nsr_debug_level;
+extern FILE *recon_create_log (char *member, char *module);
+
+extern void
+_nsr_driver_log (const char *func, int line, char *member, FILE *fp,
+                 char *fmt, ...);
+
+/* Requires a variable named ctx (with ->this and ->fp) at the call site. */
+#define nsr_driver_log(dom, levl, fmt...) do { \
+        FMT_WARN (fmt); \
+        if (levl <= nsr_debug_level) { \
+                nsr_recon_private_t *priv = ctx->this->private; \
+                _nsr_driver_log (__FUNCTION__, __LINE__, \
+                                 priv->replica_group_members[0], \
+                                 ctx->fp, \
+                                 ##fmt); \
+        } \
+} while (0)
+
+extern void
+_nsr_worker_log (const char *func, int line, char *member,
+                 char *type, uint32_t index, FILE *fp,
+                 char *fmt, ...);
+
+/* Requires a variable named ctx (a per-node worker) at the call site. */
+#define nsr_worker_log(dom, levl, fmt...) do { \
+        FMT_WARN (fmt); \
+        if (levl <= nsr_debug_level) { \
+                nsr_recon_private_t *priv; \
+                priv = ctx->driver_ctx->this->private; \
+                _nsr_worker_log (__FUNCTION__, __LINE__, \
+                                 priv->replica_group_members[0], \
+                                 ctx->is_control ? "recon-con" : \
+                                 "recon-data", \
+                                 ctx->index, ctx->fp, \
+                                 ##fmt); \
+        } \
+} while (0)
+
+#else
+#define nsr_driver_log(dom, levl, fmt...) gf_log(dom, levl, fmt)
+#define nsr_worker_log(dom, levl, fmt...) gf_log(dom, levl, fmt)
+#endif
+
+#endif /* #ifndef __RECON_DRIVER_H__ */
diff --git a/xlators/cluster/nsr-recon/src/recon_xlator.c b/xlators/cluster/nsr-recon/src/recon_xlator.c
new file mode 100644
index 000000000..272c35dc2
--- /dev/null
+++ b/xlators/cluster/nsr-recon/src/recon_xlator.c
@@ -0,0 +1,1010 @@
+/*
+  Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "call-stub.h"
+#include "defaults.h"
+#include "xlator.h"
+
+#include "recon_driver.h"
+#include "recon_xlator.h"
+
+/* Per-fd state: the term and index window most recently written by the client. */
+typedef struct _nsr_recon_fd_s {
+        int32_t term;
+        nsr_recon_driver_state_t state;
+        uint32_t first_index;
+        uint32_t last_index;
+        call_frame_t *frame;
+} nsr_recon_fd_t;
+
+#if defined(NSR_DEBUG)
+
+/*
+ * Format one message and write it to fp as "[func:line] message".
+ * If fp is NULL a log is obtained from recon_create_log(member, ...).
+ *
+ * NOTE(review): a FILE obtained here is neither stored nor closed by this
+ * function; whether recon_create_log keeps ownership is not visible here.
+ */
+void
+_recon_main_log (const char *func, int line, char *member, FILE *fp,
+                 char *fmt, ...)
+{
+        va_list ap;
+        char *buf = NULL;
+        int retval;
+
+        if (!fp) {
+                fp = recon_create_log(member,"recon-main-log");
+                if (!fp) {
+                        return;
+                }
+        }
+
+        va_start(ap,fmt);
+        retval = vasprintf(&buf,fmt,ap);
+        va_end(ap);
+        /* buf is only meaningful when vasprintf reports success. */
+        if ((retval >= 0) && buf) {
+                fprintf(fp,"[%s:%d] %.*s\n",func,line,retval,buf);
+                free(buf);
+        }
+}
+
+#endif
+
+// Given fd, get back the NSR based fd context.
+static int32_t this_fd_ctx_get(fd_t *fd, xlator_t *this, nsr_recon_fd_t **rfd)
+{
+        uint64_t tmp = 0;
+        int32_t ret = -1;
+
+        ret = fd_ctx_get(fd, this, &tmp);
+        if (ret != 0) {
+                return ret;
+        }
+
+        // The context was stored as (uint64_t)(long)pointer; undo that.
+        *rfd = (nsr_recon_fd_t *)(long)tmp;
+        return 0;
+}
+
+// Add the frame in q after associating with term
+// term usage tbd
+// Only one frame is kept (priv->frame); a previously stored one is overwritten.
+static void put_frame(nsr_recon_private_t *priv,
+                      call_frame_t *frame,
+                      uint32_t term)
+{
+        xlator_t *this = priv->this;
+        recon_main_log (this->name, GF_LOG_INFO, "adding frame for term %d \n", term);
+        priv->frame = frame;
+        return;
+}
+
+// get the frame from the queue given the term
+// term usage tbd
+// The stored frame is always cleared; it is handed back only if frame != NULL.
+static void get_frame(nsr_recon_private_t *priv,
+                      call_frame_t **frame,
+                      uint32_t term)
+{
+        if (frame != NULL)
+                *frame = priv->frame;
+        priv->frame = NULL;
+        return;
+}
+
+// check if there are outstanding frames
+static gf_boolean_t is_frame(nsr_recon_private_t *priv)
+{
+        return((priv->frame != NULL) ? _gf_true : _gf_false);
+}
+
+#define ENTRY_SIZE 128
+
+/*
+ * Binary-search a file of fixed ENTRY_SIZE-byte slots for the first slot that
+ * does not begin with "_P".
+ *
+ * Returns the index of that slot (the file's slot count if none is found
+ * before the end), or -1 if the file cannot be opened, stat'ed, positioned or
+ * read.  Slot 0 is never inspected.
+ */
+long
+get_entry_count (char *path)
+{
+        int fd;
+        struct stat buf;
+        long entries = -1;
+        long min; /* last entry not known to be empty */
+        long max; /* first entry known to be empty */
+        long curr;
+        char entry[ENTRY_SIZE];
+
+        fd = open(path,O_RDONLY);
+        if (fd < 0) {
+                goto done;
+        }
+
+        if (fstat(fd,&buf) < 0) {
+                goto close_fd;
+        }
+
+        min = 0;
+        max = buf.st_size / ENTRY_SIZE;
+
+        while ((min+1) < max) {
+                curr = (min + max) / 2;
+                if (lseek(fd,(off_t)curr*ENTRY_SIZE,SEEK_SET) < 0) {
+                        goto close_fd;
+                }
+                if (read(fd,entry,sizeof(entry)) != (ssize_t)sizeof(entry)) {
+                        goto close_fd;
+                }
+                if ((entry[0] == '_') && (entry[1] == 'P')) {
+                        min = curr;
+                }
+                else {
+                        max = curr;
+                }
+        }
+
+        entries = max;
+
+close_fd:
+        close(fd);
+done:
+        return entries;
+}
+
+// Get the term info for the term number specified
+void
nsr_recon_libchangelog_get_this_term_info(xlator_t *this, char *bp, int32_t term, nsr_recon_last_term_info_t *lt)
+{
+        char path[PATH_MAX];
+        long entries = -1;
+        int n;
+
+        // lt is always reset; last_term is set even if the term file is unusable.
+        bzero(lt, sizeof(nsr_recon_last_term_info_t));
+        lt->last_term = term;
+        n = snprintf(path,sizeof(path),"%s/%s%d",bp,"TERM.",term);
+        if ((n >= 0) && ((size_t)n < sizeof(path))) {
+                entries = get_entry_count(path);
+        }
+        if (entries > 1) {
+                /* The first entry is actually a header. */
+                lt->first_index = 1;
+                /*
+                 * This seems wrong, because it means that last_index*128 will
+                 * be exactly at EOF and commited_ops will be one greater than
+                 * it should be. Maybe some other code makes the exact
+                 * opposite mistake to compensate.
+                 */
+                lt->last_index = lt->commited_ops = (int)entries;
+        }
+        recon_main_log (this->name, GF_LOG_INFO, "for term=%d got first_index=%d last_index=%d commited_ops=%d\n",
+                        term, lt->first_index, lt->last_index, lt->commited_ops);
+        return;
+}
+
+// Given the term number, find the last term in the changelogs
+// Walks down from term to 1 and fills lt from the first TERM.<n> file found.
+// lt is left zeroed if none exists (or if term is not positive).
+void nsr_recon_libchangelog_get_last_term_info(xlator_t *this, char *bp, int32_t term, nsr_recon_last_term_info_t *lt)
+{
+        // A negative term must not wrap into a multi-billion iteration loop.
+        uint32_t t = (term > 0) ? (uint32_t)term : 0;
+        struct stat buf;
+        char path[PATH_MAX];
+        int n;
+
+        bzero(lt, sizeof(nsr_recon_last_term_info_t));
+        while(t) {
+                // journal file is of type TERM-1.jnl
+                n = snprintf(path,sizeof(path),"%s/%s%d",bp,"TERM.",(int)t);
+                if ((n >= 0) && ((size_t)n < sizeof(path)) && !stat(path, &buf)) {
+                        nsr_recon_libchangelog_get_this_term_info(this, bp, t, lt);
+                        recon_main_log (this->name, GF_LOG_INFO, "got last term given current term %d as %d\n", term, t);
+                        return;
+                }
+                t--;
+        }
+        recon_main_log (this->name, GF_LOG_INFO, "got no last term given current term %d \n", term);
+
+        return;
+}
+
+// Return back the frame stored against the term
+// Unwinds the stored writev frame (if any) with the given status/op_errno.
+void nsr_recon_return_back(nsr_recon_private_t *priv, uint32_t term, int32_t status, int32_t op_errno)
+{
+        call_frame_t *old_frame = NULL;
+        xlator_t *this = priv->this;
+
+        get_frame(priv, &old_frame, term);
+        if (old_frame) {
+                recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_writev returns old frame \n");
+                // first return the original write for which this ack was sent
+                STACK_UNWIND_STRICT (writev, old_frame, status, op_errno, NULL, NULL, NULL);
+        } else {
+                recon_main_log (this->name, GF_LOG_ERROR, "EIII---nsr_recon_writev cnnot return old frame \n");
+        }
+}
+
+typedef enum records_type_t {
+        fop_gfid_pgfid_oldloc_newloc = 1,
+        fop_gfid_pgfid_entry = fop_gfid_pgfid_oldloc_newloc + 1,
+        fop_gfid = fop_gfid_pgfid_entry + 1 ,
+        fop_gfid_offset = fop_gfid + 1,
+        fop_gfid_offset_len = fop_gfid_offset + 1,
+} records_type_t;
+
+// Get the backend ./glusterfs/xx/xx/<...> path
+// NOTE(review): path is filled with strcpy/strcat and no size is passed in;
+// the caller's buffer must be large enough for base_dir plus the gfid path.
+static void
+get_gfid_path(nsr_recon_private_t *priv, char *gfid, char *path)
+{
+        strcpy(path, priv->base_dir);
+        strcat(path, "/.glusterfs/");
+        strncat(path,gfid,2);
+        strcat(path,"/");
+        strncat(path,gfid+2,2);
+        strcat(path,"/");
+        strcat(path,gfid);
+}
+
+
+// Get the link to which backend points to
+// On success path holds the NUL-terminated link target (at most 254 bytes
+// plus the terminator).  A target that fills all 255 bytes may have been
+// truncated by readlink and is reported as a failure.
+static gf_boolean_t
+get_link_using_gfid(nsr_recon_private_t *priv, char *gfid, char *path)
+{
+        char lp[PATH_MAX];
+        xlator_t *this = priv->this;
+        ssize_t len;
+
+        get_gfid_path(priv,gfid, lp);
+        len = readlink(lp, path, 255);
+        if (len < 0) {
+                recon_main_log(this->name, GF_LOG_ERROR,
+                               "cannot get readlink for %s\n",lp);
+                GF_ASSERT(0);
+                return _gf_false;
+        }
+        if (len >= 255) {
+                recon_main_log(this->name, GF_LOG_ERROR,
+                               "link target too long for %s\n",lp);
+                return _gf_false;
+        }
+        // readlink() does not append the terminating NUL itself.
+        path[len] = '\0';
+        return _gf_true;
+}
+
+// Get the list of changelog records given a term , first and last index.
+//
+// TBD: rewrite this hideous ball of mud in at least the following ways:
+//
+// (1) Break out the code for handling a single record into a separate
+// function, to make error handling easier and reduce "indentation
+// creep" so the code's readable.
+//
+// (2) Change all of the fop_xxx_yyy nonsense to OR together values
+// like FOP_HAS_FIELD_XXX and FOP_HAS_FIELD_YYY, to reduce code
+// duplication and facilitate the addition of new fields.
+//
+// (3) Stop making so many assumptions about the underlying formats.
+// The code as it is won't even work for the existing binary format,
+// let alone as changelog evolves over time.
+//
+// Really, 90% of this code should just GO AWAY in favor of using
+// libgfchangelog, enhanced as necessary to support our needs.
+
+/*
+ * Use this macro to skip over a field we're not using yet.
+ * NB: the body is a null statement on purpose
+ * TBD: all instances of this should be removed eventually!
+ */
+#define SKIP_FIELD do /* nothing */ ; while (*(start++) != '\0')
+
+#define SKIP_OVER
+/*
+ * Read records first..last (128 bytes each) from <bp>/TERM.<term> and decode
+ * them into buf, which is treated as an array of nsr_recon_record_details_t
+ * with one element per index.  Decoded records are converted with htonl.
+ *
+ * Returns _gf_false if the range is inverted, the scratch buffer cannot be
+ * allocated, or the file cannot be opened or read; _gf_true otherwise.
+ *
+ * NOTE(review): the field parser below trusts the record contents; the
+ * NUL-terminated field copies (offset_str, len_str, entry, newloc) are not
+ * bounded against their destination sizes.
+ */
+gf_boolean_t nsr_recon_libchangelog_get_records(xlator_t *this, char *bp, int32_t term, uint32_t first, uint32_t last, void *buf)
+{
+        // do a mmap; seek into the first and read all records till last.
+        // TBD - right now all records are pseudo holes but mark them as fills.
+        // TBD - pseudo hole to be implemented when actual fsync gets done on data.
+        char *rb = NULL, *orig = NULL;
+        char path[PATH_MAX];
+        int fd;
+        uint32_t index = 0;
+
+        recon_main_log (this->name, GF_LOG_INFO,
+                        "libchangelog_get_records called for term %d index from %d to %d \n",
+                        term, first, last );
+
+        // (last - first) + 1 is unsigned; an inverted range would underflow.
+        if (last < first) {
+                return _gf_false;
+        }
+
+        orig = rb = GF_CALLOC(128, ((last - first) + 1),
+                              gf_mt_recon_changelog_buf_t);
+        if (!orig) {
+                return _gf_false;
+        }
+
+        snprintf(path,sizeof(path),"%s/%s%d",bp,"TERM.",term);
+        fd = open(path, O_RDONLY);
+        if (fd == -1) {
+                GF_FREE(orig);
+                return _gf_false;
+        } else {
+                char *start = NULL;
+                nsr_recon_record_details_t * rec = (nsr_recon_record_details_t *)buf;
+
+                if (first == 0)
+                        lseek(fd, 128, SEEK_SET);
+                else
+                        lseek(fd, first * 128, SEEK_SET);
+                if (read(fd, rb, (last - first + 1) * 128) == -1) {
+                        GF_FREE(orig);
+                        close(fd);
+                        return _gf_false;
+                }
+                start = rb;
+                index = first;
+                do {
+                        recon_main_log (this->name, GF_LOG_INFO,
+                                        "libchangelog_get_records start inspecting records at index %d \n",
+                                        index );
+                        if (!strncmp(start, "_PRE_", 5)) {
+                                uint32_t i;
+                                uint32_t opcode = 0;
+                                records_type_t type;
+
+                                start += 5;
+                                // increment by the NULLs after the PRE
+                                start += 4;
+                                SKIP_FIELD; // real index
+                                // now we have the opcode
+                                while (*start != '\0') {
+                                        opcode *= 10;
+                                        opcode += (*(start++) - '0');
+                                }
+                                ++start;
+                                recon_main_log (this->name, GF_LOG_ERROR,
+                                                "libchangelog_get_records: got opcode %d @index %d\n", opcode, index);
+                                if ((opcode == GF_FOP_RENAME)) {
+                                        type = fop_gfid_pgfid_oldloc_newloc;
+                                } else if ((opcode == GF_FOP_UNLINK) ||
+                                           (opcode == GF_FOP_RMDIR) ||
+                                           (opcode == GF_FOP_LINK) ||
+                                           (opcode == GF_FOP_MKDIR) ||
+                                           (opcode == GF_FOP_SYMLINK) ||
+                                           (opcode == GF_FOP_MKNOD) ||
+                                           (opcode == GF_FOP_CREATE)) {
+                                        type = fop_gfid_pgfid_entry;
+                                } else if ((opcode == GF_FOP_FSETATTR) ||
+                                           (opcode == GF_FOP_SETATTR) ||
+                                           (opcode == GF_FOP_FREMOVEXATTR) ||
+                                           (opcode == GF_FOP_REMOVEXATTR) ||
+                                           (opcode == GF_FOP_SETXATTR) ||
+                                           (opcode == GF_FOP_FSETXATTR)) {
+                                        type = fop_gfid;
+                                } else if ((opcode == GF_FOP_TRUNCATE) ||
+                                           (opcode == GF_FOP_FTRUNCATE)) {
+                                        type = fop_gfid_offset;
+                                } else if (opcode == GF_FOP_WRITE) {
+                                        type = fop_gfid_offset_len;
+                                } else {
+                                        recon_main_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "libchangelog_get_records:got no proper opcode %d @index %d\n",
+                                                        opcode, index);
+                                        //GF_ASSERT(0);
+                                        // make this as a hole.
+                                        // TBD - check this logic later. maybe we should raise alarm here because
+                                        // this means that changelog is corrupted. We are not handling changelog
+                                        // corruptions as of now.
+                                        rec->type = NSR_LOG_HOLE;
+                                        goto finish;
+                                }
+                                // TBD - handle psuedo holes once that logic is in.
+                                rec->type = NSR_LOG_FILL;
+                                recon_main_log (this->name, GF_LOG_ERROR,
+                                                "libchangelog_get_records:got type %d at index %d \n",
+                                                rec->type, index);
+                                rec->op = opcode;
+
+                                // Now get the gfid and parse it
+                                // before that increment the pointer
+                                for (i=0; i < 36; i++) {
+                                        rec->gfid[i] = (*start);
+                                        start++;
+                                }
+                                rec->gfid[i] = '\0';
+
+                                GF_ASSERT(*start == 0);
+                                start ++;
+
+                                if (opcode == GF_FOP_SYMLINK) {
+                                        i = 0;
+                                        do {
+                                                if (i >= 256) {
+                                                        goto finish;
+                                                }
+                                                rec->link_path[i++] = *start;
+                                        } while (*(start++) != '\0');
+                                }
+
+                                i = 0;
+                                // If type is fop_gfid_offset+_len, get offset
+                                if ((type == fop_gfid_offset) || (type == fop_gfid_offset_len)) {
+                                        char offset_str[128];
+                                        while(*start != 0) {
+                                                offset_str[i++] = *start;
+                                                start ++;
+                                        }
+                                        offset_str[i] = '\0';
+                                        // get over the 0
+                                        start++;
+                                        rec->offset = strtoul(offset_str, NULL, 10);
+                                        recon_main_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "libchangelog_get_records:got offset %d @index %d \n", rec->offset, index);
+
+                                }
+                                i = 0;
+                                if (type == fop_gfid_offset_len) {
+                                        char len_str[128];
+                                        while(*start != 0) {
+                                                len_str[i++] = *start;
+                                                start ++;
+                                        }
+                                        len_str[i] = '\0';
+                                        // get over the 0
+                                        start++;
+                                        rec->len = strtoul(len_str, NULL, 10);
+                                        recon_main_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "libchangelog_get_records:got length %d @index %d \n", rec->len, index);
+                                }
+                                i = 0;
+                                if (type == fop_gfid_pgfid_entry) {
+                                        switch (opcode) {
+                                        case GF_FOP_CREATE:
+                                        case GF_FOP_MKDIR:
+                                        case GF_FOP_MKNOD:
+                                                SKIP_FIELD; // mode
+                                                break;
+                                        /* TBD: handle GF_FOP_SYMLINK target */
+                                        default:
+                                                ;
+                                        }
+                                        SKIP_FIELD; // uid
+                                        SKIP_FIELD; // gid
+                                        if (opcode == GF_FOP_MKNOD) {
+                                                SKIP_FIELD; // dev
+                                        }
+                                        // first get the gfid and then the path
+                                        for (i=0; i < 36; i++) {
+                                                rec->pargfid[i] = (*start);
+                                                start++;
+                                        }
+                                        rec->pargfid[i] = '\0';
+                                        GF_ASSERT(*start == '/');
+                                        start ++;
+
+                                        i = 0;
+                                        while(*start != 0) {
+                                                rec->entry[i++] = *start;
+                                                start ++;
+                                        }
+                                        rec->entry[i] = '\0';
+                                        // get over the 0
+                                        start++;
+                                        /*
+                                         * Having to add this as a special case
+                                         * is awful. See the function header
+                                         * comment for the real solution.
+                                         */
+                                        if (opcode == GF_FOP_CREATE) {
+                                                rec->mode = 0;
+                                                while (*start != '\0') {
+                                                        rec->mode *= 10;
+                                                        rec->mode += *start
+                                                                - '0';
+                                                        ++start;
+                                                }
+                                                ++start;
+                                        }
+                                        recon_main_log (this->name,
+                                                        GF_LOG_ERROR,
+                                                        "libchangelog_get_records:got entry %s @index %d \n", rec->entry, index);
+
+                                }
+                                i = 0;
+                                if (type == fop_gfid_pgfid_oldloc_newloc) {
+
+                                        // first get the source and then the destination
+                                        // source stuff gets stored in pargfid/entry
+                                        for (i=0; i < 36; i++) {
+                                                rec->pargfid[i] = (*start);
+                                                start++;
+                                        }
+                                        rec->pargfid[i] = '\0';
+                                        GF_ASSERT(*start == '/');
+                                        start ++;
+
+                                        i=0;
+                                        while(*start != 0) {
+                                                rec->entry[i++] = *start;
+                                                start ++;
+                                        }
+                                        rec->entry[i] = '\0';
+                                        // get over the 0
+                                        start++;
+
+                                        // dst stuff gets stored in gfid/newloc
+                                        for (i=0; i < 36; i++) {
+                                                rec->gfid[i] = (*start);
+                                                start++;
+                                        }
+                                        rec->gfid[i] = '\0';
+                                        GF_ASSERT(*start == '/');
+                                        start ++;
+                                        i = 0;
+                                        while(*start != 0) {
+                                                rec->newloc[i++] = *start;
+                                                start ++;
+                                        }
+                                        rec->newloc[i] = '\0';
+                                        // get over the 0
+                                        start++;
+
+                                }
+                                ENDIAN_CONVERSION_RD((*rec), _gf_false); //htonl
+                        }
+finish:
+                        if (index == last)
+                                break;
+                        index++;
+                        rb += 128;
+                        start = rb;
+                        rec++;
+                } while(1);
+        }
+        GF_FREE(orig);
+        close(fd);
+
+        recon_main_log (this->name, GF_LOG_INFO,
+                        "libchangelog_get_records finsihed inspecting records for term %d \n",
+                        term);
+        return _gf_true;
+}
+
+/*
+ * open fop: allocate a zeroed nsr_recon_fd_t and attach it to the fd as this
+ * xlator's context.  Unwinds with ENOMEM if the allocation fails and EINVAL
+ * if the context cannot be set.
+ */
+int32_t
+nsr_recon_open (call_frame_t *frame, xlator_t *this,
+                loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
+{
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+        nsr_recon_fd_t *rfd = NULL;
+
+        recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_open called for path %s \n",loc->path );
+        rfd = GF_CALLOC (1, sizeof (*rfd), gf_mt_recon_fd_t);
+        if (!rfd) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        } else {
+                // Only register a context that actually exists.
+                op_ret = fd_ctx_set (fd, this, (uint64_t)(long)rfd);
+                if (op_ret) {
+                        GF_FREE (rfd);
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                }
+        }
+        recon_main_log
(this->name, GF_LOG_INFO, "nsr_recon_open returns with %d for path %s \n",op_ret,loc->path );
+        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL);
+        return 0;
+}
+
+/*
+ * writev fop.  The write offset selects the command and the first iovec
+ * carries its payload in network byte order:
+ *   nsr_recon_xlator_sector_1 - nsr_recon_role_t: hand a role to the driver
+ *   nsr_recon_xlator_sector_2 - nsr_recon_log_info_t: term and index window
+ *   nsr_recon_xlator_sector_3 - int32 term for a later "this term" read
+ *   nsr_recon_xlator_sector_4 - int32 term for a later "last term" read
+ *
+ * Every path unwinds the frame exactly once, except a successfully queued
+ * role: that frame is stored with put_frame and unwound later through
+ * nsr_recon_return_back.  Always returns 0.
+ */
+int32_t
+nsr_recon_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  struct iovec *vector, int32_t count, off_t offset,
+                  uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+        nsr_recon_fd_t *rfd = NULL;
+        nsr_recon_private_t *priv = NULL;
+        int32_t op_ret = 0;
+        int32_t op_errno = 0;
+        int32_t ret = 0;
+
+        ret = this_fd_ctx_get (fd, this, &rfd);
+        if ((ret < 0) || !rfd) {
+                // The frame must still be unwound or the caller never hears back.
+                op_ret = -1;
+                op_errno = EBADF;
+                goto unwind;
+        }
+        priv = (nsr_recon_private_t *)this->private;
+
+        recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_writev called for offset %d \n",(unsigned int)offset );
+        if ((count < 1) || !vector || !vector[0].iov_base) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                goto unwind;
+        }
+        GF_ASSERT(count == 1);
+        switch (offset) {
+        // client(brick, leader) writes the role of the node
+        case nsr_recon_xlator_sector_1 :
+        {
+                nsr_recon_role_t rr;
+
+                if (vector[0].iov_len < sizeof(rr)) {
+                        recon_main_log (this->name, GF_LOG_ERROR,
+                                        "nsr_recon_writev set_role - short payload \n");
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+                memcpy((void *)&rr, (void *)vector[0].iov_base, sizeof(rr));
+                // num indexes rr.info[]; check it before anything iterates over it.
+                if (ntohl(rr.num) > MAXIMUM_REPLICA_STRENGTH) {
+                        recon_main_log (this->name, GF_LOG_ERROR,
+                                        "nsr_recon_writev set_role - too many members \n");
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+                ENDIAN_CONVERSION_RR(rr, _gf_true); //ntohl
+
+                recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_writev called to set role %d\n", rr.role);
+                if ((rr.role != leader) &&
+                    (rr.role != reconciliator) &&
+                    (rr.role != resolutor) &&
+                    (rr.role != joiner)) {
+                        recon_main_log (this->name, GF_LOG_ERROR,
+                                        "EIII---nsr_recon_writev cannot set state \n");
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+
+                // Check if already a role play is going on. If yes return with EAGAIN.
+                // Ideally we should check if we have got a higher term number while
+                // servicing a lower term number; if so abort the older one.
+                // However the abort infrastructure needs to be sketched properly; TBD.
+                if (is_frame(priv) == _gf_true) {
+                        recon_main_log (this->name, GF_LOG_ERROR,
+                                        "nsr_recon_writev set_role - already role play \n");
+                        op_ret = -1;
+                        op_errno = EAGAIN;
+                        goto unwind;
+                }
+
+                // Store the stack frame so that when the actual job gets finished
+                // we send the response back to the brick.
+                put_frame(priv, frame, rr.current_term);
+                if (nsr_recon_driver_set_role(priv->driver_thread_context,
+                                              &rr,
+                                              rr.current_term) == _gf_false) {
+                        get_frame(priv, NULL, rr.current_term);
+                        recon_main_log (this->name, GF_LOG_ERROR,
+                                        "nsr_recon_writev set_role - cannot seem to set role \n");
+                        op_ret = -1;
+                        op_errno = EIO;
+                        goto unwind;
+                }
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_writev set_role - set role succesfully \n");
+                // The stored frame is unwound later, not here.
+                return 0;
+        }
+        // client(reconciliator) writes how much it needs for the read
+        case nsr_recon_xlator_sector_2 :
+        {
+                nsr_recon_log_info_t li;
+
+                if (vector[0].iov_len < sizeof(li)) {
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+                memcpy((void *)&li, (void *)vector[0].iov_base, sizeof(li));
+                ENDIAN_CONVERSION_LI(li, _gf_true); //ntohl
+
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_writev - setting term info for reconcilation info. term=%d, first_index=%d,start_index=%d \n",
+                                li.term, li.first_index, li.last_index);
+                rfd->term = li.term;
+                rfd->last_index = li.last_index;
+                rfd->first_index = li.first_index;
+                break;
+        }
+        // client(reconciliator) writes term for which it needs info
+        case nsr_recon_xlator_sector_3 :
+        {
+                int32_t term;
+
+                if (vector[0].iov_len < sizeof(term)) {
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+                memcpy((void *)&term, (void *)vector[0].iov_base, sizeof(term));
+                term = ntohl(term); //ntohl
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_writev - setting term info for term info. term=%d\n",
+                                term);
+                rfd->term = term;
+                break;
+        }
+        // client(reconciliator) writes current term so that it gets last term info later
+        case nsr_recon_xlator_sector_4 :
+        {
+                int32_t term;
+
+                if (vector[0].iov_len < sizeof(term)) {
+                        op_ret = -1;
+                        op_errno = EINVAL;
+                        goto unwind;
+                }
+                memcpy((void *)&term, (void *)vector[0].iov_base, sizeof(term));
+                term = ntohl(term); //ntohl
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_writev - setting term info for last term info given current term=%d\n",
+                                term);
+                rfd->term = term;
+                break;
+        }
+        default:
+        {
+                recon_main_log (this->name, GF_LOG_ERROR,
+                                "nsr_recon_writev called with wrong offset\n");
+                op_ret = -1;
+                op_errno = EINVAL;
+                break;
+        }
+        }
+
+unwind:
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+                             NULL, NULL, NULL);
+        return 0;
+}
+
+/*
+ * readv fop.  The read offset selects what is returned, using the term and
+ * index window previously stored on the fd by nsr_recon_writev:
+ *   nsr_recon_xlator_sector_3 - nsr_recon_last_term_info_t for rfd->term
+ *   nsr_recon_xlator_sector_2 - records first_index..last_index of rfd->term
+ *   nsr_recon_xlator_sector_4 - nsr_recon_last_term_info_t of the last term
+ *                               at or below rfd->term
+ *
+ * size must match the reply exactly, otherwise the call fails with EINVAL.
+ * On any failure the frame is unwound with op_ret -1.  Always returns 0.
+ *
+ * NOTE(review): the result of nsr_recon_libchangelog_get_records is ignored,
+ * so a failed record read is returned as a zero-filled buffer.
+ */
+int
+nsr_recon_readv (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+        nsr_recon_fd_t *rfd = NULL;
+        int32_t op_errno = 0;
+        // copied stuff from quick-read.c and posix.c
+        struct iobuf *iobuf = NULL;
+        struct iobref *iobref = NULL;
+        struct iovec iov = {0, };
+        int32_t ret = -1;
+        nsr_recon_private_t *priv = NULL;
+
+        iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+        if (!iobuf) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref = iobref_new ();
+        if (!iobref) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        iobref_add (iobref, iobuf);
+
+        ret = this_fd_ctx_get (fd, this, &rfd);
+        if ((ret < 0) || !rfd) {
+                op_errno = EBADF;
+                goto out;
+        }
+        priv = (nsr_recon_private_t *)this->private;
+
+        recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_readv called for offset %d \n",(unsigned int)offset );
+        switch (offset) {
+        // client(leader) reads from here to get info for this term on this node
+        // invole libchagelog to get the information
+        case nsr_recon_xlator_sector_3 :
+        {
+                nsr_recon_last_term_info_t tinfo;
+
+                // The reply is copied with the caller's size; it must match.
+                if (size != sizeof(tinfo)) {
+                        op_errno = EINVAL;
+                        break;
+                }
+                nsr_recon_libchangelog_get_this_term_info(this,priv->changelog_base_path, rfd->term, &tinfo);
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_readv - getting term info for term=%d, ops=%d, first=%d, last=%d\n",
+                                rfd->term, tinfo.commited_ops, tinfo.first_index, tinfo.last_index);
+                ENDIAN_CONVERSION_LT(tinfo, _gf_false); //htonl
+                memcpy(iobuf->ptr, &tinfo, size);
+                break;
+        }
+        // client(reconciliator) reads individual record information
+        case nsr_recon_xlator_sector_2 :
+        {
+                uint32_t num = (rfd->last_index - rfd->first_index + 1);
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_readv - expected size %lu got size %lu\n",
+                                (num * sizeof(nsr_recon_record_details_t)), size);
+
+                // get_records fills one record per index; the buffer must hold them all.
+                if ((rfd->last_index < rfd->first_index) ||
+                    (size != (num * sizeof(nsr_recon_record_details_t)))) {
+                        op_errno = EINVAL;
+                        break;
+                }
+                bzero(iobuf->ptr, size);
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_readv - getting records for term=%d from %d to %d\n",
+                                rfd->term, rfd->first_index, rfd->last_index);
+                nsr_recon_libchangelog_get_records(this, priv->changelog_base_path,
+                                                   rfd->term, rfd->first_index, rfd->last_index, iobuf->ptr);
+                break;
+        }
+        // read last term info
+        case nsr_recon_xlator_sector_4 :
+        {
+                nsr_recon_last_term_info_t tinfo;
+
+                if (size != sizeof(tinfo)) {
+                        op_errno = EINVAL;
+                        break;
+                }
+                nsr_recon_libchangelog_get_last_term_info(this, priv->changelog_base_path, rfd->term, &tinfo);
+                recon_main_log (this->name, GF_LOG_INFO,
+                                "nsr_recon_readv - getting last term info given current term=%d. last term = %d ops=%d, first=%d, last=%d\n",
+                                rfd->term, tinfo.last_term, tinfo.commited_ops, tinfo.first_index, tinfo.last_index);
+                ENDIAN_CONVERSION_LT(tinfo, _gf_false); //htonl
+                memcpy(iobuf->ptr, &tinfo, size);
+                break;
+        }
+        default:
+        {
+                recon_main_log (this->name, GF_LOG_ERROR,
+                                "nsr_recon_readv called with wrong offset\n");
+                op_errno = EINVAL;
+                break;
+        }
+        }
+
+out:
+        if (op_errno == 0) {
+                iov.iov_base = iobuf->ptr;
+                ret = iov.iov_len = size;
+        } else {
+                ret = -1;
+        }
+
+        STACK_UNWIND_STRICT (readv, frame, ret, op_errno, &iov, 1, NULL, iobref , NULL);
+
+        if (iobref)
+                iobref_unref (iobref);
+        if (iobuf)
+                iobuf_unref (iobuf);
+        return 0;
+}
+
+/* lookup fop: always succeeds, reporting the inode table root as a regular file. */
+int
+nsr_recon_lookup (call_frame_t *frame, xlator_t *this,
+                  loc_t *loc, dict_t *xdata)
+{
+        struct iatt buf = {0, };
+        // dirty hack to set root as regular but seems to work.
+        buf.ia_type = IA_IFREG;
+        recon_main_log (this->name, GF_LOG_INFO, "nsr_recon_lookup called \n");
+
+        STACK_UNWIND_STRICT (lookup, frame, 0, 0, this->itable->root, &buf, NULL, NULL);
+        return 0;
+}
+
+
+/* flush fop: nothing to flush; unwinds with success. */
+int32_t
+nsr_recon_flush (call_frame_t *frame, xlator_t *this,
+                 fd_t *fd, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (flush, frame, 0, 0, NULL);
+        return 0;
+}
+
+
+/* Set up memory accounting for this xlator; returns xlator_mem_acct_init's result. */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("recon", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_mt_recon_end + 1);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting init failed");
+        }
+out:
+        return ret;
+}
+
+
+int32_t
+init (xlator_t *this)
+{
+        nsr_recon_private_t *priv = NULL;
+        char *local, *members;
+        unsigned int i=0;
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_mt_recon_private_t);
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "priv allocation error\n");
+                return -1;
+        }
+        GF_OPTION_INIT ("replica-group-size", priv->replica_group_size, uint32, err);
+        GF_OPTION_INIT ("vol-name", priv->volname, str, err);
+        if (!priv->volname) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "missing volname option (required)");
+
return -1; + } + GF_OPTION_INIT ("changelog-dir", priv->changelog_base_path, str, err); + if (!priv->changelog_base_path) { + gf_log (this->name, GF_LOG_ERROR, + "missing changelog directory option (required)"); + return -1; + } + GF_OPTION_INIT ("base-dir", priv->base_dir, str, err); + if (!priv->base_dir) { + gf_log (this->name, GF_LOG_ERROR, + "missing brick base directory option (required)"); + return -1; + } + GF_OPTION_INIT ("replica-group-members", members, str, err); + if (!members) { + gf_log (this->name, GF_LOG_ERROR, + "missing membership option (required)"); + return -1; + } + GF_OPTION_INIT ("local-member", local, str, err); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "missing local member option (required)"); + return -1; + } + + priv->replica_group_members = GF_CALLOC (priv->replica_group_size, + sizeof(char *), + gf_mt_recon_members_list_t); + priv->replica_group_members[0] = GF_CALLOC (1, + strlen(local), + gf_mt_recon_member_name_t); + if (!priv->replica_group_members || !(priv->replica_group_members[0])) { + gf_log (this->name, GF_LOG_ERROR, + "str allocation error\n"); + return -1; + } + strcpy(priv->replica_group_members[0], local); + for (i=1; i < priv->replica_group_size; i++) { + char *member; + if (i == 1) + member = strtok(members, ","); + else + member = strtok(NULL, ","); + priv->replica_group_members[i] = GF_CALLOC (1, + strlen(member) + 1, gf_mt_recon_member_name_t); + if (!priv->replica_group_members[i]) { + gf_log (this->name, GF_LOG_ERROR, + "str allocation error\n"); + return -1; + } + strcpy(priv->replica_group_members[i], member); + } + + + priv->this = this; + this->private = (void *)priv; + + priv->fp = recon_create_log (priv->replica_group_members[0], "recon-main-log"); + if (!priv->fp) + return -1; + + recon_main_log (this->name, GF_LOG_INFO, "creating reconciliation driver \n"); + + if (pthread_create(&priv->thread_id, NULL, nsr_reconciliation_driver, priv)) { + recon_main_log (this->name, GF_LOG_ERROR, + "pthread 
creation error \n"); + return -1; + } + + INIT_LIST_HEAD(&(priv->list)); + + + return 0; + +err: + return -1; +} + + +void +fini (xlator_t *this) +{ + nsr_recon_private_t *priv = NULL; + void *ret = NULL; + + priv = (nsr_recon_private_t *)this->private; + + pthread_cancel(priv->thread_id); + pthread_join(priv->thread_id, &ret); +} + + +struct xlator_fops fops = { + .open = nsr_recon_open, + .readv = nsr_recon_readv, + .writev = nsr_recon_writev, + .lookup = nsr_recon_lookup, + .flush = nsr_recon_flush +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = {"replica-group-size"}, + .type = GF_OPTION_TYPE_INT, + .min = 2, + .max = INT_MAX, + .default_value = "2", + .description = "Number of bricks in replica group. can be derived but putting it here for testing." + }, + { + .key = {"vol-name"}, + .type = GF_OPTION_TYPE_STR, + .description = "volume name" + }, + { + .key = {"local-member"}, + .type = GF_OPTION_TYPE_STR, + .description = "member(brick) for which this translator is responsible." + }, + { + .key = {"replica-group-members"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma seperated member names other than local." + }, + { + .key = {"changelog-dir"}, + .type = GF_OPTION_TYPE_STR, + .description = "Base directory where per term changelogs are maintained." + }, + { + .key = {"base-dir"}, + .type = GF_OPTION_TYPE_STR, + .description = "Base directory for this brick. This should go away once we fix gfid based lookups" + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/nsr-recon/src/recon_xlator.h b/xlators/cluster/nsr-recon/src/recon_xlator.h new file mode 100644 index 000000000..d9692a632 --- /dev/null +++ b/xlators/cluster/nsr-recon/src/recon_xlator.h @@ -0,0 +1,92 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __RECON_XLATOR_H__ +#define __RECON_XLATOR_H__ + +#include <semaphore.h> +#include <pthread.h> + +enum gf_dht_mem_types_ { + gf_mt_recon_changelog_buf_t = gf_common_mt_end + 1, + gf_mt_recon_driver_ctx_t, + gf_mt_recon_fd_t, + gf_mt_recon_id_t, + gf_mt_recon_member_name_t, + gf_mt_recon_members_list_t, + gf_mt_recon_per_node_worker_t, + gf_mt_recon_private_t, + gf_mt_recon_reconciliator_info_t, + gf_mt_recon_record_t, + gf_mt_recon_record_details_t, + gf_mt_recon_role_work_t, + gf_mt_recon_work_t, + gf_mt_recon_work_data_t, + gf_mt_recon_worker_t, + gf_mt_recon_end, +}; + +enum nsr_recon_xlator_sector_t { + nsr_recon_xlator_sector_0 = 0, // to report back the status of given transaction ids + nsr_recon_xlator_sector_1 = 512, // to write here information about leadership changes from the brick + nsr_recon_xlator_sector_2 = (512 * 2), // to write here individual roles and wait for that role to be done + nsr_recon_xlator_sector_3 = (512 *3), // read from here to get term info for given term + nsr_recon_xlator_sector_4 = (512 * 4), // read from here to get last term info +}; + + +typedef struct _nsr_recon_private_s { + xlator_t *this; //back pointer + unsigned int replica_group_size; // number of static members of replica group + char **replica_group_members; // replica group members (including itself in first slot) + pthread_t thread_id; // driver thread id + nsr_recon_driver_ctx_t *driver_thread_context; //driver thread context + unsigned int outstanding; // for communicating with driver thread + call_frame_t *frame; // old frame that is pending (just one as of now) + struct list_head list; + char *volname; + uint32_t txn_id; + char *changelog_base_path; + char *base_dir; +#if defined(NSR_DEBUG) 
+ FILE *fp; +#endif +} nsr_recon_private_t; + +#define atomic_cmpxchg __sync_val_compare_and_swap + +#if defined(NSR_DEBUG) + +extern void +_recon_main_log (const char *func, int line, char *member, FILE *fp, + char *fmt, ...); + +#define recon_main_log(dom, levl, fmt...) do { \ + FMT_WARN (fmt); \ + if (levl <= nsr_debug_level) { \ + nsr_recon_private_t *priv = this->private; \ + _recon_main_log (__FUNCTION__, __LINE__, \ + priv->replica_group_members[0], \ + priv->fp, \ + ##fmt); \ + } \ +} while (0) + +#else +#define recon_main_log(dom, levl, fmt...) gf_log(dom, levl, fmt) +#endif + +void nsr_recon_libchangelog_get_this_term_info(xlator_t *this, char *bp, int32_t term, nsr_recon_last_term_info_t *lt); +void nsr_recon_libchangelog_get_last_term_info(xlator_t *this, char *bp, int32_t term, nsr_recon_last_term_info_t *lt); +void nsr_recon_return_back(nsr_recon_private_t *priv, uint32_t term, int32_t status, int32_t op_errno); +gf_boolean_t nsr_recon_libchangelog_get_records(xlator_t *this, char *bp, int32_t term, uint32_t first, uint32_t last, void *buf); + + +#endif /* #ifndef __RECON_XLATOR_H__ */ diff --git a/xlators/cluster/nsr-server/Makefile.am b/xlators/cluster/nsr-server/Makefile.am new file mode 100644 index 000000000..d471a3f92 --- /dev/null +++ b/xlators/cluster/nsr-server/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/nsr-server/src/Makefile.am b/xlators/cluster/nsr-server/src/Makefile.am new file mode 100644 index 000000000..0092aad4f --- /dev/null +++ b/xlators/cluster/nsr-server/src/Makefile.am @@ -0,0 +1,43 @@ +noinst_PYTHON = codegen.py gen-fops.py + +xlator_LTLIBRARIES = nsr.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +nsr_la_LDFLAGS = -module -avoid-version -lcurl + +if ENABLE_ETCD_SIM +nsr_la_SOURCES = nsr.c leader.c recon_notify.c etcd-sim.c +else +nsr_la_SOURCES = nsr.c leader.c recon_notify.c etcd-api.c \ + yajl.c yajl_alloc.c yajl_buf.c yajl_encode.c yajl_gen.c \ + 
yajl_lex.c yajl_parser.c yajl_tree.c yajl_version.c +endif + + +nsr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/api/src/libgfapi.la + +noinst_HEADERS = nsr-internal.h etcd-api.h all-templates.c \ + yajl_alloc.h yajl_buf.h yajl_bytestack.h yajl_encode.h \ + yajl_lex.h yajl_parser.h yajl/yajl_common.h yajl/yajl_gen.h \ + yajl/yajl_parse.h yajl/yajl_tree.h yajl/yajl_version.h \ + $(top_srcdir)/xlators/lib/src/libxlator.h \ + $(top_srcdir)/glusterfsd/src/glusterfsd.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \ + -I$(top_srcdir)/rpc/rpc-lib/src -DSBIN_DIR=\"$(sbindir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +XLATOR_HEADER = $(top_srcdir)/libglusterfs/src/xlator.h + +CLEANFILES = nsr-cg.c + +nsr-cg.c: gen-fops.py codegen.py $(XLATOR_HEADER) all-templates.c + $(PYTHON) ./gen-fops.py $(XLATOR_HEADER) all-templates.c > $@ + +nsr.lo: nsr-cg.c + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/nsr.so diff --git a/xlators/cluster/nsr-server/src/all-templates.c b/xlators/cluster/nsr-server/src/all-templates.c new file mode 100644 index 000000000..fa29de7b2 --- /dev/null +++ b/xlators/cluster/nsr-server/src/all-templates.c @@ -0,0 +1,345 @@ +/* + * You can put anything here - it doesn't even have to be a comment - and it + * will be ignored until we reach the first template-name comment. 
+ */ + + +// template-name read-fop +$TYPE$ +nsr_$NAME$ (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_private_t *priv = this->private; + gf_boolean_t in_recon = _gf_false; + int32_t recon_term, recon_index; + + // allow reads during reconciliation + // TBD: allow "dirty" reads on non-leaders + if (xdata && + (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && + (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { + in_recon = _gf_true; + } + + if ((!priv->leader) && (in_recon == _gf_false)) { + goto err; + } + + STACK_WIND (frame, default_$NAME$_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; + +err: + STACK_UNWIND_STRICT ($NAME$, frame, -1, EREMOTE, + $DEFAULTS$); + return 0; +} + +// template-name read-dispatch +/* No "dispatch" function needed for $NAME$ */ + +// template-name read-fan-in +/* No "fan-in" function needed for $NAME$ */ + +// template-name read-continue +/* No "continue" function needed for $NAME$ */ + +// template-name read-complete +/* No "complete" function needed for $NAME$ */ + +// template-name write-fop +$TYPE$ +nsr_$NAME$ (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_local_t *local = NULL; + nsr_private_t *priv = this->private; + int op_errno = ENOMEM; + int from_leader; + int from_recon; + uint32_t ti = 0; + double must_be_up; + double are_up; + + /* + * Our first goal here is to avoid "split brain surprise" for users who + * specify exactly 50% with two- or three-way replication. That means + * either a more-than check against half the total replicas or an + * at-least check against half of our peers (one less). Of the two, + * only an at-least check supports the intuitive use of 100% to mean + * all replicas must be present, because "more than 100%" will never + * succeed regardless of which count we use. 
This leaves us with a + * slightly non-traditional definition of quorum ("at least X% of peers + * not including ourselves") but one that's useful enough to be worth + * it. + * + * Note that n_children and up_children *do* include the local + * subvolume, so we need to subtract one in each case. + */ + must_be_up = ((double)(priv->n_children - 1)) * priv->quorum_pct; + are_up = ((double)(priv->up_children - 1)) * 100.0; + if (are_up < must_be_up) { + /* Emulate the AFR client-side-quorum behavior. */ + op_errno = EROFS; + goto err; + } + + local = mem_get0(this->local_pool); + if (!local) { + goto err; + } +#if defined(NSR_CG_NEED_FD) + local->fd = fd_ref(fd); +#else + local->fd = NULL; +#endif + INIT_LIST_HEAD(&local->qlinks); + frame->local = local; + + if (xdata) { + from_leader = !!dict_get(xdata,NSR_TERM_XATTR); + from_recon = !!dict_get(xdata,RECON_TERM_XATTR) + && !!dict_get(xdata,RECON_INDEX_XATTR); + } + else { + from_leader = from_recon = _gf_false; + } + + // follower/recon path + // just send it to local node + if (from_leader || from_recon) { + atomic_inc(&priv->ops_in_flight); + STACK_WIND (frame, nsr_$NAME$_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; + } + + + if (!priv->leader/* || priv->fence_io*/) { + op_errno = EREMOTE; + goto err; + } + + + if (!xdata) { + xdata = dict_new(); + if (!xdata) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate xdata"); + goto err; + } + } + + if (dict_set_int32(xdata,NSR_TERM_XATTR,priv->current_term) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set nsr-term"); + goto err; + } + + LOCK(&priv->index_lock); + ti = ++(priv->index); + UNLOCK(&priv->index_lock); + if (dict_set_int32(xdata,NSR_INDEX_XATTR,ti) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set index"); + goto err; + } + + local->stub = fop_$NAME$_stub (frame,nsr_$NAME$_continue, + $ARGS_SHORT$); + if (!local->stub) { + goto err; + } + + +#if defined(NSR_CG_QUEUE) + 
nsr_inode_ctx_t *ictx = nsr_get_inode_ctx(this,fd->inode); + if (!ictx) { + op_errno = EIO; + goto err; + } + LOCK(&ictx->lock); + if (ictx->active) { + gf_log (this->name, GF_LOG_DEBUG, + "queuing request due to conflict"); + /* + * TBD: enqueue only for real conflict + * + * Currently we just act like all writes are in + * conflict with one another. What we should really do + * is check the active/pending queues and defer only if + * there's a conflict there. + * + * It's important to check the pending queue because we + * might have an active request X which conflicts with + * a pending request Y, and this request Z might + * conflict with Y but not X. If we checked only the + * active queue then Z could jump ahead of Y, which + * would be incorrect. + */ + local->qstub = fop_$NAME$_stub (frame, + nsr_$NAME$_dispatch, + $ARGS_SHORT$); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto err; + } + list_add_tail(&local->qlinks,&ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + return 0; + } + else { + list_add_tail(&local->qlinks,&ictx->aqueue); + ++(ictx->active); + } + UNLOCK(&ictx->lock); +#endif + + return nsr_$NAME$_dispatch (frame, this, $ARGS_SHORT$); + +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); + } + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); + } + STACK_UNWIND_STRICT ($NAME$, frame, -1, op_errno, + $DEFAULTS$); + return 0; +} + +// template-name write-dispatch +$TYPE$ +nsr_$NAME$_dispatch (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + nsr_local_t *local = frame->local; + nsr_private_t *priv = this->private; + xlator_list_t *trav; + + atomic_inc(&priv->ops_in_flight); + + /* + * TBD: unblock pending request(s) if we fail after this point but + * before we get to nsr_$NAME$_complete (where that code currently + * resides). 
+ */ + + local->call_count = priv->n_children - 1; + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND (frame, nsr_$NAME$_fan_in, + trav->xlator, trav->xlator->fops->$NAME$, + $ARGS_SHORT$); + } + + // TBD: variable Issue count + return 0; +} + +// template-name write-fan-in +$TYPE$ +nsr_$NAME$_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + $ARGS_LONG$) +{ + nsr_local_t *local = frame->local; + uint8_t call_count; + + gf_log (this->name, GF_LOG_TRACE, + "op_ret = %d, op_errno = %d\n", op_ret, op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + UNLOCK(&frame->lock); + + // TBD: variable Completion count + if (call_count == 0) { + call_resume(local->stub); + } + + return 0; +} + +// template-name write-continue +$TYPE$ +nsr_$NAME$_continue (call_frame_t *frame, xlator_t *this, + $ARGS_LONG$) +{ + STACK_WIND (frame, nsr_$NAME$_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->$NAME$, + $ARGS_SHORT$); + return 0; +} + +// template-name write-complete +$TYPE$ +nsr_$NAME$_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + $ARGS_LONG$) +{ + nsr_private_t *priv = this->private; +#if defined(NSR_CG_NEED_FD) + nsr_local_t *local = frame->local; +#endif + +#if defined(NSR_CG_QUEUE) + nsr_inode_ctx_t *ictx; + nsr_local_t *next; + if (local->qlinks.next != &local->qlinks) { + list_del(&local->qlinks); + ictx = nsr_get_inode_ctx(this,local->fd->inode); + if (ictx) { + LOCK(&ictx->lock); + if (ictx->pending) { + /* + * TBD: dequeue *all* non-conflicting reqs + * + * With the stub implementation there can only + * be one request active at a time (zero here) + * so it's not an issue. In a real + * implementation there might still be other + * active requests to check against, and + * multiple pending requests that could + * continue. 
+ */ + gf_log (this->name, GF_LOG_DEBUG, + "unblocking next request"); + --(ictx->pending); + next = list_entry (ictx->pqueue.next, + nsr_local_t, qlinks); + list_del(&next->qlinks); + list_add_tail(&next->qlinks,&ictx->aqueue); + call_resume(next->qstub); + } + else { + --(ictx->active); + } + UNLOCK(&ictx->lock); + } + } +#endif + +#if defined(NSR_CG_FSYNC) + nsr_mark_fd_dirty(this,local); +#endif + +#if defined(NSR_CG_NEED_FD) + fd_unref(local->fd); +#endif + + STACK_UNWIND_STRICT ($NAME$, frame, op_ret, op_errno, + $ARGS_SHORT$); + atomic_dec(&priv->ops_in_flight); + return 0; + +} diff --git a/xlators/cluster/nsr-server/src/codegen.py b/xlators/cluster/nsr-server/src/codegen.py new file mode 100755 index 000000000..709f5662f --- /dev/null +++ b/xlators/cluster/nsr-server/src/codegen.py @@ -0,0 +1,174 @@ +#!/usr/bin/python + +# This module lets us auto-generate boilerplate versions of fops and cbks, +# both for the client side and (eventually) on the server side as well. This +# allows us to implement common logic (e.g. leader fan-out and sequencing) +# once, without all the problems that come with copying and pasting the same +# code into dozens of functions (or failing to). +# +# I've tried to make this code pretty generic, since it's already likely to +# be used multiple ways within NSR. Really, we should use something like this +# to generate defaults.[ch] as well, to avoid the same sorts of mismatches +# that we've already seen and to which this approach makes NSR immune. That +# would require using something other than defaults.h as the input, but that +# format could be even simpler so that's a good thing too. + + +import re +import sys + +decl_re = re.compile("([a-z0-9_]+)$") +tmpl_re = re.compile("// template-name (.*)") + +class CodeGenerator: + + def __init__ (self): + self.decls = {} + self.skip = 0 + self.templates = {} + self.make_defaults = self._make_defaults + + # Redefine this to preprocess the name in a declaration, e.g. 
+ # fop_lookup_t => nsrc_lookup + def munge_name (self, orig): + return orig + + # By default, this will convert the argument string into a sequence of + # (type, name) tuples minus the first self.skip (default zero) arguments. + # You can redefine it to skip the conversion, do a different conversion, + # or rearrange the arguments however you like. + def munge_args (self, orig): + args = [] + for decl in orig.strip("(); ").split(","): + m = decl_re.search(decl) + if m: + args.append((m.group(1),decl[:m.start(1)].strip())) + else: + raise RuntimeError("can't split %s into type+name"%decl) + return args[self.skip:] + + def add_decl (self, fname, ftype, fargs): + self.decls[self.munge_name(fname)] = (ftype, self.munge_args(fargs)) + + def parse_decls (self, path, pattern): + regex = re.compile(pattern) + f = open(path,"r") + have_decl = False + while True: + line = f.readline() + if not line: + break + m = regex.search(line) + if m: + if have_decl: + self.add_decl(f_name,f_type,f_args) + f_name = m.group(2) + f_type = m.group(1) + f_args = line[m.end(0):-1].strip() + if f_args.rfind(")") >= 0: + self.add_decl(f_name,f_type,f_args) + else: + have_decl = True + elif have_decl: + if line.strip() == "": + self.add_decl(f_name,f_type,f_args) + have_decl = False + else: + f_args += " " + f_args += line[:-1].strip() + if have_decl: + self.add_decl(f_name,f_type,f_args) + + # Legacy function (yeah, already) to load a single template. If you're + # using multiple templates, you're better off loading them all from one + # file using load_templates (note plural) instead. + def load_template (self, name, path): + self.templates[name] = open(path,"r").readlines() + + # Load multiple templates. Each is introduced by a special comment of + # the form + # + # // template-name xyz + # + # One side effect is that the block before the first such comment will be + # ignored. 
This seems like it might be useful some day so I'll leave it + # in, but if people trip over it maybe it will change. + # + # It is recommended to define templates in expected execution order, to + # make the result more readable than the inverted order (e.g. callback + # then fop) common in the rest of our code. + def load_templates (self, path): + t_name = None + for line in open(path,"r").readlines(): + if not line: + break + m = tmpl_re.match(line) + if m: + if t_name: + self.templates[t_name] = t_contents + t_name = m.group(1).strip() + t_contents = [] + elif t_name: + t_contents.append(line) + if t_name: + self.templates[t_name] = t_contents + + # Emit the template, with the following expansions: + # + # $NAME$ => function name (as passed in) + # $TYPE$ => function return value + # $ARGS_SHORT$ => argument list, including types + # $ARGS_LONG$ => argument list, *not* including types + # $DEFAULTS$ => default callback args (see below) + # + # The $DEFAULTS$ substitution is for the case where a fop (which has one + # set of arguments) needs to signal an error via STACK_UNWIND (which + # requires a different set of arguments). In this case we look up the + # argument list for the opposite direction, using self.make_defaults which + # the user must explicitly set to the method for the opposite direction. + # If an argument is a pointer, we replace it with NULL; otherwise we + # replace it with zero. It's a hack, but it's the only thing we do that + # doesn't require specific knowledge of our environment and the specific + # call we're handling. If this doesn't suffice, we'll have to add + # something like $ARG0$ which can be passed in for specific cases. 
+ def emit (self, f_name, tmpl): + args = self.decls[f_name][1] + zipper = lambda x: x[0] + a_short = ", ".join(map(zipper,args)) + zipper = lambda x: x[1] + " " + x[0] + a_long = ", ".join(map(zipper,args)) + for line in self.templates[tmpl]: + line = line.replace("$NAME$",f_name) + line = line.replace("$TYPE$",self.decls[f_name][0]) + line = line.replace("$ARGS_SHORT$",a_short) + line = line.replace("$ARGS_LONG$",a_long) + line = line.replace("$DEFAULTS$",self.make_defaults(f_name)) + print(line.rstrip()) + + def _make_defaults (self, f_name): + result = [] + for arg in self.decls[f_name][1]: + if arg[1][-1] == "*": + result.append("NULL") + else: + result.append("0") + return ", ".join(result) + +if __name__ == "__main__": + type_re = "([a-z_0-9]+)" + name_re = "\(\*fop_([a-z0-9]+)_t\)" + full_re = type_re + " *" + name_re + cg = CodeGenerator() + cg.skip = 2 + cg.parse_decls(sys.argv[1],full_re) + """ + for k, v in cg.decls.iteritems(): + print("=== %s" % k) + print(" return type %s" % v[0]) + for arg in v[1]: + print(" arg %s (type %s)" % arg) + """ + cg.load_template("fop",sys.argv[2]) + cg.emit("lookup","fop") + cg.emit("rename","fop") + cg.emit("setxattr","fop") diff --git a/xlators/cluster/nsr-server/src/etcd-api.c b/xlators/cluster/nsr-server/src/etcd-api.c new file mode 100644 index 000000000..a07019244 --- /dev/null +++ b/xlators/cluster/nsr-server/src/etcd-api.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2013, Red Hat + * All rights reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* For asprintf */ +#if !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <curl/curl.h> +#include <yajl/yajl_tree.h> +#include "etcd-api.h" + + +#define DEFAULT_ETCD_PORT 4001 +#define SL_DELIM "\n\r\t ,;" + +typedef struct { + etcd_server *servers; +} _etcd_session; + +typedef struct { + char *key; + char *value; + int *index_in; /* pointer so NULL can be special */ + int index_out; /* NULL would be meaningless */ +} etcd_watch_t; + +typedef size_t curl_callback_t (void *, size_t, size_t, void *); + +int g_inited = 0; +const char *value_path[] = { "node", "value", NULL }; +const char *nodes_path[] = { "node", "nodes", NULL }; +const char *entry_path[] = { "key", NULL }; + +/* + * We only call this in case where it should be safe, but gcc doesn't know + * that so we use this to shut it up. + */ +char * +MY_YAJL_GET_STRING (yajl_val x) +{ + char *y = YAJL_GET_STRING(x); + + return y ? 
y : "bogus"; +} + +#if defined(DEBUG) +void +print_curl_error (char *intro, CURLcode res) +{ + printf("%s: %s\n",intro,curl_easy_strerror(res)); +} +#else +#define print_curl_error(intro,res) +#endif + + +etcd_session +etcd_open (etcd_server *server_list) +{ + _etcd_session *session; + + if (!g_inited) { + curl_global_init(CURL_GLOBAL_ALL); + g_inited = 1; + } + + session = malloc(sizeof(*session)); + if (!session) { + return NULL; + } + + /* + * Some day we'll set up more persistent connections, and keep track + * (via redirects) of which server is leader so that we can always + * try it first. For now we just push that to the individual request + * functions, which do the most brain-dead thing that can work. + */ + + session->servers = server_list; + return session; +} + + +void +etcd_close (etcd_session session) +{ + free(session); +} + +/* + * Normal yajl_tree_get is returning NULL for these paths even when I can + * verify (in gdb) that they exist. I suppose I could debug this for them, but + * this is way easier. + * + * TBD: see if common distros are packaging a JSON library that isn't total + * crap. + */ +yajl_val +my_yajl_tree_get (yajl_val root, char const **path, yajl_type type) +{ + yajl_val obj = root; + int i; + + for (;;) { + if (!*path) { + if (obj && (obj->type != type)) { + return NULL; + } + return obj; + } + if (obj->type != yajl_t_object) { + return NULL; + } + for (i = 0; /* nothing */; ++i) { + if (i >= obj->u.object.len) { + return NULL; + } + if (!strcmp(obj->u.object.keys[i],*path)) { + obj = obj->u.object.values[i]; + ++path; + break; + } + } + } +} + + +/* + * Looking directly at node->u.array seems terribly un-modular, but the YAJL + * tree interface doesn't seem to have any exposed API for iterating over the + * elements of an array. I tried using yajl_tree_get with an index in the + * path, either as a type-casted integer or as a string, but that didn't work. 
+ */ +char * +parse_array_response (yajl_val parent) +{ + size_t i; + yajl_val item; + yajl_val value; + char *retval = NULL; + char *saved; + yajl_val node; + + node = my_yajl_tree_get(parent,nodes_path,yajl_t_array); + if (!node) { + return NULL; + } + + for (i = 0; i < node->u.array.len; ++i) { + item = node->u.array.values[i]; + if (!item) { + break; + } + value = my_yajl_tree_get(item,entry_path,yajl_t_string); + if (!value) { + break; + } + if (retval) { + saved = retval; + retval = NULL; + (void)asprintf (&retval, "%s\n%s", + saved, MY_YAJL_GET_STRING(value)); + free(saved); + } + else { + retval = strdup(MY_YAJL_GET_STRING(value)); + } + if (!retval) { + break; + } + } + + return retval; +} + +size_t +parse_get_response (void *ptr, size_t size, size_t nmemb, void *stream) +{ + yajl_val node; + yajl_val value; + + node = yajl_tree_parse(ptr,NULL,0); + if (node) { + value = my_yajl_tree_get(node,value_path,yajl_t_string); + if (value) { + /* + * YAJL probably copied it once, now we're going to + * copy it again. If anybody really cares for such + * small and infrequently used values, we'd have to do + * do something much more complicated (like using the + * stream interface) to avoid the copy. Right now it's + * just not worth it. + */ + *((char **)stream) = strdup(MY_YAJL_GET_STRING(value)); + } + else { + /* Might as well try this. 
*/ + *((char **)stream) = parse_array_response(node); + } + yajl_tree_free(node); + } + + return size*nmemb; +} + + +etcd_result +etcd_get_one (_etcd_session *session, char *key, etcd_server *srv, char *prefix, + char *post, curl_callback_t cb, char **stream) +{ + char *url; + CURL *curl; + CURLcode curl_res; + etcd_result res = ETCD_WTF; + void *err_label = &&done; + + if (asprintf(&url,"http://%s:%u/v2/%s%s", + srv->host,srv->port,prefix,key) < 0) { + goto *err_label; + } + printf("url = %s\n",url); + err_label = &&free_url; + + curl = curl_easy_init(); + if (!curl) { + goto *err_label; + } + err_label = &&cleanup_curl; + + /* TBD: add error checking for these */ + curl_easy_setopt(curl,CURLOPT_URL,url); + curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1L); + curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,cb); + curl_easy_setopt(curl,CURLOPT_WRITEDATA,stream); + if (post) { + curl_easy_setopt(curl,CURLOPT_POST,1L); + curl_easy_setopt(curl,CURLOPT_POSTFIELDS,post); + } +#if defined(DEBUG) + curl_easy_setopt(curl,CURLOPT_VERBOSE,1L); +#endif + + curl_res = curl_easy_perform(curl); + if (curl_res != CURLE_OK) { + print_curl_error("perform",curl_res); + goto *err_label; + } + + res = ETCD_OK; + +cleanup_curl: + curl_easy_cleanup(curl); +free_url: + free(url); +done: + return res; +} + + +char * +etcd_get (etcd_session session_as_void, char *key) +{ + _etcd_session *session = session_as_void; + etcd_server *srv; + etcd_result res; + char *value = NULL; + + for (srv = session->servers; srv->host; ++srv) { + res = etcd_get_one(session,key,srv,"keys/",NULL, + parse_get_response,&value); + if ((res == ETCD_OK) && value) { + return value; + } + } + + return NULL; +} + + +size_t +parse_watch_response (void *ptr, size_t size, size_t nmemb, void *stream) +{ + yajl_val node; + yajl_val value; + etcd_watch_t *watch = stream; + static const char *i_path[] = { "node", "modifiedIndex", NULL }; + static const char *k_path[] = { "node", "key", NULL }; + static const char *v_path[] = { 
"node", "value", NULL }; + + node = yajl_tree_parse(ptr,NULL,0); + if (node) { + value = my_yajl_tree_get(node,i_path,yajl_t_number); + if (value) { + watch->index_out = strtoul(YAJL_GET_NUMBER(value), + NULL,10); + } + value = my_yajl_tree_get(node,k_path,yajl_t_string); + if (value) { + watch->key = strdup(MY_YAJL_GET_STRING(value)); + } + value = my_yajl_tree_get(node,v_path,yajl_t_string); + if (value) { + watch->value = strdup(MY_YAJL_GET_STRING(value)); + } + } + + return size*nmemb; +} + + +etcd_result +etcd_watch (etcd_session session_as_void, char *pfx, + char **keyp, char **valuep, int *index_in, int *index_out) +{ + _etcd_session *session = session_as_void; + etcd_server *srv; + etcd_result res; + etcd_watch_t watch; + char *path; + + if (index_in) { + if (asprintf(&path,"%s?wait=true&recursive=true&waitIndex=%d", + pfx,*index_in) < 0) { + return ETCD_WTF; + } + } + else { + if (asprintf(&path,"%s?wait=true&recursive=true",pfx) < 0) { + return ETCD_WTF; + } + } + + memset(&watch,0,sizeof(watch)); + watch.index_in = index_in; + + for (srv = session->servers; srv->host; ++srv) { + res = etcd_get_one(session,path,srv,"keys/",NULL, + parse_watch_response,(char **)&watch); + if (res == ETCD_OK) { + if (keyp) { + *keyp = watch.key; + } + if (valuep) { + *valuep = watch.value; + } + if (index_out) { + *index_out = watch.index_out; + } + break; + } + } + + free(path); + return res; +} + + +size_t +parse_set_response (void *ptr, size_t size, size_t nmemb, void *stream) +{ + yajl_val node; + yajl_val value; + etcd_result res = ETCD_PROTOCOL_ERROR; + /* + * Success responses contain prevValue and index. Failure responses + * contain errorCode and cause. Among all these, index seems to be the + * one we're most likely to need later, so look for that. 
/*
 * curl write callback for an initial lock request: store a copy of the
 * response body (the lock index) through "stream", which must point to a
 * char *.
 *
 * The data curl delivers is size*nmemb bytes long and is NOT
 * NUL-terminated, so exactly that many bytes are copied and a terminator is
 * appended.  On allocation failure NULL is stored.  The caller owns the
 * resulting string and must free it.
 *
 * Returns the number of bytes consumed (always size*nmemb).
 */
size_t
parse_lock_response (void *ptr, size_t size, size_t nmemb, void *stream)
{
        size_t  len     = size * nmemb;
        char    *copy   = malloc(len + 1);

        if (copy) {
                memcpy(copy,ptr,len);
                copy[len] = '\0';
        }

        *((char **)stream) = copy;
        return len;
}
"PUT" : "DELETE"; + } + + if (asprintf(&url,"http://%s:%u/%s/%s", + srv->host,srv->port,namespace,key) < 0) { + goto *err_label; + } + err_label = &&free_url; + + if (is_lock) { + if (precond) { + if (asprintf(&contents,"index=%s",precond) < 0) { + goto *err_label; + } + err_label = &&free_contents; + } + if (ttl) { + if (contents) { + char *c2; + if (asprintf(&c2,"ttl=%u;%s",ttl,contents) < 0) { + goto *err_label; + } + free(contents); + contents = c2; + } + else { + if (asprintf(&contents,"ttl=%u",ttl) < 0) { + goto *err_label; + } + } + err_label = &&free_contents; + } + } + else { + if (value) { + if (asprintf(&contents,"value=%s",value) < 0) { + goto *err_label; + } + err_label = &&free_contents; + } + if (precond) { + char *c2; + if (asprintf(&c2,"%s;prevValue=%s",contents, + precond) < 0) { + goto *err_label; + } + free(contents); + contents = c2; + err_label = &&free_contents; + } + if (ttl) { + char *c2; + if (asprintf(&c2,"%s;ttl=%u",contents,ttl) < 0) { + goto *err_label; + } + free(contents); + contents = c2; + err_label = &&free_contents; + } + } + + curl = curl_easy_init(); + if (!curl) { + goto *err_label; + } + err_label = &&cleanup_curl; + + /* TBD: add error checking for these */ + curl_easy_setopt(curl,CURLOPT_CUSTOMREQUEST,http_cmd); + curl_easy_setopt(curl,CURLOPT_URL,url); + curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1L); + curl_easy_setopt(curl,CURLOPT_POSTREDIR,CURL_REDIR_POST_ALL); + + if (is_lock && value && !precond) { + /* Only do this for an initial lock, not a renewal. */ + curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, + parse_lock_response); + curl_easy_setopt(curl,CURLOPT_WRITEDATA,is_lock); + } + else { + curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, + parse_set_response); + curl_easy_setopt(curl,CURLOPT_WRITEDATA,&res); + } + + /* + * CURLOPT_HTTPPOST would be easier, but it looks like etcd will barf on + * that. Sigh. 
+ */ + if (contents) { + curl_easy_setopt(curl,CURLOPT_POST,1L); + curl_easy_setopt(curl,CURLOPT_POSTFIELDS,contents); + } +#if defined(DEBUG) + curl_easy_setopt(curl,CURLOPT_VERBOSE,1L); +#endif + + curl_res = curl_easy_perform(curl); + if (curl_res != CURLE_OK) { + print_curl_error("perform",curl_res); + goto *err_label; + } + + if (is_lock && value) { + if (!precond) { + /* + * If this is an initial lock, parse_lock_response would + * have been unable to set "res" for us. Instead, we + * set it here if the index string got updated. + */ + if (*is_lock != orig_index) { + res = ETCD_OK; + } + } + else { + /* + * If this is a lock renewal, then a successful call + * will pass through neither parse_lock_response nor + * parse_get_response. The curl response code alone + * is sufficient. + */ + res = ETCD_OK; + } + } + + /* + * If the request succeeded, or at least got to the server and failed + * there, parse_set_response should have set res appropriately. + */ + +cleanup_curl: + curl_easy_cleanup(curl); +free_contents: + free(contents); /* might already be NULL for delete, but that's OK */ +free_url: + free(url); +done: + return res; +} + + +etcd_result +etcd_set (etcd_session session_as_void, char *key, char *value, + char *precond, unsigned int ttl) +{ + _etcd_session *session = session_as_void; + etcd_server *srv; + etcd_result res; + + for (srv = session->servers; srv->host; ++srv) { + res = etcd_set_one(session,key,value,precond,ttl,srv,NULL); + /* + * Protocol errors are likely to be things like precondition + * failures, which won't be helped by retrying on another + * server. + */ + if ((res == ETCD_OK) || (res == ETCD_PROTOCOL_ERROR)) { + return res; + } + } + + return ETCD_WTF; +} + + +/* + * This uses the same path and status checks as SET, but with a different HTTP + * command instead of data. Precondition and TTL are obviously not used in + * this case, though a conditional delete would be a cool feature for etcd. 
/*
 * curl write callback for the "leader" query: store a copy of the response
 * body through "stream", which must point to a char *.
 *
 * curl's buffer is size*nmemb bytes long and is NOT NUL-terminated, so
 * exactly that many bytes are copied and a terminator is appended.  On
 * allocation failure NULL is stored.  The caller owns the resulting string
 * and must free it.
 *
 * Returns the number of bytes consumed (always size*nmemb).
 */
size_t
store_leader (void *ptr, size_t size, size_t nmemb, void *stream)
{
        size_t  len     = size * nmemb;
        char    *copy   = malloc(len + 1);

        if (copy) {
                memcpy(copy,ptr,len);
                copy[len] = '\0';
        }

        *((char **)stream) = copy;
        return len;
}
+ +int +_count_matching (char *text, char *cset, int result) +{ + char *t; + int res = 0; + + for (t = text; *t; ++t) { + if ((strchr(cset,*t) != NULL) != result) { + break; + } + ++res; + } + + return res; +} + +#define count_matching(t,cs) _count_matching(t,cs,1) +#define count_nonmatching(t,cs) _count_matching(t,cs,0) + + +etcd_session +etcd_open_str (char *server_names) +{ + char *snp; + int run_len; + int host_len; + size_t num_servers; + etcd_server *server_list; + etcd_session *session; + + /* + * Yeah, we iterate over the string twice so we can allocate an + * appropriately sized array instead of turning it into a linked list. + * Unfortunately this means we can't use strtok* which is destructive + * with no platform-independent way to reverse the destructive effects. + */ + + num_servers = 0; + snp = server_names; + while (*snp) { + run_len = count_nonmatching(snp,SL_DELIM); + if (!run_len) { + snp += count_matching(snp,SL_DELIM); + continue; + } + ++num_servers; + snp += run_len; + } + + if (!num_servers) { + return NULL; + } + + server_list = calloc(num_servers+1,sizeof(*server_list)); + if (!server_list) { + return NULL; + } + num_servers = 0; + + snp = server_names; + while (*snp) { + run_len = count_nonmatching(snp,SL_DELIM); + if (!run_len) { + snp += count_matching(snp,SL_DELIM); + continue; + } + host_len = count_nonmatching(snp,":"); + if ((run_len - host_len) > 1) { + server_list[num_servers].host = strndup(snp,host_len); + server_list[num_servers].port = (unsigned short) + strtoul(snp+host_len+1,NULL,10); + } + else { + server_list[num_servers].host = strndup(snp,run_len); + server_list[num_servers].port = DEFAULT_ETCD_PORT; + } + ++num_servers; + snp += run_len; + } + + session = etcd_open(server_list); + if (!session) { + free_sl(server_list); + } + return session; +} + + +void +etcd_close_str (etcd_session session) +{ + free_sl(((_etcd_session *)session)->servers); + etcd_close(session); +} diff --git 
a/xlators/cluster/nsr-server/src/etcd-api.h b/xlators/cluster/nsr-server/src/etcd-api.h new file mode 100644 index 000000000..66275d40d --- /dev/null +++ b/xlators/cluster/nsr-server/src/etcd-api.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2013, Red Hat + * All rights reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Description of an etcd server. For now it just includes the name and + * port, but some day it might include other stuff like SSL certificate + * information. 
/*
 * etcd_close_str
 *
 * Same as etcd_close, but also free the server list as etcd_open_str would
 * have allocated it.  Use this (not etcd_close) for sessions obtained from
 * etcd_open_str.
 */
void            etcd_close_str  (etcd_session session);
/*
 * etcd_leader
 *
 * Get the identity of the current leader.  The return value is a newly
 * allocated string, which must be freed by the caller, or NULL if no server
 * could provide an answer.
 */

char *          etcd_leader     (etcd_session session);
+ */ + +etcd_result etcd_lock (etcd_session session_as_void, char *key, + unsigned int ttl, char *index_in, char **index_out); + +/* + * etcd_unlock + * + * Release a lock (see etcd_lock regarding terminology). + * + * key + * The path (in the "locks" namespace) for the lock. + * + * index + * Lock index from previous lock call. + */ + +etcd_result etcd_unlock (etcd_session session_as_void, char *key, + char *index); + diff --git a/xlators/cluster/nsr-server/src/etcd-sim.c b/xlators/cluster/nsr-server/src/etcd-sim.c new file mode 100644 index 000000000..d0bea12c7 --- /dev/null +++ b/xlators/cluster/nsr-server/src/etcd-sim.c @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2014, Red Hat + * All rights reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. Redistributions in binary + * form must reproduce the above copyright notice, this list of conditions and + * the following disclaimer in the documentation and/or other materials + * provided with the distribution. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/file.h> + +#include "mem-pool.h" + +/* + * Mock implementation of etcd + * The etcd file is simulated in /tmp/<server-names> + * Writes from Multiple writers are protected using file lock. +*/ + +#include "etcd-api.h" +#define MAX_KEY_LEN 64 +#define MAX_VALUE_LEN 64 +#define MAX_EXPIRE_LEN 16 + +etcd_session +etcd_open (etcd_server *server_list) +{ + return NULL; +} + +typedef struct _etcd_sim_s { + char *path; +} etcd_sim_t; + +void +etcd_close (etcd_session this) +{ + etcd_sim_t *sim = (etcd_sim_t *)this; + free(sim->path); + free(this); +} + + +char * +etcd_get_1 (FILE *stream, char *key) +{ + char *str = NULL; + size_t len; + unsigned long expires; + char *ret; + + // Read the file + while(1) { + if(str) { + free(str); + str = NULL; + } + if (getline((char **)&str, &len,stream) == -1) { + break; + } + if (!strncmp(str, key, strlen(key))) { + char k[256], s[256]; + sscanf(str,"%s %s %lu",k, s, &expires); + // check if key is expired. + if (time(NULL) > expires) { + /* Keep looking for an unexpired entry. 
*/ + continue; + } + ret = calloc(1, strlen(s) + 1); + strcpy(ret,s); + free(str); + return(ret); + } + } + return NULL; +} + + +char * +etcd_get (etcd_session this, char *key) +{ + etcd_sim_t *sim = (etcd_sim_t *)this; + int fd; + FILE *stream; + char *retval; + + fd = open(sim->path,O_RDONLY); + if (!fd) { + return NULL; + } + + stream = fdopen(fd,"r"); + (void)flock(fd,LOCK_SH); + retval = etcd_get_1(stream,key); + (void)flock(fd,LOCK_UN); + fclose(stream); /* closes fd as well */ + + return retval; +} + + +etcd_result +etcd_set_1 (FILE *stream, char *key, char *value, + char *precond, unsigned int ttl) +{ + char *str = NULL; + char tp[255]; + size_t len; + unsigned long expires; + + while(1) { + if(str) { + free(str); + str = NULL; + } + if (getline((char **)&str, &len,stream) == -1) { + break; + } + if (!strncmp(str, key, strlen(key))) { + char k[256], s[256]; + sscanf(str,"%s %s %lu",k, s, &expires); + // check if the present key is expired + if (time(NULL) > expires) { + /* Keep looking for an unexpired entry. */ + continue; + } + /* + * The only case in which we should fail here is if a + * precondition was specified and does not match the + * current (non-expired) value. + */ + if (precond && strcmp(precond, s)) { + free(str); + return ETCD_WTF; + } + fseek(stream, -strlen(str), SEEK_CUR); + free(str); + goto here; + } + } +here: + memset(tp, 0, 255); + sprintf(tp,"%*s %*s %*lu\n", + -MAX_KEY_LEN, key, -MAX_VALUE_LEN, value, + -MAX_EXPIRE_LEN, ttl ? 
time(NULL) + ttl : ~0); + if (fwrite(tp, 1,strlen(tp), stream) != strlen(tp)) { + return ETCD_WTF; + } + fflush(stream); + fsync(fileno(stream)); + return ETCD_OK; +} + + +etcd_result +etcd_set (etcd_session this, char *key, char *value, + char *precond, unsigned int ttl) +{ + etcd_sim_t *sim = (etcd_sim_t *)this; + int fd; + FILE *stream; + etcd_result retval; + + fd = open(sim->path,O_RDWR); + if (fd < 0) { + return ETCD_WTF; + } + + stream = fdopen(fd,"r+"); + (void)flock(fd,LOCK_EX); + retval = etcd_set_1(stream,key,value,precond,ttl); + (void)flock(fd,LOCK_UN); + fclose(stream); /* closes fd as well */ + + return retval; +} + + +etcd_session +etcd_open_str (char *server_names) +{ + etcd_sim_t *sim; + int fd; + + sim = calloc(1, sizeof(etcd_sim_t)); + (void)asprintf(&sim->path,"/tmp/%s",server_names); + + fd = open(sim->path, O_RDWR | O_CREAT, 0777); + if (fd == -1) { + free(sim->path); + free(sim); + return NULL; + } + + close(fd); + return ((void *)sim); +} + + +void +etcd_close_str (etcd_session this) +{ + etcd_close(this); +} + +etcd_result +etcd_delete (etcd_session this, char *key) +{ + return ETCD_WTF; +} + +char * +etcd_leader (etcd_session this_as_void) +{ + return NULL; +} + +etcd_result +etcd_watch (etcd_session this, char *pfx, char **keyp, char **valuep, + int *index_in, int *index_out) +{ + return ETCD_WTF; +} + +etcd_result +etcd_lock (etcd_session session_as_void, char *key, unsigned int ttl, + char *index_in, char **index_out) +{ + char *path; + int fd; + + if (!index_in) { + if (gf_asprintf(&path,"/var/tmp/%s",key) < 0) { + return ETCD_WTF; + } + fd = open(path,O_RDWR|O_CREAT,0666); + GF_FREE(path); + if (fd < 0) { + return ETCD_WTF; + } + if (flock(fd,LOCK_EX) < 0) { + close(fd); + return ETCD_WTF; + } + *index_out = strdup("42"); + } + + /* + * Yes, we leak an fd by not closing it here (and nobody else even + * knows about it). That would be awful in any other context, but + * for test scripts it won't matter. 
+ */ + return ETCD_OK; +} + diff --git a/xlators/cluster/nsr-server/src/gen-fops.py b/xlators/cluster/nsr-server/src/gen-fops.py new file mode 100755 index 000000000..1639f489c --- /dev/null +++ b/xlators/cluster/nsr-server/src/gen-fops.py @@ -0,0 +1,120 @@ +#!/usr/bin/python + +# This script generates the boilerplate versions of most fops and cbks in the +# server. This allows the details of leadership-status checking, sequencing +# between leader and followers (including fan-out), and basic error checking +# to be centralized one place, with per-operation code kept to a minimum. + +import sys +import codegen + +type_re = "([a-z_0-9]+)" +name_re = "\(\*fop_([a-z0-9]+)_t\)" +full_re = type_re + " *" + name_re +fop_cg = codegen.CodeGenerator() +fop_cg.skip = 2 +fop_cg.parse_decls(sys.argv[1],full_re) +fop_cg.load_templates(sys.argv[2]) + +# Use the multi-template feature to generate multiple callbacks from the same +# parsed declarations. +type_re = "([a-z_0-9]+)" +name_re = "\(\*fop_([a-z0-9]+)_cbk_t\)" +full_re = type_re + " *" + name_re +cbk_cg = codegen.CodeGenerator() +cbk_cg.skip = 5 +cbk_cg.parse_decls(sys.argv[1],full_re) +cbk_cg.load_templates(sys.argv[2]) + +# This is a nasty little trick to handle the case where a generated fop needs +# a set of default arguments for the corresponding callback. +fop_cg.make_defaults = cbk_cg.make_defaults + +# We need two types of templates. The first, for pure read operations, just +# needs to do a simple am-i-leader check (augmented to allow dirty reads). +# The second, for pure writes, needs to do fan-out to followers between those +# initial checks and local execution. There are other operations that don't +# fit neatly into either category - e.g. lock ops or fsync - so we'll just have +# to handle those manually. The table thus includes entries only for those we +# can categorize. The special cases, plus any new operations we've never even +# heard of, aren't in there. 
+# +# Various keywords can be used to define/undefine preprocessor symbols used +# in the templates, on a per-function basis. For example, if the keyword here +# is "fsync" (lowercase word or abbreviation) that will cause NSR_CG_FSYNC +# (prefix plus uppercase version) to be defined above all of the generated code +# for that fop. + +fop_table = { + "access": "read", + "create": "write", + "discard": "write", +# "entrylk": "read", + "fallocate": "write", +# "fentrylk": "read", + "fgetxattr": "read", +# "finodelk": "read", +# "flush": "read", + "fremovexattr": "write", + "fsetattr": "write", + "fsetxattr": "write", + "fstat": "read", +# "fsync": "read", +# "fsyncdir": "read", + "ftruncate": "write", + "fxattrop": "write", + "getxattr": "read", +# "inodelk": "read", + "link": "write", +# "lk": "read", +# "lookup": "read", + "mkdir": "write", + "mknod": "write", + "open": "write", + "opendir": "read", + "rchecksum": "read", + "readdir": "read", + "readdirp": "read", + "readlink": "read", + "readv": "read", + "removexattr": "write", + "rename": "write", + "rmdir": "write", + "setattr": "write", + "setxattr": "write", + "stat": "read", + "statfs": "read", + "symlink": "write", + "truncate": "write", + "unlink": "write", + "writev": "write,fsync,queue", + "xattrop": "write", +} + +fops_done = [] +for x in sorted(fop_cg.decls.keys()): + if x in fop_table.keys(): + info = fop_table[x].split(",") + kind = info[0] + flags = info[1:] + if ("fsync" in flags) or ("queue" in flags): + flags.append("need_fd") + for fname in flags: + print "#define NSR_CG_%s" % fname.upper() + cbk_cg.emit(x,kind+"-complete") + fop_cg.emit(x,kind+"-continue") + cbk_cg.emit(x,kind+"-fan-in") + fop_cg.emit(x,kind+"-dispatch") + fop_cg.emit(x,kind+"-fop") + for fname in flags: + print "#undef NSR_CG_%s" % fname.upper() + fops_done.append(x) + else: + print("/* No code emitted for %s */"%x) + print("") + +# Just for fun, emit the fops table too. 
+print("struct xlator_fops fops = {") +for x in fops_done: + print(" .%s = nsr_%s,"%(x,x)) +print("};") diff --git a/xlators/cluster/nsr-server/src/leader.c b/xlators/cluster/nsr-server/src/leader.c new file mode 100644 index 000000000..02a2609c8 --- /dev/null +++ b/xlators/cluster/nsr-server/src/leader.c @@ -0,0 +1,138 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <regex.h> +//#include <stdlib.h> +#include <string.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "call-stub.h" +#include "defaults.h" +#include "xlator.h" +#include "api/src/glfs.h" +#include "api/src/glfs-internal.h" + +#ifndef NSR_SIM_ETCD +#include "etcd-api.h" +#endif +#include "nsr-internal.h" +#include "../../nsr-recon/src/recon_driver.h" +#include "../../nsr-recon/src/recon_xlator.h" + +#define NSR_TTL 5 + +static void +nsr_set_leader (xlator_t *this, etcd_session etcd) +{ + long term = 0; + etcd_result res; + nsr_private_t *priv = this->private; + char n_t[sizeof(long)+1]; + char *text = NULL; + + gf_log (this->name, GF_LOG_INFO, "Just became leader"); + + text = etcd_get(etcd, priv->term_key); + if(text == NULL) { + term = 0; + } else { + term = strtol(text, NULL, 10); + } + sprintf(n_t,"%ld",term+1); + res = etcd_set(etcd, priv->term_key,n_t,text,0); + if(res != ETCD_OK) { + gf_log (this->name, GF_LOG_ERROR, "failed to set term"); + return; + } + priv->leader = _gf_true; + + priv->current_term = term + 1; + + if (priv->nsr_recon_start == _gf_false) { + atomic_fetch_and(&(priv->fence_io), 0); + return; + } + + // Move this inside recon notify??? 
+ atomic_fetch_or(&(priv->fence_io), 1); + + nsr_recon_notify_event_set_leader(priv); + + return; +} + +void * +nsr_leader_thread (void *arg) +{ + xlator_t *this = (xlator_t *) arg; + nsr_private_t *priv = this->private; + etcd_result res; + char *index_in = NULL; + char *index_out = NULL; + + gf_log (this->name, GF_LOG_INFO, + "calling etcd_open_str on servers %s", priv->etcd_servers); + + priv->etcd = etcd_open_str(priv->etcd_servers); + if (!(priv->etcd)) { + gf_log (this->name, GF_LOG_ERROR, + "failed to open etcd session\n"); + return NULL; + } + + priv->leader_inited = 1; + + for (;;) { + /* Not leader yet. Try to become leader. */ + for (;;) { + res = etcd_lock (priv->etcd, priv->leader_key, NSR_TTL, + index_in, &index_out); + if (res == ETCD_OK) { + break; + } + gf_log (this->name, GF_LOG_WARNING, + "etcd_lock failed (%d)", res); + sleep(1); + } + /* We're there. Notify other parts of the code. */ + nsr_set_leader(this,priv->etcd); + /* Try to retain leadership. */ + index_in = index_out; + index_out = NULL; + for (;;) { + res = etcd_lock (priv->etcd, priv->leader_key, NSR_TTL, + index_in, &index_out); + if (index_out && (index_in != index_out)) { + if (index_in) { + free(index_in); + } + index_in = index_out; + index_out = NULL; + } + if (res != ETCD_OK) { + gf_log (this->name, GF_LOG_WARNING, + "lost leadership (%d)", res); + if (index_out) { + free(index_out); + } + break; + } + sleep(1); + } + } + + etcd_close_str(priv->etcd); + return NULL; +} + diff --git a/xlators/cluster/nsr-server/src/nsr-internal.h b/xlators/cluster/nsr-server/src/nsr-internal.h new file mode 100644 index 000000000..72b61bfa5 --- /dev/null +++ b/xlators/cluster/nsr-server/src/nsr-internal.h @@ -0,0 +1,101 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <sys/stat.h> +#include <sys/types.h> + +#define LEADER_XATTR "user.nsr.leader" +#define SECOND_CHILD(xl) (xl->children->next->xlator) + +enum { + gf_mt_nsr_private_t = gf_common_mt_end + 1, + gf_mt_nsr_fd_ctx_t, + gf_mt_nsr_inode_ctx_t, + gf_mt_nsr_dirty_t, + gf_mt_nsr_end +}; + +typedef enum nsr_recon_notify_ev_id_t { + NSR_RECON_SET_LEADER = 1, + NSR_RECON_ADD_CHILD = 2 +} nsr_recon_notify_ev_id_t; + +typedef struct _nsr_recon_notify_ev_s { + nsr_recon_notify_ev_id_t id; + uint32_t index; // in case of add + struct list_head list; +} nsr_recon_notify_ev_t; + +typedef struct { + char *etcd_servers; + char *subvol_uuid; + char *leader_key; + char *term_key; + char *brick_uuid; + gf_boolean_t leader; + uint8_t up_children; + uint8_t n_children; + char *vol_file; + etcd_session etcd; + volatile unsigned int fence_io; + uint32_t current_term; +#ifdef NSR_DEBUG + uint32_t leader_log_fd; +#endif + volatile int recon_notify_inited; + volatile int leader_inited; + uint32_t kid_state; + gf_lock_t dirty_lock; + struct list_head dirty_fds; + gf_boolean_t nsr_recon_start; + void * recon_ctx; + volatile uint32_t ops_in_flight; + uint32_t index; + gf_lock_t index_lock; + double quorum_pct; +} nsr_private_t; + +typedef struct { + call_stub_t *stub; + call_stub_t *qstub; + uint8_t call_count; + fd_t *fd; + struct list_head qlinks; +} nsr_local_t; + +/* + * This should match whatever changelog returns on the pre-op for us to pass + * when we're ready for our post-op. 
+ */ +typedef uint32_t log_id_t; + +typedef struct { + struct list_head links; + log_id_t id; +} nsr_dirty_list_t; + +typedef struct { + fd_t *fd; + struct list_head dirty_list; + struct list_head fd_list; +} nsr_fd_ctx_t; + +typedef struct { + gf_lock_t lock; + uint32_t active; + struct list_head aqueue; + uint32_t pending; + struct list_head pqueue; +} nsr_inode_ctx_t; + +void nsr_recon_notify_event_set_leader(nsr_private_t *priv); +void nsr_recon_notify_event_add_child(nsr_private_t *priv, uint32_t index); +void* nsr_recon_notify_thread (void *this); + diff --git a/xlators/cluster/nsr-server/src/nsr.c b/xlators/cluster/nsr-server/src/nsr.c new file mode 100644 index 000000000..85eba09b5 --- /dev/null +++ b/xlators/cluster/nsr-server/src/nsr.c @@ -0,0 +1,812 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "call-stub.h" +#include "defaults.h" +#include "xlator.h" +#include "api/src/glfs.h" +#include "api/src/glfs-internal.h" +#include "run.h" +#include "common-utils.h" +#include "syncop.h" + +#include "etcd-api.h" +#include "nsr-internal.h" +#include "../../nsr-recon/src/recon_driver.h" +#include "../../nsr-recon/src/recon_xlator.h" + + +#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd" +#define GLUSTERD_VOLUME_DIR_PREFIX "vols" +#define GLUSTERD_BRICK_INFO_DIR "bricks" + +#define NSR_FLUSH_INTERVAL 5 + +nsr_inode_ctx_t * +nsr_get_inode_ctx (xlator_t *this, inode_t *inode) +{ + uint64_t ctx_int = 0LL; + nsr_inode_ctx_t *ctx_ptr; + + if (__inode_ctx_get(inode,this,&ctx_int) == 0) { + ctx_ptr = (nsr_inode_ctx_t *)(long)ctx_int; + } + else { + ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), + gf_mt_nsr_inode_ctx_t); + if (ctx_ptr) { + ctx_int = (uint64_t)(long)ctx_ptr; + if (__inode_ctx_set(inode,this,&ctx_int) == 0) { + LOCK_INIT(&ctx_ptr->lock); + INIT_LIST_HEAD(&ctx_ptr->aqueue); + INIT_LIST_HEAD(&ctx_ptr->pqueue); + } + else { + GF_FREE(ctx_ptr); + ctx_ptr = NULL; + } + } + + } + + return ctx_ptr; +} + +nsr_fd_ctx_t * +nsr_get_fd_ctx (xlator_t *this, fd_t *fd) +{ + uint64_t ctx_int = 0LL; + nsr_fd_ctx_t *ctx_ptr; + + if (__fd_ctx_get(fd,this,&ctx_int) == 0) { + ctx_ptr = (nsr_fd_ctx_t *)(long)ctx_int; + } + else { + ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), gf_mt_nsr_fd_ctx_t); + if (ctx_ptr) { + if (__fd_ctx_set(fd,this,(uint64_t)ctx_ptr) == 0) { + INIT_LIST_HEAD(&ctx_ptr->dirty_list); + INIT_LIST_HEAD(&ctx_ptr->fd_list); + } + else { + GF_FREE(ctx_ptr); + ctx_ptr = NULL; + } + } + + } + + return ctx_ptr; +} + +void +nsr_mark_fd_dirty (xlator_t *this, nsr_local_t *local) +{ + fd_t *fd = local->fd; + nsr_fd_ctx_t *ctx_ptr; + nsr_dirty_list_t *dirty; + nsr_private_t *priv = this->private; + + /* + * TBD: don't do any of this for O_SYNC/O_DIRECT writes. 
+ * Unfortunately, that optimization requires that we distinguish + * between writev and other "write" calls, saving the original flags + * and checking them in the callback. Too much work for too little + * gain right now. + */ + + LOCK(&fd->lock); + ctx_ptr = nsr_get_fd_ctx(this,fd); + dirty = GF_CALLOC(1,sizeof(*dirty),gf_mt_nsr_dirty_t); + if (ctx_ptr && dirty) { + gf_log (this->name, GF_LOG_TRACE, + "marking fd %p as dirty (%p)", fd, dirty); + /* TBD: fill dirty->id from what changelog gave us */ + list_add_tail(&dirty->links,&ctx_ptr->dirty_list); + if (list_empty(&ctx_ptr->fd_list)) { + /* Add a ref so _release doesn't get called. */ + ctx_ptr->fd = fd_ref(fd); + LOCK(&priv->dirty_lock); + list_add_tail (&ctx_ptr->fd_list, + &priv->dirty_fds); + UNLOCK(&priv->dirty_lock); + } + } + else { + gf_log (this->name, GF_LOG_ERROR, + "could not mark %p dirty", fd); + if (ctx_ptr) { + GF_FREE(ctx_ptr); + } + if (dirty) { + GF_FREE(dirty); + } + } + UNLOCK(&fd->lock); +} + +#define NSR_TERM_XATTR "trusted.nsr.term" +#define NSR_INDEX_XATTR "trusted.nsr.index" +#define RECON_TERM_XATTR "trusted.nsr.recon-term" +#define RECON_INDEX_XATTR "trusted.nsr.recon-index" +#define NSR_REP_COUNT_XATTR "trusted.nsr.rep-count" +#include "nsr-cg.c" + +uint8_t +nsr_count_up_kids (nsr_private_t *priv) +{ + uint8_t retval = 0; + uint8_t i; + + for (i = 0; i < priv->n_children; ++i) { + if (priv->kid_state & (1 << i)) { + ++retval; + } + } + + return retval; +} + +/* + * The fsync machinery looks a lot like that for any write call, but there are + * some important differences that are easy to miss. First, we don't care + * about the xdata that shows whether the call came from a leader or + * reconciliation process. If we're the leader we fan out; if we're not we + * don't. Second, we don't wait for followers before we issue the local call. + * The code generation system could be updated to handle this, and still might + * if we need to implement other "almost identical" paths (e.g. 
for open), but + * a copy is more readable as long as it's just one. + */ + +int32_t +nsr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + nsr_local_t *local = frame->local; + gf_boolean_t unwind; + + LOCK(&frame->lock); + unwind = !--(local->call_count); + UNLOCK(&frame->lock); + + if (unwind) { + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + } + return 0; +} + +int32_t +nsr_fsync_local_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + nsr_dirty_list_t *dirty; + nsr_dirty_list_t *dtmp; + nsr_local_t *local = frame->local; + + list_for_each_entry_safe (dirty, dtmp, &local->qlinks, links) { + gf_log (this->name, GF_LOG_TRACE, + "sending post-op on %p (%p)", local->fd, dirty); + GF_FREE(dirty); + } + + return nsr_fsync_cbk (frame, cookie, this, op_ret, op_errno, + prebuf, postbuf, xdata); +} + +int32_t +nsr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + nsr_private_t *priv = this->private; + nsr_local_t *local; + uint64_t ctx_int = 0LL; + nsr_fd_ctx_t *ctx_ptr; + xlator_list_t *trav; + + local = mem_get0(this->local_pool); + if (!local) { + STACK_UNWIND_STRICT(fsync,frame,-1,ENOMEM,NULL,NULL,xdata); + return 0; + } + INIT_LIST_HEAD(&local->qlinks); + frame->local = local; + + /* Move the dirty list from the fd to the fsync request. */ + LOCK(&fd->lock); + if (__fd_ctx_get(fd,this,&ctx_int) == 0) { + ctx_ptr = (nsr_fd_ctx_t *)(long)ctx_int; + list_splice_init (&ctx_ptr->dirty_list, + &local->qlinks); + } + UNLOCK(&fd->lock); + + /* Issue the local call. */ + local->call_count = priv->leader ? 
priv->n_children : 1; + STACK_WIND (frame, nsr_fsync_local_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + fd, flags, xdata); + + /* Issue remote calls if we're the leader. */ + if (priv->leader) { + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND (frame, nsr_fsync_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, + fd, flags, xdata); + } + } + + return 0; +} + +int32_t +nsr_getxattr_special (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + dict_t *result; + nsr_private_t *priv = this->private; + + if (!priv->leader) { + STACK_UNWIND_STRICT (getxattr, frame, -1, EREMOTE, NULL, NULL); + return 0; + } + + if (!name || (strcmp(name,NSR_REP_COUNT_XATTR) != 0)) { + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); + return 0; + } + + result = dict_new(); + if (!result) { + goto dn_failed; + } + + priv->up_children = nsr_count_up_kids(this->private); + if (dict_set_uint32(result,NSR_REP_COUNT_XATTR,priv->up_children) != 0) { + goto dsu_failed; + } + + STACK_UNWIND_STRICT (getxattr, frame, 0, 0, result, NULL); + dict_destroy(result); + return 0; + +dsu_failed: + dict_destroy(result); +dn_failed: + STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; +} + +void +nsr_flush_fd (xlator_t *this, nsr_fd_ctx_t *fd_ctx) +{ + nsr_dirty_list_t *dirty; + nsr_dirty_list_t *dtmp; + + list_for_each_entry_safe (dirty, dtmp, &fd_ctx->dirty_list, links) { + gf_log (this->name, GF_LOG_TRACE, + "sending post-op on %p (%p)", fd_ctx->fd, dirty); + GF_FREE(dirty); + } + + INIT_LIST_HEAD(&fd_ctx->dirty_list); +} + +void * +nsr_flush_thread (void *ctx) +{ + xlator_t *this = ctx; + nsr_private_t *priv = this->private; + struct list_head dirty_fds; + nsr_fd_ctx_t *fd_ctx; + nsr_fd_ctx_t *fd_tmp; + int ret; + + for (;;) { + /* + * We have to be very careful to avoid lock inversions here, so + * we can't just hold priv->dirty_lock while we 
take and + * release locks for each fd. Instead, we only hold dirty_lock + * at the beginning of each iteration, as we (effectively) make + * a copy of the current list head and then clear the original. + * This leads to four scenarios for adding the first entry to + * an fd and potentially putting it on the global list. + * + * (1) While we're asleep. No lock contention, it just gets + * added and will be processed on the next iteration. + * + * (2) After we've made a local copy, but before we've started + * processing that fd. The new entry will be added to the + * fd (under its lock), and we'll process it on the current + * iteration. + * + * (3) While we're processing the fd. They'll block on the fd + * lock, then see that the list is empty and put it on the + * global list. We'll process it here on the next + * iteration. + * + * (4) While we're working, but after we've processed that fd. + * Same as (1) as far as that fd is concerned. + */ + INIT_LIST_HEAD(&dirty_fds); + LOCK(&priv->dirty_lock); + list_splice_init(&priv->dirty_fds,&dirty_fds); + UNLOCK(&priv->dirty_lock); + + list_for_each_entry_safe (fd_ctx, fd_tmp, &dirty_fds, fd_list) { + ret = syncop_fsync(FIRST_CHILD(this),fd_ctx->fd,0); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to fsync %p (%d)", + fd_ctx->fd, -ret); + } + + LOCK(&fd_ctx->fd->lock); + nsr_flush_fd(this,fd_ctx); + list_del_init(&fd_ctx->fd_list); + UNLOCK(&fd_ctx->fd->lock); + fd_unref(fd_ctx->fd); + } + + sleep(NSR_FLUSH_INTERVAL); + } + + return NULL; +} + +int32_t +nsr_forget (xlator_t *this, inode_t *inode) +{ + uint64_t ctx = 0LL; + + if ((inode_ctx_del(inode,this,&ctx) == 0) && ctx) { + GF_FREE((void *)(long)ctx); + } + + return 0; +} + +int32_t +nsr_release (xlator_t *this, fd_t *fd) +{ + uint64_t ctx = 0LL; + + if ((fd_ctx_del(fd,this,&ctx) == 0) && ctx) { + GF_FREE((void *)(long)ctx); + } + + return 0; +} + +struct xlator_cbks cbks = { + .forget = nsr_forget, + .release = nsr_release, +}; + +int 
+nsr_reconfigure (xlator_t *this, dict_t *options) +{ + nsr_private_t *priv = this->private; + + GF_OPTION_RECONF ("leader", priv->leader, options, bool, err); + gf_log (this->name, GF_LOG_INFO, + "reconfigure called. setting priv->leader to %d\n", priv->leader); + return 0; + +err: + return -1; +} + +int +nsr_get_child_index (xlator_t *this, xlator_t *kid) +{ + xlator_list_t *trav; + int retval = -1; + + for (trav = this->children; trav; trav = trav->next) { + ++retval; + if (trav->xlator == kid) { + return retval; + } + } + + return -1; +} + +/* + * Child notify handling is unreasonably FUBAR. Sometimes we'll get a + * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it. + * Other times we won't. Because it's effectively random (probably racy), we + * can't just maintain a count. We actually have to keep track of the state + * for each child separately, to filter out the bogus CHILD_DOWN events, and + * then generate counts on demand. + */ +int +nsr_notify (xlator_t *this, int event, void *data, ...) 
+{ + nsr_private_t *priv = this->private; + int index; + + switch (event) { + case GF_EVENT_CHILD_UP: + index = nsr_get_child_index(this,data); + if (index >= 0) { + priv->kid_state |= (1 << index); + priv->up_children = nsr_count_up_kids(priv); + gf_log (this->name, GF_LOG_INFO, + "got CHILD_UP for %s, now %u kids", + ((xlator_t *)data)->name, + priv->up_children); + if (priv->nsr_recon_start == _gf_true) { + nsr_recon_notify_event_add_child(priv, index); + } + } + break; + case GF_EVENT_CHILD_DOWN: + index = nsr_get_child_index(this,data); + if (index >= 0) { + priv->kid_state &= ~(1 << index); + priv->up_children = nsr_count_up_kids(priv); + gf_log (this->name, GF_LOG_INFO, + "got CHILD_DOWN for %s, now %u kids", + ((xlator_t *)data)->name, + priv->up_children); + } + break; + default: + ; + } + + return default_notify(this,event,data); +} + + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("nsr", this, out); + + ret = xlator_mem_acct_init (this, gf_mt_nsr_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Memory accounting init" "failed"); + return ret; + } +out: + return ret; +} + + +extern void *nsr_leader_thread (void *); + +void +nsr_deallocate_priv (nsr_private_t *priv) +{ + if (!priv) { + return; + } + + if (priv->leader_key) { + GF_FREE(priv->leader_key); + } + + if (priv->term_key) { + GF_FREE(priv->term_key); + } + + GF_FREE(priv); +} + + +int32_t +nsr_init (xlator_t *this) +{ + xlator_list_t *remote; + xlator_list_t *local; + nsr_private_t *priv = NULL; + xlator_list_t *trav; + pthread_t kid; + uuid_t tmp_uuid; + char *my_name = NULL, *morph_name = NULL, *recon_file = NULL, *recon_pid_file = NULL, *ptr = NULL; + char *volname; + extern xlator_t global_xlator; + glusterfs_ctx_t *oldctx = global_xlator.ctx; + runner_t runner = {0,}; + int32_t ret = -1; + struct stat buf; + char *recon_log = NULL, *recon_log_dir = NULL; + + /* + * Any fop that gets special treatment has to be patched in here, + * 
because the compiled-in table is produced by the code generator and + * only contains generated functions. Note that we have to go through + * this->fops because of some dynamic-linking strangeness; modifying + * the static table doesn't work. + */ + this->fops->getxattr = nsr_getxattr_special; + this->fops->fsync = nsr_fsync; + + local = this->children; + if (!local) { + gf_log (this->name, GF_LOG_ERROR, "no local subvolume"); + goto err; + } + + remote = local->next; + if (!remote) { + gf_log (this->name, GF_LOG_ERROR, "no remote subvolumes"); + goto err; + } + + this->local_pool = mem_pool_new (nsr_local_t, 128); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create nsr_local_t pool"); + goto err; + } + + priv = GF_CALLOC (1, sizeof(*priv), gf_mt_nsr_private_t); + if (!priv) { + gf_log (this->name, GF_LOG_ERROR, "could not allocate priv"); + goto err; + } + + // set this so that unless leader election is done, IO is fenced + priv->fence_io = 1; + + for (trav = this->children; trav; trav = trav->next) { + ++(priv->n_children); + } + + LOCK_INIT(&priv->dirty_lock); + LOCK_INIT(&priv->index_lock); + INIT_LIST_HEAD(&priv->dirty_fds); + + this->private = priv; + + GF_OPTION_INIT ("etcd-servers", priv->etcd_servers, str, err); + if (!priv->etcd_servers) { + gf_log (this->name, GF_LOG_ERROR, "etcd servers not generated. 
???"); + goto err; + } + + + GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); + + GF_OPTION_INIT ("subvol-uuid", priv->subvol_uuid, str, err); + gf_log (this->name, GF_LOG_INFO, "subvol_uuid = %s", priv->subvol_uuid); + if (gf_asprintf(&priv->leader_key,"%s:leader",priv->subvol_uuid) <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not generate leader key"); + goto err; + } + if (gf_asprintf(&priv->term_key,"%s:term",priv->subvol_uuid) <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not generate term key"); + goto err; + } + uuid_generate(tmp_uuid); + priv->brick_uuid = strdup(uuid_utoa(tmp_uuid)); + gf_log (this->name, GF_LOG_INFO, "brick_uuid = %s\n", priv->brick_uuid); + + GF_OPTION_INIT ("my-name", my_name, str, err); + if (!my_name) { + gf_log (this->name, GF_LOG_ERROR, "brick name not generated. ???"); + goto err; + } + GF_OPTION_INIT ("vol-name", volname, str, err); + if (!volname) { + gf_log (this->name, GF_LOG_ERROR, "vol name not generated. ???"); + goto err; + } + + morph_name = GF_CALLOC (1, strlen(my_name) + 1, gf_mt_nsr_private_t); + strcpy(morph_name, my_name); + recon_file = GF_CALLOC (1,PATH_MAX + strlen(morph_name) + strlen("con") +1, gf_mt_nsr_private_t); + recon_pid_file = GF_CALLOC (1,PATH_MAX + strlen(morph_name) + strlen("recon") +1, gf_mt_nsr_private_t); + if ((!recon_file) || (!recon_pid_file)) { + gf_log (this->name, GF_LOG_ERROR, "could not allocate reconciliation file name"); + goto err; + } + ptr = strchr (morph_name, '/'); + while (ptr) { + *ptr = '-'; + ptr = strchr (morph_name, '/'); + } + + sprintf(recon_file,"/%s/%s/%s/%s/",GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_VOLUME_DIR_PREFIX, + volname, + GLUSTERD_BRICK_INFO_DIR); + strcat(recon_file, morph_name); + strcat(recon_file, "-nsr-recon.vol"); + + sprintf(recon_pid_file,"/%s/%s/%s/%s/",GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_VOLUME_DIR_PREFIX, + volname, + "run"); + strcat(recon_pid_file, morph_name); + strcat(recon_pid_file, "-recon.pid"); + + priv->vol_file = 
GF_CALLOC (1,PATH_MAX + strlen(morph_name) + strlen("con") +1, gf_mt_nsr_private_t); + if (!priv->vol_file) { + gf_log (this->name, GF_LOG_ERROR, "could not allocate reconciliation file name"); + goto err; + } + sprintf(priv->vol_file,"%s/%s/%s/%s/", + GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_VOLUME_DIR_PREFIX, + volname, + GLUSTERD_BRICK_INFO_DIR); + strcat(priv->vol_file, "con:"); + strcat(priv->vol_file, morph_name); + + if (pthread_create(&kid,NULL,nsr_flush_thread,this) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not start flush thread"); + /* TBD: treat this as a fatal error? */ + } + + // Start the recon process. Then start the leader thread. + /* + * REVIEW + * Logs belong in /var/log not /tmp. + */ + + ret = mkdir (NSR_LOG_DIR, 0777); + if (ret != 0) { + if (errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Couldn't create" + " nsr log directory (%s)", strerror (errno)); + goto err; + } + } + + recon_log_dir = GF_CALLOC (1, strlen (NSR_LOG_DIR) + strlen(morph_name) + + 2, gf_mt_nsr_private_t); + if (!recon_log_dir) { + gf_log (this->name, GF_LOG_ERROR, "Couldn't allocate recon log " + "dir name"); + goto err; + } + sprintf (recon_log_dir, "%s/%s", NSR_LOG_DIR, morph_name); + ret = mkdir (recon_log_dir, 0777); + + if (ret != 0){ + if (errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, + "Couldn't create brick log dir (%s)", + strerror (errno)); + goto err; + } + } + + recon_log = GF_CALLOC (1, strlen (recon_log_dir)+ + strlen ("reconciliation.log") + 2, + gf_mt_nsr_private_t); + if (!recon_log) { + gf_log (this->name, GF_LOG_ERROR, "Couldn't allocate recon log" + " file name"); + goto err; + } + sprintf (recon_log, "%s/reconciliation.log", recon_log_dir); + + if (!stat(priv->vol_file, &buf)) { + + runinit (&runner); + runner_add_args(&runner, SBIN_DIR "/glusterfs", + "-f", recon_file, + "-p", recon_pid_file, + "-l", recon_log, + NULL); + ret = runner_run (&runner); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not exec 
reconciliation process %s", + SBIN_DIR "/glusterfs"); + goto err; + } + + // TBD - convert this to make sure recon process runs + sleep(2); + priv->nsr_recon_start = _gf_true; + } + + + (void)pthread_create(&kid,NULL,nsr_recon_notify_thread,this); + while (priv->recon_notify_inited == 0) { + sleep(1); + } + + if (pthread_create(&kid,NULL,nsr_leader_thread,this) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to start leader thread"); + } + while (priv->leader_inited == 0) { + sleep(1); + } + + + /* + * Calling glfs_new changes old->ctx, even if THIS still points + * to global_xlator. That causes problems later in the main + * thread, when gf_log_dump_graph tries to use the FILE after + * we've mucked with it and gets a segfault in __fprintf_chk. + * We can avoid all that by undoing the damage before we + * continue. + */ + global_xlator.ctx = oldctx; + + return 0; + +err: + nsr_deallocate_priv(priv); + return -1; +} + + +void +nsr_fini (xlator_t *this) +{ + nsr_deallocate_priv(this->private); +} + +class_methods_t class_methods = { + .init = nsr_init, + .fini = nsr_fini, + .reconfigure = nsr_reconfigure, + .notify = nsr_notify, +}; + +struct volume_options options[] = { + { .key = {"leader"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Start in the leader role. This is only for " + "bootstrapping the code, and should go away when we " + "have real leader election." 
+ }, + { .key ={"vol-name"}, + .type = GF_OPTION_TYPE_STR, + .description = "volume name" + }, + { .key = {"my-name"}, + .type = GF_OPTION_TYPE_STR, + .description = "brick name in form of host:/path" + }, + { .key = {"etcd-servers"}, + .type = GF_OPTION_TYPE_STR, + .description = "list of comma seperated etc servers" + }, + { .key = {"subvol-uuid"}, + .type = GF_OPTION_TYPE_STR, + .description = "UUID for this NSR (sub)volume" + }, + { .key = {"quorum-percent"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "50.0", + .description = "percentage of rep_count-1 that must be up" + }, + { .key = {NULL} }, +}; diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c new file mode 100644 index 000000000..1c50de234 --- /dev/null +++ b/xlators/cluster/nsr-server/src/recon_notify.c @@ -0,0 +1,389 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <string.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "call-stub.h" +#include "defaults.h" +#include "xlator.h" +#include "api/src/glfs.h" +#include "api/src/glfs-internal.h" +#include "etcd-api.h" +#include "nsr-internal.h" +#include "../../nsr-recon/src/recon_driver.h" +#include "../../nsr-recon/src/recon_xlator.h" + + + +typedef struct _nsr_recon_notify_ctx_t { + nsr_recon_notify_ev_t recon_head; + pthread_mutex_t recon_mutex; + pthread_cond_t recon_cv; + char **hosts; // list of hosts ordered depending on child indices + uint32_t current_term; + uint32_t last_reconciled_term; + glfs_t *fs; + glfs_fd_t *fd; +} nsr_recon_notify_ctx_t; + +static int +xlator_get_option (xlator_t *xl, char *key, char **value) +{ + GF_ASSERT (xl); + return dict_get_str (xl->options, key, value); +} + +void nsr_recon_notify_event_set_leader(nsr_private_t *priv) +{ + nsr_recon_notify_ev_t *ev; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + + ev = GF_CALLOC (1, sizeof (nsr_recon_notify_ev_t), 0); + ev->id = NSR_RECON_SET_LEADER; + INIT_LIST_HEAD(&(ev->list)); + pthread_mutex_lock(&ctx->recon_mutex); + list_add_tail(&ev->list, &ctx->recon_head.list); + pthread_cond_signal(&ctx->recon_cv); + pthread_mutex_unlock(&ctx->recon_mutex); +} + +void nsr_recon_notify_event_add_child(nsr_private_t *priv, uint32_t index) +{ + nsr_recon_notify_ev_t *ev; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + + ev = GF_CALLOC (1, sizeof (nsr_recon_notify_ev_t), 0); + ev->id = NSR_RECON_ADD_CHILD; + ev->index = index; + INIT_LIST_HEAD(&(ev->list)); + pthread_mutex_lock(&ctx->recon_mutex); + list_add_tail(&ev->list, &ctx->recon_head.list); + pthread_cond_signal(&ctx->recon_cv); + pthread_mutex_unlock(&ctx->recon_mutex); +} + + +static void +nsr_recon_set_leader (xlator_t *this) +{ + + nsr_private_t *priv = this->private; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + 
nsr_recon_role_t role; + xlator_t *old = this; + uint32_t i=0; + + if (priv->leader != _gf_true) + return; + + if (ctx->last_reconciled_term == priv->current_term) + return; + + /* + * Quorum for reconciliation is not the same as quorum for I/O. Here, + * we require a true majority. The +1 is because we don't count + * ourselves as part of n_children or up_children. + * + * TBD: re-evaluate when to reconcile (including partial) + */ + if (priv->up_children <= (priv->n_children / 2)) + return; + + gf_log (this->name, GF_LOG_INFO, + "Sending message to do recon with %d nodes\n", + priv->up_children); + + role.num = 0; + role.role = leader; + for (i = 0; i < priv->n_children; ++i) { + if (priv->kid_state & (1 << i)) { + gf_log (this->name, GF_LOG_INFO, + "Recon using host %s", + ctx->hosts[i]); + strcpy(role.info[role.num].name, ctx->hosts[i]); + (role.num)++; + } + } + + gf_log (this->name, GF_LOG_INFO, + "setting current term as %d", priv->current_term); + role.current_term = priv->current_term; + ENDIAN_CONVERSION_RR(role, _gf_false); + + // inform the reconciliator that this is leader + // in the callback (once reconciliation is done), + // we will unfence the IOs. + // TBD - error handling later. + if (glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "doing lseek failed\n"); + return; + } + + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, + "Writing to local node to set leader"); + do { + if (priv->leader != _gf_true) { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, "no longer leader\n"); + return; + } + if (glfs_write(ctx->fd, &role, sizeof(role), 0) == -1) { + if (errno == EAGAIN) { + // Wait for old reconciliation to bail out. + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "write failed with retry. 
retrying after some time\n"); + sleep(5); + continue; + } + else{ + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_ERROR, + "doing write failed\n"); + // This is because reconciliation has returned with error + // because some node has died in between. + // What should be done? Either we retry being leader + // or hook to CHILD_DOWN notification. + // Put that logic later. As of now we will just retry. + // This is easier. + sleep(5); + continue; + } + } else { + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, "doing write with success\n"); + break; + } + } while(1); + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, + "glfs_write returned. unfencing IO\n"); + + // TBD - error handling + + ctx->last_reconciled_term = priv->current_term; + priv->index = 0; // reset changelog index + atomic_fetch_and(&(priv->fence_io), 0); + + return; +} + +static void +nsr_recon_add_child (xlator_t *this, uint32_t index) +{ + nsr_private_t *priv = this->private; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + nsr_recon_role_t role; + xlator_t *old = this; + + if (priv->leader != _gf_true) + return; + + // reconciliation still pending. + // Check if we have majority + if (ctx->last_reconciled_term != priv->current_term) { + nsr_recon_set_leader(this); + } else { + // Reconciliation done. 
+ // new child joining the majority/ + // Do reconciliation only fot this child but after fencing new IO and draining old IO + role.num = 1; + role.role = joiner; + + atomic_fetch_or(&(priv->fence_io), 1); + while(priv->ops_in_flight) { + sleep(1); + } + + strcpy(role.info[0].name, ctx->hosts[index]); + role.current_term = priv->current_term; + ENDIAN_CONVERSION_RR(role, _gf_false); + glfs_lseek(ctx->fd, nsr_recon_xlator_sector_1, SEEK_SET); + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, + "Writing to local node to join %s\n", role.info[0].name); + glfs_write(ctx->fd, &role, + sizeof(role), 0); + glusterfs_this_set(old); + gf_log (this->name, GF_LOG_INFO, + "Write to local node to set joiner returned\n"); + + // TBD - error handling + atomic_fetch_and(&(priv->fence_io), 0); + } + + return; +} + +static uint32_t +nsr_setup_recon (xlator_t *this) +{ + nsr_private_t *priv = this->private; + xlator_t *old = this; + uint32_t ret = 0; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + + if (priv->nsr_recon_start == _gf_false) + return 0; + + ctx->fs = glfs_new(priv->subvol_uuid); + if (!ctx->fs) { + ret = 1; + gf_log (this->name, GF_LOG_ERROR, "failed to initialise glfs \n"); + goto done; + } + + glusterfs_this_set(old); + ret = glfs_set_volfile(ctx->fs, priv->vol_file); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to set volfile \n"); + goto done; + } + + glusterfs_this_set(old); + /* + * REVIEW + * Logs belong in /var/log not /tmp. 
+ */ + glfs_set_logging (ctx->fs,"/tmp/glfs-log", 7); + if (glfs_init(ctx->fs) < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to init volfile \n"); + ret = 1; + goto done; + } + + glusterfs_this_set(old); + ctx->fd = glfs_open (ctx->fs, "/", O_RDWR); + if (ctx->fd == NULL) { + ret = 1; + gf_log (this->name, GF_LOG_ERROR, + "failed to open fd to communicate with recon process \n"); + goto done; + } + + +done: + glusterfs_this_set(old); + return ret; +} + + +static void +nsr_setup_hosts(xlator_t *this) +{ + xlator_list_t *trav; + nsr_private_t *priv = this->private; + uint32_t i = 0; + nsr_recon_notify_ctx_t *ctx = (nsr_recon_notify_ctx_t *)priv->recon_ctx; + + ctx->hosts = GF_CALLOC(sizeof(char *), priv->n_children, gf_mt_nsr_private_t); + // Iterate thru all the children + for (trav = this->children; trav; trav = trav->next) { + char *hostname = NULL, *vol = NULL; + int ret1 = 0, ret2 = 0, ret = 0; + xlator_t *xl = trav->xlator; + // If the child type is that of protocol/client + if (!strcmp(trav->xlator->type, "protocol/client")) { + ret1 = xlator_get_option (xl, "remote-host", &hostname); + ret2 = xlator_get_option (xl, "remote-subvolume", &vol); + if (!ret1 && !ret2) { + // add the name of that host to the hosts + ctx->hosts[i] = GF_CALLOC(sizeof(char), strlen(hostname) + strlen(vol) + 2, 0); + strcpy(ctx->hosts[i], hostname); + strcat(ctx->hosts[i], ":"); + strcat(ctx->hosts[i], vol); + gf_log (this->name, GF_LOG_INFO, + "adding hosts %s to recon notfiy list", ctx->hosts[i]); + } else { + gf_log (this->name, GF_LOG_ERROR, + "CANNOT FIND HOSTNAME FOR A CHILD"); + GF_ASSERT(0); + } + // local brick + } else { + ret = xlator_get_option (this, "my-name", &hostname); + if (!ret) { + uint32_t len = strlen(hostname); + ctx->hosts[i] = GF_CALLOC(sizeof(char), + len+1, + gf_mt_nsr_private_t); + strcpy(ctx->hosts[i], hostname); + gf_log (this->name, GF_LOG_INFO, + "adding my host %s to recon notfiy list", ctx->hosts[i]); + } else { + gf_log (this->name, GF_LOG_ERROR, 
+ "CANNOT FIND MY HOSTNAME"); + GF_ASSERT(0); + } + } + i++; + } +} + +void * +nsr_recon_notify_thread (void *arg) +{ + xlator_t *this = (xlator_t *)arg; + nsr_private_t *priv = this->private; + nsr_recon_notify_ev_t *ev; + nsr_recon_notify_ctx_t *ctx; + + priv->recon_ctx = GF_CALLOC(1, sizeof(nsr_recon_notify_ctx_t), gf_mt_nsr_private_t); + if (!priv->recon_ctx) { + gf_log (this->name, GF_LOG_ERROR, "calloc error"); + return NULL; + } + ctx = priv->recon_ctx; + + pthread_mutex_init(&(ctx->recon_mutex), NULL); + pthread_cond_init(&(ctx->recon_cv), NULL); + INIT_LIST_HEAD(&(ctx->recon_head.list)); + + nsr_setup_hosts(this); + + if (nsr_setup_recon(this)) { + gf_log (this->name, GF_LOG_ERROR, "recon notify thread : initing glfs error"); + return NULL; + } + + priv->recon_notify_inited = 1; + + while(1) { + pthread_mutex_lock(&ctx->recon_mutex); + while (list_empty(&(ctx->recon_head.list))) { + pthread_cond_wait(&ctx->recon_cv, &ctx->recon_mutex); + } + pthread_mutex_unlock(&ctx->recon_mutex); + + list_for_each_entry(ev, &(ctx->recon_head.list), list) { + + if (ev->id == NSR_RECON_SET_LEADER) { + gf_log (this->name, GF_LOG_INFO, + "got add leader notfiy event"); + nsr_recon_set_leader(this); + } else if (ev->id == NSR_RECON_ADD_CHILD) { + gf_log (this->name, GF_LOG_INFO, + "got add child notify event"); + nsr_recon_add_child(this, ev->index); + } + } + list_del_init (&ev->list); + } + + return NULL; +} + diff --git a/xlators/cluster/nsr-server/src/yajl.c b/xlators/cluster/nsr-server/src/yajl.c new file mode 100644 index 000000000..54e6474fc --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "yajl/yajl_parse.h" +#include "yajl_lex.h" +#include "yajl_parser.h" +#include "yajl_alloc.h" + +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +const char * +yajl_status_to_string(yajl_status stat) +{ + const char * statStr = "unknown"; + switch (stat) { + case yajl_status_ok: + statStr = "ok, no error"; + break; + case yajl_status_client_canceled: + statStr = "client canceled parse"; + break; + case yajl_status_error: + statStr = "parse error"; + break; + } + return statStr; +} + +yajl_handle +yajl_alloc(const yajl_callbacks * callbacks, + yajl_alloc_funcs * afs, + void * ctx) +{ + yajl_handle hand = NULL; + yajl_alloc_funcs afsBuffer; + + /* first order of business is to set up memory allocation routines */ + if (afs != NULL) { + if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL) + { + return NULL; + } + } else { + yajl_set_default_alloc_funcs(&afsBuffer); + afs = &afsBuffer; + } + + hand = (yajl_handle) YA_MALLOC(afs, sizeof(struct yajl_handle_t)); + + /* copy in pointers to allocation routines */ + memcpy((void *) &(hand->alloc), (void *) afs, sizeof(yajl_alloc_funcs)); + + hand->callbacks = callbacks; + hand->ctx = ctx; + hand->lexer = NULL; + hand->bytesConsumed = 0; + hand->decodeBuf = yajl_buf_alloc(&(hand->alloc)); + hand->flags = 0; + yajl_bs_init(hand->stateStack, &(hand->alloc)); + yajl_bs_push(hand->stateStack, yajl_state_start); + + return hand; +} + +int 
+yajl_config(yajl_handle h, yajl_option opt, ...) +{ + int rv = 1; + va_list ap; + va_start(ap, opt); + + switch(opt) { + case yajl_allow_comments: + case yajl_dont_validate_strings: + case yajl_allow_trailing_garbage: + case yajl_allow_multiple_values: + case yajl_allow_partial_values: + if (va_arg(ap, int)) h->flags |= opt; + else h->flags &= ~opt; + break; + default: + rv = 0; + } + va_end(ap); + + return rv; +} + +void +yajl_free(yajl_handle handle) +{ + yajl_bs_free(handle->stateStack); + yajl_buf_free(handle->decodeBuf); + if (handle->lexer) { + yajl_lex_free(handle->lexer); + handle->lexer = NULL; + } + YA_FREE(&(handle->alloc), handle); +} + +yajl_status +yajl_parse(yajl_handle hand, const unsigned char * jsonText, + size_t jsonTextLen) +{ + yajl_status status; + + /* lazy allocation of the lexer */ + if (hand->lexer == NULL) { + hand->lexer = yajl_lex_alloc(&(hand->alloc), + hand->flags & yajl_allow_comments, + !(hand->flags & yajl_dont_validate_strings)); + } + + status = yajl_do_parse(hand, jsonText, jsonTextLen); + return status; +} + + +yajl_status +yajl_complete_parse(yajl_handle hand) +{ + /* The lexer is lazy allocated in the first call to parse. if parse is + * never called, then no data was provided to parse at all. This is a + * "premature EOF" error unless yajl_allow_partial_values is specified. + * allocating the lexer now is the simplest possible way to handle this + * case while preserving all the other semantics of the parser + * (multiple values, partial values, etc). 
*/ + if (hand->lexer == NULL) { + hand->lexer = yajl_lex_alloc(&(hand->alloc), + hand->flags & yajl_allow_comments, + !(hand->flags & yajl_dont_validate_strings)); + } + + return yajl_do_finish(hand); +} + +unsigned char * +yajl_get_error(yajl_handle hand, int verbose, + const unsigned char * jsonText, size_t jsonTextLen) +{ + return yajl_render_error_string(hand, jsonText, jsonTextLen, verbose); +} + +size_t +yajl_get_bytes_consumed(yajl_handle hand) +{ + if (!hand) return 0; + else return hand->bytesConsumed; +} + + +void +yajl_free_error(yajl_handle hand, unsigned char * str) +{ + /* use memory allocation functions if set */ + YA_FREE(&(hand->alloc), str); +} + +/* XXX: add utility routines to parse from file */ diff --git a/xlators/cluster/nsr-server/src/yajl/yajl_common.h b/xlators/cluster/nsr-server/src/yajl/yajl_common.h new file mode 100644 index 000000000..49ca3a5cb --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl/yajl_common.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __YAJL_COMMON_H__ +#define __YAJL_COMMON_H__ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define YAJL_MAX_DEPTH 128 + +/* msft dll export gunk. 
To build a DLL on windows, you + * must define WIN32, YAJL_SHARED, and YAJL_BUILD. To use a shared + * DLL, you must define YAJL_SHARED and WIN32 */ +#if defined(WIN32) && defined(YAJL_SHARED) +# ifdef YAJL_BUILD +# define YAJL_API __declspec(dllexport) +# else +# define YAJL_API __declspec(dllimport) +# endif +#else +# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# define YAJL_API __attribute__ ((visibility("default"))) +# else +# define YAJL_API +# endif +#endif + +/** pointer to a malloc function, supporting client overriding memory + * allocation routines */ +typedef void * (*yajl_malloc_func)(void *ctx, size_t sz); + +/** pointer to a free function, supporting client overriding memory + * allocation routines */ +typedef void (*yajl_free_func)(void *ctx, void * ptr); + +/** pointer to a realloc function which can resize an allocation. */ +typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, size_t sz); + +/** A structure which can be passed to yajl_*_alloc routines to allow the + * client to specify memory allocation functions to be used. 
*/ +typedef struct +{ + /** pointer to a function that can allocate uninitialized memory */ + yajl_malloc_func malloc; + /** pointer to a function that can resize memory allocations */ + yajl_realloc_func realloc; + /** pointer to a function that can free memory allocated using + * reallocFunction or mallocFunction */ + yajl_free_func free; + /** a context pointer that will be passed to above allocation routines */ + void * ctx; +} yajl_alloc_funcs; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xlators/cluster/nsr-server/src/yajl/yajl_gen.h b/xlators/cluster/nsr-server/src/yajl/yajl_gen.h new file mode 100644 index 000000000..52fa99fc2 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl/yajl_gen.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/** + * \file yajl_gen.h + * Interface to YAJL's JSON generation facilities. 
+ */ + +#include <yajl/yajl_common.h> + +#ifndef __YAJL_GEN_H__ +#define __YAJL_GEN_H__ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + /** generator status codes */ + typedef enum { + /** no error */ + yajl_gen_status_ok = 0, + /** at a point where a map key is generated, a function other than + * yajl_gen_string was called */ + yajl_gen_keys_must_be_strings, + /** YAJL's maximum generation depth was exceeded. see + * YAJL_MAX_DEPTH */ + yajl_max_depth_exceeded, + /** A generator function (yajl_gen_XXX) was called while in an error + * state */ + yajl_gen_in_error_state, + /** A complete JSON document has been generated */ + yajl_gen_generation_complete, + /** yajl_gen_double was passed an invalid floating point value + * (infinity or NaN). */ + yajl_gen_invalid_number, + /** A print callback was passed in, so there is no internal + * buffer to get from */ + yajl_gen_no_buf, + /** returned from yajl_gen_string() when the yajl_gen_validate_utf8 + * option is enabled and an invalid was passed by client code. + */ + yajl_gen_invalid_string + } yajl_gen_status; + + /** an opaque handle to a generator */ + typedef struct yajl_gen_t * yajl_gen; + + /** a callback used for "printing" the results. */ + typedef void (*yajl_print_t)(void * ctx, + const char * str, + size_t len); + + /** configuration parameters for the parser, these may be passed to + * yajl_gen_config() along with option specific argument(s). In general, + * all configuration parameters default to *off*. */ + typedef enum { + /** generate indented (beautiful) output */ + yajl_gen_beautify = 0x01, + /** + * Set an indent string which is used when yajl_gen_beautify + * is enabled. Maybe something like \\t or some number of + * spaces. The default is four spaces ' '. + */ + yajl_gen_indent_string = 0x02, + /** + * Set a function and context argument that should be used to + * output generated json. 
the function should conform to the + * yajl_print_t prototype while the context argument is a + * void * of your choosing. + * + * example: + * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr); + */ + yajl_gen_print_callback = 0x04, + /** + * Normally the generator does not validate that strings you + * pass to it via yajl_gen_string() are valid UTF8. Enabling + * this option will cause it to do so. + */ + yajl_gen_validate_utf8 = 0x08, + /** + * the forward solidus (slash or '/' in human) is not required to be + * escaped in json text. By default, YAJL will not escape it in the + * interest of saving bytes. Setting this flag will cause YAJL to + * always escape '/' in generated JSON strings. + */ + yajl_gen_escape_solidus = 0x10 + } yajl_gen_option; + + /** allow the modification of generator options subsequent to handle + * allocation (via yajl_gen_alloc) + * \returns zero in case of errors, non-zero otherwise + */ + YAJL_API int yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...); + + /** allocate a generator handle + * \param allocFuncs an optional pointer to a structure which allows + * the client to override the memory allocation + * used by yajl. May be NULL, in which case + * malloc/free/realloc will be used. + * + * \returns an allocated handle on success, NULL on failure (bad params) + */ + YAJL_API yajl_gen yajl_gen_alloc(const yajl_alloc_funcs * allocFuncs); + + /** free a generator handle */ + YAJL_API void yajl_gen_free(yajl_gen handle); + + YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long long int number); + /** generate a floating point number. number may not be infinity or + * NaN, as these have no representation in JSON.
In these cases the + * generator will return 'yajl_gen_invalid_number' */ + YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number); + YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand, + const char * num, + size_t len); + YAJL_API yajl_gen_status yajl_gen_string(yajl_gen hand, + const unsigned char * str, + size_t len); + YAJL_API yajl_gen_status yajl_gen_null(yajl_gen hand); + YAJL_API yajl_gen_status yajl_gen_bool(yajl_gen hand, int boolean); + YAJL_API yajl_gen_status yajl_gen_map_open(yajl_gen hand); + YAJL_API yajl_gen_status yajl_gen_map_close(yajl_gen hand); + YAJL_API yajl_gen_status yajl_gen_array_open(yajl_gen hand); + YAJL_API yajl_gen_status yajl_gen_array_close(yajl_gen hand); + + /** access the null terminated generator buffer. If incrementally + * outputting JSON, one should call yajl_gen_clear to clear the + * buffer. This allows stream generation. */ + YAJL_API yajl_gen_status yajl_gen_get_buf(yajl_gen hand, + const unsigned char ** buf, + size_t * len); + + /** clear yajl's output buffer, but maintain all internal generation + * state. This function will not "reset" the generator state, and is + * intended to enable incremental JSON outputting. */ + YAJL_API void yajl_gen_clear(yajl_gen hand); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xlators/cluster/nsr-server/src/yajl/yajl_parse.h b/xlators/cluster/nsr-server/src/yajl/yajl_parse.h new file mode 100644 index 000000000..55c831101 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl/yajl_parse.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/** + * \file yajl_parse.h + * Interface to YAJL's JSON stream parsing facilities. + */ + +#include <yajl/yajl_common.h> + +#ifndef __YAJL_PARSE_H__ +#define __YAJL_PARSE_H__ + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + /** error codes returned from this interface */ + typedef enum { + /** no error was encountered */ + yajl_status_ok, + /** a client callback returned zero, stopping the parse */ + yajl_status_client_canceled, + /** An error occurred during the parse. Call yajl_get_error for + * more information about the encountered error */ + yajl_status_error + } yajl_status; + + /** attain a human readable, english, string for an error */ + YAJL_API const char * yajl_status_to_string(yajl_status code); + + /** an opaque handle to a parser */ + typedef struct yajl_handle_t * yajl_handle; + + /** yajl is an event driven parser. this means as json elements are + * parsed, you are called back to do something with the data. The + * functions in this table indicate the various events for which + * you will be called back. Each callback accepts a "context" + * pointer, this is a void * that is passed into the yajl_alloc + * function which the client code may use to pass around context. + * + * All callbacks return an integer. If non-zero, the parse will + * continue. If zero, the parse will be canceled and + * yajl_status_client_canceled will be returned from the parse.
+ * + * \attention { + * A note about the handling of numbers: + * + * yajl will only convert numbers that can be represented in a + * double or a 64 bit (long long) int. All other numbers will + * be passed to the client in string form using the yajl_number + * callback. Furthermore, if yajl_number is not NULL, it will + * always be used to return numbers, that is yajl_integer and + * yajl_double will be ignored. If yajl_number is NULL but one + * of yajl_integer or yajl_double are defined, parsing of a + * number larger than is representable in a double or 64 bit + * integer will result in a parse error. + * } + */ + typedef struct { + int (* yajl_null)(void * ctx); + int (* yajl_boolean)(void * ctx, int boolVal); + int (* yajl_integer)(void * ctx, long long integerVal); + int (* yajl_double)(void * ctx, double doubleVal); + /** A callback which passes the string representation of the number + * back to the client. Will be used for all numbers when present */ + int (* yajl_number)(void * ctx, const char * numberVal, + size_t numberLen); + + /** strings are returned as pointers into the JSON text when, + * possible, as a result, they are _not_ null padded */ + int (* yajl_string)(void * ctx, const unsigned char * stringVal, + size_t stringLen); + + int (* yajl_start_map)(void * ctx); + int (* yajl_map_key)(void * ctx, const unsigned char * key, + size_t stringLen); + int (* yajl_end_map)(void * ctx); + + int (* yajl_start_array)(void * ctx); + int (* yajl_end_array)(void * ctx); + } yajl_callbacks; + + /** allocate a parser handle + * \param callbacks a yajl callbacks structure specifying the + * functions to call when different JSON entities + * are encountered in the input text. May be NULL, + * which is only useful for validation. + * \param afs memory allocation functions, may be NULL for to use + * C runtime library routines (malloc and friends) + * \param ctx a context pointer that will be passed to callbacks. 
+ */ + YAJL_API yajl_handle yajl_alloc(const yajl_callbacks * callbacks, + yajl_alloc_funcs * afs, + void * ctx); + + + /** configuration parameters for the parser, these may be passed to + * yajl_config() along with option specific argument(s). In general, + * all configuration parameters default to *off*. */ + typedef enum { + /** Ignore javascript style comments present in + * JSON input. Non-standard, but rather fun + * arguments: toggled off with integer zero, on otherwise. + * + * example: + * yajl_config(h, yajl_allow_comments, 1); // turn comment support on + */ + yajl_allow_comments = 0x01, + /** + * By default the parser verifies that all strings in JSON input are + * valid UTF8 and emits a parse error if this is not so. That check + * makes parsing slightly more expensive (~7% depending + * on processor and compiler in use); setting this option disables it. + * + * example: + * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking + */ + yajl_dont_validate_strings = 0x02, + /** + * By default, upon calls to yajl_complete_parse(), yajl will + * ensure the entire input text was consumed and will raise an error + * otherwise. Enabling this flag will cause yajl to disable this + * check. This can be useful when parsing json out of a stream that contains more + * than a single JSON document. + */ + yajl_allow_trailing_garbage = 0x04, + /** + * Allow multiple values to be parsed by a single handle. The + * entire text must be valid JSON, and values can be separated + * by any kind of whitespace. This flag will change the + * behavior of the parser, and cause it to continue parsing after + * a value is parsed, rather than transitioning into a + * complete state. This option can be useful when parsing multiple + * values from an input stream. + */ + yajl_allow_multiple_values = 0x08, + /** + * When yajl_complete_parse() is called the parser will + * check that the top level value was completely consumed.
I.E., + * if called whilst in the middle of parsing a value + * yajl will enter an error state (premature EOF). Setting this + * flag suppresses that check and the corresponding error. + */ + yajl_allow_partial_values = 0x10 + } yajl_option; + + /** allow the modification of parser options subsequent to handle + * allocation (via yajl_alloc) + * \returns zero in case of errors, non-zero otherwise + */ + YAJL_API int yajl_config(yajl_handle h, yajl_option opt, ...); + + /** free a parser handle */ + YAJL_API void yajl_free(yajl_handle handle); + + /** Parse some json! + * \param hand - a handle to the json parser allocated with yajl_alloc + * \param jsonText - a pointer to the UTF8 json text to be parsed + * \param jsonTextLength - the length, in bytes, of input text + */ + YAJL_API yajl_status yajl_parse(yajl_handle hand, + const unsigned char * jsonText, + size_t jsonTextLength); + + /** Parse any remaining buffered json. + * Since yajl is a stream-based parser, without an explicit end of + * input, yajl sometimes can't decide if content at the end of the + * stream is valid or not. For example, if "1" has been fed in, + * yajl can't know whether another digit is next or some character + * that would terminate the integer token. + * + * \param hand - a handle to the json parser allocated with yajl_alloc + */ + YAJL_API yajl_status yajl_complete_parse(yajl_handle hand); + + /** get an error string describing the state of the + * parse. + * + * If verbose is non-zero, the message will include the JSON + * text where the error occurred, along with an arrow pointing to + * the specific char. + * + * \returns A dynamically allocated string will be returned which should + * be freed with yajl_free_error + */ + YAJL_API unsigned char * yajl_get_error(yajl_handle hand, int verbose, + const unsigned char * jsonText, + size_t jsonTextLength); + + /** + * get the amount of data consumed from the last chunk passed to YAJL.
+ * + * In the case of a successful parse this can help you understand if + * the entire buffer was consumed (which will allow you to handle + * "junk at end of input"). + * + * In the event an error is encountered during parsing, this function + * affords the client a way to get the offset into the most recent + * chunk where the error occurred. 0 will be returned if no error + * was encountered. + */ + YAJL_API size_t yajl_get_bytes_consumed(yajl_handle hand); + + /** free an error returned from yajl_get_error */ + YAJL_API void yajl_free_error(yajl_handle hand, unsigned char * str); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/xlators/cluster/nsr-server/src/yajl/yajl_tree.h b/xlators/cluster/nsr-server/src/yajl/yajl_tree.h new file mode 100644 index 000000000..8b377f636 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl/yajl_tree.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2010-2011 Florian Forster <ff at octo.it> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/** + * \file yajl_tree.h + * + * Parses JSON data and returns the data in tree form.
+ * + * \author Florian Forster + * \date August 2010 + * + * This interface makes quick parsing and extraction of + * smallish JSON docs trivial: + * + * \include example/parse_config.c + */ + +#ifndef YAJL_TREE_H +#define YAJL_TREE_H 1 + +#include <yajl/yajl_common.h> + +/** possible data types that a yajl_val_s can hold */ +typedef enum { + yajl_t_string = 1, + yajl_t_number = 2, + yajl_t_object = 3, + yajl_t_array = 4, + yajl_t_true = 5, + yajl_t_false = 6, + yajl_t_null = 7, + /** The any type isn't valid for yajl_val_s.type, but can be + * used as an argument to routines like yajl_tree_get(). + */ + yajl_t_any = 8 +} yajl_type; + +#define YAJL_NUMBER_INT_VALID 0x01 +#define YAJL_NUMBER_DOUBLE_VALID 0x02 + +/** A pointer to a node in the parse tree */ +typedef struct yajl_val_s * yajl_val; + +/** + * A JSON value representation capable of holding one of the seven + * types above. For "string", "number", "object", and "array" + * additional data is available in the union. The "YAJL_IS_*" + * and "YAJL_GET_*" macros below allow type checking and convenient + * value extraction. + */ +struct yajl_val_s +{ + /** Type of the value contained. Use the "YAJL_IS_*" macros to check for a + * specific type. */ + yajl_type type; + /** Type-specific data. You may use the "YAJL_GET_*" macros to access these + * members. */ + union + { + char * string; + struct { + long long i; /*< integer value, if representable. */ + double d; /*< double value, if representable. */ + /** Signals whether the \em i and \em d members are + * valid. See \c YAJL_NUMBER_INT_VALID and + * \c YAJL_NUMBER_DOUBLE_VALID. */ + char *r; /*< unparsed number in string form. */ + unsigned int flags; + } number; + struct { + const char **keys; /*< Array of keys */ + yajl_val *values; /*< Array of values. */ + size_t len; /*< Number of key-value-pairs. */ + } object; + struct { + yajl_val *values; /*< Array of elements. */ + size_t len; /*< Number of elements.
*/ + } array; + } u; +}; + +/** + * Parse a string. + * + * Parses a null-terminated string containing JSON data and returns a pointer + * to the top-level value (root of the parse tree). + * + * \param input Pointer to a null-terminated utf8 string containing + * JSON data. + * \param error_buffer Pointer to a buffer in which an error message will + * be stored if \em yajl_tree_parse fails, or + * \c NULL. The buffer will be initialized before + * parsing, so its content will be destroyed even if + * \em yajl_tree_parse succeeds. + * \param error_buffer_size Size of the memory area pointed to by + * \em error_buffer. If \em error_buffer is + * \c NULL, this argument is ignored. + * + * \returns Pointer to the top-level value or \c NULL on error. The memory + * pointed to must be freed using \em yajl_tree_free. In case of an error, a + * null terminated message describing the error in more detail is stored in + * \em error_buffer if it is not \c NULL. + */ +YAJL_API yajl_val yajl_tree_parse (const char *input, + char *error_buffer, size_t error_buffer_size); + +/** + * Free a parse tree returned by "yajl_tree_parse". + * + * \param v Pointer to a JSON value returned by "yajl_tree_parse". Passing NULL + * is valid and results in a no-op. + */ +YAJL_API void yajl_tree_free (yajl_val v); + +/** + * Access a nested value inside a tree. + * + * \param parent the node under which you'd like to extract values. + * \param path A null terminated array of strings, each the name of an object key + * \param type the yajl_type of the object you seek, or yajl_t_any if any will do. + * + * \returns a pointer to the found value, or NULL if we came up empty. + * + * Future Ideas: it'd be nice to move path to a string and implement support for + * a teeny tiny micro language here, so you can extract array elements, do things + * like .first and .last, even .length. Inspiration from JSONPath and css selectors?
+ * No it wouldn't be fast, but that's not what this API is about. + */ +YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type type); + +/* Various convenience macros to check the type of a `yajl_val` */ +#define YAJL_IS_STRING(v) (((v) != NULL) && ((v)->type == yajl_t_string)) +#define YAJL_IS_NUMBER(v) (((v) != NULL) && ((v)->type == yajl_t_number)) +#define YAJL_IS_INTEGER(v) (YAJL_IS_NUMBER(v) && ((v)->u.flags & YAJL_NUMBER_INT_VALID)) +#define YAJL_IS_DOUBLE(v) (YAJL_IS_NUMBER(v) && ((v)->u.flags & YAJL_NUMBER_DOUBLE_VALID)) +#define YAJL_IS_OBJECT(v) (((v) != NULL) && ((v)->type == yajl_t_object)) +#define YAJL_IS_ARRAY(v) (((v) != NULL) && ((v)->type == yajl_t_array )) +#define YAJL_IS_TRUE(v) (((v) != NULL) && ((v)->type == yajl_t_true )) +#define YAJL_IS_FALSE(v) (((v) != NULL) && ((v)->type == yajl_t_false )) +#define YAJL_IS_NULL(v) (((v) != NULL) && ((v)->type == yajl_t_null )) + +/** Given a yajl_val_string return a ptr to the bare string it contains, + * or NULL if the value is not a string. */ +#define YAJL_GET_STRING(v) (YAJL_IS_STRING(v) ? (v)->u.string : NULL) + +/** Get the string representation of a number. You should check type first, + * perhaps using YAJL_IS_NUMBER */ +#define YAJL_GET_NUMBER(v) ((v)->u.number.r) + +/** Get the double representation of a number. You should check type first, + * perhaps using YAJL_IS_DOUBLE */ +#define YAJL_GET_DOUBLE(v) ((v)->u.number.d) + +/** Get the 64bit (long long) integer representation of a number. You should + * check type first, perhaps using YAJL_IS_INTEGER */ +#define YAJL_GET_INTEGER(v) ((v)->u.number.i) + +/** Get a pointer to a yajl_val_object or NULL if the value is not an object. */ +#define YAJL_GET_OBJECT(v) (YAJL_IS_OBJECT(v) ? &(v)->u.object : NULL) + +/** Get a pointer to a yajl_val_array or NULL if the value is not an array. */ +#define YAJL_GET_ARRAY(v) (YAJL_IS_ARRAY(v) ? 
&(v)->u.array : NULL) + +#endif /* YAJL_TREE_H */ diff --git a/xlators/cluster/nsr-server/src/yajl/yajl_version.h b/xlators/cluster/nsr-server/src/yajl/yajl_version.h new file mode 100644 index 000000000..0fba9b8fc --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl/yajl_version.h @@ -0,0 +1,23 @@ +#ifndef YAJL_VERSION_H_ +#define YAJL_VERSION_H_ + +#include <yajl/yajl_common.h> + +#define YAJL_MAJOR 2 +#define YAJL_MINOR 0 +#define YAJL_MICRO 1 + +#define YAJL_VERSION ((YAJL_MAJOR * 10000) + (YAJL_MINOR * 100) + YAJL_MICRO) + +#ifdef __cplusplus +extern "C" { +#endif + +extern int YAJL_API yajl_version(void); + +#ifdef __cplusplus +} +#endif + +#endif /* YAJL_VERSION_H_ */ + diff --git a/xlators/cluster/nsr-server/src/yajl_alloc.c b/xlators/cluster/nsr-server/src/yajl_alloc.c new file mode 100644 index 000000000..276315af7 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl_alloc.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/** + * \file yajl_alloc.h + * default memory allocation routines for yajl which use malloc/realloc and + * free + */ + +#include "yajl_alloc.h" +#include <stdlib.h> + +static void * yajl_internal_malloc(void *ctx, size_t sz) +{ + return malloc(sz); +} + +static void * yajl_internal_realloc(void *ctx, void * previous, + size_t sz) +{ + return realloc(previous, sz); +} + +static void yajl_internal_free(void *ctx, void * ptr) +{ + free(ptr); +} + +void yajl_set_default_alloc_funcs(yajl_alloc_funcs * yaf) +{ + yaf->malloc = yajl_internal_malloc; + yaf->free = yajl_internal_free; + yaf->realloc = yajl_internal_realloc; + yaf->ctx = NULL; +} + diff --git a/xlators/cluster/nsr-server/src/yajl_alloc.h b/xlators/cluster/nsr-server/src/yajl_alloc.h new file mode 100644 index 000000000..a8a9e45e6 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl_alloc.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/** + * \file yajl_alloc.h + * default memory allocation routines for yajl which use malloc/realloc and + * free + */ + +#ifndef __YAJL_ALLOC_H__ +#define __YAJL_ALLOC_H__ + +#include "yajl/yajl_common.h" + +#define YA_MALLOC(afs, sz) (afs)->malloc((afs)->ctx, (sz)) +#define YA_FREE(afs, ptr) (afs)->free((afs)->ctx, (ptr)) +#define YA_REALLOC(afs, ptr, sz) (afs)->realloc((afs)->ctx, (ptr), (sz)) + +void yajl_set_default_alloc_funcs(yajl_alloc_funcs * yaf); + +#endif diff --git a/xlators/cluster/nsr-server/src/yajl_buf.c b/xlators/cluster/nsr-server/src/yajl_buf.c new file mode 100644 index 000000000..0d249d364 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl_buf.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "yajl_buf.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#define YAJL_BUF_INIT_SIZE 2048 + +struct yajl_buf_t { + size_t len; + size_t used; + unsigned char * data; + yajl_alloc_funcs * alloc; +}; + +static +void yajl_buf_ensure_available(yajl_buf buf, size_t want) +{ + size_t need; + + assert(buf != NULL); + + /* first call */ + if (buf->data == NULL) { + buf->len = YAJL_BUF_INIT_SIZE; + buf->data = (unsigned char *) YA_MALLOC(buf->alloc, buf->len); + buf->data[0] = 0; + } + + need = buf->len; + + while (want >= (need - buf->used)) need <<= 1; + + if (need != buf->len) { + buf->data = (unsigned char *) YA_REALLOC(buf->alloc, buf->data, need); + buf->len = need; + } +} + +yajl_buf yajl_buf_alloc(yajl_alloc_funcs * alloc) +{ + yajl_buf b = YA_MALLOC(alloc, sizeof(struct yajl_buf_t)); + memset((void *) b, 0, sizeof(struct yajl_buf_t)); + b->alloc = alloc; + return b; +} + +void yajl_buf_free(yajl_buf buf) +{ + assert(buf != NULL); + if (buf->data) YA_FREE(buf->alloc, buf->data); + YA_FREE(buf->alloc, buf); +} + +void yajl_buf_append(yajl_buf buf, const void * data, size_t len) +{ + yajl_buf_ensure_available(buf, len); + if (len > 0) { + assert(data != NULL); + memcpy(buf->data + buf->used, data, len); + buf->used += len; + buf->data[buf->used] = 0; + } +} + +void yajl_buf_clear(yajl_buf buf) +{ + buf->used = 0; + if (buf->data) buf->data[buf->used] = 0; +} + +const unsigned char * yajl_buf_data(yajl_buf buf) +{ + return buf->data; +} + +size_t yajl_buf_len(yajl_buf buf) +{ + return buf->used; +} + +void +yajl_buf_truncate(yajl_buf buf, size_t len) +{ + assert(len <= buf->used); + buf->used = len; +} diff --git a/xlators/cluster/nsr-server/src/yajl_buf.h b/xlators/cluster/nsr-server/src/yajl_buf.h new file mode 100644 index 000000000..94929a519 --- /dev/null +++ b/xlators/cluster/nsr-server/src/yajl_buf.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com> + * + * Permission to use, copy, modify, 
and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __YAJL_BUF_H__
+#define __YAJL_BUF_H__
+
+#include "yajl/yajl_common.h"
+#include "yajl_alloc.h"
+
+/*
+ * Implementation/performance notes. If this were moved to a header
+ * only implementation using #define's where possible we might be
+ * able to squeeze a little performance out of the guy by killing function
+ * call overhead. YMMV.
+ */
+
+/**
+ * yajl_buf is a buffer with exponential growth. the buffer ensures that
+ * you are always null padded.
+ */
+typedef struct yajl_buf_t * yajl_buf;
+
+/* allocate a new, empty buffer; all of its memory comes from alloc */
+yajl_buf yajl_buf_alloc(yajl_alloc_funcs * alloc);
+
+/* free the buffer and its storage; buf must not be NULL */
+void yajl_buf_free(yajl_buf buf);
+
+/* append len bytes starting at data to the end of the buffer */
+void yajl_buf_append(yajl_buf buf, const void * data, size_t len);
+
+/* empty the buffer; the storage is kept for reuse */
+void yajl_buf_clear(yajl_buf buf);
+
+/* get a pointer to the beginning of the buffer (NULL until storage has
+ * been allocated by an append) */
+const unsigned char * yajl_buf_data(yajl_buf buf);
+
+/* get the number of bytes currently stored in the buffer */
+size_t yajl_buf_len(yajl_buf buf);
+
+/* shorten the buffer to len bytes; len must not exceed yajl_buf_len() */
+void yajl_buf_truncate(yajl_buf buf, size_t len);
+
+#endif
diff --git a/xlators/cluster/nsr-server/src/yajl_bytestack.h b/xlators/cluster/nsr-server/src/yajl_bytestack.h
new file mode 100644
index 000000000..1fc50c470
--- /dev/null
+++ b/xlators/cluster/nsr-server/src/yajl_bytestack.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A header only implementation of a simple stack of bytes, used in YAJL
+ * to maintain parse state.
+ */
+
+#ifndef __YAJL_BYTESTACK_H__
+#define __YAJL_BYTESTACK_H__
+
+#include "yajl/yajl_common.h"
+
+/* number of bytes the stack grows by each time it is full */
+#define YAJL_BS_INC 128
+
+typedef struct yajl_bytestack_t
+{
+    unsigned char * stack;  /* storage, NULL until the first push */
+    size_t size;            /* bytes allocated for stack */
+    size_t used;            /* bytes currently pushed */
+    yajl_alloc_funcs * yaf; /* allocator used for stack */
+} yajl_bytestack;
+
+/* initialize a bytestack: no storage, nothing pushed, allocator _yaf */
+#define yajl_bs_init(obs, _yaf) { \
+        (obs).stack = NULL; \
+        (obs).size = 0; \
+        (obs).used = 0; \
+        (obs).yaf = (_yaf); \
+    } \
+
+
+/* free a bytestack's storage, if it has any.
+ * NOTE: expands to a bare `if` statement, so it must not be used directly
+ * in front of an `else`. */
+#define yajl_bs_free(obs) \
+    if ((obs).stack) (obs).yaf->free((obs).yaf->ctx, (obs).stack);
+
+/* value of the top byte; asserts that the stack is not empty.
+ * NOTE(review): this header does not include <assert.h> itself, so
+ * `assert` presumably has to be in scope where the macro is expanded. */
+#define yajl_bs_current(obs) \
+    (assert((obs).used > 0), (obs).stack[(obs).used - 1])
+
+/* push one byte, growing the storage by YAJL_BS_INC bytes when it is full.
+ * NOTE: the result of realloc is stored and used without a NULL check. */
+#define yajl_bs_push(obs, byte) { \
+    if (((obs).size - (obs).used) == 0) { \
+        (obs).size += YAJL_BS_INC; \
+        (obs).stack = (obs).yaf->realloc((obs).yaf->ctx,\
+                                         (void *) (obs).stack, (obs).size);\
+    } \
+    (obs).stack[((obs).used)++] = (byte); \
+}
+
+/* removes the top item of the stack, returns nothing (no check that the
+ * stack is non-empty) */
+#define yajl_bs_pop(obs) { ((obs).used)--; }
+
+/* overwrite the top item of the stack; the expansion already ends in `;` */
+#define yajl_bs_set(obs, byte) \
+    (obs).stack[((obs).used) - 1] = (byte);
+
+
+#endif
diff --git a/xlators/cluster/nsr-server/src/yajl_encode.c b/xlators/cluster/nsr-server/src/yajl_encode.c
new file mode 100644
index 000000000..9dc9a3e81
--- /dev/null
+++ b/xlators/cluster/nsr-server/src/yajl_encode.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "yajl_encode.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/* write the two upper-case hex digits of c to hexBuf[0] and hexBuf[1] */
+static void CharToHex(unsigned char c, char * hexBuf)
+{
+    const char * hexchar = "0123456789ABCDEF";
+    hexBuf[0] = hexchar[c >> 4];
+    hexBuf[1] = hexchar[c & 0x0F];
+}
+
+/* emit str[0..len) through print(ctx, ...) as the body of a JSON string
+ * (the surrounding quotes are the caller's job).  quote, backslash and the
+ * named control characters get their two character escapes, any other
+ * byte below 32 becomes \u00XX, and '/' is escaped only when
+ * escape_solidus is non-zero.  runs of bytes that need no escaping are
+ * passed to print in a single call. */
+void
+yajl_string_encode(const yajl_print_t print,
+                   void * ctx,
+                   const unsigned char * str,
+                   size_t len,
+                   int escape_solidus)
+{
+    size_t beg = 0;
+    size_t end = 0;
+    char hexBuf[7];
+    hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
+    hexBuf[6] = 0;
+
+    while (end < len) {
+        const char * escaped = NULL;
+        switch (str[end]) {
+            case '\r': escaped = "\\r"; break;
+            case '\n': escaped = "\\n"; break;
+            case '\\': escaped = "\\\\"; break;
+            /* it is not required to escape a solidus in JSON:
+             * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt
+             * specifically, this production from the grammar:
+             * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+             */
+            case '/': if (escape_solidus) escaped = "\\/"; break;
+            case '"': escaped = "\\\""; break;
+            case '\f': escaped = "\\f"; break;
+            case '\b': escaped = "\\b"; break;
+            case '\t': escaped = "\\t"; break;
+            default:
+                if ((unsigned char) str[end] < 32) {
+                    CharToHex(str[end], hexBuf + 4);
+                    escaped = hexBuf;
+                }
+                break;
+        }
+        if (escaped != NULL) {
+            /* flush the pending unescaped run, then the escape itself */
+            print(ctx, (const char *) (str + beg), end - beg);
+            print(ctx, escaped, (unsigned int)strlen(escaped));
+            beg = ++end;
+        } else {
+            ++end;
+        }
+    }
+    print(ctx, (const char *) (str + beg), end - beg);
+}
+
+/* shift the value of the four hex digits at hex[0..3] into *val.
+ * the digits must already be known to be valid hex (asserted). */
+static void hexToDigit(unsigned int * val, const unsigned char * hex)
+{
+    unsigned int i;
+    for (i=0;i<4;i++) {
+        unsigned char c = hex[i];
+        /* fold lower case onto upper case, then map 'A'.. next to '9' */
+        if (c >= 'A') c = (c & ~0x20) - 7;
+        c -= '0';
+        assert(!(c & 0xF0));
+        *val = (*val << 4) | c;
+    }
+}
+
+/* store codepoint as a 0 terminated UTF-8 sequence in utf8Buf, which must
+ * have room for 5 bytes.  values of 0x200000 and above are written as "?". */
+static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
+{
+    if (codepoint < 0x80) {
+        utf8Buf[0] = (char) codepoint;
+        utf8Buf[1] = 0;
+    } else if (codepoint < 0x0800) {
+        utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0);
+        utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80);
+        utf8Buf[2] = 0;
+    } else if (codepoint < 0x10000) {
+        utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0);
+        utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
+        utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80);
+        utf8Buf[3] = 0;
+    } else if (codepoint < 0x200000) {
+        utf8Buf[0] =(char)((codepoint >> 18) | 0xF0);
+        utf8Buf[1] =(char)(((codepoint >> 12) & 0x3F) | 0x80);
+        utf8Buf[2] =(char)(((codepoint >> 6) & 0x3F) | 0x80);
+        utf8Buf[3] =(char)((codepoint & 0x3F) | 0x80);
+        utf8Buf[4] = 0;
+    } else {
+        utf8Buf[0] = '?';
+        utf8Buf[1] = 0;
+    }
+}
+
+/* append the unescaped form of the JSON string body str[0..len) to buf.
+ * every backslash in the input is expected to start a complete escape
+ * (one of \r \n \\ \/ \" \f \b \t, or \u followed by four hex digits);
+ * only the look-ahead for the second half of a surrogate pair is bounds
+ * checked here.  \uXXXX becomes UTF-8, \u0000 is appended as a single 0
+ * byte, and a high surrogate that is not followed by another \u escape
+ * becomes "?" without swallowing whatever comes after it. */
+void yajl_string_decode(yajl_buf buf, const unsigned char * str,
+                        size_t len)
+{
+    size_t beg = 0;
+    size_t end = 0;
+
+    while (end < len) {
+        if (str[end] == '\\') {
+            char utf8Buf[5];
+            const char * unescaped = "?";
+            yajl_buf_append(buf, str + beg, end - beg);
+            switch (str[++end]) {
+                case 'r': unescaped = "\r"; break;
+                case 'n': unescaped = "\n"; break;
+                case '\\': unescaped = "\\"; break;
+                case '/': unescaped = "/"; break;
+                case '"': unescaped = "\""; break;
+                case 'f': unescaped = "\f"; break;
+                case 'b': unescaped = "\b"; break;
+                case 't': unescaped = "\t"; break;
+                case 'u': {
+                    unsigned int codepoint = 0;
+                    hexToDigit(&codepoint, str + ++end);
+                    /* end now rests on the last of the four hex digits */
+                    end+=3;
+                    /* check if this is a surrogate */
+                    if ((codepoint & 0xFC00) == 0xD800) {
+                        /* peek at the six bytes of a following \uXXXX
+                         * without consuming them, so that a lone high
+                         * surrogate does not eat the next character */
+                        if (end < len && (len - end) > 6 &&
+                            str[end + 1] == '\\' && str[end + 2] == 'u') {
+                            unsigned int surrogate = 0;
+                            hexToDigit(&surrogate, str + end + 3);
+                            codepoint =
+                                (((codepoint & 0x3F) << 10) |
+                                 ((((codepoint >> 6) & 0xF) + 1) << 16) |
+                                 (surrogate & 0x3FF));
+                            end += 6;
+                        } else {
+                            unescaped = "?";
+                            break;
+                        }
+                    }
+
+                    Utf32toUtf8(codepoint, utf8Buf);
+                    unescaped = utf8Buf;
+
+                    /* strlen() below would see an empty string for
+                     * \u0000, so append the 0 byte explicitly */
+                    if (codepoint == 0) {
+                        yajl_buf_append(buf, unescaped, 1);
+                        beg = ++end;
+                        continue;
+                    }
+
+                    break;
+                }
+                default:
+                    assert("this should never happen" == NULL);
+            }
+            yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
+            beg = ++end;
+        } else {
+            end++;
+        }
+    }
+    yajl_buf_append(buf, str + beg, end - beg);
+}
+
+/* step to the next byte; fail the validation if the input ends inside a
+ * multi-byte sequence */
+#define ADV_PTR s++; if (!(len--)) return 0;
+
+/* returns 1 if s[0..len) consists of well formed 1 to 4 byte UTF-8
+ * sequences (lead byte followed by the right number of 10xxxxxx bytes),
+ * 0 otherwise.  an empty input is valid; a NULL pointer with a non-zero
+ * length is not.  only the byte structure is checked. */
+int yajl_string_validate_utf8(const unsigned char * s, size_t len)
+{
+    if (!len) return 1;
+    if (!s) return 0;
+
+    while (len--) {
+        /* single byte */
+        if (*s <= 0x7f) {
+            /* noop */
+        }
+        /* two byte */
+        else if ((*s >> 5) == 0x6) {
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+        }
+        /* three byte */
+        else if ((*s >> 4) == 0x0e) {
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+        }
+        /* four byte */
+        else if ((*s >> 3) == 0x1e) {
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+            ADV_PTR;
+            if (!((*s >> 6) == 0x2)) return 0;
+        } else {
+            return 0;
+        }
+
+        s++;
+    }
+
+    return 1;
+}
diff --git 
a/xlators/cluster/nsr-server/src/yajl_encode.h b/xlators/cluster/nsr-server/src/yajl_encode.h
new file mode 100644
index 000000000..af1e8bbde
--- /dev/null
+++ b/xlators/cluster/nsr-server/src/yajl_encode.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __YAJL_ENCODE_H__
+#define __YAJL_ENCODE_H__
+
+#include "yajl_buf.h"
+#include "yajl/yajl_gen.h"
+
+/* write the `length` bytes at `str` through printer(ctx, ...) with JSON
+ * string escaping applied; the surrounding quotes are not written.
+ * '/' is escaped only when escape_solidus is non-zero. */
+void yajl_string_encode(const yajl_print_t printer,
+                        void * ctx,
+                        const unsigned char * str,
+                        size_t length,
+                        int escape_solidus);
+
+/* append to buf the `length` bytes at `str` with JSON escape sequences
+ * replaced by the bytes they stand for (\uXXXX becomes UTF-8) */
+void yajl_string_decode(yajl_buf buf, const unsigned char * str,
+                        size_t length);
+
+/* returns 1 if the `len` bytes at `s` are structurally valid UTF-8,
+ * 0 otherwise; an empty input is valid */
+int yajl_string_validate_utf8(const unsigned char * s, size_t len);
+
+#endif
diff --git a/xlators/cluster/nsr-server/src/yajl_gen.c b/xlators/cluster/nsr-server/src/yajl_gen.c
new file mode 100644
index 000000000..73763a9e0
--- /dev/null
+++ b/xlators/cluster/nsr-server/src/yajl_gen.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "yajl/yajl_gen.h"
+#include "yajl_buf.h"
+#include "yajl_encode.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <stdarg.h>
+
+/* what the generator expects next at one nesting level */
+typedef enum {
+    yajl_gen_start,
+    yajl_gen_map_start,
+    yajl_gen_map_key,
+    yajl_gen_map_val,
+    yajl_gen_array_start,
+    yajl_gen_in_array,
+    yajl_gen_complete,
+    yajl_gen_error
+} yajl_gen_state;
+
+struct yajl_gen_t
+{
+    unsigned int flags;          /* bit set of yajl_gen_option values */
+    unsigned int depth;          /* index of the current level in state[] */
+    const char * indentString;   /* written once per level when beautifying */
+    yajl_gen_state state[YAJL_MAX_DEPTH];
+    yajl_print_t print;          /* output callback */
+    void * ctx; /* yajl_buf while print is yajl_buf_append, else the
+                 * caller's context */
+    /* memory allocation routines */
+    yajl_alloc_funcs alloc;
+};
+
+/* set one generator option; the variadic arguments depend on opt:
+ *   yajl_gen_beautify, yajl_gen_validate_utf8, yajl_gen_escape_solidus:
+ *       int, non-zero switches the flag on, zero switches it off
+ *   yajl_gen_indent_string:
+ *       const char *, must consist of whitespace only
+ *   yajl_gen_print_callback:
+ *       yajl_print_t followed by the void * context handed to it
+ * returns 1 on success and 0 for an unknown option or a rejected indent
+ * string; a rejected indent string leaves the current one in place. */
+int
+yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...)
+{
+    int rv = 1;
+    va_list ap;
+    va_start(ap, opt);
+
+    switch(opt) {
+        case yajl_gen_beautify:
+        case yajl_gen_validate_utf8:
+        case yajl_gen_escape_solidus:
+            if (va_arg(ap, int)) g->flags |= opt;
+            else g->flags &= ~opt;
+            break;
+        case yajl_gen_indent_string: {
+            const char *indent = va_arg(ap, const char *);
+            const char *p = indent;
+            if (indent == NULL) {
+                rv = 0;
+                break;
+            }
+            for (; *p; p++) {
+                if (*p != '\n'
+                    && *p != '\v'
+                    && *p != '\f'
+                    && *p != '\t'
+                    && *p != '\r'
+                    && *p != ' ')
+                {
+                    rv = 0;
+                    break;
+                }
+            }
+            /* only a fully validated string is installed, so
+             * indentString can never become NULL */
+            if (rv) g->indentString = indent;
+            break;
+        }
+        case yajl_gen_print_callback:
+            /* only the built in buffer belongs to the generator; a
+             * context installed by an earlier call is the caller's */
+            if (g->print == (yajl_print_t)&yajl_buf_append)
+                yajl_buf_free((yajl_buf)g->ctx);
+            g->print = va_arg(ap, const yajl_print_t);
+            g->ctx = va_arg(ap, void *);
+            break;
+        default:
+            rv = 0;
+    }
+
+    va_end(ap);
+
+    return rv;
+}
+
+
+
+/* create a generator that writes into an internal buffer.  afs may be NULL
+ * to use the default allocation routines; otherwise all three of its
+ * function pointers must be set.  returns NULL on bad arguments or when
+ * memory cannot be obtained. */
+yajl_gen
+yajl_gen_alloc(const yajl_alloc_funcs * afs)
+{
+    yajl_gen g = NULL;
+    yajl_alloc_funcs afsBuffer;
+
+    /* first order of business is to set up memory allocation routines */
+    if (afs != NULL) {
+        if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL)
+        {
+            return NULL;
+        }
+    } else {
+        yajl_set_default_alloc_funcs(&afsBuffer);
+        afs = &afsBuffer;
+    }
+
+    g = (yajl_gen) YA_MALLOC(afs, sizeof(struct yajl_gen_t));
+    if (!g) return NULL;
+
+    memset((void *) g, 0, sizeof(struct yajl_gen_t));
+    /* copy in pointers to allocation routines */
+    memcpy((void *) &(g->alloc), (void *) afs, sizeof(yajl_alloc_funcs));
+
+    g->print = (yajl_print_t)&yajl_buf_append;
+    g->ctx = yajl_buf_alloc(&(g->alloc));
+    if (g->ctx == NULL) {
+        YA_FREE(&(g->alloc), g);
+        return NULL;
+    }
+    g->indentString = " ";
+
+    return g;
+}
+
+/* destroy a generator; the output buffer is released only while it is
+ * still the internal one.  NULL is ignored. */
+void
+yajl_gen_free(yajl_gen g)
+{
+    if (g == NULL) return;
+    if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_free((yajl_buf)g->ctx);
+    YA_FREE(&(g->alloc), g);
+}
+
+/* write the separator owed before the next item: "," between map entries
+ * and array elements, ":" between a map key and its value */
+#define INSERT_SEP \
+    if (g->state[g->depth] == yajl_gen_map_key || \
+        g->state[g->depth] == yajl_gen_in_array) { \
+        g->print(g->ctx, ",", 1); \
+        if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1); \
+    } else if (g->state[g->depth] == yajl_gen_map_val) { \
+        g->print(g->ctx, ":", 1); \
+        if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, " ", 1); \
+    }
+
+/* when beautifying, indent by one indentString per nesting level (map
+ * values stay on the line of their key) */
+#define INSERT_WHITESPACE \
+    if ((g->flags & yajl_gen_beautify)) { \
+        if (g->state[g->depth] != yajl_gen_map_val) { \
+            unsigned int _i; \
+            for (_i=0;_i<g->depth;_i++) \
+                g->print(g->ctx, \
+                         g->indentString, \
+                         (unsigned int)strlen(g->indentString)); \
+        } \
+    }
+
+/* only strings may appear where a map key is expected */
+#define ENSURE_NOT_KEY \
+    if (g->state[g->depth] == yajl_gen_map_key || \
+        g->state[g->depth] == yajl_gen_map_start) { \
+        return yajl_gen_keys_must_be_strings; \
+    }
+
+/* check that we're not complete, or in error state. in a valid state
+ * to be generating */
+#define ENSURE_VALID_STATE \
+    if (g->state[g->depth] == yajl_gen_error) { \
+        return yajl_gen_in_error_state;\
+    } else if (g->state[g->depth] == yajl_gen_complete) { \
+        return yajl_gen_generation_complete; \
+    }
+
+/* refuse to open a container that would not fit in state[]; used before
+ * anything is printed so a refused open leaves the output untouched */
+#define ENSURE_DEPTH_AVAILABLE \
+    if (g->depth + 1 >= YAJL_MAX_DEPTH) return yajl_max_depth_exceeded;
+
+/* enter a container; depth is left unchanged when the limit is hit, so it
+ * always remains a valid index into state[] */
+#define INCREMENT_DEPTH \
+    if (g->depth + 1 >= YAJL_MAX_DEPTH) return yajl_max_depth_exceeded; \
+    ++(g->depth);
+
+/* leave a container; closing when nothing is open is reported with a
+ * yajl_gen_status value and leaves depth at 0 instead of wrapping around */
+#define DECREMENT_DEPTH \
+    if (g->depth == 0) return yajl_gen_generation_complete; \
+    --(g->depth);
+
+/* advance the state of the current level after a value was written */
+#define APPENDED_ATOM \
+    switch (g->state[g->depth]) { \
+        case yajl_gen_start: \
+            g->state[g->depth] = yajl_gen_complete; \
+            break; \
+        case yajl_gen_map_start: \
+        case yajl_gen_map_key: \
+            g->state[g->depth] = yajl_gen_map_val; \
+            break; \
+        case yajl_gen_array_start: \
+            g->state[g->depth] = yajl_gen_in_array; \
+            break; \
+        case yajl_gen_map_val: \
+            g->state[g->depth] = yajl_gen_map_key; \
+            break; \
+        default: \
+            break; \
+    }
+
+/* when beautifying, finish a completed document with a newline */
+#define FINAL_NEWLINE \
+    if ((g->flags & yajl_gen_beautify) && g->state[g->depth] == yajl_gen_complete) \
+        g->print(g->ctx, "\n", 1);
+
+/* write an integer value */
+yajl_gen_status
+yajl_gen_integer(yajl_gen g, long long int number)
+{
+    /* 32 bytes hold any 64 bit value in decimal, sign and terminator */
+    char i[32];
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    sprintf(i, "%lld", number);
+    g->print(g->ctx, i, (unsigned int)strlen(i));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+#ifdef WIN32
+#include <float.h>
+#define isnan _isnan
+#define isinf !_finite
+#endif
+
+/* write a floating point value; NaN and infinities are rejected with
+ * yajl_gen_invalid_number before anything is printed */
+yajl_gen_status
+yajl_gen_double(yajl_gen g, double number)
+{
+    char i[32];
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY;
+    if (isnan(number) || isinf(number)) return yajl_gen_invalid_number;
+    INSERT_SEP; INSERT_WHITESPACE;
+    sprintf(i, "%.20g", number);
+    g->print(g->ctx, i, (unsigned int)strlen(i));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* write the l bytes at s verbatim as a number; the text is not checked */
+yajl_gen_status
+yajl_gen_number(yajl_gen g, const char * s, size_t l)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, s, l);
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* write a quoted, escaped string; this is the only value allowed as a
+ * map key */
+yajl_gen_status
+yajl_gen_string(yajl_gen g, const unsigned char * str,
+                size_t len)
+{
+    // if validation is enabled, check that the string is valid utf8
+    // XXX: This checking could be done a little faster, in the same pass as
+    // the string encoding
+    if (g->flags & yajl_gen_validate_utf8) {
+        if (!yajl_string_validate_utf8(str, len)) {
+            return yajl_gen_invalid_string;
+        }
+    }
+    ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, "\"", 1);
+    yajl_string_encode(g->print, g->ctx, str, len, g->flags & yajl_gen_escape_solidus);
+    g->print(g->ctx, "\"", 1);
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* write the literal null */
+yajl_gen_status
+yajl_gen_null(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, "null", strlen("null"));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* write true for a non-zero argument, false otherwise */
+yajl_gen_status
+yajl_gen_bool(yajl_gen g, int boolean)
+{
+    const char * val = boolean ? "true" : "false";
+
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, val, (unsigned int)strlen(val));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* open a map; fails with yajl_max_depth_exceeded, before writing
+ * anything, when the nesting limit is reached */
+yajl_gen_status
+yajl_gen_map_open(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; ENSURE_DEPTH_AVAILABLE;
+    INSERT_SEP; INSERT_WHITESPACE;
+    INCREMENT_DEPTH;
+
+    g->state[g->depth] = yajl_gen_map_start;
+    g->print(g->ctx, "{", 1);
+    if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* close the current map and count it as one value of the enclosing level */
+yajl_gen_status
+yajl_gen_map_close(yajl_gen g)
+{
+    ENSURE_VALID_STATE;
+    DECREMENT_DEPTH;
+
+    if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
+    APPENDED_ATOM;
+    INSERT_WHITESPACE;
+    g->print(g->ctx, "}", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* open an array; fails with yajl_max_depth_exceeded, before writing
+ * anything, when the nesting limit is reached */
+yajl_gen_status
+yajl_gen_array_open(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; ENSURE_DEPTH_AVAILABLE;
+    INSERT_SEP; INSERT_WHITESPACE;
+    INCREMENT_DEPTH;
+    g->state[g->depth] = yajl_gen_array_start;
+    g->print(g->ctx, "[", 1);
+    if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* close the current array and count it as one value of the enclosing
+ * level */
+yajl_gen_status
+yajl_gen_array_close(yajl_gen g)
+{
+    ENSURE_VALID_STATE;
+    DECREMENT_DEPTH;
+    if ((g->flags & yajl_gen_beautify)) g->print(g->ctx, "\n", 1);
+    APPENDED_ATOM;
+    INSERT_WHITESPACE;
+    g->print(g->ctx, "]", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+/* expose the internal output buffer; returns yajl_gen_no_buf once a print
+ * callback has replaced it */
+yajl_gen_status
+yajl_gen_get_buf(yajl_gen g, const unsigned char ** buf,
+                 size_t * len)
+{
+    if (g->print != (yajl_print_t)&yajl_buf_append) return yajl_gen_no_buf;
+    *buf = yajl_buf_data((yajl_buf)g->ctx);
+    *len = yajl_buf_len((yajl_buf)g->ctx);
+    return yajl_gen_status_ok;
+}
+
+/* empty the internal output buffer; does nothing when a print callback is
+ * installed */
+void
+yajl_gen_clear(yajl_gen g)
+{
+    if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_clear((yajl_buf)g->ctx);
+}
diff --git a/xlators/cluster/nsr-server/src/yajl_lex.c b/xlators/cluster/nsr-server/src/yajl_lex.c
new file mode 100644
index 000000000..b098e6a99
--- /dev/null
+++ 
b/xlators/cluster/nsr-server/src/yajl_lex.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "yajl_lex.h"
+#include "yajl_buf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#ifdef YAJL_LEXER_DEBUG
+/* name of a token for debug output; both the left and the right variant
+ * of a brace or bracket share one name.  returns "unknown" for values
+ * that are not listed. */
+static const char *
+tokToStr(yajl_tok tok)
+{
+    switch (tok) {
+        case yajl_tok_bool: return "bool";
+        case yajl_tok_colon: return "colon";
+        case yajl_tok_comma: return "comma";
+        case yajl_tok_comment: return "comment";
+        case yajl_tok_eof: return "eof";
+        case yajl_tok_error: return "error";
+        case yajl_tok_left_brace: return "brace";
+        case yajl_tok_left_bracket: return "bracket";
+        case yajl_tok_null: return "null";
+        case yajl_tok_integer: return "integer";
+        case yajl_tok_double: return "double";
+        case yajl_tok_right_brace: return "brace";
+        case yajl_tok_right_bracket: return "bracket";
+        case yajl_tok_string: return "string";
+        case yajl_tok_string_with_escapes: return "string_with_escapes";
+    }
+    return "unknown";
+}
+#endif
+
+/* Impact of the stream parsing feature on the lexer:
+ *
+ * YAJL supports stream parsing. That is, the ability to parse the first
+ * bits of a chunk of JSON before the last bits are available (still on
+ * the network or disk). 
This makes the lexer more complex. The + * responsibility of the lexer is to handle transparently the case where + * a chunk boundary falls in the middle of a token. This is + * accomplished is via a buffer and a character reading abstraction. + * + * Overview of implementation + * + * When we lex to end of input string before end of token is hit, we + * copy all of the input text composing the token into our lexBuf. + * + * Every time we read a character, we do so through the readChar function. + * readChar's responsibility is to handle pulling all chars from the buffer + * before pulling chars from input text + */ + +struct yajl_lexer_t { + /* the overal line and char offset into the data */ + size_t lineOff; + size_t charOff; + + /* error */ + yajl_lex_error error; + + /* a input buffer to handle the case where a token is spread over + * multiple chunks */ + yajl_buf buf; + + /* in the case where we have data in the lexBuf, bufOff holds + * the current offset into the lexBuf. */ + size_t bufOff; + + /* are we using the lex buf? */ + unsigned int bufInUse; + + /* shall we allow comments? */ + unsigned int allowComments; + + /* shall we validate utf8 inside strings? */ + unsigned int validateUTF8; + + yajl_alloc_funcs * alloc; +}; + +#define readChar(lxr, txt, off) \ + (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && lxr->bufOff < yajl_buf_len((lxr)->buf)) ? \ + (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \ + ((txt)[(*(off))++])) + +#define unreadChar(lxr, off) ((*(off) > 0) ? 
(*(off))-- : ((lxr)->bufOff--)) + +yajl_lexer +yajl_lex_alloc(yajl_alloc_funcs * alloc, + unsigned int allowComments, unsigned int validateUTF8) +{ + yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t)); + memset((void *) lxr, 0, sizeof(struct yajl_lexer_t)); + lxr->buf = yajl_buf_alloc(alloc); + lxr->allowComments = allowComments; + lxr->validateUTF8 = validateUTF8; + lxr->alloc = alloc; + return lxr; +} + +void +yajl_lex_free(yajl_lexer lxr) +{ + yajl_buf_free(lxr->buf); + YA_FREE(lxr->alloc, lxr); + return; +} + +/* a lookup table which lets us quickly determine three things: + * VEC - valid escaped control char + * note. the solidus '/' may be escaped or not. + * IJC - invalid json char + * VHC - valid hex char + * NFP - needs further processing (from a string scanning perspective) + * NUC - needs utf8 checking when enabled (from a string scanning perspective) + */ +#define VEC 0x01 +#define IJC 0x02 +#define VHC 0x04 +#define NFP 0x08 +#define NUC 0x10 + +static const char charLookupTable[256] = +{ +/*00*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , +/*08*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , +/*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , +/*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC , + +/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 , +/*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC , +/*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC , +/*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 , + +/*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 , +/*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , +/*58*/ 0 , 0 , 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , + +/*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 , +/*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 , +/*70*/ 0 , 0 , VEC , 0 , VEC , 0 , 0 , 0 , +/*78*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , 
NUC , NUC , NUC , NUC , NUC , + + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC , + NUC , NUC , NUC , NUC , NUC , NUC , NUC , NUC +}; + +/** process a variable length utf8 encoded codepoint. + * + * returns: + * yajl_tok_string - if valid utf8 char was parsed and offset was + * advanced + * yajl_tok_eof - if end of input was hit before validation could + * complete + * yajl_tok_error - if invalid utf8 was encountered + * + * NOTE: on error the offset will point to the first char of the + * invalid utf8 */ +#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; } + +static yajl_tok +yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + unsigned char curChar) +{ + if (curChar <= 0x7f) { + /* single byte */ + return yajl_tok_string; + } else if ((curChar >> 5) == 0x6) { + /* two byte */ + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if ((curChar >> 6) == 0x2) return yajl_tok_string; + } else if ((curChar >> 4) == 0x0e) { + /* three byte */ + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if ((curChar >> 6) == 0x2) { + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if ((curChar >> 6) == 0x2) return yajl_tok_string; + } + } else if ((curChar >> 3) == 0x1e) { + /* four byte */ + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if ((curChar >> 6) == 0x2) { + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if 
((curChar >> 6) == 0x2) { + UTF8_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if ((curChar >> 6) == 0x2) return yajl_tok_string; + } + } + } + + return yajl_tok_error; +} + +/* lex a string. input is the lexer, pointer to beginning of + * json text, and start of string (offset). + * a token is returned which has the following meanings: + * yajl_tok_string: lex of string was successful. offset points to + * terminating '"'. + * yajl_tok_eof: end of text was encountered before we could complete + * the lex. + * yajl_tok_error: embedded in the string were unallowable chars. offset + * points to the offending char + */ +#define STR_CHECK_EOF \ +if (*offset >= jsonTextLen) { \ + tok = yajl_tok_eof; \ + goto finish_string_lex; \ +} + +/** scan a string for interesting characters that might need further + * review. return the number of chars that are uninteresting and can + * be skipped. + * (lth) hi world, any thoughts on how to make this routine faster? */ +static size_t +yajl_string_scan(const unsigned char * buf, size_t len, int utf8check) +{ + unsigned char mask = IJC|NFP|(utf8check ? 
NUC : 0); + size_t skip = 0; + while (skip < len && !(charLookupTable[*buf] & mask)) + { + skip++; + buf++; + } + return skip; +} +/* Lex the remainder of a JSON string; yajl_lex_lex calls this after it has read the opening double quote. Loops until the closing quote and returns yajl_tok_string, or yajl_tok_string_with_escapes if any backslash escape was seen. On an invalid hex char, escaped char, JSON char or UTF-8 sequence it sets lexer->error and returns yajl_tok_error; if yajl_lex_utf8_char reports end of input it returns yajl_tok_eof. NOTE(review): STR_CHECK_EOF is defined outside this excerpt; presumably it leaves the loop with yajl_tok_eof when no input remains - confirm. */ +static yajl_tok +yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset) +{ + yajl_tok tok = yajl_tok_error; + int hasEscapes = 0; + + for (;;) { + unsigned char curChar; + + /* now jump into a faster scanning routine to skip as much + * of the buffers as possible */ + { + const unsigned char * p; + size_t len; + /* resume inside the lexer buffer while bufOff is still within it, otherwise scan jsonText directly; yajl_string_scan returns how many bytes may be skipped */ + if ((lexer->bufInUse && yajl_buf_len(lexer->buf) && + lexer->bufOff < yajl_buf_len(lexer->buf))) + { + p = ((const unsigned char *) yajl_buf_data(lexer->buf) + + (lexer->bufOff)); + len = yajl_buf_len(lexer->buf) - lexer->bufOff; + lexer->bufOff += yajl_string_scan(p, len, lexer->validateUTF8); + } + else if (*offset < jsonTextLen) + { + p = jsonText + *offset; + len = jsonTextLen - *offset; + *offset += yajl_string_scan(p, len, lexer->validateUTF8); + } + } + + STR_CHECK_EOF; + + curChar = readChar(lexer, jsonText, offset); + + /* quote terminates */ + if (curChar == '"') { + tok = yajl_tok_string; + break; + } + /* backslash escapes a set of control chars, */ + else if (curChar == '\\') { + hasEscapes = 1; + STR_CHECK_EOF; + + /* special case \u */ + curChar = readChar(lexer, jsonText, offset); + if (curChar == 'u') { + unsigned int i = 0; + /* each of the next four chars must carry the VHC flag in charLookupTable */ + for (i=0;i<4;i++) { + STR_CHECK_EOF; + curChar = readChar(lexer, jsonText, offset); + if (!(charLookupTable[curChar] & VHC)) { + /* back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_hex_char; + goto finish_string_lex; + } + } + } else if (!(charLookupTable[curChar] & VEC)) { + /* back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_escaped_char; + goto finish_string_lex; + } + } + /* when not validating UTF8 it's a simple table lookup to determine + * if the present character is invalid */ + else if(charLookupTable[curChar] & IJC) { + /* 
back up to offending char */ + unreadChar(lexer, offset); + lexer->error = yajl_lex_string_invalid_json_char; + goto finish_string_lex; + } + /* when in validate UTF8 mode we need to do some extra work */ + else if (lexer->validateUTF8) { + yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen, + offset, curChar); + + if (t == yajl_tok_eof) { + tok = yajl_tok_eof; + goto finish_string_lex; + } else if (t == yajl_tok_error) { + lexer->error = yajl_lex_string_invalid_utf8; + goto finish_string_lex; + } + } + /* accept it, and move on */ + } + finish_string_lex: + /* tell our buddy, the parser, whether he needs to process this string + * again */ + if (hasEscapes && tok == yajl_tok_string) { + tok = yajl_tok_string_with_escapes; + } + + return tok; +} +/* Return yajl_tok_eof from the enclosing function once *offset has reached jsonTextLen; expects offset and jsonTextLen to be in scope at the point of use. */ +#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; +/* Lex a JSON number starting at *offset (yajl_lex_lex un-reads the first character before calling). Returns yajl_tok_integer, or yajl_tok_double when a fraction or exponent part is present. Returns yajl_tok_eof through RETURN_IF_EOF whenever the input runs out before a terminating character has been seen, and yajl_tok_error with lexer->error set when a required digit is missing. On success the one character read past the number is pushed back. */ +static yajl_tok +yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset) +{ + /** XXX: numbers are the only entities in json that we must lex + * _beyond_ in order to know that they are complete. There + * is an ambiguous case for integers at EOF. 
*/ + + unsigned char c; + + yajl_tok tok = yajl_tok_integer; + + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + /* optional leading minus */ + if (c == '-') { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } + + /* a single zero, or a series of integers */ + if (c == '0') { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } else if (c >= '1' && c <= '9') { + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (c >= '0' && c <= '9'); + } else { + /* no digit here: push the char back and fail; this branch is taken with or without a leading minus */ unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_integer_after_minus; + return yajl_tok_error; + } + + /* optional fraction (indicates this is floating point) */ + if (c == '.') { + int numRd = 0; + + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + while (c >= '0' && c <= '9') { + numRd++; + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } + /* the decimal point must be followed by at least one digit */ + if (!numRd) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_integer_after_decimal; + return yajl_tok_error; + } + tok = yajl_tok_double; + } + + /* optional exponent (indicates this is floating point) */ + if (c == 'e' || c == 'E') { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + /* optional sign */ + if (c == '+' || c == '-') { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } + + if (c >= '0' && c <= '9') { + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (c >= '0' && c <= '9'); + } else { + unreadChar(lexer, offset); + lexer->error = yajl_lex_missing_integer_after_exponent; + return yajl_tok_error; + } + tok = yajl_tok_double; + } + + /* we always go "one too far" */ + unreadChar(lexer, offset); + + return tok; +} +/* Lex the rest of a comment. Reads one character and expects a slash (comment runs through the next newline) or a star (comment runs through the closing star-slash pair); presumably the caller has already consumed the leading slash - confirm against yajl_lex_lex. Returns yajl_tok_comment on success, yajl_tok_eof if the input ends first, or yajl_tok_error with lexer->error = yajl_lex_invalid_char for any other character. */ +static yajl_tok +yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset) +{ + unsigned char c; + + yajl_tok tok = yajl_tok_comment; + + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + + /* either slash or star expected */ + if (c == '/') { + /* now 
we throw away until end of line */ + do { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + } while (c != '\n'); + } else if (c == '*') { + /* now we throw away until end of comment */ + for (;;) { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + if (c == '*') { + RETURN_IF_EOF; + c = readChar(lexer, jsonText, offset); + if (c == '/') { + break; + } else { + /* push it back: it may itself be the star that starts the terminator */ unreadChar(lexer, offset); + } + } + } + } else { + lexer->error = yajl_lex_invalid_char; + tok = yajl_tok_error; + } + + return tok; +} + +yajl_tok +yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText, + size_t jsonTextLen, size_t * offset, + const unsigned char ** outBuf, size_t * outLen) +{ + yajl_tok tok = yajl_tok_error; + unsigned char c; + size_t startOffset = *offset; + + *outBuf = NULL; + *outLen = 0; + + for (;;) { + assert(*offset <= jsonTextLen); + + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + + c = readChar(lexer, jsonText, offset); + + switch (c) { + case '{': + tok = yajl_tok_left_bracket; + goto lexed; + case '}': + tok = yajl_tok_right_bracket; + goto lexed; + case '[': + tok = yajl_tok_left_brace; + goto lexed; + case ']': + tok = yajl_tok_right_brace; + goto lexed; + case ',': + tok = yajl_tok_comma; + goto lexed; + case ':': + tok = yajl_tok_colon; + goto lexed; + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + startOffset++; + break; + case 't': { + const char * want = "rue"; + do { + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + c = readChar(lexer, jsonText, offset); + if (c != *want) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_invalid_string; + tok = yajl_tok_error; + goto lexed; + } + } while (*(++want)); + tok = yajl_tok_bool; + goto lexed; + } + case 'f': { + const char * want = "alse"; + do { + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + c = readChar(lexer, jsonText, offset); + if (c != *want) { + unreadChar(lexer, offset); + lexer->error = 
yajl_lex_invalid_string; + tok = yajl_tok_error; + goto lexed; + } + } while (*(++want)); + tok = yajl_tok_bool; + goto lexed; + } + case 'n': { + const char * want = "ull"; + do { + if (*offset >= jsonTextLen) { + tok = yajl_tok_eof; + goto lexed; + } + c = readChar(lexer, jsonText, offset); + if (c != *want) { + unreadChar(lexer, offset); + lexer->error = yajl_lex_invalid_string; + tok = yajl_tok_error; + goto lexed; + } + } while (*(++want)); + tok = yajl_tok_null; + goto lexed; + } + case '"': { + tok = yajl_lex_string(lexer, (const unsigned char *) jsonText, + jsonTextLen, offset); + goto lexed; + } + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + /* integer parsing wants to start from the beginning */ + unreadChar(lexer, offset); + tok = yajl_lex_number(lexer, (const unsigned char *) jsonText, |