diff options
author | Amar Tumballi <amar@gluster.com> | 2011-09-07 12:53:09 +0530 |
---|---|---|
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-12 23:57:38 -0700 |
commit | a07bb18c8adeb8597f62095c5d1361c5bad01f09 (patch) | |
tree | 55bf7ef630793cfd11fc15e84d859cbfb2184b01 /libglusterfs | |
parent | 09eeaf4e68c225b8e5ccc0a9b4f10f8c4748e205 (diff) |
distribute rebalance: handle the open file migration
Complexity involved: To migrate a file with open fd, we have to
notify the other client process which has the open fd, and make
sure the write()s happening on that fd is properly synced to the
migrated file. Once the migration is complete, the client
process which has open-fd should get notified and it should
start performing all the operations on the new subvolume,
instead of earlier cached volume.
How to solve the notification part:
We can overload the 'postbuf' attribute in the _cbk() function to
understand if a file is 'under-migration' or 'migration-complete'
state. (This will be something similar to deciding whether a file
is DHT-linkfile by its 'mode').
Overall change includes below mentioned major changes:
1. dht_linkfile is decided by only 2 factors (mode(01000),
xattr(trusted.glusterfs.dht.linkto)), instead of earlier
3 factors (size==0)
2. in linkfile self-heal part (in 'dht_lookup_everywhere_cbk()'),
don't delete a linkfile if there is a open-fd on it. It means,
there may be a migration in progress.
3. if a file's revalidate fails with ENOENT, it may be due to file
migration, and hence need a lookup_everywhere()
4. There will be 2 phases of file-migration.
-> Phase 1: Migration in progress
* The source data file will have SGID and STICKY bit set in its mode.
* The source data file will have a 'linkto' xattr pointing the
destination.
* Destination file will have mode set to '01000', and 'linkto' xattr
set to itself.
-> Phase 2: File migration Complete
* The source data file will have mode '01000', and will be 'truncated'
to size 0.
* The destination file will have inherited mode from the source. (without
sgid and sticky bit) and its 'linkto' attribute will be removed.
4. Changes in distribute to work smoothly with a file which is in migration /
got migrated.
The 'fops' are divided into 3 categories, inode-read, inode-write and others.
inode-read fops need to handle only 'phase 2' notification, where as, the
inode-write fops need to handle both 'phase 1' and phase2. The inode-write
operations will be done on source file, and if any of 'file-migration' procedures
are detected in _cbk(), then the operations should be performed on the destination
too.
when a phase-2 is detected, then the inode-ctx itself should be changed to represent
a new layout.
With these changes, the open file migration will work smoothly with multiple clients.
Change-Id: I512408463814e650f34c62ed009bf2101d016fd6
BUG: 3071
Reviewed-on: http://review.gluster.com/209
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'libglusterfs')
-rw-r--r-- | libglusterfs/src/syncop.c | 130 | ||||
-rw-r--r-- | libglusterfs/src/syncop.h | 7 |
2 files changed, 134 insertions, 3 deletions
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 76d2b5811fe..bbcf5201ebd 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -494,8 +494,8 @@ syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags) } int -syncop_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *dict) +syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict) { struct syncargs *args = NULL; @@ -516,7 +516,7 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict) { struct syncargs args = {0, }; - SYNCOP (subvol, (&args), syncop_listxattr_cbk, subvol->fops->getxattr, + SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, loc, NULL); if (dict) @@ -527,6 +527,36 @@ syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict) } int +syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, + loc, key); + + if (dict) + *dict = args.xattr; + + errno = args.op_errno; + return args.op_ret; +} + +int +syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr, + fd, key); + + if (dict) + *dict = args.xattr; + + errno = args.op_errno; + return args.op_ret; +} + +int syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct statvfs *buf) @@ -856,3 +886,97 @@ syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset) errno = args.op_errno; return args.op_ret; } + +int +syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate, + loc, offset); + + errno = args.op_errno; + return args.op_ret; +} + +int +syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + __wake (args); + + return 0; + +} + +int +syncop_fsync (xlator_t *subvol, fd_t *fd) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, + fd, 0); + + errno = args.op_errno; + return args.op_ret; + +} + +int +syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + if (op_ret == 0) + args->iatt1 = *stbuf; + + __wake (args); + + return 0; + +} + +int +syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat, + fd); + + if (stbuf) + *stbuf = args.iatt1; + + errno = args.op_errno; + return args.op_ret; + +} + +int +syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat, + loc); + + if (stbuf) + *stbuf = args.iatt1; + + errno = args.op_errno; + return args.op_ret; + +} diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index bb898e11edb..1c3fe07b517 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -181,6 +181,8 @@ int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf); int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags); int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict); +int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key); +int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key); int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name); int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode, @@ -197,8 +199,13 @@ int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off, struct iovec **vector, int *count, struct iobref **iobref); int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset); +int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset); int syncop_unlink (xlator_t *subvol, loc_t *loc); +int syncop_fsync (xlator_t *subvol, fd_t *fd); +int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf); +int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf); + #endif /* _SYNCOP_H */ |