diff options
author | Amar Tumballi <amar@gluster.com> | 2011-09-07 12:53:09 +0530 |
---|---|---|
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-12 23:57:38 -0700 |
commit | a07bb18c8adeb8597f62095c5d1361c5bad01f09 (patch) | |
tree | 55bf7ef630793cfd11fc15e84d859cbfb2184b01 /xlators/cluster/dht/src/nufa.c | |
parent | 09eeaf4e68c225b8e5ccc0a9b4f10f8c4748e205 (diff) |
distribute rebalance: handle the open file migration
Complexity involved: To migrate a file with open fd, we have to
notify the other client process which has the open fd, and make
sure the write()s happening on that fd is properly synced to the
migrated file. Once the migration is complete, the client
process which has open-fd should get notified and it should
start performing all the operations on the new subvolume,
instead of earlier cached volume.
How to solve the notification part:
We can overload the 'postbuf' attribute in the _cbk() function to
understand if a file is 'under-migration' or 'migration-complete'
state. (This will be something similar to deciding whether a file
is DHT-linkfile by its 'mode').
Overall change includes below mentioned major changes:
1. dht_linkfile is decided by only 2 factors (mode(01000),
xattr(trusted.glusterfs.dht.linkto)), instead of earlier
3 factors (size==0)
2. in linkfile self-heal part (in 'dht_lookup_everywhere_cbk()'),
don't delete a linkfile if there is a open-fd on it. It means,
there may be a migration in progress.
3. if a file's revalidate fails with ENOENT, it may be due to file
migration, and hence need a lookup_everywhere()
4. There will be 2 phases of file-migration.
-> Phase 1: Migration in progress
* The source data file will have SGID and STICKY bit set in its mode.
* The source data file will have a 'linkto' xattr pointing the
destination.
* Destination file will have mode set to '01000', and 'linkto' xattr
set to itself.
-> Phase 2: File migration Complete
* The source data file will have mode '01000', and will be 'truncated'
to size 0.
* The destination file will have inherited mode from the source. (without
sgid and sticky bit) and its 'linkto' attribute will be removed.
4. Changes in distribute to work smoothly with a file which is in migration /
got migrated.
The 'fops' are divided into 3 categories, inode-read, inode-write and others.
inode-read fops need to handle only 'phase 2' notification, where as, the
inode-write fops need to handle both 'phase 1' and phase2. The inode-write
operations will be done on source file, and if any of 'file-migration' procedures
are detected in _cbk(), then the operations should be performed on the destination
too.
when a phase-2 is detected, then the inode-ctx itself should be changed to represent
a new layout.
With these changes, the open file migration will work smoothly with multiple clients.
Change-Id: I512408463814e650f34c62ed009bf2101d016fd6
BUG: 3071
Reviewed-on: http://review.gluster.com/209
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/dht/src/nufa.c')
-rw-r--r-- | xlators/cluster/dht/src/nufa.c | 39 |
1 files changed, 7 insertions, 32 deletions
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 845c6b74ee1..9dcf224d177 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -23,7 +23,7 @@ #include "config.h" #endif -#include "dht-common.c" +#include "dht-common.h" /* TODO: all 'TODO's in dht.c holds good */ @@ -169,21 +169,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, conf = this->private; - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP); if (!local) { op_errno = ENOMEM; goto err; } - ret = loc_dup (loc, &local->loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_DEBUG, - "copying location failed for path=%s", - loc->path); - goto err; - } - if (xattr_req) { local->xattr_req = dict_ref (xattr_req); } else { @@ -191,14 +182,12 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, } hashed_subvol = dht_subvol_get_hashed (this, &local->loc); - cached_subvol = dht_subvol_get_cached (this, local->loc.inode); + cached_subvol = local->cached_subvol; - local->cached_subvol = cached_subvol; local->hashed_subvol = hashed_subvol; if (is_revalidate (loc)) { - local->layout = layout = dht_layout_get (this, loc->inode); - + layout = local->layout; if (!layout) { gf_log (this->name, GF_LOG_DEBUG, "revalidate without cache. path=%s", @@ -215,7 +204,7 @@ nufa_lookup (call_frame_t *frame, xlator_t *this, goto do_fresh_lookup; } - local->inode = inode_ref (loc->inode); + local->inode = inode_ref (loc->inode); local->call_cnt = layout->cnt; call_cnt = local->call_cnt; @@ -314,7 +303,6 @@ nufa_create (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -324,7 +312,7 @@ nufa_create (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, fd, GF_FOP_CREATE); if (!local) { op_errno = ENOMEM; goto err; @@ -348,13 +336,6 @@ nufa_create (call_frame_t *frame, xlator_t *this, if (subvol != avail_subvol) { /* create a link file instead of actual file */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref (fd); local->params = dict_ref (params); local->mode = mode; local->flags = flags; @@ -421,7 +402,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *avail_subvol = NULL; int op_errno = -1; - int ret = -1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -431,7 +411,7 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, dht_get_du_info (frame, this, loc); - local = dht_local_init (frame); + local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD); if (!local) { op_errno = ENOMEM; goto err; @@ -456,11 +436,6 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, if (avail_subvol != subvol) { /* Create linkfile first */ - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } local->params = dict_ref (params); local->mode = mode; |