diff options
author | Amar Tumballi <amar@gluster.com> | 2011-09-07 12:53:09 +0530 |
---|---|---|
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-12 23:57:38 -0700 |
commit | a07bb18c8adeb8597f62095c5d1361c5bad01f09 (patch) | |
tree | 55bf7ef630793cfd11fc15e84d859cbfb2184b01 /xlators/cluster/dht/src/dht-common.h | |
parent | 09eeaf4e68c225b8e5ccc0a9b4f10f8c4748e205 (diff) |
distribute rebalance: handle the open file migration
Complexity involved: To migrate a file with open fd, we have to
notify the other client process which has the open fd, and make
sure the write()s happening on that fd is properly synced to the
migrated file. Once the migration is complete, the client
process which has open-fd should get notified and it should
start performing all the operations on the new subvolume,
instead of earlier cached volume.
How to solve the notification part:
We can overload the 'postbuf' attribute in the _cbk() function to
understand if a file is 'under-migration' or 'migration-complete'
state. (This will be something similar to deciding whether a file
is DHT-linkfile by its 'mode').
Overall change includes below mentioned major changes:
1. dht_linkfile is decided by only 2 factors (mode(01000),
xattr(trusted.glusterfs.dht.linkto)), instead of earlier
3 factors (size==0)
2. in linkfile self-heal part (in 'dht_lookup_everywhere_cbk()'),
don't delete a linkfile if there is a open-fd on it. It means,
there may be a migration in progress.
3. if a file's revalidate fails with ENOENT, it may be due to file
migration, and hence need a lookup_everywhere()
4. There will be 2 phases of file-migration.
-> Phase 1: Migration in progress
* The source data file will have SGID and STICKY bit set in its mode.
* The source data file will have a 'linkto' xattr pointing the
destination.
* Destination file will have mode set to '01000', and 'linkto' xattr
set to itself.
-> Phase 2: File migration Complete
* The source data file will have mode '01000', and will be 'truncated'
to size 0.
* The destination file will have inherited mode from the source. (without
sgid and sticky bit) and its 'linkto' attribute will be removed.
4. Changes in distribute to work smoothly with a file which is in migration /
got migrated.
The 'fops' are divided into 3 categories, inode-read, inode-write and others.
inode-read fops need to handle only 'phase 2' notification, where as, the
inode-write fops need to handle both 'phase 1' and phase2. The inode-write
operations will be done on source file, and if any of 'file-migration' procedures
are detected in _cbk(), then the operations should be performed on the destination
too.
when a phase-2 is detected, then the inode-ctx itself should be changed to represent
a new layout.
With these changes, the open file migration will work smoothly with multiple clients.
Change-Id: I512408463814e650f34c62ed009bf2101d016fd6
BUG: 3071
Reviewed-on: http://review.gluster.com/209
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.h')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 413 |
1 files changed, 331 insertions, 82 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 95d1b1d6a..ab1b82af2 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -29,45 +29,61 @@ #ifndef _DHT_H #define _DHT_H -#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout" +#define GF_XATTR_FIX_LAYOUT_KEY "trusted.distribute.fix.layout" #define GF_DHT_LOOKUP_UNHASHED_ON 1 #define GF_DHT_LOOKUP_UNHASHED_AUTO 2 -#define DHT_PATHINFO_HEADER "DISTRIBUTE:" +#define DHT_PATHINFO_HEADER "DISTRIBUTE:" #include <fnmatch.h> typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno); + xlator_t *this, + int32_t op_ret, int32_t op_errno); +typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame, + int ret); struct dht_layout { - int spread_cnt; /* layout spread count per directory, - is controlled by 'setxattr()' with - special key */ - int cnt; - int preset; - int gen; - int type; - int ref; /* use with dht_conf_t->layout_lock */ - int search_unhashed; + int spread_cnt; /* layout spread count per directory, + is controlled by 'setxattr()' with + special key */ + int cnt; + int preset; + int gen; + int type; + int ref; /* use with dht_conf_t->layout_lock */ + int search_unhashed; struct { - int err; /* 0 = normal - -1 = dir exists and no xattr - >0 = dir lookup failed with errno - */ - uint32_t start; - uint32_t stop; - xlator_t *xlator; + int err; /* 0 = normal + -1 = dir exists and no xattr + >0 = dir lookup failed with errno + */ + uint32_t start; + uint32_t stop; + xlator_t *xlator; } list[0]; }; -typedef struct dht_layout dht_layout_t; +typedef struct dht_layout dht_layout_t; typedef enum { DHT_HASH_TYPE_DM, } dht_hashfn_type_t; +/* rebalance related */ +struct dht_rebalance_ { + xlator_t *from_subvol; + xlator_t *target_node; + int32_t wbflags; + off_t offset; + size_t size; + int32_t flags; + int count; + struct iobref *iobref; + struct iovec *vector; + struct iatt stbuf; + dht_defrag_cbk_fn_t target_op_fn; +}; struct dht_local { int call_cnt; @@ -140,11 +156,12 @@ struct dht_local { /* flag used to make sure we need to return estale in {lookup,revalidate}_cbk */ - char return_estale; + char return_estale; + char need_lookup_everywhere; - /* rebalance related */ -#define to_subvol hashed_subvol -#define from_subvol cached_subvol + glusterfs_fop_t fop; + + struct dht_rebalance_ rebalance; }; typedef struct dht_local dht_local_t; @@ -209,11 +226,21 @@ typedef struct dht_disk_layout dht_disk_layout_t; #define is_last_call(cnt) (cnt == 0) -#define DHT_LINKFILE_MODE (S_ISVTX) +#define DHT_MIGRATION_IN_PROGRESS 1 +#define DHT_MIGRATION_COMPLETED 2 + +#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto" +#define DHT_LINKFILE_MODE (S_ISVTX) + #define check_is_linkfile(i,s,x) ( \ ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \ == DHT_LINKFILE_MODE) && \ - (s->ia_size == 0)) + dict_get (x, DHT_LINKFILE_KEY)) + +#define check_is_linkfile_wo_dict(i,s) ( \ + ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) \ + == DHT_LINKFILE_MODE)) + #define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type)) @@ -221,85 +248,86 @@ typedef struct dht_disk_layout dht_disk_layout_t; #define DHT_STACK_UNWIND(fop, frame, params ...) do { \ dht_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ + xlator_t *__xl = NULL; \ if (frame) { \ - __xl = frame->this; \ - __local = frame->local; \ + __xl = frame->this; \ + __local = frame->local; \ frame->local = NULL; \ } \ STACK_UNWIND_STRICT (fop, frame, params); \ dht_local_wipe (__xl, __local); \ } while (0) -#define DHT_STACK_DESTROY(frame) do { \ - dht_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root); \ - dht_local_wipe (__xl, __local); \ +#define DHT_STACK_DESTROY(frame) do { \ + dht_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + dht_local_wipe (__xl, __local); \ } while (0) -dht_layout_t *dht_layout_new (xlator_t *this, int cnt); -dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); -dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); -xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, +dht_layout_t *dht_layout_new (xlator_t *this, int cnt); +dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); +dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name); -int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); -int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, - uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, - uint32_t *misc_p); -int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, - xlator_t *subvol, loc_t *loc, dict_t *xattr); +int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); +int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, + uint32_t *holes_p, uint32_t *overlaps_p, + uint32_t *missing_p, uint32_t *down_p, + uint32_t *misc_p); +int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, + xlator_t *subvol, loc_t *loc, dict_t *xattr); xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); -int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, - xlator_t *subvol, loc_t *loc); +int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, + xlator_t *subvol, loc_t *loc); int dht_layouts_init (xlator_t *this, dht_conf_t *conf); int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, - int op_ret, int op_errno, dict_t *xattr); + int op_ret, int op_errno, dict_t *xattr); int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, - int pos, int32_t **disk_layout_p); -int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, - int pos, void *disk_layout_raw); + int pos, int32_t **disk_layout_p); +int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, + int pos, void *disk_layout_raw); int dht_frame_return (call_frame_t *frame); -int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); +int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol, uint64_t *x); void dht_local_wipe (xlator_t *this, dht_local_t *local); -dht_local_t *dht_local_init (call_frame_t *frame); -int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, - xlator_t *subvol); +dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, + glusterfs_fop_t fop); +int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, + xlator_t *subvol); xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); -int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); +int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); int dht_hash_compute (int type, const char *name, uint32_t *hash_p); -int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, - xlator_t *tovol, xlator_t *fromvol, loc_t *loc); -int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); +int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, + xlator_t *tovol, xlator_t *fromvol, loc_t *loc); +int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); +int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); int -dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, - loc_t *loc, dht_layout_t *layout); +dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); int dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, dht_layout_t *layout); int -dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, - loc_t *loc, dht_layout_t *layout); +dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); int dht_layout_sort_volname (dht_layout_t *layout); @@ -308,31 +336,252 @@ int dht_rename (call_frame_t *frame, xlator_t *this, int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); -int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); +int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); -int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); -void dht_layout_unref (xlator_t *this, dht_layout_t *layout); +int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout); +void dht_layout_unref (xlator_t *this, dht_layout_t *layout); dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout); -xlator_t *dht_first_up_subvol (xlator_t *this); -xlator_t *dht_last_up_subvol (xlator_t *this); +xlator_t *dht_first_up_subvol (xlator_t *this); +xlator_t *dht_last_up_subvol (xlator_t *this); int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name); -int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, +int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, xlator_t **subvol); -int dht_rename_cleanup (call_frame_t *frame); +int dht_rename_cleanup (call_frame_t *frame); int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent); + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent); int dht_fix_directory_layout (call_frame_t *frame, - dht_selfheal_dir_cbk_t dir_cbk, - dht_layout_t *layout); + dht_selfheal_dir_cbk_t dir_cbk, + dht_layout_t *layout); +int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf); + +/* migration/rebalance */ int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame); -#endif /* _DHT_H */ + +int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame); +int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame); + + +/* FOPS */ +int32_t dht_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req); + +int32_t dht_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd); + +int32_t dht_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset); + +int32_t dht_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset); + +int32_t dht_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask); + +int32_t dht_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size); + +int32_t dht_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t rdev, dict_t *params); + +int32_t dht_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dict_t *params); + +int32_t dht_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags); + +int32_t dht_symlink (call_frame_t *frame, xlator_t *this, + const char *linkpath, loc_t *loc, dict_t *params); + +int32_t dht_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc); + +int32_t dht_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc); + +int32_t dht_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + fd_t *fd, dict_t *params); + +int32_t dht_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, fd_t *fd, + int32_t wbflags); + +int32_t dht_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset); + +int32_t dht_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t offset, + struct iobref *iobref); + +int32_t dht_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd); + +int32_t dht_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync); + +int32_t dht_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, fd_t *fd); + +int32_t dht_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t datasync); + +int32_t dht_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc); + +int32_t dht_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags); + +int32_t dht_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name); + +int32_t dht_fsetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + dict_t *dict, + int32_t flags); + +int32_t dht_fgetxattr (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + const char *name); + +int32_t dht_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name); + +int32_t dht_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct gf_flock *flock); + +int32_t dht_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock); + +int32_t dht_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock); + +int32_t dht_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type); + +int32_t dht_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, off_t off); + +int32_t dht_readdirp (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, off_t off); + +int32_t dht_xattrop (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + gf_xattrop_flags_t flags, + dict_t *dict); + +int32_t dht_fxattrop (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + gf_xattrop_flags_t flags, + dict_t *dict); + +int32_t dht_forget (xlator_t *this, inode_t *inode); +int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid); +int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid); + +int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...); + +/* definitions for nufa/switch */ +int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent); +int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent); +int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent); + + + +#endif/* _DHT_H */ |