From 74612a456ad1602f8038fae79fee654eb427602a Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Tue, 24 Nov 2009 08:45:09 +0000 Subject: cluster/afr: Do self-heal on reopened fds. This patch brings in partial support for self-heal of open fds. The precondition is that the fd should have been opened successfully during the initial open() (or create()), and we assume that protocol/client has successfully reopened the fd when the subvolume comes back up. It works by doing an "up/down flush" (a dummy flush transaction to do post-op wherever necessary) and then triggering data self-heal on the file in the post-post-op hook of the dummy flush transaction. This ensures that any writes that come in during self-heal will wait until self-heal completes. The up/down flush is also done when a subvolume goes down, so that post-op is done on all subvolumes where pre-op was done. Signed-off-by: Vikas Gorur Signed-off-by: Anand V. Avati BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170 --- xlators/cluster/afr/src/afr.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'xlators/cluster/afr/src/afr.h') diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 23e75e6126a..56f7a069dcc 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -69,6 +69,9 @@ typedef struct _afr_private { unsigned int entry_lock_server_count; unsigned int wait_count; /* # of servers to wait for success */ + + uint64_t up_count; /* number of CHILD_UPs we have seen */ + uint64_t down_count; /* number of CHILD_DOWNs we have seen */ } afr_private_t; typedef struct { @@ -76,6 +79,8 @@ typedef struct { directories? */ gf_boolean_t forced_merge; + glusterfs_fop_t calling_fop; + /* array of stat's, one for each child */ struct stat *buf; struct stat parentbuf; @@ -124,6 +129,8 @@ typedef struct { gf_boolean_t data_lock_held; /* set by caller: true if caller has already acquired 0-0 lock */ + int (*flush_self_heal_cbk) (call_frame_t *frame, xlator_t *this); + int (*completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); @@ -168,6 +175,12 @@ afr_index_for_transaction_type (afr_transaction_type type) } +typedef enum { + AFR_CHILD_UP_FLUSH, + AFR_CHILD_DOWN_FLUSH, +} afr_flush_type; + + typedef struct _afr_local { unsigned int call_count; unsigned int success_count; @@ -203,9 +216,12 @@ typedef struct _afr_local { dict_t *xattr_req; int open_fd_count; + int32_t inodelk_count; int32_t entrylk_count; + int (*up_down_flush_cbk) (call_frame_t *, xlator_t *); + /* This struct contains the arguments for the "continuation" (scheme-like) of fops @@ -503,8 +519,10 @@ typedef struct _afr_local { typedef struct { - unsigned char pre_op_done; + unsigned char *pre_op_done; unsigned char *child_failed; + uint64_t up_count; /* number of CHILD_UPs this fd has seen */ + uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */ } afr_fd_ctx_t; @@ -560,9 +578,19 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this); int afr_frame_return (call_frame_t *frame); +uint64_t +afr_is_split_brain (xlator_t *this, inode_t *inode); + void afr_set_split_brain (xlator_t *this, inode_t *inode); +int +afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, int32_t wbflags); + +int +afr_up_down_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, afr_flush_type type); + void afr_set_opendir_done (xlator_t *this, inode_t *inode); -- cgit