diff options
author | Vikas Gorur <vikas@gluster.com> | 2009-11-24 08:45:07 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2009-11-24 06:40:00 -0800 |
commit | 9e3fddb13769288ddc13db3125b8bedf26058cdf (patch) | |
tree | 2da64c5ca49ed14b22c253d67eed800533bf1e94 /xlators/cluster | |
parent | 21cffbc219efc36229002e71a02b9270cfee9186 (diff) |
cluster/afr: Refactored the data self-heal algorithm.
Refactored the operation of the data self-heal algorithm
as:
* open all fd's (if fd not supplied by caller)
* lock 0-0 (if lock not supplied by caller)
* fxattrop, fstat (instead of lookup)
... self heal ...
* unlock (if lock not supplied by caller)
* close (if fd not supplied by caller).
Signed-off-by: Vikas Gorur <vikas@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 385 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 6 |
2 files changed, 248 insertions, 143 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index bafb92fd2..46d074831 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -66,7 +66,9 @@ afr_sh_data_done (call_frame_t *frame, xlator_t *this) TODO: cleanup sh->* */ - if (sh->healing_fd) { + if (sh->healing_fd && !sh->healing_fd_opened) { + /* unref only if we created the fd ourselves */ + fd_unref (sh->healing_fd); sh->healing_fd = NULL; } @@ -158,6 +160,13 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this) stbuf.st_mtime = sh->buf[source].st_mtime; #endif + if (sh->healing_fd_opened) { + /* not our job to close the fd */ + + afr_sh_data_done (frame, this); + return 0; + } + if (!sh->healing_fd) { afr_sh_data_done (frame, this); return 0; @@ -268,6 +277,13 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; + if (sh->data_lock_held) { + /* not our job to unlock, proceed to close */ + + afr_sh_data_close (frame, this); + return 0; + } + for (i = 0; i < priv->child_count; i++) { if (sh->locked_nodes[i]) call_count++; @@ -393,8 +409,8 @@ afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this) STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk, (void *) (long) i, priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, + priv->children[i]->fops->fxattrop, + sh->healing_fd, GF_XATTROP_ADD_ARRAY, erase_xattr[i]); if (!--call_count) break; @@ -571,131 +587,6 @@ afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this) int -afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int child_index = 0; - - struct afr_sh_algorithm *sh_algo = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - child_index = (long) cookie; - - /* TODO: some of the open's might fail. - In that case, modify cleanup fn to send flush on those - fd's which are already open */ - - LOCK (&frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, - "open of %s failed on child %s (%s)", - local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - sh->op_failed = 1; - } - - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) { - if (sh->op_failed) { - afr_sh_data_finish (frame, this); - return 0; - } - gf_log (this->name, GF_LOG_TRACE, - "fd for %s opened, commencing sync", - local->loc.path); - - gf_log (this->name, GF_LOG_TRACE, - "sourcing file %s from %s to other sinks", - local->loc.path, priv->children[sh->source]->name); - - sh->algo_completion_cbk = afr_sh_data_trim_sinks; - sh->algo_abort_cbk = afr_sh_data_finish; - - sh_algo = afr_sh_data_pick_algo (frame, this); - - sh_algo->fn (frame, this); - } - - return 0; -} - - -int -afr_sh_data_open (call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int call_count = 0; - - int source = -1; - int *sources = NULL; - - fd_t *fd = NULL; - - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - call_count = sh->active_sinks + 1; - local->call_count = call_count; - - fd = fd_create (local->loc.inode, frame->root->pid); - sh->healing_fd = fd; - - source = local->self_heal.source; - sources = local->self_heal.sources; - - sh->block_size = 65536; - sh->file_size = sh->buf[source].st_size; - - if (FILE_HAS_HOLES (&sh->buf[source])) - sh->file_has_holes = 1; - - /* open source */ - STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk, - (void *) (long) source, - priv->children[source], - priv->children[source]->fops->open, - &local->loc, O_RDWR|O_LARGEFILE, fd, 0); - call_count--; - - /* open sinks */ - for (i = 0; i < priv->child_count; i++) { - if(sources[i] || !local->child_up[i]) - continue; - - STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->open, - &local->loc, - O_RDWR|O_LARGEFILE, fd, 0); - - if (!--call_count) - break; - } - - return 0; -} - - -int afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; @@ -705,6 +596,8 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) int source = 0; int i = 0; + struct afr_sh_algorithm *sh_algo = NULL; + local = frame->local; sh = &local->self_heal; priv = this->private; @@ -732,7 +625,12 @@ afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this) "self-healing file %s from subvolume %s to %d other", local->loc.path, priv->children[source]->name, active_sinks); - afr_sh_data_open (frame, this); + sh->algo_completion_cbk = afr_sh_data_trim_sinks; + sh->algo_abort_cbk = afr_sh_data_finish; + + sh_algo = afr_sh_data_pick_algo (frame, this); + + sh_algo->fn (frame, this); return 0; } @@ -809,7 +707,13 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) return 0; } - sh->source = source; + sh->source = source; + sh->block_size = 65536; + sh->file_size = sh->buf[source].st_size; + + if (FILE_HAS_HOLES (&sh->buf[source])) + sh->file_has_holes = 1; + local->cont.lookup.buf.st_size = sh->buf[source].st_size; /* detect changes not visible through pending flags -- JIC */ @@ -834,10 +738,9 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) int -afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct stat *buf, dict_t *xattr, - struct stat *postparent) +afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct stat *buf) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -853,7 +756,11 @@ afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie, LOCK (&frame->lock); { if (op_ret != -1) { - sh->xattr[child_index] = dict_ref (xattr); + gf_log (this->name, GF_LOG_TRACE, + "fstat of %s on %s succeeded", + local->loc.path, + priv->children[child_index]->name); + sh->buf[child_index] = *buf; } } @@ -870,13 +777,89 @@ afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie, int -afr_sh_data_lookup (call_frame_t *frame, xlator_t *this) +afr_sh_data_fstat (call_frame_t *frame, xlator_t *this) +{ + afr_self_heal_t *sh = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int call_count = 0; + int i = 0; + + priv = this->private; + local = frame->local; + sh = &local->self_heal; + + call_count = local->child_count; + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fstat, + sh->healing_fd); + + if (!--call_count) + break; + } + } + + return 0; +} + + +int +afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xattr) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + int call_count = -1; + int child_index = (long) cookie; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + LOCK (&frame->lock); + { + if (op_ret != -1) { + gf_log (this->name, GF_LOG_TRACE, + "fxattrop of %s on %s succeeded", + local->loc.path, + priv->children[child_index]->name); + + sh->xattr[child_index] = dict_ref (xattr); + } + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_sh_data_fstat (frame, this); + } + + return 0; +} + + +int +afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this) { afr_self_heal_t *sh = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; dict_t *xattr_req = NULL; + int32_t zero_pending[3] = {0, 0, 0}; + int call_count = 0; int i = 0; int ret = 0; @@ -892,18 +875,20 @@ afr_sh_data_lookup (call_frame_t *frame, xlator_t *this) xattr_req = dict_new(); if (xattr_req) { for (i = 0; i < priv->child_count; i++) { - ret = dict_set_uint64 (xattr_req, priv->pending_key[i], - 3 * sizeof(int32_t)); + ret = dict_set_static_bin (xattr_req, priv->pending_key[i], + zero_pending, 3 * sizeof(int32_t)); } } for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { - STACK_WIND_COOKIE (frame, afr_sh_data_lookup_cbk, + STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk, (void *) (long) i, priv->children[i], - priv->children[i]->fops->lookup, - &local->loc, xattr_req); + priv->children[i]->fops->fxattrop, + sh->healing_fd, GF_XATTROP_ADD_ARRAY, + xattr_req); + if (!--call_count) break; } @@ -993,7 +978,7 @@ afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) if ((child_index == priv->child_count) || (sh->lock_count == afr_lock_server_count (priv, AFR_DATA_TRANSACTION))) { - afr_sh_data_lookup (frame, this); + afr_sh_data_fxattrop (frame, this); return 0; } @@ -1025,6 +1010,14 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; + if (sh->data_lock_held) { + /* caller has held the lock already, + so skip locking */ + + afr_sh_data_fxattrop (frame, this); + return 0; + } + for (i = 0; i < priv->child_count; i++) sh->locked_nodes[i] = 0; @@ -1033,6 +1026,112 @@ afr_sh_data_lock (call_frame_t *frame, xlator_t *this) int +afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + int call_count = 0; + int child_index = 0; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + child_index = (long) cookie; + + /* TODO: some of the open's might fail. + In that case, modify cleanup fn to send flush on those + fd's which are already open */ + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, + "open of %s failed on child %s (%s)", + local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); + sh->op_failed = 1; + } + + gf_log (this->name, GF_LOG_TRACE, + "open of %s succeeded on child %s", + local->loc.path, + priv->children[child_index]->name); + } + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + if (sh->op_failed) { + afr_sh_data_finish (frame, this); + return 0; + } + + gf_log (this->name, GF_LOG_TRACE, + "fd for %s opened, commencing sync", + local->loc.path); + + afr_sh_data_lock (frame, this); + } + + return 0; +} + + +int +afr_sh_data_open (call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + + fd_t *fd = NULL; + + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; + priv = this->private; + + if (sh->healing_fd_opened) { + /* caller has opened the fd for us already, so skip open */ + + afr_sh_data_lock (frame, this); + return 0; + } + + call_count = afr_up_children_count (priv->child_count, local->child_up); + local->call_count = call_count; + + fd = fd_create (local->loc.inode, frame->root->pid); + sh->healing_fd = fd; + + /* open sinks */ + for (i = 0; i < priv->child_count; i++) { + if(!local->child_up[i]) + continue; + + STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->open, + &local->loc, + O_RDWR|O_LARGEFILE, fd, 0); + + if (!--call_count) + break; + } + + return 0; +} + + +int afr_self_heal_data (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; @@ -1044,7 +1143,7 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; if (local->need_data_self_heal && priv->data_self_heal) { - afr_sh_data_lock (frame, this); + afr_sh_data_open (frame, this); } else { gf_log (this->name, GF_LOG_TRACE, "not doing data self heal on %s", diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2f5742662..412f38afc 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -118,6 +118,12 @@ typedef struct { /* private data for the particular self-heal algorithm */ void *private; + gf_boolean_t healing_fd_opened; /* set by caller: true if caller + has already opened fd */ + + gf_boolean_t data_lock_held; /* set by caller: true if caller + has already acquired 0-0 lock */ + int (*completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this); int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); |