diff options
author | Pavan Sondur <pavan@gluster.com> | 2010-10-01 01:42:46 +0000 |
---|---|---|
committer | Vijay Bellur <vijay@dev.gluster.com> | 2010-10-01 01:02:52 -0700 |
commit | 9555e5ee7dd2c3a2c98cf505bb198e453ea747d1 (patch) | |
tree | 1daa301a747d2b4cb490e5742b364522e8cc2fcc /xlators/cluster | |
parent | d355ecd89427871d8d3f20a99acd284baccef6e1 (diff) |
cluster/afr: Recover locks on child_up from source to sink.
Signed-off-by: Pavan Vilas Sondur <pavan@gluster.com>
Signed-off-by: Vijay Bellur <vijay@dev.gluster.com>
BUG: 865 (Add locks recovery support in GlusterFS)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=865
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 18 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 383 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 7 |
3 files changed, 408 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9d9f5d041..e5a7e87c7 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1450,8 +1450,24 @@ out: int afr_release (xlator_t *this, fd_t *fd) { + afr_locked_fd_t *locked_fd = NULL; + afr_locked_fd_t *tmp = NULL; + afr_private_t *priv = NULL; + + priv = this->private; + afr_cleanup_fd_ctx (this, fd); + list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds, + list) { + + if (locked_fd->fd == fd) { + list_del_init (&locked_fd->list); + GF_FREE (locked_fd); + } + + } + return 0; } @@ -2523,6 +2539,8 @@ afr_notify (xlator_t *this, int32_t event, case GF_EVENT_CHILD_UP: i = find_child_index (this, data); + afr_attempt_lock_recovery (this, i); + child_up[i] = 1; LOCK (&priv->lock); diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index de95a6c76..12050af1d 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1743,6 +1743,7 @@ __afr_save_locked_fd (xlator_t *this, fd_t *fd) goto out; } + locked_fd->fd = fd; INIT_LIST_HEAD (&locked_fd->list); list_add_tail (&locked_fd->list, &priv->saved_fds); @@ -1779,3 +1780,385 @@ unlock: return ret; } + +static int +afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_locked_fd_t *locked_fd = NULL; + + local = frame->local; + + locked_fd = local->locked_fd; + + STACK_DESTROY (frame->root); + afr_local_cleanup (local, this); + + afr_save_locked_fd (this, locked_fd->fd); + + return 0; + +} + +static int +afr_get_source_lock_recovery (xlator_t *this, fd_t *fd) +{ + afr_fd_ctx_t *fdctx = NULL; + afr_private_t *priv = NULL; + uint64_t tmp = 0; + int i = 0; + int source_child = -1; + int ret = 0; + + priv = this->private; + + ret = fd_ctx_get (fd, this, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + + for (i = 0; i < priv->child_count; i++) { + if (fdctx->locked_on[i]) { + gf_log (this->name, GF_LOG_DEBUG, + "Found lock recovery source=%d", + i); + source_child = i; + break; + } + + } + +out: + return source_child; + +} + +int32_t +afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock); +int32_t +afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t source_child = 0; + struct flock flock = {0,}; + + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "lock recovery failed"); + goto cleanup; + } + + source_child = local->source_child; + + memcpy (&flock, lock, sizeof (*lock)); + + STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk, + (void *) (long) source_child, + priv->children[source_child], + priv->children[source_child]->fops->lk, + local->fd, F_GETLK_FD, &flock); + + return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); + return 0; +} + +int +afr_recover_lock (call_frame_t *frame, xlator_t *this, + struct flock *flock) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int32_t lock_recovery_child = 0; + + priv = this->private; + local = frame->local; + + lock_recovery_child = local->lock_recovery_child; + + STACK_WIND_COOKIE (frame, afr_recover_lock_cbk, + (void *) (long) lock_recovery_child, + priv->children[lock_recovery_child], + priv->children[lock_recovery_child]->fops->lk, + local->fd, F_SETLK, flock); + + return 0; +} + +static int +is_afr_lock_eol (struct flock *lock) +{ + int ret = 0; + + if ((lock->l_type = GF_LK_EOL)) + ret = 1; + + return ret; +} + +int32_t +afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get locks on fd"); + goto cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Got a lock on fd"); + + if (is_afr_lock_eol (lock)) { + gf_log (this->name, GF_LOG_DEBUG, + "Reached EOL on locks on fd"); + goto cleanup; + } + + afr_recover_lock (frame, this, lock); + + return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); + + return 0; +} + +static int +afr_lock_recovery (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + int ret = 0; + int32_t source_child = 0; + struct flock flock = {0,}; + + priv = this->private; + local = frame->local; + + fd = local->fd; + + source_child = afr_get_source_lock_recovery (this, fd); + if (source_child < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not recover locks due to lock " + "split brain"); + ret = -1; + goto out; + } + + local->source_child = source_child; + + /* the flock can be zero filled as we're querying incrementally + the locks held on the fd. + */ + STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk, + (void *) (long) source_child, + priv->children[source_child], + priv->children[source_child]->fops->lk, + local->fd, F_GETLK_FD, &flock); + +out: + return ret; +} + + +static int +afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index) +{ + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; + + ret = fd_ctx_get (fd, this, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + + fdctx->opened_on[child_index] = 1; + +out: + return ret; +} + +int32_t +afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + int32_t child_index = (long )cookie; + int ret = 0; + + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Reopen during lock-recovery failed"); + goto cleanup; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Open succeeded => proceed to recover locks"); + + ret = afr_lock_recovery (frame, this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Lock recovery failed"); + goto cleanup; + } + + ret = afr_mark_fd_opened (this, fd, child_index); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Marking fd open failed"); + goto cleanup; + } + + return 0; + +cleanup: + afr_lock_recovery_cleanup (frame, this); + return 0; +} + +static int +afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + uint64_t tmp = 0; + afr_fd_ctx_t *fdctx = NULL; + loc_t loc = {0,}; + int32_t child_index = 0; + int ret = 0; + + priv = this->private; + local = frame->local; + + GF_ASSERT (local && local->fd); + + ret = fd_ctx_get (local->fd, this, &tmp); + fdctx = (afr_fd_ctx_t *) (long) tmp; + GF_ASSERT (fdctx); + + child_index = local->lock_recovery_child; + + inode_path (local->fd->inode, NULL, (char **)&loc.path); + loc.name = strrchr (loc.path, '/'); + loc.inode = inode_ref (local->fd->inode); + loc.parent = inode_parent (local->fd->inode, 0, NULL); + + + STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk, + (void *)(long) child_index, + priv->children[child_index], + priv->children[child_index]->fops->open, + &loc, fdctx->flags, local->fd, + fdctx->wbflags); + + return 0; +} + +static int +is_fd_opened (fd_t *fd, int32_t child_index) +{ + afr_fd_ctx_t *fdctx = NULL; + uint64_t tmp = 0; + int ret = 0; + + ret = fd_ctx_get (fd, THIS, &tmp); + if (ret) + goto out; + + fdctx = (afr_fd_ctx_t *) (long) tmp; + + if (fdctx->opened_on[child_index]) + ret = 1; + +out: + return ret; +} + +int +afr_attempt_lock_recovery (xlator_t *this, int32_t child_index) +{ + call_frame_t *frame = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_locked_fd_t *locked_fd = NULL; + afr_locked_fd_t *tmp = NULL; + int ret = 0; + struct list_head locks_list; + + + priv = this->private; + + if (list_empty (&priv->saved_fds)) + goto out; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + local = GF_CALLOC (1, sizeof (*local), + gf_afr_mt_afr_local_t); + if (!local) { + gf_log (this->name, GF_LOG_DEBUG, + "Out of memory"); + ret = -1; + goto out; + } + + AFR_LOCAL_INIT (local, priv); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + ret = -1; + goto out; + } + + frame->local = local; + + INIT_LIST_HEAD (&locks_list); + + pthread_mutex_lock (&priv->mutex); + { + list_splice_init (&priv->saved_fds, &locks_list); + } + pthread_mutex_unlock (&priv->mutex); + + list_for_each_entry_safe (locked_fd, tmp, + &locks_list, list) { + + list_del_init (&locked_fd->list); + + local->fd = locked_fd->fd; + local->lock_recovery_child = child_index; + local->locked_fd = locked_fd; + + if (!is_fd_opened (locked_fd->fd, child_index)) { + gf_log (this->name, GF_LOG_DEBUG, + "attempting open before lock " + "recovery"); + afr_lock_recovery_preopen (frame, this); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "attempting lock recovery " + "without a preopen"); + afr_lock_recovery (frame, this); + } + } + +out: + return ret; +} diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 68b4a1e30..005847b41 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -307,6 +307,10 @@ typedef struct _afr_local { afr_internal_lock_t internal_lock; + afr_locked_fd_t *locked_fd; + int32_t source_child; + int32_t lock_recovery_child; + dict_t *dict; int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this); @@ -666,6 +670,9 @@ afr_notify (xlator_t *this, int32_t event, void *data, ...); int +afr_attempt_lock_recovery (xlator_t *this, int32_t child_index); + +int afr_save_locked_fd (xlator_t *this, fd_t *fd); int |