diff options
-rw-r--r-- | libglusterfs/src/glusterfs.h | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 18 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 447 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 7 | ||||
-rw-r--r-- | xlators/features/locks/src/posix.c | 2 | ||||
-rw-r--r-- | xlators/features/locks/src/reservelk.c | 4 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-lk.c | 5 |
7 files changed, 479 insertions, 6 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 9b5a0e99239..2bb06fbb7f4 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -160,7 +160,7 @@ typedef enum {
         GF_LK_F_RDLCK = 0,
         GF_LK_F_WRLCK,
         GF_LK_F_UNLCK,
-        GF_LK_RECLK,
+        GF_LK_EOL,
 } glusterfs_lk_types_t;
 
 typedef enum {
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9d9f5d0414d..e5a7e87c72f 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1450,8 +1450,24 @@ out:
 int
 afr_release (xlator_t *this, fd_t *fd)
 {
+        afr_locked_fd_t *locked_fd = NULL;
+        afr_locked_fd_t *tmp = NULL;
+        afr_private_t   *priv = NULL;
+
+        priv = this->private;
+
         afr_cleanup_fd_ctx (this, fd);
 
+        list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
+                                  list) {
+
+                if (locked_fd->fd == fd) {
+                        list_del_init (&locked_fd->list);
+                        GF_FREE (locked_fd);
+                }
+
+        }
+
         return 0;
 }
 
@@ -2523,6 +2539,8 @@ afr_notify (xlator_t *this, int32_t event,
         case GF_EVENT_CHILD_UP:
                 i = find_child_index (this, data);
 
+                afr_attempt_lock_recovery (this, i);
+
                 child_up[i] = 1;
 
                 LOCK (&priv->lock);
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index de95a6c763b..12050af1d35 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1743,6 +1743,7 @@ __afr_save_locked_fd (xlator_t *this, fd_t *fd)
                 goto out;
         }
 
+        locked_fd->fd = fd;
         INIT_LIST_HEAD (&locked_fd->list);
 
         list_add_tail (&locked_fd->list, &priv->saved_fds);
@@ -1779,3 +1780,449 @@ unlock:
 
         return ret;
 }
+
+/*
+ * Finish one lock-recovery attempt: destroy the call stack, clean up the
+ * afr local and hand the fd back to afr_save_locked_fd().
+ *
+ * NOTE(review): afr_local_cleanup() runs after STACK_DESTROY(); confirm
+ * that STACK_DESTROY() does not already release frame->local.
+ */
+static int
+afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local     = NULL;
+        afr_locked_fd_t *locked_fd = NULL;
+
+        local = frame->local;
+
+        locked_fd = local->locked_fd;
+
+        STACK_DESTROY (frame->root);
+        afr_local_cleanup (local, this);
+
+        afr_save_locked_fd (this, locked_fd->fd);
+
+        return 0;
+
+}
+
+/*
+ * Return the index of the first child whose locked_on[] slot is set in
+ * the fd context, or -1 when no slot is set or the fd context cannot be
+ * fetched.
+ */
+static int
+afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
+{
+        afr_fd_ctx_t  *fdctx = NULL;
+        afr_private_t *priv  = NULL;
+        uint64_t      tmp = 0;
+        int           i = 0;
+        int           source_child = -1;
+        int           ret = 0;
+
+        priv = this->private;
+
+        ret = fd_ctx_get (fd, this, &tmp);
+        if (ret)
+                goto out;
+
+        fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (fdctx->locked_on[i]) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "Found lock recovery source=%d",
+                                i);
+                        source_child = i;
+                        break;
+                }
+
+        }
+
+out:
+        return source_child;
+
+}
+
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct flock *lock);
+/*
+ * Callback for the F_SETLK wound by afr_recover_lock(). On success, wind
+ * F_GETLK_FD to the source child with a copy of the returned lock; on
+ * failure, end the recovery attempt.
+ */
+int32_t
+afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+        int32_t       source_child = 0;
+        struct flock  flock = {0,};
+
+        /* Both are dereferenced below, so fetch them before any use. */
+        priv  = this->private;
+        local = frame->local;
+
+        if (op_ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "lock recovery failed");
+                goto cleanup;
+        }
+
+        source_child = local->source_child;
+
+        memcpy (&flock, lock, sizeof (*lock));
+
+        STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+                           (void *) (long) source_child,
+                           priv->children[source_child],
+                           priv->children[source_child]->fops->lk,
+                           local->fd, F_GETLK_FD, &flock);
+
+        return 0;
+
+cleanup:
+        afr_lock_recovery_cleanup (frame, this);
+        return 0;
+}
+
+/*
+ * Wind an F_SETLK for 'flock' to the child being recovered
+ * (local->lock_recovery_child); the reply goes to afr_recover_lock_cbk().
+ */
+int
+afr_recover_lock (call_frame_t *frame, xlator_t *this,
+                  struct flock *flock)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+        int32_t       lock_recovery_child = 0;
+
+        priv  = this->private;
+        local = frame->local;
+
+        lock_recovery_child = local->lock_recovery_child;
+
+        STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
+                           (void *) (long) lock_recovery_child,
+                           priv->children[lock_recovery_child],
+                           priv->children[lock_recovery_child]->fops->lk,
+                           local->fd, F_SETLK, flock);
+
+        return 0;
+}
+
+/*
+ * Return 1 if 'lock' carries the end-of-list marker (l_type equal to
+ * GF_LK_EOL), 0 otherwise. The lock is only inspected, never modified.
+ */
+static int
+is_afr_lock_eol (struct flock *lock)
+{
+        int ret = 0;
+
+        if (lock->l_type == GF_LK_EOL)
+                ret = 1;
+
+        return ret;
+}
+
+/*
+ * Callback for the F_GETLK_FD query. Ends the attempt on error or on the
+ * EOL marker; otherwise replays the returned lock via afr_recover_lock().
+ */
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int32_t op_ret, int32_t op_errno, struct flock *lock)
+{
+        if (op_ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Failed to get locks on fd");
+                goto cleanup;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Got a lock on fd");
+
+        if (is_afr_lock_eol (lock)) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Reached EOL on locks on fd");
+                goto cleanup;
+        }
+
+        afr_recover_lock (frame, this, lock);
+
+        return 0;
+
+cleanup:
+        afr_lock_recovery_cleanup (frame, this);
+
+        return 0;
+}
+
+/*
+ * Pick a source child for local->fd and start querying its locks with
+ * F_GETLK_FD. Returns -1 if no source child is found, 0 otherwise.
+ */
+static int
+afr_lock_recovery (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t   *local = NULL;
+        afr_private_t *priv  = NULL;
+        fd_t          *fd    = NULL;
+        int           ret = 0;
+        int32_t       source_child = 0;
+        struct flock  flock = {0,};
+
+        priv  = this->private;
+        local = frame->local;
+
+        fd = local->fd;
+
+        source_child = afr_get_source_lock_recovery (this, fd);
+        if (source_child < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not recover locks due to lock "
+                        "split brain");
+                ret = -1;
+                goto out;
+        }
+
+        local->source_child = source_child;
+
+        /* the flock can be zero filled as we're querying incrementally
+           the locks held on the fd.
+        */
+        STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+                           (void *) (long) source_child,
+                           priv->children[source_child],
+                           priv->children[source_child]->fops->lk,
+                           local->fd, F_GETLK_FD, &flock);
+
+out:
+        return ret;
+}
+
+
+/* Record in the fd context that 'fd' is open on child 'child_index'. */
+static int
+afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
+{
+        afr_fd_ctx_t *fdctx = NULL;
+        uint64_t      tmp = 0;
+        int           ret = 0;
+
+        ret = fd_ctx_get (fd, this, &tmp);
+        if (ret)
+                goto out;
+
+        fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+        fdctx->opened_on[child_index] = 1;
+
+out:
+        return ret;
+}
+
+/*
+ * Callback for the open wound by afr_lock_recovery_preopen(). On success,
+ * start lock recovery and then mark the fd as opened on the child.
+ */
+int32_t
+afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                               int32_t op_ret, int32_t op_errno, fd_t *fd)
+{
+        int32_t child_index = (long )cookie;
+        int ret = 0;
+
+        if (op_ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Reopen during lock-recovery failed");
+                goto cleanup;
+        }
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Open succeeded => proceed to recover locks");
+
+        ret = afr_lock_recovery (frame, this);
+        if (ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Lock recovery failed");
+                goto cleanup;
+        }
+
+        ret = afr_mark_fd_opened (this, fd, child_index);
+        if (ret) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Marking fd open failed");
+                goto cleanup;
+        }
+
+        return 0;
+
+cleanup:
+        afr_lock_recovery_cleanup (frame, this);
+        return 0;
+}
+
+/*
+ * Open local->fd on the child being recovered before replaying locks.
+ *
+ * NOTE(review): the inode_path() result is not checked before strrchr()
+ * and 'loc' is not released in this function -- confirm both are safe.
+ */
+static int
+afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
+{
+        afr_private_t *priv  = NULL;
+        afr_local_t   *local = NULL;
+        uint64_t       tmp   = 0;
+        afr_fd_ctx_t  *fdctx = NULL;
+        loc_t          loc   = {0,};
+        int32_t        child_index = 0;
+        int            ret   = 0;
+
+        priv  = this->private;
+        local = frame->local;
+
+        GF_ASSERT (local && local->fd);
+
+        ret = fd_ctx_get (local->fd, this, &tmp);
+        fdctx = (afr_fd_ctx_t *) (long) tmp;
+        GF_ASSERT (fdctx);
+
+        child_index = local->lock_recovery_child;
+
+        inode_path (local->fd->inode, NULL, (char **)&loc.path);
+        loc.name   = strrchr (loc.path, '/');
+        loc.inode  = inode_ref (local->fd->inode);
+        loc.parent = inode_parent (local->fd->inode, 0, NULL);
+
+
+        STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
+                           (void *)(long) child_index,
+                           priv->children[child_index],
+                           priv->children[child_index]->fops->open,
+                           &loc, fdctx->flags, local->fd,
+                           fdctx->wbflags);
+
+        return 0;
+}
+
+/*
+ * Return 1 if the fd context marks 'fd' as opened on 'child_index', 0 if
+ * not. NOTE(review): when fd_ctx_get() fails its non-zero return value is
+ * passed back as-is, which a truthiness test reads as "opened".
+ */
+static int
+is_fd_opened (fd_t *fd, int32_t child_index)
+{
+        afr_fd_ctx_t *fdctx = NULL;
+        uint64_t      tmp = 0;
+        int           ret = 0;
+
+        ret = fd_ctx_get (fd, THIS, &tmp);
+        if (ret)
+                goto out;
+
+        fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+        if (fdctx->opened_on[child_index])
+                ret = 1;
+
+out:
+        return ret;
+}
+
+/*
+ * Entry point for lock recovery on child 'child_index': take every fd
+ * queued on priv->saved_fds and start recovery for it, opening the fd on
+ * that child first if it is not marked opened there. Returns 0, or -1
+ * when the frame or the local cannot be allocated.
+ *
+ * NOTE(review): one frame/local is shared by every queued fd although the
+ * wound calls may still be in flight -- confirm this is safe.
+ */
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
+{
+        call_frame_t    *frame     = NULL;
+        afr_private_t   *priv      = NULL;
+        afr_local_t     *local     = NULL;
+        afr_locked_fd_t *locked_fd = NULL;
+        afr_locked_fd_t *tmp       = NULL;
+        int              ret       = 0;
+        struct list_head locks_list;
+
+
+        priv = this->private;
+
+        if (list_empty (&priv->saved_fds))
+                goto out;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        local = GF_CALLOC (1, sizeof (*local),
+                           gf_afr_mt_afr_local_t);
+        if (!local) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "Out of memory");
+                /* the frame has no local yet; release it rather than leak it */
+                STACK_DESTROY (frame->root);
+                ret = -1;
+                goto out;
+        }
+
+        AFR_LOCAL_INIT (local, priv);
+        if (!local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Out of memory");
+                ret = -1;
+                goto out;
+        }
+
+        frame->local = local;
+
+        INIT_LIST_HEAD (&locks_list);
+
+        pthread_mutex_lock (&priv->mutex);
+        {
+                list_splice_init (&priv->saved_fds, &locks_list);
+        }
+        pthread_mutex_unlock (&priv->mutex);
+
+        list_for_each_entry_safe (locked_fd, tmp,
+                                  &locks_list, list) {
+
+                list_del_init (&locked_fd->list);
+
+                local->fd                  = locked_fd->fd;
+                local->lock_recovery_child = child_index;
+                local->locked_fd           = locked_fd;
+
+                if (!is_fd_opened (locked_fd->fd, child_index)) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "attempting open before lock "
+                                "recovery");
+                        afr_lock_recovery_preopen (frame, this);
+                } else {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "attempting lock recovery "
+                                "without a preopen");
+                        afr_lock_recovery (frame, this);
+                }
+        }
+
+out:
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 68b4a1e305e..005847b412f 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -307,6 +307,10 @@ typedef struct _afr_local {
 
         afr_internal_lock_t internal_lock;
 
+        afr_locked_fd_t *locked_fd;
+        int32_t          source_child;
+        int32_t          lock_recovery_child;
+
         dict_t  *dict;
 
         int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this);
@@ -666,6 +670,9 @@ afr_notify (xlator_t *this, int32_t event,
             void *data, ...);
 
 int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+
+int
 afr_save_locked_fd (xlator_t *this, fd_t *fd);
 
 int
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index f085594243c..7c8ead8b383 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -825,7 +825,7 @@ out:
 static void
 pl_mark_eol_lock (posix_lock_t *lock)
 {
-        lock->user_flock.l_type = GF_LK_RECLK;
+        lock->user_flock.l_type = GF_LK_EOL;
         return;
 }
 
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index c36484c46cc..9b718963214 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -42,7 +42,7 @@ __delete_reserve_lock (posix_lock_t *lock)
 void
 __destroy_reserve_lock (posix_lock_t *lock)
 {
-        FREE (lock);
+        GF_FREE (lock);
 }
 
 /* Return true if the two reservelks have exactly same lock boundaries */
@@ -125,7 +125,7 @@ __reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
                         list_del_init (&conf->list);
                         gf_log (this->name, GF_LOG_TRACE,
                                 "Removing the matching reservelk for setlk to progress");
-                        FREE (conf);
+                        GF_FREE (conf);
                         ret = 0;
                 } else {
                         gf_log (this->name, GF_LOG_TRACE,
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index 4ad7fc2b1ec..37cae88b7d2 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -164,7 +164,7 @@ __delete_client_lock (client_posix_lock_t *lock)
 static void
 __destroy_client_lock (client_posix_lock_t *lock)
 {
-        free (lock);
+        GF_FREE (lock);
 }
 
 /* Subtract two locks */
@@ -861,7 +861,7 @@ int
 client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx)
 {
         call_frame_t        *frame = NULL;
-        clnt_local_t *local = NULL;
+        clnt_local_t        *local = NULL;
         client_posix_lock_t *lock  = NULL;
 
         struct flock reserve_flock;
@@ -895,6 +895,7 @@ client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx)
 
         construct_reserve_lock (lock, frame, &reserve_flock);
 
+        frame->local    = local;
         local->fdctx    = fdctx;
         local->client_lock = lock;
 
|