From c527449c0504828378f33ed32d1e7fe2e2c51c6f Mon Sep 17 00:00:00 2001 From: Raghavendra G Date: Wed, 10 Sep 2014 09:05:22 +0530 Subject: cluster/dht: synchronize rename and file-migration Change-Id: I4f243c946f76d440680b651235f925e3d0ebf0fd Signed-off-by: Raghavendra G Reviewed-on: http://review.gluster.org/8523 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur BUG: 1139998 Reviewed-on: http://review.gluster.org/8681 Reviewed-by: Kaleb KEITHLEY --- xlators/cluster/dht/src/dht-common.h | 4 + xlators/cluster/dht/src/dht-rebalance.c | 59 +++++-- xlators/cluster/dht/src/dht-rename.c | 272 ++++++++++++++++++++++++++++---- 3 files changed, 292 insertions(+), 43 deletions(-) diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index a8275816b5e..3049d366403 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -24,6 +24,7 @@ #define GF_DHT_LOOKUP_UNHASHED_ON 1 #define GF_DHT_LOOKUP_UNHASHED_AUTO 2 #define DHT_PATHINFO_HEADER "DISTRIBUTE:" +#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate" #include @@ -203,6 +204,7 @@ struct dht_local { xlator_t *first_up_subvol; gf_boolean_t added_link; + gf_boolean_t is_linkfile; struct dht_skip_linkto_unlink skip_unlink; @@ -828,5 +830,7 @@ dht_unlock_inodelk (call_frame_t *frame, dht_lock_t **lk_array, int lk_count, dht_lock_t * dht_lock_new (xlator_t *this, xlator_t *xl, loc_t *loc, short type, const char *domain); +void +dht_lock_array_free (dht_lock_t **lk_array, int count); #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 725e0c8c7b0..2c26650253c 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -692,18 +692,21 @@ int dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int flag) { - int ret = -1; - struct iatt new_stbuf = {0,}; - struct iatt stbuf = {0,}; - struct iatt empty_iatt = {0,}; - ia_prot_t src_ia_prot = {0,}; - fd_t *src_fd = NULL; - fd_t *dst_fd = NULL; - dict_t *dict = NULL; - dict_t *xattr = NULL; - dict_t *xattr_rsp = NULL; - int file_has_holes = 0; - int rcvd_enoent_from_src = 0; + int ret = -1; + struct iatt new_stbuf = {0,}; + struct iatt stbuf = {0,}; + struct iatt empty_iatt = {0,}; + ia_prot_t src_ia_prot = {0,}; + fd_t *src_fd = NULL; + fd_t *dst_fd = NULL; + dict_t *dict = NULL; + dict_t *xattr = NULL; + dict_t *xattr_rsp = NULL; + int file_has_holes = 0; + int rcvd_enoent_from_src = 0; + struct gf_flock flock = {0, }; + loc_t tmp_loc = {0, }; + gf_boolean_t locked = _gf_false; gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -719,6 +722,24 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + flock.l_type = F_WRLCK; + + tmp_loc.inode = inode_ref (loc->inode); + uuid_copy (tmp_loc.gfid, loc->gfid); + + ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW, + &flock, NULL, NULL); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "migrate file failed: " + "%s: failed to lock file on %s (%s)", + loc->path, from->name, strerror (-ret)); + ret = -1; + goto out; + } + + locked = _gf_true; + /* Phase 1 - Data migration is in progress from now on */ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL); if (ret) { @@ -935,6 +956,18 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = 0; out: + if (locked) { + flock.l_type = F_UNLCK; + + ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, + F_SETLK, &flock, NULL, NULL); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to unlock file on %s (%s)", + loc->path, from->name, strerror (-ret)); + } + } + if (dict) dict_unref (dict); @@ -948,6 +981,8 @@ out: if (src_fd) syncop_close (src_fd); + loc_wipe (&tmp_loc); + return ret; } diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index c206c9de364..c19d47298c9 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -21,6 +21,54 @@ #include "dht-common.h" #include "defaults.h" +int +dht_rename_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + + local = frame->local; + + DHT_STRIP_PHASE1_FLAGS (&local->stbuf); + DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, + &local->stbuf, &local->preoldparent, + &local->postoldparent, &local->preparent, + &local->postparent, NULL); + return 0; +} + +int +dht_rename_unlock (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + int op_ret = -1; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + op_ret = dht_unlock_inodelk (frame, local->lock.locks, + local->lock.lk_count, + dht_rename_unlock_cbk); + if (op_ret < 0) { + uuid_utoa_r (local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); + + gf_log (this->name, GF_LOG_WARNING, + "winding unlock inodelk failed " + "rename (%s:%s:%s %s:%s:%s), " + "stale locks left on bricks", + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + dht_rename_unlock_cbk (frame, NULL, this, 0, 0, NULL); + } + + return 0; +} int dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -67,16 +115,7 @@ dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unwind: this_call_cnt = dht_frame_return (frame); if (is_last_call (this_call_cnt)) { - WIPE (&local->preoldparent); - WIPE (&local->postoldparent); - WIPE (&local->preparent); - WIPE (&local->postparent); - - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, - &local->preparent, &local->postparent, xdata); + dht_rename_unlock (frame, this); } return 0; @@ -155,9 +194,8 @@ unwind: DHT_STRIP_PHASE1_FLAGS (&local->stbuf); DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, &local->stbuf, &local->preoldparent, - &local->postoldparent, - &local->preparent, &local->postparent, NULL); - + &local->postoldparent, &local->preparent, + &local->postparent, NULL); return 0; } @@ -307,6 +345,21 @@ err: return 0; } +int +dht_rename_done (call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + + local = frame->local; + + if (local->linked == _gf_true) { + local->linked = _gf_false; + dht_linkfile_attr_heal (frame, this); + } + + dht_rename_unlock (frame, this); + return 0; +} int dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -340,11 +393,7 @@ dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, WIPE (&local->postparent); if (is_last_call (this_call_cnt)) { - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); + dht_rename_done (frame, this); } out: @@ -419,12 +468,7 @@ nolinks: WIPE (&local->preparent); WIPE (&local->postparent); - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); - + dht_rename_unlock (frame, this); return 0; } @@ -589,12 +633,7 @@ unwind: WIPE (&local->preparent); WIPE (&local->postparent); - DHT_STRIP_PHASE1_FLAGS (&local->stbuf); - DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preoldparent, - &local->postoldparent, &local->preparent, - &local->postparent, NULL); - + dht_rename_done (frame, this); return 0; cleanup: @@ -796,6 +835,175 @@ nolinks: return 0; } +int +dht_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) +{ + dht_local_t *local = NULL; + int call_cnt = 0; + + local = frame->local; + + if (op_ret < 0) { + /* The meaning of is_linkfile is overloaded here. For locking + * to work properly both rebalance and rename should acquire + * lock on datafile. The reason for sending this lookup is to + * find out whether we've acquired a lock on data file. + * Between the lookup before rename and this rename, the + * file could be migrated by a rebalance process and now this + * file this might be a linkto file. We verify that by sending + * this lookup. However, if this lookup fails we cannot really + * say whether we've acquired lock on a datafile or linkto file. + * So, we act conservatively and _assume_ + * that this is a linkfile and fail the rename operation. + */ + local->is_linkfile = _gf_true; + } else if (xattr && check_is_linkfile (inode, stbuf, xattr)) { + local->is_linkfile = _gf_true; + } + + call_cnt = dht_frame_return (frame); + if (is_last_call (call_cnt)) { + if (local->is_linkfile) { + local->op_ret = -1; + local->op_errno = EBUSY; + goto fail; + } + + dht_rename_create_links (frame); + } + + return 0; +fail: + dht_rename_unlock (frame, this); + return 0; +} + +int32_t +dht_rename_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + dht_local_t *local = NULL; + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + dict_t *xattr_req = NULL; + int i = 0; + + local = frame->local; + + if (op_ret < 0) { + uuid_utoa_r (local->loc.inode->gfid, src_gfid); + + if (local->loc2.inode) + uuid_utoa_r (local->loc2.inode->gfid, dst_gfid); + + gf_log (this->name, GF_LOG_WARNING, + "acquiring inodelk failed (%s) " + "rename (%s:%s:%s %s:%s:%s), returning EBUSY", + strerror (op_errno), + local->loc.path, src_gfid, local->src_cached->name, + local->loc2.path, dst_gfid, + local->dst_cached ? local->dst_cached->name : NULL); + + local->op_ret = -1; + local->op_errno = (op_errno == EAGAIN) ? EBUSY : op_errno; + + goto done; + } + + xattr_req = dict_new (); + if (xattr_req == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto done; + } + + op_ret = dict_set_uint32 (xattr_req, DHT_LINKFILE_KEY, 256); + if (op_ret < 0) { + local->op_ret = -1; + local->op_errno = -op_ret; + goto done; + } + + local->call_cnt = local->lock.lk_count; + + for (i = 0; i < local->lock.lk_count; i++) { + STACK_WIND (frame, dht_rename_lookup_cbk, + local->lock.locks[i]->xl, + local->lock.locks[i]->xl->fops->lookup, + &local->lock.locks[i]->loc, xattr_req); + } + + dict_unref (xattr_req); + return 0; + +done: + /* Its fine to call unlock even when no locks are acquired, as we check + * for lock->locked before winding a unlock call. + */ + dht_rename_unlock (frame, this); + + if (xattr_req) + dict_unref (xattr_req); + + return 0; +} + +int +dht_rename_lock (call_frame_t *frame) +{ + dht_local_t *local = NULL; + int count = 1, ret = -1; + dht_lock_t **lk_array = NULL; + + local = frame->local; + + if (local->dst_cached) + count++; + + lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char); + if (lk_array == NULL) + goto err; + + lk_array[0] = dht_lock_new (frame->this, local->src_cached, &local->loc, + F_WRLCK, DHT_FILE_MIGRATE_DOMAIN); + if (lk_array[0] == NULL) + goto err; + + if (local->dst_cached) { + lk_array[1] = dht_lock_new (frame->this, local->dst_cached, + &local->loc2, F_WRLCK, + DHT_FILE_MIGRATE_DOMAIN); + if (lk_array[1] == NULL) + goto err; + } + + local->lock.locks = lk_array; + local->lock.lk_count = count; + + ret = dht_nonblocking_inodelk (frame, lk_array, count, + dht_rename_lock_cbk); + if (ret < 0) { + local->lock.locks = NULL; + local->lock.lk_count = 0; + goto err; + } + + return 0; +err: + if (lk_array != NULL) { + int tmp_count = 0, i = 0; + + for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++); + + dht_lock_array_free (lk_array, tmp_count); + GF_FREE (lk_array); + } + + return -1; +} int dht_rename (call_frame_t *frame, xlator_t *this, @@ -874,7 +1082,9 @@ dht_rename (call_frame_t *frame, xlator_t *this, dht_rename_dir (frame, this); } else { local->op_ret = 0; - dht_rename_create_links (frame); + ret = dht_rename_lock (frame); + if (ret < 0) + goto err; } return 0; -- cgit