From 36ef90d6de0e97812bebb303a7fa5215ae4c6ebe Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Fri, 30 Sep 2011 12:22:34 +0530 Subject: rebalance process: propagate proper errors to user * cluster/distribute: while rebalance, differentiate between valid errors, validation check failures (may not be migrate failure), and success. * mgmt/glusterd: differentiate the errors based on errno. If a valid error has happened, mark the rebalance status as failure, and stop the rebalnace crawl. Change-Id: I2d7bd7b73d2b79bfaf892ad4364bc89830a0d5bb BUG: 3656 Reviewed-on: http://review.gluster.com/540 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-rebalance.c | 53 ++++++++++++++++++++++---- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 22 ++++++++++- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 7b04e8a2d..edb5c8cdc 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -118,7 +118,7 @@ out: } static inline int -__dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, +__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, dict_t *dict, fd_t **dst_fd) { xlator_t *this = NULL; @@ -135,7 +135,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, goto out; } - ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); + ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set gfid in dict for create", loc->path); @@ -218,7 +218,9 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, "lesser disk space (%s)", from->name, to->name, loc->path); - ret = -1; + /* this is not a 'failure', but we don't want to + consider this as 'success' too :-/ */ + ret = 1; goto out; } @@ -357,6 +359,14 @@ out: return ret; } + +/* + return values: + + -1 : failure + 0 : successfully migrated data + 1 : not a failure, but we can't migrate data as of now +*/ int dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int flag) @@ -399,15 +409,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; /* create the destination, with required modes/xattr */ - ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd); + ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf, + dict, &dst_fd); if (ret) goto out; /* Should happen on all files when 'force' option is not given */ if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) { ret = __dht_check_free_space (to, from, loc, &stbuf); - if (ret) + if (ret) { goto out; + } } /* Open the source, and also update mode/xattr */ @@ -435,6 +447,15 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data", loc->path); + /* reset the destination back to 0 */ + ret = syncop_ftruncate (to, dst_fd, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to reset the target size back to 0", + loc->path); + } + + ret = -1; goto out; } @@ -591,6 +612,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) dht_layout_t *layout = 0; xlator_t *this = NULL; dht_local_t *local = NULL; + int32_t op_errno = EINVAL; this = THIS; local = sync_frame->local; @@ -611,9 +633,24 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) "%s: failed to set inode ctx", local->loc.path); } - /* if success, errno is not checked, - if ret is -1, then let errno be 'ENOTSUP' */ - DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, ENOTSUP); + if (op_ret == -1) { + /* Failure of migration process, mostly due to write process. + as we can't preserve the exact errno, lets say there was + no space to migrate-data + */ + op_errno = ENOSPC; + } + + if (op_ret == 1) { + /* migration didn't happen, but is not a failure, let the user + understand that he doesn't have permission to migrate the + file. + */ + op_ret = -1; + op_errno = EPERM; + } + + DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno); return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 208d0b3f3..829915963 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -119,9 +119,29 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) ret = sys_lsetxattr (full_path, "distribute.migrate-data", force_string, strlen (force_string), 0); - if (ret < 0) + + /* if errno is not ENOSPC or ENOTCONN, we can still continue + with rebalance process */ + if ((ret == -1) && ((errno != ENOSPC) || + (errno != ENOTCONN))) continue; + if ((ret == -1) && (errno == ENOTCONN)) { + /* Most probably mount point went missing (mostly due + to a brick down), say rebalance failure to user, + let him restart it if everything is fine */ + volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; + break; + } + + if ((ret == -1) && (errno == ENOSPC)) { + /* rebalance process itself failed, may be + remote brick went down, or write failed due to + disk full etc etc.. */ + volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; + break; + } + LOCK (&defrag->lock); { defrag->total_files += 1; -- cgit