diff options
| author | Amar Tumballi <amar@gluster.com> | 2011-09-30 12:22:34 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vijay@gluster.com> | 2011-09-30 00:55:20 -0700 | 
| commit | 36ef90d6de0e97812bebb303a7fa5215ae4c6ebe (patch) | |
| tree | 6940ab40835274d8b71fca0cd6603fa6beb9b43f | |
| parent | ba20df9f1a218177e7c7dcc15a11143183d28243 (diff) | |
rebalance process: propagate proper errors to user
* cluster/distribute: while rebalance, differentiate between valid
  errors, validation check failures (may not be migrate failure),
  and success.
* mgmt/glusterd: differentiate the errors based on errno. If a valid
  error has happened, mark the rebalance status as failure, and
  stop the rebalnace crawl.
Change-Id: I2d7bd7b73d2b79bfaf892ad4364bc89830a0d5bb
BUG: 3656
Reviewed-on: http://review.gluster.com/540
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 53 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 22 | 
2 files changed, 66 insertions, 9 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 7b04e8a2d..edb5c8cdc 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -118,7 +118,7 @@ out:  }  static inline int -__dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf, +__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,                                   dict_t *dict, fd_t **dst_fd)  {          xlator_t *this = NULL; @@ -135,7 +135,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, loc_t *loc, struct iatt *stbuf,                  goto out;          } -        ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); +        ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR,                          "%s: failed to set gfid in dict for create", loc->path); @@ -218,7 +218,9 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,                          "lesser disk space (%s)", from->name,                          to->name, loc->path); -                ret = -1; +                /* this is not a 'failure', but we don't want to +                   consider this as 'success' too :-/ */ +                ret = 1;                  goto out;          } @@ -357,6 +359,14 @@ out:          return ret;  } + +/* +  return values: + +   -1 : failure +    0 : successfully migrated data +    1 : not a failure, but we can't migrate data as of now +*/  int  dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                    int flag) @@ -399,15 +409,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                  goto out;          /* create the destination, with required modes/xattr */ -        ret = __dht_rebalance_create_dst_file (to, loc, &stbuf, dict, &dst_fd); +        ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf, +                                               dict, &dst_fd);          if (ret)                  goto out;          /* Should happen on all files when 'force' option is not given */          if (flag != DHT_MIGRATE_EVEN_IF_LINK_EXISTS) {                  ret = __dht_check_free_space (to, from, loc, &stbuf); -                if (ret) +                if (ret) {                          goto out; +                }          }          /* Open the source, and also update mode/xattr */ @@ -435,6 +447,15 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",                          loc->path); +                /* reset the destination back to 0 */ +                ret = syncop_ftruncate (to, dst_fd, 0); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "%s: failed to reset the target size back to 0", +                                loc->path); +                } + +                ret = -1;                  goto out;          } @@ -591,6 +612,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)          dht_layout_t *layout     = 0;          xlator_t     *this       = NULL;          dht_local_t  *local      = NULL; +        int32_t       op_errno   = EINVAL;          this = THIS;          local = sync_frame->local; @@ -611,9 +633,24 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)                                  "%s: failed to set inode ctx", local->loc.path);          } -        /* if success, errno is not checked, -           if ret is -1, then let errno be 'ENOTSUP' */ -        DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, ENOTSUP); +        if (op_ret == -1) { +                /* Failure of migration process, mostly due to write process. +                   as we can't preserve the exact errno, lets say there was +                   no space to migrate-data +                */ +                op_errno = ENOSPC; +        } + +        if (op_ret == 1) { +                /* migration didn't happen, but is not a failure, let the user +                   understand that he doesn't have permission to migrate the +                   file. +                */ +                op_ret = -1; +                op_errno = EPERM; +        } + +        DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno);          return 0;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 208d0b3f3..829915963 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -119,9 +119,29 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)                  ret = sys_lsetxattr (full_path, "distribute.migrate-data",                                       force_string, strlen (force_string), 0); -                if (ret < 0) + +                /* if errno is not ENOSPC or ENOTCONN, we can still continue +                   with rebalance process */ +                if ((ret == -1) && ((errno != ENOSPC) || +                                    (errno != ENOTCONN)))                          continue; +                if ((ret == -1) && (errno == ENOTCONN)) { +                        /* Most probably mount point went missing (mostly due +                           to a brick down), say rebalance failure to user, +                           let him restart it if everything is fine */ +                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; +                        break; +                } + +                if ((ret == -1) && (errno == ENOSPC)) { +                        /* rebalance process itself failed, may be +                           remote brick went down, or write failed due to +                           disk full etc etc.. */ +                        volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; +                        break; +                } +                  LOCK (&defrag->lock);                  {                          defrag->total_files += 1;  | 
