diff options
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 19 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 96 | 
3 files changed, 92 insertions, 25 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 4555152db54..0e082e35c57 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -1153,7 +1153,7 @@ dht_dir_has_layout (dict_t *xattr, char *name);  gf_boolean_t  dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);  xlator_t * -dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, xlator_t *ignore,                                     dht_layout_t *layout, uint64_t filesize);  xlator_t *  dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 13698a9616d..05592154e30 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -315,7 +315,7 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,          LOCK (&conf->subvolume_lock);  	{ -                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, +                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,                                                                   layout, 0);                  if(!avail_subvol)                  { @@ -340,8 +340,8 @@ out:  }  static inline -int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this, -                                         dht_layout_t *layout) +int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this, xlator_t *ignore, +                            dht_layout_t *layout)  {          int ret = -1;          int i   = 0; @@ -349,6 +349,13 @@ int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,          if (!this || !layout)                  goto out; +        /* this check is meant for rebalance process. The source of the file +         * should be ignored for space check */ +        if (this == ignore) { +                goto out; +        } + +          /* check if subvol has layout errors, before selecting it */          for (i = 0; i < layout->cnt; i++) {                  if (!strcmp (layout->list[i].xlator->name, this->name) && @@ -376,7 +383,7 @@ out:  /*Get subvolume which has both space and inodes more than the min criteria*/  xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, xlator_t *ignore,                                    dht_layout_t *layout, uint64_t filesize)  {          int i = 0; @@ -398,7 +405,7 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,                  /* check if subvol has layout errors and also it is not a                   * decommissioned brick, before selecting it */                  ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], -                                                    layout); +                                                    ignore, layout);                  if (ignore_subvol)                          continue; @@ -463,7 +470,7 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,                  /* check if subvol has layout errors and also it is not a                   * decommissioned brick, before selecting it*/ -                ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], +                ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], NULL,                                                      layout);                  if (ignore_subvol)                          continue; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index d963cfb2b8f..c1174225138 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -719,27 +719,30 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc                          loc->path, to->name, strerror (-ret));          */ +        ret = syncop_fsetattr (to, fd, stbuf, +                               (GF_SET_ATTR_UID | GF_SET_ATTR_GID), +                                NULL, NULL, NULL, NULL); +        if (ret < 0) +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_MIGRATE_FILE_FAILED, +                        "chown failed for %s on %s (%s)", +                        loc->path, to->name, strerror (-ret)); +          /* Fallocate does not work for size 0, hence the check. Anyway we don't           * need to care about min-free-disk for 0 byte size file */          if (stbuf->ia_size > 0) {                  ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, NULL,                                          NULL); -                if (ret < 0) +                if (ret < 0) {                          gf_msg (this->name, GF_LOG_ERROR, 0,                                  DHT_MSG_MIGRATE_FILE_FAILED,                                  "fallocate failed for %s on %s (%s)",                                  loc->path, to->name, strerror (-ret)); +                        ret = -1; +                        goto out; +                }          } -        ret = syncop_fsetattr (to, fd, stbuf, -                               (GF_SET_ATTR_UID | GF_SET_ATTR_GID), -                                NULL, NULL, NULL, NULL); -        if (ret < 0) -                gf_msg (this->name, GF_LOG_ERROR, 0, -                        DHT_MSG_MIGRATE_FILE_FAILED, -                        "chown failed for %s on %s (%s)", -                        loc->path, to->name, strerror (-ret)); -          /* success */          ret = 0; @@ -761,7 +764,8 @@ out:  static int  __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,                          struct iatt *stbuf, int flag, dht_conf_t *conf, -                        gf_boolean_t *target_changed, xlator_t **new_subvol) +                        gf_boolean_t *target_changed, xlator_t **new_subvol, +                        gf_boolean_t *ignore_failure)  {          struct statvfs  src_statfs = {0,};          struct statvfs  dst_statfs = {0,}; @@ -773,6 +777,7 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,          uint64_t        dst_statfs_blocks = 1;          double   post_availspace = 0;          double   post_percent = 0; +        int             i = 0;          this = THIS; @@ -897,13 +902,27 @@ find_new_subvol:                  goto out;          } -        *new_subvol = dht_subvol_with_free_space_inodes (this, to, -                      layout, stbuf->ia_size); -        if (!(*new_subvol)) { +        *new_subvol = dht_subvol_with_free_space_inodes (this, to, from, layout, +                                                         stbuf->ia_size); +        if ((!(*new_subvol)) || (*new_subvol == from)) {                  gf_msg (this->name, GF_LOG_WARNING, 0,                          DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol" -                        " with space accomodating the file. Consider adding " -                        "bricks"); +                        " with space accomodating the file - %s. Consider adding " +                        "bricks", loc->path); + +                /* For remove-brick case if the source is not one of the +                 * removed-brick, do not mark the error as failure */ +                if (conf->decommission_subvols_cnt) { +                        *ignore_failure = _gf_true; +                        for (i = 0; i < conf->decommission_subvols_cnt; i++) { +                                if (conf->decommissioned_bricks[i] == from) { +                                        *ignore_failure = _gf_false; +                                         break; +                                } +                        } +                } else { +                        *ignore_failure = _gf_false; +                }                  *target_changed = _gf_false;                  ret = -1; @@ -1382,6 +1401,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          gf_boolean_t            target_changed          = _gf_false;          xlator_t                *new_target             = NULL;          xlator_t                *old_target             = NULL; +        fd_t                    *linkto_fd              = NULL; +        gf_boolean_t            ignore_failure          = _gf_false;          defrag = conf->defrag;          if (!defrag) @@ -1500,7 +1521,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          clean_dst = _gf_true;          ret = __dht_check_free_space (to, from, loc, &stbuf, flag, conf, -                                      &target_changed, &new_target); +                                      &target_changed, &new_target, &ignore_failure);          if (target_changed) {                  /* Can't handle for hardlinks. Marking this as failure */                  if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) { @@ -1544,6 +1565,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,          }          if (ret) { +                if (ignore_failure) +                        ret = 0; +                  goto out;          } @@ -1791,13 +1815,47 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,                          }                          ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL); -                        if (ret) { +                        if (ret && -ret != ESTALE && -ret != ENOENT) {                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          DHT_MSG_MIGRATE_FILE_FAILED,                                          "failed to set xattr on %s in %s (%s)",                                          loc->path, old_target->name, strerror (-ret));                                  ret = -1;                                  goto out; +                        } else if (-ret == ESTALE || -ret == ENOENT) { +                               /* The failure ESTALE indicates that the linkto +                                * file on the hashed subvol might have been deleted. +                                * In this case will create a linkto file with new target +                                * as linkto xattr value*/ +                                linkto_fd = fd_create (loc->inode, DHT_REBALANCE_PID); +                                if (!linkto_fd) { +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                DHT_MSG_MIGRATE_FILE_FAILED, +                                                "%s: fd create failed (%s)", +                                                loc->path, strerror (errno)); +                                        ret = -1; +                                        goto out; +                                } +                                ret = syncop_create (old_target, loc, O_RDWR, +                                                     DHT_LINKFILE_MODE, linkto_fd, +                                                     NULL, dict, NULL); +                                if (ret != 0 && -ret != EEXIST && -ret != ESTALE) { +                                        ret = -1; +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                DHT_MSG_MIGRATE_FILE_FAILED, +                                                "failed to create linkto file on %s in %s (%s)", +                                                loc->path, old_target->name, strerror (-ret)); +                                        goto out; +                                } else if (ret == 0) { +                                        ret = syncop_fsetattr (old_target, linkto_fd, &stbuf, +                                                               (GF_SET_ATTR_UID | GF_SET_ATTR_GID), +                                                               NULL, NULL, NULL, NULL); +                                        if (ret < 0) +                                                gf_msg (this->name, GF_LOG_ERROR, 0, +                                                DHT_MSG_MIGRATE_FILE_FAILED, +                                                "chown failed for %s on %s (%s)", +                                                loc->path, old_target->name, strerror (-ret)); +                                }                          }                 }          } @@ -2043,6 +2101,8 @@ out:                  syncop_close (dst_fd);          if (src_fd)                  syncop_close (src_fd); +        if (linkto_fd) +                syncop_close (linkto_fd);          loc_wipe (&tmp_loc);  | 
