diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 8900 |
1 files changed, 4361 insertions, 4539 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 9983429acec..7a2539dc5f5 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -8,7 +8,6 @@ cases as published by the Free Software Foundation. */ - #include "tier.h" #include "dht-common.h" #include "xlator.h" @@ -18,244 +17,230 @@ #include <signal.h> #include "events.h" -#define GF_DISK_SECTOR_SIZE 512 -#define DHT_REBALANCE_PID 4242 /* Change it if required */ -#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ -#define MAX_MIGRATE_QUEUE_COUNT 500 -#define MIN_MIGRATE_QUEUE_COUNT 200 -#define MAX_REBAL_TYPE_SIZE 16 -#define FILE_CNT_INTERVAL 600 /* 10 mins */ -#define ESTIMATE_START_INTERVAL 600 /* 10 mins */ -#define HARDLINK_MIG_INPROGRESS -2 -#define SKIP_MIGRATION_FD_POSITIVE -3 +#define GF_DISK_SECTOR_SIZE 512 +#define DHT_REBALANCE_PID 4242 /* Change it if required */ +#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ +#define MAX_MIGRATE_QUEUE_COUNT 500 +#define MIN_MIGRATE_QUEUE_COUNT 200 +#define MAX_REBAL_TYPE_SIZE 16 +#define FILE_CNT_INTERVAL 600 /* 10 mins */ +#define ESTIMATE_START_INTERVAL 600 /* 10 mins */ +#define HARDLINK_MIG_INPROGRESS -2 +#define SKIP_MIGRATION_FD_POSITIVE -3 #ifndef MAX -#define MAX(a, b) (((a) > (b))?(a):(b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif - -#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) { \ - idx++; \ - idx %= sv_cnt; \ - } +#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) \ + { \ + idx++; \ + idx %= sv_cnt; \ + } uint64_t g_totalfiles = 0; uint64_t g_totalsize = 0; - void -gf_defrag_free_dir_dfmeta (struct dir_dfmeta *meta, int local_subvols_cnt) +gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) { - int i = 0; + int i = 0; - if (meta) { - for (i = 0; i < local_subvols_cnt; i++) { - gf_dirent_free (&meta->equeue[i]); - } - - GF_FREE (meta->equeue); - GF_FREE (meta->head); - GF_FREE (meta->iterator); - GF_FREE (meta->offset_var); - GF_FREE (meta->fetch_entries); - GF_FREE (meta); + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { + gf_dirent_free(&meta->equeue[i]); } + + GF_FREE(meta->equeue); + GF_FREE(meta->head); + GF_FREE(meta->iterator); + GF_FREE(meta->offset_var); + GF_FREE(meta->fetch_entries); + GF_FREE(meta); + } } void -gf_defrag_free_container (struct dht_container *container) +gf_defrag_free_container(struct dht_container *container) { - if (container) { - gf_dirent_entry_free (container->df_entry); + if (container) { + gf_dirent_entry_free(container->df_entry); - if (container->parent_loc) { - loc_wipe (container->parent_loc); - } + if (container->parent_loc) { + loc_wipe(container->parent_loc); + } - GF_FREE (container->parent_loc); + GF_FREE(container->parent_loc); - GF_FREE (container); - } + GF_FREE(container); + } } void -dht_set_global_defrag_error (gf_defrag_info_t *defrag, int ret) +dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret) { - LOCK (&defrag->lock); - { - defrag->global_error = ret; - } - UNLOCK (&defrag->lock); - return; + LOCK(&defrag->lock); + { + defrag->global_error = ret; + } + UNLOCK(&defrag->lock); + return; } - static gf_boolean_t -dht_is_tier_command (int cmd) { - - gf_boolean_t is_tier = _gf_false; +dht_is_tier_command(int cmd) +{ + gf_boolean_t is_tier = _gf_false; - switch (cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START_TIER: case GF_DEFRAG_CMD_STATUS_TIER: case GF_DEFRAG_CMD_START_DETACH_TIER: case GF_DEFRAG_CMD_STOP_DETACH_TIER: case GF_DEFRAG_CMD_PAUSE_TIER: case GF_DEFRAG_CMD_RESUME_TIER: - is_tier = _gf_true; - break; + is_tier = _gf_true; + break; default: - break; - } - return is_tier; - + break; + } + return is_tier; } - static int -dht_send_rebalance_event (xlator_t *this, int cmd, gf_defrag_status_t status) +dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status) { - int ret = -1; - char *volname = NULL; - char *tmpstr = NULL; - char *ptr = NULL; - char *suffix = "-dht"; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - int len = 0; - - eventtypes_t event = EVENT_LAST; - - switch (status) { + int ret = -1; + char *volname = NULL; + char *tmpstr = NULL; + char *ptr = NULL; + char *suffix = "-dht"; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + int len = 0; + + eventtypes_t event = EVENT_LAST; + + switch (status) { case GF_DEFRAG_STATUS_COMPLETE: - event = EVENT_VOLUME_REBALANCE_COMPLETE; - break; + event = EVENT_VOLUME_REBALANCE_COMPLETE; + break; case GF_DEFRAG_STATUS_FAILED: - event = EVENT_VOLUME_REBALANCE_FAILED; - break; + event = EVENT_VOLUME_REBALANCE_FAILED; + break; case GF_DEFRAG_STATUS_STOPPED: - event = EVENT_VOLUME_REBALANCE_STOP; - break; + event = EVENT_VOLUME_REBALANCE_STOP; + break; default: - break; - - } - - if (dht_is_tier_command (cmd)) { - /* We should have the tier volume name*/ - conf = this->private; - defrag = conf->defrag; - volname = defrag->tier_conf.volname; - } else { - /* DHT volume */ - len = strlen (this->name) - strlen (suffix); - tmpstr = gf_strdup (this->name); - if (tmpstr) { - ptr = tmpstr + len; - if (!strcmp (ptr, suffix)) { - tmpstr[len] = '\0'; - volname = tmpstr; - } - } - } + break; + } - if (!volname) { - /* Better than nothing */ - volname = this->name; - } - - if (event != EVENT_LAST) { - gf_event (event, "volume=%s", volname); - } - - GF_FREE (tmpstr); - return ret; + if (dht_is_tier_command(cmd)) { + /* We should have the tier volume name*/ + conf = this->private; + defrag = conf->defrag; + volname = defrag->tier_conf.volname; + } else { + /* DHT volume */ + len = strlen(this->name) - strlen(suffix); + tmpstr = gf_strdup(this->name); + if (tmpstr) { + ptr = tmpstr + len; + if (!strcmp(ptr, suffix)) { + tmpstr[len] = '\0'; + volname = tmpstr; + } + } + } + + if (!volname) { + /* Better than nothing */ + volname = this->name; + } + + if (event != EVENT_LAST) { + gf_event(event, "volume=%s", volname); + } + + GF_FREE(tmpstr); + return ret; } - static void -dht_strip_out_acls (dict_t *dict) +dht_strip_out_acls(dict_t *dict) { - if (dict) { - dict_del (dict, "trusted.SGI_ACL_FILE"); - dict_del (dict, POSIX_ACL_ACCESS_XATTR); - } + if (dict) { + dict_del(dict, "trusted.SGI_ACL_FILE"); + dict_del(dict, POSIX_ACL_ACCESS_XATTR); + } } - - static int -dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count, - int32_t size, off_t offset, struct iobref *iobref, - int *fop_errno) +dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, + int32_t size, off_t offset, struct iobref *iobref, + int *fop_errno) { - int i = 0; - int ret = -1; - int start_idx = 0; - int tmp_offset = 0; - int write_needed = 0; - int buf_len = 0; - int size_pending = 0; - char *buf = NULL; - - /* loop through each vector */ - for (i = 0; i < count; i++) { - buf = vec[i].iov_base; - buf_len = vec[i].iov_len; - - for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; - start_idx += GF_DISK_SECTOR_SIZE) { - - if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { - write_needed = 1; - continue; - } - - if (write_needed) { - ret = syncop_write (to, fd, (buf + tmp_offset), - (start_idx - tmp_offset), - (offset + tmp_offset), - iobref, 0, NULL, NULL); - /* 'path' will be logged in calling function */ - if (ret < 0) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to write (%s)", - strerror (-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - - write_needed = 0; - } - tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; + int i = 0; + int ret = -1; + int start_idx = 0; + int tmp_offset = 0; + int write_needed = 0; + int buf_len = 0; + int size_pending = 0; + char *buf = NULL; + + /* loop through each vector */ + for (i = 0; i < count; i++) { + buf = vec[i].iov_base; + buf_len = vec[i].iov_len; + + for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; + start_idx += GF_DISK_SECTOR_SIZE) { + if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { + write_needed = 1; + continue; + } + + if (write_needed) { + ret = syncop_write( + to, fd, (buf + tmp_offset), (start_idx - tmp_offset), + (offset + tmp_offset), iobref, 0, NULL, NULL); + /* 'path' will be logged in calling function */ + if (ret < 0) { + gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", + strerror(-ret)); + *fop_errno = -ret; + ret = -1; + goto out; } - if ((start_idx < buf_len) || write_needed) { - /* This means, last chunk is not yet written.. write it */ - ret = syncop_write (to, fd, (buf + tmp_offset), - (buf_len - tmp_offset), - (offset + tmp_offset), iobref, 0, - NULL, NULL); - if (ret < 0) { - /* 'path' will be logged in calling function */ - gf_log (THIS->name, GF_LOG_WARNING, - "failed to write (%s)", - strerror (-ret)); - *fop_errno = -ret; - ret = -1; - goto out; - } - } + write_needed = 0; + } + tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; + } - size_pending = (size - buf_len); - if (!size_pending) - break; + if ((start_idx < buf_len) || write_needed) { + /* This means, last chunk is not yet written.. write it */ + ret = syncop_write(to, fd, (buf + tmp_offset), + (buf_len - tmp_offset), (offset + tmp_offset), + iobref, 0, NULL, NULL); + if (ret < 0) { + /* 'path' will be logged in calling function */ + gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", + strerror(-ret)); + *fop_errno = -ret; + ret = -1; + goto out; + } } - ret = size; -out: - return ret; + size_pending = (size - buf_len); + if (!size_pending) + break; + } + ret = size; +out: + return ret; } /* @@ -294,323 +279,318 @@ be converted to "0" in dht_migrate_file. */ int32_t -gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, int *fop_errno) +gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno) { - int32_t ret = -1; - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *linkto_subvol = NULL; - data_t *data = NULL; - struct iatt iatt = {0,}; - int32_t op_errno = 0; - dht_conf_t *conf = NULL; - gf_loglevel_t loglevel = 0; - dict_t *link_xattr = NULL; - dict_t *dict = NULL; - dict_t *xattr_rsp = NULL; - struct iatt stbuf = {0,}; - + int32_t ret = -1; + xlator_t *cached_subvol = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *linkto_subvol = NULL; + data_t *data = NULL; + struct iatt iatt = { + 0, + }; + int32_t op_errno = 0; + dht_conf_t *conf = NULL; + gf_loglevel_t loglevel = 0; + dict_t *link_xattr = NULL; + dict_t *dict = NULL; + dict_t *xattr_rsp = NULL; + struct iatt stbuf = { + 0, + }; + + *fop_errno = EINVAL; + + GF_VALIDATE_OR_GOTO("defrag", loc, out); + GF_VALIDATE_OR_GOTO("defrag", loc->name, out); + GF_VALIDATE_OR_GOTO("defrag", this, out); + GF_VALIDATE_OR_GOTO("defrag", this->private, out); + + conf = this->private; + + if (gf_uuid_is_null(loc->pargfid)) { + gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "loc->pargfid is NULL for %s", + loc->path); *fop_errno = EINVAL; + ret = -1; + goto out; + } + + if (gf_uuid_is_null(loc->gfid)) { + gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "loc->gfid is NULL for %s", + loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } - GF_VALIDATE_OR_GOTO ("defrag", loc, out); - GF_VALIDATE_OR_GOTO ("defrag", loc->name, out); - GF_VALIDATE_OR_GOTO ("defrag", this, out); - GF_VALIDATE_OR_GOTO ("defrag", this->private, out); - - conf = this->private; - - if (gf_uuid_is_null (loc->pargfid)) { - gf_msg ("", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "loc->pargfid is NULL for %s", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - if (gf_uuid_is_null (loc->gfid)) { - gf_msg ("", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "loc->gfid is NULL for %s", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - link_xattr = dict_new (); - if (!link_xattr) { - ret = -1; - *fop_errno = ENOMEM; - goto out; - } - - /* - Parallel migration can lead to migration of the hard link multiple - times which can lead to data loss. Hence, adding a fresh lookup to - decide whether migration is required or not. - - Elaborating the scenario for let say 10 hardlinks [link{1..10}]: - Let say the first hard link "link1" does the setxattr of the - new hashed subvolume info on the cached file. As there are multiple - threads working, we might have already all the links created on the - new hashed by the time we reach hardlink let say link5. Now the - number of links on hashed is equal to that of cached. Hence, file - migration will happen for link6. - - Cached Hashed - --------T link6 rwxrwxrwx link6 - - Now post above state all the link file on the cached will be zero - byte linkto files. Hence, if we still do migration for the following - files link{7..10}, we will end up migrating 0 data leading to data - loss. - Hence, a lookup can make sure whether we need to migrate the - file or not. - */ - - dict = dict_new (); - if (!dict) { - ret = -1; - *fop_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "could not allocate memory for dict"); - goto out; - } - - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); + link_xattr = dict_new(); + if (!link_xattr) { + ret = -1; + *fop_errno = ENOMEM; + goto out; + } + + /* + Parallel migration can lead to migration of the hard link multiple + times which can lead to data loss. Hence, adding a fresh lookup to + decide whether migration is required or not. + + Elaborating the scenario for let say 10 hardlinks [link{1..10}]: + Let say the first hard link "link1" does the setxattr of the + new hashed subvolume info on the cached file. As there are multiple + threads working, we might have already all the links created on the + new hashed by the time we reach hardlink let say link5. Now the + number of links on hashed is equal to that of cached. Hence, file + migration will happen for link6. + + Cached Hashed + --------T link6 rwxrwxrwx link6 + + Now post above state all the link file on the cached will be zero + byte linkto files. Hence, if we still do migration for the following + files link{7..10}, we will end up migrating 0 data leading to data + loss. + Hence, a lookup can make sure whether we need to migrate the + file or not. + */ + + dict = dict_new(); + if (!dict) { + ret = -1; + *fop_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "could not allocate memory for dict"); + goto out; + } + + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set 'linkto' key in dict", + loc->path); + goto out; + } + + ret = syncop_lookup(this, loc, &stbuf, NULL, dict, &xattr_rsp); + if (ret) { + /*Ignore ENOENT and ESTALE as file might have been + migrated already*/ + if (-ret == ENOENT || -ret == ESTALE) { + ret = -2; + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:%s lookup failed with ret = %d", loc->path, + ret); + *fop_errno = -ret; + ret = -1; + goto out; + } + + cached_subvol = dht_subvol_get_cached(this, loc->inode); + if (!cached_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to get cached subvol" + " for %s on %s", + loc->name, this->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to get hashed subvol" + " for %s on %s", + loc->name, this->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + /* Hardlink migration happens only with remove-brick. So this condition will + * be true only when the migration has happened. In case hardlinks are + * migrated for rebalance case, remove this check. Having this check here + * avoid redundant calls below*/ + if (hashed_subvol == cached_subvol) { + ret = -2; + goto out; + } + + gf_log(this->name, GF_LOG_INFO, + "Attempting to migrate hardlink %s " + "with gfid %s from %s -> %s", + loc->name, uuid_utoa(loc->gfid), cached_subvol->name, + hashed_subvol->name); + + data = dict_get(xattr_rsp, conf->link_xattr_name); + /* set linkto on cached -> hashed if not present, else link it */ + if (!data) { + ret = dict_set_str(link_xattr, conf->link_xattr_name, + hashed_subvol->name); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set 'linkto' key in dict", loc->path); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Failed to set dictionary value:" + " key = %s for %s", + conf->link_xattr_name, loc->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = syncop_lookup (this, loc, &stbuf, NULL, dict, &xattr_rsp); + ret = syncop_setxattr(cached_subvol, loc, link_xattr, 0, NULL, NULL); if (ret) { - /*Ignore ENOENT and ESTALE as file might have been - migrated already*/ - if (-ret == ENOENT || -ret == ESTALE) { - ret = -2; - goto out; - } - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:%s lookup failed with ret = %d", - loc->path, ret); - *fop_errno = -ret; - ret = -1; - goto out; - } - - cached_subvol = dht_subvol_get_cached (this, loc->inode); - if (!cached_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to get cached subvol" - " for %s on %s", loc->name, this->name); - *fop_errno = EINVAL; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :" + "Linkto setxattr failed %s -> %s", + cached_subvol->name, loc->name); + *fop_errno = -ret; + ret = -1; + goto out; } - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (!hashed_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to get hashed subvol" - " for %s on %s", loc->name, this->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - /* Hardlink migration happens only with remove-brick. So this condition will - * be true only when the migration has happened. In case hardlinks are migrated - * for rebalance case, remove this check. Having this check here avoid redundant - * calls below*/ - if (hashed_subvol == cached_subvol) { - ret = -2; - goto out; - } - - gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s " - "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid), - cached_subvol->name, hashed_subvol->name); - - data = dict_get (xattr_rsp, conf->link_xattr_name); - /* set linkto on cached -> hashed if not present, else link it */ - if (!data) { - ret = dict_set_str (link_xattr, conf->link_xattr_name, - hashed_subvol->name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Failed to set dictionary value:" - " key = %s for %s", - conf->link_xattr_name, loc->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (cached_subvol, loc, link_xattr, 0, NULL, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :" - "Linkto setxattr failed %s -> %s", - cached_subvol->name, - loc->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + gf_msg_debug(this->name, 0, + "hardlink target subvol created on %s " + ",cached %s, file %s", + hashed_subvol->name, cached_subvol->name, loc->path); - gf_msg_debug (this->name, 0, "hardlink target subvol created on %s " - ",cached %s, file %s", - hashed_subvol->name, cached_subvol->name, loc->path); - - ret = -2; - goto out; + ret = -2; + goto out; + } else { + linkto_subvol = dht_linkfile_subvol(this, NULL, NULL, xattr_rsp); + if (!linkto_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR, + "Failed to get " + "linkto subvol for %s", + loc->name); } else { - linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattr_rsp); - if (!linkto_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_ERROR, - "Failed to get " - "linkto subvol for %s", loc->name); - } else { - hashed_subvol = linkto_subvol; - } - - ret = syncop_link (hashed_subvol, loc, loc, &iatt, NULL, NULL); - if (ret) { - op_errno = -ret; - ret = -1; - - loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : \ - GF_LOG_ERROR; - gf_msg (this->name, loglevel, op_errno, - DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, - "link of %s -> %s" - " failed on subvol %s", loc->name, - uuid_utoa(loc->gfid), - hashed_subvol->name); - if (op_errno != EEXIST) { - *fop_errno = op_errno; - goto out; - } - } else { - gf_msg_debug (this->name, 0, "syncop_link successful for" - " hardlink %s on subvol %s, cached %s", loc->path, - hashed_subvol->name, cached_subvol->name); - - } + hashed_subvol = linkto_subvol; } - ret = syncop_lookup (hashed_subvol, loc, &iatt, NULL, NULL, NULL); + ret = syncop_link(hashed_subvol, loc, loc, &iatt, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed :Failed lookup %s on %s ", - loc->name, hashed_subvol->name); - - *fop_errno = -ret; - ret = -1; - goto out; + op_errno = -ret; + ret = -1; + + loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_ERROR; + gf_msg(this->name, loglevel, op_errno, + DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, + "link of %s -> %s" + " failed on subvol %s", + loc->name, uuid_utoa(loc->gfid), hashed_subvol->name); + if (op_errno != EEXIST) { + *fop_errno = op_errno; + goto out; + } + } else { + gf_msg_debug(this->name, 0, + "syncop_link successful for" + " hardlink %s on subvol %s, cached %s", + loc->path, hashed_subvol->name, cached_subvol->name); } + } - /* There is a race where on the target subvol for the hardlink - * (note: hash subvol for the hardlink might differ from this), some - * other client(non-rebalance) would have created a linkto file for that - * hardlink as part of lookup. So let say there are 10 hardlinks, on the - * 5th hardlink it self the hardlinks might have migrated. Now for - * (6..10th) hardlinks the cached and target would be same as the file - * has already migrated. Hence this check is needed */ - if (cached_subvol == hashed_subvol) { - gf_msg_debug (this->name, 0, "source %s and destination %s " - "for hardlink %s are same", cached_subvol->name, - hashed_subvol->name, loc->path); - ret = -2; - goto out; - } + ret = syncop_lookup(hashed_subvol, loc, &iatt, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed :Failed lookup %s on %s ", loc->name, + hashed_subvol->name); - if (iatt.ia_nlink == stbuf.ia_nlink) { - ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol, - GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, - fop_errno); - if (ret) { - goto out; - } - } + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* There is a race where on the target subvol for the hardlink + * (note: hash subvol for the hardlink might differ from this), some + * other client(non-rebalance) would have created a linkto file for that + * hardlink as part of lookup. So let say there are 10 hardlinks, on the + * 5th hardlink it self the hardlinks might have migrated. Now for + * (6..10th) hardlinks the cached and target would be same as the file + * has already migrated. Hence this check is needed */ + if (cached_subvol == hashed_subvol) { + gf_msg_debug(this->name, 0, + "source %s and destination %s " + "for hardlink %s are same", + cached_subvol->name, hashed_subvol->name, loc->path); ret = -2; + goto out; + } + + if (iatt.ia_nlink == stbuf.ia_nlink) { + ret = dht_migrate_file(this, loc, cached_subvol, hashed_subvol, + GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, fop_errno); + if (ret) { + goto out; + } + } + ret = -2; out: - if (link_xattr) - dict_unref (link_xattr); + if (link_xattr) + dict_unref(link_xattr); - if (xattr_rsp) - dict_unref (xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } - - static int -__check_file_has_hardlink (xlator_t *this, loc_t *loc, - struct iatt *stbuf, dict_t *xattrs, int flags, - gf_defrag_info_t *defrag, dht_conf_t *conf, int *fop_errno) +__check_file_has_hardlink(xlator_t *this, loc_t *loc, struct iatt *stbuf, + dict_t *xattrs, int flags, gf_defrag_info_t *defrag, + dht_conf_t *conf, int *fop_errno) { - int ret = 0; + int ret = 0; - if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { - ret = 0; - return ret; - } - if (stbuf->ia_nlink > 1) { - /* support for decomission */ - if (flags == GF_DHT_MIGRATE_HARDLINK) { - synclock_lock (&conf->link_lock); - ret = gf_defrag_handle_hardlink - (this, loc, fop_errno); - synclock_unlock (&conf->link_lock); - /* - Returning zero will force the file to be remigrated. - Checkout gf_defrag_handle_hardlink for more information. - */ - if (ret && ret != -2) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to migrate file with link", - loc->path); - } - } else { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migration skipped for:" - "%s: file has hardlinks", loc->path); - *fop_errno = ENOTSUP; - ret = 1; - } - } + if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { + ret = 0; + return ret; + } + if (stbuf->ia_nlink > 1) { + /* support for decomission */ + if (flags == GF_DHT_MIGRATE_HARDLINK) { + synclock_lock(&conf->link_lock); + ret = gf_defrag_handle_hardlink(this, loc, fop_errno); + synclock_unlock(&conf->link_lock); + /* + Returning zero will force the file to be remigrated. + Checkout gf_defrag_handle_hardlink for more information. + */ + if (ret && ret != -2) { + gf_msg(this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to migrate file with link", + loc->path); + } + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migration skipped for:" + "%s: file has hardlinks", + loc->path); + *fop_errno = ENOTSUP; + ret = 1; + } + } - return ret; + return ret; } - /* return values 0 : File will be migrated @@ -620,964 +600,911 @@ __check_file_has_hardlink (xlator_t *this, loc_t *loc, -1 : failure */ static int -__is_file_migratable (xlator_t *this, loc_t *loc, - struct iatt *stbuf, dict_t *xattrs, int flags, - gf_defrag_info_t *defrag, dht_conf_t *conf, - int *fop_errno) +__is_file_migratable(xlator_t *this, loc_t *loc, struct iatt *stbuf, + dict_t *xattrs, int flags, gf_defrag_info_t *defrag, + dht_conf_t *conf, int *fop_errno) { - int ret = -1; - int lock_count = 0; - - if (IA_ISDIR (stbuf->ia_type)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: migrate-file called on directory", loc->path); - *fop_errno = EISDIR; - ret = -1; - goto out; - } - - if (!conf->lock_migration_enabled) { - ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, - &lock_count); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: Unable to get lock count for file", - loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - if (lock_count) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: File has locks." - " Skipping file migration", loc->path); - *fop_errno = ENOTSUP; - ret = 1; - goto out; - } - } + int ret = -1; + int lock_count = 0; + + if (IA_ISDIR(stbuf->ia_type)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: migrate-file called on directory", + loc->path); + *fop_errno = EISDIR; + ret = -1; + goto out; + } - /* Check if file has hardlink*/ - ret = __check_file_has_hardlink (this, loc, stbuf, xattrs, - flags, defrag, conf, fop_errno); + if (!conf->lock_migration_enabled) { + ret = dict_get_int32(xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: Unable to get lock count for file", + loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + if (lock_count) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: File has locks." + " Skipping file migration", + loc->path); + *fop_errno = ENOTSUP; + ret = 1; + goto out; + } + } + + /* Check if file has hardlink*/ + ret = __check_file_has_hardlink(this, loc, stbuf, xattrs, flags, defrag, + conf, fop_errno); out: - return ret; + return ret; } - static int -__dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, - loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, - int *fop_errno) +__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from, + loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, + int *fop_errno) { - int ret = -1; - int ret2 = -1; - fd_t *fd = NULL; - struct iatt new_stbuf = {0,}; - struct iatt check_stbuf= {0,}; - dht_conf_t *conf = NULL; - dict_t *dict = NULL; - dict_t *xdata = NULL; - - conf = this->private; + int ret = -1; + int ret2 = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = { + 0, + }; + struct iatt check_stbuf = { + 0, + }; + dht_conf_t *conf = NULL; + dict_t *dict = NULL; + dict_t *xdata = NULL; + + conf = this->private; + + dict = dict_new(); + if (!dict) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "dictionary allocation failed for" + "path:%s", + loc->path); + goto out; + } + ret = dict_set_gfuuid(dict, "gfid-req", stbuf->ia_gfid, true); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = gfid-req", loc->path); + goto out; + } + + ret = dict_set_str(dict, conf->link_xattr_name, from->name); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + conf->link_xattr_name); + goto out; + } + + fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!fd) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: fd create failed (destination)", loc->path); + goto out; + } - dict = dict_new (); - if (!dict) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, "dictionary allocation failed for" - "path:%s", loc->path); - goto out; - } - ret = dict_set_gfuuid (dict, "gfid-req", stbuf->ia_gfid, true); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = gfid-req", - loc->path); - goto out; + if (!!dht_is_tier_xlator(this)) { + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)", + loc->path); + goto out; } - ret = dict_set_str (dict, conf->link_xattr_name, from->name); + ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", - loc->path, conf->link_xattr_name); - goto out; - } - - fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!fd) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed (destination)", - loc->path); - goto out; - } - - if (!!dht_is_tier_xlator (this)) { - xdata = dict_new (); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: dict_new failed)", - loc->path); - goto out; - } - - ret = dict_set_int32 (xdata, GF_CLEAN_WRITE_PROTECTION, 1); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "%s: failed to set dictionary value: key = %s ", - loc->path, GF_CLEAN_WRITE_PROTECTION); - goto out; - } - } - - ret = syncop_lookup (to, loc, &new_stbuf, NULL, xdata, NULL); - if (!ret) { - /* File exits in the destination, check if gfid matches */ - if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "file %s exists in %s with different gfid", - loc->path, to->name); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - } - if ((ret < 0) && (-ret != ENOENT)) { - /* File exists in destination, but not accessible */ - gf_msg (THIS->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to lookup file", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - - /* Create the destination with LINKFILE mode, and linkto xattr, - if the linkfile already exists, just open the file */ - if (!ret) { - /* - * File already present, just open the file. - */ - ret = syncop_open (to, loc, O_RDWR, fd, NULL, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to open %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - } else { - ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, - &new_stbuf, dict, NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to create %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - } - - fd_bind (fd); + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", loc->path, + GF_CLEAN_WRITE_PROTECTION); + goto out; + } + } + + ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL); + if (!ret) { + /* File exits in the destination, check if gfid matches */ + if (gf_uuid_compare(stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "file %s exists in %s with different gfid", loc->path, + to->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + } + if ((ret < 0) && (-ret != ENOENT)) { + /* File exists in destination, but not accessible */ + gf_msg(THIS->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to lookup file", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } - /*Reason of doing lookup after create again: - *In the create, there is some time-gap between opening fd at the - *server (posix_layer) and binding it in server (incrementing fd count), - *so if in that time-gap, if other process sends unlink considering it - *as a linkto file, because inode->fd count will be 0, so file will be - *unlinked at the backend. And because further operations are performed - *on fd, so though migration will be done but will end with no file - *at the backend. + /* Create the destination with LINKFILE mode, and linkto xattr, + if the linkfile already exists, just open the file */ + if (!ret) { + /* + * File already present, just open the file. */ - - ret = syncop_lookup (to, loc, &check_stbuf, NULL, NULL, NULL); - if (!ret) { - - if (gf_uuid_compare (stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_GFID_MISMATCH, - "file %s exists in %s with different gfid," - "found in lookup after create", - loc->path, to->name); - *fop_errno = EINVAL; - ret = -1; - goto out; + ret = syncop_open(to, loc, O_RDWR, fd, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to open %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + } else { + ret = syncop_create(to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, &new_stbuf, + dict, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to create %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + } + + fd_bind(fd); + + /*Reason of doing lookup after create again: + *In the create, there is some time-gap between opening fd at the + *server (posix_layer) and binding it in server (incrementing fd count), + *so if in that time-gap, if other process sends unlink considering it + *as a linkto file, because inode->fd count will be 0, so file will be + *unlinked at the backend. And because further operations are performed + *on fd, so though migration will be done but will end with no file + *at the backend. + */ + + ret = syncop_lookup(to, loc, &check_stbuf, NULL, NULL, NULL); + if (!ret) { + if (gf_uuid_compare(stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, + "file %s exists in %s with different gfid," + "found in lookup after create", + loc->path, to->name); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + } + + if (-ret == ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: file does not exist" + "on %s", + loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + ret = syncop_fsetattr(to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + NULL, NULL, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "chown failed for %s on %s", loc->path, to->name); + } + + /* No need to bother about 0 byte size files */ + if (stbuf->ia_size > 0) { + if (conf->use_fallocate) { + ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL); + if (ret < 0) { + if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) { + conf->use_fallocate = _gf_false; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "fallocate failed for %s on %s", loc->path, + to->name); + + *fop_errno = -ret; + + /* fallocate does not release the space + * in some cases + */ + ret2 = syncop_ftruncate(to, fd, 0, NULL, NULL, NULL, NULL); + if (ret2 < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret2, + DHT_MSG_MIGRATE_FILE_FAILED, + "ftruncate failed for " + "%s on %s", + loc->path, to->name); + } + goto out; } - + } } - if (-ret == ENOENT) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, "%s: file does not exist" - "on %s", loc->path, to->name); + if (!conf->use_fallocate) { + ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL, + NULL); + if (ret < 0) { *fop_errno = -ret; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "ftruncate failed for %s on %s", loc->path, to->name); + } } + } - ret = syncop_fsetattr (to, fd, stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID), - NULL, NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "chown failed for %s on %s", - loc->path, to->name); - } + /* success */ + ret = 0; - /* No need to bother about 0 byte size files */ - if (stbuf->ia_size > 0) { - if (conf->use_fallocate) { - ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, - NULL, NULL); - if (ret < 0) { - if (ret == -EOPNOTSUPP || ret == -EINVAL || - ret == -ENOSYS) { - conf->use_fallocate = _gf_false; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "fallocate failed for %s on %s", - loc->path, to->name); - - *fop_errno = -ret; - - /* fallocate does not release the space - * in some cases - */ - ret2 = syncop_ftruncate (to, fd, 0, - NULL, NULL, - NULL, NULL); - if (ret2 < 0) { - gf_msg (this->name, - GF_LOG_WARNING, -ret2, - DHT_MSG_MIGRATE_FILE_FAILED, - "ftruncate failed for " - "%s on %s", - loc->path, to->name); - } - goto out; - } - } - } - - if (!conf->use_fallocate) { - ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, - NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "ftruncate failed for %s on %s", - loc->path, to->name); - } - } - } - - /* success */ - ret = 0; - - if (dst_fd) - *dst_fd = fd; + if (dst_fd) + *dst_fd = fd; out: - if (ret) { - if (fd) { - fd_unref (fd); - } + if (ret) { + if (fd) { + fd_unref(fd); } - if (dict) - dict_unref (dict); - - if (xdata) - dict_unref (dict); + } + if (dict) + dict_unref(dict); + if (xdata) + dict_unref(dict); - return ret; + return ret; } static int -__dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, - loc_t *loc, struct iatt *stbuf, int flag, - dht_conf_t *conf, gf_boolean_t *target_changed, - xlator_t **new_subvol, int *fop_errno) +__dht_check_free_space(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc, + struct iatt *stbuf, int flag, dht_conf_t *conf, + gf_boolean_t *target_changed, xlator_t **new_subvol, + int *fop_errno) { - struct statvfs src_statfs = {0,}; - struct statvfs dst_statfs = {0,}; - int ret = -1; - dict_t *xdata = NULL; - dht_layout_t *layout = NULL; - uint64_t src_statfs_blocks = 1; - uint64_t dst_statfs_blocks = 1; - double dst_post_availspacepercent = 0; - double src_post_availspacepercent = 0; - uint64_t file_blocks = 0; - uint64_t src_total_blocks = 0; - uint64_t dst_total_blocks = 0; - - xdata = dict_new (); - if (!xdata) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_NO_MEMORY, - "failed to allocate dictionary"); - goto out; - } - - ret = dict_set_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set " - GF_INTERNAL_IGNORE_DEEM_STATFS" in dict"); - ret = -1; - *fop_errno = ENOMEM; - goto out; - } - - ret = syncop_statfs (from, loc, &src_statfs, xdata, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to get statfs of %s on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - ret = syncop_statfs (to, loc, &dst_statfs, xdata, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to get statfs of %s on %s", - loc->path, to->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - " - "%lu , block size - %lu ", conf->min_free_disk, - dst_statfs.f_bavail, dst_statfs.f_bsize); - - dst_statfs_blocks = dst_statfs.f_bavail * - (dst_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - src_statfs_blocks = src_statfs.f_bavail * - (src_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - dst_total_blocks = dst_statfs.f_blocks * - (dst_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - src_total_blocks = src_statfs.f_blocks * - (src_statfs.f_frsize / - GF_DISK_SECTOR_SIZE); - - /* if force option is given, do not check for space @ dst. - * Check only if space is avail for the file */ - if (flag != GF_DHT_MIGRATE_DATA) - goto check_avail_space; - - /* Check: - During rebalance `migrate-data` - Destination subvol experiences - a `reduction` in 'blocks' of free space, at the same time source - subvol gains certain 'blocks' of free space. A valid check is - necessary here to avoid erroneous move to destination where - the space could be scantily available. - With heterogeneous brick support, an actual space comparison could - prevent any files being migrated to newly added bricks if they are - smaller then the free space available on the existing bricks. - */ - if (stbuf) { - if (!conf->use_fallocate) { - file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1; - file_blocks /= GF_DISK_SECTOR_SIZE; - - if (file_blocks >= dst_statfs_blocks) { - dst_statfs_blocks = 0; - } else { - dst_statfs_blocks -= file_blocks; - } - } - - src_post_availspacepercent = - ((src_statfs_blocks + file_blocks) * 100) / src_total_blocks; - - dst_post_availspacepercent = - (dst_statfs_blocks * 100) / dst_total_blocks; - - if (dst_post_availspacepercent < src_post_availspacepercent) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "data movement of file " - "{blocks:%"PRIu64" name:(%s)} would result in " - "dst node (%s:%"PRIu64") having lower disk " - "space than the source node (%s:%"PRIu64")" - ".Skipping file.", stbuf->ia_blocks, loc->path, - to->name, dst_statfs_blocks, from->name, - src_statfs_blocks); - - /* this is not a 'failure', but we don't want to - consider this as 'success' too :-/ */ - *fop_errno = ENOSPC; - ret = 1; - goto out; - } - } + struct statvfs src_statfs = { + 0, + }; + struct statvfs dst_statfs = { + 0, + }; + int ret = -1; + dict_t *xdata = NULL; + dht_layout_t *layout = NULL; + uint64_t src_statfs_blocks = 1; + uint64_t dst_statfs_blocks = 1; + double dst_post_availspacepercent = 0; + double src_post_availspacepercent = 0; + uint64_t file_blocks = 0; + uint64_t src_total_blocks = 0; + uint64_t dst_total_blocks = 0; + + xdata = dict_new(); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "failed to allocate dictionary"); + goto out; + } + + ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); + ret = -1; + *fop_errno = ENOMEM; + goto out; + } + + ret = syncop_statfs(from, loc, &src_statfs, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to get statfs of %s on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + ret = syncop_statfs(to, loc, &dst_statfs, xdata, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to get statfs of %s on %s", loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + gf_msg_debug(this->name, 0, + "min_free_disk - %f , block available - " + "%lu , block size - %lu ", + conf->min_free_disk, dst_statfs.f_bavail, dst_statfs.f_bsize); + + dst_statfs_blocks = dst_statfs.f_bavail * + (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + src_statfs_blocks = src_statfs.f_bavail * + (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + dst_total_blocks = dst_statfs.f_blocks * + (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + src_total_blocks = src_statfs.f_blocks * + (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE); + + /* if force option is given, do not check for space @ dst. + * Check only if space is avail for the file */ + if (flag != GF_DHT_MIGRATE_DATA) + goto check_avail_space; + + /* Check: + During rebalance `migrate-data` - Destination subvol experiences + a `reduction` in 'blocks' of free space, at the same time source + subvol gains certain 'blocks' of free space. A valid check is + necessary here to avoid erroneous move to destination where + the space could be scantily available. + With heterogeneous brick support, an actual space comparison could + prevent any files being migrated to newly added bricks if they are + smaller then the free space available on the existing bricks. + */ + if (stbuf) { + if (!conf->use_fallocate) { + file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1; + file_blocks /= GF_DISK_SECTOR_SIZE; + + if (file_blocks >= dst_statfs_blocks) { + dst_statfs_blocks = 0; + } else { + dst_statfs_blocks -= file_blocks; + } + } + + src_post_availspacepercent = ((src_statfs_blocks + file_blocks) * 100) / + src_total_blocks; + + dst_post_availspacepercent = (dst_statfs_blocks * 100) / + dst_total_blocks; + + if (dst_post_availspacepercent < src_post_availspacepercent) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "data movement of file " + "{blocks:%" PRIu64 + " name:(%s)} would result in " + "dst node (%s:%" PRIu64 + ") having lower disk " + "space than the source node (%s:%" PRIu64 + ")" + ".Skipping file.", + stbuf->ia_blocks, loc->path, to->name, dst_statfs_blocks, + from->name, src_statfs_blocks); + + /* this is not a 'failure', but we don't want to + consider this as 'success' too :-/ */ + *fop_errno = ENOSPC; + ret = 1; + goto out; + } + } check_avail_space: - if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { - dst_post_availspacepercent = - (dst_statfs_blocks * 100) / dst_total_blocks; - - gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent" - " : %lf f_bavail : %lu min-free-disk: %lf", - loc->path, dst_post_availspacepercent, - dst_statfs.f_bavail, conf->min_free_disk); - - if (dst_post_availspacepercent < conf->min_free_disk) { - gf_msg (this->name, GF_LOG_WARNING, 0, 0, - "Write will cross min-free-disk for " - "file - %s on subvol - %s. Looking " - "for new subvol", loc->path, to->name); - - goto find_new_subvol; - } else { - ret = 0; - goto out; - } + if (conf->disk_unit == 'p' && dst_statfs.f_blocks) { + dst_post_availspacepercent = (dst_statfs_blocks * 100) / + dst_total_blocks; + + gf_msg_debug(this->name, 0, + "file : %s, post_availspacepercent" + " : %lf f_bavail : %lu min-free-disk: %lf", + loc->path, dst_post_availspacepercent, dst_statfs.f_bavail, + conf->min_free_disk); + + if (dst_post_availspacepercent < conf->min_free_disk) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "Write will cross min-free-disk for " + "file - %s on subvol - %s. Looking " + "for new subvol", + loc->path, to->name); + + goto find_new_subvol; + } else { + ret = 0; + goto out; } + } - if (conf->disk_unit != 'p') { - if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < - conf->min_free_disk) { - gf_msg_debug (this->name, 0, "file : %s, destination " - "frsize: %lu f_bavail : %lu " - "min-free-disk: %lf", loc->path, - dst_statfs.f_frsize, dst_statfs.f_bavail, - conf->min_free_disk); + if (conf->disk_unit != 'p') { + if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < conf->min_free_disk) { + gf_msg_debug(this->name, 0, + "file : %s, destination " + "frsize: %lu f_bavail : %lu " + "min-free-disk: %lf", + loc->path, dst_statfs.f_frsize, dst_statfs.f_bavail, + conf->min_free_disk); - gf_msg (this->name, GF_LOG_WARNING, 0, 0, "write will" - " cross min-free-disk for file - %s on subvol -" - " %s. looking for new subvol", loc->path, - to->name); + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "write will" + " cross min-free-disk for file - %s on subvol -" + " %s. looking for new subvol", + loc->path, to->name); - goto find_new_subvol; + goto find_new_subvol; - } else { - ret = 0; - goto out; - } + } else { + ret = 0; + goto out; } + } find_new_subvol: - layout = dht_layout_get (this, loc->parent); - if (!layout) { - gf_log (this->name, GF_LOG_ERROR, "Layout is NULL"); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - *new_subvol = dht_subvol_with_free_space_inodes (this, to, from, layout, - stbuf->ia_size); - if ((!(*new_subvol)) || (*new_subvol == from)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol" - " with space accommodating the file - %s. Consider " - "adding bricks", loc->path); - - *target_changed = _gf_false; - *fop_errno = ENOSPC; - ret = -1; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "new target found - %s" - " for file - %s", (*new_subvol)->name, loc->path); - *target_changed = _gf_true; - ret = 0; - } + layout = dht_layout_get(this, loc->parent); + if (!layout) { + gf_log(this->name, GF_LOG_ERROR, "Layout is NULL"); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + *new_subvol = dht_subvol_with_free_space_inodes(this, to, from, layout, + stbuf->ia_size); + if ((!(*new_subvol)) || (*new_subvol == from)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE, + "Could not find any subvol" + " with space accommodating the file - %s. Consider " + "adding bricks", + loc->path); + + *target_changed = _gf_false; + *fop_errno = ENOSPC; + ret = -1; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "new target found - %s" + " for file - %s", + (*new_subvol)->name, loc->path); + *target_changed = _gf_true; + ret = 0; + } out: - if (xdata) - dict_unref (xdata); - return ret; + if (xdata) + dict_unref(xdata); + return ret; } static int -__dht_rebalance_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, - xlator_t *from, xlator_t *to, fd_t *src, - fd_t *dst, uint64_t ia_size, int hole_exists, - int *fop_errno) +__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst, + uint64_t ia_size, int hole_exists, int *fop_errno) { - int ret = 0; - int count = 0; - off_t offset = 0; - struct iovec *vector = NULL; - struct iobref *iobref = NULL; - uint64_t total = 0; - size_t read_size = 0; - dict_t *xdata = NULL; - dht_conf_t *conf = NULL; - - conf = this->private; - /* if file size is '0', no need to enter this loop */ - while (total < ia_size) { - read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? - DHT_REBALANCE_BLKSIZE : (ia_size - total)); - - ret = syncop_readv (from, src, read_size, - offset, 0, &vector, &count, &iobref, NULL, - NULL, NULL); - if (!ret || (ret < 0)) { - *fop_errno = -ret; - break; - } - - if (hole_exists) { - ret = dht_write_with_holes (to, dst, vector, count, - ret, offset, iobref, - fop_errno); - } else { - if (!conf->force_migration && - !dht_is_tier_xlator (this)) { - xdata = dict_new (); - if (!xdata) { - gf_msg ("dht", GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "insufficient memory"); - ret = -1; - *fop_errno = ENOMEM; - break; - } - - /* Fail this write and abort rebalance if we - * detect a write from client since migration of - * this file started. This is done to avoid - * potential data corruption due to out of order - * writes from rebalance and client to the same - * region (as compared between src and dst - * files). See - * https://github.com/gluster/glusterfs/issues/308 - * for more details. - */ - ret = dict_set_int32 (xdata, - GF_AVOID_OVERWRITE, 1); - if (ret) { - gf_msg ("dht", GF_LOG_ERROR, 0, - ENOMEM, "failed to set dict"); - ret = -1; - *fop_errno = ENOMEM; - break; - } - - } - - ret = syncop_writev (to, dst, vector, count, - offset, iobref, 0, NULL, NULL, - xdata, NULL); - if (ret < 0) { - *fop_errno = -ret; - } - } - - if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && - (gf_defrag_get_pause_state (&defrag->tier_conf) != TIER_RUNNING)) { - gf_msg ("tier", GF_LOG_INFO, 0, - DHT_MSG_TIER_PAUSED, - "Migrate file paused"); - ret = -1; + int ret = 0; + int count = 0; + off_t offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; + dict_t *xdata = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { + read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) + ? DHT_REBALANCE_BLKSIZE + : (ia_size - total)); + + ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, + &iobref, NULL, NULL, NULL); + if (!ret || (ret < 0)) { + *fop_errno = -ret; + break; + } + + if (hole_exists) { + ret = dht_write_with_holes(to, dst, vector, count, ret, offset, + iobref, fop_errno); + } else { + if (!conf->force_migration && !dht_is_tier_xlator(this)) { + xdata = dict_new(); + if (!xdata) { + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "insufficient memory"); + ret = -1; + *fop_errno = ENOMEM; + break; } - if (ret < 0) { - break; + /* Fail this write and abort rebalance if we + * detect a write from client since migration of + * this file started. This is done to avoid + * potential data corruption due to out of order + * writes from rebalance and client to the same + * region (as compared between src and dst + * files). See + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ + ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); + ret = -1; + *fop_errno = ENOMEM; + break; } + } - offset += ret; - total += ret; - - GF_FREE (vector); - if (iobref) - iobref_unref (iobref); - iobref = NULL; - vector = NULL; - } - if (iobref) - iobref_unref (iobref); - GF_FREE (vector); - - if (ret >= 0) - ret = 0; - else - ret = -1; - - if (xdata) { - dict_unref (xdata); + ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, + NULL, xdata, NULL); + if (ret < 0) { + *fop_errno = -ret; + } } - return ret; -} - - -static int -__dht_rebalance_open_src_file (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, - struct iatt *stbuf, fd_t **src_fd, - gf_boolean_t *clean_src, int *fop_errno) -{ - - int ret = 0; - fd_t *fd = NULL; - dict_t *dict = NULL; - struct iatt iatt = {0,}; - dht_conf_t *conf = NULL; - - conf = this->private; - - *clean_src = _gf_false; - - fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed (source)", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; + if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && + (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { + gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, + "Migrate file paused"); + ret = -1; } - ret = syncop_open (from, loc, O_RDWR, fd, NULL, NULL); if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to open file %s on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; + break; } - fd_bind (fd); - - if (src_fd) - *src_fd = fd; + offset += ret; + total += ret; + GF_FREE(vector); + if (iobref) + iobref_unref(iobref); + iobref = NULL; + vector = NULL; + } + if (iobref) + iobref_unref(iobref); + GF_FREE(vector); + + if (ret >= 0) + ret = 0; + else ret = -1; - dict = dict_new (); - if (!dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: Could not allocate memory for dict", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - ret = dict_set_str (dict, conf->link_xattr_name, to->name); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to set xattr in dict for %s (linkto:%s)", - loc->path, to->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } + if (xdata) { + dict_unref(xdata); + } - /* Once the migration starts, the source should have 'linkto' key set - to show which is the target, so other clients can work around it */ - ret = syncop_setxattr (from, loc, dict, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set xattr on %s in %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + return ret; +} - /* Reset source mode/xattr if migration fails*/ - *clean_src = _gf_true; +static int +__dht_rebalance_open_src_file(xlator_t *this, xlator_t *from, xlator_t *to, + loc_t *loc, struct iatt *stbuf, fd_t **src_fd, + gf_boolean_t *clean_src, int *fop_errno) +{ + int ret = 0; + fd_t *fd = NULL; + dict_t *dict = NULL; + struct iatt iatt = { + 0, + }; + dht_conf_t *conf = NULL; + + conf = this->private; + + *clean_src = _gf_false; + + fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!fd) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: fd create failed (source)", loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_open(from, loc, O_RDWR, fd, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to open file %s on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } - /* mode should be (+S+T) to indicate migration is in progress */ - iatt.ia_prot = stbuf->ia_prot; - iatt.ia_type = stbuf->ia_type; - iatt.ia_prot.sticky = 1; - iatt.ia_prot.sgid = 1; + fd_bind(fd); - ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set mode on %s in %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + if (src_fd) + *src_fd = fd; - /* success */ - ret = 0; + ret = -1; + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: Could not allocate memory for dict", loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = dict_set_str(dict, conf->link_xattr_name, to->name); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set xattr in dict for %s (linkto:%s)", loc->path, + to->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + /* Once the migration starts, the source should have 'linkto' key set + to show which is the target, so other clients can work around it */ + ret = syncop_setxattr(from, loc, dict, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set xattr on %s in %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* Reset source mode/xattr if migration fails*/ + *clean_src = _gf_true; + + /* mode should be (+S+T) to indicate migration is in progress */ + iatt.ia_prot = stbuf->ia_prot; + iatt.ia_type = stbuf->ia_type; + iatt.ia_prot.sticky = 1; + iatt.ia_prot.sgid = 1; + + ret = syncop_setattr(from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set mode on %s in %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* success */ + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int -migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, - struct iatt *buf, int *fop_errno) +migrate_special_files(xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, + struct iatt *buf, int *fop_errno) { - int ret = -1; - dict_t *rsp_dict = NULL; - dict_t *dict = NULL; - char *link = NULL; - struct iatt stbuf = {0,}; - dht_conf_t *conf = this->private; - - dict = dict_new (); - if (!dict) { - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set 'linkto' key in dict", loc->path); - goto out; - } - - /* check in the destination if the file is link file */ - ret = syncop_lookup (to, loc, &stbuf, NULL, dict, &rsp_dict); - if ((ret < 0) && (-ret != ENOENT)) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: lookup failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - - /* we no more require this key */ - dict_del (dict, conf->link_xattr_name); - - /* file exists in target node, only if it is 'linkfile' its valid, - otherwise, error out */ - if (!ret) { - if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict, - conf->link_xattr_name)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: file exists in destination", loc->path); - *fop_errno = EINVAL; - ret = -1; - goto out; - } - - /* as file is linkfile, delete it */ - ret = syncop_unlink (to, loc, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to delete the linkfile", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - } - - /* Set the gfid of the source file in dict */ - ret = dict_set_gfuuid (dict, "gfid-req", buf->ia_gfid, true); + int ret = -1; + dict_t *rsp_dict = NULL; + dict_t *dict = NULL; + char *link = NULL; + struct iatt stbuf = { + 0, + }; + dht_conf_t *conf = this->private; + + dict = dict_new(); + if (!dict) { + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set 'linkto' key in dict", loc->path); + goto out; + } + + /* check in the destination if the file is link file */ + ret = syncop_lookup(to, loc, &stbuf, NULL, dict, &rsp_dict); + if ((ret < 0) && (-ret != ENOENT)) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: lookup failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* we no more require this key */ + dict_del(dict, conf->link_xattr_name); + + /* file exists in target node, only if it is 'linkfile' its valid, + otherwise, error out */ + if (!ret) { + if (!check_is_linkfile(loc->inode, &stbuf, rsp_dict, + conf->link_xattr_name)) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: file exists in destination", loc->path); + *fop_errno = EINVAL; + ret = -1; + goto out; + } + + /* as file is linkfile, delete it */ + ret = syncop_unlink(to, loc, NULL, NULL); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to set gfid in dict for create", loc->path); - goto out; - } - - /* Create the file in target */ - if (IA_ISLNK (buf->ia_type)) { - /* Handle symlinks separately */ - ret = syncop_readlink (from, loc, &link, buf->ia_size, NULL, - NULL); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: readlink on symlink failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - - ret = syncop_symlink (to, loc, link, 0, dict, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: creating symlink failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } - - goto done; + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to delete the linkfile", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + } + + /* Set the gfid of the source file in dict */ + ret = dict_set_gfuuid(dict, "gfid-req", buf->ia_gfid, true); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "%s: failed to set gfid in dict for create", loc->path); + goto out; + } + + /* Create the file in target */ + if (IA_ISLNK(buf->ia_type)) { + /* Handle symlinks separately */ + ret = syncop_readlink(from, loc, &link, buf->ia_size, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: readlink on symlink failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; } - ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot, - buf->ia_type), - makedev (ia_major (buf->ia_rdev), - ia_minor (buf->ia_rdev)), 0, dict, NULL); + ret = syncop_symlink(to, loc, link, 0, dict, NULL); if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: mknod failed", - loc->path); - *fop_errno = -ret; - ret = -1; - goto out; - } + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: creating symlink failed", + loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } + + goto done; + } + + ret = syncop_mknod(to, loc, st_mode_from_ia(buf->ia_prot, buf->ia_type), + makedev(ia_major(buf->ia_rdev), ia_minor(buf->ia_rdev)), + 0, dict, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: mknod failed", loc->path); + *fop_errno = -ret; + ret = -1; + goto out; + } done: - ret = syncop_setattr (to, loc, buf, - (GF_SET_ATTR_MTIME | - GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to perform setattr on %s", - loc->path, to->name); - *fop_errno = -ret; - } - - ret = syncop_unlink (from, loc, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: unlink failed", - loc->path); - *fop_errno = -ret; - ret = -1; - } + ret = syncop_setattr(to, loc, buf, + (GF_SET_ATTR_MTIME | GF_SET_ATTR_UID | + GF_SET_ATTR_GID | GF_SET_ATTR_MODE), + NULL, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to perform setattr on %s", loc->path, to->name); + *fop_errno = -ret; + } + + ret = syncop_unlink(from, loc, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: unlink failed", loc->path); + *fop_errno = -ret; + ret = -1; + } out: - GF_FREE (link); - if (dict) - dict_unref (dict); + GF_FREE(link); + if (dict) + dict_unref(dict); - if (rsp_dict) - dict_unref (rsp_dict); + if (rsp_dict) + dict_unref(rsp_dict); - return ret; + return ret; } static int -__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd, - xlator_t *from, ia_prot_t *src_ia_prot) +__dht_migration_cleanup_src_file(xlator_t *this, loc_t *loc, fd_t *fd, + xlator_t *from, ia_prot_t *src_ia_prot) { - int ret = -1; - dht_conf_t *conf = NULL; - struct iatt new_stbuf = {0,}; - - if (!this || !fd || !from || !src_ia_prot) { - goto out; - } - - conf = this->private; - - /*Revert source mode and xattr changes*/ - ret = syncop_fstat (from, fd, &new_stbuf, NULL, NULL); - if (ret < 0) { - /* Failed to get the stat info */ - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file cleanup failed: failed to fstat " - "file %s on %s ", loc->path, from->name); - ret = -1; - goto out; - } - - - /* Remove the sticky bit and sgid bit set, reset it to 0*/ - if (!src_ia_prot->sticky) - new_stbuf.ia_prot.sticky = 0; + int ret = -1; + dht_conf_t *conf = NULL; + struct iatt new_stbuf = { + 0, + }; + + if (!this || !fd || !from || !src_ia_prot) { + goto out; + } + + conf = this->private; + + /*Revert source mode and xattr changes*/ + ret = syncop_fstat(from, fd, &new_stbuf, NULL, NULL); + if (ret < 0) { + /* Failed to get the stat info */ + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file cleanup failed: failed to fstat " + "file %s on %s ", + loc->path, from->name); + ret = -1; + goto out; + } - if (!src_ia_prot->sgid) - new_stbuf.ia_prot.sgid = 0; + /* Remove the sticky bit and sgid bit set, reset it to 0*/ + if (!src_ia_prot->sticky) + new_stbuf.ia_prot.sticky = 0; - ret = syncop_fsetattr (from, fd, &new_stbuf, - (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), - NULL, NULL, NULL, NULL); + if (!src_ia_prot->sgid) + new_stbuf.ia_prot.sgid = 0; - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file cleanup failed:" - "%s: failed to perform fsetattr on %s ", - loc->path, from->name); - ret = -1; - goto out; - } + ret = syncop_fsetattr(from, fd, &new_stbuf, + (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL, + NULL, NULL); - ret = syncop_fremovexattr (from, fd, conf->link_xattr_name, 0, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to remove linkto xattr on %s (%s)", - loc->path, from->name, strerror (-ret)); - ret = -1; - goto out; - } + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file cleanup failed:" + "%s: failed to perform fsetattr on %s ", + loc->path, from->name); + ret = -1; + goto out; + } + + ret = syncop_fremovexattr(from, fd, conf->link_xattr_name, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to remove linkto xattr on %s (%s)", loc->path, + from->name, strerror(-ret)); + ret = -1; + goto out; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } - - /* return values: @@ -1586,1098 +1513,1078 @@ out: 1 : not a failure, but we can't migrate data as of now */ int -dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, - int flag, int *fop_errno) +dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + int flag, int *fop_errno) { - int ret = -1; - struct iatt new_stbuf = {0,}; - struct iatt stbuf = {0,}; - struct iatt empty_iatt = {0,}; - ia_prot_t src_ia_prot = {0,}; - fd_t *src_fd = NULL; - fd_t *dst_fd = NULL; - dict_t *dict = NULL; - dict_t *xattr = NULL; - dict_t *xattr_rsp = NULL; - int file_has_holes = 0; - dht_conf_t *conf = this->private; - int rcvd_enoent_from_src = 0; - struct gf_flock flock = {0, }; - struct gf_flock plock = {0, }; - loc_t tmp_loc = {0, }; - loc_t parent_loc = {0, }; - gf_boolean_t inodelk_locked = _gf_false; - gf_boolean_t entrylk_locked = _gf_false; - gf_boolean_t p_locked = _gf_false; - int lk_ret = -1; - gf_defrag_info_t *defrag = NULL; - gf_boolean_t clean_src = _gf_false; - gf_boolean_t clean_dst = _gf_false; - int log_level = GF_LOG_INFO; - gf_boolean_t delete_src_linkto = _gf_true; - lock_migration_info_t locklist; - dict_t *meta_dict = NULL; - gf_boolean_t meta_locked = _gf_false; - gf_boolean_t target_changed = _gf_false; - xlator_t *new_target = NULL; - xlator_t *old_target = NULL; - xlator_t *hashed_subvol = NULL; - fd_t *linkto_fd = NULL; - - - if (from == to) { - gf_msg_debug (this->name, 0, "destination and source are same. file %s" - " might have migrated already", loc->path); - ret = 0; - goto out; - } + int ret = -1; + struct iatt new_stbuf = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct iatt empty_iatt = { + 0, + }; + ia_prot_t src_ia_prot = { + 0, + }; + fd_t *src_fd = NULL; + fd_t *dst_fd = NULL; + dict_t *dict = NULL; + dict_t *xattr = NULL; + dict_t *xattr_rsp = NULL; + int file_has_holes = 0; + dht_conf_t *conf = this->private; + int rcvd_enoent_from_src = 0; + struct gf_flock flock = { + 0, + }; + struct gf_flock plock = { + 0, + }; + loc_t tmp_loc = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t inodelk_locked = _gf_false; + gf_boolean_t entrylk_locked = _gf_false; + gf_boolean_t p_locked = _gf_false; + int lk_ret = -1; + gf_defrag_info_t *defrag = NULL; + gf_boolean_t clean_src = _gf_false; + gf_boolean_t clean_dst = _gf_false; + int log_level = GF_LOG_INFO; + gf_boolean_t delete_src_linkto = _gf_true; + lock_migration_info_t locklist; + dict_t *meta_dict = NULL; + gf_boolean_t meta_locked = _gf_false; + gf_boolean_t target_changed = _gf_false; + xlator_t *new_target = NULL; + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, + "destination and source are same. file %s" + " might have migrated already", + loc->path); + ret = 0; + goto out; + } - /* If defrag is NULL, it should be assumed that migration is triggered - * from client */ - defrag = conf->defrag; + /* If defrag is NULL, it should be assumed that migration is triggered + * from client */ + defrag = conf->defrag; - /* migration of files from clients is restricted to non-tiered clients - * for now */ - if (!defrag && dht_is_tier_xlator (this)) { - ret = ENOTSUP; - goto out; - } + /* migration of files from clients is restricted to non-tiered clients + * for now */ + if (!defrag && dht_is_tier_xlator(this)) { + ret = ENOTSUP; + goto out; + } - if (defrag && defrag->tier_conf.is_tier) - log_level = GF_LOG_TRACE; + if (defrag && defrag->tier_conf.is_tier) + log_level = GF_LOG_TRACE; - gf_log (this->name, - log_level, "%s: attempting to move from %s to %s", - loc->path, from->name, to->name); + gf_log(this->name, log_level, "%s: attempting to move from %s to %s", + loc->path, from->name, to->name); - dict = dict_new (); - if (!dict) { - ret = -1; - *fop_errno = ENOMEM; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "Could not allocate memory for dict"); - goto out; - } - ret = dict_set_int32 (dict, conf->link_xattr_name, 256); + dict = dict_new(); + if (!dict) { + ret = -1; + *fop_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, + "Could not allocate memory for dict"); + goto out; + } + ret = dict_set_int32(dict, conf->link_xattr_name, 256); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set 'linkto' key in dict", + loc->path); + goto out; + } + + /* Do not migrate file in case lock migration is not enabled on the + * volume*/ + if (!conf->lock_migration_enabled) { + ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t)); if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set 'linkto' key in dict", loc->path); - goto out; - } - - /* Do not migrate file in case lock migration is not enabled on the - * volume*/ - if (!conf->lock_migration_enabled) { - ret = dict_set_int32 (dict, - GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t)); - if (ret) { - *fop_errno = ENOMEM; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: failed to " - "set "GLUSTERFS_POSIXLK_COUNT" key in dict", - loc->path); - goto out; - } - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "locks will be migrated" - " for file: %s", loc->path); - } - - ret = dht_build_parent_loc (this, &parent_loc, loc, fop_errno); - if (ret < 0) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to build parent loc, which is needed to " - "acquire entrylk to synchronize with renames on this " - "path. Skipping migration", loc->path); - goto out; - } - - hashed_subvol = dht_subvol_get_hashed (this, loc); - if (hashed_subvol == NULL) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: cannot find hashed subvol which is needed to " - "synchronize with renames on this path. " - "Skipping migration", loc->path); - goto out; - } - - flock.l_type = F_WRLCK; - - tmp_loc.inode = inode_ref (loc->inode); - gf_uuid_copy (tmp_loc.gfid, loc->gfid); - tmp_loc.path = gf_strdup(loc->path); - - /* this inodelk happens with flock.owner being zero. But to synchronize - * hardlink migration we need to have different lkowner for each migration - * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to - * track the fix for this. Currently synclock takes care of synchronizing - * hardlink migration. Once this bug is fixed we can avoid taking synclock */ - ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW, - &flock, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate file failed: " - "%s: failed to lock file on %s", - loc->path, from->name); - goto out; - } - - inodelk_locked = _gf_true; - - /* dht_rename has changed to use entrylk on hashed subvol for - * synchronization. So, rebalance too has to acquire an entrylk on - * hashed subvol. - */ - ret = syncop_entrylk (hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc, - loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, - NULL); - if (ret < 0) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to acquire entrylk on subvol %s", - loc->path, hashed_subvol->name); - goto out; - } - - entrylk_locked = _gf_true; - - /* Phase 1 - Data migration is in progress from now on */ - ret = syncop_lookup (from, loc, &stbuf, NULL, dict, &xattr_rsp); + *fop_errno = ENOMEM; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: failed to " + "set " GLUSTERFS_POSIXLK_COUNT " key in dict", + loc->path); + goto out; + } + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "locks will be migrated" + " for file: %s", + loc->path); + } + + ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno); + if (ret < 0) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to build parent loc, which is needed to " + "acquire entrylk to synchronize with renames on this " + "path. Skipping migration", + loc->path); + goto out; + } + + hashed_subvol = dht_subvol_get_hashed(this, loc); + if (hashed_subvol == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: cannot find hashed subvol which is needed to " + "synchronize with renames on this path. " + "Skipping migration", + loc->path); + goto out; + } + + flock.l_type = F_WRLCK; + + tmp_loc.inode = inode_ref(loc->inode); + gf_uuid_copy(tmp_loc.gfid, loc->gfid); + tmp_loc.path = gf_strdup(loc->path); + + /* this inodelk happens with flock.owner being zero. But to synchronize + * hardlink migration we need to have different lkowner for each migration + * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to + * track the fix for this. Currently synclock takes care of synchronizing + * hardlink migration. Once this bug is fixed we can avoid taking synclock + */ + ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW, + &flock, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "migrate file failed: " + "%s: failed to lock file on %s", + loc->path, from->name); + goto out; + } + + inodelk_locked = _gf_true; + + /* dht_rename has changed to use entrylk on hashed subvol for + * synchronization. So, rebalance too has to acquire an entrylk on + * hashed subvol. + */ + ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc, + loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to acquire entrylk on subvol %s", loc->path, + hashed_subvol->name); + goto out; + } + + entrylk_locked = _gf_true; + + /* Phase 1 - Data migration is in progress from now on */ + ret = syncop_lookup(from, loc, &stbuf, NULL, dict, &xattr_rsp); + if (ret) { + *fop_errno = -ret; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: lookup failed on %s", + loc->path, from->name); + goto out; + } + + /* preserve source mode, so set the same to the destination */ + src_ia_prot = stbuf.ia_prot; + + /* Check if file can be migrated */ + ret = __is_file_migratable(this, loc, &stbuf, xattr_rsp, flag, defrag, conf, + fop_errno); + if (ret) { + if (ret == HARDLINK_MIG_INPROGRESS) + ret = 0; + goto out; + } + + /* Take care of the special files */ + if (!IA_ISREG(stbuf.ia_type)) { + /* Special files */ + ret = migrate_special_files(this, from, to, loc, &stbuf, fop_errno); + goto out; + } + + /* create the destination, with required modes/xattr */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, + fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Create dst failed" + " on - %s for file - %s", + to->name, loc->path); + goto out; + } + + clean_dst = _gf_true; + + ret = __dht_check_free_space(this, to, from, loc, &stbuf, flag, conf, + &target_changed, &new_target, fop_errno); + if (target_changed) { + /* Can't handle for hardlinks. Marking this as failure */ + if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_INSUFF_SPACE, + "Exiting migration for" + " file - %s. flag - %d, stbuf.ia_nlink - %d", + loc->path, flag, stbuf.ia_nlink); + ret = -1; + goto out; + } + + ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL); if (ret) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: lookup failed on %s", - loc->path, from->name); - goto out; + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform truncate on %s (%s)", loc->path, + to->name, strerror(-ret)); } - /* preserve source mode, so set the same to the destination */ - src_ia_prot = stbuf.ia_prot; + syncop_close(dst_fd); + dst_fd = NULL; - /* Check if file can be migrated */ - ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf, - fop_errno); - if (ret) { - if (ret == HARDLINK_MIG_INPROGRESS) - ret = 0; - goto out; - } + old_target = to; + to = new_target; - /* Take care of the special files */ - if (!IA_ISREG (stbuf.ia_type)) { - /* Special files */ - ret = migrate_special_files (this, from, to, loc, &stbuf, - fop_errno); - goto out; - } + clean_dst = _gf_false; - /* create the destination, with required modes/xattr */ - ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, fop_errno); + /* if the file migration is successful to this new target, then + * update the xattr on the old destination to point the new + * destination. We need to do update this only post migration + * as in case of failure the linkto needs to point to the source + * subvol */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, + &dst_fd, fop_errno); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed" - " on - %s for file - %s", to->name, loc->path); - goto out; - } - - clean_dst = _gf_true; - - ret = __dht_check_free_space (this, to, from, loc, &stbuf, flag, conf, - &target_changed, &new_target, fop_errno); - if (target_changed) { - /* Can't handle for hardlinks. Marking this as failure */ - if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SUBVOL_INSUFF_SPACE, "Exiting migration for" - " file - %s. flag - %d, stbuf.ia_nlink - %d", - loc->path, flag, stbuf.ia_nlink); - ret = -1; - goto out; - } + gf_log(this->name, GF_LOG_ERROR, + "Create dst failed" + " on - %s for file - %s", + to->name, loc->path); + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "destination for file " + "- %s is changed to - %s", + loc->path, to->name); + clean_dst = _gf_true; + } + } + + if (ret) { + goto out; + } + + /* Open the source, and also update mode/xattr */ + ret = __dht_rebalance_open_src_file(this, from, to, loc, &stbuf, &src_fd, + &clean_src, fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: failed to open %s on %s", loc->path, + from->name); + goto out; + } + + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr(from, loc, &xattr, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, *fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to get xattr from %s", + loc->path, from->name); + ret = -1; + goto out; + } + + /* Copying posix acls to the linkto file messes up the permissions*/ + dht_strip_out_acls(xattr); + + /* Remove the linkto xattr as we don't want to overwrite the value + * set on the dst. + */ + dict_del(xattr, conf->link_xattr_name); + + /* We need to error out if this fails as having the wrong shard xattrs + * set on the dst could cause data corruption + */ + ret = syncop_fsetxattr(to, dst_fd, xattr, 0, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to set xattr on %s", loc->path, to->name); + ret = -1; + goto out; + } + if (xattr_rsp) { + /* we no more require this key */ + dict_del(dict, conf->link_xattr_name); + dict_unref(xattr_rsp); + } + + ret = syncop_fstat(from, src_fd, &stbuf, dict, &xattr_rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:failed to lookup %s on %s ", loc->path, + from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } + + /* Check again if file has hardlink */ + ret = __check_file_has_hardlink(this, loc, &stbuf, xattr_rsp, flag, defrag, + conf, fop_errno); + if (ret) { + if (ret == HARDLINK_MIG_INPROGRESS) + ret = 0; + goto out; + } + /* Try to preserve 'holes' while migrating data */ + if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) + file_has_holes = 1; + + ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes, + fop_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: failed to migrate data", loc->path); - ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, to->name, strerror (-ret)); - } + ret = -1; + goto out; + } + + /* TODO: Sync the locks */ + + ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ + + ret = syncop_fstat(from, src_fd, &new_stbuf, NULL, NULL); + if (ret < 0) { + /* Failed to get the stat info */ + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: failed to fstat file %s on %s ", loc->path, + from->name); + *fop_errno = -ret; + ret = -1; + goto out; + } - syncop_close (dst_fd); - dst_fd = NULL; + /* Lock the entire source file to prevent clients from taking a + lock on it as dht_lk does not handle file migration. - old_target = to; - to = new_target; + This still leaves a small window where conflicting locks can + be granted to different clients. If client1 requests a blocking + lock on the src file, it will be granted after the migrating + process releases its lock. If client2 requests a lock on the dst + data file, it will also be granted, but all FOPs will be redirected + to the dst data file. + */ - clean_dst = _gf_false; + /* Take meta lock */ + if (conf->lock_migration_enabled) { + meta_dict = dict_new(); + if (!meta_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "dict_new failed"); - /* if the file migration is successful to this new target, then - * update the xattr on the old destination to point the new - * destination. We need to do update this only post migration - * as in case of failure the linkto needs to point to the source - * subvol */ - ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf, - &dst_fd, fop_errno); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Create dst failed" - " on - %s for file - %s", to->name, loc->path); - goto out; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "destination for file " - "- %s is changed to - %s", loc->path, to->name); - clean_dst = _gf_true; - } + *fop_errno = ENOMEM; + ret = -1; + goto out; } + ret = dict_set_str(meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); if (ret) { - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, + "Failed to set dictionary value: key = %s," + " path = %s", + GLUSTERFS_INTERNAL_FOP_KEY, loc->path); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - /* Open the source, and also update mode/xattr */ - ret = __dht_rebalance_open_src_file (this, from, to, loc, &stbuf, &src_fd, - &clean_src, fop_errno); + ret = dict_set_int32(meta_dict, GF_META_LOCK_KEY, 1); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: failed to open %s on %s", - loc->path, from->name); - goto out; - } - - /* TODO: move all xattr related operations to fd based operations */ - ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, *fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to get xattr from %s", - loc->path, from->name); - ret = -1; - goto out; - } - - /* Copying posix acls to the linkto file messes up the permissions*/ - dht_strip_out_acls (xattr); - - /* Remove the linkto xattr as we don't want to overwrite the value - * set on the dst. - */ - dict_del (xattr, conf->link_xattr_name); - - /* We need to error out if this fails as having the wrong shard xattrs - * set on the dst could cause data corruption - */ - ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to set xattr on %s", - loc->path, to->name); - ret = -1; - goto out; - } - - if (xattr_rsp) { - /* we no more require this key */ - dict_del (dict, conf->link_xattr_name); - dict_unref (xattr_rsp); + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); + *fop_errno = ENOMEM; + ret = -1; + goto out; } - ret = syncop_fstat (from, src_fd, &stbuf, dict, &xattr_rsp); + ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:failed to lookup %s on %s ", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace syncop_setxattr metalock failed"); - /* Check again if file has hardlink */ - ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp, - flag, defrag, conf, fop_errno); - if (ret) { - if (ret == HARDLINK_MIG_INPROGRESS) - ret = 0; - goto out; + *fop_errno = -ret; + ret = -1; + goto out; + } else { + meta_locked = _gf_true; } - /* Try to preserve 'holes' while migrating data */ - if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) - file_has_holes = 1; + } + if (!conf->lock_migration_enabled) { + plock.l_type = F_WRLCK; + plock.l_start = 0; + plock.l_len = 0; + plock.l_whence = SEEK_SET; - ret = __dht_rebalance_migrate_data (this, defrag, from, to, - src_fd, dst_fd, stbuf.ia_size, - file_has_holes, fop_errno); + ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s: failed to migrate data", - loc->path); - - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: Failed to lock on %s", + loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto out; } - /* TODO: Sync the locks */ + p_locked = _gf_true; - ret = syncop_fsync (to, dst_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to fsync on %s (%s)", - loc->path, to->name, strerror (-ret)); - *fop_errno = -ret; - } + } else { + INIT_LIST_HEAD(&locklist.list); + ret = syncop_getactivelk(from, loc, &locklist, NULL, NULL); + if (ret == 0) { + gf_log(this->name, GF_LOG_INFO, "No active locks on:%s", loc->path); - /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ + } else if (ret > 0) { + ret = syncop_setactivelk(to, loc, &locklist, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_LOCK_MIGRATION_FAILED, "write lock failed on:%s", + loc->path); - ret = syncop_fstat (from, src_fd, &new_stbuf, NULL, NULL); - if (ret < 0) { - /* Failed to get the stat info */ - gf_msg ( this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: failed to fstat file %s on %s ", - loc->path, from->name); *fop_errno = -ret; ret = -1; - goto out; - } - - /* Lock the entire source file to prevent clients from taking a - lock on it as dht_lk does not handle file migration. - - This still leaves a small window where conflicting locks can - be granted to different clients. If client1 requests a blocking - lock on the src file, it will be granted after the migrating - process releases its lock. If client2 requests a lock on the dst - data file, it will also be granted, but all FOPs will be redirected - to the dst data file. - */ - - /* Take meta lock */ - - if (conf->lock_migration_enabled) { - meta_dict = dict_new (); - if (!meta_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "dict_new failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = dict_set_str (meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value: key = %s," - " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, - loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = dict_set_int32 (meta_dict, GF_META_LOCK_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace syncop_setxattr metalock failed"); - - *fop_errno = -ret; - ret = -1; - goto out; - } else { - meta_locked = _gf_true; - } - } - - if (!conf->lock_migration_enabled) { - plock.l_type = F_WRLCK; - plock.l_start = 0; - plock.l_len = 0; - plock.l_whence = SEEK_SET; - - ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: Failed to lock on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto out; - } - - p_locked = _gf_true; - + goto metaunlock; + } } else { - - INIT_LIST_HEAD (&locklist.list); - - ret = syncop_getactivelk (from, loc, &locklist, NULL, NULL); - if (ret == 0) { - gf_log (this->name, GF_LOG_INFO, "No active locks on:%s" - , loc->path); - - } else if (ret > 0) { - - ret = syncop_setactivelk (to, loc, &locklist, NULL, - NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOCK_MIGRATION_FAILED, - "write lock failed on:%s", loc->path); - - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LOCK_MIGRATION_FAILED, - "getactivelk failed for file: %s", loc->path); - *fop_errno = -ret; - } - } - - - /* source would have both sticky bit and sgid bit set, reset it to 0, - and set the source permission on destination, if it was not set - prior to setting rebalance-modes in source */ - if (!src_ia_prot.sticky) - new_stbuf.ia_prot.sticky = 0; - - if (!src_ia_prot.sgid) - new_stbuf.ia_prot.sgid = 0; - - /* TODO: if the source actually had sticky bit, or sgid bit set, - we are not handling it */ - - ret = syncop_fsetattr (to, dst_fd, &new_stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID | - GF_SET_ATTR_MODE), NULL, NULL, NULL, NULL); + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_LOCK_MIGRATION_FAILED, + "getactivelk failed for file: %s", loc->path); + *fop_errno = -ret; + } + } + + /* source would have both sticky bit and sgid bit set, reset it to 0, + and set the source permission on destination, if it was not set + prior to setting rebalance-modes in source */ + if (!src_ia_prot.sticky) + new_stbuf.ia_prot.sticky = 0; + + if (!src_ia_prot.sgid) + new_stbuf.ia_prot.sgid = 0; + + /* TODO: if the source actually had sticky bit, or sgid bit set, + we are not handling it */ + + ret = syncop_fsetattr( + to, dst_fd, &new_stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to perform setattr on %s ", + loc->path, to->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + /* Because 'futimes' is not portable */ + ret = syncop_setattr(to, loc, &new_stbuf, + (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), NULL, NULL, + NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform setattr on %s ", loc->path, to->name); + *fop_errno = -ret; + } + + if (target_changed) { + dict_del(dict, GLUSTERFS_POSIXLK_COUNT); + ret = dict_set_str(dict, conf->link_xattr_name, to->name); if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to perform setattr on %s ", - loc->path, to->name); - *fop_errno = -ret; + gf_log(this->name, GF_LOG_ERROR, + "failed to set xattr in dict for %s (linkto:%s)", loc->path, + to->name); + *fop_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = syncop_setxattr(old_target, loc, dict, 0, NULL, NULL); + if (ret && -ret != ESTALE && -ret != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "failed to set xattr on %s in %s", loc->path, + old_target->name); + *fop_errno = -ret; + ret = -1; + goto out; + } else if (-ret == ESTALE || -ret == ENOENT) { + /* The failure ESTALE indicates that the linkto + * file on the hashed subvol might have been deleted. + * In this case will create a linkto file with new target + * as linkto xattr value*/ + linkto_fd = fd_create(loc->inode, DHT_REBALANCE_PID); + if (!linkto_fd) { + gf_msg(this->name, GF_LOG_ERROR, errno, + DHT_MSG_MIGRATE_FILE_FAILED, "%s: fd create failed", + loc->path); + *fop_errno = ENOMEM; ret = -1; - goto metaunlock; - } - - /* Because 'futimes' is not portable */ - ret = syncop_setattr (to, loc, &new_stbuf, - (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), - NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform setattr on %s ", - loc->path, to->name); + goto out; + } + ret = syncop_create(old_target, loc, O_RDWR, DHT_LINKFILE_MODE, + linkto_fd, NULL, dict, NULL); + if (ret != 0 && -ret != EEXIST && -ret != ESTALE) { *fop_errno = -ret; - } - - if (target_changed) { - dict_del (dict, GLUSTERFS_POSIXLK_COUNT); - ret = dict_set_str (dict, conf->link_xattr_name, to->name); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to set xattr in dict for %s (linkto:%s)", - loc->path, to->name); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL); - if (ret && -ret != ESTALE && -ret != ENOENT) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to set xattr on %s in %s", - loc->path, old_target->name); - *fop_errno = -ret; - ret = -1; - goto out; - } else if (-ret == ESTALE || -ret == ENOENT) { - /* The failure ESTALE indicates that the linkto - * file on the hashed subvol might have been deleted. - * In this case will create a linkto file with new target - * as linkto xattr value*/ - linkto_fd = fd_create (loc->inode, DHT_REBALANCE_PID); - if (!linkto_fd) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: fd create failed", loc->path); - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - ret = syncop_create (old_target, loc, O_RDWR, - DHT_LINKFILE_MODE, linkto_fd, - NULL, dict, NULL); - if (ret != 0 && -ret != EEXIST && -ret != ESTALE) { - *fop_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "failed to create linkto file on %s in %s", - loc->path, old_target->name); - goto out; - } else if (ret == 0) { - ret = syncop_fsetattr (old_target, linkto_fd, &stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID), - NULL, NULL, NULL, NULL); - if (ret < 0) { - *fop_errno = -ret; - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "chown failed for %s on %s", - loc->path, old_target->name); - } - } + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "failed to create linkto file on %s in %s", loc->path, + old_target->name); + goto out; + } else if (ret == 0) { + ret = syncop_fsetattr(old_target, linkto_fd, &stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL, + NULL, NULL, NULL); + if (ret < 0) { + *fop_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "chown failed for %s on %s", loc->path, + old_target->name); } - } - - clean_dst = _gf_false; - - /* Posix acls are not set on DHT linkto files as part of the initial - * initial xattrs set on the dst file, so these need - * to be set on the dst file after the linkto attrs are removed. - * TODO: Optimize this. - */ - if (xattr) { - dict_unref (xattr); - xattr = NULL; - } - - /* Set only the Posix ACLs this time */ - ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, - NULL, NULL); + } + } + } + + clean_dst = _gf_false; + + /* Posix acls are not set on DHT linkto files as part of the initial + * initial xattrs set on the dst file, so these need + * to be set on the dst file after the linkto attrs are removed. + * TODO: Optimize this. + */ + if (xattr) { + dict_unref(xattr); + xattr = NULL; + } + + /* Set only the Posix ACLs this time */ + ret = syncop_getxattr(from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, NULL, + NULL); + if (ret < 0) { + if ((-ret != ENODATA) && (-ret != ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to get xattr from %s", + loc->path, from->name); + *fop_errno = -ret; + } + } else { + ret = syncop_setxattr(to, loc, xattr, 0, NULL, NULL); if (ret < 0) { - if ((-ret != ENODATA) && (-ret != ENOATTR)) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to get xattr from %s", - loc->path, from->name); - *fop_errno = -ret; - } + /* Potential problem here where Posix ACLs will + * not be set on the target file */ + + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to set xattr on %s", + loc->path, to->name); + *fop_errno = -ret; + } + } + + /* store size of previous migrated file */ + if (defrag && defrag->tier_conf.is_tier) { + if (from != TIER_HASHED_SUBVOL) { + defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; } else { - ret = syncop_setxattr (to, loc, xattr, 0, NULL, NULL); - if (ret < 0) { - /* Potential problem here where Posix ACLs will - * not be set on the target file */ - - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to set xattr on %s", - loc->path, to->name); - *fop_errno = -ret; - } - } - - /* store size of previous migrated file */ - if (defrag && defrag->tier_conf.is_tier) { - if (from != TIER_HASHED_SUBVOL) { - defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; - } else { - /* Don't delete the linkto file on the hashed subvol */ - delete_src_linkto = _gf_false; - defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; - } - } - - /* The src file is being unlinked after this so we don't need - to clean it up */ - clean_src = _gf_false; - - /* Make the source as a linkfile first before deleting it */ - empty_iatt.ia_prot.sticky = 1; - ret = syncop_fsetattr (from, src_fd, &empty_iatt, - GF_SET_ATTR_MODE, NULL, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed:" - "%s: failed to perform setattr on %s ", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - - /* Free up the data blocks on the source node, as the whole - file is migrated */ - ret = syncop_ftruncate (from, src_fd, 0, NULL, NULL, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, from->name, strerror (-ret)); - *fop_errno = -ret; - } - - /* remove the 'linkto' xattr from the destination */ - ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0, NULL); + /* Don't delete the linkto file on the hashed subvol */ + delete_src_linkto = _gf_false; + defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; + } + } + + /* The src file is being unlinked after this so we don't need + to clean it up */ + clean_src = _gf_false; + + /* Make the source as a linkfile first before deleting it */ + empty_iatt.ia_prot.sticky = 1; + ret = syncop_fsetattr(from, src_fd, &empty_iatt, GF_SET_ATTR_MODE, NULL, + NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: failed to perform setattr on %s ", + loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + /* Free up the data blocks on the source node, as the whole + file is migrated */ + ret = syncop_ftruncate(from, src_fd, 0, NULL, NULL, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform truncate on %s (%s)", loc->path, + from->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* remove the 'linkto' xattr from the destination */ + ret = syncop_fremovexattr(to, dst_fd, conf->link_xattr_name, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s: failed to perform removexattr on %s (%s)", loc->path, + to->name, strerror(-ret)); + *fop_errno = -ret; + } + + /* Do a stat and check the gfid before unlink */ + + /* + * Cached file changes its state from non-linkto to linkto file after + * migrating data. If lookup from any other mount-point is performed, + * converted-linkto-cached file will be treated as a stale and will be + * unlinked. But by this time, file is already migrated. So further + * failure because of ENOENT should not be treated as error + */ + + ret = syncop_stat(from, loc, &empty_iatt, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to do a stat on %s", loc->path, from->name); + + if (-ret != ENOENT) { + *fop_errno = -ret; + ret = -1; + goto metaunlock; + } + + rcvd_enoent_from_src = 1; + } + + if ((gf_uuid_compare(empty_iatt.ia_gfid, loc->gfid) == 0) && + (!rcvd_enoent_from_src) && delete_src_linkto) { + /* take out the source from namespace */ + ret = syncop_unlink(from, loc, NULL, NULL); if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform removexattr on %s (%s)", - loc->path, to->name, strerror (-ret)); - *fop_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to perform unlink on %s", loc->path, from->name); + *fop_errno = -ret; + ret = -1; + goto metaunlock; } + } - /* Do a stat and check the gfid before unlink */ - - /* - * Cached file changes its state from non-linkto to linkto file after - * migrating data. If lookup from any other mount-point is performed, - * converted-linkto-cached file will be treated as a stale and will be - * unlinked. But by this time, file is already migrated. So further - * failure because of ENOENT should not be treated as error - */ + ret = syncop_lookup(this, loc, NULL, NULL, NULL, NULL); + if (ret) { + gf_msg_debug(this->name, -ret, + "%s: failed to lookup the file on subvolumes", loc->path); + *fop_errno = -ret; + } - ret = syncop_stat (from, loc, &empty_iatt, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to do a stat on %s", - loc->path, from->name); - - if (-ret != ENOENT) { - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - - rcvd_enoent_from_src = 1; - } + gf_msg(this->name, log_level, 0, DHT_MSG_MIGRATE_FILE_COMPLETE, + "completed migration of %s from subvolume %s to %s", loc->path, + from->name, to->name); - - if ((gf_uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0 ) && - (!rcvd_enoent_from_src) && delete_src_linkto) { - /* take out the source from namespace */ - ret = syncop_unlink (from, loc, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to perform unlink on %s", - loc->path, from->name); - *fop_errno = -ret; - ret = -1; - goto metaunlock; - } - } - - ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); - if (ret) { - gf_msg_debug (this->name, -ret, - "%s: failed to lookup the file on subvolumes", - loc->path); - *fop_errno = -ret; - } - - gf_msg (this->name, log_level, 0, - DHT_MSG_MIGRATE_FILE_COMPLETE, - "completed migration of %s from subvolume %s to %s", - loc->path, from->name, to->name); - - ret = 0; + ret = 0; metaunlock: - if (conf->lock_migration_enabled && meta_locked) { + if (conf->lock_migration_enabled && meta_locked) { + dict_del(meta_dict, GF_META_LOCK_KEY); - dict_del (meta_dict, GF_META_LOCK_KEY); - - ret = dict_set_int32 (meta_dict, GF_META_UNLOCK_KEY, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - if (clean_dst == _gf_false) - ret = dict_set_int32 (meta_dict, "status", 1); - else - ret = dict_set_int32 (meta_dict, "status", 0); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace dict_set failed"); - - *fop_errno = ENOMEM; - ret = -1; - goto out; - } - - ret = syncop_setxattr (from, loc, meta_dict, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Trace syncop_setxattr meta unlock failed"); - - *fop_errno = -ret; - ret = -1; - goto out; - } - } - -out: - if (clean_src) { - /* Revert source mode and xattr changes*/ - lk_ret = __dht_migration_cleanup_src_file (this, loc, src_fd, - from, &src_ia_prot); - if (lk_ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to cleanup source file on %s", - loc->path, from->name); - } - } + ret = dict_set_int32(meta_dict, GF_META_UNLOCK_KEY, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); - /* reset the destination back to 0 */ - if (clean_dst) { - lk_ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL, - NULL, NULL); - if (lk_ret) { - gf_msg (this->name, GF_LOG_ERROR, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: " - "%s: failed to reset target size back to 0", - loc->path); - } + *fop_errno = ENOMEM; + ret = -1; + goto out; } - if (inodelk_locked) { - flock.l_type = F_UNLCK; + if (clean_dst == _gf_false) + ret = dict_set_int32(meta_dict, "status", 1); + else + ret = dict_set_int32(meta_dict, "status", 0); - lk_ret = syncop_inodelk (from, DHT_FILE_MIGRATE_DOMAIN, - &tmp_loc, F_SETLK, &flock, NULL, NULL); - if (lk_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock file on %s", - loc->path, from->name); - } - } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace dict_set failed"); - if (entrylk_locked) { - lk_ret = syncop_entrylk (hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, - &parent_loc, loc->name, ENTRYLK_UNLOCK, - ENTRYLK_UNLOCK, NULL, NULL); - if (lk_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock entrylk on %s", - loc->path, hashed_subvol->name); - } + *fop_errno = ENOMEM; + ret = -1; + goto out; } - if (p_locked) { - plock.l_type = F_UNLCK; - lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); + ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Trace syncop_setxattr meta unlock failed"); - if (lk_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "%s: failed to unlock file on %s", - loc->path, from->name); - } + *fop_errno = -ret; + ret = -1; + goto out; } + } - if (!dht_is_tier_xlator (this)) { - lk_ret = syncop_removexattr (to, loc, - GF_PROTECT_FROM_EXTERNAL_WRITES, - NULL, NULL); - if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { - gf_msg (this->name, GF_LOG_WARNING, -lk_ret, 0, - "%s: removexattr failed key %s", loc->path, - GF_PROTECT_FROM_EXTERNAL_WRITES); - } +out: + if (clean_src) { + /* Revert source mode and xattr changes*/ + lk_ret = __dht_migration_cleanup_src_file(this, loc, src_fd, from, + &src_ia_prot); + if (lk_ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to cleanup source file on %s", loc->path, + from->name); + } + } + + /* reset the destination back to 0 */ + if (clean_dst) { + lk_ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL); + if (lk_ret) { + gf_msg(this->name, GF_LOG_ERROR, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: " + "%s: failed to reset target size back to 0", + loc->path); + } + } + + if (inodelk_locked) { + flock.l_type = F_UNLCK; + + lk_ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, + F_SETLK, &flock, NULL, NULL); + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock file on %s", loc->path, from->name); + } + } + + if (entrylk_locked) { + lk_ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, + &parent_loc, loc->name, ENTRYLK_UNLOCK, + ENTRYLK_UNLOCK, NULL, NULL); + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock entrylk on %s", loc->path, + hashed_subvol->name); + } + } + + if (p_locked) { + plock.l_type = F_UNLCK; + lk_ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL); + + if (lk_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock file on %s", loc->path, from->name); + } + } + + if (!dht_is_tier_xlator(this)) { + lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, + NULL, NULL); + if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) { + gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_PROTECT_FROM_EXTERNAL_WRITES); } + } - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - if (xattr) - dict_unref (xattr); - if (xattr_rsp) - dict_unref (xattr_rsp); + if (xattr) + dict_unref(xattr); + if (xattr_rsp) + dict_unref(xattr_rsp); - if (dst_fd) - syncop_close (dst_fd); - if (src_fd) - syncop_close (src_fd); - if (linkto_fd) - syncop_close (linkto_fd); + if (dst_fd) + syncop_close(dst_fd); + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); - loc_wipe (&tmp_loc); - loc_wipe (&parent_loc); + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); - return ret; + return ret; } static int -rebalance_task (void *data) +rebalance_task(void *data) { - int ret = -1; - dht_local_t *local = NULL; - call_frame_t *frame = NULL; - int fop_errno = 0; + int ret = -1; + dht_local_t *local = NULL; + call_frame_t *frame = NULL; + int fop_errno = 0; - frame = data; + frame = data; - local = frame->local; + local = frame->local; - /* This function is 'synchrounous', hence if it returns, - we are done with the task */ - ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol, - local->rebalance.target_node, local->flags, - &fop_errno); + /* This function is 'synchrounous', hence if it returns, + we are done with the task */ + ret = dht_migrate_file(THIS, &local->loc, local->rebalance.from_subvol, + local->rebalance.target_node, local->flags, + &fop_errno); - return ret; + return ret; } static int -rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) +rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data) { - int32_t op_errno = EINVAL; - - if (op_ret == -1) { - /* Failure of migration process, mostly due to write process. - as we can't preserve the exact errno, lets say there was - no space to migrate-data - */ - op_errno = ENOSPC; - } else if (op_ret == 1) { - /* migration didn't happen, but is not a failure, let the user - understand that he doesn't have permission to migrate the - file. - */ - op_ret = -1; - op_errno = EPERM; - } else if (op_ret != 0) { - op_errno = -op_ret; - op_ret = -1; - } + int32_t op_errno = EINVAL; - DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL); - return 0; + if (op_ret == -1) { + /* Failure of migration process, mostly due to write process. + as we can't preserve the exact errno, lets say there was + no space to migrate-data + */ + op_errno = ENOSPC; + } else if (op_ret == 1) { + /* migration didn't happen, but is not a failure, let the user + understand that he doesn't have permission to migrate the + file. + */ + op_ret = -1; + op_errno = EPERM; + } else if (op_ret != 0) { + op_errno = -op_ret; + op_ret = -1; + } + + DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL); + return 0; } int -dht_start_rebalance_task (xlator_t *this, call_frame_t *frame) +dht_start_rebalance_task(xlator_t *this, call_frame_t *frame) { - int ret = -1; + int ret = -1; - ret = synctask_new (this->ctx->env, rebalance_task, - rebalance_task_completion, - frame, frame); - return ret; + ret = synctask_new(this->ctx->env, rebalance_task, + rebalance_task_completion, frame, frame); + return ret; } - int -gf_listener_stop (xlator_t *this) +gf_listener_stop(xlator_t *this) { - glusterfs_ctx_t *ctx = NULL; - cmd_args_t *cmd_args = NULL; - int ret = 0; - - ctx = this->ctx; - GF_ASSERT (ctx); - cmd_args = &ctx->cmd_args; - if (cmd_args->sock_file) { - ret = sys_unlink (cmd_args->sock_file); - if (ret && (ENOENT == errno)) { - ret = 0; - } - } - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - DHT_MSG_SOCKET_ERROR, - "Failed to unlink listener " - "socket %s", cmd_args->sock_file); - } - return ret; + glusterfs_ctx_t *ctx = NULL; + cmd_args_t *cmd_args = NULL; + int ret = 0; + + ctx = this->ctx; + GF_ASSERT(ctx); + cmd_args = &ctx->cmd_args; + if (cmd_args->sock_file) { + ret = sys_unlink(cmd_args->sock_file); + if (ret && (ENOENT == errno)) { + ret = 0; + } + } + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_SOCKET_ERROR, + "Failed to unlink listener " + "socket %s", + cmd_args->sock_file); + } + return ret; } void -dht_build_root_inode (xlator_t *this, inode_t **inode) +dht_build_root_inode(xlator_t *this, inode_t **inode) { - inode_table_t *itable = NULL; - uuid_t root_gfid = {0, }; + inode_table_t *itable = NULL; + uuid_t root_gfid = { + 0, + }; - itable = inode_table_new (0, this); - if (!itable) - return; + itable = inode_table_new(0, this); + if (!itable) + return; - root_gfid[15] = 1; - *inode = inode_find (itable, root_gfid); + root_gfid[15] = 1; + *inode = inode_find(itable, root_gfid); } void -dht_build_root_loc (inode_t *inode, loc_t *loc) +dht_build_root_loc(inode_t *inode, loc_t *loc) { - loc->path = "/"; - loc->inode = inode; - loc->inode->ia_type = IA_IFDIR; - memset (loc->gfid, 0, 16); - loc->gfid[15] = 1; + loc->path = "/"; + loc->inode = inode; + loc->inode->ia_type = IA_IFDIR; + memset(loc->gfid, 0, 16); + loc->gfid[15] = 1; } /* return values: 1 -> error, bug ignore and continue 0 -> proceed -1 -> error, handle it */ int32_t -gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) +gf_defrag_handle_migrate_error(int32_t op_errno, gf_defrag_info_t *defrag) { - int ret = 0; - /* if errno is not ENOTCONN, we can still continue - with rebalance process */ - if (op_errno != ENOTCONN) { - ret = 1; - goto out; - } - - if (op_errno == ENOTCONN) { - /* Most probably mount point went missing (mostly due - to a brick down), say rebalance failure to user, - let him restart it if everything is fine */ - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } + int ret = 0; + /* if errno is not ENOTCONN, we can still continue + with rebalance process */ + if (op_errno != ENOTCONN) { + ret = 1; + goto out; + } + + if (op_errno == ENOTCONN) { + /* Most probably mount point went missing (mostly due + to a brick down), say rebalance failure to user, + let him restart it if everything is fine */ + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } out: - return ret; + return ret; } static gf_boolean_t -gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size) +gf_defrag_pattern_match(gf_defrag_info_t *defrag, char *name, uint64_t size) { - gf_defrag_pattern_list_t *trav = NULL; - gf_boolean_t match = _gf_false; - gf_boolean_t ret = _gf_false; + gf_defrag_pattern_list_t *trav = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t ret = _gf_false; - GF_VALIDATE_OR_GOTO ("dht", defrag, out); + GF_VALIDATE_OR_GOTO("dht", defrag, out); - trav = defrag->defrag_pattern; - while (trav) { - if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) { - match = _gf_true; - break; - } - trav = trav->next; + trav = defrag->defrag_pattern; + while (trav) { + if (!fnmatch(trav->path_pattern, name, FNM_NOESCAPE)) { + match = _gf_true; + break; } + trav = trav->next; + } - if ((match == _gf_true) && (size >= trav->size)) - ret = _gf_true; + if ((match == _gf_true) && (size >= trav->size)) + ret = _gf_true; - out: - return ret; +out: + return ret; } -int dht_dfreaddirp_done (dht_dfoffset_ctx_t *offset_var, int cnt) { - - int i; - int result = 1; +int +dht_dfreaddirp_done(dht_dfoffset_ctx_t *offset_var, int cnt) +{ + int i; + int result = 1; - for (i = 0; i < cnt; i++) { - if (offset_var[i].readdir_done == 0) { - result = 0; - break; - } + for (i = 0; i < cnt; i++) { + if (offset_var[i].readdir_done == 0) { + result = 0; + break; } - return result; + } + return result; } -int static -gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) { - - int i; - dht_conf_t *conf = NULL; +int static gf_defrag_ctx_subvols_init(dht_dfoffset_ctx_t *offset_var, + xlator_t *this) +{ + int i; + dht_conf_t *conf = NULL; - conf = this->private; + conf = this->private; - if (!conf) - return -1; + if (!conf) + return -1; - for (i = 0; i < conf->local_subvols_cnt; i++) { - offset_var[i].this = conf->local_subvols[i]; - offset_var[i].offset = (off_t) 0; - offset_var[i].readdir_done = 0; - } + for (i = 0; i < conf->local_subvols_cnt; i++) { + offset_var[i].this = conf->local_subvols[i]; + offset_var[i].offset = (off_t)0; + offset_var[i].readdir_done = 0; + } - return 0; + return 0; } - static int -dht_get_first_non_null_index (subvol_nodeuuids_info_t *entry) +dht_get_first_non_null_index(subvol_nodeuuids_info_t *entry) { - int i = 0; - int index = 0; + int i = 0; + int index = 0; - for (i = 0; i < entry->count; i++) { - if (!gf_uuid_is_null (entry->elements[i].uuid)) { - index = i; - goto out; - } + for (i = 0; i < entry->count; i++) { + if (!gf_uuid_is_null(entry->elements[i].uuid)) { + index = i; + goto out; } + } - if (i == entry->count) { - index = -1; - } + if (i == entry->count) { + index = -1; + } out: - return index; + return index; } - /* Return value * 0 : this node does not migrate the file * 1 : this node migrates the file @@ -2688,1022 +2595,986 @@ out: */ int -gf_defrag_should_i_migrate (xlator_t *this, int local_subvol_index, uuid_t gfid) +gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid) { - int ret = 0; - int i = local_subvol_index; - char *str = NULL; - uint32_t hashval = 0; - int32_t index = 0; - dht_conf_t *conf = NULL; - char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, }; - subvol_nodeuuids_info_t *entry = NULL; - - - conf = this->private; - - /* Pure distribute. A subvol in this case - will be handled by only one node */ - - entry = &(conf->local_nodeuuids[i]); - if (entry->count == 1) { - return 1; - } - - str = uuid_utoa_r (gfid, buf); - ret = dht_hash_compute (this, 0, str, &hashval); - if (ret == 0) { - index = (hashval % entry->count); - if (entry->elements[index].info - == REBAL_NODEUUID_MINE) { - /* Index matches this node's nodeuuid.*/ - ret = 1; - goto out; - } - - /* Brick down - some other node has to migrate these files*/ - if (gf_uuid_is_null (entry->elements[index].uuid)) { - /* Fall back to the first non-null index */ - index = dht_get_first_non_null_index (entry); - - if (index == -1) { - /* None of the bricks in the subvol are up. - * CHILD_DOWN will kill the process soon */ + int ret = 0; + int i = local_subvol_index; + char *str = NULL; + uint32_t hashval = 0; + int32_t index = 0; + dht_conf_t *conf = NULL; + char buf[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + subvol_nodeuuids_info_t *entry = NULL; + + conf = this->private; + + /* Pure distribute. A subvol in this case + will be handled by only one node */ + + entry = &(conf->local_nodeuuids[i]); + if (entry->count == 1) { + return 1; + } + + str = uuid_utoa_r(gfid, buf); + ret = dht_hash_compute(this, 0, str, &hashval); + if (ret == 0) { + index = (hashval % entry->count); + if (entry->elements[index].info == REBAL_NODEUUID_MINE) { + /* Index matches this node's nodeuuid.*/ + ret = 1; + goto out; + } + + /* Brick down - some other node has to migrate these files*/ + if (gf_uuid_is_null(entry->elements[index].uuid)) { + /* Fall back to the first non-null index */ + index = dht_get_first_non_null_index(entry); + + if (index == -1) { + /* None of the bricks in the subvol are up. + * CHILD_DOWN will kill the process soon */ - return 0; - } + return 0; + } - if (entry->elements[index].info == REBAL_NODEUUID_MINE) { - /* Index matches this node's nodeuuid.*/ - ret = 1; - goto out; - } - } + if (entry->elements[index].info == REBAL_NODEUUID_MINE) { + /* Index matches this node's nodeuuid.*/ + ret = 1; + goto out; + } } + } out: - return ret; + return ret; } - int -gf_defrag_migrate_single_file (void *opaque) +gf_defrag_migrate_single_file(void *opaque) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - int ret = 0; - gf_dirent_t *entry = NULL; - struct timeval start = {0,}; - loc_t entry_loc = {0,}; - loc_t *loc = NULL; - struct iatt iatt = {0,}; - dict_t *migrate_data = NULL; - struct timeval end = {0,}; - double elapsed = {0,}; - struct dht_container *rebal_entry = NULL; - inode_t *inode = NULL; - xlator_t *hashed_subvol = NULL; - xlator_t *cached_subvol = NULL; - call_frame_t *statfs_frame = NULL; - xlator_t *old_THIS = NULL; - data_t *tmp = NULL; - int fop_errno = 0; - gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA; - char value[MAX_REBAL_TYPE_SIZE] = {0,}; - struct iatt *iatt_ptr = NULL; - gf_boolean_t update_skippedcount = _gf_true; - int i = 0; - - rebal_entry = (struct dht_container *)opaque; - if (!rebal_entry) { - gf_log ("DHT", GF_LOG_ERROR, "rebal_entry is NULL"); - ret = -1; - goto out; - } + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + int ret = 0; + gf_dirent_t *entry = NULL; + struct timeval start = { + 0, + }; + loc_t entry_loc = { + 0, + }; + loc_t *loc = NULL; + struct iatt iatt = { + 0, + }; + dict_t *migrate_data = NULL; + struct timeval end = { + 0, + }; + double elapsed = { + 0, + }; + struct dht_container *rebal_entry = NULL; + inode_t *inode = NULL; + xlator_t *hashed_subvol = NULL; + xlator_t *cached_subvol = NULL; + call_frame_t *statfs_frame = NULL; + xlator_t *old_THIS = NULL; + data_t *tmp = NULL; + int fop_errno = 0; + gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA; + char value[MAX_REBAL_TYPE_SIZE] = { + 0, + }; + struct iatt *iatt_ptr = NULL; + gf_boolean_t update_skippedcount = _gf_true; + int i = 0; + + rebal_entry = (struct dht_container *)opaque; + if (!rebal_entry) { + gf_log("DHT", GF_LOG_ERROR, "rebal_entry is NULL"); + ret = -1; + goto out; + } - this = rebal_entry->this; + this = rebal_entry->this; - conf = this->private; + conf = this->private; - defrag = conf->defrag; + defrag = conf->defrag; - loc = rebal_entry->parent_loc; + loc = rebal_entry->parent_loc; - migrate_data = rebal_entry->migrate_data; + migrate_data = rebal_entry->migrate_data; - entry = rebal_entry->df_entry; - iatt_ptr = &entry->d_stat; + entry = rebal_entry->df_entry; + iatt_ptr = &entry->d_stat; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } - - if (defrag->stats == _gf_true) { - gettimeofday (&start, NULL); - } - - if (defrag->defrag_pattern && - (gf_defrag_pattern_match (defrag, entry->d_name, - entry->d_stat.ia_size) == _gf_false)) { - gf_log (this->name, GF_LOG_ERROR, "pattern_match failed"); - goto out; - } - - memset (&entry_loc, 0, sizeof (entry_loc)); - - ret = dht_build_child_loc (this, &entry_loc, loc, entry->d_name); - if (ret) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - ret = 0; + if (defrag->stats == _gf_true) { + gettimeofday(&start, NULL); + } - gf_log (this->name, GF_LOG_ERROR, "Child loc build failed"); + if (defrag->defrag_pattern && + (gf_defrag_pattern_match(defrag, entry->d_name, + entry->d_stat.ia_size) == _gf_false)) { + gf_log(this->name, GF_LOG_ERROR, "pattern_match failed"); + goto out; + } - goto out; - } + memset(&entry_loc, 0, sizeof(entry_loc)); - if (!gf_defrag_should_i_migrate (this, rebal_entry->local_subvol_index, - entry->d_stat.ia_gfid)) { - gf_msg_debug (this->name, 0, "Don't migrate %s ", - entry_loc.path); - goto out; + ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); + if (ret) { + LOCK(&defrag->lock); + { + defrag->total_failures += 1; } + UNLOCK(&defrag->lock); - gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); + ret = 0; - gf_uuid_copy (entry_loc.pargfid, loc->gfid); + gf_log(this->name, GF_LOG_ERROR, "Child loc build failed"); - ret = syncop_lookup (this, &entry_loc, &iatt, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: %s lookup failed", - entry_loc.path); - - /* Increase failure count only for remove-brick op, so that - * user is warned to check the removed-brick for any files left - * unmigrated - */ - if (conf->decommission_subvols_cnt) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); - } + goto out; + } - ret = 0; - goto out; - } + if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, + entry->d_stat.ia_gfid)) { + gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); + goto out; + } - iatt_ptr = &iatt; + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); - hashed_subvol = dht_subvol_get_hashed (this, &entry_loc); - if (!hashed_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get hashed subvol for %s", - entry_loc.path); - ret = 0; - goto out; - } - - cached_subvol = dht_subvol_get_cached (this, entry_loc.inode); - if (!cached_subvol) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_CACHED_SUBVOL_GET_FAILED, - "Failed to get cached subvol for %s", - entry_loc.path); - - ret = 0; - goto out; - } + gf_uuid_copy(entry_loc.pargfid, loc->gfid); - if (hashed_subvol == cached_subvol) { - ret = 0; - goto out; - } + ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s lookup failed", entry_loc.path); - inode = inode_link (entry_loc.inode, entry_loc.parent, entry->d_name, &iatt); - inode_unref (entry_loc.inode); - /* use the inode returned by inode_link */ - entry_loc.inode = inode; - - old_THIS = THIS; - THIS = this; - statfs_frame = create_frame (this, this->ctx->pool); - if (!statfs_frame) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "Insufficient memory. Frame creation failed"); - ret = -1; - goto out; + /* Increase failure count only for remove-brick op, so that + * user is warned to check the removed-brick for any files left + * unmigrated + */ + if (conf->decommission_subvols_cnt) { + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + } + UNLOCK(&defrag->lock); } - /* async statfs information for honoring min-free-disk */ - dht_get_du_info (statfs_frame, this, loc); - THIS = old_THIS; + ret = 0; + goto out; + } - tmp = dict_get (migrate_data, GF_XATTR_FILE_MIGRATE_KEY); - if (tmp) { - memcpy (value, tmp->data, tmp->len); - if (strcmp (value, "force") == 0) - rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + iatt_ptr = &iatt; - if (conf->decommission_in_progress) - rebal_type = GF_DHT_MIGRATE_HARDLINK; - } + hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); + if (!hashed_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, + "Failed to get hashed subvol for %s", entry_loc.path); + ret = 0; + goto out; + } - ret = dht_migrate_file (this, &entry_loc, cached_subvol, - hashed_subvol, rebal_type, &fop_errno); - if (ret == 1) { - if (fop_errno == ENOSPC) { - gf_msg_debug (this->name, 0, "migrate-data skipped for" - " %s due to space constraints", - entry_loc.path); - - /* For remove-brick case if the source is not one of the - * removed-brick, do not mark the error as failure */ - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] == cached_subvol) { - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - update_skippedcount = _gf_false; - } - UNLOCK (&defrag->lock); - - break; - } - } - } + cached_subvol = dht_subvol_get_cached(this, entry_loc.inode); + if (!cached_subvol) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CACHED_SUBVOL_GET_FAILED, + "Failed to get cached subvol for %s", entry_loc.path); - if (update_skippedcount) { - LOCK (&defrag->lock); - { - defrag->skipped += 1; - } - UNLOCK (&defrag->lock); - - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_MIGRATE_FILE_SKIPPED, - "File migration skipped for %s.", - entry_loc.path); - } + ret = 0; + goto out; + } - } else if (fop_errno == ENOTSUP) { - gf_msg_debug (this->name, 0, "migrate-data skipped for" - " hardlink %s ", entry_loc.path); - LOCK (&defrag->lock); + if (hashed_subvol == cached_subvol) { + ret = 0; + goto out; + } + + inode = inode_link(entry_loc.inode, entry_loc.parent, entry->d_name, &iatt); + inode_unref(entry_loc.inode); + /* use the inode returned by inode_link */ + entry_loc.inode = inode; + + old_THIS = THIS; + THIS = this; + statfs_frame = create_frame(this, this->ctx->pool); + if (!statfs_frame) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "Insufficient memory. Frame creation failed"); + ret = -1; + goto out; + } + + /* async statfs information for honoring min-free-disk */ + dht_get_du_info(statfs_frame, this, loc); + THIS = old_THIS; + + tmp = dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY); + if (tmp) { + memcpy(value, tmp->data, tmp->len); + if (strcmp(value, "force") == 0) + rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS; + + if (conf->decommission_in_progress) + rebal_type = GF_DHT_MIGRATE_HARDLINK; + } + + ret = dht_migrate_file(this, &entry_loc, cached_subvol, hashed_subvol, + rebal_type, &fop_errno); + if (ret == 1) { + if (fop_errno == ENOSPC) { + gf_msg_debug(this->name, 0, + "migrate-data skipped for" + " %s due to space constraints", + entry_loc.path); + + /* For remove-brick case if the source is not one of the + * removed-brick, do not mark the error as failure */ + if (conf->decommission_subvols_cnt) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->decommissioned_bricks[i] == cached_subvol) { + LOCK(&defrag->lock); { - defrag->skipped += 1; + defrag->total_failures += 1; + update_skippedcount = _gf_false; } - UNLOCK (&defrag->lock); + UNLOCK(&defrag->lock); - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_MIGRATE_FILE_SKIPPED, - "File migration skipped for %s.", - entry_loc.path); + break; + } } + } - ret = 0; - goto out; - } else if (ret < 0) { - if (fop_errno != EEXIST) { - gf_msg (this->name, GF_LOG_ERROR, fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate-data failed for %s", entry_loc.path); - - LOCK (&defrag->lock); - { - defrag->total_failures += 1; - } - UNLOCK (&defrag->lock); - + if (update_skippedcount) { + LOCK(&defrag->lock); + { + defrag->skipped += 1; } + UNLOCK(&defrag->lock); - ret = gf_defrag_handle_migrate_error (fop_errno, defrag); + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED, + "File migration skipped for %s.", entry_loc.path); + } - if (!ret) { - gf_msg(this->name, GF_LOG_ERROR, fop_errno, - DHT_MSG_MIGRATE_FILE_FAILED, - "migrate-data on %s failed:", entry_loc.path); - } else if (ret == 1) { - ret = 0; - } + } else if (fop_errno == ENOTSUP) { + gf_msg_debug(this->name, 0, + "migrate-data skipped for" + " hardlink %s ", + entry_loc.path); + LOCK(&defrag->lock); + { + defrag->skipped += 1; + } + UNLOCK(&defrag->lock); - goto out; + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED, + "File migration skipped for %s.", entry_loc.path); } - LOCK (&defrag->lock); - { - defrag->total_files += 1; - defrag->total_data += iatt.ia_size; - } - UNLOCK (&defrag->lock); - - if (defrag->stats == _gf_true) { - gettimeofday (&end, NULL); - elapsed = (end.tv_sec - start.tv_sec) * 1e6 + - (end.tv_usec - start.tv_usec); - gf_log (this->name, GF_LOG_INFO, "Migration of " - "file:%s size:%"PRIu64" bytes took %.2f" - "secs and ret: %d", entry_loc.name, - iatt.ia_size, elapsed/1e6, ret); + ret = 0; + goto out; + } else if (ret < 0) { + if (fop_errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, "migrate-data failed for %s", + entry_loc.path); + + LOCK(&defrag->lock); + { + defrag->total_failures += 1; + } + UNLOCK(&defrag->lock); } + ret = gf_defrag_handle_migrate_error(fop_errno, defrag); + + if (!ret) { + gf_msg(this->name, GF_LOG_ERROR, fop_errno, + DHT_MSG_MIGRATE_FILE_FAILED, + "migrate-data on %s failed:", entry_loc.path); + } else if (ret == 1) { + ret = 0; + } + + goto out; + } + + LOCK(&defrag->lock); + { + defrag->total_files += 1; + defrag->total_data += iatt.ia_size; + } + UNLOCK(&defrag->lock); + + if (defrag->stats == _gf_true) { + gettimeofday(&end, NULL); + elapsed = (end.tv_sec - start.tv_sec) * 1e6 + + (end.tv_usec - start.tv_usec); + gf_log(this->name, GF_LOG_INFO, + "Migration of " + "file:%s size:%" PRIu64 + " bytes took %.2f" + "secs and ret: %d", + entry_loc.name, iatt.ia_size, elapsed / 1e6, ret); + } + out: - if (statfs_frame) { - STACK_DESTROY (statfs_frame->root); - } + if (statfs_frame) { + STACK_DESTROY(statfs_frame->root); + } - if (iatt_ptr) { - LOCK (&defrag->lock); - { - defrag->size_processed += iatt_ptr->ia_size; - } - UNLOCK (&defrag->lock); + if (iatt_ptr) { + LOCK(&defrag->lock); + { + defrag->size_processed += iatt_ptr->ia_size; } - loc_wipe (&entry_loc); - - return ret; + UNLOCK(&defrag->lock); + } + loc_wipe(&entry_loc); + return ret; } void * -gf_defrag_task (void *opaque) +gf_defrag_task(void *opaque) { - struct list_head *q_head = NULL; - struct dht_container *iterator = NULL; - gf_defrag_info_t *defrag = NULL; - int ret = 0; - pid_t pid = GF_CLIENT_PID_DEFRAG; - - defrag = (gf_defrag_info_t *)opaque; - if (!defrag) { - gf_msg ("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL"); - goto out; + struct list_head *q_head = NULL; + struct dht_container *iterator = NULL; + gf_defrag_info_t *defrag = NULL; + int ret = 0; + pid_t pid = GF_CLIENT_PID_DEFRAG; + + defrag = (gf_defrag_info_t *)opaque; + if (!defrag) { + gf_msg("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL"); + goto out; + } + + syncopctx_setfspid(&pid); + + q_head = &(defrag->queue[0].list); + + /* The following while loop will dequeue one entry from the defrag->queue + under lock. We will update the defrag->global_error only when there + is an error which is critical to stop the rebalance process. The stop + message will be intimated to other migrator threads by setting the + defrag->defrag_status to GF_DEFRAG_STATUS_FAILED. + + In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is + maintained so that crawler does not starve the file migration + workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that + crawler does not go far ahead in filling up the queue. + */ + + while (_gf_true) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); + pthread_cond_broadcast(&defrag->parallel_migration_cond); + goto out; } - syncopctx_setfspid (&pid); + pthread_mutex_lock(&defrag->dfq_mutex); + { + /*Throttle down: + If the reconfigured count is less than current thread + count, then the current thread will sleep */ + + /*TODO: Need to refactor the following block to work + *under defrag->lock. For now access + * defrag->current_thread_count and rthcount under + * dfq_mutex lock */ + while (!defrag->crawl_done && (defrag->recon_thread_count < + defrag->current_thread_count)) { + defrag->current_thread_count--; + gf_msg_debug("DHT", 0, + "Thread sleeping. " + "current thread count: %d", + defrag->current_thread_count); + + pthread_cond_wait(&defrag->df_wakeup_thread, + &defrag->dfq_mutex); + + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, + "Thread wokeup. " + "current thread count: %d", + defrag->current_thread_count); + } + + if (defrag->q_entry_count) { + iterator = list_entry(q_head->next, typeof(*iterator), list); + + gf_msg_debug("DHT", 0, + "picking entry " + "%s", + iterator->df_entry->d_name); + + list_del_init(&(iterator->list)); + + defrag->q_entry_count--; + + if ((defrag->q_entry_count < MIN_MIGRATE_QUEUE_COUNT) && + defrag->wakeup_crawler) { + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); + } + pthread_mutex_unlock(&defrag->dfq_mutex); + ret = gf_defrag_migrate_single_file((void *)iterator); - q_head = &(defrag->queue[0].list); + /*Critical errors: ENOTCONN and ENOSPACE*/ + if (ret) { + dht_set_global_defrag_error(defrag, ret); - /* The following while loop will dequeue one entry from the defrag->queue - under lock. We will update the defrag->global_error only when there - is an error which is critical to stop the rebalance process. The stop - message will be intimated to other migrator threads by setting the - defrag->defrag_status to GF_DEFRAG_STATUS_FAILED. + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is - maintained so that crawler does not starve the file migration - workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that - crawler does not go far ahead in filling up the queue. - */ + pthread_cond_broadcast(&defrag->rebalance_crawler_alarm); - while (_gf_true) { + pthread_cond_broadcast(&defrag->parallel_migration_cond); - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - goto out; + goto out; } - pthread_mutex_lock (&defrag->dfq_mutex); - { - - /*Throttle down: - If the reconfigured count is less than current thread - count, then the current thread will sleep */ - - /*TODO: Need to refactor the following block to work - *under defrag->lock. For now access - * defrag->current_thread_count and rthcount under - * dfq_mutex lock */ - while (!defrag->crawl_done && - (defrag->recon_thread_count < - defrag->current_thread_count)) { - defrag->current_thread_count--; - gf_msg_debug ("DHT", 0, "Thread sleeping. " - "current thread count: %d", - defrag->current_thread_count); - - pthread_cond_wait ( - &defrag->df_wakeup_thread, - &defrag->dfq_mutex); - - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Thread wokeup. " - "current thread count: %d", - defrag->current_thread_count); - - } + gf_defrag_free_container(iterator); + + continue; + } else { + /* defrag->crawl_done flag is set means crawling + file system is done and hence a list_empty when + the above flag is set indicates there are no more + entries to be added to the queue and rebalance is + finished */ + + if (!defrag->crawl_done) { + defrag->current_thread_count--; + gf_msg_debug("DHT", 0, + "Thread " + "sleeping while waiting " + "for migration entries. " + "current thread count:%d", + defrag->current_thread_count); + + pthread_cond_wait(&defrag->parallel_migration_cond, + &defrag->dfq_mutex); + } - if (defrag->q_entry_count) { - iterator = list_entry (q_head->next, - typeof(*iterator), list); - - gf_msg_debug ("DHT", 0, "picking entry " - "%s", iterator->df_entry->d_name); - - list_del_init (&(iterator->list)); - - defrag->q_entry_count--; - - if ((defrag->q_entry_count < - MIN_MIGRATE_QUEUE_COUNT) && - defrag->wakeup_crawler) { - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - } - pthread_mutex_unlock (&defrag->dfq_mutex); - ret = gf_defrag_migrate_single_file - ((void *)iterator); - - /*Critical errors: ENOTCONN and ENOSPACE*/ - if (ret) { - dht_set_global_defrag_error - (defrag, ret); - - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - - pthread_cond_broadcast ( - &defrag->rebalance_crawler_alarm); - - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - - goto out; - } - - gf_defrag_free_container (iterator); - - continue; - } else { - - /* defrag->crawl_done flag is set means crawling - file system is done and hence a list_empty when - the above flag is set indicates there are no more - entries to be added to the queue and rebalance is - finished */ - - if (!defrag->crawl_done) { - - defrag->current_thread_count--; - gf_msg_debug ("DHT", 0, "Thread " - "sleeping while waiting " - "for migration entries. " - "current thread count:%d", - defrag->current_thread_count); - - pthread_cond_wait ( - &defrag->parallel_migration_cond, - &defrag->dfq_mutex); - } - - if (defrag->crawl_done && - !defrag->q_entry_count) { - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Exiting thread"); - - pthread_cond_broadcast ( - &defrag->parallel_migration_cond); - goto unlock; - } else { - defrag->current_thread_count++; - gf_msg_debug ("DHT", 0, "Thread woke up" - " as found migrating entries. " - "current thread count:%d", - defrag->current_thread_count); - - pthread_mutex_unlock - (&defrag->dfq_mutex); - continue; - } - } + if (defrag->crawl_done && !defrag->q_entry_count) { + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, "Exiting thread"); + pthread_cond_broadcast(&defrag->parallel_migration_cond); + goto unlock; + } else { + defrag->current_thread_count++; + gf_msg_debug("DHT", 0, + "Thread woke up" + " as found migrating entries. " + "current thread count:%d", + defrag->current_thread_count); + + pthread_mutex_unlock(&defrag->dfq_mutex); + continue; } -unlock: - pthread_mutex_unlock (&defrag->dfq_mutex); - break; + } } + unlock: + pthread_mutex_unlock(&defrag->dfq_mutex); + break; + } out: - return NULL; + return NULL; } -int static -gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, - loc_t *loc, dht_conf_t *conf, gf_defrag_info_t *defrag, - fd_t *fd, dict_t *migrate_data, - struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, - int *should_commit_hash, int *perrno) +int static gf_defrag_get_entry(xlator_t *this, int i, + struct dht_container **container, loc_t *loc, + dht_conf_t *conf, gf_defrag_info_t *defrag, + fd_t *fd, dict_t *migrate_data, + struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, + int *should_commit_hash, int *perrno) { - int ret = -1; - char is_linkfile = 0; - gf_dirent_t *df_entry = NULL; - struct dht_container *tmp_container = NULL; + int ret = -1; + char is_linkfile = 0; + gf_dirent_t *df_entry = NULL; + struct dht_container *tmp_container = NULL; - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } - - if (dir_dfmeta->offset_var[i].readdir_done == 1) { - ret = 0; - goto out; - } - - if (dir_dfmeta->fetch_entries[i] == 1) { - ret = syncop_readdirp (conf->local_subvols[i], fd, 131072, - dir_dfmeta->offset_var[i].offset, - &(dir_dfmeta->equeue[i]), - xattr_req, NULL); - if (ret == 0) { - dir_dfmeta->offset_var[i].readdir_done = 1; - ret = 0; - goto out; - } - - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_DATA_FAILED, - "Readdirp failed. Aborting data migration for " - "directory: %s", loc->path); - *perrno = -ret; - ret = -1; - goto out; - } + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - if (list_empty (&(dir_dfmeta->equeue[i].list))) { - dir_dfmeta->offset_var[i].readdir_done = 1; - ret = 0; - goto out; - } + if (dir_dfmeta->offset_var[i].readdir_done == 1) { + ret = 0; + goto out; + } - dir_dfmeta->fetch_entries[i] = 0; + if (dir_dfmeta->fetch_entries[i] == 1) { + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, + dir_dfmeta->offset_var[i].offset, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); + if (ret == 0) { + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; } - while (1) { - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - goto out; - } - - df_entry = list_entry (dir_dfmeta->iterator[i]->next, - typeof (*df_entry), list); - - if (&df_entry->list == dir_dfmeta->head[i]) { - gf_dirent_free (&(dir_dfmeta->equeue[i])); - INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list)); - dir_dfmeta->fetch_entries[i] = 1; - dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; - ret = 0; - goto out; - } - - dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next; - - dir_dfmeta->offset_var[i].offset = df_entry->d_off; - if (!strcmp (df_entry->d_name, ".") || - !strcmp (df_entry->d_name, "..")) - continue; - - if (IA_ISDIR (df_entry->d_stat.ia_type)) { - defrag->size_processed += df_entry->d_stat.ia_size; - continue; - } - - defrag->num_files_lookedup++; - - if (defrag->defrag_pattern && - (gf_defrag_pattern_match (defrag, df_entry->d_name, - df_entry->d_stat.ia_size) - == _gf_false)) { - defrag->size_processed += df_entry->d_stat.ia_size; - continue; - } - - is_linkfile = check_is_linkfile (NULL, &df_entry->d_stat, - df_entry->dict, - conf->link_xattr_name); - - if (is_linkfile) { - /* No need to add linkto file to the queue for - migration. Only the actual data file need to - be checked for migration criteria. - */ - - gf_msg_debug (this->name, 0, "Skipping linkfile" - " %s on subvol: %s", df_entry->d_name, - conf->local_subvols[i]->name); - continue; - } - - /*Build Container Structure */ + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + DHT_MSG_MIGRATE_DATA_FAILED, + "Readdirp failed. Aborting data migration for " + "directory: %s", + loc->path); + *perrno = -ret; + ret = -1; + goto out; + } - tmp_container = GF_CALLOC (1, sizeof(struct dht_container), - gf_dht_mt_container_t); - if (!tmp_container) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for container"); - ret = -1; - goto out; - } - tmp_container->df_entry = gf_dirent_for_name (df_entry->d_name); - if (!tmp_container->df_entry) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for df_entry"); - ret = -1; - goto out; - } + if (list_empty(&(dir_dfmeta->equeue[i].list))) { + dir_dfmeta->offset_var[i].readdir_done = 1; + ret = 0; + goto out; + } - tmp_container->local_subvol_index = i; + dir_dfmeta->fetch_entries[i] = 0; + } - tmp_container->df_entry->d_stat = df_entry->d_stat; + while (1) { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = -1; + goto out; + } - tmp_container->df_entry->d_ino = df_entry->d_ino; + df_entry = list_entry(dir_dfmeta->iterator[i]->next, typeof(*df_entry), + list); - tmp_container->df_entry->d_type = df_entry->d_type; + if (&df_entry->list == dir_dfmeta->head[i]) { + gf_dirent_free(&(dir_dfmeta->equeue[i])); + INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list)); + dir_dfmeta->fetch_entries[i] = 1; + dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; + ret = 0; + goto out; + } - tmp_container->df_entry->d_len = df_entry->d_len; + dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next; - tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), - gf_dht_mt_loc_t); - if (!tmp_container->parent_loc) { - gf_log (this->name, GF_LOG_ERROR, "Failed to allocate " - "memory for loc"); - ret = -1; - goto out; - } + dir_dfmeta->offset_var[i].offset = df_entry->d_off; + if (!strcmp(df_entry->d_name, ".") || !strcmp(df_entry->d_name, "..")) + continue; + if (IA_ISDIR(df_entry->d_stat.ia_type)) { + defrag->size_processed += df_entry->d_stat.ia_size; + continue; + } - ret = loc_copy (tmp_container->parent_loc, loc); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "loc_copy failed"); - ret = -1; - goto out; - } + defrag->num_files_lookedup++; - tmp_container->migrate_data = migrate_data; + if (defrag->defrag_pattern && + (gf_defrag_pattern_match(defrag, df_entry->d_name, + df_entry->d_stat.ia_size) == _gf_false)) { + defrag->size_processed += df_entry->d_stat.ia_size; + continue; + } - tmp_container->this = this; + is_linkfile = check_is_linkfile(NULL, &df_entry->d_stat, df_entry->dict, + conf->link_xattr_name); - if (df_entry->dict) - tmp_container->df_entry->dict = - dict_ref (df_entry->dict); + if (is_linkfile) { + /* No need to add linkto file to the queue for + migration. Only the actual data file need to + be checked for migration criteria. + */ - /*Build Container Structure >> END*/ + gf_msg_debug(this->name, 0, + "Skipping linkfile" + " %s on subvol: %s", + df_entry->d_name, conf->local_subvols[i]->name); + continue; + } - ret = 0; - goto out; + /*Build Container Structure */ + tmp_container = GF_CALLOC(1, sizeof(struct dht_container), + gf_dht_mt_container_t); + if (!tmp_container) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for container"); + ret = -1; + goto out; } - -out: - if (ret == 0) { - *container = tmp_container; - } else { - if (tmp_container) { - gf_defrag_free_container (tmp_container); - } + tmp_container->df_entry = gf_dirent_for_name(df_entry->d_name); + if (!tmp_container->df_entry) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for df_entry"); + ret = -1; + goto out; } - return ret; -} + tmp_container->local_subvol_index = i; -int -gf_defrag_process_dir (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, - dict_t *migrate_data, int *perrno) -{ - int ret = -1; - fd_t *fd = NULL; - dht_conf_t *conf = NULL; - gf_dirent_t entries; - dict_t *xattr_req = NULL; - struct timeval dir_start = {0,}; - struct timeval end = {0,}; - double elapsed = {0,}; - int local_subvols_cnt = 0; - int i = 0; - int j = 0; - struct dht_container *container = NULL; - int ldfq_count = 0; - int dfc_index = 0; - int throttle_up = 0; - struct dir_dfmeta *dir_dfmeta = NULL; - int should_commit_hash = 1; - - gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", - loc->path); - gettimeofday (&dir_start, NULL); + tmp_container->df_entry->d_stat = df_entry->d_stat; - conf = this->private; - local_subvols_cnt = conf->local_subvols_cnt; + tmp_container->df_entry->d_ino = df_entry->d_ino; - if (!local_subvols_cnt) { - ret = 0; - goto out; - } + tmp_container->df_entry->d_type = df_entry->d_type; - fd = fd_create (loc->inode, defrag->pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); - ret = -1; - goto out; - } + tmp_container->df_entry->d_len = df_entry->d_len; - ret = syncop_opendir (this, loc, fd, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_MIGRATE_DATA_FAILED, - "Migrate data failed: Failed to open dir %s", - loc->path); - *perrno = -ret; - ret = -1; - goto out; + tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), gf_dht_mt_loc_t); + if (!tmp_container->parent_loc) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to allocate " + "memory for loc"); + ret = -1; + goto out; } - fd_bind (fd); - dir_dfmeta = GF_CALLOC (1, sizeof (*dir_dfmeta), - gf_common_mt_pointer); - if (!dir_dfmeta) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); - ret = -1; - goto out; + ret = loc_copy(tmp_container->parent_loc, loc); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "loc_copy failed"); + ret = -1; + goto out; } + tmp_container->migrate_data = migrate_data; - dir_dfmeta->head = GF_CALLOC (local_subvols_cnt, - sizeof (*(dir_dfmeta->head)), - gf_common_mt_pointer); - if (!dir_dfmeta->head) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL"); - ret = -1; - goto out; - } + tmp_container->this = this; - dir_dfmeta->iterator = GF_CALLOC (local_subvols_cnt, - sizeof (*(dir_dfmeta->iterator)), - gf_common_mt_pointer); - if (!dir_dfmeta->iterator) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->iterator is NULL"); - ret = -1; - goto out; - } + if (df_entry->dict) + tmp_container->df_entry->dict = dict_ref(df_entry->dict); - dir_dfmeta->equeue = GF_CALLOC (local_subvols_cnt, sizeof (entries), - gf_dht_mt_dirent_t); - if (!dir_dfmeta->equeue) { - gf_log (this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL"); - ret = -1; - goto out; - } + /*Build Container Structure >> END*/ - dir_dfmeta->offset_var = GF_CALLOC (local_subvols_cnt, - sizeof (dht_dfoffset_ctx_t), - gf_dht_mt_octx_t); - if (!dir_dfmeta->offset_var) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->offset_var is NULL"); - ret = -1; - goto out; - } - ret = gf_defrag_ctx_subvols_init (dir_dfmeta->offset_var, this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "dht_dfoffset_ctx_t" - "initialization failed"); - ret = -1; - goto out; - } - - dir_dfmeta->fetch_entries = GF_CALLOC (local_subvols_cnt, - sizeof (int), gf_common_mt_int); - if (!dir_dfmeta->fetch_entries) { - gf_log (this->name, GF_LOG_ERROR, - "dir_dfmeta->fetch_entries is NULL"); - ret = -1; - goto out; - } - - for (i = 0; i < local_subvols_cnt ; i++) { - INIT_LIST_HEAD (&(dir_dfmeta->equeue[i].list)); - dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list); - dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; - dir_dfmeta->fetch_entries[i] = 1; - } + ret = 0; + goto out; + } - xattr_req = dict_new (); - if (!xattr_req) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; +out: + if (ret == 0) { + *container = tmp_container; + } else { + if (tmp_container) { + gf_defrag_free_container(tmp_container); } + } - ret = dict_set_uint32 (xattr_req, - conf->link_xattr_name, 256); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set dict for " - "key: %s", conf->link_xattr_name); - ret = -1; - goto out; - } + return ret; +} - /* - Job: Read entries from each local subvol and store the entries - in equeue array of linked list. Now pick one entry from the - equeue array in a round robin basis and add them to defrag Queue. - */ +int +gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *migrate_data, int *perrno) +{ + int ret = -1; + fd_t *fd = NULL; + dht_conf_t *conf = NULL; + gf_dirent_t entries; + dict_t *xattr_req = NULL; + struct timeval dir_start = { + 0, + }; + struct timeval end = { + 0, + }; + double elapsed = { + 0, + }; + int local_subvols_cnt = 0; + int i = 0; + int j = 0; + struct dht_container *container = NULL; + int ldfq_count = 0; + int dfc_index = 0; + int throttle_up = 0; + struct dir_dfmeta *dir_dfmeta = NULL; + int should_commit_hash = 1; + + gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); + gettimeofday(&dir_start, NULL); + + conf = this->private; + local_subvols_cnt = conf->local_subvols_cnt; + + if (!local_subvols_cnt) { + ret = 0; + goto out; + } - while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, - local_subvols_cnt)) { + fd = fd_create(loc->inode, defrag->pid); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } + + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, + "Migrate data failed: Failed to open dir %s", loc->path); + *perrno = -ret; + ret = -1; + goto out; + } - pthread_mutex_lock (&defrag->dfq_mutex); - { + fd_bind(fd); + dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); + if (!dir_dfmeta) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); + ret = -1; + goto out; + } - /*Throttle up: If reconfigured count is higher than - current thread count, wake up the sleeping threads - TODO: Need to refactor this. Instead of making the - thread sleep and wake, we should terminate and spawn - threads on-demand*/ - - if (defrag->recon_thread_count > - defrag->current_thread_count) { - throttle_up = - (defrag->recon_thread_count - - defrag->current_thread_count); - for (j = 0; j < throttle_up; j++) { - pthread_cond_signal ( - &defrag->df_wakeup_thread); - } + dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)), + gf_common_mt_pointer); + if (!dir_dfmeta->head) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL"); + ret = -1; + goto out; + } + + dir_dfmeta->iterator = GF_CALLOC(local_subvols_cnt, + sizeof(*(dir_dfmeta->iterator)), + gf_common_mt_pointer); + if (!dir_dfmeta->iterator) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->iterator is NULL"); + ret = -1; + goto out; + } - } + dir_dfmeta->equeue = GF_CALLOC(local_subvols_cnt, sizeof(entries), + gf_dht_mt_dirent_t); + if (!dir_dfmeta->equeue) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL"); + ret = -1; + goto out; + } - while (defrag->q_entry_count > - MAX_MIGRATE_QUEUE_COUNT) { - defrag->wakeup_crawler = 1; - pthread_cond_wait ( - &defrag->rebalance_crawler_alarm, - &defrag->dfq_mutex); - } + dir_dfmeta->offset_var = GF_CALLOC( + local_subvols_cnt, sizeof(dht_dfoffset_ctx_t), gf_dht_mt_octx_t); + if (!dir_dfmeta->offset_var) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->offset_var is NULL"); + ret = -1; + goto out; + } + ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "dht_dfoffset_ctx_t" + "initialization failed"); + ret = -1; + goto out; + } - ldfq_count = defrag->q_entry_count; + dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int), + gf_common_mt_int); + if (!dir_dfmeta->fetch_entries) { + gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL"); + ret = -1; + goto out; + } + + for (i = 0; i < local_subvols_cnt; i++) { + INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list)); + dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list); + dir_dfmeta->iterator[i] = dir_dfmeta->head[i]; + dir_dfmeta->fetch_entries[i] = 1; + } + + xattr_req = dict_new(); + if (!xattr_req) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set dict for " + "key: %s", + conf->link_xattr_name); + ret = -1; + goto out; + } - if (defrag->wakeup_crawler) { - defrag->wakeup_crawler = 0; - } + /* + Job: Read entries from each local subvol and store the entries + in equeue array of linked list. Now pick one entry from the + equeue array in a round robin basis and add them to defrag Queue. + */ + while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { + pthread_mutex_lock(&defrag->dfq_mutex); + { + /*Throttle up: If reconfigured count is higher than + current thread count, wake up the sleeping threads + TODO: Need to refactor this. Instead of making the + thread sleep and wake, we should terminate and spawn + threads on-demand*/ + + if (defrag->recon_thread_count > defrag->current_thread_count) { + throttle_up = (defrag->recon_thread_count - + defrag->current_thread_count); + for (j = 0; j < throttle_up; j++) { + pthread_cond_signal(&defrag->df_wakeup_thread); } - pthread_mutex_unlock (&defrag->dfq_mutex); - - while (ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && - !dht_dfreaddirp_done(dir_dfmeta->offset_var, - local_subvols_cnt)) { + } - ret = gf_defrag_get_entry (this, dfc_index, &container, - loc, conf, defrag, fd, - migrate_data, dir_dfmeta, - xattr_req, - &should_commit_hash, perrno); + while (defrag->q_entry_count > MAX_MIGRATE_QUEUE_COUNT) { + defrag->wakeup_crawler = 1; + pthread_cond_wait(&defrag->rebalance_crawler_alarm, + &defrag->dfq_mutex); + } - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Found " - "error from gf_defrag_get_entry"); + ldfq_count = defrag->q_entry_count; - ret = -1; - goto out; - } - - /* Check if we got an entry, else we need to move the - index to the next subvol */ - if (!container) { - GF_CRAWL_INDEX_MOVE(dfc_index, - local_subvols_cnt); - continue; - } + if (defrag->wakeup_crawler) { + defrag->wakeup_crawler = 0; + } + } + pthread_mutex_unlock(&defrag->dfq_mutex); - /* Q this entry in the dfq */ - pthread_mutex_lock (&defrag->dfq_mutex); - { - list_add_tail (&container->list, - &(defrag->queue[0].list)); - defrag->q_entry_count++; - ldfq_count = defrag->q_entry_count; - - gf_msg_debug (this->name, 0, "added " - "file:%s parent:%s to the queue ", - container->df_entry->d_name, - container->parent_loc->path); - - pthread_cond_signal ( - &defrag->parallel_migration_cond); - } - pthread_mutex_unlock (&defrag->dfq_mutex); + while ( + ldfq_count <= MAX_MIGRATE_QUEUE_COUNT && + !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) { + ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf, + defrag, fd, migrate_data, dir_dfmeta, + xattr_req, &should_commit_hash, perrno); - GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); - } - } + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Found " + "error from gf_defrag_get_entry"); - gettimeofday (&end, NULL); - elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + - (end.tv_usec - dir_start.tv_usec); - gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took " - "%.2f secs", loc->path, elapsed/1e6); - ret = 0; + ret = -1; + goto out; + } + + /* Check if we got an entry, else we need to move the + index to the next subvol */ + if (!container) { + GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); + continue; + } + + /* Q this entry in the dfq */ + pthread_mutex_lock(&defrag->dfq_mutex); + { + list_add_tail(&container->list, &(defrag->queue[0].list)); + defrag->q_entry_count++; + ldfq_count = defrag->q_entry_count; + + gf_msg_debug(this->name, 0, + "added " + "file:%s parent:%s to the queue ", + container->df_entry->d_name, + container->parent_loc->path); + + pthread_cond_signal(&defrag->parallel_migration_cond); + } + pthread_mutex_unlock(&defrag->dfq_mutex); + + GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt); + } + } + + gettimeofday(&end, NULL); + elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + + (end.tv_usec - dir_start.tv_usec); + gf_log(this->name, GF_LOG_INFO, + "Migration operation on dir %s took " + "%.2f secs", + loc->path, elapsed / 1e6); + ret = 0; out: - gf_defrag_free_dir_dfmeta (dir_dfmeta, local_subvols_cnt); + gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt); - if (xattr_req) - dict_unref(xattr_req); + if (xattr_req) + dict_unref(xattr_req); - if (fd) - fd_unref (fd); + if (fd) + fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } + if (ret == 0 && should_commit_hash == 0) { + ret = 2; + } - /* It does not matter if it errored out - this number is - * used to calculate rebalance estimated time to complete. - * No locking required as dirs are processed by a single thread. - */ - defrag->num_dirs_processed++; - return ret; + /* It does not matter if it errored out - this number is + * used to calculate rebalance estimated time to complete. + * No locking required as dirs are processed by a single thread. + */ + defrag->num_dirs_processed++; + return ret; } int -gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag, - loc_t *loc, dict_t *fix_layout) +gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *fix_layout) { - int ret; - dht_conf_t *conf = NULL; - /* - * Now we're ready to update the directory commit hash for the volume - * root, so that hash miscompares and broadcast lookups can stop. - * However, we want to skip that if fix-layout is all we did. In - * that case, we want the miscompares etc. to continue until a real - * rebalance is complete. - */ - if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX - || defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - return 0; - } - - conf = this->private; - if (!conf) { - /*Uh oh - */ - return -1; - } + int ret; + dht_conf_t *conf = NULL; + /* + * Now we're ready to update the directory commit hash for the volume + * root, so that hash miscompares and broadcast lookups can stop. + * However, we want to skip that if fix-layout is all we did. In + * that case, we want the miscompares etc. to continue until a real + * rebalance is complete. + */ + if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX || + defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { + return 0; + } - if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) { - /* Commit hash updates are only done on local subvolumes and - * only when lookup optimization is needed (for older client - * support) - */ - return 0; - } + conf = this->private; + if (!conf) { + /*Uh oh + */ + return -1; + } - ret = dict_set_uint32 (fix_layout, "new-commit-hash", - defrag->new_commit_hash); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set new-commit-hash"); - return -1; - } + if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) { + /* Commit hash updates are only done on local subvolumes and + * only when lookup optimization is needed (for older client + * support) + */ + return 0; + } - ret = syncop_setxattr (this, loc, fix_layout, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "fix layout on %s failed", loc->path); + ret = dict_set_uint32(fix_layout, "new-commit-hash", + defrag->new_commit_hash); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to set new-commit-hash"); + return -1; + } - if (-ret == ENOENT || -ret == ESTALE) { - /* Dir most likely is deleted */ - return 0; - } + ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "fix layout on %s failed", loc->path); - return -1; + if (-ret == ENOENT || -ret == ESTALE) { + /* Dir most likely is deleted */ + return 0; } - /* TBD: find more efficient solution than adding/deleting every time */ - dict_del(fix_layout, "new-commit-hash"); - - return 0; -} + return -1; + } + /* TBD: find more efficient solution than adding/deleting every time */ + dict_del(fix_layout, "new-commit-hash"); + return 0; +} /* Function for doing a named lookup on file inodes during an attach tier * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal @@ -3717,1721 +3588,1672 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag, * eventual consistency * */ int -gf_fix_layout_tier_attach_lookup (xlator_t *this, - loc_t *parent_loc, +gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc, gf_dirent_t *file_dentry) { - int ret = -1; - dict_t *lookup_xdata = NULL; - dht_conf_t *conf = NULL; - loc_t file_loc = {0,}; - struct iatt iatt = {0,}; + int ret = -1; + dict_t *lookup_xdata = NULL; + dht_conf_t *conf = NULL; + loc_t file_loc = { + 0, + }; + struct iatt iatt = { + 0, + }; - GF_VALIDATE_OR_GOTO ("tier", this, out); + GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, parent_loc, out); + GF_VALIDATE_OR_GOTO(this->name, parent_loc, out); - GF_VALIDATE_OR_GOTO (this->name, file_dentry, out); + GF_VALIDATE_OR_GOTO(this->name, file_dentry, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - if (!parent_loc->inode) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s parent is NULL", parent_loc->path, - file_dentry->d_name); - goto out; - } + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + if (!parent_loc->inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name); + goto out; + } - conf = this->private; + conf = this->private; - loc_wipe (&file_loc); + loc_wipe(&file_loc); - if (gf_uuid_is_null (file_dentry->d_stat.ia_gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s gfid not present", parent_loc->path, - file_dentry->d_name); - goto out; - } + if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s gfid not present", parent_loc->path, file_dentry->d_name); + goto out; + } - gf_uuid_copy (file_loc.gfid, file_dentry->d_stat.ia_gfid); - - if (gf_uuid_is_null (parent_loc->gfid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "%s/%s" - " gfid not present", parent_loc->path, - file_dentry->d_name); - goto out; - } + gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid); - gf_uuid_copy (file_loc.pargfid, parent_loc->gfid); + if (gf_uuid_is_null(parent_loc->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "%s/%s" + " gfid not present", + parent_loc->path, file_dentry->d_name); + goto out; + } + gf_uuid_copy(file_loc.pargfid, parent_loc->gfid); - ret = dht_build_child_loc (this, &file_loc, parent_loc, - file_dentry->d_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Child loc build failed"); - ret = -1; - goto out; - } - - lookup_xdata = dict_new (); - if (!lookup_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed creating lookup dict for %s", - file_dentry->d_name); - goto out; - } - - ret = dict_set_int32 (lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set lookup flag"); - goto out; - } - - gf_uuid_copy (file_loc.parent->gfid, parent_loc->gfid); - - /* Sending lookup to cold tier only */ - ret = syncop_lookup (conf->subvolumes[0], &file_loc, &iatt, - NULL, lookup_xdata, NULL); - if (ret) { - /* If the file does not exist on the cold tier than it must */ - /* have been discovered on the hot tier. This is not an error. */ - gf_msg (this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s lookup to cold tier on attach heal failed", file_loc.path); - goto out; - } - - ret = 0; + ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Child loc build failed"); + ret = -1; + goto out; + } + + lookup_xdata = dict_new(); + if (!lookup_xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed creating lookup dict for %s", file_dentry->d_name); + goto out; + } + + ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, + "Failed to set lookup flag"); + goto out; + } + + gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid); + + /* Sending lookup to cold tier only */ + ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL, + lookup_xdata, NULL); + if (ret) { + /* If the file does not exist on the cold tier than it must */ + /* have been discovered on the hot tier. This is not an error. */ + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "%s lookup to cold tier on attach heal failed", file_loc.path); + goto out; + } + + ret = 0; out: - loc_wipe (&file_loc); + loc_wipe(&file_loc); - if (lookup_xdata) - dict_unref (lookup_xdata); + if (lookup_xdata) + dict_unref(lookup_xdata); - return ret; + return ret; } int -gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, - dict_t *fix_layout, dict_t *migrate_data) +gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *fix_layout, dict_t *migrate_data) { - int ret = -1; - loc_t entry_loc = {0,}; - fd_t *fd = NULL; - gf_dirent_t entries; - gf_dirent_t *tmp = NULL; - gf_dirent_t *entry = NULL; - gf_boolean_t free_entries = _gf_false; - off_t offset = 0; - struct iatt iatt = {0,}; - inode_t *linked_inode = NULL, *inode = NULL; - dht_conf_t *conf = NULL; - int should_commit_hash = 1; - int perrno = 0; - - conf = this->private; - if (!conf) { - ret = -1; - goto out; - } - - ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL); - if (ret) { - if (strcmp (loc->path, "/") == 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", loc->path); - - defrag->total_failures++; - ret = -1; - goto out; - } + int ret = -1; + loc_t entry_loc = { + 0, + }; + fd_t *fd = NULL; + gf_dirent_t entries; + gf_dirent_t *tmp = NULL; + gf_dirent_t *entry = NULL; + gf_boolean_t free_entries = _gf_false; + off_t offset = 0; + struct iatt iatt = { + 0, + }; + inode_t *linked_inode = NULL, *inode = NULL; + dht_conf_t *conf = NULL; + int should_commit_hash = 1; + int perrno = 0; + + conf = this->private; + if (!conf) { + ret = -1; + goto out; + } - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "Dir:%s renamed or removed. Skipping", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", loc->path); + ret = syncop_lookup(this, loc, &iatt, NULL, NULL, NULL); + if (ret) { + if (strcmp(loc->path, "/") == 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); - defrag->total_failures++; - goto out; - } + defrag->total_failures++; + ret = -1; + goto out; } - fd = fd_create (loc->inode, defrag->pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); - ret = -1; - goto out; - } + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. Skipping", loc->path); + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED, + "lookup failed for:%s", loc->path); - ret = syncop_opendir (this, loc, fd, NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } + defrag->total_failures++; + goto out; + } + } - gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, " - "err:%d", loc->path, -ret); + fd = fd_create(loc->inode, defrag->pid); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } - ret = -1; - goto out; + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + goto out; } - fd_bind (fd); - INIT_LIST_HEAD (&entries.list); + gf_log(this->name, GF_LOG_ERROR, + "Failed to open dir %s, " + "err:%d", + loc->path, -ret); - while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries, - NULL, NULL)) != 0) - { - if (ret < 0) { - if (-ret == ENOENT || -ret == ESTALE) { - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; - goto out; - } + ret = -1; + goto out; + } - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_READDIR_ERROR, "readdirp failed for " - "path %s. Aborting fix-layout", loc->path); + fd_bind(fd); + INIT_LIST_HEAD(&entries.list); - ret = -1; - goto out; + while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL, + NULL)) != 0) { + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE) { + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; } + ret = 0; + goto out; + } - if (list_empty (&entries.list)) - break; - - free_entries = _gf_true; - - list_for_each_entry_safe (entry, tmp, &entries.list, list) { - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = 1; - goto out; - } - - offset = entry->d_off; - - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; - if (!IA_ISDIR (entry->d_stat.ia_type)) { - - /* If its a fix layout during the attach - * tier operation do lookups on files - * on cold subvolume so that there is a - * CTR DB Lookup Heal triggered on existing - * data. - * */ - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - gf_fix_layout_tier_attach_lookup - (this, loc, entry); - } - - continue; - } - loc_wipe (&entry_loc); - - ret = dht_build_child_loc (this, &entry_loc, loc, - entry->d_name); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Child loc" - " build failed for entry: %s", - entry->d_name); - - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - - goto out; - } else { - should_commit_hash = 0; + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_READDIR_ERROR, + "readdirp failed for " + "path %s. Aborting fix-layout", + loc->path); - continue; - } - } + ret = -1; + goto out; + } - if (gf_uuid_is_null (entry->d_stat.ia_gfid)) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s" - " gfid not present", loc->path, - entry->d_name); - continue; - } + if (list_empty(&entries.list)) + break; + free_entries = _gf_true; - gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { + ret = 1; + goto out; + } + + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + if (!IA_ISDIR(entry->d_stat.ia_type)) { + /* If its a fix layout during the attach + * tier operation do lookups on files + * on cold subvolume so that there is a + * CTR DB Lookup Heal triggered on existing + * data. + * */ + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + gf_fix_layout_tier_attach_lookup(this, loc, entry); + } - /*In case the gfid stored in the inode by inode_link - * and the gfid obtained in the lookup differs, then - * client3_3_lookup_cbk will return ESTALE and proper - * error will be captured - */ + continue; + } + loc_wipe(&entry_loc); - linked_inode = inode_link (entry_loc.inode, loc->inode, - entry->d_name, - &entry->d_stat); + ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Child loc" + " build failed for entry: %s", + entry->d_name); - inode = entry_loc.inode; - entry_loc.inode = linked_inode; - inode_unref (inode); + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - if (gf_uuid_is_null (loc->gfid)) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s" - " gfid not present", loc->path, - entry->d_name); - continue; - } + goto out; + } else { + should_commit_hash = 0; - gf_uuid_copy (entry_loc.pargfid, loc->gfid); - - ret = syncop_lookup (this, &entry_loc, &iatt, NULL, - NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "Dir:%s renamed or removed. " - "Skipping", loc->path); - ret = 0; - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - continue; - } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_DIR_LOOKUP_FAILED, - "lookup failed for:%s", - entry_loc.path); - - defrag->total_failures++; - - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } else { - should_commit_hash = 0; - continue; - } - } - } + continue; + } + } + + if (gf_uuid_is_null(entry->d_stat.ia_gfid)) { + gf_log(this->name, GF_LOG_ERROR, + "%s/%s" + " gfid not present", + loc->path, entry->d_name); + continue; + } + + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); + + /*In case the gfid stored in the inode by inode_link + * and the gfid obtained in the lookup differs, then + * client3_3_lookup_cbk will return ESTALE and proper + * error will be captured + */ + + linked_inode = inode_link(entry_loc.inode, loc->inode, + entry->d_name, &entry->d_stat); + + inode = entry_loc.inode; + entry_loc.inode = linked_inode; + inode_unref(inode); + + if (gf_uuid_is_null(loc->gfid)) { + gf_log(this->name, GF_LOG_ERROR, + "%s/%s" + " gfid not present", + loc->path, entry->d_name); + continue; + } + + gf_uuid_copy(entry_loc.pargfid, loc->gfid); + + ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, + "Dir:%s renamed or removed. " + "Skipping", + loc->path); + ret = 0; + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + continue; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, + DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s", + entry_loc.path); - /* A return value of 2 means, either process_dir or - * lookup of a dir failed. Hence, don't commit hash - * for the current directory*/ + defrag->total_failures++; - ret = gf_defrag_fix_layout (this, defrag, &entry_loc, - fix_layout, migrate_data); + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } else { + should_commit_hash = 0; + continue; + } + } + } - if (ret && ret != 2) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_LAYOUT_FIX_FAILED, - "Fix layout failed for %s", - entry_loc.path); + /* A return value of 2 means, either process_dir or + * lookup of a dir failed. Hence, don't commit hash + * for the current directory*/ - defrag->total_failures++; + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); - if (conf->decommission_in_progress) { - defrag->defrag_status = - GF_DEFRAG_STATUS_FAILED; + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "Fix layout failed for %s", entry_loc.path); - goto out; - } else { - /* Let's not commit-hash if - * gf_defrag_fix_layout failed*/ - continue; - } - } - } + defrag->total_failures++; - gf_dirent_free (&entries); - free_entries = _gf_false; - INIT_LIST_HEAD (&entries.list); - } + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = syncop_setxattr (this, loc, fix_layout, 0, NULL, NULL); - if (ret) { - if (-ret == ENOENT || -ret == ESTALE) { - gf_msg (this->name, GF_LOG_INFO, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed. Dir %s " - "renamed or removed", - loc->path); - if (conf->decommission_subvols_cnt) { - defrag->total_failures++; - } - ret = 0; + goto out; } else { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_LAYOUT_FIX_FAILED, - "Setxattr failed for %s", - loc->path); + /* Let's not commit-hash if + * gf_defrag_fix_layout failed*/ + continue; + } + } + } + + gf_dirent_free(&entries); + free_entries = _gf_false; + INIT_LIST_HEAD(&entries.list); + } + + ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL); + if (ret) { + if (-ret == ENOENT || -ret == ESTALE) { + gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed. Dir %s " + "renamed or removed", + loc->path); + if (conf->decommission_subvols_cnt) { + defrag->total_failures++; + } + ret = 0; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed for %s", loc->path); - defrag->total_failures++; + defrag->total_failures++; - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - ret = -1; - goto out; - } - } + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + ret = -1; + goto out; + } } + } - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && - (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { - ret = gf_defrag_process_dir (this, defrag, loc, migrate_data, - &perrno); - - if (ret && (ret != 2)) { - if (perrno == ENOENT || perrno == ESTALE) { - ret = 0; - goto out; - } else { - - defrag->total_failures++; + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) { + ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno); - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, - "gf_defrag_process_dir failed for " - "directory: %s", loc->path); + if (ret && (ret != 2)) { + if (perrno == ENOENT || perrno == ESTALE) { + ret = 0; + goto out; + } else { + defrag->total_failures++; - if (conf->decommission_in_progress) { - goto out; - } + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, + "gf_defrag_process_dir failed for " + "directory: %s", + loc->path); - should_commit_hash = 0; - } - } else if (ret == 2) { - should_commit_hash = 0; + if (conf->decommission_in_progress) { + goto out; } - } - gf_msg_trace (this->name, 0, "fix layout called on %s", loc->path); + should_commit_hash = 0; + } + } else if (ret == 2) { + should_commit_hash = 0; + } + } - if (should_commit_hash && - gf_defrag_settle_hash (this, defrag, loc, fix_layout) != 0) { + gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path); - defrag->total_failures++; + if (should_commit_hash && + gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) { + defrag->total_failures++; - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_SETTLE_HASH_FAILED, - "Settle hash failed for %s", - loc->path); + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED, + "Settle hash failed for %s", loc->path); - ret = -1; + ret = -1; - if (conf->decommission_in_progress) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto out; - } + if (conf->decommission_in_progress) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto out; } + } - ret = 0; + ret = 0; out: - if (free_entries) - gf_dirent_free (&entries); + if (free_entries) + gf_dirent_free(&entries); - loc_wipe (&entry_loc); + loc_wipe(&entry_loc); - if (fd) - fd_unref (fd); + if (fd) + fd_unref(fd); - if (ret == 0 && should_commit_hash == 0) { - ret = 2; - } - - return ret; + if (ret == 0 && should_commit_hash == 0) { + ret = 2; + } + return ret; } - - /****************************************************************************** * Tier background Fix layout functions ******************************************************************************/ /* This is the background tier fixlayout thread */ void * -gf_tier_do_fix_layout (void *args) +gf_tier_do_fix_layout(void *args) { - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; - int ret = -1; - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *dict = NULL; - loc_t loc = {0,}; - struct iatt iatt = {0,}; - struct iatt parent = {0,}; - - GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg, out); - GF_VALIDATE_OR_GOTO ("tier", tier_fix_layout_arg->this, out); - this = tier_fix_layout_arg->this; - - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO (this->name, defrag, out); - GF_VALIDATE_OR_GOTO (this->name, defrag->root_inode, out); - - GF_VALIDATE_OR_GOTO (this->name, tier_fix_layout_arg->fix_layout, out); - - - /* Get Root loc_t */ - dht_build_root_loc (defrag->root_inode, &loc); - ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_START_FAILED, - "Lookup on root failed."); - ret = -1; - goto out; - } - - - /* Start the crawl */ - gf_msg (this->name, GF_LOG_INFO, 0, - DHT_MSG_LOG_TIER_STATUS, "Tiering Fixlayout started"); - - ret = gf_defrag_fix_layout (this, defrag, &loc, - tier_fix_layout_arg->fix_layout, NULL); - if (ret && ret != 2) { - gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, - "Tiering fixlayout failed."); - ret = -1; - goto out; - } - - if (ret != 2 && gf_defrag_settle_hash - (this, defrag, &loc, - tier_fix_layout_arg->fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } + gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args; + int ret = -1; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + dict_t *dict = NULL; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent = { + 0, + }; + + GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out); + GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out); + this = tier_fix_layout_arg->this; + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO(this->name, defrag, out); + GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out); + + GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out); + + /* Get Root loc_t */ + dht_build_root_loc(defrag->root_inode, &loc); + ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, + "Lookup on root failed."); + ret = -1; + goto out; + } + + /* Start the crawl */ + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, + "Tiering Fixlayout started"); + + ret = gf_defrag_fix_layout(this, defrag, &loc, + tier_fix_layout_arg->fix_layout, NULL); + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, + "Tiering fixlayout failed."); + ret = -1; + goto out; + } - ret = dict_set_str (dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_FAILED, - "Failed to set dictionary value: key = %s", - GF_XATTR_TIER_LAYOUT_FIXED_KEY); - ret = -1; - goto out; - } + if (ret != 2 && + gf_defrag_settle_hash(this, defrag, &loc, + tier_fix_layout_arg->fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; + } - /* Marking the completion of tiering fix layout via a xattr on root */ - ret = syncop_setxattr (this, &loc, dict, 0, NULL, NULL); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to set tiering fix " - "layout completed xattr on %s", loc.path); - ret = -1; - goto out; - } + dict = dict_new(); + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED, + "Failed to set dictionary value: key = %s", + GF_XATTR_TIER_LAYOUT_FIXED_KEY); + ret = -1; + goto out; + } + + /* Marking the completion of tiering fix layout via a xattr on root */ + ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set tiering fix " + "layout completed xattr on %s", + loc.path); + ret = -1; + goto out; + } - ret = 0; + ret = 0; out: - if (ret && defrag) - defrag->total_failures++; + if (ret && defrag) + defrag->total_failures++; - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return NULL; + return NULL; } int -gf_tier_start_fix_layout (xlator_t *this, - loc_t *loc, - gf_defrag_info_t *defrag, +gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag, dict_t *fix_layout) { - int ret = -1; - dict_t *tier_dict = NULL; - gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; - - tier_dict = dict_new (); - if (!tier_dict) { - gf_log ("tier", GF_LOG_ERROR, "Tier fix layout failed :" - "Creation of tier_dict failed"); - ret = -1; - goto out; - } - - /* Check if layout is fixed already */ - ret = syncop_getxattr (this, loc, &tier_dict, - GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret != 0) { - - tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; - - /*Fill crawl arguments */ - tier_fix_layout_arg->this = this; - tier_fix_layout_arg->fix_layout = fix_layout; - - /* Spawn the fix layout thread so that its done in the - * background */ - ret = gf_thread_create (&tier_fix_layout_arg->thread_id, NULL, - gf_tier_do_fix_layout, - tier_fix_layout_arg, "tierfixl"); - if (ret) { - gf_log ("tier", GF_LOG_ERROR, "Thread creation failed. " - "Background fix layout for tiering will not " - "work."); - defrag->total_failures++; - goto out; - } - } - ret = 0; + int ret = -1; + dict_t *tier_dict = NULL; + gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL; + + tier_dict = dict_new(); + if (!tier_dict) { + gf_log("tier", GF_LOG_ERROR, + "Tier fix layout failed :" + "Creation of tier_dict failed"); + ret = -1; + goto out; + } + + /* Check if layout is fixed already */ + ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, + NULL, NULL); + if (ret != 0) { + tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg; + + /*Fill crawl arguments */ + tier_fix_layout_arg->this = this; + tier_fix_layout_arg->fix_layout = fix_layout; + + /* Spawn the fix layout thread so that its done in the + * background */ + ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL, + gf_tier_do_fix_layout, tier_fix_layout_arg, + "tierfixl"); + if (ret) { + gf_log("tier", GF_LOG_ERROR, + "Thread creation failed. " + "Background fix layout for tiering will not " + "work."); + defrag->total_failures++; + goto out; + } + } + ret = 0; out: - if (tier_dict) - dict_unref (tier_dict); + if (tier_dict) + dict_unref(tier_dict); - return ret; + return ret; } void -gf_tier_clear_fix_layout (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) +gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { - int ret = -1; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO ("tier", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, defrag, out); - - /* Check if background fixlayout is completed. This is not - * multi-process safe i.e there is a possibility that by the time - * we move to remove the xattr there it might have been cleared by some - * other detach process from other node. We ignore the error if such - * a thing happens */ - ret = syncop_getxattr (this, loc, &dict, - GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, NULL); - if (ret) { - /* Background fixlayout not complete - nothing to clear*/ - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_LOG_TIER_STATUS, - "Unable to retrieve fixlayout xattr." - "Assume background fix layout not complete"); - goto out; - } - - ret = syncop_removexattr (this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - DHT_MSG_LOG_TIER_STATUS, - "Failed removing tier fix layout " - "xattr from %s", loc->path); - goto out; - } - ret = 0; + int ret = -1; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("tier", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, defrag, out); + + /* Check if background fixlayout is completed. This is not + * multi-process safe i.e there is a possibility that by the time + * we move to remove the xattr there it might have been cleared by some + * other detach process from other node. We ignore the error if such + * a thing happens */ + ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, + NULL, NULL); + if (ret) { + /* Background fixlayout not complete - nothing to clear*/ + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, + "Unable to retrieve fixlayout xattr." + "Assume background fix layout not complete"); + goto out; + } + + ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS, + "Failed removing tier fix layout " + "xattr from %s", + loc->path); + goto out; + } + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); } void -gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) { - if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { - pthread_join (defrag->tier_conf.tier_fix_layout_arg.thread_id, - NULL); - } +gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag) +{ + if (defrag->tier_conf.tier_fix_layout_arg.thread_id) { + pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL); + } } /******************Tier background Fix layout functions END********************/ int -dht_init_local_subvols_and_nodeuuids (xlator_t *this, dht_conf_t *conf, - loc_t *loc) +dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf, + loc_t *loc) { - - dict_t *dict = NULL; - gf_defrag_info_t *defrag = NULL; - uuid_t *uuid_ptr = NULL; - int ret = -1; - int i = 0; - int j = 0; - - defrag = conf->defrag; - - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { - /* Find local subvolumes */ - ret = syncop_getxattr (this, loc, &dict, - GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret && (ret != -ENODATA)) { - gf_msg (this->name, GF_LOG_ERROR, -ret, 0, "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - - if (!ret) - goto out; + dict_t *dict = NULL; + gf_defrag_info_t *defrag = NULL; + uuid_t *uuid_ptr = NULL; + int ret = -1; + int i = 0; + int j = 0; + + defrag = conf->defrag; + + if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) { + /* Find local subvolumes */ + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, + NULL, NULL); + if (ret && (ret != -ENODATA)) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; } - ret = syncop_getxattr (this, loc, &dict, - GF_REBAL_OLD_FIND_LOCAL_SUBVOL, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, 0, "local " - "subvolume determination failed with error: %d", - -ret); - ret = -1; - goto out; - } - ret = 0; + if (!ret) + goto out; + } + + ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; + } + ret = 0; out: - if (ret) { - return ret; - } + if (ret) { + return ret; + } - for (i = 0 ; i < conf->local_subvols_cnt; i++) { - gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: " - "%s", conf->local_subvols[i]->name); + for (i = 0; i < conf->local_subvols_cnt; i++) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "local subvol: " + "%s", + conf->local_subvols[i]->name); - for (j = 0; j < conf->local_nodeuuids[i].count; j++) { - uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid); - gf_msg (this->name, GF_LOG_INFO, 0, 0, - "node uuid : %s", - uuid_utoa(*uuid_ptr)); - } + for (j = 0; j < conf->local_nodeuuids[i].count; j++) { + uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid); + gf_msg(this->name, GF_LOG_INFO, 0, 0, "node uuid : %s", + uuid_utoa(*uuid_ptr)); } + } - return ret; + return ret; } - /* Functions for the rebalance estimates feature */ uint64_t -gf_defrag_subvol_file_size (xlator_t *this, loc_t *root_loc) +gf_defrag_subvol_file_size(xlator_t *this, loc_t *root_loc) { - int ret = -1; - struct statvfs buf = {0,}; + int ret = -1; + struct statvfs buf = { + 0, + }; - if (!this) - return 0; + if (!this) + return 0; - ret = syncop_statfs (this, root_loc, &buf, NULL, NULL); - if (ret) { - /* Aargh! */ - return 0; - } - return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize); + ret = syncop_statfs(this, root_loc, &buf, NULL, NULL); + if (ret) { + /* Aargh! */ + return 0; + } + return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize); } uint64_t -gf_defrag_total_file_size (xlator_t *this, loc_t *root_loc) +gf_defrag_total_file_size(xlator_t *this, loc_t *root_loc) { - dht_conf_t *conf = NULL; - int i = 0; - uint64_t size_files = 0; - uint64_t total_size = 0; - - conf = this->private; - if (!conf) { - return 0; - } - - for (i = 0 ; i < conf->local_subvols_cnt; i++) { - size_files = gf_defrag_subvol_file_size (conf->local_subvols[i], - root_loc); - total_size += size_files; - gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s," - "cnt = %"PRIu64, conf->local_subvols[i]->name, - size_files); - } - - gf_msg (this->name, GF_LOG_INFO, 0, 0, - "Total size files = %"PRIu64, total_size); + dht_conf_t *conf = NULL; + int i = 0; + uint64_t size_files = 0; + uint64_t total_size = 0; - return total_size; + conf = this->private; + if (!conf) { + return 0; + } + + for (i = 0; i < conf->local_subvols_cnt; i++) { + size_files = gf_defrag_subvol_file_size(conf->local_subvols[i], + root_loc); + total_size += size_files; + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "local subvol: %s," + "cnt = %" PRIu64, + conf->local_subvols[i]->name, size_files); + } + + gf_msg(this->name, GF_LOG_INFO, 0, 0, "Total size files = %" PRIu64, + total_size); + + return total_size; } - -static void* -dht_file_counter_thread (void *args) +static void * +dht_file_counter_thread(void *args) { - gf_defrag_info_t *defrag = NULL; - loc_t root_loc = {0,}; - struct timespec time_to_wait = {0,}; - struct timeval now = {0,}; - uint64_t tmp_size = 0; - - - if (!args) - return NULL; - - defrag = (gf_defrag_info_t *) args; - dht_build_root_loc (defrag->root_inode, &root_loc); - - while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { - - gettimeofday (&now, NULL); - time_to_wait.tv_sec = now.tv_sec + 600; - time_to_wait.tv_nsec = 0; + gf_defrag_info_t *defrag = NULL; + loc_t root_loc = { + 0, + }; + struct timespec time_to_wait = { + 0, + }; + struct timeval now = { + 0, + }; + uint64_t tmp_size = 0; + + if (!args) + return NULL; + defrag = (gf_defrag_info_t *)args; + dht_build_root_loc(defrag->root_inode, &root_loc); - pthread_mutex_lock (&defrag->fc_mutex); - pthread_cond_timedwait (&defrag->fc_wakeup_cond, - &defrag->fc_mutex, - &time_to_wait); + while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) { + gettimeofday(&now, NULL); + time_to_wait.tv_sec = now.tv_sec + 600; + time_to_wait.tv_nsec = 0; - pthread_mutex_unlock (&defrag->fc_mutex); + pthread_mutex_lock(&defrag->fc_mutex); + pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex, + &time_to_wait); + pthread_mutex_unlock(&defrag->fc_mutex); - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - break; + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + break; - tmp_size = gf_defrag_total_file_size (defrag->this, - &root_loc); + tmp_size = gf_defrag_total_file_size(defrag->this, &root_loc); - gf_log ("dht", GF_LOG_INFO, - "tmp data size =%"PRIu64, - tmp_size); + gf_log("dht", GF_LOG_INFO, "tmp data size =%" PRIu64, tmp_size); - if (!tmp_size) { - gf_msg ("dht", GF_LOG_ERROR, 0, 0, "Failed to get " - "the total data size. Unable to estimate " - "time to complete rebalance."); - } else { - g_totalsize = tmp_size; - gf_msg_debug ("dht", 0, - "total data size =%"PRIu64, - g_totalsize); - } + if (!tmp_size) { + gf_msg("dht", GF_LOG_ERROR, 0, 0, + "Failed to get " + "the total data size. Unable to estimate " + "time to complete rebalance."); + } else { + g_totalsize = tmp_size; + gf_msg_debug("dht", 0, "total data size =%" PRIu64, g_totalsize); } + } - return NULL; + return NULL; } int -gf_defrag_estimates_cleanup (xlator_t *this, gf_defrag_info_t *defrag, - pthread_t filecnt_thread) +gf_defrag_estimates_cleanup(xlator_t *this, gf_defrag_info_t *defrag, + pthread_t filecnt_thread) { - int ret = -1; - - /* Wake up the filecounter thread. - * By now the defrag status will no longer be - * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop. - */ - pthread_mutex_lock (&defrag->fc_mutex); - { - pthread_cond_broadcast (&defrag->fc_wakeup_cond); - } - pthread_mutex_unlock (&defrag->fc_mutex); - - ret = pthread_join (filecnt_thread, NULL); - if (ret) { - gf_msg ("dht", GF_LOG_ERROR, ret, 0, - "file_counter_thread: pthread_join failed."); - ret = -1; - } - return ret; + int ret = -1; + + /* Wake up the filecounter thread. + * By now the defrag status will no longer be + * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop. + */ + pthread_mutex_lock(&defrag->fc_mutex); + { + pthread_cond_broadcast(&defrag->fc_wakeup_cond); + } + pthread_mutex_unlock(&defrag->fc_mutex); + + ret = pthread_join(filecnt_thread, NULL); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, ret, 0, + "file_counter_thread: pthread_join failed."); + ret = -1; + } + return ret; } - int -gf_defrag_estimates_init (xlator_t *this, loc_t *loc, - pthread_t *filecnt_thread) +gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread) { - int ret = -1; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - - conf = this->private; - defrag = conf->defrag; - - g_totalsize = gf_defrag_total_file_size (this, loc); - if (!g_totalsize) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get " - "the total data size. Unable to estimate " - "time to complete rebalance."); - goto out; - } - - ret = gf_thread_create (filecnt_thread, NULL, - &dht_file_counter_thread, - (void *)defrag, "dhtfcnt"); - - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ret, 0, "Failed to " - "create the file counter thread "); - ret = -1; - goto out; - } - ret = 0; + int ret = -1; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + + conf = this->private; + defrag = conf->defrag; + + g_totalsize = gf_defrag_total_file_size(this, loc); + if (!g_totalsize) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Failed to get " + "the total data size. Unable to estimate " + "time to complete rebalance."); + goto out; + } + + ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread, + (void *)defrag, "dhtfcnt"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ret, 0, + "Failed to " + "create the file counter thread "); + ret = -1; + goto out; + } + ret = 0; out: - return ret; + return ret; } - /* Init and cleanup functions for parallel file migration*/ int -gf_defrag_parallel_migration_init (xlator_t *this, gf_defrag_info_t *defrag, - pthread_t **tid_array, int *thread_index) +gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag, + pthread_t **tid_array, int *thread_index) { - int ret = -1; - int thread_spawn_count = 0; - int index = 0; - pthread_t *tid = NULL; - char thread_name[GF_THREAD_NAMEMAX] = {0,}; - - if (!defrag) - goto out; - - /* Initialize global entry queue */ - defrag->queue = GF_CALLOC (1, sizeof (struct dht_container), - gf_dht_mt_container_t); - - if (!defrag->queue) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, 0, - "Failed to initialise migration queue"); - ret = -1; - goto out; - } + int ret = -1; + int thread_spawn_count = 0; + int index = 0; + pthread_t *tid = NULL; + char thread_name[GF_THREAD_NAMEMAX] = { + 0, + }; + + if (!defrag) + goto out; + + /* Initialize global entry queue */ + defrag->queue = GF_CALLOC(1, sizeof(struct dht_container), + gf_dht_mt_container_t); + + if (!defrag->queue) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "Failed to initialise migration queue"); + ret = -1; + goto out; + } - INIT_LIST_HEAD (&(defrag->queue[0].list)); + INIT_LIST_HEAD(&(defrag->queue[0].list)); - thread_spawn_count = MAX (MAX_REBAL_THREADS, 4); + thread_spawn_count = MAX(MAX_REBAL_THREADS, 4); - gf_msg_debug (this->name, 0, "thread_spawn_count: %d", - thread_spawn_count); + gf_msg_debug(this->name, 0, "thread_spawn_count: %d", thread_spawn_count); - tid = GF_CALLOC (thread_spawn_count, sizeof (pthread_t), - gf_common_mt_pthread_t); - if (!tid) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, 0, - "Failed to create migration threads"); - ret = -1; - goto out; - } - defrag->current_thread_count = thread_spawn_count; - - /*Spawn Threads Here*/ - while (index < thread_spawn_count) { - snprintf (thread_name, sizeof(thread_name), - "dhtmig%d", ((index + 1) & 0x3ff)); - ret = gf_thread_create (&(tid[index]), NULL, - &gf_defrag_task, (void *)defrag, - thread_name); - if (ret != 0) { - gf_msg ("DHT", GF_LOG_ERROR, ret, 0, - "Thread[%d] creation failed. ", - index); - ret = -1; - goto out; - } else { - gf_log ("DHT", GF_LOG_INFO, "Thread[%d] " - "creation successful", index); - } - index++; + tid = GF_CALLOC(thread_spawn_count, sizeof(pthread_t), + gf_common_mt_pthread_t); + if (!tid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "Failed to create migration threads"); + ret = -1; + goto out; + } + defrag->current_thread_count = thread_spawn_count; + + /*Spawn Threads Here*/ + while (index < thread_spawn_count) { + snprintf(thread_name, sizeof(thread_name), "dhtmig%d", + ((index + 1) & 0x3ff)); + ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task, + (void *)defrag, thread_name); + if (ret != 0) { + gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ", + index); + ret = -1; + goto out; + } else { + gf_log("DHT", GF_LOG_INFO, + "Thread[%d] " + "creation successful", + index); } + index++; + } - ret = 0; + ret = 0; out: - *thread_index = index; - *tid_array = tid; + *thread_index = index; + *tid_array = tid; - return ret; + return ret; } int -gf_defrag_parallel_migration_cleanup (gf_defrag_info_t *defrag, - pthread_t *tid_array, int thread_index) +gf_defrag_parallel_migration_cleanup(gf_defrag_info_t *defrag, + pthread_t *tid_array, int thread_index) { - int ret = -1; - int i = 0; + int ret = -1; + int i = 0; - if (!defrag) - goto out; + if (!defrag) + goto out; - /* Wake up all migration threads */ - pthread_mutex_lock (&defrag->dfq_mutex); - { - defrag->crawl_done = 1; + /* Wake up all migration threads */ + pthread_mutex_lock(&defrag->dfq_mutex); + { + defrag->crawl_done = 1; - pthread_cond_broadcast (&defrag->parallel_migration_cond); - pthread_cond_broadcast (&defrag->df_wakeup_thread); - } - pthread_mutex_unlock (&defrag->dfq_mutex); + pthread_cond_broadcast(&defrag->parallel_migration_cond); + pthread_cond_broadcast(&defrag->df_wakeup_thread); + } + pthread_mutex_unlock(&defrag->dfq_mutex); - /*Wait for all the threads to complete their task*/ - for (i = 0; i < thread_index; i++) { - pthread_join (tid_array[i], NULL); - } + /*Wait for all the threads to complete their task*/ + for (i = 0; i < thread_index; i++) { + pthread_join(tid_array[i], NULL); + } - GF_FREE (tid_array); + GF_FREE(tid_array); - /* Cleanup the migration queue */ - if (defrag->queue) { - gf_dirent_free (defrag->queue[0].df_entry); - INIT_LIST_HEAD (&(defrag->queue[0].list)); - } + /* Cleanup the migration queue */ + if (defrag->queue) { + gf_dirent_free(defrag->queue[0].df_entry); + INIT_LIST_HEAD(&(defrag->queue[0].list)); + } - GF_FREE (defrag->queue); + GF_FREE(defrag->queue); - ret = 0; + ret = 0; out: - return ret; + return ret; } - - int -gf_defrag_start_crawl (void *data) +gf_defrag_start_crawl(void *data) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - dict_t *fix_layout = NULL; - dict_t *migrate_data = NULL; - dict_t *status = NULL; - glusterfs_ctx_t *ctx = NULL; - dht_methods_t *methods = NULL; - call_frame_t *statfs_frame = NULL; - xlator_t *old_THIS = NULL; - int ret = -1; - loc_t loc = {0,}; - struct iatt iatt = {0,}; - struct iatt parent = {0,}; - int thread_index = 0; - pthread_t *tid = NULL; - pthread_t filecnt_thread; - gf_boolean_t is_tier_detach = _gf_false; - gf_boolean_t fc_thread_started = _gf_false; - - this = data; - if (!this) - goto exit; - - ctx = this->ctx; - if (!ctx) - goto exit; - - conf = this->private; - if (!conf) - goto exit; - - defrag = conf->defrag; - if (!defrag) - goto exit; - - gettimeofday (&defrag->start_time, NULL); - dht_build_root_inode (this, &defrag->root_inode); - if (!defrag->root_inode) - goto out; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + dict_t *fix_layout = NULL; + dict_t *migrate_data = NULL; + dict_t *status = NULL; + glusterfs_ctx_t *ctx = NULL; + dht_methods_t *methods = NULL; + call_frame_t *statfs_frame = NULL; + xlator_t *old_THIS = NULL; + int ret = -1; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent = { + 0, + }; + int thread_index = 0; + pthread_t *tid = NULL; + pthread_t filecnt_thread; + gf_boolean_t is_tier_detach = _gf_false; + gf_boolean_t fc_thread_started = _gf_false; + + this = data; + if (!this) + goto exit; + + ctx = this->ctx; + if (!ctx) + goto exit; + + conf = this->private; + if (!conf) + goto exit; + + defrag = conf->defrag; + if (!defrag) + goto exit; + + gettimeofday(&defrag->start_time, NULL); + dht_build_root_inode(this, &defrag->root_inode); + if (!defrag->root_inode) + goto out; + + dht_build_root_loc(defrag->root_inode, &loc); + + /* fix-layout on '/' first */ + + ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED, + "Failed to start rebalance: look up on / failed"); + ret = -1; + goto out; + } - dht_build_root_loc (defrag->root_inode, &loc); + old_THIS = THIS; + THIS = this; - /* fix-layout on '/' first */ + statfs_frame = create_frame(this, this->ctx->pool); + if (!statfs_frame) { + gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, + "Insufficient memory. Frame creation failed"); + ret = -1; + goto out; + } - ret = syncop_lookup (this, &loc, &iatt, &parent, NULL, NULL); + /* async statfs update for honoring min-free-disk */ + dht_get_du_info(statfs_frame, this, &loc); + THIS = old_THIS; - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_START_FAILED, - "Failed to start rebalance: look up on / failed"); - ret = -1; - goto out; - } + fix_layout = dict_new(); + if (!fix_layout) { + ret = -1; + goto out; + } + + /* + * Unfortunately, we can't do special xattrs (like fix.layout) and + * real ones in the same call currently, and changing it seems + * riskier than just doing two calls. + */ + + gf_log(this->name, GF_LOG_INFO, "%s using commit hash %u", __func__, + conf->vol_commit_hash); + + ret = dict_set_uint32(fix_layout, conf->commithash_xattr_name, + conf->vol_commit_hash); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to set %s", + conf->commithash_xattr_name); + defrag->total_failures++; + ret = -1; + goto out; + } + + ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set commit hash on %s. " + "Rebalance cannot proceed.", + loc.path); + defrag->total_failures++; + ret = -1; + goto out; + } + + /* We now return to our regularly scheduled program. */ + + ret = dict_set_str(fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED, + "Failed to start rebalance:" + "Failed to set dictionary value: key = %s", + GF_XATTR_FIX_LAYOUT_KEY); + defrag->total_failures++; + ret = -1; + goto out; + } - old_THIS = THIS; - THIS = this; + defrag->new_commit_hash = conf->vol_commit_hash; - statfs_frame = create_frame (this, this->ctx->pool); - if (!statfs_frame) { - gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM, - "Insufficient memory. Frame creation failed"); - ret = -1; - goto out; - } + ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_FAILED, + "fix layout on %s failed", loc.path); + defrag->total_failures++; + ret = -1; + goto out; + } - /* async statfs update for honoring min-free-disk */ - dht_get_du_info (statfs_frame, this, &loc); - THIS = old_THIS; + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + /* We need to migrate files */ - fix_layout = dict_new (); - if (!fix_layout) { - ret = -1; - goto out; + migrate_data = dict_new(); + if (!migrate_data) { + defrag->total_failures++; + ret = -1; + goto out; } - - /* - * Unfortunately, we can't do special xattrs (like fix.layout) and - * real ones in the same call currently, and changing it seems - * riskier than just doing two calls. - */ - - gf_log (this->name, GF_LOG_INFO, "%s using commit hash %u", - __func__, conf->vol_commit_hash); - - ret = dict_set_uint32 (fix_layout, conf->commithash_xattr_name, - conf->vol_commit_hash); + ret = dict_set_str( + migrate_data, GF_XATTR_FILE_MIGRATE_KEY, + (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) ? "force" : "non-force"); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set %s", conf->commithash_xattr_name); - defrag->total_failures++; - ret = -1; - goto out; + defrag->total_failures++; + ret = -1; + goto out; } - ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL); + ret = dht_init_local_subvols_and_nodeuuids(this, conf, &loc); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to set commit hash on %s. " - "Rebalance cannot proceed.", - loc.path); - defrag->total_failures++; - ret = -1; - goto out; + ret = -1; + goto out; } - /* We now return to our regularly scheduled program. */ - - ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes"); + /* Initialise the structures required for parallel migration */ + ret = gf_defrag_parallel_migration_init(this, defrag, &tid, + &thread_index); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_START_FAILED, - "Failed to start rebalance:" - "Failed to set dictionary value: key = %s", - GF_XATTR_FIX_LAYOUT_KEY); - defrag->total_failures++; - ret = -1; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Aborting rebalance."); + goto out; } - defrag->new_commit_hash = conf->vol_commit_hash; - - ret = syncop_setxattr (this, &loc, fix_layout, 0, NULL, NULL); + ret = gf_defrag_estimates_init(this, &loc, &filecnt_thread); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - DHT_MSG_REBALANCE_FAILED, - "fix layout on %s failed", - loc.path); - defrag->total_failures++; - ret = -1; - goto out; - } - - if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { - /* We need to migrate files */ - - migrate_data = dict_new (); - if (!migrate_data) { - defrag->total_failures++; - ret = -1; - goto out; - } - ret = dict_set_str (migrate_data, GF_XATTR_FILE_MIGRATE_KEY, - (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) - ? "force" : "non-force"); - if (ret) { - defrag->total_failures++; - ret = -1; - goto out; - } - - ret = dht_init_local_subvols_and_nodeuuids (this, conf, &loc); - if (ret) { - ret = -1; - goto out; - } - - /* Initialise the structures required for parallel migration */ - ret = gf_defrag_parallel_migration_init (this, defrag, &tid, - &thread_index); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, - "Aborting rebalance."); - goto out; - } - - ret = gf_defrag_estimates_init (this, &loc, &filecnt_thread); - if (ret) { - /* Not a fatal error. Allow the rebalance to proceed*/ - ret = 0; - } else { - fc_thread_started = _gf_true; - } - } - - - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Fix layout for attach tier */ - ret = gf_tier_start_fix_layout (this, &loc, defrag, fix_layout); - if (ret) { - goto out; - } - - methods = &(conf->methods); - - /* Calling tier_start of tier.c */ - methods->migration_other(this, defrag); - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - - ret = dict_set_str (migrate_data, - GF_XATTR_FILE_MIGRATE_KEY, - "force"); - if (ret) - goto out; - - } + /* Not a fatal error. Allow the rebalance to proceed*/ + ret = 0; } else { - ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout, - migrate_data); - if (ret && ret != 2) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (ret != 2 && gf_defrag_settle_hash - (this, defrag, &loc, fix_layout) != 0) { - defrag->total_failures++; - ret = -1; - goto out; - } - - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) - is_tier_detach = _gf_true; - + fc_thread_started = _gf_true; } + } - gf_log ("DHT", GF_LOG_INFO, "crawling file-system completed"); -out: - - /* We are here means crawling the entire file system is done - or something failed. Set defrag->crawl_done flag to intimate - the migrator threads to exhaust the defrag->queue and terminate*/ - + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + /* Fix layout for attach tier */ + ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout); if (ret) { - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + goto out; } + methods = &(conf->methods); - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { - /* Wait for the tier fixlayout to - * complete if its was started.*/ - gf_tier_wait_fix_lookup (defrag); - } - - if (is_tier_detach && ret == 0) { - /* If it was a detach remove the tier fix-layout - * xattr on root. Ignoring the failure, as nothing has to be - * done, logging is done in gf_tier_clear_fix_layout */ - gf_tier_clear_fix_layout (this, &loc, defrag); - } - - gf_defrag_parallel_migration_cleanup (defrag, tid, thread_index); - - if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && - (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) { - defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + /* Calling tier_start of tier.c */ + methods->migration_other(this, defrag); + if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { + ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, + "force"); + if (ret) + goto out; } - - if (fc_thread_started) { - gf_defrag_estimates_cleanup (this, defrag, filecnt_thread); + } else { + ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, + migrate_data); + if (ret && ret != 2) { + defrag->total_failures++; + ret = -1; + goto out; } - dht_send_rebalance_event (this, defrag->cmd, defrag->defrag_status); - - LOCK (&defrag->lock); - { - status = dict_new (); - gf_defrag_status_get (conf, status); - if (ctx && ctx->notify) - ctx->notify (GF_EN_DEFRAG_STATUS, status); - if (status) - dict_unref (status); - defrag->is_exiting = 1; + if (ret != 2 && + gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) { + defrag->total_failures++; + ret = -1; + goto out; } - UNLOCK (&defrag->lock); + if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || + defrag->cmd == GF_DEFRAG_CMD_DETACH_START) + is_tier_detach = _gf_true; + } - GF_FREE (defrag); - conf->defrag = NULL; - - if (migrate_data) - dict_unref (migrate_data); + gf_log("DHT", GF_LOG_INFO, "crawling file-system completed"); +out: - if (statfs_frame) { - STACK_DESTROY (statfs_frame->root); - } + /* We are here means crawling the entire file system is done + or something failed. Set defrag->crawl_done flag to intimate + the migrator threads to exhaust the defrag->queue and terminate*/ + + if (ret) { + defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + } + + if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) { + /* Wait for the tier fixlayout to + * complete if its was started.*/ + gf_tier_wait_fix_lookup(defrag); + } + + if (is_tier_detach && ret == 0) { + /* If it was a detach remove the tier fix-layout + * xattr on root. Ignoring the failure, as nothing has to be + * done, logging is done in gf_tier_clear_fix_layout */ + gf_tier_clear_fix_layout(this, &loc, defrag); + } + + gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index); + + if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && + (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) { + defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + } + + if (fc_thread_started) { + gf_defrag_estimates_cleanup(this, defrag, filecnt_thread); + } + + dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status); + + LOCK(&defrag->lock); + { + status = dict_new(); + gf_defrag_status_get(conf, status); + if (ctx && ctx->notify) + ctx->notify(GF_EN_DEFRAG_STATUS, status); + if (status) + dict_unref(status); + defrag->is_exiting = 1; + } + UNLOCK(&defrag->lock); + + GF_FREE(defrag); + conf->defrag = NULL; + + if (migrate_data) + dict_unref(migrate_data); + + if (statfs_frame) { + STACK_DESTROY(statfs_frame->root); + } exit: - return ret; + return ret; } - - static int -gf_defrag_done (int ret, call_frame_t *sync_frame, void *data) +gf_defrag_done(int ret, call_frame_t *sync_frame, void *data) { - gf_listener_stop (sync_frame->this); + gf_listener_stop(sync_frame->this); - STACK_DESTROY (sync_frame->root); - kill (getpid(), SIGTERM); - return 0; + STACK_DESTROY(sync_frame->root); + kill(getpid(), SIGTERM); + return 0; } void * -gf_defrag_start (void *data) +gf_defrag_start(void *data) { - int ret = -1; - call_frame_t *frame = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - xlator_t *old_THIS = NULL; - - this = data; - conf = this->private; - if (!conf) - goto out; + int ret = -1; + call_frame_t *frame = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + xlator_t *old_THIS = NULL; - defrag = conf->defrag; - if (!defrag) - goto out; + this = data; + conf = this->private; + if (!conf) + goto out; - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; + defrag = conf->defrag; + if (!defrag) + goto out; + + frame = create_frame(this, this->ctx->pool); + if (!frame) + goto out; - frame->root->pid = GF_CLIENT_PID_DEFRAG; + frame->root->pid = GF_CLIENT_PID_DEFRAG; - defrag->pid = frame->root->pid; + defrag->pid = frame->root->pid; - defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; + defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; - old_THIS = THIS; - THIS = this; - ret = synctask_new (this->ctx->env, gf_defrag_start_crawl, - gf_defrag_done, frame, this); + old_THIS = THIS; + THIS = this; + ret = synctask_new(this->ctx->env, gf_defrag_start_crawl, gf_defrag_done, + frame, this); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_REBALANCE_START_FAILED, - "Could not create task for rebalance"); - THIS = old_THIS; + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED, + "Could not create task for rebalance"); + THIS = old_THIS; out: - return NULL; + return NULL; } - uint64_t -gf_defrag_get_estimates_based_on_size (dht_conf_t *conf) +gf_defrag_get_estimates_based_on_size(dht_conf_t *conf) { - gf_defrag_info_t *defrag = NULL; - double rate_processed = 0; - uint64_t total_processed = 0; - uint64_t tmp_count = 0; - uint64_t time_to_complete = 0; - struct timeval now = {0,}; - double elapsed = 0; + gf_defrag_info_t *defrag = NULL; + double rate_processed = 0; + uint64_t total_processed = 0; + uint64_t tmp_count = 0; + uint64_t time_to_complete = 0; + struct timeval now = { + 0, + }; + double elapsed = 0; - defrag = conf->defrag; + defrag = conf->defrag; - if (!g_totalsize) - goto out; + if (!g_totalsize) + goto out; - gettimeofday (&now, NULL); - elapsed = now.tv_sec - defrag->start_time.tv_sec; + gettimeofday(&now, NULL); + elapsed = now.tv_sec - defrag->start_time.tv_sec; - /* Don't calculate the estimates for the first 10 minutes. - * It is unlikely to be accurate and estimates are not required - * if the process finishes in less than 10 mins. - */ + /* Don't calculate the estimates for the first 10 minutes. + * It is unlikely to be accurate and estimates are not required + * if the process finishes in less than 10 mins. + */ - if (elapsed < ESTIMATE_START_INTERVAL) { - gf_msg (THIS->name, GF_LOG_INFO, 0, 0, - "Rebalance estimates will not be available for the " - "first %d seconds.", ESTIMATE_START_INTERVAL); + if (elapsed < ESTIMATE_START_INTERVAL) { + gf_msg(THIS->name, GF_LOG_INFO, 0, 0, + "Rebalance estimates will not be available for the " + "first %d seconds.", + ESTIMATE_START_INTERVAL); - goto out; - } + goto out; + } - total_processed = defrag->size_processed; + total_processed = defrag->size_processed; - /* rate at which files processed */ - rate_processed = (total_processed)/elapsed; + /* rate at which files processed */ + rate_processed = (total_processed) / elapsed; - tmp_count = g_totalsize; + tmp_count = g_totalsize; - if (rate_processed) { - time_to_complete = (tmp_count)/rate_processed; + if (rate_processed) { + time_to_complete = (tmp_count) / rate_processed; - } else { - gf_msg (THIS->name, GF_LOG_ERROR, 0, 0, - "Unable to calculate estimated time for rebalance"); - } + } else { + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, + "Unable to calculate estimated time for rebalance"); + } - gf_log (THIS->name, GF_LOG_INFO, - "TIME: (size) total_processed=%"PRIu64" tmp_cnt = %"PRIu64"," - "rate_processed=%f, elapsed = %f", total_processed, tmp_count, - rate_processed, elapsed); + gf_log(THIS->name, GF_LOG_INFO, + "TIME: (size) total_processed=%" PRIu64 " tmp_cnt = %" PRIu64 + "," + "rate_processed=%f, elapsed = %f", + total_processed, tmp_count, rate_processed, elapsed); out: - return time_to_complete; + return time_to_complete; } - int -gf_defrag_status_get (dht_conf_t *conf, dict_t *dict) +gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) { - int ret = 0; - uint64_t files = 0; - uint64_t size = 0; - uint64_t lookup = 0; - uint64_t failures = 0; - uint64_t skipped = 0; - uint64_t promoted = 0; - uint64_t demoted = 0; - char *status = ""; - double elapsed = 0; - struct timeval end = {0,}; - uint64_t time_to_complete = 0; - uint64_t time_left = 0; - gf_defrag_info_t *defrag = conf->defrag; - - if (!defrag) - goto out; - - ret = 0; - if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) - goto out; - - files = defrag->total_files; - size = defrag->total_data; - lookup = defrag->num_files_lookedup; - failures = defrag->total_failures; - skipped = defrag->skipped; - promoted = defrag->total_files_promoted; - demoted = defrag->total_files_demoted; - - gettimeofday (&end, NULL); - - elapsed = end.tv_sec - defrag->start_time.tv_sec; - - - /* The rebalance is still in progress */ - - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) - && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { - - time_to_complete = gf_defrag_get_estimates_based_on_size (conf); - - if (time_to_complete && (time_to_complete > elapsed)) - time_left = time_to_complete - elapsed; - - gf_log (THIS->name, GF_LOG_INFO, - "TIME: Estimated total time to complete (size)= %"PRIu64 - " seconds, seconds left = %"PRIu64"", - time_to_complete, time_left); - } - - if (!dict) - goto log; - - ret = dict_set_uint64 (dict, "promoted", promoted); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set promoted count"); - - ret = dict_set_uint64 (dict, "demoted", demoted); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set demoted count"); - - ret = dict_set_uint64 (dict, "files", files); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set file count"); - - ret = dict_set_uint64 (dict, "size", size); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set size of xfer"); - - ret = dict_set_uint64 (dict, "lookups", lookup); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set lookedup file count"); - - - ret = dict_set_int32 (dict, "status", defrag->defrag_status); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set status"); - - ret = dict_set_double (dict, "run-time", elapsed); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set run-time"); - - ret = dict_set_uint64 (dict, "failures", failures); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set failure count"); - - ret = dict_set_uint64 (dict, "skipped", skipped); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set skipped file count"); - - ret = dict_set_uint64 (dict, "time-left", time_left); - if (ret) - gf_log (THIS->name, GF_LOG_WARNING, - "failed to set time-left"); + int ret = 0; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + uint64_t failures = 0; + uint64_t skipped = 0; + uint64_t promoted = 0; + uint64_t demoted = 0; + char *status = ""; + double elapsed = 0; + struct timeval end = { + 0, + }; + uint64_t time_to_complete = 0; + uint64_t time_left = 0; + gf_defrag_info_t *defrag = conf->defrag; + + if (!defrag) + goto out; + + ret = 0; + if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) + goto out; + + files = defrag->total_files; + size = defrag->total_data; + lookup = defrag->num_files_lookedup; + failures = defrag->total_failures; + skipped = defrag->skipped; + promoted = defrag->total_files_promoted; + demoted = defrag->total_files_demoted; + + gettimeofday(&end, NULL); + + elapsed = end.tv_sec - defrag->start_time.tv_sec; + + /* The rebalance is still in progress */ + + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { + time_to_complete = gf_defrag_get_estimates_based_on_size(conf); + + if (time_to_complete && (time_to_complete > elapsed)) + time_left = time_to_complete - elapsed; + + gf_log(THIS->name, GF_LOG_INFO, + "TIME: Estimated total time to complete (size)= %" PRIu64 + " seconds, seconds left = %" PRIu64 "", + time_to_complete, time_left); + } + + if (!dict) + goto log; + + ret = dict_set_uint64(dict, "promoted", promoted); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count"); + + ret = dict_set_uint64(dict, "demoted", demoted); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count"); + + ret = dict_set_uint64(dict, "files", files); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count"); + + ret = dict_set_uint64(dict, "size", size); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set size of xfer"); + + ret = dict_set_uint64(dict, "lookups", lookup); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set lookedup file count"); + + ret = dict_set_int32(dict, "status", defrag->defrag_status); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set status"); + + ret = dict_set_double(dict, "run-time", elapsed); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set run-time"); + + ret = dict_set_uint64(dict, "failures", failures); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set failure count"); + + ret = dict_set_uint64(dict, "skipped", skipped); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set skipped file count"); + + ret = dict_set_uint64(dict, "time-left", time_left); + if (ret) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left"); log: - switch (defrag->defrag_status) { + switch (defrag->defrag_status) { case GF_DEFRAG_STATUS_NOT_STARTED: - status = "not started"; - break; + status = "not started"; + break; case GF_DEFRAG_STATUS_STARTED: - status = "in progress"; - break; + status = "in progress"; + break; case GF_DEFRAG_STATUS_STOPPED: - status = "stopped"; - break; + status = "stopped"; + break; case GF_DEFRAG_STATUS_COMPLETE: - status = "completed"; - break; + status = "completed"; + break; case GF_DEFRAG_STATUS_FAILED: - status = "failed"; - break; + status = "failed"; + break; default: - break; - } - - gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, - "Rebalance is %s. Time taken is %.2f secs", - status, elapsed); - gf_msg (THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, - "Files migrated: %"PRIu64", size: %" - PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: " - "%"PRIu64, files, size, lookup, failures, skipped); + break; + } + + gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, + "Rebalance is %s. Time taken is %.2f secs", status, elapsed); + gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, + "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64 + ", failures: %" PRIu64 + ", skipped: " + "%" PRIu64, + files, size, lookup, failures, skipped); out: - return 0; + return 0; } void -gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state) +gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state) { - pthread_mutex_lock (&tier_conf->pause_mutex); - tier_conf->pause_state = state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); + tier_conf->pause_state = state; + pthread_mutex_unlock(&tier_conf->pause_mutex); } - tier_pause_state_t -gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf) +gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf) { - int state; + int state; - pthread_mutex_lock (&tier_conf->pause_mutex); - state = tier_conf->pause_state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); + state = tier_conf->pause_state; + pthread_mutex_unlock(&tier_conf->pause_mutex); - return state; + return state; } tier_pause_state_t -gf_defrag_check_pause_tier (gf_tier_conf_t *tier_conf) +gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf) { - int woke = 0; - int state = -1; + int woke = 0; + int state = -1; - pthread_mutex_lock (&tier_conf->pause_mutex); + pthread_mutex_lock(&tier_conf->pause_mutex); - if (tier_conf->pause_state == TIER_RUNNING) - goto out; + if (tier_conf->pause_state == TIER_RUNNING) + goto out; - if (tier_conf->pause_state == TIER_PAUSED) - goto out; + if (tier_conf->pause_state == TIER_PAUSED) + goto out; - if (tier_conf->promote_in_progress || - tier_conf->demote_in_progress) - goto out; + if (tier_conf->promote_in_progress || tier_conf->demote_in_progress) + goto out; - tier_conf->pause_state = TIER_PAUSED; + tier_conf->pause_state = TIER_PAUSED; - if (tier_conf->pause_synctask) { - synctask_wake (tier_conf->pause_synctask); - tier_conf->pause_synctask = 0; - woke = 1; - } + if (tier_conf->pause_synctask) { + synctask_wake(tier_conf->pause_synctask); + tier_conf->pause_synctask = 0; + woke = 1; + } - gf_msg ("tier", GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "woken %d", woke); + gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke); - gf_event (EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); + gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname); out: - state = tier_conf->pause_state; + state = tier_conf->pause_state; - pthread_mutex_unlock (&tier_conf->pause_mutex); + pthread_mutex_unlock(&tier_conf->pause_mutex); - return state; + return state; } void -gf_defrag_pause_tier_timeout (void *data) +gf_defrag_pause_tier_timeout(void *data) { - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + gf_defrag_info_t *defrag = NULL; - this = (xlator_t *) data; - GF_VALIDATE_OR_GOTO ("tier", this, out); + this = (xlator_t *)data; + GF_VALIDATE_OR_GOTO("tier", this, out); - conf = this->private; - GF_VALIDATE_OR_GOTO (this->name, conf, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO (this->name, defrag, out); + defrag = conf->defrag; + GF_VALIDATE_OR_GOTO(this->name, defrag, out); - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Request pause timer timeout"); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Request pause timer timeout"); - gf_defrag_check_pause_tier (&defrag->tier_conf); + gf_defrag_check_pause_tier(&defrag->tier_conf); out: - return; + return; } int -gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag) +gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) { - int ret = 0; - struct timespec delta = {0,}; - int delay = 2; - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) - goto out; - - /* - * Set flag requesting to pause tiering. Wait 'delay' seconds for - * tiering to actually stop as indicated by the pause state - * before returning success or failure. - */ - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_REQUEST_PAUSE); - - /* - * If migration is not underway, can pause immediately. - */ - gf_defrag_check_pause_tier (&defrag->tier_conf); - if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Request pause tier"); - - defrag->tier_conf.pause_synctask = synctask_get (); - delta.tv_sec = delay; - delta.tv_nsec = 0; - defrag->tier_conf.pause_timer = - gf_timer_call_after (this->ctx, delta, - gf_defrag_pause_tier_timeout, - this); - - synctask_yield (defrag->tier_conf.pause_synctask); - - if (gf_defrag_get_pause_state (&defrag->tier_conf) == TIER_PAUSED) - goto out; - - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING); - - ret = -1; + int ret = 0; + struct timespec delta = { + 0, + }; + int delay = 2; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) + goto out; + + /* + * Set flag requesting to pause tiering. Wait 'delay' seconds for + * tiering to actually stop as indicated by the pause state + * before returning success or failure. + */ + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); + + /* + * If migration is not underway, can pause immediately. + */ + gf_defrag_check_pause_tier(&defrag->tier_conf); + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; + + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Request pause tier"); + + defrag->tier_conf.pause_synctask = synctask_get(); + delta.tv_sec = delay; + delta.tv_nsec = 0; + defrag->tier_conf.pause_timer = gf_timer_call_after( + this->ctx, delta, gf_defrag_pause_tier_timeout, this); + + synctask_yield(defrag->tier_conf.pause_synctask); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; + + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); + + ret = -1; out: - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_PAUSED, - "Pause tiering ret=%d", ret); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, + "Pause tiering ret=%d", ret); - return ret; + return ret; } int -gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag) +gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag) { - gf_msg (this->name, GF_LOG_DEBUG, 0, - DHT_MSG_TIER_RESUME, - "Pause end. Resume tiering"); + gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME, + "Pause end. Resume tiering"); - gf_defrag_set_pause_state (&defrag->tier_conf, TIER_RUNNING); + gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - gf_event (EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); + gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname); - return 0; + return 0; } int -gf_defrag_start_detach_tier (gf_defrag_info_t *defrag) +gf_defrag_start_detach_tier(gf_defrag_info_t *defrag) { - defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; + defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER; - return 0; + return 0; } int -gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status, - dict_t *output) +gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) { - /* TODO: set a variable 'stop_defrag' here, it should be checked - in defrag loop */ - int ret = -1; - gf_defrag_info_t *defrag = conf->defrag; + /* TODO: set a variable 'stop_defrag' here, it should be checked + in defrag loop */ + int ret = -1; + gf_defrag_info_t *defrag = conf->defrag; - GF_ASSERT (defrag); + GF_ASSERT(defrag); - if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { - goto out; - } + if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { + goto out; + } - gf_msg ("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED, - "Received stop command on rebalance"); - defrag->defrag_status = status; + gf_msg("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED, + "Received stop command on rebalance"); + defrag->defrag_status = status; - if (output) - gf_defrag_status_get (conf, output); - ret = 0; + if (output) + gf_defrag_status_get(conf, output); + ret = 0; out: - gf_msg_debug ("", 0, "Returning %d", ret); - return ret; + gf_msg_debug("", 0, "Returning %d", ret); + return ret; } |