diff options
author | Amar Tumballi <amar@gluster.com> | 2011-07-19 20:51:22 +0530 |
---|---|---|
committer | Anand Avati <avati@gluster.com> | 2011-07-31 23:29:12 -0700 |
commit | a59fc489bebce1c81d0a22d53794d7e41f3f4067 (patch) | |
tree | 52a8ec2aa4f0872a923849edda0f2bb9292fd2e3 | |
parent | 12752faeb094a0d7d240418b8518e78dd28f5fac (diff) |
glusterd rebalance: handle the write failure properly
Also, make sure the sizes are the same before renaming the target file
to the original file, thereby preventing possible data loss.
Change-Id: Ie88224ba62a4604f8c0149f84fa462abfbd6ad78
BUG: 3193
Reviewed-on: http://review.gluster.com/29
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 63 |
1 file changed, 53 insertions, 10 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index f8194a50ca2..f98dea0db88 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -154,9 +154,11 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) struct dirent *entry = NULL; struct stat stbuf = {0,}; struct stat new_stbuf = {0,}; + struct stat dst_stbuf = {0,}; char full_path[1024] = {0,}; char tmp_filename[1024] = {0,}; char value[16] = {0,}; + char file_not_copied_fully = 0; if (!volinfo->defrag) goto out; @@ -210,24 +212,25 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) while (1) { ret = read (src_fd, defrag->databuf, 131072); - if (!ret || (ret < 0)) { + if (ret < 0) { + file_not_copied_fully = 1; break; } + /* If EOF is hit, then we get 'ret == 0' */ + if (!ret) + break; + ret = write (dst_fd, defrag->databuf, ret); if (ret < 0) { + file_not_copied_fully = 1; break; } } - ret = lstat (full_path, &new_stbuf); - if (ret < 0) { - close (dst_fd); - close (src_fd); - continue; - } - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.st_mtime != stbuf.st_mtime) { + if (file_not_copied_fully) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to copy the file fully : %s (%s)", + full_path, strerror (errno)); close (dst_fd); close (src_fd); continue; @@ -254,6 +257,46 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) tmp_filename, strerror (errno)); } + ret = fstat (src_fd, &new_stbuf); + if (ret < 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get stat: %s (%s)", + full_path, strerror (errno)); + close (dst_fd); + close (src_fd); + continue; + } + + ret = fstat (dst_fd, &dst_stbuf); + if (ret < 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get stat on temp file: %s (%s)", + tmp_filename, strerror (errno)); + close (dst_fd); + close (src_fd); + 
continue; + } + + /* No need to rebalance, if there is some + activity on source file */ + if (new_stbuf.st_mtime != stbuf.st_mtime) { + gf_log (THIS->name, GF_LOG_WARNING, + "file got changed after we started copying %s", + full_path); + close (dst_fd); + close (src_fd); + continue; + } + + if (new_stbuf.st_size != dst_stbuf.st_size) { + gf_log (THIS->name, GF_LOG_WARNING, + "file sizes are not same : %s", + full_path); + close (dst_fd); + close (src_fd); + continue; + } + ret = rename (tmp_filename, full_path); if (ret != -1) { LOCK (&defrag->lock); |