diff options
author | Aravinda VK <avishwan@redhat.com> | 2015-12-02 19:37:55 +0530 |
---|---|---|
committer | Venky Shankar <vshankar@redhat.com> | 2016-02-26 02:15:01 -0800 |
commit | d136a789258e8f600e536717da156a242d8ed9a5 (patch) | |
tree | 452ce976e079b38584bdd7aa46e3fc7a0463642d /geo-replication/syncdaemon/resource.py | |
parent | 656a0e64e5f465561ed29297421ec150de32f2a1 (diff) |
geo-rep: Handling Rsync/Tar errors efficiently
Geo-rep processes Changelogs in Batch, if one file in batch
fails with rsync error that Changelog file is reprocessed multiple times.
After MAX_RETRY, it logs all the GFIDs from that batch as Skipped.
This patch addresses following issues,
1. When Rsync/Tar fails do not parse Changelog again for retry
2. When Rsync/Tar fails do not replay Entry operations, only retry
rsync/tar for those GFIDs
3. Log Error in Rsync/Tar only in the last Retry
4. Do not log Skipped GFIDs since Rsync/Tar errors are logged for
only failed files.
5. Changed Entry failures as Error instead of Warning
BUG: 1287723
Change-Id: Ie134ce2572693056ab9b9008cd8aa5b5d87f7975
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: http://review.gluster.org/12856
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon/resource.py')
-rw-r--r-- | geo-replication/syncdaemon/resource.py | 49 |
1 files changed, 26 insertions, 23 deletions
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 19363401e65..ac697eb39ed 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -906,7 +906,7 @@ class SlaveRemote(object): "RePCe major version mismatch: local %s, remote %s" % (exrv, rv)) - def rsync(self, files, *args): + def rsync(self, files, *args, **kw): """invoke rsync""" if not files: raise GsyncdError("no files to sync") @@ -932,12 +932,15 @@ class SlaveRemote(object): po.stdin.write(f) po.stdin.write('\0') - po.stdin.close() + stdout, stderr = po.communicate() + + if kw.get("log_err", False): + for errline in stderr.strip().split("\n")[:-1]: + logging.error("SYNC Error(Rsync): %s" % errline) if gconf.log_rsync_performance: - out = po.stdout.read() rsync_msg = [] - for line in out.split("\n"): + for line in stdout.split("\n"): if line.startswith("Number of files:") or \ line.startswith("Number of regular files transferred:") or \ line.startswith("Total file size:") or \ @@ -949,12 +952,10 @@ class SlaveRemote(object): line.startswith("sent "): rsync_msg.append(line) logging.info("rsync performance: %s" % ", ".join(rsync_msg)) - po.wait() - po.terminate_geterr(fail_on_err=False) return po - def tarssh(self, files, slaveurl): + def tarssh(self, files, slaveurl, log_err=False): """invoke tar+ssh -z (compress) can be use if needed, but omitting it now as it results in weird error (tar+ssh errors out (errcode: 2) @@ -975,15 +976,16 @@ class SlaveRemote(object): for f in files: p0.stdin.write(f) p0.stdin.write('\n') - p0.stdin.close() - # wait() for tar to terminate, collecting any errors, further + p0.stdin.close() + p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits. + # wait for tar to terminate, collecting any errors, further # waiting for transfer to complete - p0.wait() - p0.terminate_geterr(fail_on_err=False) + _, stderr1 = p1.communicate() - p1.wait() - p1.terminate_geterr(fail_on_err=False) + if log_err: + for errline in stderr1.strip().split("\n")[:-1]: + logging.error("SYNC Error(Untar): %s" % errline) return p1 @@ -1045,8 +1047,8 @@ class FILE(AbstractUrl, SlaveLocal, SlaveRemote): """inhibit the resource beyond""" os.chdir(self.path) - def rsync(self, files): - return sup(self, files, self.path) + def rsync(self, files, log_err=False): + return sup(self, files, self.path, log_err=log_err) class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): @@ -1460,11 +1462,11 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): else: sup(self, *args) - def rsync(self, files): - return sup(self, files, self.slavedir) + def rsync(self, files, log_err=False): + return sup(self, files, self.slavedir, log_err=log_err) - def tarssh(self, files): - return sup(self, files, self.slavedir) + def tarssh(self, files, log_err=False): + return sup(self, files, self.slavedir, log_err=log_err) class SSH(AbstractUrl, SlaveRemote): @@ -1571,12 +1573,13 @@ class SSH(AbstractUrl, SlaveRemote): self.fd_pair = (i, o) return 'should' - def rsync(self, files): + def rsync(self, files, log_err=False): return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + ["-p", str(gconf.ssh_port)] + gconf.ssh_ctl_args), - *(gconf.rsync_ssh_options.split() + [self.slaveurl])) + *(gconf.rsync_ssh_options.split() + [self.slaveurl]), + log_err=log_err) - def tarssh(self, files): - return sup(self, files, self.slaveurl) + def tarssh(self, files, log_err=False): + return sup(self, files, self.slaveurl, log_err=log_err) |