diff options
author | Kotresh HR <khiremat@redhat.com> | 2017-05-23 11:47:18 -0400 |
---|---|---|
committer | Aravinda VK <avishwan@redhat.com> | 2017-05-24 06:12:02 +0000 |
commit | 0bcf9143f500dd519c887af2b6948e2d332b3bfd (patch) | |
tree | b3e24aa4850e80ce4189a17df57f05f2ca11ede9 | |
parent | a8624b8b13a1f4222e4d3e33fa5836d7b45369bc (diff) |
geo-rep: Make changelog-batch-size configurable
Changelog batch size is set to 727040 bytes which
is the size of all the changelogs in a single batch.
It's based on a few tests which approximately process
5K entries. But it might vary on different machines.
Making it configurable gives more control over the
frequency of stime updates. This patch does the same.
Change-Id: I9a5ebb3d92c1327dded0e0a712c43a5a9046c1b0
BUG: 1454872
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://review.gluster.org/17376
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Aravinda VK <avishwan@redhat.com>
-rw-r--r-- | geo-replication/syncdaemon/gsyncd.py | 7 | ||||
-rw-r--r-- | geo-replication/syncdaemon/master.py | 10 |
2 files changed, 8 insertions, 9 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 45b5ff546fa..3ddcb7f5454 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -278,6 +278,13 @@ def main_i(): op.add_option('--log-rsync-performance', default=False, action='store_true') op.add_option('--max-rsync-retries', type=int, default=10) + # Max size of Changelogs to process per batch, Changelogs Processing is + # not limited by the number of changelogs but instead based on + # size of the changelog file, One sample changelog file size was 145408 + # with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040 + # If geo-rep worker crashes while processing a batch, it has to retry only + # that batch since stime will get updated after each batch. + op.add_option('--changelog-batch-size', type=int, default=727040) op.add_option('--pause-on-start', default=False, action='store_true') op.add_option('-L', '--log-level', metavar='LVL') op.add_option('-r', '--remote-gsyncd', metavar='CMD', diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index b096bc77ebe..6365df8c532 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -37,14 +37,6 @@ URXTIME = (-1, 0) # crawl before starting live changelog crawl. CHANGELOG_ROLLOVER_TIME = 15 -# Max size of Changelogs to process per batch, Changelogs Processing is -# not limited by the number of changelogs but instead based on -# size of the changelog file, One sample changelog file size was 145408 -# with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040 -# If geo-rep worker crashes while processing a batch, it has to retry only -# that batch since stime will get updated after each batch. 
-MAX_CHANGELOG_BATCH_SIZE = 727040 - # Utility functions to help us to get to closer proximity # of the DRY principle (no, don't look for elevated or # perspectivistic things here) @@ -1166,7 +1158,7 @@ class GMasterChangelogMixin(GMasterCommon): current_size = 0 for c in changes: si = os.lstat(c).st_size - if (si + current_size) > MAX_CHANGELOG_BATCH_SIZE: + if (si + current_size) > int(gconf.changelog_batch_size): # Create new batch if single Changelog file greater than # Max Size! or current batch size exceeds Max size changelogs_batches.append([c]) |