summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKotresh HR <khiremat@redhat.com>2017-05-23 11:47:18 -0400
committerAravinda VK <avishwan@redhat.com>2017-05-24 06:12:02 +0000
commit0bcf9143f500dd519c887af2b6948e2d332b3bfd (patch)
treeb3e24aa4850e80ce4189a17df57f05f2ca11ede9
parenta8624b8b13a1f4222e4d3e33fa5836d7b45369bc (diff)
geo-rep: Make changelog-batch-size configurable
Changelog batch size is set to 727040 bytes which is the size of all the changelogs in a single batch. It's based on few tests which approximately processes 5K entries. But it might vary on different machines. Making it configurable gives more control on the frequency of stime updates. This patch does the same. Change-Id: I9a5ebb3d92c1327dded0e0a712c43a5a9046c1b0 BUG: 1454872 Signed-off-by: Kotresh HR <khiremat@redhat.com> Reviewed-on: https://review.gluster.org/17376 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Amar Tumballi <amarts@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Aravinda VK <avishwan@redhat.com>
-rw-r--r--geo-replication/syncdaemon/gsyncd.py7
-rw-r--r--geo-replication/syncdaemon/master.py10
2 files changed, 8 insertions, 9 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 45b5ff546fa..3ddcb7f5454 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -278,6 +278,13 @@ def main_i():
op.add_option('--log-rsync-performance', default=False,
action='store_true')
op.add_option('--max-rsync-retries', type=int, default=10)
+ # Max total size of changelogs to process per batch. Changelog processing
+ # is not limited by the number of changelogs but by the combined size of
+ # the changelog files. One sample changelog file was 145408 bytes with
+ # ~1000 CREATE and ~1000 DATA entries; 5 such files per batch is 727040.
+ # If a geo-rep worker crashes while processing a batch, it has to retry
+ # only that batch, since stime is updated after each batch completes.
+ op.add_option('--changelog-batch-size', type=int, default=727040)
op.add_option('--pause-on-start', default=False, action='store_true')
op.add_option('-L', '--log-level', metavar='LVL')
op.add_option('-r', '--remote-gsyncd', metavar='CMD',
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index b096bc77ebe..6365df8c532 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -37,14 +37,6 @@ URXTIME = (-1, 0)
# crawl before starting live changelog crawl.
CHANGELOG_ROLLOVER_TIME = 15
-# Max size of Changelogs to process per batch, Changelogs Processing is
-# not limited by the number of changelogs but instead based on
-# size of the changelog file, One sample changelog file size was 145408
-# with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040
-# If geo-rep worker crashes while processing a batch, it has to retry only
-# that batch since stime will get updated after each batch.
-MAX_CHANGELOG_BATCH_SIZE = 727040
-
# Utility functions to help us to get to closer proximity
# of the DRY principle (no, don't look for elevated or
# perspectivistic things here)
@@ -1166,7 +1158,7 @@ class GMasterChangelogMixin(GMasterCommon):
current_size = 0
for c in changes:
si = os.lstat(c).st_size
- if (si + current_size) > MAX_CHANGELOG_BATCH_SIZE:
+ if (si + current_size) > int(gconf.changelog_batch_size):
# Create new batch if single Changelog file greater than
# Max Size! or current batch size exceeds Max size
changelogs_batches.append([c])