diff options
author | Kotresh H R <khiremat@redhat.com> | 2014-05-25 23:41:48 +0530 |
---|---|---|
committer | Venky Shankar <vshankar@redhat.com> | 2014-06-05 22:58:26 -0700 |
commit | 77498fdbbca8554880eae4b8f559b9d6876e35b7 (patch) | |
tree | 29662e2b91634c1abd1c107d5aeeb1d2a4434698 | |
parent | 535003ca20a9dd00a09dd34ad26947d888aabe39 (diff) |
feature/geo-rep: Fix to retain pause state of gsyncd on restart.
A new gsyncd option, '--pause-on-start', is introduced. When a node
reboots and the session status is paused, gsyncd is started with this
option. After gsyncd spawns the worker and agent, the worker sends
SIGSTOP to the negative of the monitor's pid (i.e. to the monitor's process group) so that gsyncd re-enters the paused state.
Change-Id: I5aad82c9a9fc8c243f384940b77d25e26e520d6d
BUG: 1101410
Signed-off-by: Kotresh H R <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/7885
Reviewed-by: Aravinda VK <avishwan@redhat.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Tested-by: Venky Shankar <vshankar@redhat.com>
-rw-r--r-- | geo-replication/syncdaemon/gsyncd.py | 1 | ||||
-rw-r--r-- | geo-replication/syncdaemon/monitor.py | 9 | ||||
-rw-r--r-- | geo-replication/syncdaemon/resource.py | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 5 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 2 |
6 files changed, 33 insertions, 11 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 7d463ad23f3..7ddd51267a6 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -227,6 +227,7 @@ def main_i(): op.add_option('--ignore-deletes', default=False, action='store_true') op.add_option('--isolated-slave', default=False, action='store_true') op.add_option('--use-rsync-xattrs', default=False, action='store_true') + op.add_option('--pause-on-start', default=False, action='store_true') op.add_option('-L', '--log-level', metavar='LVL') op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0])) diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index f485fe18605..f3700c1a390 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -73,10 +73,11 @@ class Monitor(object): """class which spawns and manages gsyncd workers""" ST_INIT = 'Initializing...' 
+ ST_INIT_PAUSE = 'Initializing...(Paused)' ST_STABLE = 'Stable' ST_FAULTY = 'faulty' ST_INCON = 'inconsistent' - _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON] + _ST_ORD = [ST_STABLE, ST_INIT, ST_INIT_PAUSE, ST_FAULTY, ST_INCON] def __init__(self): self.lock = Lock() @@ -128,7 +129,11 @@ class Monitor(object): due to the keep-alive thread) """ - self.set_state(self.ST_INIT, w) + if gconf.pause_on_start: + self.set_state(self.ST_INIT_PAUSE, w) + else: + self.set_state(self.ST_INIT, w) + ret = 0 def nwait(p, o=0): diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index dadfc965336..8192a54b0d4 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -13,6 +13,7 @@ import os import sys import stat import time +import signal import fcntl import errno import types @@ -1290,6 +1291,13 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): logging.debug("Changelog register failed: %s - %s" % (e.errno, e.strerror)) + # Check if gsyncd restarted in pause state. If + # yes, send SIGSTOP to negative of monitor pid + # to go back to pause state. 
+ if gconf.pause_on_start: + os.kill(-os.getppid(), signal.SIGSTOP) + gconf.pause_on_start = False + # oneshot: Try to use changelog history api, if not # available switch to FS crawl # Note: if config.change_detector is xsync then diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 3e2e308ec13..aa3cc99fbff 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -3724,7 +3724,7 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, if (ret == 0) ret = glusterd_start_gsync (volinfo, slave, path_list, conf_path, uuid_utoa(MY_UUID), - NULL); + NULL, _gf_false); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -4499,7 +4499,8 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) } ret = glusterd_start_gsync (volinfo, slave, path_list, - conf_path, host_uuid, op_errstr); + conf_path, host_uuid, op_errstr, + _gf_false); } if (type == GF_GSYNC_OPTION_TYPE_STOP || diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index a2a746d247e..15e91ad24dd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6572,6 +6572,7 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) char *op_errstr = NULL; glusterd_conf_t *priv = NULL; gf_boolean_t is_template_in_use = _gf_false; + gf_boolean_t is_paused = _gf_false; GF_ASSERT (THIS); priv = THIS->private; @@ -6665,9 +6666,9 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) "%s and %s::%s. Not Restarting", volinfo->volname, slave_ip, slave_vol); goto out; - } - - if ((!strcmp (buf, "Config Corrupted"))) { + } else if (strstr(buf, "Paused")) { + is_paused = _gf_true; + } else if ((!strcmp (buf, "Config Corrupted"))) { gf_log ("", GF_LOG_INFO, "Recovering from a corrupted config. " "Not Restarting. 
Use start (force) to " @@ -6677,8 +6678,12 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) goto out; } - glusterd_start_gsync (volinfo, slave, path_list, confpath, - uuid_str, NULL); + if (is_paused) + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL, _gf_true); + else + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL, _gf_false); out: if (statefile) @@ -8294,7 +8299,7 @@ int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, char *glusterd_uuid_str, - char **op_errstr) + char **op_errstr, gf_boolean_t is_pause) { int32_t ret = 0; int32_t status = 0; @@ -8356,6 +8361,8 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, runner_argprintf (&runner, "--glusterd-uuid=%s", uuid_utoa (priv->uuid)); runner_add_arg (&runner, slave); + if (is_pause) + runner_add_arg (&runner, "--pause-on-start"); synclock_unlock (&priv->big_lock); ret = runner_run (&runner); synclock_lock (&priv->big_lock); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 4b6e2b0cdbc..834d4a52156 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -413,7 +413,7 @@ int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, char *glusterd_uuid_str, - char **op_errstr); + char **op_errstr, gf_boolean_t is_pause); int glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist); |