summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKotresh H R <khiremat@redhat.com>2014-05-02 13:09:03 +0530
committerVenky Shankar <vshankar@redhat.com>2014-05-13 23:10:10 -0700
commit5c65850c99829668ac199a49a0760443db74b581 (patch)
treeffa594a1264ca8d0b2f9aab95d3ecb2f64042485
parent7d22fca1f5c9d792b148bae15e8d26a9c5d63efa (diff)
geo-rep/glusterd: Pause and Resume feature for geo-replication
This patch introduces the pause and resume CLI commands for geo-replication. Signed-off-by: Kotresh H R <khiremat@redhat.com> Change-Id: I4f5e58e9175fe85077d56088473252391fb57de7 BUG: 1093602 Signed-off-by: Kotresh H R <khiremat@redhat.com> Reviewed-on: http://review.gluster.org/7643 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-by: Avra Sengupta <asengupt@redhat.com> Reviewed-by: Venky Shankar <vshankar@redhat.com> Tested-by: Venky Shankar <vshankar@redhat.com>
-rw-r--r--cli/src/cli-cmd-parser.c19
-rw-r--r--cli/src/cli-cmd-volume.c2
-rw-r--r--cli/src/cli-rpc-ops.c14
-rw-r--r--cli/src/cli-xml-output.c2
-rw-r--r--rpc/xdr/src/cli1-xdr.x4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c277
6 files changed, 300 insertions, 18 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index a41c0800ac1..287943777df 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1802,7 +1802,9 @@ force_push_pem_parse (const char **words, int wordcount,
if ((strcmp ((char *)words[wordcount-2], "start")) &&
(strcmp ((char *)words[wordcount-2], "stop")) &&
(strcmp ((char *)words[wordcount-2], "create")) &&
- (strcmp ((char *)words[wordcount-2], "push-pem"))) {
+ (strcmp ((char *)words[wordcount-2], "push-pem")) &&
+ (strcmp ((char *)words[wordcount-2], "pause")) &&
+ (strcmp ((char *)words[wordcount-2], "resume"))) {
ret = -1;
goto out;
}
@@ -1852,7 +1854,8 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
unsigned cmdi = 0;
char *opwords[] = { "create", "status", "start", "stop",
"config", "force", "delete",
- "push-pem", "detail", NULL };
+ "push-pem", "detail", "pause",
+ "resume", NULL };
char *w = NULL;
GF_ASSERT (words);
@@ -1869,6 +1872,8 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
* volume geo-replication [$m] $s config [[!]$opt [$val]]
* volume geo-replication $m $s start|stop [force]
* volume geo-replication $m $s delete
+ * volume geo-replication $m $s pause [force]
+ * volume geo-replication $m $s resume [force]
*/
if (wordcount < 3)
@@ -1962,6 +1967,16 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (!masteri || !slavei)
goto out;
+ } else if (strcmp (w, "pause") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_PAUSE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "resume") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_RESUME;
+
+ if (!masteri || !slavei)
+ goto out;
} else
GF_ASSERT (!"opword mismatch");
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 6072fcc5193..97984721cbd 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -2397,7 +2397,7 @@ struct cli_cmd volume_cmds[] = {
#if (SYNCDAEMON_COMPILE)
{"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {create [push-pem] [force]"
- "|start [force]|stop [force]|config|status [detail]|delete} [options...]",
+ "|start [force]|stop [force]|pause [force]|resume [force]|config|status [detail]|delete} [options...]",
cli_cmd_volume_gsync_set_cbk,
"Geo-sync operations",
cli_cmd_check_gsync_exists_cbk},
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index f174e27e46d..bdde185cca5 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -4514,6 +4514,20 @@ gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
master, slave);
break;
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out ("%s " GEOREP " session between %s & %s"
+ " has been successful",
+ type == GF_GSYNC_OPTION_TYPE_PAUSE ?
+ "Pausing" : "Resuming",
+ master, slave);
+ break;
+
case GF_GSYNC_OPTION_TYPE_CONFIG:
ret = gf_cli_gsync_config_command (dict);
break;
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index 0eb23bff20d..5434dd4069a 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -3801,6 +3801,8 @@ cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
switch (type) {
case GF_GSYNC_OPTION_TYPE_START:
case GF_GSYNC_OPTION_TYPE_STOP:
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
if (dict_get_str (dict, "master", &master) != 0)
master = "???";
if (dict_get_str (dict, "slave", &slave) != 0)
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 2b68cc26068..f852ab65070 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -86,7 +86,9 @@ enum gf1_cli_gsync_set {
GF_GSYNC_OPTION_TYPE_STATUS,
GF_GSYNC_OPTION_TYPE_ROTATE,
GF_GSYNC_OPTION_TYPE_CREATE,
- GF_GSYNC_OPTION_TYPE_DELETE
+ GF_GSYNC_OPTION_TYPE_DELETE,
+ GF_GSYNC_OPTION_TYPE_PAUSE,
+ GF_GSYNC_OPTION_TYPE_RESUME
};
enum gf1_cli_stats_op {
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 29022a30733..1cd2323188d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -312,6 +312,14 @@ __glusterd_handle_gsync_set (rpcsvc_request_t *req)
strncpy (operation, "stop", sizeof (operation));
break;
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ strncpy (operation, "pause", sizeof (operation));
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ strncpy (operation, "resume", sizeof (operation));
+ break;
+
case GF_GSYNC_OPTION_TYPE_CONFIG:
strncpy (operation, "config", sizeof (operation));
break;
@@ -2299,6 +2307,56 @@ out:
return ret;
}
+/* Pre-condition check for geo-rep pause/resume.
+ *
+ * Reads the monitor status from @statefile and rejects the request when:
+ *   - the status cannot be read (the read helper returns <= 0),
+ *   - a pause is requested and the status already contains "Paused",
+ *   - a resume is requested and the status does not contain "Paused".
+ *
+ * @type:      GF_GSYNC_OPTION_TYPE_PAUSE or GF_GSYNC_OPTION_TYPE_RESUME
+ * @volinfo:   master volume; only its volname is used, for the message
+ * @slave:     slave identifier, used for the message
+ * @statefile: path of the status file to read
+ * @op_errstr: on failure, set to a gf_strdup()'ed error message
+ *
+ * Return: 0 on success
+ *        -1 on any check failed.
+ */
+static int
+gd_pause_resume_validation (int type, glusterd_volinfo_t *volinfo,
+                            char *slave, char *statefile, char **op_errstr)
+{
+        int             ret                      = 0;
+        char            errmsg[PATH_MAX]         = {0,};
+        char            monitor_status[NAME_MAX] = {0,};
+        gf_boolean_t    is_paused                = _gf_false;
+
+        GF_ASSERT (volinfo);
+        GF_ASSERT (slave);
+        GF_ASSERT (statefile);
+        GF_ASSERT (op_errstr);
+
+        ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+                                              sizeof (monitor_status));
+        if (ret <= 0) {
+                /* Name the operation that was actually requested; the
+                 * message used to say "Pause" even for a resume. */
+                snprintf (errmsg, sizeof(errmsg), "%s check Failed:"
+                          " Geo-rep session is not setup",
+                          (type == GF_GSYNC_OPTION_TYPE_RESUME) ?
+                          "Resume" : "Pause");
+                ret = -1;
+                goto out;
+        }
+
+        if (strstr (monitor_status, "Paused"))
+                is_paused = _gf_true;
+
+        if (type == GF_GSYNC_OPTION_TYPE_PAUSE && is_paused) {
+                snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                          " session between %s and %s already Paused.",
+                          volinfo->volname, slave);
+                ret = -1;
+                goto out;
+        }
+        if (type == GF_GSYNC_OPTION_TYPE_RESUME && !is_paused) {
+                snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                          " session between %s and %s is not Paused.",
+                          volinfo->volname, slave);
+                ret = -1;
+                goto out;
+        }
+        ret = 0;
+out:
+        /* Only hand back a message when one was actually composed. */
+        if (ret && (errmsg[0] != '\0')) {
+                *op_errstr = gf_strdup (errmsg);
+        }
+        return ret;
+}
+
int
glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
{
@@ -2417,7 +2475,9 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
* session. */
if ((type == GF_GSYNC_OPTION_TYPE_CONFIG) ||
((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
- (type == GF_GSYNC_OPTION_TYPE_DELETE)) {
+ (type == GF_GSYNC_OPTION_TYPE_DELETE) ||
+ (type == GF_GSYNC_OPTION_TYPE_PAUSE) ||
+ (type == GF_GSYNC_OPTION_TYPE_RESUME)) {
ret = lstat (statefile, &stbuf);
if (ret) {
snprintf (errmsg, sizeof(errmsg), "Geo-replication"
@@ -2432,7 +2492,9 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
/* Check if all peers that are a part of the volume are up or not */
if ((type == GF_GSYNC_OPTION_TYPE_DELETE) ||
- ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force)) {
+ ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
+ (type == GF_GSYNC_OPTION_TYPE_PAUSE) ||
+ (type == GF_GSYNC_OPTION_TYPE_RESUME)) {
if (!strcmp (uuid_str, host_uuid)) {
ret = glusterd_are_vol_all_peers_up (volinfo,
&conf->peers,
@@ -2514,6 +2576,28 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
}
break;
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ if (is_template_in_use) {
+ snprintf (errmsg, sizeof(errmsg),
+ "state-file entry missing in "
+ "the config file(%s).", conf_path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_verify_gsync_running (volinfo, slave,
+ conf_path, op_errstr);
+ if (ret)
+ goto out;
+ if (!is_force) {
+ ret = gd_pause_resume_validation (type, volinfo, slave,
+ statefile, op_errstr);
+ if (ret)
+ goto out;
+ }
+ break;
+
case GF_GSYNC_OPTION_TYPE_CONFIG:
if (is_template_in_use) {
snprintf (errmsg, sizeof(errmsg), "state-file entry "
@@ -2593,6 +2677,148 @@ out:
}
+/* Pause or resume the gsyncd monitor of a geo-rep session on this node.
+ *
+ * Reads the monitor pid from the session pid-file and sends SIGSTOP
+ * (is_pause) or SIGCONT (!is_pause) to the process group of that pid,
+ * then rewrites the status file: on pause "(Paused)" is appended to the
+ * current status, on resume the status is cut back to its first
+ * '('-delimited token.  If the status file cannot be rewritten, the
+ * opposite signal is sent to undo the change.
+ *
+ * @dict:      op dictionary; "statefile" is read from it
+ * @master:    master volume name
+ * @slave:     slave identifier
+ * @slave_ip:  passed through to glusterd_create_status_file()
+ * @slave_vol: passed through to glusterd_create_status_file()
+ * @conf_path: config file used to locate the pid-file
+ * @op_errstr: set to a gf_strdup()'ed message for some failures
+ * @is_pause:  _gf_true to pause, _gf_false to resume
+ *
+ * Return: 0 on success, and also when gsyncd is not running or no
+ *         pid-file descriptor was obtained (nothing to signal);
+ *         non-zero on failure.
+ */
static int
+gd_pause_or_resume_gsync (dict_t *dict, char *master, char *slave,
+                          char *slave_ip, char *slave_vol, char *conf_path,
+                          char **op_errstr, gf_boolean_t is_pause)
+{
+        int32_t         ret                      = 0;
+        int             pfd                      = -1;
+        long            pid_val                  = 0;
+        pid_t           pid                      = 0;
+        char            pidfile[PATH_MAX]        = {0,};
+        char            errmsg[PATH_MAX]         = "";
+        char            buf [1024]               = {0,};
+        gf_boolean_t    is_template_in_use       = _gf_false;
+        char            monitor_status[NAME_MAX] = {0,};
+        char           *statefile                = NULL;
+        char           *token                    = NULL;
+        char           *saveptr                  = NULL;
+        char           *endp                     = NULL;
+        size_t          used                     = 0;
+        xlator_t       *this                     = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (master);
+        GF_ASSERT (slave);
+        GF_ASSERT (slave_ip);
+        GF_ASSERT (slave_vol);
+        GF_ASSERT (conf_path);
+
+        pfd = gsyncd_getpidfile (master, slave, pidfile,
+                                 conf_path, &is_template_in_use);
+        if (pfd == -2) {
+                snprintf (errmsg, sizeof(errmsg),
+                          "pid-file entry mising in config file and "
+                          "template config file.");
+                gf_log (this->name, GF_LOG_ERROR, "%s", errmsg);
+                *op_errstr = gf_strdup (errmsg);
+                ret = -1;
+                goto out;
+        }
+
+        if (gsync_status_byfd (pfd) == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "gsyncd b/w %s & %s is not"
+                        " running", master, slave);
+                /* monitor gsyncd already dead */
+                goto out;
+        }
+
+        if (pfd < 0)
+                goto out;
+
+        /* Prepare to update status file*/
+        ret = dict_get_str (dict, "statefile", &statefile);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Pause/Resume Failed:"
+                        " Unable to fetch statefile path");
+                goto out;
+        }
+        ret = glusterd_gsync_read_frm_status (statefile, monitor_status,
+                                              sizeof (monitor_status));
+        if (ret <= 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Pause/Resume Failed: "
+                        "Unable to read status file for %s(master)"
+                        " %s(slave)", master, slave);
+                /* A 0 return is logged as a failure above, so it must
+                 * not reach the caller as success. */
+                ret = -1;
+                goto out;
+        }
+
+        /* Leave room for the terminator: strtol() needs a C string. */
+        ret = read (pfd, buf, sizeof (buf) - 1);
+        if (ret <= 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Pause/Resume Failed: "
+                        "Unable to read gsyncd pid for %s(master)"
+                        " %s(slave)", master, slave);
+                ret = -1;
+                goto out;
+        }
+        buf[ret] = '\0';
+
+        /* kill() is called with -pid below.  A parsed value of 0 would
+         * signal our own process group and 1 would signal every process
+         * we may signal, so anything that is not a plausible pid (> 1,
+         * representable as pid_t) is rejected. */
+        errno = 0;
+        pid_val = strtol (buf, &endp, 10);
+        pid = (pid_t) pid_val;
+        if (errno || endp == buf || pid_val <= 1 || (long) pid != pid_val) {
+                gf_log (this->name, GF_LOG_ERROR, "Pause/Resume Failed: "
+                        "Invalid gsyncd pid in pid-file for %s(master)"
+                        " %s(slave)", master, slave);
+                ret = -1;
+                goto out;
+        }
+
+        if (is_pause) {
+                ret = kill (-pid, SIGSTOP);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed"
+                                " to pause gsyncd. Error: %s",
+                                strerror (errno));
+                        goto out;
+                }
+                /*On pause force, if status is already paused
+                  do not update status again*/
+                if (strstr (monitor_status, "Paused"))
+                        goto out;
+
+                /* Bounded append: never write past monitor_status. */
+                used = strlen (monitor_status);
+                (void) snprintf (monitor_status + used,
+                                 sizeof (monitor_status) - used,
+                                 "(Paused)");
+                ret = glusterd_create_status_file ( master, slave,
+                                                    slave_ip, slave_vol,
+                                                    monitor_status);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unable to update state_file."
+                                " Error : %s", strerror (errno));
+                        /* If status cannot be updated resume back */
+                        if (kill (-pid, SIGCONT)) {
+                                snprintf (errmsg, sizeof(errmsg),
+                                          "Pause successful but could "
+                                          "not update status file. "
+                                          "Please use 'resume force' to"
+                                          " resume back and retry pause"
+                                          " to reflect in status");
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Resume back Failed. Error: %s",
+                                        strerror (errno));
+                                *op_errstr = gf_strdup (errmsg);
+                        }
+                        goto out;
+                }
+        } else {
+                ret = kill (-pid, SIGCONT);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to resume gsyncd. Error: %s",
+                                strerror (errno));
+                        goto out;
+                }
+                /* Reentrant tokenizer: strtok() keeps hidden static
+                 * state shared by every caller in the process. */
+                token = strtok_r (monitor_status, "(", &saveptr);
+                ret = glusterd_create_status_file ( master, slave,
+                                                    slave_ip, slave_vol, token);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unable to update state_file."
+                                " Error : %s", strerror (errno));
+                        /* If status cannot be updated pause back */
+                        if (kill (-pid, SIGSTOP)) {
+                                snprintf (errmsg, sizeof(errmsg),
+                                          "Resume successful but could "
+                                          "not update status file."
+                                          " Please use 'pause force' to"
+                                          " pause back and retry resume"
+                                          " to reflect in status");
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Pause back Failed. Error: %s",
+                                        strerror (errno));
+                                *op_errstr = gf_strdup (errmsg);
+                        }
+                        goto out;
+                }
+        }
+        ret = 0;
+
+out:
+        /* Only close a descriptor that was actually obtained. */
+        if (pfd >= 0)
+                sys_close (pfd);
+        return ret;
+}
+
+static int
stop_gsync (char *master, char *slave, char **msg,
char *conf_path, char **op_errstr,
gf_boolean_t is_force)
@@ -4136,6 +4362,7 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char *status_msg = NULL;
gf_boolean_t is_running = _gf_false;
char *conf_path = NULL;
+ char errmsg[PATH_MAX] = "";
GF_ASSERT (THIS);
GF_ASSERT (THIS->private);
@@ -4239,7 +4466,9 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
conf_path, host_uuid, op_errstr);
}
- if (type == GF_GSYNC_OPTION_TYPE_STOP) {
+ if (type == GF_GSYNC_OPTION_TYPE_STOP ||
+ type == GF_GSYNC_OPTION_TYPE_PAUSE ||
+ type == GF_GSYNC_OPTION_TYPE_RESUME) {
ret = glusterd_check_gsync_running_local (volinfo->volname,
slave, conf_path,
&is_running);
@@ -4251,19 +4480,39 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- ret = stop_gsync (volname, slave, &status_msg, conf_path,
- op_errstr, is_force);
- if (ret == 0 && status_msg)
- ret = dict_set_str (rsp_dict, "gsync-status",
- status_msg);
- if (!ret) {
- ret = glusterd_create_status_file (volinfo->volname,
+ if (type == GF_GSYNC_OPTION_TYPE_PAUSE) {
+ ret = gd_pause_or_resume_gsync (dict, volname, slave,
+ slave_ip, slave_vol,
+ conf_path, op_errstr,
+ _gf_true);
+ if (ret)
+ gf_log("", GF_LOG_ERROR, GEOREP
+ " Pause Failed");
+ } else if (type == GF_GSYNC_OPTION_TYPE_RESUME) {
+ ret = gd_pause_or_resume_gsync (dict, volname, slave,
+ slave_ip, slave_vol,
+ conf_path, op_errstr,
+ _gf_false);
+ if (ret)
+ gf_log("", GF_LOG_ERROR, GEOREP
+ " Resume Failed");
+ } else {
+ ret = stop_gsync (volname, slave, &status_msg,
+ conf_path, op_errstr, is_force);
+
+ if (ret == 0 && status_msg)
+ ret = dict_set_str (rsp_dict, "gsync-status",
+ status_msg);
+ if (!ret) {
+ ret = glusterd_create_status_file (
+ volinfo->volname,
slave, slave_ip,
slave_vol,"Stopped");
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to update"
- "state_file. Error : %s",
- strerror (errno));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to "
+ "update state_file. Error : %s",
+ strerror (errno));
+ }
}
}
}