From b198e072cda4bbb98e19701399c4bb4f0743cf20 Mon Sep 17 00:00:00 2001 From: Ajeet Jha Date: Mon, 2 Dec 2013 12:55:18 +0530 Subject: glusterd/geo-rep: more glusterd and cli fixes for geo-rep. -> handle option validation cases in reset case. -> Creating valid conf path when glusterd restarts. -> Reading the gsyncd worker thread status and displaying it. -> Displaying status-detail per worker. -> Fetch checkpoint info in geo-rep status. -> use-tarssh value validation added. misc: misc geo-rep fixes based on cluster, logrotate etc.. -> cluster/dht: fix 'stime' getxattr getting overwritten. -> cluster/afr: return max of 'stime' values in subvol. -> geo-rep-logrotate: Sending SIGHUP to geo-rep auxiliary. -> cluster/dht: fix convoluted logic while aggregating. -> cluster/*: fix 'stime' min/max fetch logic. Change-Id: I811acea0bbd6194797a3e55d89295d1ea021ac85 BUG: 1036552 Signed-off-by: Ajeet Jha Reviewed-on: http://review.gluster.org/6405 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Anand Avati --- cli/src/cli-cmd-parser.c | 12 +- cli/src/cli-rpc-ops.c | 258 +++-------- cli/src/cli.h | 14 - extras/glusterfs-georep-logrotate | 18 + libglusterfs/src/mem-types.h | 3 +- rpc/rpc-lib/src/protocol-common.h | 17 + xlators/cluster/afr/src/afr-inode-read.c | 2 +- xlators/cluster/dht/src/dht-common.c | 8 +- xlators/lib/src/libxlator.c | 55 +++ xlators/lib/src/libxlator.h | 3 + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 563 +++++++++++++++-------- xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 1 - xlators/mgmt/glusterd/src/glusterd-utils.c | 136 +++--- xlators/mgmt/glusterd/src/glusterd-utils.h | 11 + xlators/mgmt/glusterd/src/glusterd.c | 21 +- xlators/mgmt/glusterd/src/glusterd.h | 2 +- 16 files changed, 646 insertions(+), 478 deletions(-) diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 8a37d8e78..0aeaccd0c 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -1737,13 +1737,13 @@ config_parse (const char 
**words, int wordcount, dict_t *dict, } append_str[append_len - 2] = '\0'; /* "checkpoint now" is special: we resolve that "now" */ - if (strcmp (words[cmdi + 1], "checkpoint") == 0 && - strcmp (append_str, "now") == 0) { + if ((strcmp (words[cmdi + 1], "checkpoint") == 0) && + (strcmp (append_str, "now") == 0)) { struct timeval tv = {0,}; ret = gettimeofday (&tv, NULL); if (ret == -1) - goto out; /* FIXME: free append_str? */ + goto out; GF_FREE (append_str); append_str = GF_CALLOC (1, 300, cli_mt_append_str); @@ -1751,10 +1751,8 @@ config_parse (const char **words, int wordcount, dict_t *dict, ret = -1; goto out; } - strcpy (append_str, "as of "); - gf_time_fmt (append_str + strlen ("as of "), - 300 - strlen ("as of "), - tv.tv_sec, gf_timefmt_FT); + snprintf (append_str, 300, "now:%ld.%06ld", + tv.tv_sec, tv.tv_usec); } ret = dict_set_dynstr (dict, "op_value", append_str); diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 2cb0ba3d4..d1b39014d 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -3874,85 +3874,22 @@ gf_cli_gsync_config_command (dict_t *dict) return runner_run (&runner); } -int -gf_cli_fetch_gsyncd_status_values (char *status, - gf_cli_gsync_status_t *sts_val) -{ - int32_t ret = -1; - char *tmp = NULL; - char *save_ptr = NULL; - char *key = NULL; - char *value = NULL; - - if (!status || !sts_val) { - gf_log ("", GF_LOG_ERROR, "status or sts_val is null"); - goto out; - } - - tmp = strtok_r (status, "\n", &save_ptr); - - if (tmp) - sts_val->health = gf_strdup (tmp); - - while (tmp) { - key = strtok_r (tmp, "=", &value); - - if ((key) && (!strcmp(key, "Uptime"))) - sts_val->uptime = gf_strdup (value); - - if ((key) && (!strcmp(key, "FilesSyncd"))) - sts_val->files_syncd = gf_strdup (value); - - if ((key) && (!strcmp(key, "FilesPending"))) - sts_val->files_pending = gf_strdup (value); - - if ((key) && (!strcmp(key, "BytesPending"))) { - value = gf_uint64_2human_readable(atol(value)); - sts_val->bytes_pending = gf_strdup 
(value); - } - - if ((key) && (!strcmp(key, "DeletesPending"))) - sts_val->deletes_pending = gf_strdup (value); - - tmp = strtok_r (NULL, ";", &save_ptr); - } - - if (sts_val->health) - ret = 0; - - if (!sts_val->uptime) - sts_val->uptime = gf_strdup ("N/A"); - - if (!sts_val->files_syncd) - sts_val->files_syncd = gf_strdup ("N/A"); - - if (!sts_val->files_pending) - sts_val->files_pending = gf_strdup ("N/A"); - - if (!sts_val->bytes_pending) - sts_val->bytes_pending = gf_strdup ("N/A"); - - if (!sts_val->deletes_pending) - sts_val->deletes_pending = gf_strdup ("N/A"); - -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); - return ret; -} - char* -get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val) +get_struct_variable (int mem_num, gf_gsync_status_t *sts_val) { switch (mem_num) { - case 0: return (sts_val->node); - case 1: return (sts_val->master); - case 2: return (sts_val->slave); - case 3: return (sts_val->health); - case 4: return (sts_val->uptime); - case 5: return (sts_val->files_syncd); - case 6: return (sts_val->files_pending); - case 7: return (sts_val->bytes_pending); - case 8: return (sts_val->deletes_pending); + case 0: return (sts_val->node); + case 1: return (sts_val->master); + case 2: return (sts_val->brick); + case 3: return (sts_val->slave_node); + case 4: return (sts_val->worker_status); + case 5: return (sts_val->checkpoint_status); + case 6: return (sts_val->crawl_status); + case 7: return (sts_val->files_syncd); + case 8: return (sts_val->files_remaining); + case 9: return (sts_val->bytes_remaining); + case 10: return (sts_val->purges_remaining); + case 11: return (sts_val->total_files_skipped); default: goto out; } @@ -3963,28 +3900,23 @@ out: int gf_cli_print_status (char **title_values, - gf_cli_gsync_status_t **sts_vals, + gf_gsync_status_t **sts_vals, int *spacing, int gsync_count, int number_of_fields, int is_detail) { - int indents = 0; int i = 0; int j = 0; int ret = 0; + int status_fields = 6; /* Indexed at 0 */ 
int total_spacing = 0; char **output_values = NULL; char *tmp = NULL; char *hyphens = NULL; - char heading[PATH_MAX] = {0, }; - char indent_spaces[PATH_MAX] = {0, }; /* calculating spacing for hyphens */ for (i = 0; i < number_of_fields; i++) { - /* Suppressing master and slave output for status detail */ - if ((is_detail) && ((i == 1) || (i == 2))) { - total_spacing++; - continue; - } else if ((!is_detail) && (i > 4)) { + /* Suppressing detail output for status */ + if ((!is_detail) && (i > status_fields)) { /* Suppressing detailed output for * status */ continue; @@ -4018,64 +3950,29 @@ gf_cli_print_status (char **title_values, goto out; } - ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s", - sts_vals[0]->master, sts_vals[0]->slave); - if (ret) { - if (ret < sizeof(heading)) - heading[ret] = '\0'; - else - heading[sizeof(heading) - 1] = '\0'; - ret = 0; - } else { - ret = -1; - goto out; - } - - if (is_detail) { - cli_out (" "); - if (strlen(heading) > total_spacing) - cli_out ("%s", heading); - else { - /* Printing the heading with centre justification */ - indents = (total_spacing - strlen(heading)) / 2; - memset (indent_spaces, ' ', indents); - indent_spaces[indents] = '\0'; - ret = snprintf (hyphens, total_spacing, "%s%s", - indent_spaces, heading); - if (ret) { - hyphens[ret] = '\0'; - cli_out ("%s", hyphens); - ret = 0; - } else { - ret = -1; - goto out; - } - } - cli_out (" "); - } + cli_out (" "); /* setting the title "NODE", "MASTER", etc. 
from title_values[] and printing the same */ for (j = 0; j < number_of_fields; j++) { - /* Suppressing master and slave output for status detail */ - if ((is_detail) && ((j == 1) || (j == 2))) { - output_values[j][0] = '\0'; - continue; - } else if ((!is_detail) && (j > 4)) { + if ((!is_detail) && (j > status_fields)) { /* Suppressing detailed output for * status */ output_values[j][0] = '\0'; - continue; + continue; } memset (output_values[j], ' ', spacing[j]); memcpy (output_values[j], title_values[j], strlen(title_values[j])); output_values[j][spacing[j]] = '\0'; } - cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0], - output_values[1], output_values[2], output_values[3], - output_values[4], output_values[5], output_values[6], - output_values[7], output_values[8]); + cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s", + output_values[0], output_values[1], + output_values[2], output_values[3], + output_values[4], output_values[5], + output_values[6], output_values[7], + output_values[8], output_values[9], + output_values[10], output_values[11]); /* setting and printing the hyphens */ memset (hyphens, '-', total_spacing); @@ -4084,12 +3981,7 @@ gf_cli_print_status (char **title_values, for (i = 0; i < gsync_count; i++) { for (j = 0; j < number_of_fields; j++) { - /* Suppressing master and slave output for - * status detail */ - if ((is_detail) && ((j == 1) || (j == 2))) { - output_values[j][0] = '\0'; - continue; - } else if ((!is_detail) && (j > 4)) { + if ((!is_detail) && (j > status_fields)) { /* Suppressing detailed output for * status */ output_values[j][0] = '\0'; @@ -4107,10 +3999,13 @@ gf_cli_print_status (char **title_values, output_values[j][spacing[j]] = '\0'; } - cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0], - output_values[1], output_values[2], output_values[3], - output_values[4], output_values[5], output_values[6], - output_values[7], output_values[8]); + cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s", + output_values[0], 
output_values[1], + output_values[2], output_values[3], + output_values[4], output_values[5], + output_values[6], output_values[7], + output_values[8], output_values[9], + output_values[10], output_values[11]); } out: @@ -4130,47 +4025,23 @@ out: int gf_cli_read_status_data (dict_t *dict, - gf_cli_gsync_status_t **sts_vals, + gf_gsync_status_t **sts_vals, int *spacing, int gsync_count, int number_of_fields) { - int ret = 0; - int i = 0; - int j = 0; - char mst[PATH_MAX] = {0, }; - char slv[PATH_MAX] = {0, }; - char sts[PATH_MAX] = {0, }; - char nds[PATH_MAX] = {0, }; - char *status = NULL; - char *tmp = NULL; + char *tmp = NULL; + char sts_val_name[PATH_MAX] = ""; + int ret = 0; + int i = 0; + int j = 0; /* Storing per node status info in each object */ for (i = 0; i < gsync_count; i++) { - snprintf (nds, sizeof(nds), "node%d", i + 1); - snprintf (mst, sizeof(mst), "master%d", i + 1); - snprintf (slv, sizeof(slv), "slave%d", i + 1); - snprintf (sts, sizeof(sts), "status%d", i + 1); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); /* Fetching the values from dict, and calculating the max length for each field */ - ret = dict_get_str (dict, nds, &(sts_vals[i]->node)); - if (ret) - goto out; - - ret = dict_get_str (dict, mst, &(sts_vals[i]->master)); - if (ret) - goto out; - - ret = dict_get_str (dict, slv, &(sts_vals[i]->slave)); - if (ret) - goto out; - - ret = dict_get_str (dict, sts, &status); - if (ret) - goto out; - - /* Fetching health and uptime from sts_val */ - ret = gf_cli_fetch_gsyncd_status_values (status, sts_vals[i]); + ret = dict_get_bin (dict, sts_val_name, (void **)&(sts_vals[i])); if (ret) goto out; @@ -4192,25 +4063,23 @@ out: } int -gf_cli_gsync_status_output (dict_t *dict, int status_detail) +gf_cli_gsync_status_output (dict_t *dict, gf_boolean_t is_detail) { int gsync_count = 0; int i = 0; - int j = 0; int ret = 0; - int spacing[10] = {0}; - int num_of_fields = 9; + int spacing[13] = {0}; + int num_of_fields = 12; char 
errmsg[1024] = ""; char *master = NULL; char *slave = NULL; - char *tmp = NULL; - char *title_values[] = {"NODE", "MASTER", "SLAVE", - "HEALTH", "UPTIME", - "FILES SYNCD", - "FILES PENDING", - "BYTES PENDING", - "DELETES PENDING"}; - gf_cli_gsync_status_t **sts_vals = NULL; + char *title_values[] = {"MASTER NODE", "MASTER VOL", + "MASTER BRICK", "SLAVE", + "STATUS", "CHECKPOINT STATUS", + "CRAWL STATUS", "FILES SYNCD", + "FILES PENDING", "BYTES PENDING", + "DELETES PENDING", "FILES SKIPPED"}; + gf_gsync_status_t **sts_vals = NULL; /* Checks if any session is active or not */ ret = dict_get_int32 (dict, "gsync-count", &gsync_count); @@ -4244,14 +4113,14 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail) /* gsync_count = number of nodes reporting output. each sts_val object will store output of each node */ - sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *), + sts_vals = GF_CALLOC (gsync_count, sizeof (gf_gsync_status_t *), gf_common_mt_char); if (!sts_vals) { ret = -1; goto out; } for (i = 0; i < gsync_count; i++) { - sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t), + sts_vals[i] = GF_CALLOC (1, sizeof (gf_gsync_status_t), gf_common_mt_char); if (!sts_vals[i]) { ret = -1; @@ -4267,23 +4136,15 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail) } ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count, - num_of_fields, status_detail); + num_of_fields, is_detail); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to print status output"); goto out; } out: - if (sts_vals) { - for (i = 0; i < gsync_count; i++) { - for (j = 3; j < num_of_fields; j++) { - tmp = get_struct_variable(j, sts_vals[i]); - if (tmp) - GF_FREE (tmp); - } - } + if (sts_vals) GF_FREE (sts_vals); - } return ret; } @@ -4602,7 +4463,10 @@ gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov, status_detail = dict_get_str_boolean (dict, "status-detail", _gf_false); - ret = gf_cli_gsync_status_output (dict, status_detail); + if 
(status_detail) + ret = gf_cli_gsync_status_output (dict, status_detail); + else + ret = gf_cli_gsync_status_output (dict, status_detail); break; case GF_GSYNC_OPTION_TYPE_DELETE: diff --git a/cli/src/cli.h b/cli/src/cli.h index f5db29383..53537c642 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -148,18 +148,6 @@ struct cli_local { #endif }; -struct gf_cli_gsync_detailed_status_ { - char *node; - char *master; - char *slave; - char *health; - char *uptime; - char *files_syncd; - char *files_pending; - char *bytes_pending; - char *deletes_pending; -}; - struct cli_volume_status { int port; int online; @@ -178,8 +166,6 @@ struct cli_volume_status { #endif }; -typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t; - typedef struct cli_volume_status cli_volume_status_t; typedef struct cli_local cli_local_t; diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate index 6a69ab1e3..85e69d2c0 100644 --- a/extras/glusterfs-georep-logrotate +++ b/extras/glusterfs-georep-logrotate @@ -7,12 +7,30 @@ delaycompress notifempty /var/log/glusterfs/geo-replication/*/*.log { + sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } /var/log/glusterfs/geo-replication-slaves/*.log { + sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } /var/log/glusterfs/geo-replication-slaves/*/*.log { + sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index fc0aa9018..726d38eb6 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -118,6 +118,7 @@ enum 
gf_common_mem_types_ { gf_common_mt_auxgids = 102, gf_common_mt_syncopctx = 103, gf_common_mt_iobrefs = 104, - gf_common_mt_end = 105 + gf_common_mt_gsync_status_t = 105, + gf_common_mt_end = 106 }; #endif diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index d64f280cf..5876a500b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -221,6 +221,23 @@ typedef enum { GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA, } gf_xl_afr_op_t ; +struct gf_gsync_detailed_status_ { + char node[NAME_MAX]; + char master[NAME_MAX]; + char brick[NAME_MAX]; + char slave_node[NAME_MAX]; + char worker_status[NAME_MAX]; + char checkpoint_status[NAME_MAX]; + char crawl_status[NAME_MAX]; + char files_syncd[NAME_MAX]; + char files_remaining[NAME_MAX]; + char bytes_remaining[NAME_MAX]; + char purges_remaining[NAME_MAX]; + char total_files_skipped[NAME_MAX]; +}; + +typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; + #define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */ #define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */ diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 8a2853319..3e80129f9 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1348,7 +1348,7 @@ afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data) int ret = 0; if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) - ret = gf_get_min_stime (THIS, data, key, value); + ret = gf_get_max_stime (THIS, data, key, value); return ret; } diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 898f41f0e..ed4babd32 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2015,13 +2015,7 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local->xattr) { local->xattr = dict_copy_with_ref (xattr, NULL); } else { - /* first 
aggregate everything into xattr and then copy into - * local->xattr. This is required as we want to have - * 'local->xattr' as the proper final dictionary passed above - * distribute xlator. - */ - dht_aggregate_xattr (xattr, local->xattr); - local->xattr = dict_copy (xattr, local->xattr); + dht_aggregate_xattr (local->xattr, xattr); } out: if (is_last_call (this_call_cnt)) { diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c index 9e5357255..63e9bcf9f 100644 --- a/xlators/lib/src/libxlator.c +++ b/xlators/lib/src/libxlator.c @@ -452,6 +452,61 @@ gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) /* can't use 'min()' macro here as we need to compare two fields in the array, selectively */ + if ((host_value_timebuf[0] < host_timebuf[0]) || + ((host_value_timebuf[0] == host_timebuf[0]) && + (host_value_timebuf[1] < host_timebuf[1]))) { + update_timebuf (value_timebuf, net_timebuf); + } + + ret = 0; +out: + return ret; +error: + /* To be used only when net_timebuf is not set in the dict */ + if (net_timebuf) + GF_FREE (net_timebuf); + + return ret; +} + +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) +{ + int ret = -1; + uint32_t *net_timebuf = NULL; + uint32_t *value_timebuf = NULL; + uint32_t host_timebuf[2] = {0,}; + uint32_t host_value_timebuf[2] = {0,}; + + /* stime should be maximum of all the other nodes */ + ret = dict_get_bin (dst, key, (void **)&net_timebuf); + if (ret < 0) { + net_timebuf = GF_CALLOC (1, sizeof (int64_t), + gf_common_mt_char); + if (!net_timebuf) + goto out; + + ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t)); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "key=%s: dict set failed", key); + goto error; + } + } + + value_timebuf = data_to_bin (value); + if (!value_timebuf) { + gf_log (this->name, GF_LOG_WARNING, + "key=%s: getting value of stime failed", key); + ret = -1; + goto out; + } + + get_hosttime (value_timebuf, host_value_timebuf); + 
get_hosttime (net_timebuf, host_timebuf); + + /* can't use 'max()' macro here as we need to compare two fields + in the array, selectively */ if ((host_value_timebuf[0] > host_timebuf[0]) || ((host_value_timebuf[0] == host_timebuf[0]) && (host_value_timebuf[1] > host_timebuf[1]))) { diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h index 08bd77b91..175d3141d 100644 --- a/xlators/lib/src/libxlator.h +++ b/xlators/lib/src/libxlator.h @@ -151,4 +151,7 @@ match_uuid_local (const char *name, char *uuid); int gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); + #endif /* !_LIBXLATOR_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 5786694bd..5d724cc2a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -28,17 +28,6 @@ static int dict_get_param (dict_t *dict, char *key, char **param); -static int -glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, - char *conf_path, char **statefile); - -static int -glusterd_get_slave_info (char *slave, char **slave_ip, - char **slave_vol, char **op_errstr); - -static int -glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); - struct gsync_config_opt_vals_ gsync_confopt_vals[] = { {.op_name = "change_detector", .no_of_pos_vals = 2, @@ -55,6 +44,11 @@ struct gsync_config_opt_vals_ gsync_confopt_vals[] = { .case_sensitive = _gf_false, .values = {"critical", "error", "warning", "info", "debug"} }, + {.op_name = "use-tarssh", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"} + }, {.op_name = NULL, }, }; @@ -74,6 +68,11 @@ static char *gsync_reserved_opts[] = { NULL }; +static char *gsync_no_restart_opts[] = { + "checkpoint", + NULL +}; + int __glusterd_handle_sys_exec (rpcsvc_request_t *req) { @@ -899,6 
+898,8 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } if (op_match) { + if (!op_value) + goto out; val_match = _gf_false; for (i = 0; i < conf_vals->no_of_pos_vals; i++) { if(conf_vals->case_sensitive){ @@ -912,7 +913,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) if (!val_match) { ret = snprintf (errmsg, sizeof(errmsg) - 1, - "Invalid values (%s) for" + "Invalid value(%s) for" " option %s", op_value, op_name); errmsg[ret] = '\0'; @@ -923,7 +924,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } } } - +out: return 0; } @@ -1581,7 +1582,7 @@ out: return ret; } -static int +int glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, char **statefile) { @@ -1736,7 +1737,7 @@ glusterd_verify_slave (char *volname, char *slave_ip, char *slave, gf_log ("", GF_LOG_ERROR, "Not a valid slave"); ret = glusterd_gsync_read_frm_status (log_file_path, buf, sizeof(buf)); - if (ret) { + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read from %s", log_file_path); goto out; @@ -2391,6 +2392,8 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip = NULL; char *slave_vol = NULL; struct stat stbuf = {0, }; + gf_boolean_t restart_required = _gf_true; + char **resopt = NULL; GF_ASSERT (slave); GF_ASSERT (op_errstr); @@ -2495,18 +2498,28 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, out: if (!ret && volinfo) { + for (resopt = gsync_no_restart_opts; *resopt; resopt++) { + restart_required = _gf_true; + if (!strcmp ((*resopt), op_name)){ + restart_required = _gf_false; + break; + } + } + + if (restart_required) { ret = glusterd_check_restart_gsync_session (volinfo, slave, resp_dict, path_list, conf_path, 0); if (ret) - *op_errstr = gf_strdup ("internal error"); + *op_errstr = gf_strdup ("internal error"); + } } gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -static int +int 
glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) { int ret = 0; @@ -2530,7 +2543,6 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) char *p = buf + len - 1; while (isspace (*p)) *p-- = '\0'; - ret = 0; } } else if (ret < 0) gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt"); @@ -2540,20 +2552,146 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) } static int -glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) +dict_get_param (dict_t *dict, char *key, char **param) +{ + char *dk = NULL; + char *s = NULL; + char x = '\0'; + int ret = 0; + + if (dict_get_str (dict, key, param) == 0) + return 0; + + dk = gf_strdup (key); + if (!key) + return -1; + + s = strpbrk (dk, "-_"); + if (!s) + return -1; + x = (*s == '-') ? '_' : '-'; + *s++ = x; + while ((s = strpbrk (s, "-_"))) + *s++ = x; + + ret = dict_get_str (dict, dk, param); + + GF_FREE (dk); + return ret; +} + +static int +glusterd_parse_gsync_status (char *buf, gf_gsync_status_t *sts_val) +{ + int ret = -1; + int i = -1; + int num_of_fields = 8; + char *token = NULL; + char **tokens = NULL; + char **ptr = NULL; + char *save_ptr = NULL; + char na_buf[] = "N/A"; + + if (!buf) { + gf_log ("", GF_LOG_ERROR, "Empty buf"); + goto out; + } + + tokens = calloc (num_of_fields, sizeof (char *)); + if (!tokens) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + + ptr = tokens; + + for (token = strtok_r (buf, ",", &save_ptr); token; + token = strtok_r (NULL, ",", &save_ptr)) { + *ptr = gf_strdup(token); + if (!*ptr) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + ptr++; + } + + for (i = 0; i < num_of_fields; i++) { + token = strtok_r (tokens[i], ":", &save_ptr); + token = strtok_r (NULL, "\0", &save_ptr); + token++; + + /* token NULL check */ + if (!token && (i != 0) && + (i != 5) && (i != 7)) + token = na_buf; + + if (i == 0) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = 
na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->slave_node, token, strlen(token)); + } + if (i == 1) + memcpy (sts_val->files_syncd, token, strlen(token)); + if (i == 2) + memcpy (sts_val->purges_remaining, token, strlen(token)); + if (i == 3) + memcpy (sts_val->total_files_skipped, token, strlen(token)); + if (i == 4) + memcpy (sts_val->files_remaining, token, strlen(token)); + if (i == 5) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->worker_status, token, strlen(token)); + } + if (i == 6) + memcpy (sts_val->bytes_remaining, token, strlen(token)); + if (i == 7) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 2] = '\0'; + } + memcpy (sts_val->crawl_status, token, strlen(token)); + } + } + + ret = 0; +out: + for (i = 0; i< num_of_fields; i++) + if (tokens[i]) + GF_FREE(tokens[i]); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_gsync_fetch_status_extra (char *path, gf_gsync_status_t *sts_val) { char sockpath[PATH_MAX] = {0,}; struct sockaddr_un sa = {0,}; - size_t l = 0; int s = -1; struct pollfd pfd = {0,}; int ret = 0; - l = strlen (buf); - /* seek to end of data in buf */ - buf += l; - blen -= l; - glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath)); strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path)); @@ -2581,66 +2719,40 @@ glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) ret = -1; goto out; } - ret = read(s, buf, blen); + ret = read(s, sts_val->checkpoint_status, + sizeof(sts_val->checkpoint_status)); /* we expect a terminating 0 byte */ - if (ret == 0 || (ret > 0 && buf[ret - 1])) + if (ret == 0 || (ret > 0 && sts_val->checkpoint_status[ret - 1])) ret = -1; - if (ret > 0) + if (ret > 0) { ret = 0; + } - out: +out: close (s); return ret; } -static int -dict_get_param (dict_t *dict, char 
*key, char **param) -{ - char *dk = NULL; - char *s = NULL; - char x = '\0'; - int ret = 0; - - if (dict_get_str (dict, key, param) == 0) - return 0; - - dk = gf_strdup (key); - if (!key) - return -1; - - s = strpbrk (dk, "-_"); - if (!s) - return -1; - x = (*s == '-') ? '_' : '-'; - *s++ = x; - while ((s = strpbrk (s, "-_"))) - *s++ = x; - - ret = dict_get_str (dict, dk, param); - - GF_FREE (dk); - return ret; -} - -static int +int glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, dict_t *dict, char *node) { - glusterd_conf_t *priv = NULL; - int ret = 0; - char *statefile = NULL; - char *master = NULL; - char buf[1024] = "defunct"; - char nds[1024] = {0, }; - char mst[1024] = {0, }; - char slv[1024] = {0, }; - char sts[1024] = {0, }; - char *bufp = NULL; - dict_t *confd = NULL; - int gsync_count = 0; - int status = 0; - char *dyn_node = NULL; - char *path_list = NULL; + char brick_state_file[PATH_MAX] = ""; + char brick_path[PATH_MAX] = ""; + char *georep_session_wrkng_dir = NULL; + char *master = NULL; + char tmp[1024] = ""; + char sts_val_name[1024] = ""; + char monitor_status[PATH_MAX] = ""; + char *statefile = NULL; + char *socketfile = NULL; + dict_t *confd = NULL; + int gsync_count = 0; + int i = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + gf_gsync_status_t *sts_val = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); @@ -2661,7 +2773,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" "for %s(master), %s(slave)", master, slave); - goto done; + goto out; } @@ -2670,120 +2782,168 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name " "for %s(master), %s(slave). 
Please check gsync " "config file.", master, slave); - goto done; + goto out; } - ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); - if (ret) { + + ret = glusterd_gsync_read_frm_status (statefile, monitor_status, + sizeof (monitor_status)); + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read the status" "file for %s(master), %s(slave)", master, slave); - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } - - ret = gsync_status (master, slave, conf_path, &status); - if (ret == 0 && status == -1) { - if ((strcmp (buf, "Not Started")) && - (strcmp (buf, "Stopped"))) - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } else if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to get gsync status"); - goto done; + strncpy (monitor_status, "defunct", sizeof (monitor_status)); } - if (strcmp (buf, "Stable") != 0) - goto done; - - ret = dict_get_param (confd, "state_socket_unencoded", &statefile); + ret = dict_get_param (confd, "georep_session_working_dir", + &georep_session_wrkng_dir); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded" - " filepath. Please check gsync config file."); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's " + "working directory name for %s(master), %s(slave). 
" + "Please check gsync config file.", master, slave); + goto out; } - ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf)); + + ret = dict_get_param (confd, "state_socket_unencoded", &socketfile); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status" - "for %s(master), %s(slave)", master, slave); - /* there is a slight chance that this occurs due to race - * -- in that case, the following options all seem bad: - * - * - suppress irregurlar behavior by just leaving status - * on "OK" - * - freak out users with a misleading "defunct" - * - overload the meaning of the regular error signal - * mechanism of gsyncd, that is, when status is "faulty" - * - * -- so we just come up with something new... - */ - strncpy (buf, "N/A", sizeof (buf)); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name " + "for %s(master), %s(slave). Please check gsync " + "config file.", master, slave); + goto out; } - done: - if ((!strcmp (buf, "defunct")) || - (!strcmp (buf, "Not Started")) || - (!strcmp (buf, "Stopped"))) { - ret = glusterd_get_local_brickpaths (volinfo, &path_list); - if (!path_list) { - gf_log ("", GF_LOG_DEBUG, "This node not being part of" - " volume should not be running gsyncd. 
Hence" - " shouldn't display status for this node."); - ret = 0; + ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + if (ret) + gsync_count = 0; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; } - } - ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + /* Creating the brick state file's path */ + memset(brick_state_file, '\0', PATH_MAX); + memcpy (brick_path, brickinfo->path, PATH_MAX - 1); + for (i = 0; i < strlen(brick_path) - 1; i++) + if (brick_path[i] == '/') + brick_path[i] = '_'; + ret = snprintf(brick_state_file, PATH_MAX - 1, "%s%s.status", + georep_session_wrkng_dir, brick_path); + brick_state_file[ret] = '\0'; + + gf_log ("", GF_LOG_DEBUG, "brick_state_file = %s", brick_state_file); + + memset (tmp, '\0', sizeof(tmp)); + + ret = glusterd_gsync_read_frm_status (brick_state_file, + tmp, sizeof (tmp)); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status" + "file for %s brick for %s(master), %s(slave) " + "session", brickinfo->path, master, slave); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf 
(sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); + sts_val->bytes_remaining[ret] = '\0'; + goto store_status; + } - if (ret) - gsync_count = 1; - else - gsync_count++; + ret = glusterd_gsync_fetch_status_extra (socketfile, sts_val); + if (ret || strlen(sts_val->checkpoint_status) == 0) { + gf_log ("", GF_LOG_DEBUG, "No checkpoint status" + "for %s(master), %s(slave)", master, slave); + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - (void) snprintf (nds, sizeof (nds), "node%d", gsync_count); - dyn_node = gf_strdup (node); - if (!dyn_node) - goto out; - ret = dict_set_dynstr (dict, nds, dyn_node); - if (ret) { - GF_FREE (dyn_node); - goto out; - } + ret = glusterd_parse_gsync_status (tmp, sts_val); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to parse the gsync status for %s", + brickinfo->path); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + 
sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); + sts_val->bytes_remaining[ret] = '\0'; + } - snprintf (mst, sizeof (mst), "master%d", gsync_count); - master = gf_strdup (master); - if (!master) - goto out; - ret = dict_set_dynstr (dict, mst, master); - if (ret) { - GF_FREE (master); - goto out; - } +store_status: + if ((strcmp (monitor_status, "Stable"))) { + memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status)); + sts_val->worker_status[strlen(monitor_status)] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - snprintf (slv, sizeof (slv), "slave%d", gsync_count); - slave = gf_strdup (slave); - if (!slave) - goto out; - ret = dict_set_dynstr (dict, slv, slave); - if (ret) { - GF_FREE (slave); - goto out; - } + if (strcmp (sts_val->worker_status, "Active")) { + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + } - snprintf (sts, sizeof (slv), "status%d", gsync_count); - bufp = gf_strdup (buf); - if (!bufp) - goto out; - ret = dict_set_dynstr (dict, sts, bufp); - if (ret) { - GF_FREE (bufp); - goto out; + if (!strcmp (sts_val->slave_node, "N/A")) { + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + } + + memcpy (sts_val->node, node, strlen(node)); + sts_val->node[strlen(node)] = '\0'; + memcpy (sts_val->brick, brickinfo->path, strlen(brickinfo->path)); + sts_val->brick[strlen(brickinfo->path)] 
= '\0'; + memcpy (sts_val->master, master, strlen(master)); + sts_val->master[strlen(master)] = '\0'; + + snprintf (sts_val_name, sizeof (sts_val_name), "status_value%d", gsync_count); + ret = dict_set_bin (dict, sts_val_name, sts_val, sizeof(gf_gsync_status_t)); + if (ret) { + GF_FREE (sts_val); + goto out; + } + + gsync_count++; + sts_val = NULL; } + ret = dict_set_int32 (dict, "gsync-count", gsync_count); if (ret) goto out; - out: +out: dict_destroy (confd); return 0; @@ -3246,30 +3406,32 @@ glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); - if (ptr) { - ret = dict_get_int32 (rsp_dict, "output_count", &output_count); - if (ret) - output_count = 1; - else - output_count++; - memset (output_name, '\0', sizeof (output_name)); - snprintf (output_name, sizeof (output_name), - "output_%d", output_count); - if (buf[strlen(buf) - 1] == '\n') - buf[strlen(buf) - 1] = '\0'; - bufp = gf_strdup (buf); - if (!bufp) - gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); - ret = dict_set_dynstr (rsp_dict, output_name, bufp); - if (ret) { - GF_FREE (bufp); - gf_log ("", GF_LOG_ERROR, "output set failed."); + do { + ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32 (rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + memset (output_name, '\0', sizeof (output_name)); + snprintf (output_name, sizeof (output_name), + "output_%d", output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup (buf); + if (!bufp) + gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); + ret = dict_set_dynstr (rsp_dict, output_name, bufp); + if (ret) { + GF_FREE (bufp); + gf_log ("", GF_LOG_ERROR, "output set failed."); + } + ret = dict_set_int32 (rsp_dict, "output_count", output_count); + if (ret) + gf_log ("", GF_LOG_ERROR, "output_count set failed."); } - ret = 
dict_set_int32 (rsp_dict, "output_count", output_count); - if (ret) - gf_log ("", GF_LOG_ERROR, "output_count set failed."); - } + } while (ptr); ret = runner_end (&runner); if (ret) { @@ -3708,7 +3870,7 @@ out: } -static int +int glusterd_get_slave_info (char *slave, char **slave_ip, char **slave_vol, char **op_errstr) { @@ -3888,7 +4050,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -3902,6 +4064,16 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "pid-file"); @@ -3909,6 +4081,13 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep-working-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "state-file"); @@ -3986,7 +4165,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); 
RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c index 0d67d1303..4ce441da8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -231,7 +231,6 @@ parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc) const char *georep_mnt_desc_template = "SUP(" - "xlator-option=\\*-dht.assert-no-child-down=true " "volfile-server=localhost " "client-pid=%d " "user-map-root=%s " diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 15c40f3e4..8cf9f790f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -4591,14 +4591,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) { char *path_list = NULL; char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *statefile = NULL; + char buf[1024] = "faulty"; int uuid_len = 0; int ret = 0; char uuid_str[64] = {0}; - glusterd_volinfo_t *volinfo = NULL; - char *conf_path = NULL; + glusterd_volinfo_t *volinfo = NULL; + char confpath[PATH_MAX] = ""; + char *op_errstr = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (data); volinfo = data; - GF_ASSERT (volinfo); slave = strchr(value->data, ':'); if (slave) slave ++; @@ -4608,22 +4618,63 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) strncpy (uuid_str, (char*)value->data, uuid_len); + /* Getting Local Brickpaths */ ret = glusterd_get_local_brickpaths (volinfo, &path_list); - ret = dict_get_str (this, "conf_path", &conf_path); + /*Generating the conf file path needed by gsyncd */ + ret = glusterd_get_slave_info (slave, &slave_ip, + &slave_vol, &op_errstr); if (ret) { gf_log ("", GF_LOG_ERROR, - "Unable to fetch conf file path."); + "Unable to fetch slave details."); + ret = -1; goto out; } - 
glusterd_start_gsync (volinfo, slave, path_list, conf_path, - uuid_str, NULL); + ret = snprintf (confpath, sizeof(confpath) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, volinfo->volname, + slave_ip, slave_vol); + confpath[ret] = '\0'; + + /* Fetching the last status of the node */ + ret = glusterd_get_statefile_name (volinfo, slave, + confpath, &statefile); + if (ret) { + if (!strstr(slave, "::")) + gf_log ("", GF_LOG_INFO, + "%s is not a valid slave url.", slave); + else + gf_log ("", GF_LOG_INFO, "Unable to get" + " statefile's name"); + goto out; + } + + ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status"); + goto out; + } - GF_FREE (path_list); - path_list = NULL; + /* Looks for the last status, to find if the session was running + * when the node went down. If the session was not started or + * was stopped, do not restart the geo-rep session */ + if ((!strcmp (buf, "Not Started")) || + (!strcmp (buf, "Stopped"))) { + gf_log ("", GF_LOG_INFO, + "Geo-Rep Session was not started between " + "%s and %s::%s. 
Not Restarting", volinfo->volname, + slave_ip, slave_vol); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL); out: + if (path_list) + GF_FREE (path_list); + return ret; } @@ -7205,21 +7256,16 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src) } -static int32_t +int32_t glusterd_append_status_dicts (dict_t *dst, dict_t *src) { - int dst_count = 0; - int src_count = 0; - int i = 0; - int ret = 0; - char mst[PATH_MAX] = {0,}; - char slv[PATH_MAX] = {0, }; - char sts[PATH_MAX] = {0, }; - char nds[PATH_MAX] = {0, }; - char *mst_val = NULL; - char *slv_val = NULL; - char *sts_val = NULL; - char *nds_val = NULL; + char sts_val_name[PATH_MAX] = {0, }; + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + gf_gsync_status_t *sts_val = NULL; + gf_gsync_status_t *dst_sts_val = NULL; GF_ASSERT (dst); @@ -7237,49 +7283,29 @@ glusterd_append_status_dicts (dict_t *dst, dict_t *src) goto out; } - for (i = 1; i <= src_count; i++) { - snprintf (nds, sizeof(nds), "node%d", i); - snprintf (mst, sizeof(mst), "master%d", i); - snprintf (slv, sizeof(slv), "slave%d", i); - snprintf (sts, sizeof(sts), "status%d", i); - - ret = dict_get_str (src, nds, &nds_val); - if (ret) - goto out; - - ret = dict_get_str (src, mst, &mst_val); - if (ret) - goto out; - - ret = dict_get_str (src, slv, &slv_val); - if (ret) - goto out; + for (i = 0; i < src_count; i++) { + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); - ret = dict_get_str (src, sts, &sts_val); + ret = dict_get_bin (src, sts_val_name, (void **) &sts_val); if (ret) goto out; - snprintf (nds, sizeof(nds), "node%d", i+dst_count); - snprintf (mst, sizeof(mst), "master%d", i+dst_count); - snprintf (slv, sizeof(slv), "slave%d", i+dst_count); - snprintf (sts, sizeof(sts), "status%d", i+dst_count); - - ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); - if (ret) + dst_sts_val = GF_CALLOC (1, 
sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!dst_sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; + } - ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); - if (ret) - goto out; + memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t)); - ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); - if (ret) - goto out; + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i + dst_count); - ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); + ret = dict_set_bin (dst, sts_val_name, dst_sts_val, sizeof(gf_gsync_status_t)); if (ret) goto out; - } ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 970b1f8a6..20cd00cbe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -582,6 +582,17 @@ glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, char **slave_ip, char **slave_vol, char **conf_path, char **op_errstr); +int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr); + +int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile); + +int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + int glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, dict_t *resp_dict, char *path_list, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index c2be2c9da..58833869a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -594,7 +594,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + 
"aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -608,6 +608,16 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "pid-file"); @@ -615,6 +625,13 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep working dir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "state-file"); @@ -701,7 +718,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e1e9e591f..23b4205b0 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -47,7 +47,7 @@ #define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" #define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" #define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" -#define GEO_CONF_MAX_OPT_VALS 5 +#define GEO_CONF_MAX_OPT_VALS 6 #define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \ "S56glusterd-geo-rep-create-post.sh" -- cgit