diff options
author | Venkatesh Somyajulu <vsomyaju@redhat.com> | 2013-09-12 12:37:37 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-10-14 14:41:44 -0700 |
commit | 047882750e0e97f5eed21ebe3445cdb216b15a9d (patch) | |
tree | 8fc52a198465c233795cba49dda45f7b8c824d95 | |
parent | 9da2cb37834db98596dae8f7fb6d8f6abb513687 (diff) |
cluster/afr : Implementation of command "gluster volume heal vn statistics"
"gluster volume heal volumename statistics" command gives the summary
of the afr crawl done based on the entries present in the xattrop
directory. Whenever afr crawls are attempted, the beginning time of
crawl, end time of crawl, no of files healed, heal-failed count and
number of files in split brain are shown along with the type of the
crawl. If crawl is already in progress then it will give the number
of files healed, heal failed count and number of files in split-brain
from the beginning of the crawl and instead of telling the end time of
the crawl, "CRAWL IN PROGRESS" message will be shown.
Output format:
command: "gluster volume heal volume-name statistics"
Output:
Gathering afr crawl statistics crawl statistics on volume volume-name
has been successful
------------------------------------------------
Crawl statistics for brick no 0
Hostname of brick 192.168.122.248
Starting time of crawl: Wed Jul 10 15:52:38 2013
Ending time of crawl: Wed Jul 10 15:52:38 2013
Type of crawl: INDEX
No. of entries healed: 0
No. of entries in split-brain: 0
No. of heal failed entries: 0
Starting time of crawl: Wed Jul 10 15:52:38 2013
Ending time of crawl: Wed Jul 10 15:52:38 2013
Type of crawl: INDEX
No. of entries healed: 0
No. of entries in split-brain: 0
No. of heal failed entries: 0
------------------------------------------------
Crawl statistics for brick no 1
Hostname of brick 192.168.122.1
Starting time of crawl: Wed Jul 10 15:52:42 2013
Ending time of crawl: Wed Jul 10 15:52:42 2013
Type of crawl: INDEX
No. of entries healed: 0
No. of entries in split-brain: 0
No. of heal failed entries: 0
Starting time of crawl: Wed Jul 10 15:52:42 2013
Ending time of crawl: Wed Jul 10 15:52:42 2013
Type of crawl: INDEX
No. of entries healed: 0
No. of entries in split-brain: 0
No. of heal failed entries: 0
--------------------------------------------------
Change-Id: I10bf9d10b005741db9973fb1352e0dd59ed99aa9
BUG: 949400
Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/4790
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | cli/src/cli-cmd-parser.c | 5 | ||||
-rw-r--r-- | cli/src/cli-cmd-volume.c | 2 | ||||
-rw-r--r-- | cli/src/cli-rpc-ops.c | 112 | ||||
-rw-r--r-- | libglusterfs/src/circ-buff.c | 30 | ||||
-rw-r--r-- | libglusterfs/src/circ-buff.h | 5 | ||||
-rw-r--r-- | libglusterfs/src/event-history.c | 6 | ||||
-rw-r--r-- | libglusterfs/src/event-history.h | 3 | ||||
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 67 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 5 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 427 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 43 | ||||
-rw-r--r-- | xlators/debug/trace/src/trace.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 85 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 2 |
18 files changed, 762 insertions, 62 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 62f5c3723a7..69e7fbdbe16 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -2509,6 +2509,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount, ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_FULL); goto done; + } else if (!strcmp (words[3], "statistics")) { + ret = dict_set_int32 (dict, "heal-op", + GF_AFR_OP_STATISTICS); + goto done; + } else if (!strcmp (words[3], "info")) { ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_INDEX_SUMMARY); diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 56ad65ebb58..1c6baa5e8e1 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1918,7 +1918,7 @@ struct cli_cmd volume_cmds[] = { cli_cmd_volume_status_cbk, "display status of all or specified volume(s)/brick"}, - { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]", + { "volume heal <VOLNAME> [{full | statistics |info {healed | heal-failed | split-brain}}]", cli_cmd_volume_heal_cbk, "self-heal commands on volume specified by <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 5dddbb41bb0..459dfa18823 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -6953,6 +6953,97 @@ gf_cli_umount (call_frame_t *frame, xlator_t *this, void *data) } void +cmd_heal_volume_statistics_out (dict_t *dict, int brick) +{ + + uint64_t num_entries = 0; + int ret = 0; + char key[256] = {0}; + char *hostname = NULL; + uint64_t i = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = NULL; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + + snprintf (key, sizeof key, "%d-hostname", brick); + ret = dict_get_str (dict, key, &hostname); + if (ret) + goto out; + cli_out ("------------------------------------------------"); + cli_out ("\nCrawl statistics for brick no %d", brick); + cli_out ("Hostname of brick %s", hostname); + + snprintf (key, sizeof key, "statistics-%d-count", brick); + ret = dict_get_uint64 (dict, key, &num_entries); + if (ret) + goto out; + + for (i = 0; i < num_entries; i++) + { + snprintf (key, sizeof key, "statistics_crawl_type-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &crawl_type); + if (ret) + goto out; + + snprintf (key, sizeof key, "statistics_healed_cnt-%d-%"PRIu64, + brick,i); + ret = dict_get_uint64 (dict, key, &healed_count); + if (ret) + goto out; + + snprintf (key, sizeof key, "statistics_sb_cnt-%d-%"PRIu64, + brick, i); + ret = dict_get_uint64 (dict, key, &split_brain_count); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_heal_failed_cnt-%d-%"PRIu64, + brick, i); + ret = dict_get_uint64 (dict, key, &heal_failed_count); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_strt_time-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &start_time_str); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_end_time-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &end_time_str); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_inprogress-%d-%"PRIu64, + brick, i); + ret = dict_get_int32 (dict, key, &progress); + if (ret) + goto out; + + cli_out ("\nStarting time of crawl: %s", start_time_str); + if (progress == 1) + cli_out ("Crawl is in progress"); + else + cli_out ("Ending time of crawl: %s", end_time_str); + + cli_out ("Type of crawl: %s", crawl_type); + cli_out ("No. of entries healed: %"PRIu64, + healed_count); + cli_out ("No. of entries in split-brain: %"PRIu64, + split_brain_count); + cli_out ("No. of heal failed entries: %"PRIu64, + heal_failed_count); + + } + + +out: + return; +} + +void cmd_heal_volume_brick_out (dict_t *dict, int brick) { uint64_t num_entries = 0; @@ -7096,6 +7187,9 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, case GF_AFR_OP_SPLIT_BRAIN_FILES: heal_op_str = "list of split brain entries"; break; + case GF_AFR_OP_STATISTICS: + heal_op_str = "crawl statistics"; + break; case GF_AFR_OP_INVALID: heal_op_str = "invalid heal op"; break; @@ -7156,8 +7250,22 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - for (i = 0; i < brick_count; i++) - cmd_heal_volume_brick_out (dict, i); + switch (heal_op) { + case GF_AFR_OP_STATISTICS: + for (i = 0; i < brick_count; i++) + cmd_heal_volume_statistics_out (dict, i); + break; + case GF_AFR_OP_INDEX_SUMMARY: + case GF_AFR_OP_HEALED_FILES: + case GF_AFR_OP_HEAL_FAILED_FILES: + case GF_AFR_OP_SPLIT_BRAIN_FILES: + for (i = 0; i < brick_count; i++) + cmd_heal_volume_brick_out (dict, i); + break; + default: + break; + } + ret = rsp.op_ret; out: diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c index a48c9287984..484ce7dc939 100644 --- a/libglusterfs/src/circ-buff.c +++ b/libglusterfs/src/circ-buff.c @@ -10,6 +10,17 @@ #include "circ-buff.h" +void +cb_destroy_data (circular_buffer_t *cb, + void (*destroy_buffer_data) (void *data)) +{ + if (destroy_buffer_data) + destroy_buffer_data (cb->data); + GF_FREE (cb->data); + return; +} + + /* hold lock while calling this function */ int __cb_add_entry_buffer (buffer_t *buffer, void *item) @@ -29,7 +40,8 @@ __cb_add_entry_buffer (buffer_t *buffer, void *item) if (buffer->cb[buffer->w_index]) { ptr = buffer->cb[buffer->w_index]; if (ptr->data) { - GF_FREE (ptr->data); + cb_destroy_data (ptr, + buffer->destroy_buffer_data); ptr->data = NULL; GF_FREE (ptr); } @@ -131,7 +143,8 @@ cb_buffer_dump (buffer_t *buffer, void *data, } buffer_t * -cb_buffer_new (size_t buffer_size, gf_boolean_t use_once) +cb_buffer_new (size_t buffer_size, gf_boolean_t use_once, + void (*destroy_buffer_data) (void *data)) { buffer_t *buffer = NULL; @@ -157,6 +170,7 @@ cb_buffer_new (size_t buffer_size, gf_boolean_t use_once) buffer->size_buffer = buffer_size; buffer->use_once = use_once; buffer->used_len = 0; + buffer->destroy_buffer_data = destroy_buffer_data; pthread_mutex_init (&buffer->lock, NULL); out: @@ -166,12 +180,18 @@ out: void cb_buffer_destroy (buffer_t *buffer) { - int i = 0; - + int i = 0; + circular_buffer_t *ptr = NULL; if (buffer) { if (buffer->cb) { for (i = 0; i < buffer->used_len ; i++) { - GF_FREE (buffer->cb[i]); + ptr = buffer->cb[i]; + if (ptr->data) { + cb_destroy_data (ptr, + buffer->destroy_buffer_data); + ptr->data = NULL; + GF_FREE (ptr); + } } GF_FREE (buffer->cb); } diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h index 5b5acc387bf..e3459f5e3d0 100644 --- a/libglusterfs/src/circ-buff.h +++ b/libglusterfs/src/circ-buff.h @@ -38,7 +38,7 @@ struct _buffer { /* indicates the amount of circular buffer used. */ circular_buffer_t **cb; - + void (*destroy_buffer_data) (void *data); pthread_mutex_t lock; }; @@ -51,7 +51,8 @@ void cb_buffer_show (buffer_t *buffer); buffer_t * -cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once); +cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once, + void (*destroy_data) (void *data)); void cb_buffer_destroy (buffer_t *buffer); diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c index fe511caeb1e..82baa521aec 100644 --- a/libglusterfs/src/event-history.c +++ b/libglusterfs/src/event-history.c @@ -11,7 +11,8 @@ #include "event-history.h" eh_t * -eh_new (size_t buffer_size, gf_boolean_t use_buffer_once) +eh_new (size_t buffer_size, gf_boolean_t use_buffer_once, + void (*destroy_buffer_data) (void *data)) { eh_t *history = NULL; buffer_t *buffer = NULL; @@ -22,7 +23,8 @@ eh_new (size_t buffer_size, gf_boolean_t use_buffer_once) goto out; } - buffer = cb_buffer_new (buffer_size, use_buffer_once); + buffer = cb_buffer_new (buffer_size, use_buffer_once, + destroy_buffer_data); if (!buffer) { gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed"); GF_FREE (history); diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h index b1750bbae96..b64f63b5eb4 100644 --- a/libglusterfs/src/event-history.h +++ b/libglusterfs/src/event-history.h @@ -32,7 +32,8 @@ eh_dump (eh_t *event , void *data, int (fn) (circular_buffer_t *buffer, void *data)); eh_t * -eh_new (size_t buffer_size, gf_boolean_t use_buffer_once); +eh_new (size_t buffer_size, gf_boolean_t use_buffer_once, + void (*destroy_data) (void *data)); int eh_save_history (eh_t *history, void *string); diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 704b1540a52..bdc3a69561d 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -209,7 +209,8 @@ typedef enum { GF_AFR_OP_INDEX_SUMMARY, GF_AFR_OP_HEALED_FILES, GF_AFR_OP_HEAL_FAILED_FILES, - GF_AFR_OP_SPLIT_BRAIN_FILES + GF_AFR_OP_SPLIT_BRAIN_FILES, + GF_AFR_OP_STATISTICS, } gf_xl_afr_op_t ; typedef enum { diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1acbc01baf7..d7d78610812 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -51,7 +51,7 @@ #define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL #define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL - +#define AFR_STATISTICS_HISTORY_SIZE 50 int afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, gf_boolean_t fail_conflict); @@ -1832,7 +1832,8 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this, afr_lookup_set_self_heal_params (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { - if (afr_is_transaction_running (local)) + if (afr_is_transaction_running (local) && + (!local->allow_sh_for_running_transaction)) goto out; reason = "lookup detected pending operations"; @@ -2427,7 +2428,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, int call_count = 0; uint64_t ctx = 0; int32_t op_errno = 0; - + int allow_sh = 0; priv = this->private; AFR_LOCAL_ALLOC_OR_GOTO (local, out); @@ -2499,6 +2500,11 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. */ local->op_errno = ENOTCONN; + ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction", + &allow_sh); + dict_del (xattr_req, "allow-sh-for-running-transaction"); + local->allow_sh_for_running_transaction = allow_sh; + ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc, &gfid_req); if (ret) { @@ -4425,6 +4431,16 @@ afr_priv_destroy (afr_private_t *priv) if (priv->shd.split_brain) eh_destroy (priv->shd.split_brain); + for (i = 0; i < priv->child_count; i++) + { + if (priv->shd.statistics[i]) + eh_destroy (priv->shd.statistics[i]); + } + + GF_FREE (priv->shd.statistics); + + GF_FREE (priv->shd.crawl_events); + GF_FREE (priv->last_event); if (priv->pending_key) { for (i = 0; i < priv->child_count; i++) @@ -4528,3 +4544,48 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this) } UNLOCK (&inode->lock); } + +int +afr_initialise_statistics (xlator_t *this) +{ + afr_private_t *priv = NULL; + int ret = -1; + int i = 0; + int child_count = 0; + eh_t *stats_per_brick = NULL; + shd_crawl_event_t ***shd_crawl_events = NULL; + priv = this->private; + + priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count, + gf_common_mt_eh_t); + if (!priv->shd.statistics) { + ret = -1; + goto out; + } + child_count = priv->child_count; + for (i=0; i < child_count ; i++) { + stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE, + _gf_false, + _destroy_crawl_event_data); + if (!stats_per_brick) { + ret = -1; + goto out; + } + priv->shd.statistics[i] = stats_per_brick; + + } + + shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events); + *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*), + priv->child_count, + gf_afr_mt_shd_crawl_event_t); + + if (!priv->shd.crawl_events) { + ret = -1; + goto out; + } + ret = 0; +out: + return ret; + +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index e01ab366f4f..73594f26526 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -41,7 +41,10 @@ enum gf_afr_mem_types_ { gf_afr_mt_shd_event_t, gf_afr_mt_time_t, gf_afr_mt_pos_data_t, - gf_afr_mt_reply_t, + gf_afr_mt_reply_t, + gf_afr_mt_stats_t, + gf_afr_mt_shd_crawl_event_t, + gf_afr_mt_uint64_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index f33b04eed06..89e056335d4 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -84,6 +84,33 @@ _loc_assign_gfid_path (loc_t *loc) } void +_destroy_crawl_event_data (void *data) +{ + shd_crawl_event_t *crawl_event = NULL; + + if (!data) + goto out; + + crawl_event = (shd_crawl_event_t *)data; + GF_FREE (crawl_event->start_time_str); + GF_FREE (crawl_event->end_time_str); + +out: + return; +} + +void +_destroy_shd_event_data (void *data) +{ + shd_event_t *event = NULL; + if (!data) + goto out; + event = (shd_event_t*)data; + GF_FREE (event->path); +out: + return; +} +void shd_cleanup_event (void *event) { shd_event_t *shd_event = event; @@ -128,6 +155,123 @@ _build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) } int +_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child, + shd_crawl_event_t *shd_event, struct timeval *tv) +{ + int ret = 0; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = NULL; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + + healed_count = shd_event->healed_count; + split_brain_count = shd_event->split_brain_count; + heal_failed_count = shd_event->heal_failed_count; + start_time_str = shd_event->start_time_str; + end_time_str = shd_event->end_time_str; + crawl_type = shd_event->crawl_type; + + if (!start_time_str) { + ret = -1; + goto out; + } + + + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); + goto out; + } + + snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); + ret = dict_get_uint64 (output, key, &count); + + snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64(output, key, healed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "healed_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, split_brain_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "split_brain_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_dynstr (output, key, gf_strdup (crawl_type)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_type to output"); + goto out; + } + snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, heal_failed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "healed_failed_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_dynstr (output, key, gf_strdup(start_time_str)); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_start_time to outout"); + goto out; + } + + snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64, + xl_id, child, count); + + if (!end_time_str) + end_time_str = "Could not determine the end time"; + ret = dict_set_dynstr (output, key, gf_strdup(end_time_str)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_end_time to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64, + xl_id, child, count); + + if (shd_event->crawl_inprogress == _gf_true) + progress = 1; + else + progress = 0; + + ret = dict_set_int32 (output, key, progress); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "inprogress to outout"); + goto out; + } + + snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child); + ret = dict_set_uint64 (output, key, count + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not increment the " + "counter."); + goto out; + } +out: + return ret; +} + +int _add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path, struct timeval *tv, gf_boolean_t dyn) { @@ -233,6 +377,20 @@ out: } int +_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data) +{ + int ret = 0; + shd_dump_t *dump_data = NULL; + shd_crawl_event_t *shd_event = NULL; + + dump_data = data; + shd_event = cb->data; + ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict, + dump_data->child, shd_event, &cb->tv); + return ret; +} + +int _add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child) { shd_dump_t dump_data = {0}; @@ -244,6 +402,26 @@ _add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child) return 0; } + +int +_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child) +{ + shd_dump_t dump_data = {0}; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + + priv = this->private; + shd = &priv->shd; + + dump_data.this = this; + dump_data.dict = dict; + dump_data.child = child; + eh_dump (shd->statistics[child], &dump_data, + _add_crawl_event_statistics_to_dict); + return 0; + +} + void _remove_stale_index (xlator_t *this, xlator_t *readdir_xl, loc_t *parent, char *fname) @@ -307,16 +485,18 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp, afr_crawl_data_t *crawl_data) { - int ret = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - eh_t *eh = NULL; - char *path = NULL; - char gfid_str[64] = {0}; - shd_event_t *event = NULL; - int32_t sh_failed = 0; - gf_boolean_t split_brain = 0; - int32_t actual_sh_done = 0; + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + eh_t *eh = NULL; + char *path = NULL; + char gfid_str[64] = {0}; + shd_event_t *event = NULL; + int32_t sh_failed = 0; + gf_boolean_t split_brain = 0; + int32_t actual_sh_done = 0; + shd_crawl_event_t **shd_crawl_event = NULL; + priv = this->private; shd = &priv->shd; if (crawl_data->crawl == INDEX) { @@ -343,16 +523,19 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done); } - split_brain = afr_is_split_brain (this, child->inode); + shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events); + split_brain = afr_is_split_brain (this, child->inode); if ((op_ret < 0 && op_errno == EIO) || split_brain) { eh = shd->split_brain; + shd_crawl_event[crawl_data->child]->split_brain_count += 1; } else if ((op_ret < 0) || sh_failed) { eh = shd->heal_failed; + shd_crawl_event[crawl_data->child]->heal_failed_count += 1; } else if (actual_sh_done == 1) { - eh = shd->healed; + eh = shd->healed; + shd_crawl_event[crawl_data->child]->healed_count += 1; } - ret = -1; if (eh != NULL) { @@ -408,10 +591,20 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent struct iatt parentbuf = {0}; int ret = 0; dict_t *xattr_rsp = NULL; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + ret = -1; + goto out; + } + + ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1); gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); - ret = syncop_lookup (this, child, NULL, + ret = syncop_lookup (this, child, xattr_req, iattr, &xattr_rsp, &parentbuf); _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp, crawl_data); @@ -420,6 +613,9 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent if (ret == 0) ret = _link_inode_update_loc (this, child, iattr); +out: + if (xattr_req) + dict_unref(xattr_req); return ret; } @@ -567,8 +763,103 @@ _get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output); } +void +afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int i = 0; + priv = this->private; + shd= &priv->shd; + for (i = 0; i < priv->child_count; i++) { + if (shd->pos[i] != AFR_POS_LOCAL) + continue; + _add_statistics_to_dict (this, dict, i); + } + + return ; +} + +static void +reset_crawl_event (shd_crawl_event_t *crawl_event) +{ + crawl_event->healed_count = 0; + crawl_event->split_brain_count = 0; + crawl_event->heal_failed_count = 0; + GF_FREE (crawl_event->start_time_str); + crawl_event->start_time_str = NULL; + crawl_event->end_time_str = NULL; + crawl_event->crawl_type = NULL; + crawl_event->crawl_inprogress = _gf_false; + return; +} + +static void +afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst) +{ + dst->healed_count = src->healed_count; + dst->split_brain_count = src->split_brain_count; + dst->heal_failed_count = src->heal_failed_count; + dst->start_time_str = gf_strdup (src->start_time_str); + dst->end_time_str = "Crawl is already in progress"; + dst->crawl_type = src->crawl_type; + dst->crawl_inprogress = _gf_true; + return; +} + +static int +afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict) +{ + shd_crawl_event_t *evnt = NULL; + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int i = 0; + priv = this->private; + shd = &priv->shd; + + evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t), + gf_afr_mt_shd_crawl_event_t); + if (!evnt) { + ret = -1; + goto out; + } + LOCK (&priv->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (shd->pos[i] != AFR_POS_LOCAL) + continue; + + reset_crawl_event (evnt); + + if (!shd->crawl_events[i]) { + continue; + } + + afr_copy_crawl_event_struct (shd->crawl_events[i], + evnt); + _add_crawl_stats_to_dict (this, dict, i, evnt, NULL); + + } + } + UNLOCK (&priv->lock); + reset_crawl_event (evnt); + GF_FREE (evnt); + +out: + return ret; +} + +static int +_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) +{ + int ret = 0; + afr_fill_completed_crawl_statistics_to_dict (this, dict); + ret = afr_fill_crawl_statistics_of_running_crawl (this, dict); + return ret; +} int -_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) +_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; @@ -618,16 +909,19 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) ret = 0; break; case GF_AFR_OP_HEALED_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); + ret = _add_local_subvols_eh_to_dict (this, shd->healed, output); break; case GF_AFR_OP_HEAL_FAILED_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed, + ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed, output); break; case GF_AFR_OP_SPLIT_BRAIN_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->split_brain, + ret = _add_local_subvols_eh_to_dict (this, shd->split_brain, output); break; + case GF_AFR_OP_STATISTICS: + ret = _add_local_subvols_crawl_statistics_to_dict (this, output); + break; default: gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); break; @@ -1147,6 +1441,95 @@ out: return ret; } +char * +get_crawl_type_in_string (afr_crawl_type_t crawl) +{ + char *index = "INDEX"; + char *full = "FULL"; + char *crawl_type = NULL; + + if (crawl == INDEX){ + crawl_type = index; + } else if (crawl == FULL) { + crawl_type = full; + } + + return crawl_type; +} + +static int +afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = 0; + shd_crawl_event_t *crawl_event = NULL; + time_t get_time = 0; + + priv = this->private; + shd = &priv->shd; + + crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1, + gf_afr_mt_shd_crawl_event_t); + if (!crawl_event) { + ret = -1; + goto out; + } + + get_time = time(NULL); + if (get_time == ((time_t)-1)) { + ret = -1; + goto out; + } + + crawl_event->start_time_str = gf_strdup (ctime(&get_time)); + + crawl_event->crawl_type = get_crawl_type_in_string (crawl); + if (!crawl_event->crawl_type) { + ret = -1; + goto out; + } + LOCK (&priv->lock); + { + shd->crawl_events[child] = crawl_event; + } + UNLOCK (&priv->lock); + ret = 0; +out: + return ret; + +} + +static int +afr_put_crawl_event_in_eh (xlator_t *this, int child) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = 0; + time_t get_time = 0; + shd_crawl_event_t **crawl_event = NULL; + + priv = this->private; + shd = &priv->shd; + + get_time = time(NULL); + if (get_time == ((time_t)-1)) { + ret = -1; + goto out; + } + crawl_event = (shd_crawl_event_t**)shd->crawl_events; + LOCK (&priv->lock); + { + crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time)); + ret = eh_save_history (shd->statistics[child], + crawl_event[child]); + crawl_event[child] = NULL; + } + UNLOCK (&priv->lock); +out: + return ret; +} + static int afr_dir_exclusive_crawl (void *data) { @@ -1182,7 +1565,15 @@ afr_dir_exclusive_crawl (void *data) } do { + ret = afr_allocate_crawl_event (this, child, crawl_data->crawl); + if (ret) + goto out; afr_dir_crawl (data); + + ret = afr_put_crawl_event_in_eh (this, child); + if (ret < 0) + goto out; + LOCK (&priv->lock); { if (shd->pending[child] != NONE) { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 32a8aaca50c..e0c083754e0 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -29,6 +29,19 @@ typedef struct afr_crawl_data_ { struct iatt *iattr); } afr_crawl_data_t; +typedef struct crawl_event_stats_ { + uint64_t healed_count; + uint64_t split_brain_count; + uint64_t heal_failed_count; + char *start_time_str; + char *end_time_str; + char *crawl_type; + gf_boolean_t crawl_inprogress; +} shd_crawl_event_t; + +void _destroy_crawl_event_data (void *data); +void _destroy_shd_event_data (void *data); + typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, loc_t *child, loc_t *parent, struct iatt *iattr); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 20883392059..aa8d002209c 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -439,15 +439,18 @@ init (xlator_t *this) if (!priv->shd.timer) goto out; - priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false); + priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.healed) goto out; - priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false); + priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.heal_failed) goto out; - priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false); + priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.split_brain) goto out; @@ -457,7 +460,9 @@ init (xlator_t *this) priv->root_inode = inode_ref (this->itable->root); GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out); GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out); - + ret = afr_initialise_statistics (this); + if (ret) + goto out; ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 95e69f3bcd2..8d8b2f649f3 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -98,17 +98,19 @@ typedef enum { } afr_crawl_type_t; typedef struct afr_self_heald_ { - gf_boolean_t enabled; - gf_boolean_t iamshd; - afr_crawl_type_t *pending; - gf_boolean_t *inprogress; - afr_child_pos_t *pos; - gf_timer_t **timer; - eh_t *healed; - eh_t *heal_failed; - eh_t *split_brain; - char *node_uuid; - int timeout; + gf_boolean_t enabled; + gf_boolean_t iamshd; + afr_crawl_type_t *pending; + gf_boolean_t *inprogress; + afr_child_pos_t *pos; + gf_timer_t **timer; + eh_t *healed; + eh_t *heal_failed; + eh_t *split_brain; + eh_t **statistics; + void **crawl_events; + char *node_uuid; + int timeout; } afr_self_heald_t; typedef struct _afr_private { @@ -494,20 +496,23 @@ typedef struct _afr_local { int optimistic_change_log; gf_boolean_t delayed_post_op; + /* Is the current writev() going to perform a stable write? i.e, is fd->flags or @flags writev param have O_SYNC or O_DSYNC? */ - gf_boolean_t stable_write; + gf_boolean_t stable_write; + + /* This write appended to the file. Nnot necessarily O_APPEND, + just means the offset of write was at the end of file. + */ + gf_boolean_t append_write; + + int allow_sh_for_running_transaction; - /* This write appended to the file. Nnot necessarily O_APPEND, - just means the offset of write was at the end of file. - */ - gf_boolean_t append_write; - /* - This struct contains the arguments for the "continuation" - (scheme-like) of fops + /* This struct contains the arguments for the "continuation" + (scheme-like) of fops */ int op; diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c index a9c11babec2..c9d83935647 100644 --- a/xlators/debug/trace/src/trace.c +++ b/xlators/debug/trace/src/trace.c @@ -3134,7 +3134,7 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "logging to history %s", (conf->log_history == _gf_true)?"enabled":"disabled"); - history = eh_new (history_size, _gf_false); + history = eh_new (history_size, _gf_false, NULL); if (!history) { gf_log (this->name, GF_LOG_ERROR, "event history cannot be " "initialized"); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 2dfe3cde109..15cb3d83e47 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -4215,6 +4215,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, return ret; } + static int glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr, struct list_head *selected) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index bef3da74b40..80ec7155cd9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -7279,6 +7279,77 @@ out: } int +_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t + *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0,}; + char key_begin_string[128] = {0,}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr (key, '-'); + if (!key_begin_str) + goto out; + + int_len = strlen (key) - strlen (key_begin_str); + strncpy (key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end = strchr (key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key_begin_str) - strlen (rxl_end) - 1; + strncpy (int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; + +} + +int glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, dict_t *op_ctx, char **op_errstr) { @@ -7286,6 +7357,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, glusterd_heal_rsp_conv_t rsp_ctx = {0}; char *volname = NULL; glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; GF_ASSERT (rsp_dict); GF_ASSERT (op_ctx); @@ -7297,6 +7369,13 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, goto out; } + ret = dict_get_int32 (req_dict, "heal-op", &heal_op); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get heal_op"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) @@ -7305,7 +7384,11 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, rsp_ctx.dict = op_ctx; rsp_ctx.volinfo = volinfo; rsp_ctx.this = THIS; - dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + if (heal_op == GF_AFR_OP_STATISTICS) + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics, + &rsp_ctx); + else + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); out: return ret; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index c1812a9a1f1..6a5587c2d2e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5355,7 +5355,7 @@ init (xlator_t *this_xl) if (priv->fd == -1) goto cleanup_exit; - event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false); + event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL); if (!event) { gf_log (this_xl->name, GF_LOG_ERROR, "could not create a new event history"); |