diff options
-rw-r--r-- | cli/src/cli-cmd-parser.c | 5 | ||||
-rw-r--r-- | cli/src/cli-cmd-volume.c | 2 | ||||
-rw-r--r-- | cli/src/cli-rpc-ops.c | 112 | ||||
-rw-r--r-- | libglusterfs/src/circ-buff.c | 30 | ||||
-rw-r--r-- | libglusterfs/src/circ-buff.h | 5 | ||||
-rw-r--r-- | libglusterfs/src/event-history.c | 6 | ||||
-rw-r--r-- | libglusterfs/src/event-history.h | 3 | ||||
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 67 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-mem-types.h | 5 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 427 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 43 | ||||
-rw-r--r-- | xlators/debug/trace/src/trace.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 85 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 2 |
18 files changed, 762 insertions, 62 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 62f5c3723..69e7fbdbe 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -2509,6 +2509,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount, ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_FULL); goto done; + } else if (!strcmp (words[3], "statistics")) { + ret = dict_set_int32 (dict, "heal-op", + GF_AFR_OP_STATISTICS); + goto done; + } else if (!strcmp (words[3], "info")) { ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_INDEX_SUMMARY); diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 56ad65ebb..1c6baa5e8 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1918,7 +1918,7 @@ struct cli_cmd volume_cmds[] = { cli_cmd_volume_status_cbk, "display status of all or specified volume(s)/brick"}, - { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]", + { "volume heal <VOLNAME> [{full | statistics |info {healed | heal-failed | split-brain}}]", cli_cmd_volume_heal_cbk, "self-heal commands on volume specified by <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 5dddbb41b..459dfa188 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -6953,6 +6953,97 @@ gf_cli_umount (call_frame_t *frame, xlator_t *this, void *data) } void +cmd_heal_volume_statistics_out (dict_t *dict, int brick) +{ + + uint64_t num_entries = 0; + int ret = 0; + char key[256] = {0}; + char *hostname = NULL; + uint64_t i = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = NULL; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + + snprintf (key, sizeof key, "%d-hostname", brick); + ret = dict_get_str (dict, key, &hostname); + if (ret) + goto out; + cli_out ("------------------------------------------------"); + cli_out ("\nCrawl statistics for brick no %d", brick); + cli_out ("Hostname of brick %s", hostname); + + snprintf (key, sizeof key, "statistics-%d-count", brick); + ret = dict_get_uint64 (dict, key, &num_entries); + if (ret) + goto out; + + for (i = 0; i < num_entries; i++) + { + snprintf (key, sizeof key, "statistics_crawl_type-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &crawl_type); + if (ret) + goto out; + + snprintf (key, sizeof key, "statistics_healed_cnt-%d-%"PRIu64, + brick,i); + ret = dict_get_uint64 (dict, key, &healed_count); + if (ret) + goto out; + + snprintf (key, sizeof key, "statistics_sb_cnt-%d-%"PRIu64, + brick, i); + ret = dict_get_uint64 (dict, key, &split_brain_count); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_heal_failed_cnt-%d-%"PRIu64, + brick, i); + ret = dict_get_uint64 (dict, key, &heal_failed_count); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_strt_time-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &start_time_str); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_end_time-%d-%"PRIu64, + brick, i); + ret = dict_get_str (dict, key, &end_time_str); + if (ret) + goto out; + snprintf (key, sizeof key, "statistics_inprogress-%d-%"PRIu64, + brick, i); + ret = dict_get_int32 (dict, key, &progress); + if (ret) + goto out; + + cli_out ("\nStarting time of crawl: %s", start_time_str); + if (progress == 1) + cli_out ("Crawl is in progress"); + else + cli_out ("Ending time of crawl: %s", end_time_str); + + cli_out ("Type of crawl: %s", crawl_type); + cli_out ("No. of entries healed: %"PRIu64, + healed_count); + cli_out ("No. of entries in split-brain: %"PRIu64, + split_brain_count); + cli_out ("No. of heal failed entries: %"PRIu64, + heal_failed_count); + + } + + +out: + return; +} + +void cmd_heal_volume_brick_out (dict_t *dict, int brick) { uint64_t num_entries = 0; @@ -7096,6 +7187,9 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, case GF_AFR_OP_SPLIT_BRAIN_FILES: heal_op_str = "list of split brain entries"; break; + case GF_AFR_OP_STATISTICS: + heal_op_str = "crawl statistics"; + break; case GF_AFR_OP_INVALID: heal_op_str = "invalid heal op"; break; @@ -7156,8 +7250,22 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - for (i = 0; i < brick_count; i++) - cmd_heal_volume_brick_out (dict, i); + switch (heal_op) { + case GF_AFR_OP_STATISTICS: + for (i = 0; i < brick_count; i++) + cmd_heal_volume_statistics_out (dict, i); + break; + case GF_AFR_OP_INDEX_SUMMARY: + case GF_AFR_OP_HEALED_FILES: + case GF_AFR_OP_HEAL_FAILED_FILES: + case GF_AFR_OP_SPLIT_BRAIN_FILES: + for (i = 0; i < brick_count; i++) + cmd_heal_volume_brick_out (dict, i); + break; + default: + break; + } + ret = rsp.op_ret; out: diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c index a48c92879..484ce7dc9 100644 --- a/libglusterfs/src/circ-buff.c +++ b/libglusterfs/src/circ-buff.c @@ -10,6 +10,17 @@ #include "circ-buff.h" +void +cb_destroy_data (circular_buffer_t *cb, + void (*destroy_buffer_data) (void *data)) +{ + if (destroy_buffer_data) + destroy_buffer_data (cb->data); + GF_FREE (cb->data); + return; +} + + /* hold lock while calling this function */ int __cb_add_entry_buffer (buffer_t *buffer, void *item) @@ -29,7 +40,8 @@ __cb_add_entry_buffer (buffer_t *buffer, void *item) if (buffer->cb[buffer->w_index]) { ptr = buffer->cb[buffer->w_index]; if (ptr->data) { - GF_FREE (ptr->data); + cb_destroy_data (ptr, + buffer->destroy_buffer_data); ptr->data = NULL; GF_FREE (ptr); } @@ -131,7 +143,8 @@ cb_buffer_dump (buffer_t *buffer, void *data, } buffer_t * -cb_buffer_new (size_t buffer_size, gf_boolean_t use_once) +cb_buffer_new (size_t buffer_size, gf_boolean_t use_once, + void (*destroy_buffer_data) (void *data)) { buffer_t *buffer = NULL; @@ -157,6 +170,7 @@ cb_buffer_new (size_t buffer_size, gf_boolean_t use_once) buffer->size_buffer = buffer_size; buffer->use_once = use_once; buffer->used_len = 0; + buffer->destroy_buffer_data = destroy_buffer_data; pthread_mutex_init (&buffer->lock, NULL); out: @@ -166,12 +180,18 @@ out: void cb_buffer_destroy (buffer_t *buffer) { - int i = 0; - + int i = 0; + circular_buffer_t *ptr = NULL; if (buffer) { if (buffer->cb) { for (i = 0; i < buffer->used_len ; i++) { - GF_FREE (buffer->cb[i]); + ptr = buffer->cb[i]; + if (ptr->data) { + cb_destroy_data (ptr, + buffer->destroy_buffer_data); + ptr->data = NULL; + GF_FREE (ptr); + } } GF_FREE (buffer->cb); } diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h index 5b5acc387..e3459f5e3 100644 --- a/libglusterfs/src/circ-buff.h +++ b/libglusterfs/src/circ-buff.h @@ -38,7 +38,7 @@ struct _buffer { /* indicates the amount of circular buffer used. */ circular_buffer_t **cb; - + void (*destroy_buffer_data) (void *data); pthread_mutex_t lock; }; @@ -51,7 +51,8 @@ void cb_buffer_show (buffer_t *buffer); buffer_t * -cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once); +cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once, + void (*destroy_data) (void *data)); void cb_buffer_destroy (buffer_t *buffer); diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c index fe511caeb..82baa521a 100644 --- a/libglusterfs/src/event-history.c +++ b/libglusterfs/src/event-history.c @@ -11,7 +11,8 @@ #include "event-history.h" eh_t * -eh_new (size_t buffer_size, gf_boolean_t use_buffer_once) +eh_new (size_t buffer_size, gf_boolean_t use_buffer_once, + void (*destroy_buffer_data) (void *data)) { eh_t *history = NULL; buffer_t *buffer = NULL; @@ -22,7 +23,8 @@ eh_new (size_t buffer_size, gf_boolean_t use_buffer_once) goto out; } - buffer = cb_buffer_new (buffer_size, use_buffer_once); + buffer = cb_buffer_new (buffer_size, use_buffer_once, + destroy_buffer_data); if (!buffer) { gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed"); GF_FREE (history); diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h index b1750bbae..b64f63b5e 100644 --- a/libglusterfs/src/event-history.h +++ b/libglusterfs/src/event-history.h @@ -32,7 +32,8 @@ eh_dump (eh_t *event , void *data, int (fn) (circular_buffer_t *buffer, void *data)); eh_t * -eh_new (size_t buffer_size, gf_boolean_t use_buffer_once); +eh_new (size_t buffer_size, gf_boolean_t use_buffer_once, + void (*destroy_data) (void *data)); int eh_save_history (eh_t *history, void *string); diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 704b1540a..bdc3a6956 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -209,7 +209,8 @@ typedef enum { GF_AFR_OP_INDEX_SUMMARY, GF_AFR_OP_HEALED_FILES, GF_AFR_OP_HEAL_FAILED_FILES, - GF_AFR_OP_SPLIT_BRAIN_FILES + GF_AFR_OP_SPLIT_BRAIN_FILES, + GF_AFR_OP_STATISTICS, } gf_xl_afr_op_t ; typedef enum { diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 1acbc01ba..d7d786108 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -51,7 +51,7 @@ #define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL #define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL - +#define AFR_STATISTICS_HISTORY_SIZE 50 int afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, gf_boolean_t fail_conflict); @@ -1832,7 +1832,8 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this, afr_lookup_set_self_heal_params (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { - if (afr_is_transaction_running (local)) + if (afr_is_transaction_running (local) && + (!local->allow_sh_for_running_transaction)) goto out; reason = "lookup detected pending operations"; @@ -2427,7 +2428,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, int call_count = 0; uint64_t ctx = 0; int32_t op_errno = 0; - + int allow_sh = 0; priv = this->private; AFR_LOCAL_ALLOC_OR_GOTO (local, out); @@ -2499,6 +2500,11 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. */ local->op_errno = ENOTCONN; + ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction", + &allow_sh); + dict_del (xattr_req, "allow-sh-for-running-transaction"); + local->allow_sh_for_running_transaction = allow_sh; + ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc, &gfid_req); if (ret) { @@ -4425,6 +4431,16 @@ afr_priv_destroy (afr_private_t *priv) if (priv->shd.split_brain) eh_destroy (priv->shd.split_brain); + for (i = 0; i < priv->child_count; i++) + { + if (priv->shd.statistics[i]) + eh_destroy (priv->shd.statistics[i]); + } + + GF_FREE (priv->shd.statistics); + + GF_FREE (priv->shd.crawl_events); + GF_FREE (priv->last_event); if (priv->pending_key) { for (i = 0; i < priv->child_count; i++) @@ -4528,3 +4544,48 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this) } UNLOCK (&inode->lock); } + +int +afr_initialise_statistics (xlator_t *this) +{ + afr_private_t *priv = NULL; + int ret = -1; + int i = 0; + int child_count = 0; + eh_t *stats_per_brick = NULL; + shd_crawl_event_t ***shd_crawl_events = NULL; + priv = this->private; + + priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count, + gf_common_mt_eh_t); + if (!priv->shd.statistics) { + ret = -1; + goto out; + } + child_count = priv->child_count; + for (i=0; i < child_count ; i++) { + stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE, + _gf_false, + _destroy_crawl_event_data); + if (!stats_per_brick) { + ret = -1; + goto out; + } + priv->shd.statistics[i] = stats_per_brick; + + } + + shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events); + *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*), + priv->child_count, + gf_afr_mt_shd_crawl_event_t); + + if (!priv->shd.crawl_events) { + ret = -1; + goto out; + } + ret = 0; +out: + return ret; + +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index e01ab366f..73594f265 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -41,7 +41,10 @@ enum gf_afr_mem_types_ { gf_afr_mt_shd_event_t, gf_afr_mt_time_t, gf_afr_mt_pos_data_t, - gf_afr_mt_reply_t, + gf_afr_mt_reply_t, + gf_afr_mt_stats_t, + gf_afr_mt_shd_crawl_event_t, + gf_afr_mt_uint64_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index f33b04eed..89e056335 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -84,6 +84,33 @@ _loc_assign_gfid_path (loc_t *loc) } void +_destroy_crawl_event_data (void *data) +{ + shd_crawl_event_t *crawl_event = NULL; + + if (!data) + goto out; + + crawl_event = (shd_crawl_event_t *)data; + GF_FREE (crawl_event->start_time_str); + GF_FREE (crawl_event->end_time_str); + +out: + return; +} + +void +_destroy_shd_event_data (void *data) +{ + shd_event_t *event = NULL; + if (!data) + goto out; + event = (shd_event_t*)data; + GF_FREE (event->path); +out: + return; +} +void shd_cleanup_event (void *event) { shd_event_t *shd_event = event; @@ -128,6 +155,123 @@ _build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) } int +_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child, + shd_crawl_event_t *shd_event, struct timeval *tv) +{ + int ret = 0; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = NULL; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + + healed_count = shd_event->healed_count; + split_brain_count = shd_event->split_brain_count; + heal_failed_count = shd_event->heal_failed_count; + start_time_str = shd_event->start_time_str; + end_time_str = shd_event->end_time_str; + crawl_type = shd_event->crawl_type; + + if (!start_time_str) { + ret = -1; + goto out; + } + + + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); + goto out; + } + + snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); + ret = dict_get_uint64 (output, key, &count); + + snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64(output, key, healed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "healed_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, split_brain_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "split_brain_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_dynstr (output, key, gf_strdup (crawl_type)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_type to output"); + goto out; + } + snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, heal_failed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "healed_failed_count to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_dynstr (output, key, gf_strdup(start_time_str)); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_start_time to outout"); + goto out; + } + + snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64, + xl_id, child, count); + + if (!end_time_str) + end_time_str = "Could not determine the end time"; + ret = dict_set_dynstr (output, key, gf_strdup(end_time_str)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "crawl_end_time to outout"); + goto out; + } + snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64, + xl_id, child, count); + + if (shd_event->crawl_inprogress == _gf_true) + progress = 1; + else + progress = 0; + + ret = dict_set_int32 (output, key, progress); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" + "inprogress to outout"); + goto out; + } + + snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child); + ret = dict_set_uint64 (output, key, count + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not increment the " + "counter."); + goto out; + } +out: + return ret; +} + +int _add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path, struct timeval *tv, gf_boolean_t dyn) { @@ -233,6 +377,20 @@ out: } int +_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data) +{ + int ret = 0; + shd_dump_t *dump_data = NULL; + shd_crawl_event_t *shd_event = NULL; + + dump_data = data; + shd_event = cb->data; + ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict, + dump_data->child, shd_event, &cb->tv); + return ret; +} + +int _add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child) { shd_dump_t dump_data = {0}; @@ -244,6 +402,26 @@ _add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child) return 0; } + +int +_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child) +{ + shd_dump_t dump_data = {0}; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + + priv = this->private; + shd = &priv->shd; + + dump_data.this = this; + dump_data.dict = dict; + dump_data.child = child; + eh_dump (shd->statistics[child], &dump_data, + _add_crawl_event_statistics_to_dict); + return 0; + +} + void _remove_stale_index (xlator_t *this, xlator_t *readdir_xl, loc_t *parent, char *fname) @@ -307,16 +485,18 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp, afr_crawl_data_t *crawl_data) { - int ret = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - eh_t *eh = NULL; - char *path = NULL; - char gfid_str[64] = {0}; - shd_event_t *event = NULL; - int32_t sh_failed = 0; - gf_boolean_t split_brain = 0; - int32_t actual_sh_done = 0; + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + eh_t *eh = NULL; + char *path = NULL; + char gfid_str[64] = {0}; + shd_event_t *event = NULL; + int32_t sh_failed = 0; + gf_boolean_t split_brain = 0; + int32_t actual_sh_done = 0; + shd_crawl_event_t **shd_crawl_event = NULL; + priv = this->private; shd = &priv->shd; if (crawl_data->crawl == INDEX) { @@ -343,16 +523,19 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done); } - split_brain = afr_is_split_brain (this, child->inode); + shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events); + split_brain = afr_is_split_brain (this, child->inode); if ((op_ret < 0 && op_errno == EIO) || split_brain) { eh = shd->split_brain; + shd_crawl_event[crawl_data->child]->split_brain_count += 1; } else if ((op_ret < 0) || sh_failed) { eh = shd->heal_failed; + shd_crawl_event[crawl_data->child]->heal_failed_count += 1; } else if (actual_sh_done == 1) { - eh = shd->healed; + eh = shd->healed; + shd_crawl_event[crawl_data->child]->healed_count += 1; } - ret = -1; if (eh != NULL) { @@ -408,10 +591,20 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent struct iatt parentbuf = {0}; int ret = 0; dict_t *xattr_rsp = NULL; + dict_t *xattr_req = NULL; + + xattr_req = dict_new (); + if (!xattr_req) { + errno = ENOMEM; + ret = -1; + goto out; + } + + ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1); gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); - ret = syncop_lookup (this, child, NULL, + ret = syncop_lookup (this, child, xattr_req, iattr, &xattr_rsp, &parentbuf); _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp, crawl_data); @@ -420,6 +613,9 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent if (ret == 0) ret = _link_inode_update_loc (this, child, iattr); +out: + if (xattr_req) + dict_unref(xattr_req); return ret; } @@ -567,8 +763,103 @@ _get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output); } +void +afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int i = 0; + priv = this->private; + shd= &priv->shd; + for (i = 0; i < priv->child_count; i++) { + if (shd->pos[i] != AFR_POS_LOCAL) + continue; + _add_statistics_to_dict (this, dict, i); + } + + return ; +} + +static void +reset_crawl_event (shd_crawl_event_t *crawl_event) +{ + crawl_event->healed_count = 0; + crawl_event->split_brain_count = 0; + crawl_event->heal_failed_count = 0; + GF_FREE (crawl_event->start_time_str); + crawl_event->start_time_str = NULL; + crawl_event->end_time_str = NULL; + crawl_event->crawl_type = NULL; + crawl_event->crawl_inprogress = _gf_false; + return; +} + +static void +afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst) +{ + dst->healed_count = src->healed_count; + dst->split_brain_count = src->split_brain_count; + dst->heal_failed_count = src->heal_failed_count; + dst->start_time_str = gf_strdup (src->start_time_str); + dst->end_time_str = "Crawl is already in progress"; + dst->crawl_type = src->crawl_type; + dst->crawl_inprogress = _gf_true; + return; +} + +static int +afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict) +{ + shd_crawl_event_t *evnt = NULL; + int ret = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int i = 0; + priv = this->private; + shd = &priv->shd; + + evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t), + gf_afr_mt_shd_crawl_event_t); + if (!evnt) { + ret = -1; + goto out; + } + LOCK (&priv->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (shd->pos[i] != AFR_POS_LOCAL) + continue; + + reset_crawl_event (evnt); + + if (!shd->crawl_events[i]) { + continue; + } + + afr_copy_crawl_event_struct (shd->crawl_events[i], + evnt); + _add_crawl_stats_to_dict (this, dict, i, evnt, NULL); + + } + } + UNLOCK (&priv->lock); + reset_crawl_event (evnt); + GF_FREE (evnt); + +out: + return ret; +} + +static int +_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) +{ + int ret = 0; + afr_fill_completed_crawl_statistics_to_dict (this, dict); + ret = afr_fill_crawl_statistics_of_running_crawl (this, dict); + return ret; +} int -_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) +_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) { afr_private_t *priv = NULL; afr_self_heald_t *shd = NULL; @@ -618,16 +909,19 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) ret = 0; break; case GF_AFR_OP_HEALED_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->healed, output); + ret = _add_local_subvols_eh_to_dict (this, shd->healed, output); break; case GF_AFR_OP_HEAL_FAILED_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed, + ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed, output); break; case GF_AFR_OP_SPLIT_BRAIN_FILES: - ret = _add_all_subvols_eh_to_dict (this, shd->split_brain, + ret = _add_local_subvols_eh_to_dict (this, shd->split_brain, output); break; + case GF_AFR_OP_STATISTICS: + ret = _add_local_subvols_crawl_statistics_to_dict (this, output); + break; default: gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); break; @@ -1147,6 +1441,95 @@ out: return ret; } +char * +get_crawl_type_in_string (afr_crawl_type_t crawl) +{ + char *index = "INDEX"; + char *full = "FULL"; + char *crawl_type = NULL; + + if (crawl == INDEX){ + crawl_type = index; + } else if (crawl == FULL) { + crawl_type = full; + } + + return crawl_type; +} + +static int +afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = 0; + shd_crawl_event_t *crawl_event = NULL; + time_t get_time = 0; + + priv = this->private; + shd = &priv->shd; + + crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1, + gf_afr_mt_shd_crawl_event_t); + if (!crawl_event) { + ret = -1; + goto out; + } + + get_time = time(NULL); + if (get_time == ((time_t)-1)) { + ret = -1; + goto out; + } + + crawl_event->start_time_str = gf_strdup (ctime(&get_time)); + + crawl_event->crawl_type = get_crawl_type_in_string (crawl); + if (!crawl_event->crawl_type) { + ret = -1; + goto out; + } + LOCK (&priv->lock); + { + shd->crawl_events[child] = crawl_event; + } + UNLOCK (&priv->lock); + ret = 0; +out: + return ret; + +} + +static int +afr_put_crawl_event_in_eh (xlator_t *this, int child) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = 0; + time_t get_time = 0; + shd_crawl_event_t **crawl_event = NULL; + + priv = this->private; + shd = &priv->shd; + + get_time = time(NULL); + if (get_time == ((time_t)-1)) { + ret = -1; + goto out; + } + crawl_event = (shd_crawl_event_t**)shd->crawl_events; + LOCK (&priv->lock); + { + crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time)); + ret = eh_save_history (shd->statistics[child], + crawl_event[child]); + crawl_event[child] = NULL; + } + UNLOCK (&priv->lock); +out: + return ret; +} + static int afr_dir_exclusive_crawl (void *data) { @@ -1182,7 +1565,15 @@ afr_dir_exclusive_crawl (void *data) } do { + ret = afr_allocate_crawl_event (this, child, crawl_data->crawl); + if (ret) + goto out; afr_dir_crawl (data); + + ret = afr_put_crawl_event_in_eh (this, child); + if (ret < 0) + goto out; + LOCK (&priv->lock); { if (shd->pending[child] != NONE) { diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 32a8aaca5..e0c083754 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -29,6 +29,19 @@ typedef struct afr_crawl_data_ { struct iatt *iattr); } afr_crawl_data_t; +typedef struct crawl_event_stats_ { + uint64_t healed_count; + uint64_t split_brain_count; + uint64_t heal_failed_count; + char *start_time_str; + char *end_time_str; + char *crawl_type; + gf_boolean_t crawl_inprogress; +} shd_crawl_event_t; + +void _destroy_crawl_event_data (void *data); +void _destroy_shd_event_data (void *data); + typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, loc_t *child, loc_t *parent, struct iatt *iattr); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 208833920..aa8d00220 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -439,15 +439,18 @@ init (xlator_t *this) if (!priv->shd.timer) goto out; - priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false); + priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.healed) goto out; - priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false); + priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.heal_failed) goto out; - priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false); + priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false, + _destroy_shd_event_data); if (!priv->shd.split_brain) goto out; @@ -457,7 +460,9 @@ init (xlator_t *this) priv->root_inode = inode_ref (this->itable->root); GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out); GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out); - + ret = afr_initialise_statistics (this); + if (ret) + goto out; ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 95e69f3bc..8d8b2f649 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -98,17 +98,19 @@ typedef enum { } afr_crawl_type_t; typedef struct afr_self_heald_ { - gf_boolean_t enabled; - gf_boolean_t iamshd; - afr_crawl_type_t *pending; - gf_boolean_t *inprogress; - afr_child_pos_t *pos; - gf_timer_t **timer; - eh_t *healed; - eh_t *heal_failed; - eh_t *split_brain; - char *node_uuid; - int timeout; + gf_boolean_t enabled; + gf_boolean_t iamshd; + afr_crawl_type_t *pending; + gf_boolean_t *inprogress; + afr_child_pos_t *pos; + gf_timer_t **timer; + eh_t *healed; + eh_t *heal_failed; + eh_t *split_brain; + eh_t **statistics; + void **crawl_events; + char *node_uuid; + int timeout; } afr_self_heald_t; typedef struct _afr_private { @@ -494,20 +496,23 @@ typedef struct _afr_local { int optimistic_change_log; gf_boolean_t delayed_post_op; + /* Is the current writev() going to perform a stable write? i.e, is fd->flags or @flags writev param have O_SYNC or O_DSYNC? */ - gf_boolean_t stable_write; + gf_boolean_t stable_write; + + /* This write appended to the file. Nnot necessarily O_APPEND, + just means the offset of write was at the end of file. + */ + gf_boolean_t append_write; + + int allow_sh_for_running_transaction; - /* This write appended to the file. Nnot necessarily O_APPEND, - just means the offset of write was at the end of file. - */ - gf_boolean_t append_write; - /* - This struct contains the arguments for the "continuation" - (scheme-like) of fops + /* This struct contains the arguments for the "continuation" + (scheme-like) of fops */ int op; diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c index a9c11babe..c9d839356 100644 --- a/xlators/debug/trace/src/trace.c +++ b/xlators/debug/trace/src/trace.c @@ -3134,7 +3134,7 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "logging to history %s", (conf->log_history == _gf_true)?"enabled":"disabled"); - history = eh_new (history_size, _gf_false); + history = eh_new (history_size, _gf_false, NULL); if (!history) { gf_log (this->name, GF_LOG_ERROR, "event history cannot be " "initialized"); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 2dfe3cde1..15cb3d83e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -4215,6 +4215,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, return ret; } + static int glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr, struct list_head *selected) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index bef3da74b..80ec7155c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -7279,6 +7279,77 @@ out: } int +_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t + *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0,}; + char key_begin_string[128] = {0,}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr (key, '-'); + if (!key_begin_str) + goto out; + + int_len = strlen (key) - strlen (key_begin_str); + strncpy (key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end = strchr (key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key_begin_str) - strlen (rxl_end) - 1; + strncpy (int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; + +} + +int glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, dict_t *op_ctx, char **op_errstr) { @@ -7286,6 +7357,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, glusterd_heal_rsp_conv_t rsp_ctx = {0}; char *volname = NULL; glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; GF_ASSERT (rsp_dict); GF_ASSERT (op_ctx); @@ -7297,6 +7369,13 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, goto out; } + ret = dict_get_int32 (req_dict, "heal-op", &heal_op); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get heal_op"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) @@ -7305,7 +7384,11 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, rsp_ctx.dict = op_ctx; rsp_ctx.volinfo = volinfo; rsp_ctx.this = THIS; - dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + if (heal_op == GF_AFR_OP_STATISTICS) + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics, + &rsp_ctx); + else + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); out: return ret; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index c1812a9a1..6a5587c2d 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5355,7 +5355,7 @@ init (xlator_t *this_xl) if (priv->fd == -1) goto cleanup_exit; - event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false); + event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL); if (!event) { gf_log (this_xl->name, GF_LOG_ERROR, "could not create a new event history"); |