-rw-r--r--   cli/src/cli-cmd-parser.c                        |  66
-rw-r--r--   cli/src/cli-cmd-volume.c                        |  16
-rw-r--r--   cli/src/cli-rpc-ops.c                           | 166
-rw-r--r--   heal/src/glfs-heal.c                            |  38
-rw-r--r--   rpc/rpc-lib/src/protocol-common.h               |  28
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-common.c  |   4
-rw-r--r--   xlators/cluster/afr/src/afr-self-heald.c        |  20
-rw-r--r--   xlators/cluster/ec/src/Makefile.am              |   3
-rw-r--r--   xlators/cluster/ec/src/ec-heald.c               | 598
-rw-r--r--   xlators/cluster/ec/src/ec-heald.h               |  47
-rw-r--r--   xlators/cluster/ec/src/ec-mem-types.h           |   1
-rw-r--r--   xlators/cluster/ec/src/ec.c                     |  77
-rw-r--r--   xlators/cluster/ec/src/ec.h                     |   8
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-op-sm.c      |  16
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-utils.c      |   2
-rw-r--r--   xlators/mgmt/glusterd/src/glusterd-volume-ops.c |  26
16 files changed, 898 insertions, 218 deletions
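Everything outside xlators/cluster/ec/ is a mechanical rename: the self-heal-daemon op codes move from the AFR-specific GF_AFR_OP_* names to the shared GF_SHD_OP_* names (see rpc/rpc-lib/src/protocol-common.h below), so the new disperse self-heal daemon can reuse them. Because the enum members keep their order, the numeric values exchanged over RPC stay the same. A hypothetical compile-time guard for that invariant (not part of the patch; uses C11 _Static_assert) could read:

    #include "protocol-common.h"

    /* Hypothetical check, not in the patch: the rename must not reorder
     * the ops, or mixed-version clusters would misread heal requests. */
    _Static_assert (GF_SHD_OP_INVALID == 0, "first op must remain 0");
    _Static_assert (GF_SHD_OP_HEAL_DISABLE == 13, "all 14 values unchanged");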
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 9c887fa78a5..bbec6aa5f8d 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -2984,7 +2984,7 @@ set_hostname_path_in_dict (const char *token, dict_t *dict, int heal_op)
                 goto out;
 
         switch (heal_op) {
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
                 ret = dict_set_dynstr (dict, "heal-source-hostname",
                                        hostname);
                 if (ret)
@@ -2992,7 +2992,7 @@ set_hostname_path_in_dict (const char *token, dict_t *dict, int heal_op)
                 ret = dict_set_dynstr (dict, "heal-source-brickpath",
                                        path);
                 break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
                 ret = dict_set_dynstr (dict, "per-replica-cmd-hostname",
                                        hostname);
                 if (ret)
@@ -3014,29 +3014,29 @@ heal_command_type_get (const char *command)
 {
         int i = 0;
 
         /* subcommands are set as NULL */
-        char *heal_cmds[GF_AFR_OP_HEAL_DISABLE + 1] = {
-                [GF_AFR_OP_INVALID] = NULL,
-                [GF_AFR_OP_HEAL_INDEX] = NULL,
-                [GF_AFR_OP_HEAL_FULL] = "full",
-                [GF_AFR_OP_INDEX_SUMMARY] = "info",
-                [GF_AFR_OP_HEALED_FILES] = NULL,
-                [GF_AFR_OP_HEAL_FAILED_FILES] = NULL,
-                [GF_AFR_OP_SPLIT_BRAIN_FILES] = NULL,
-                [GF_AFR_OP_STATISTICS] = "statistics",
-                [GF_AFR_OP_STATISTICS_HEAL_COUNT] = NULL,
-                [GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = NULL,
-                [GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE] = "split-brain",
-                [GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK] = "split-brain",
-                [GF_AFR_OP_HEAL_ENABLE] = "enable",
-                [GF_AFR_OP_HEAL_DISABLE] = "disable",
+        char *heal_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+                [GF_SHD_OP_INVALID] = NULL,
+                [GF_SHD_OP_HEAL_INDEX] = NULL,
+                [GF_SHD_OP_HEAL_FULL] = "full",
+                [GF_SHD_OP_INDEX_SUMMARY] = "info",
+                [GF_SHD_OP_HEALED_FILES] = NULL,
+                [GF_SHD_OP_HEAL_FAILED_FILES] = NULL,
+                [GF_SHD_OP_SPLIT_BRAIN_FILES] = NULL,
+                [GF_SHD_OP_STATISTICS] = "statistics",
+                [GF_SHD_OP_STATISTICS_HEAL_COUNT] = NULL,
+                [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = NULL,
+                [GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE] = "split-brain",
+                [GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK] = "split-brain",
+                [GF_SHD_OP_HEAL_ENABLE] = "enable",
+                [GF_SHD_OP_HEAL_DISABLE] = "disable",
         };
 
-        for (i = 0; i <= GF_AFR_OP_HEAL_DISABLE; i++) {
+        for (i = 0; i <= GF_SHD_OP_HEAL_DISABLE; i++) {
                 if (heal_cmds[i] && (strcmp (heal_cmds[i], command) == 0))
                         return i;
         }
 
-        return GF_AFR_OP_INVALID;
+        return GF_SHD_OP_INVALID;
 }
 
 int
@@ -3047,7 +3047,7 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
         dict_t *dict = NULL;
         char *hostname = NULL;
         char *path = NULL;
-        gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
 
         dict = dict_new ();
         if (!dict)
@@ -3060,13 +3060,13 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
         }
 
         if (wordcount == 3) {
-                ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
+                ret = dict_set_int32 (dict, "heal-op", GF_SHD_OP_HEAL_INDEX);
                 goto done;
         }
 
         if (wordcount == 4) {
                 op = heal_command_type_get (words[3]);
-                if (op == GF_AFR_OP_INVALID) {
+                if (op == GF_SHD_OP_INVALID) {
                         ret = -1;
                         goto out;
                 }
@@ -3085,17 +3085,17 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
                 if (!strcmp (words[3], "info")) {
                         if (!strcmp (words[4], "healed")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_HEALED_FILES);
+                                                      GF_SHD_OP_HEALED_FILES);
                                 goto done;
                         }
                         if (!strcmp (words[4], "heal-failed")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_HEAL_FAILED_FILES);
+                                                      GF_SHD_OP_HEAL_FAILED_FILES);
                                 goto done;
                         }
                         if (!strcmp (words[4], "split-brain")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_SPLIT_BRAIN_FILES);
+                                                      GF_SHD_OP_SPLIT_BRAIN_FILES);
                                 goto done;
                         }
                 }
@@ -3103,7 +3103,7 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
                 if (!strcmp (words[3], "statistics")) {
                         if (!strcmp (words[4], "heal-count")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_STATISTICS_HEAL_COUNT);
+                                                      GF_SHD_OP_STATISTICS_HEAL_COUNT);
                                 goto done;
                         }
                 }
@@ -3117,7 +3117,7 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
                         }
                         if (!strcmp (words[4], "bigger-file")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
+                                                      GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
                                 if (ret)
                                         goto out;
                                 ret = dict_set_str (dict, "file",
                                                     (char *)words[5]);
@@ -3127,11 +3127,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
                         }
                         if (!strcmp (words[4], "source-brick")) {
                                 ret = dict_set_int32 (dict, "heal-op",
-                                                      GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK);
+                                                      GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
                                 if (ret)
                                         goto out;
                                 ret = set_hostname_path_in_dict (words[5], dict,
-                                                                 GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK);
+                                                                 GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
                                 if (ret)
                                         goto out;
                                 goto done;
@@ -3145,11 +3145,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
                     && !strcmp (words[5], "replica")) {
                         ret = dict_set_int32 (dict, "heal-op",
-                                              GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+                                              GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
                         if (ret)
                                 goto out;
                         ret = set_hostname_path_in_dict (words[6], dict,
-                                                         GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+                                                         GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
                         if (ret)
                                 goto out;
                         goto done;
@@ -3158,9 +3158,9 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
         if (!strcmp (words[3], "split-brain") &&
             !strcmp (words[4], "source-brick")) {
                 ret = dict_set_int32 (dict, "heal-op",
-                                      GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK);
+                                      GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
                 ret = set_hostname_path_in_dict (words[5], dict,
-                                                 GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK);
+                                                 GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
                 if (ret)
                         goto out;
                 ret = dict_set_str (dict, "file",
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 1b235bade6f..af9cc6a5aa6 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1924,10 +1924,10 @@ cli_print_brick_status (cli_volume_status_t *status)
         return 0;
 }
 
-#define NEEDS_GLFS_HEAL(op) ((op == GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \
-                             (op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) ||       \
-                             (op == GF_AFR_OP_INDEX_SUMMARY) ||                \
-                             (op == GF_AFR_OP_SPLIT_BRAIN_FILES))
+#define NEEDS_GLFS_HEAL(op) ((op == GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \
+                             (op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) ||       \
+                             (op == GF_SHD_OP_INDEX_SUMMARY) ||                \
+                             (op == GF_SHD_OP_SPLIT_BRAIN_FILES))
 
 int
 cli_launch_glfs_heal (int heal_op, dict_t *options)
@@ -1947,13 +1947,13 @@ cli_launch_glfs_heal (int heal_op, dict_t *options)
         runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
         switch (heal_op) {
-        case GF_AFR_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_INDEX_SUMMARY:
                 break;
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
                 ret = dict_get_str (options, "file", &filename);
                 runner_add_args (&runner, "bigger-file", filename, NULL);
                 break;
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
                 ret = dict_get_str (options, "heal-source-hostname",
                                     &hostname);
                 ret = dict_get_str (options, "heal-source-brickpath",
@@ -1963,7 +1963,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options)
                 if (dict_get_str (options, "file", &filename) == 0)
                         runner_argprintf (&runner, filename);
                 break;
-        case GF_AFR_OP_SPLIT_BRAIN_FILES:
+        case GF_SHD_OP_SPLIT_BRAIN_FILES:
                 runner_add_args (&runner, "split-brain-info", NULL);
                 break;
         default:
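NEEDS_GLFS_HEAL() above selects the ops that bypass glusterd and run through the standalone glfsheal binary instead. A rough sketch of the call site (the real caller is cli_cmd_volume_heal_cbk; the surrounding error handling here is assumed, not taken from the patch):

    /* Sketch, assumed caller context: */
    if (NEEDS_GLFS_HEAL (heal_op)) {
            ret = cli_launch_glfs_heal (heal_op, options);
            if (ret < 0)
                    goto out;
    }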
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 4e1c6873cee..b2964b68ff6 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -7379,22 +7379,22 @@ gf_is_cli_heal_get_command (gf_xl_afr_op_t heal_op)
 {
         /* If the command is get command value is 1
          * otherwise 0, for invalid commands -1 */
-        int get_cmds[GF_AFR_OP_HEAL_DISABLE + 1] = {
-                [GF_AFR_OP_INVALID] = -1,
-                [GF_AFR_OP_HEAL_INDEX] = 0,
-                [GF_AFR_OP_HEAL_FULL] = 0,
-                [GF_AFR_OP_INDEX_SUMMARY] = 1,
-                [GF_AFR_OP_HEALED_FILES] = 1,
-                [GF_AFR_OP_HEAL_FAILED_FILES] = 1,
-                [GF_AFR_OP_SPLIT_BRAIN_FILES] = 1,
-                [GF_AFR_OP_STATISTICS] = 1,
-                [GF_AFR_OP_STATISTICS_HEAL_COUNT] = 1,
-                [GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = 1,
-                [GF_AFR_OP_HEAL_ENABLE] = 0,
-                [GF_AFR_OP_HEAL_DISABLE] = 0,
+        int get_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+                [GF_SHD_OP_INVALID] = -1,
+                [GF_SHD_OP_HEAL_INDEX] = 0,
+                [GF_SHD_OP_HEAL_FULL] = 0,
+                [GF_SHD_OP_INDEX_SUMMARY] = 1,
+                [GF_SHD_OP_HEALED_FILES] = 1,
+                [GF_SHD_OP_HEAL_FAILED_FILES] = 1,
+                [GF_SHD_OP_SPLIT_BRAIN_FILES] = 1,
+                [GF_SHD_OP_STATISTICS] = 1,
+                [GF_SHD_OP_STATISTICS_HEAL_COUNT] = 1,
+                [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = 1,
+                [GF_SHD_OP_HEAL_ENABLE] = 0,
+                [GF_SHD_OP_HEAL_DISABLE] = 0,
         };
 
-        if (heal_op > GF_AFR_OP_INVALID && heal_op <= GF_AFR_OP_HEAL_DISABLE)
+        if (heal_op > GF_SHD_OP_INVALID && heal_op <= GF_SHD_OP_HEAL_DISABLE)
                 return get_cmds[heal_op] == 1;
         return _gf_false;
 }
@@ -7412,7 +7412,7 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
         dict_t *dict = NULL;
         int brick_count = 0;
         int i = 0;
-        gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
         char *operation = NULL;
         char *substr = NULL;
         char *heal_op_str = NULL;
@@ -7461,56 +7461,56 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
         operation = "Gathering ";
         substr = "";
         switch (heal_op) {
-        case GF_AFR_OP_HEAL_INDEX:
-                operation = "Launching heal operation ";
-                heal_op_str = "to perform index self heal";
-                substr = "\nUse heal info commands to check"
-                         " status";
-                break;
-        case GF_AFR_OP_HEAL_FULL:
-                operation = "Launching heal operation ";
-                heal_op_str = "to perform full self heal";
-                substr = "\nUse heal info commands to check"
-                         " status";
-                break;
-        case GF_AFR_OP_INDEX_SUMMARY:
-                heal_op_str = "list of entries to be healed";
-                break;
-        case GF_AFR_OP_HEALED_FILES:
-                heal_op_str = "list of healed entries";
-                break;
-        case GF_AFR_OP_HEAL_FAILED_FILES:
-                heal_op_str = "list of heal failed entries";
-                break;
-        case GF_AFR_OP_SPLIT_BRAIN_FILES:
-                heal_op_str = "list of split brain entries";
-                break;
-        case GF_AFR_OP_STATISTICS:
-                heal_op_str = "crawl statistics";
-                break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
-                heal_op_str = "count of entries to be healed";
-                break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
-                heal_op_str = "count of entries to be healed per replica";
-                break;
-        /* The below 2 cases are never hit; they're coded only to make
-         * compiler warnings go away.*/
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
-                break;
+        case GF_SHD_OP_HEAL_INDEX:
+                operation = "Launching heal operation ";
+                heal_op_str = "to perform index self heal";
+                substr = "\nUse heal info commands to check"
+                         " status";
+                break;
+        case GF_SHD_OP_HEAL_FULL:
+                operation = "Launching heal operation ";
+                heal_op_str = "to perform full self heal";
+                substr = "\nUse heal info commands to check"
+                         " status";
+                break;
+        case GF_SHD_OP_INDEX_SUMMARY:
+                heal_op_str = "list of entries to be healed";
+                break;
+        case GF_SHD_OP_HEALED_FILES:
+                heal_op_str = "list of healed entries";
+                break;
+        case GF_SHD_OP_HEAL_FAILED_FILES:
+                heal_op_str = "list of heal failed entries";
+                break;
+        case GF_SHD_OP_SPLIT_BRAIN_FILES:
+                heal_op_str = "list of split brain entries";
+                break;
+        case GF_SHD_OP_STATISTICS:
+                heal_op_str = "crawl statistics";
+                break;
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+                heal_op_str = "count of entries to be healed";
+                break;
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                heal_op_str = "count of entries to be healed per replica";
+                break;
+        /* The below 2 cases are never hit; they're coded only to make
+         * compiler warnings go away.*/
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+                break;
 
-        case GF_AFR_OP_INVALID:
-                heal_op_str = "invalid heal op";
-                break;
-        case GF_AFR_OP_HEAL_ENABLE:
-                operation = "";
-                heal_op_str = "Enable heal";
-                break;
-        case GF_AFR_OP_HEAL_DISABLE:
-                operation = "";
-                heal_op_str = "Disable heal";
-                break;
+        case GF_SHD_OP_INVALID:
+                heal_op_str = "invalid heal op";
+                break;
+        case GF_SHD_OP_HEAL_ENABLE:
+                operation = "";
+                heal_op_str = "Enable heal";
+                break;
+        case GF_SHD_OP_HEAL_DISABLE:
+                operation = "";
+                heal_op_str = "Disable heal";
+                break;
         }
 
         if (rsp.op_ret) {
@@ -7559,25 +7559,25 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
         }
 
         switch (heal_op) {
-        case GF_AFR_OP_STATISTICS:
-                for (i = 0; i < brick_count; i++)
-                        cmd_heal_volume_statistics_out (dict, i);
-                break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
-                for (i = 0; i < brick_count; i++)
-                        cmd_heal_volume_statistics_heal_count_out (dict,
-                                                                   i);
-                break;
-        case GF_AFR_OP_INDEX_SUMMARY:
-        case GF_AFR_OP_HEALED_FILES:
-        case GF_AFR_OP_HEAL_FAILED_FILES:
-        case GF_AFR_OP_SPLIT_BRAIN_FILES:
-                for (i = 0; i < brick_count; i++)
-                        cmd_heal_volume_brick_out (dict, i);
-                break;
-        default:
-                break;
+        case GF_SHD_OP_STATISTICS:
+                for (i = 0; i < brick_count; i++)
+                        cmd_heal_volume_statistics_out (dict, i);
+                break;
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                for (i = 0; i < brick_count; i++)
+                        cmd_heal_volume_statistics_heal_count_out (dict,
+                                                                   i);
+                break;
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_HEALED_FILES:
+        case GF_SHD_OP_HEAL_FAILED_FILES:
+        case GF_SHD_OP_SPLIT_BRAIN_FILES:
+                for (i = 0; i < brick_count; i++)
+                        cmd_heal_volume_brick_out (dict, i);
+                break;
+        default:
+                break;
         }
 
         ret = rsp.op_ret;
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index b32002a1a74..74cce32fb7c 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -286,21 +286,21 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
                 if (list_empty (&entries.list))
                         goto out;
 
-                if (heal_op == GF_AFR_OP_INDEX_SUMMARY) {
+                if (heal_op == GF_SHD_OP_INDEX_SUMMARY) {
                         ret = glfsh_process_entries (readdir_xl, fd,
                                                      &entries, &offset,
                                                      &num_entries,
                                                      glfsh_print_heal_status);
                         if (ret < 0)
                                 goto out;
-                } else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES) {
+                } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
                         ret = glfsh_process_entries (readdir_xl, fd,
                                                      &entries, &offset,
                                                      &num_entries,
                                                      glfsh_print_spb_status);
                         if (ret < 0)
                                 goto out;
-                } else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) {
+                } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
                         ret = glfsh_heal_entries (fs, top_subvol, rootloc,
                                                   &entries, &offset,
                                                   &num_entries, xattr_req);
@@ -316,12 +316,12 @@ out:
                 printf ("Failed to complete gathering info. "
                         "Number of entries so far: %"PRIu64"\n", num_entries);
         } else {
-                if (heal_op == GF_AFR_OP_INDEX_SUMMARY)
+                if (heal_op == GF_SHD_OP_INDEX_SUMMARY)
                         printf ("Number of entries: %"PRIu64"\n", num_entries);
-                else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES)
+                else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES)
                         printf ("Number of entries in split-brain: %"PRIu64"\n",
                                 num_entries);
-                else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK)
+                else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK)
                         printf ("Number of healed entries: %"PRIu64"\n",
                                 num_entries);
         }
@@ -422,10 +422,10 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
         if (xattr_req)
                 dict_unref (xattr_req);
         if (ret < 0) {
-                if (heal_op == GF_AFR_OP_INDEX_SUMMARY)
+                if (heal_op == GF_SHD_OP_INDEX_SUMMARY)
                         printf ("Failed to find entries with pending"
                                 " self-heal\n");
-                if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES)
+                if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES)
                         printf ("Failed to find entries in split-brain\n");
         }
 out:
@@ -605,7 +605,7 @@ glfsh_heal_from_brick (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
         if (!xattr_req)
                 goto out;
         ret = dict_set_int32 (xattr_req, "heal-op",
-                              GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK);
+                              GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
         if (ret)
                 goto out;
         client = _brick_path_to_client_xlator (top_subvol, hostname, brickpath);
@@ -652,7 +652,7 @@ glfsh_heal_from_bigger_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
         if (!xattr_req)
                 goto out;
         ret = dict_set_int32 (xattr_req, "heal-op",
-                              GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
+                              GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
         if (ret)
                 goto out;
         ret = glfsh_heal_splitbrain_file (fs, top_subvol, rootloc, file,
@@ -685,11 +685,11 @@ main (int argc, char **argv)
         volname = argv[1];
         switch (argc) {
         case 2:
-                heal_op = GF_AFR_OP_INDEX_SUMMARY;
+                heal_op = GF_SHD_OP_INDEX_SUMMARY;
                 break;
         case 3:
                 if (!strcmp (argv[2], "split-brain-info")) {
-                        heal_op = GF_AFR_OP_SPLIT_BRAIN_FILES;
+                        heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES;
                 } else {
                         printf (USAGE_STR, argv[0]);
                         ret = -1;
@@ -698,10 +698,10 @@ main (int argc, char **argv)
                 break;
         case 4:
                 if (!strcmp (argv[2], "bigger-file")) {
-                        heal_op = GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
+                        heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
                         file = argv[3];
                 } else if (!strcmp (argv[2], "source-brick")) {
-                        heal_op = GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK;
+                        heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
                         hostname = strtok (argv[3], ":");
                         path = strtok (NULL, ":");
                 } else {
@@ -712,7 +712,7 @@ main (int argc, char **argv)
                 break;
         case 5:
                 if (!strcmp (argv[2], "source-brick")) {
-                        heal_op = GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK;
+                        heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
                         hostname = strtok (argv[3], ":");
                         path = strtok (NULL, ":");
                         file = argv[4];
@@ -786,16 +786,16 @@ main (int argc, char **argv)
         glfs_loc_touchup (&rootloc);
 
         switch (heal_op) {
-        case GF_AFR_OP_INDEX_SUMMARY:
-        case GF_AFR_OP_SPLIT_BRAIN_FILES:
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_SPLIT_BRAIN_FILES:
                 ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc,
                                               heal_op);
                 break;
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
                 ret = glfsh_heal_from_bigger_file (fs, top_subvol,
                                                    &rootloc, file);
                 break;
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
                 ret = glfsh_heal_from_brick (fs, top_subvol, &rootloc,
                                              hostname, path, file);
                 break;
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 12118721407..8ab6078aaf5 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -221,20 +221,20 @@ enum glusterd_mgmt_hndsk_procnum {
 };
 
 typedef enum {
-        GF_AFR_OP_INVALID,
-        GF_AFR_OP_HEAL_INDEX,
-        GF_AFR_OP_HEAL_FULL,
-        GF_AFR_OP_INDEX_SUMMARY,
-        GF_AFR_OP_HEALED_FILES,
-        GF_AFR_OP_HEAL_FAILED_FILES,
-        GF_AFR_OP_SPLIT_BRAIN_FILES,
-        GF_AFR_OP_STATISTICS,
-        GF_AFR_OP_STATISTICS_HEAL_COUNT,
-        GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
-        GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE,
-        GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK,
-        GF_AFR_OP_HEAL_ENABLE,
-        GF_AFR_OP_HEAL_DISABLE,
+        GF_SHD_OP_INVALID,
+        GF_SHD_OP_HEAL_INDEX,
+        GF_SHD_OP_HEAL_FULL,
+        GF_SHD_OP_INDEX_SUMMARY,
+        GF_SHD_OP_HEALED_FILES,
+        GF_SHD_OP_HEAL_FAILED_FILES,
+        GF_SHD_OP_SPLIT_BRAIN_FILES,
+        GF_SHD_OP_STATISTICS,
+        GF_SHD_OP_STATISTICS_HEAL_COUNT,
+        GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
+        GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE,
+        GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK,
+        GF_SHD_OP_HEAL_ENABLE,
+        GF_SHD_OP_HEAL_DISABLE,
 } gf_xl_afr_op_t ;
 
 struct gf_gsync_detailed_status_ {
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 74d340bc808..fd450be0890 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -409,7 +409,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
         xdata_rsp = local->xdata_rsp;
 
         switch (heal_op) {
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
                 if (type == AFR_METADATA_TRANSACTION) {
                         ret = dict_set_str (xdata_rsp, "sh-fail-msg",
                                             "Use source-brick option to"
@@ -435,7 +435,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
                 sinks[source] = 0;
                 healed_sinks[source] = 0;
                 break;
-        case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK:
+        case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
                 ret = dict_get_str (xdata_req, "child-name", &name);
                 if (ret)
                         goto out;
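The glfs-heal.c and afr-self-heal-common.c hunks above are the two ends of one interface: glfsheal tags a lookup with a "heal-op" key in its xattr request, and AFR's split-brain resolution reads that key (plus "child-name" for source-brick healing) when picking sources and sinks. A condensed sketch of the glfsheal side, with a purely illustrative child name:

    /* Sketch; the child name below is hypothetical. */
    dict_t *xattr_req = dict_new ();

    ret = dict_set_int32 (xattr_req, "heal-op",
                          GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
    if (!ret)
            ret = dict_set_str (xattr_req, "child-name",
                                "myvol-client-0"); /* hypothetical */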
Not " "Supported"); } break; - case GF_AFR_OP_SPLIT_BRAIN_FILES: + case GF_SHD_OP_SPLIT_BRAIN_FILES: eh_dump (shd->split_brain, output, afr_add_shd_event); break; - case GF_AFR_OP_STATISTICS: + case GF_SHD_OP_STATISTICS: for (i = 0; i < priv->child_count; i++) { eh_dump (shd->statistics[i], output, afr_add_crawl_event); @@ -1110,8 +1110,8 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) &shd->full_healers[i].crawl_event); } break; - case GF_AFR_OP_STATISTICS_HEAL_COUNT: - case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: op_ret = -1; for (i = 0; i < priv->child_count; i++) { diff --git a/xlators/cluster/ec/src/Makefile.am b/xlators/cluster/ec/src/Makefile.am index e2a9330a944..12d87f99e4d 100644 --- a/xlators/cluster/ec/src/Makefile.am +++ b/xlators/cluster/ec/src/Makefile.am @@ -15,6 +15,7 @@ ec_sources += ec-combine.c ec_sources += ec-gf.c ec_sources += ec-method.c ec_sources += ec-heal.c +ec_sources += ec-heald.c ec_headers := ec.h ec_headers += ec-mem-types.h @@ -25,6 +26,7 @@ ec_headers += ec-common.h ec_headers += ec-combine.h ec_headers += ec-gf.h ec_headers += ec-method.h +ec_headers += ec-heald.h ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c @@ -37,6 +39,7 @@ ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la AM_CPPFLAGS = $(GF_CPPFLAGS) AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src +AM_CPPFLAGS += -I$(top_srcdir)/rpc/rpc-lib/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c new file mode 100644 index 00000000000..6b899414d4d --- /dev/null +++ b/xlators/cluster/ec/src/ec-heald.c @@ -0,0 +1,598 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "xlator.h" +#include "defaults.h" +#include "compat-errno.h" +#include "ec.h" +#include "ec-heald.h" +#include "ec-mem-types.h" +#include "syncop.h" +#include "syncop-utils.h" +#include "protocol-common.h" + +#define SHD_INODE_LRU_LIMIT 2048 +#define ASSERT_LOCAL(this, healer) \ + do { \ + if (!ec_shd_is_subvol_local (this, healer->subvol)) { \ + healer->local = _gf_false; \ + if (safe_break (healer)) { \ + break; \ + } else { \ + continue; \ + } \ + } else { \ + healer->local = _gf_true; \ + } \ + } while (0); + + +#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n])) +#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n])) + +gf_boolean_t +ec_shd_is_subvol_local (xlator_t *this, int subvol) +{ + ec_t *ec = NULL; + gf_boolean_t is_local = _gf_false; + loc_t loc = {0, }; + + ec = this->private; + loc.inode = this->itable->root; + syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local); + return is_local; +} + +char * +ec_subvol_name (xlator_t *this, int subvol) +{ + ec_t *ec = NULL; + + ec = this->private; + if (subvol < 0 || subvol > ec->nodes) + return NULL; + + return ec->xl_list[subvol]->name; +} + +int +__ec_shd_healer_wait (struct subvol_healer *healer) +{ + ec_t *ec = NULL; + struct timespec wait_till = {0, }; + int ret = 0; + + ec = healer->this->private; + +disabled_loop: + wait_till.tv_sec = time (NULL) + 60; + + while (!healer->rerun) { + ret = pthread_cond_timedwait (&healer->cond, + &healer->mutex, + &wait_till); + if (ret == ETIMEDOUT) + break; + } + + ret = healer->rerun; + healer->rerun = 0; + + if (!ec->shd.enabled || !ec->up) + goto disabled_loop; + + return ret; +} + + +int +ec_shd_healer_wait (struct subvol_healer *healer) +{ + int ret = 0; + + pthread_mutex_lock (&healer->mutex); + { + ret = __ec_shd_healer_wait (healer); + } + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + + +gf_boolean_t +safe_break (struct subvol_healer *healer) +{ + gf_boolean_t ret = _gf_false; + + pthread_mutex_lock (&healer->mutex); + { + if (healer->rerun) + goto unlock; + + healer->running = _gf_false; + ret = _gf_true; + } +unlock: + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + + +inode_t * +ec_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) +{ + inode_t *inode = NULL; + int ret = 0; + loc_t loc = {0, }; + struct iatt iatt = {0, }; + + inode = inode_find (this->itable, gfid); + if (inode) { + inode_lookup (inode); + goto out; + } + + loc.inode = inode_new (this->itable); + if (!loc.inode) + goto out; + uuid_copy (loc.gfid, gfid); + + ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL); + if (ret < 0) + goto out; + + inode = inode_link (loc.inode, NULL, NULL, &iatt); + if (inode) + inode_lookup (inode); +out: + loc_wipe (&loc); + return inode; +} + + +inode_t* +ec_shd_index_inode (xlator_t *this, xlator_t *subvol) +{ + loc_t rootloc = {0, }; + inode_t *inode = NULL; + int ret = 0; + dict_t *xattr = NULL; + void *index_gfid = NULL; + + rootloc.inode = inode_ref (this->itable->root); + uuid_copy (rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr (subvol, &rootloc, &xattr, + GF_XATTROP_INDEX_GFID, NULL); + if (ret || !xattr) { + errno = -ret; + goto out; + } + + ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); + if (ret) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s", + subvol->name, uuid_utoa (index_gfid)); + + inode = ec_shd_inode_find (this, subvol, index_gfid); + +out: + loc_wipe (&rootloc); + + if 
(xattr) + dict_unref (xattr); + + return inode; +} + +int +ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name) +{ + loc_t loc = {0, }; + int ret = 0; + + loc.parent = inode_ref (inode); + loc.name = name; + + ret = syncop_unlink (subvol, &loc); + + loc_wipe (&loc); + return ret; +} + +int +ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc) +{ + return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL); +} + + +int +ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = healer->this->private; + if (!ec->shd.enabled) + return -EBUSY; + + gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s", + entry->d_name); + + ret = uuid_parse (entry->d_name, loc.gfid); + if (ret) + return 0; + + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret == -ENOENT || ret == -ESTALE) { + ec_shd_index_purge (subvol, parent->inode, entry->d_name); + goto out; + } + + loc.inode = ec_shd_inode_find (healer->this, healer->this, loc.gfid); + if (!loc.inode) + goto out; + + ec_shd_selfheal (healer, healer->subvol, &loc); + +out: + loc_wipe (&loc); + + return 0; +} + +int +ec_shd_index_sweep (struct subvol_healer *healer) +{ + loc_t loc = {0}; + ec_t *ec = NULL; + int ret = 0; + xlator_t *subvol = NULL; + + ec = healer->this->private; + subvol = ec->xl_list[healer->subvol]; + + loc.inode = ec_shd_index_inode (healer->this, subvol); + if (!loc.inode) { + gf_log (healer->this->name, GF_LOG_WARNING, + "unable to get index-dir on %s", subvol->name); + return -errno; + } + + ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD, + healer, ec_shd_index_heal); + + inode_forget (loc.inode, 1); + loc_wipe (&loc); + + return ret; +} + +int +ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + xlator_t *this = healer->this; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = this->private; + if (!ec->shd.enabled) + return -EBUSY; + + loc.parent = inode_ref (parent->inode); + loc.name = entry->d_name; + uuid_copy (loc.gfid, entry->d_stat.ia_gfid); + + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret < 0) + goto out; + + loc.inode = ec_shd_inode_find (this, this, loc.gfid); + if (!loc.inode) { + ret = -EINVAL; + goto out; + } + + ec_shd_selfheal (healer, healer->subvol, &loc); + + loc_wipe (&loc); + ret = 0; + +out: + return ret; +} + +int +ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode) +{ + ec_t *ec = NULL; + loc_t loc = {0}; + + ec = healer->this->private; + loc.inode = inode; + return syncop_ftw (ec->xl_list[healer->subvol], &loc, + GF_CLIENT_PID_AFR_SELF_HEALD, healer, + ec_shd_full_heal); +} + + +void * +ec_shd_index_healer (void *data) +{ + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + + healer = data; + THIS = this = healer->this; + + for (;;) { + ec_shd_healer_wait (healer); + + ASSERT_LOCAL(this, healer); + + gf_log (this->name, GF_LOG_DEBUG, + "starting index sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + + ec_shd_index_sweep (healer); + + gf_log (this->name, GF_LOG_DEBUG, + "finished index sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + } + + return NULL; +} + + +void * 
+ec_shd_full_healer (void *data) +{ + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + + int run = 0; + + healer = data; + THIS = this = healer->this; + + for (;;) { + pthread_mutex_lock (&healer->mutex); + { + run = __ec_shd_healer_wait (healer); + if (!run) + healer->running = _gf_false; + } + pthread_mutex_unlock (&healer->mutex); + + if (!run) + break; + + ASSERT_LOCAL(this, healer); + + gf_log (this->name, GF_LOG_INFO, + "starting full sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + + ec_shd_full_sweep (healer, this->itable->root); + + gf_log (this->name, GF_LOG_INFO, + "finished full sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + } + + return NULL; +} + + +int +ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer) +{ + int ret = 0; + + ret = pthread_mutex_init (&healer->mutex, NULL); + if (ret) + goto out; + + ret = pthread_cond_init (&healer->cond, NULL); + if (ret) + goto out; + + healer->this = this; + healer->running = _gf_false; + healer->rerun = _gf_false; + healer->local = _gf_false; +out: + return ret; +} + + +int +ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer, + void *(threadfn)(void *)) +{ + int ret = 0; + + pthread_mutex_lock (&healer->mutex); + { + if (healer->running) { + pthread_cond_signal (&healer->cond); + } else { + ret = gf_thread_create (&healer->thread, NULL, + threadfn, healer); + if (ret) + goto unlock; + healer->running = 1; + } + + healer->rerun = 1; + } +unlock: + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + +int +ec_shd_full_healer_spawn (xlator_t *this, int subvol) +{ + return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol), + ec_shd_full_healer); +} + + +int +ec_shd_index_healer_spawn (xlator_t *this, int subvol) +{ + return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol), + ec_shd_index_healer); +} + +void +ec_selfheal_childup (ec_t *ec, int child) +{ + if (!ec->shd.iamshd) + return; + ec_shd_index_healer_spawn (ec->xl, child); +} + +int +ec_selfheal_daemon_init (xlator_t *this) +{ + ec_t *ec = NULL; + ec_self_heald_t *shd = NULL; + int ret = -1; + int i = 0; + + ec = this->private; + shd = &ec->shd; + + this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); + if (!this->itable) + goto out; + + shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers), + ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->index_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->index_healers[i].subvol = i; + ret = ec_shd_healer_init (this, &shd->index_healers[i]); + if (ret) + goto out; + } + + shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers), + ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->full_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->full_healers[i].subvol = i; + ret = ec_shd_healer_init (this, &shd->full_healers[i]); + if (ret) + goto out; + } + + ret = 0; +out: + return ret; +} + + +int +ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id) +{ + char key[64] = {0}; + int op_ret = 0; + ec_t *ec = NULL; + int i = 0; + GF_UNUSED int ret = 0; + + ec = this->private; + + for (i = 0; i < ec->nodes; i++) { + snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); + + op_ret = -1; + if (((ec->xl_up >> i) & 1) == 0) { + ret = dict_set_str (output, key, "Brick is not connected"); + } else if (!ec->up) { + ret = dict_set_str (output, key, + "Disperse subvolume is not up"); + } else if (!ec_shd_is_subvol_local (this, i)) { + ret = dict_set_str (output, key, "Brick is remote"); + } else { 
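ec_xl_op() above is the disperse counterpart of AFR's afr_xl_op(): glusterd reaches it through a GF_EVENT_TRANSLATOR_OP notification carrying an input dict (the "xl-op" code plus a per-translator id keyed by the xlator's name) and an output dict that collects per-brick status strings. A hedged sketch of how a full-heal request would arrive (the setup around the dicts is assumed, not taken from the patch):

    /* Sketch, assuming caller-side setup: */
    dict_t *input  = dict_new ();
    dict_t *output = dict_new ();

    ret = dict_set_int32 (input, "xl-op", GF_SHD_OP_HEAL_FULL);
    if (!ret)
            ret = dict_set_int32 (input, ec_xl->name, xl_id); /* translator id */
    if (!ret)
            ret = xlator_notify (ec_xl, GF_EVENT_TRANSLATOR_OP, input, output);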
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
new file mode 100644
index 00000000000..0f27a8ec776
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -0,0 +1,47 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_HEALD_H__
+#define __EC_HEALD_H__
+
+#include "xlator.h"
+
+struct _ec;
+typedef struct _ec ec_t;
+
+struct subvol_healer {
+        xlator_t        *this;
+        int              subvol;
+        gf_boolean_t     local;
+        gf_boolean_t     running;
+        gf_boolean_t     rerun;
+        pthread_mutex_t  mutex;
+        pthread_cond_t   cond;
+        pthread_t        thread;
+};
+
+struct _ec_self_heald;
+typedef struct _ec_self_heald ec_self_heald_t;
+
+struct _ec_self_heald {
+        gf_boolean_t          iamshd;
+        gf_boolean_t          enabled;
+        int                   timeout;
+        struct subvol_healer *index_healers;
+        struct subvol_healer *full_healers;
+};
+
+int
+ec_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+int
+ec_selfheal_daemon_init (xlator_t *this);
+void ec_selfheal_childup (ec_t *ec, int child);
+#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
index 8a66fb912a5..df65a031590 100644
--- a/xlators/cluster/ec/src/ec-mem-types.h
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -20,6 +20,7 @@ enum gf_ec_mem_types_
     ec_mt_ec_inode_t,
     ec_mt_ec_fd_t,
     ec_mt_ec_heal_t,
+    ec_mt_subvol_healer_t,
     ec_mt_end
 };
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 219494b961e..9fecde4c495 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -18,6 +18,7 @@
 #include "ec-fops.h"
 #include "ec-method.h"
 #include "ec.h"
+#include "ec-heald.h"
 
 #define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
 /* The maximum number of nodes is derived from the maximum allowed fragments
@@ -186,8 +187,8 @@ reconfigure (xlator_t *this, dict_t *options)
 {
     ec_t *ec = this->private;
 
-    GF_OPTION_RECONF ("self-heal-daemon", ec->shd, options, bool, failed);
-    GF_OPTION_RECONF ("iam-self-heal-daemon", ec->iamshd, options,
+    GF_OPTION_RECONF ("self-heal-daemon", ec->shd.enabled, options, bool, failed);
+    GF_OPTION_RECONF ("iam-self-heal-daemon", ec->shd.iamshd, options,
                       bool, failed);
 
     return 0;
@@ -329,13 +330,35 @@ ec_handle_down (xlator_t *this, ec_t *ec, int32_t idx)
 }
 
 int32_t
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
-    ec_t * ec = this->private;
-    int32_t idx = 0;
-    int32_t error = 0;
-    glusterfs_event_t old_event = GF_EVENT_MAXVAL;
-    glusterfs_event_t new_event = GF_EVENT_MAXVAL;
+ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
+{
+    ec_t *ec = this->private;
+    int32_t idx = 0;
+    int32_t error = 0;
+    glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+    glusterfs_event_t new_event = GF_EVENT_MAXVAL;
+    dict_t *input = NULL;
+    dict_t *output = NULL;
+
+    if (event == GF_EVENT_TRANSLATOR_OP) {
+        if (!ec->up) {
+            error = -1;
+            goto out;
+        } else {
+            input = data;
+            output = data2;
+            error = ec_xl_op (this, input, output);
+        }
+        goto out;
+    }
+
+    for (idx = 0; idx < ec->nodes; idx++) {
+        if (ec->xl_list[idx] == data) {
+            if (event == GF_EVENT_CHILD_UP)
+                ec_selfheal_childup (ec, idx);
+            break;
+        }
+    }
 
     LOCK (&ec->lock);
 
@@ -348,11 +371,6 @@ notify (xlator_t *this, int32_t event, void *data, ...)
         goto unlock;
     }
 
-    for (idx = 0; idx < ec->nodes; idx++) {
-        if (ec->xl_list[idx] == data)
-            break;
-    }
-
     gf_log (this->name, GF_LOG_TRACE, "NOTIFY(%d): %p, %d",
             event, data, idx);
 
@@ -381,13 +399,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
         if (new_event != GF_EVENT_MAXVAL)
             error = default_notify (this, new_event, data);
     }
-unlock:
-    UNLOCK (&ec->lock);
+ unlock:
+    UNLOCK (&ec->lock);
+
+    if (event != GF_EVENT_MAXVAL)
+        return default_notify (this, event, data);
+out:
+    return error;
+}
+
+int32_t
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+    int ret = -1;
+    va_list ap;
+    void *data2 = NULL;
 
-    if (event != GF_EVENT_MAXVAL)
-        return default_notify (this, event, data);
+    va_start (ap, data);
+    data2 = va_arg (ap, dict_t*);
+    va_end (ap);
+    ret = ec_notify (this, event, data, data2);
 
-    return error;
+    return ret;
 }
 
 int32_t
@@ -440,9 +473,11 @@ init (xlator_t *this)
     }
 
     ec_method_initialize();
-    GF_OPTION_INIT ("self-heal-daemon", ec->shd, bool, failed);
-    GF_OPTION_INIT ("iam-self-heal-daemon", ec->iamshd, bool, failed);
+    GF_OPTION_INIT ("self-heal-daemon", ec->shd.enabled, bool, failed);
+    GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
 
+    if (ec->shd.iamshd)
+        ec_selfheal_daemon_init (this);
     gf_log(this->name, GF_LOG_DEBUG, "Disperse translator initialized.");
 
     return 0;
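The notify() rework in ec.c above exists to make that translator-op call possible: the old fixed-arity body could not see the second dict, so the function is split into a varargs wrapper that pulls the output dict out of the va_list and an ec_notify() that handles GF_EVENT_TRANSLATOR_OP before the usual child-event bookkeeping, spawning an index healer whenever a child comes up. The ec.h hunk below completes the picture by folding the two old booleans into the new ec_self_heald_t; in summary (a restatement of the patch, not new behavior):

    /* Field mapping after this patch:
     *   ec->shd     (gf_boolean_t)  ->  ec->shd.enabled
     *   ec->iamshd  (gf_boolean_t)  ->  ec->shd.iamshd
     * plus the healer arrays driven by ec-heald.c, e.g. on child-up: */
    if (ec->shd.iamshd)
            ec_shd_index_healer_spawn (ec->xl, child);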
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index b6a95a11b18..1c740187757 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -13,6 +13,7 @@
 
 #include "xlator.h"
 #include "timer.h"
+#include "ec-heald.h"
 
 #define EC_XATTR_PREFIX "trusted.ec."
 #define EC_XATTR_CONFIG EC_XATTR_PREFIX"config"
@@ -21,9 +22,6 @@
 #define EC_XATTR_HEAL   EC_XATTR_PREFIX"heal"
 #define EC_XATTR_DIRTY  EC_XATTR_PREFIX"dirty"
 
-struct _ec;
-typedef struct _ec ec_t;
-
 struct _ec
 {
     xlator_t *        xl;
@@ -46,8 +44,6 @@ struct _ec
     struct mem_pool * fop_pool;
     struct mem_pool * cbk_pool;
     struct mem_pool * lock_pool;
-    gf_boolean_t      shd;
-    gf_boolean_t      iamshd;
+    ec_self_heald_t   shd;
 };
-
 #endif /* __EC_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 8ba77471646..22530f97a12 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -446,7 +446,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
         gd1_mgmt_brick_op_req *brick_req = NULL;
         char                  *volname = NULL;
         char                   name[1024] = {0,};
-        gf_xl_afr_op_t         heal_op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t         heal_op = GF_SHD_OP_INVALID;
         xlator_t              *this = NULL;
 
         this = THIS;
@@ -5567,7 +5567,7 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
         xlator_t                 *this = NULL;
         char                      msg[2048] = {0,};
         glusterd_pending_node_t  *pending_node = NULL;
-        gf_xl_afr_op_t            heal_op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t            heal_op = GF_SHD_OP_INVALID;
         int                       rxlator_count = 0;
 
         this = THIS;
@@ -5592,14 +5592,14 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
         }
 
         ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
-        if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+        if (ret || (heal_op == GF_SHD_OP_INVALID)) {
                 gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid");
                 goto out;
         }
 
         switch (heal_op) {
-        case GF_AFR_OP_INDEX_SUMMARY:
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
                 if (!priv->shd_svc.online) {
                         if (!rsp_dict) {
                                 gf_log (this->name, GF_LOG_ERROR, "Received "
@@ -5619,7 +5619,7 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
                 }
                 break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
                 if (!priv->shd_svc.online) {
                         if (!rsp_dict) {
                                 gf_log (this->name, GF_LOG_ERROR, "Received "
@@ -5644,12 +5644,12 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
 
         switch (heal_op) {
-        case GF_AFR_OP_HEAL_FULL:
+        case GF_SHD_OP_HEAL_FULL:
                 rxlator_count = _select_rxlators_for_full_self_heal (this,
                                                                      volinfo,
                                                                      dict);
                 break;
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
                 rxlator_count = _select_rxlators_with_local_bricks (this,
                                                                     volinfo,
                                                                     dict,
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 1e7ebb79c6e..18ac27e0fcb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -8618,7 +8618,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
         rsp_ctx.dict = op_ctx;
         rsp_ctx.volinfo = volinfo;
         rsp_ctx.this = THIS;
-        if (heal_op == GF_AFR_OP_STATISTICS)
+        if (heal_op == GF_SHD_OP_STATISTICS)
                 dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics,
                               &rsp_ctx);
         else
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 6ca3e55a122..72da71eafc1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -646,20 +646,20 @@ static int
 glusterd_handle_heal_enable_disable (rpcsvc_request_t *req, dict_t *dict,
                                      glusterd_volinfo_t *volinfo)
 {
-        gf_xl_afr_op_t  heal_op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t  heal_op = GF_SHD_OP_INVALID;
         int             ret = 0;
         xlator_t       *this = THIS;
         char           *key = NULL;
         char           *value = NULL;
 
         ret = dict_get_int32 (dict, "heal-op", (int32_t *)&heal_op);
-        if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+        if (ret || (heal_op == GF_SHD_OP_INVALID)) {
                 ret = -1;
                 goto out;
         }
 
-        if ((heal_op != GF_AFR_OP_HEAL_ENABLE) &&
-            (heal_op != GF_AFR_OP_HEAL_DISABLE)) {
+        if ((heal_op != GF_SHD_OP_HEAL_ENABLE) &&
+            (heal_op != GF_SHD_OP_HEAL_DISABLE)) {
                 ret = -EINVAL;
                 goto out;
         }
@@ -675,9 +675,9 @@ glusterd_handle_heal_enable_disable (rpcsvc_request_t *req, dict_t *dict,
         if (ret)
                 goto out;
 
-        if (heal_op == GF_AFR_OP_HEAL_ENABLE) {
+        if (heal_op == GF_SHD_OP_HEAL_ENABLE) {
                 value = "enable";
-        } else if (heal_op == GF_AFR_OP_HEAL_DISABLE) {
+        } else if (heal_op == GF_SHD_OP_HEAL_DISABLE) {
                 value = "disable";
         }
@@ -1619,7 +1619,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
         char                                    msg[2048];
         glusterd_conf_t                         *priv = NULL;
         dict_t                                  *opt_dict = NULL;
-        gf_xl_afr_op_t                          heal_op = GF_AFR_OP_INVALID;
+        gf_xl_afr_op_t                          heal_op = GF_SHD_OP_INVALID;
         xlator_t                                *this = NULL;
 
         this = THIS;
@@ -1689,7 +1689,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
         }
 
         ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
-        if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+        if (ret || (heal_op == GF_SHD_OP_INVALID)) {
                 ret = -1;
                 *op_errstr = gf_strdup("Invalid heal-op");
                 gf_log (this->name, GF_LOG_WARNING, "%s", "Invalid heal-op");
@@ -1697,8 +1697,8 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
         }
 
         switch (heal_op) {
-        case GF_AFR_OP_HEALED_FILES:
-        case GF_AFR_OP_HEAL_FAILED_FILES:
+        case GF_SHD_OP_HEALED_FILES:
+        case GF_SHD_OP_HEAL_FAILED_FILES:
                 ret = -1;
                 snprintf (msg, sizeof (msg), "Command not supported. "
                           "Please use \"gluster volume heal %s info\" "
@@ -1707,9 +1707,9 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
                 *op_errstr = gf_strdup (msg);
                 goto out;
 
-        case GF_AFR_OP_INDEX_SUMMARY:
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
-        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+        case GF_SHD_OP_INDEX_SUMMARY:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+        case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
                 break;
         default:
                 if (!priv->shd_svc.online) {