From 6d3739292b7b51d2ddbab75b5f884fb38925b943 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Thu, 16 Jan 2014 16:14:36 -0800 Subject: cluster/afr: refactor - Remove client side self-healing completely (opendir, openfd, lookup) - Re-work readdir-failover to work reliably in case of NFS - Remove unused/dead lock recovery code - Consistently use xdata in both calls and callbacks in all FOPs - Per-inode event generation, used to force inode ctx refresh - Implement dirty flag support (in place of pending counts) - Eliminate inode ctx structure, use read subvol bits + event_generation - Implement inode ctx refreshing based on event generation - Provide backward compatibility in transactions - remove unused variables and functions - make code more consistent in style and pattern - regularize and clean up inode-write transaction code - regularize and clean up dir-write transaction code - regularize and clean up common FOPs - reorganize transaction framework code - skip setting xattrs in pending dict if nothing is pending - re-write self-healing code using syncops - re-write simpler self-heal-daemon Change-Id: I1e4080c9796c8a2815c2dab4be3073f389d614a8 BUG: 1021686 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/6010 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-self-heal-common.c | 3287 ++++++------------------ 1 file changed, 742 insertions(+), 2545 deletions(-) (limited to 'xlators/cluster/afr/src/afr-self-heal-common.c') diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index ef92b4205..4dac83113 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2008-2012 Red Hat, Inc. + Copyright (c) 2013 Red Hat, Inc. This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -8,2805 +8,1002 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "xlator.h" -#include "byte-order.h" + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif #include "afr.h" -#include "afr-transaction.h" -#include "afr-self-heal-common.h" #include "afr-self-heal.h" -#include "pump.h" - -#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \ - do { \ - if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \ - off += snprintf (msg + off, sizeof (msg) - off, \ - " "sh_str" self heal %s,", \ - get_sh_completion_status (status));\ - print_log = 1; \ - } \ - } while (0) - -#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \ - do { \ - if (AFR_SELF_HEAL_SYNC_BEGIN == status || \ - AFR_SELF_HEAL_FAILED == status) { \ - off += snprintf (msg + off, sizeof (msg) - off, \ - " "sh_str" self heal %s,", \ - get_sh_completion_status (status));\ - print_log = 1; \ - } \ - } while (0) +#include "byte-order.h" -void -afr_sh_reset (call_frame_t *frame, xlator_t *this) +int +afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - memset (sh->child_errno, 0, - sizeof (*sh->child_errno) * priv->child_count); - memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count); - memset (sh->parentbufs, 0, - sizeof (*sh->parentbufs) * priv->child_count); - memset (sh->success, 0, sizeof (*sh->success) * priv->child_count); - memset (sh->locked_nodes, 0, - sizeof (*sh->locked_nodes) * priv->child_count); - sh->active_sinks = 0; - - afr_reset_xattr (sh->xattr, priv->child_count); -} + afr_local_t *local = NULL; -//Intersection[child]=1 if child is part of intersection -void -afr_children_intersection_get (int32_t *set1, int32_t *set2, - int *intersection, unsigned int child_count) -{ - int i = 0; - - memset (intersection, 0, sizeof (*intersection) * child_count); - for (i = 0; i < child_count; i++) { - intersection[i] = afr_is_child_present (set1, child_count, i) - && afr_is_child_present (set2, child_count, - i); - } + local = frame->local; + + syncbarrier_wake (&local->barrier); + + return 0; } -/** - * select_source - select a source and return it - */ int -afr_sh_select_source (int sources[], int child_count) +afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode, + int subvol, dict_t *xattr) { - int i = 0; - for (i = 0; i < child_count; i++) - if (sources[i]) - return i; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + loc_t loc = {0, }; - return -1; -} + priv = this->private; + local = frame->local; -void -afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this) -{ - int i = 0; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int active_sinks = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (sh->sources[i] == 0 && local->child_up[i] == 1) { - active_sinks++; - sh->success[i] = 1; - } else if (sh->sources[i] == 1 && local->child_up[i] == 1) { - sh->success[i] = 1; - } - } - sh->active_sinks = active_sinks; -} + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); -int -afr_sh_source_count (int sources[], int child_count) -{ - int i = 0; - int nsource = 0; + STACK_WIND (frame, afr_selfheal_post_op_cbk, priv->children[subvol], + priv->children[subvol]->fops->xattrop, &loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL); - for (i = 0; i < child_count; i++) - if (sources[i]) - nsource++; - return nsource; -} + syncbarrier_wait (&local->barrier, 1); -void -afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) -{ - sh->op_ret = -1; - sh->op_errno = afr_most_important_error(sh->op_errno, op_errno, - _gf_false); + return 0; } -void -afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this) -{ - afr_private_t * priv = this->private; - char *buf = NULL; - char *ptr = NULL; - int i = 0; - int j = 0; - - /* 10 digits per entry + 1 space + '[' and ']' */ - buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char); - - for (i = 0; i < priv->child_count; i++) { - ptr = buf; - ptr += sprintf (ptr, "[ "); - for (j = 0; j < priv->child_count; j++) { - ptr += sprintf (ptr, "%d ", pending_matrix[i][j]); - } - sprintf (ptr, "]"); - gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf); - } - - GF_FREE (buf); -} -char* -afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this) +dict_t * +afr_selfheal_output_xattr (xlator_t *this, afr_transaction_type type, + int *output_dirty, int **output_matrix, int subvol) { - afr_private_t * priv = this->private; - char *buf = NULL; - char *ptr = NULL; - int i = 0; - int j = 0; - int child_count = priv->child_count; - char *matrix_begin = "[ [ "; - char *matrix_end = "] ]"; - char *seperator = "] [ "; - int pending_entry_strlen = 12; //Including space after entry - int matrix_begin_strlen = 0; - int matrix_end_strlen = 0; - int seperator_strlen = 0; - int string_length = 0; - char *msg = "- Pending matrix: "; - - /* - * for a list of lists of [ [ a b ] [ c d ] ] - * */ - - matrix_begin_strlen = strlen (matrix_begin); - matrix_end_strlen = strlen (matrix_end); - seperator_strlen = strlen (seperator); - string_length = matrix_begin_strlen + matrix_end_strlen - + (child_count -1) * seperator_strlen - + (child_count * child_count * pending_entry_strlen); - - buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char); - if (!buf) - goto out; - - ptr = buf; - ptr += sprintf (ptr, "%s", msg); - ptr += sprintf (ptr, "%s", matrix_begin); - for (i = 0; i < priv->child_count; i++) { - for (j = 0; j < priv->child_count; j++) { - ptr += sprintf (ptr, "%d ", pending_matrix[i][j]); - } - if (i < priv->child_count -1) - ptr += sprintf (ptr, "%s", seperator); - } - - ptr += sprintf (ptr, "%s", matrix_end); + dict_t *xattr = NULL; + afr_private_t *priv = NULL; + int j = 0; + int idx = 0; + int ret = 0; + int *raw = 0; -out: - return buf; -} + priv = this->private; + idx = afr_index_for_transaction_type (type); -void -afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this, - const char *loc) -{ - char *buf = NULL; - char *free_ptr = NULL; + xattr = dict_new (); + if (!xattr) + return NULL; - buf = afr_get_pending_matrix_str (pending_matrix, this); - if (buf) - free_ptr = buf; - else - buf = ""; + if (output_dirty[subvol]) { + /* clear dirty */ + raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t); + if (!raw) + goto err; + raw[idx] = hton32 (output_dirty[subvol]); + ret = dict_set_bin (xattr, AFR_DIRTY, raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) + goto err; + } - gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'" - " (possible split-brain). Please delete the file from all but " - "the preferred subvolume.%s", loc, buf); - GF_FREE (free_ptr); - return; -} + /* clear/set pending */ + for (j = 0; j < priv->child_count; j++) { + if (!output_matrix[subvol][j]) + continue; + raw = GF_CALLOC (sizeof(int), AFR_NUM_CHANGE_LOGS, + gf_afr_mt_int32_t); + if (!raw) + goto err; -void -afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count) -{ - int i = 0; - int j = 0; + raw[idx] = hton32 (output_matrix[subvol][j]); - GF_ASSERT (pending_matrix); + ret = dict_set_bin (xattr, priv->pending_key[j], + raw, sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) + goto err; + } - for (i = 0; i < child_count; i++) { - for (j = 0; j < child_count; j++) { - pending_matrix[i][j] = 0; - } - } + return xattr; +err: + if (xattr) + dict_unref (xattr); + return NULL; } -void -afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix, - unsigned char *ignorant_subvols, - size_t child_count) -{ - int i = 0; - int j = 0; - - GF_ASSERT (pending_matrix); - GF_ASSERT (ignorant_subvols); - - for (i = 0; i < child_count; i++) { - if (ignorant_subvols[i]) { - for (j = 0; j < child_count; j++) { - if (!ignorant_subvols[j]) - pending_matrix[j][i] += 1; - } - } - } -} int -afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix, - unsigned char *ignorant_subvols, - dict_t *xattr[], afr_transaction_type type, - size_t child_count) -{ - /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */ - int32_t pending[3] = {0,}; - void *pending_raw = NULL; - int ret = -1; - int i = 0; - int j = 0; - int k = 0; - - afr_init_pending_matrix (pending_matrix, child_count); - - for (i = 0; i < child_count; i++) { - pending_raw = NULL; - - for (j = 0; j < child_count; j++) { - ret = dict_get_ptr (xattr[i], pending_key[j], - &pending_raw); - - if (ret != 0) { - /* - * There is no xattr present. This means this - * subvolume should be considered an 'ignorant' - * subvolume. - */ - - if (ignorant_subvols) - ignorant_subvols[i] = 1; - continue; - } - - memcpy (pending, pending_raw, sizeof(pending)); - k = afr_index_for_transaction_type (type); - - pending_matrix[i][j] = ntoh32 (pending[k]); - } - } - - return ret; -} +afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, afr_transaction_type type, + struct afr_reply *replies, unsigned char *locked_on) +{ + afr_private_t *priv = NULL; + int i = 0; + int j = 0; + unsigned char *pending = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + + priv = this->private; + + pending = alloca0 (priv->child_count); + + input_dirty = alloca0 (priv->child_count * sizeof (int)); + input_matrix = ALLOC_MATRIX (priv->child_count, int); + output_dirty = alloca0 (priv->child_count * sizeof (int)); + output_matrix = ALLOC_MATRIX (priv->child_count, int); + + afr_selfheal_extract_xattr (this, replies, type, input_dirty, + input_matrix); + + for (i = 0; i < priv->child_count; i++) + if (sinks[i] && !healed_sinks[i]) + pending[i] = 1; + + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) { + if (pending[j]) + output_matrix[i][j] = 1; + else + output_matrix[i][j] = -input_matrix[i][j]; + } + } -typedef enum { - AFR_NODE_INVALID, - AFR_NODE_INNOCENT, - AFR_NODE_FOOL, - AFR_NODE_WISE, -} afr_node_type; + for (i = 0; i < priv->child_count; i++) { + if (!pending[i]) + output_dirty[i] = -input_dirty[i]; + } -typedef struct { - afr_node_type type; - int wisdom; -} afr_node_character; + for (i = 0; i < priv->child_count; i++) { + if (!locked_on[i]) + /* perform post-op only on subvols we had locked + and inspected on. + */ + continue; + xattr = afr_selfheal_output_xattr (this, type, output_dirty, + output_matrix, i); + if (!xattr) { + gf_log (this->name, GF_LOG_ERROR, + "unable to allocate xdata for subvol %d", i); + continue; + } -static int -afr_sh_is_innocent (int32_t *array, int child_count) -{ - int i = 0; - int ret = 1; /* innocent until proven guilty */ + afr_selfheal_post_op (frame, this, inode, i, xattr); - for (i = 0; i < child_count; i++) { - if (array[i]) { - ret = 0; - break; - } - } + dict_unref (xattr); + } - return ret; + return 0; } -static int -afr_sh_is_fool (int32_t *array, int i, int child_count) -{ - return array[i]; /* fool if accuses itself */ +void +afr_replies_copy (struct afr_reply *dst, struct afr_reply *src, int count) +{ + int i = 0; + dict_t *xdata = NULL; + + if (dst == src) + return; + + for (i = 0; i < count; i++) { + dst[i].valid = src[i].valid; + dst[i].op_ret = src[i].op_ret; + dst[i].op_errno = src[i].op_errno; + dst[i].prestat = src[i].prestat; + dst[i].poststat = src[i].poststat; + dst[i].preparent = src[i].preparent; + dst[i].postparent = src[i].postparent; + dst[i].preparent2 = src[i].preparent2; + dst[i].postparent2 = src[i].postparent2; + if (src[i].xdata) + xdata = dict_ref (src[i].xdata); + else + xdata = NULL; + if (dst[i].xdata) + dict_unref (dst[i].xdata); + dst[i].xdata = xdata; + memcpy (dst[i].checksum, src[i].checksum, + MD5_DIGEST_LENGTH); + } } -static int -afr_sh_is_wise (int32_t *array, int i, int child_count) +int +afr_selfheal_fill_dirty (xlator_t *this, int *dirty, int subvol, + int idx, dict_t *xdata) { - return !array[i]; /* wise if does not accuse itself */ -} + void *pending_raw = NULL; + int pending[3] = {0, }; + if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw)) + return -1; -static int -afr_sh_all_nodes_innocent (afr_node_character *characters, - int child_count) -{ - int i = 0; - int ret = 1; + if (!pending_raw) + return -1; + + memcpy (pending, pending_raw, sizeof(pending)); - for (i = 0; i < child_count; i++) { - if (characters[i].type != AFR_NODE_INNOCENT) { - ret = 0; - break; - } - } + dirty[subvol] = ntoh32 (pending[idx]); - return ret; + return 0; } -static int -afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count) +int +afr_selfheal_fill_matrix (xlator_t *this, int **matrix, int subvol, + int idx, dict_t *xdata) { - int i = 0; - int ret = 0; + int i = 0; + void *pending_raw = NULL; + int pending[3] = {0, }; + afr_private_t *priv = NULL; - for (i = 0; i < child_count; i++) { - if (characters[i].type == AFR_NODE_WISE) { - ret = 1; - break; - } - } + priv = this->private; - return ret; -} + for (i = 0; i < priv->child_count; i++) { + if (dict_get_ptr (xdata, priv->pending_key[i], &pending_raw)) + continue; + if (!pending_raw) + continue; -/* - * The 'wisdom' of a wise node is 0 if any other wise node accuses it. - * It is 1 if no other wise node accuses it. - * Only wise nodes with wisdom 1 are sources. - * - * If no nodes with wisdom 1 exist, a split-brain has occurred. - */ + memcpy (pending, pending_raw, sizeof(pending)); -static void -afr_sh_compute_wisdom (int32_t *pending_matrix[], - afr_node_character characters[], int child_count) -{ - int i = 0; - int j = 0; - - for (i = 0; i < child_count; i++) { - if (characters[i].type == AFR_NODE_WISE) { - characters[i].wisdom = 1; - - for (j = 0; j < child_count; j++) { - if ((characters[j].type == AFR_NODE_WISE) - && pending_matrix[j][i]) { - - characters[i].wisdom = 0; - } - } - } - } + matrix[subvol][i] = ntoh32 (pending[idx]); + } + + return 0; } -static int -afr_sh_wise_nodes_conflict (afr_node_character *characters, - int child_count) +int +afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, + afr_transaction_type type, int *dirty, int **matrix) { - int i = 0; - int ret = 1; + afr_private_t *priv = NULL; + int i = 0; + dict_t *xdata = NULL; + int idx = -1; + + idx = afr_index_for_transaction_type (type); - for (i = 0; i < child_count; i++) { - if ((characters[i].type == AFR_NODE_WISE) - && characters[i].wisdom == 1) { + priv = this->private; - /* There is atleast one bona-fide wise node */ - ret = 0; - break; - } - } + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].xdata) + continue; + + xdata = replies[i].xdata; - return ret; + afr_selfheal_fill_dirty (this, dirty, i, idx, xdata); + afr_selfheal_fill_matrix (this, matrix, i, idx, xdata); + } + + return 0; } -static int -afr_sh_mark_wisest_as_sources (int sources[], - afr_node_character *characters, - int child_count) -{ - int nsources = 0; - int i = 0; - for (i = 0; i < child_count; i++) { - if (characters[i].wisdom == 1) { - sources[i] = 1; - nsources++; - } - } +/* + * This function determines if a self-heal is required for a given inode, + * and if needed, in what direction. + * + * locked_on[] is the array representing servers which have been locked and + * from which xattrs have been fetched for analysis. + * + * The output of the function is by filling the arrays sources[] and sinks[]. + * + * sources[i] is set if i'th server is an eligible source for a selfheal. + * + * sinks[i] is set if i'th server needs to be healed. + * + * if sources[0..N] are all set, there is no need for a selfheal. + * + * if sinks[0..N] are all set, the inode is in split brain. + * + */ - return nsources; -} +int +afr_selfheal_find_direction (call_frame_t *frame, xlator_t *this, + struct afr_reply *replies, + afr_transaction_type type, unsigned char *locked_on, + unsigned char *sources, unsigned char *sinks) +{ + afr_private_t *priv = NULL; + int i = 0; + int j = 0; + int *dirty = NULL; + int **matrix = NULL; + char *accused = NULL; + + priv = this->private; + + dirty = alloca0 (priv->child_count * sizeof (int)); + accused = alloca0 (priv->child_count); + matrix = ALLOC_MATRIX(priv->child_count, int); + + /* First construct the pending matrix for further analysis */ + afr_selfheal_extract_xattr (this, replies, type, dirty, matrix); + + /* Next short list all accused to exclude them from being sources */ + for (i = 0; i < priv->child_count; i++) { + for (j = 0; j < priv->child_count; j++) { + if (matrix[i][j]) + accused[j] = 1; + } + } -static void -afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix, - afr_node_character *characters, - int32_t child_count) -{ - int i = 0; - int j = 0; - int witness = 0; - - GF_ASSERT (witnesses); - GF_ASSERT (pending_matrix); - GF_ASSERT (characters); - GF_ASSERT (child_count > 0); - - for (i = 0; i < child_count; i++) { - if (characters[i].type != AFR_NODE_FOOL) - continue; - - witness = 0; - for (j = 0; j < child_count; j++) { - if (i == j) - continue; - witness += pending_matrix[i][j]; - } - witnesses[i] = witness; - } -} + /* Short list all non-accused as sources */ + memset (sources, 0, priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (!accused[i] && locked_on[i]) + sources[i] = 1; + } -static int32_t -afr_find_biggest_witness_among_fools (int32_t *witnesses, - afr_node_character *characters, - int32_t child_count) -{ - int i = 0; - int biggest_witness = -1; - int biggest_witness_idx = -1; - int biggest_witness_cnt = -1; - - GF_ASSERT (witnesses); - GF_ASSERT (characters); - GF_ASSERT (child_count > 0); - - for (i = 0; i < child_count; i++) { - if (characters[i].type != AFR_NODE_FOOL) - continue; - - if (biggest_witness < witnesses[i]) { - biggest_witness = witnesses[i]; - biggest_witness_idx = i; - biggest_witness_cnt = 1; + /* Everyone accused by sources are sinks */ + memset (sinks, 0, priv->child_count); + for (i = 0; i < priv->child_count; i++) { + if (!sources[i]) continue; + for (j = 0; j < priv->child_count; j++) { + if (matrix[i][j]) + sinks[j] = 1; } + } - if (biggest_witness == witnesses[i]) - biggest_witness_cnt++; - } + /* If any source has 'dirty' bit, pick first + 'dirty' source and make everybody else sinks */ + for (i = 0; i < priv->child_count; i++) { + if (sources[i] && dirty[i]) { + for (j = 0; j < priv->child_count; j++) { + if (j != i) { + sources[j] = 0; + sinks[j] = 1; + } + } + break; + } + } - if (biggest_witness_cnt != 1) - return -1; + /* If no sources, all locked nodes are sinks - split brain */ + if (AFR_COUNT (sources, priv->child_count) == 0) { + for (i = 0; i < priv->child_count; i++) { + if (locked_on[i]) + sinks[i] = 1; + } + } - return biggest_witness_idx; + return 0; } + int -afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses, - afr_node_character *characters, - int32_t child_count, int32_t witness) +afr_selfheal_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *parbuf) { - int i = 0; - int nsources = 0; - - GF_ASSERT (sources); - GF_ASSERT (witnesses); - GF_ASSERT (characters); - GF_ASSERT (child_count > 0); - - for (i = 0; i < child_count; i++) { - if (characters[i].type != AFR_NODE_FOOL) - continue; - - if (witness == witnesses[i]) { - sources[i] = 1; - nsources++; - } - } - return nsources; -} + afr_local_t *local = NULL; + int i = -1; + local = frame->local; + i = (long) cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (buf) + local->replies[i].poststat = *buf; + if (parbuf) + local->replies[i].postparent = *parbuf; + if (xdata) + local->replies[i].xdata = dict_ref (xdata); + + syncbarrier_wake (&local->barrier); -int -afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx) -{ - if (idx >= 0 && idx < child_count) { - sources[idx] = 1; - return 1; - } return 0; } -static int -afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children, - int child_count) +inode_t * +afr_selfheal_unlocked_lookup_on (call_frame_t *frame, inode_t *parent, + const char *name, struct afr_reply *replies, + unsigned char *lookup_on) { - int idx = -1; - int i = -1; - int child = -1; - uint64_t max_size = 0; - uint64_t min_size = 0; - int num_children = 0; + loc_t loc = {0, }; + dict_t *xattr_req = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; - for (i = 0; i < child_count; i++) { - if (success_children[i] == -1) - break; + local = frame->local; + priv = frame->this->private; - child = success_children[i]; - if (bufs[child].ia_size > max_size) { - max_size = bufs[child].ia_size; - idx = child; - } - - if ((num_children == 0) || (bufs[child].ia_size < min_size)) { - min_size = bufs[child].ia_size; - } + xattr_req = dict_new (); + if (!xattr_req) + return NULL; - num_children++; + if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { + dict_destroy (xattr_req); + return NULL; } - /* If sizes are same for all of them, finding sources will have to - * happen with pending changelog. So return -1 - */ - if ((num_children > 1) && (min_size == max_size)) - return -1; - return idx; -} + inode = inode_new (parent->table); + if (!inode) { + dict_destroy (xattr_req); + return NULL; + } + loc.parent = inode_ref (parent); + uuid_copy (loc.pargfid, parent->gfid); + loc.name = name; + loc.inode = inode_ref (inode); -static int -afr_find_newest_file (struct iatt *bufs, int32_t *success_children, - int child_count) -{ - int idx = -1; - int i = -1; - int child = -1; - uint64_t max_ctime = 0; + AFR_ONLIST (lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xattr_req); - for (i = 0; i < child_count; i++) { - if (success_children[i] == -1) - break; + afr_replies_copy (replies, local->replies, priv->child_count); - child = success_children[i]; - if (bufs[child].ia_ctime > max_ctime) { - max_ctime = bufs[child].ia_ctime; - idx = child; - } - } + loc_wipe (&loc); + dict_unref (xattr_req); - return idx; + return inode; } -static int -afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix, - afr_node_character *characters, - int32_t *success_children, - int child_count, struct iatt *bufs) -{ - int32_t biggest_witness = 0; - int nsources = 0; - int32_t *witnesses = NULL; - - GF_ASSERT (child_count > 0); - - biggest_witness = afr_find_largest_file_size (bufs, success_children, - child_count); - if (biggest_witness != -1) - goto found; - - witnesses = GF_CALLOC (child_count, sizeof (*witnesses), - gf_afr_mt_int32_t); - if (NULL == witnesses) { - nsources = -1; - goto out; - } - - afr_compute_witness_of_fools (witnesses, pending_matrix, characters, - child_count); - biggest_witness = afr_find_biggest_witness_among_fools (witnesses, - characters, - child_count); - if (biggest_witness != -1) - goto found; - - biggest_witness = afr_find_newest_file (bufs, success_children, - child_count); - -found: - nsources = afr_mark_fool_as_source_by_idx (sources, child_count, - biggest_witness); -out: - GF_FREE (witnesses); - return nsources; -} - int -afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs, - int32_t *success_children, - unsigned int child_count, uint32_t uid) +afr_selfheal_unlocked_discover_on (call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, + unsigned char *discover_on) { - int i = 0; - int nsources = 0; - int child = 0; - - for (i = 0; i < child_count; i++) { - if (-1 == success_children[i]) - break; - - child = success_children[i]; - if (uid == bufs[child].ia_uid) { - sources[child] = 1; - nsources++; - } - } - return nsources; -} + loc_t loc = {0, }; + dict_t *xattr_req = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; -int -afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children, - unsigned int child_count) -{ - int i = 0; - int smallest = -1; - int child = 0; - - for (i = 0; i < child_count; i++) { - if (-1 == success_children[i]) - break; - child = success_children[i]; - if ((smallest == -1) || - (bufs[child].ia_uid < bufs[smallest].ia_uid)) { - smallest = child; - } - } - return smallest; -} + local = frame->local; + priv = frame->this->private; -static int -afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children, - int child_count, int32_t *sources) -{ - int nsources = 0; - int smallest = 0; - - smallest = afr_get_child_with_lowest_uid (bufs, success_children, - child_count); - if (smallest < 0) { - nsources = -1; - goto out; - } - nsources = afr_mark_child_as_source_by_uid (sources, bufs, - success_children, child_count, - bufs[smallest].ia_uid); -out: - return nsources; -} + xattr_req = dict_new (); + if (!xattr_req) + return -ENOMEM; -int -afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children, - struct iatt *bufs) -{ - afr_private_t *priv = NULL; - int i = 0; - int child = -1; - int read_child = -1; - - priv = this->private; - for (i = 0; i < priv->child_count; i++) { - child = success_children[i]; - if (child < 0) - break; - if (read_child < 0) - read_child = child; - else if (bufs[read_child].ia_size < bufs[child].ia_size) - read_child = child; - } - return read_child; -} + if (afr_xattr_req_prepare (frame->this, xattr_req) != 0) { + dict_destroy (xattr_req); + return -ENOMEM; + } -int -afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children, - int child_count, int32_t *sources) -{ - int nsources = 0; - int i = 0; - int child = 0; - gf_boolean_t sink_exists = _gf_false; - gf_boolean_t source_exists = _gf_false; - int source = -1; - - for (i = 0; i < child_count; i++) { - child = success_children[i]; - if (child < 0) - break; - if (!bufs[child].ia_size) { - sink_exists = _gf_true; - continue; - } - if (!source_exists) { - source_exists = _gf_true; - source = child; - continue; - } - if (bufs[source].ia_size != bufs[child].ia_size) { - nsources = -1; - goto out; - } - } - if (!source_exists && !sink_exists) { - nsources = -1; - goto out; - } - - if (!source_exists || !sink_exists) - goto out; - - for (i = 0; i < child_count; i++) { - child = success_children[i]; - if (child < 0) - break; - if (bufs[child].ia_size) { - sources[child] = 1; - nsources++; - } - } -out: - return nsources; -} + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, gfid); -char * -afr_get_character_str (afr_node_type type) -{ - char *character = NULL; - - switch (type) { - case AFR_NODE_INNOCENT: - character = "innocent"; - break; - case AFR_NODE_FOOL: - character = "fool"; - break; - case AFR_NODE_WISE: - character = "wise"; - break; - default: - character = "invalid"; - break; - } - return character; -} + AFR_ONLIST (discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xattr_req); -afr_node_type -afr_find_child_character_type (int32_t *pending_row, int32_t child, - unsigned int child_count) -{ - afr_node_type type = AFR_NODE_INVALID; + afr_replies_copy (replies, local->replies, priv->child_count); - GF_ASSERT ((child >= 0) && (child < child_count)); + loc_wipe (&loc); + dict_unref (xattr_req); - if (afr_sh_is_innocent (pending_row, child_count)) - type = AFR_NODE_INNOCENT; - else if (afr_sh_is_fool (pending_row, child, child_count)) - type = AFR_NODE_FOOL; - else if (afr_sh_is_wise (pending_row, child, child_count)) - type = AFR_NODE_WISE; - return type; + return 0; } int -afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, - int32_t **pending_matrix, int32_t *sources, - int32_t *success_children, afr_transaction_type type, - int32_t *subvol_status, gf_boolean_t ignore_ignorant) +afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies) { - afr_private_t *priv = NULL; - afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; - int nsources = -1; - unsigned char *ignorant_subvols = NULL; - unsigned int child_count = 0; - - priv = this->private; - child_count = priv->child_count; - - if (afr_get_children_count (success_children, priv->child_count) == 0) - goto out; - - if (!ignore_ignorant) { - ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), - child_count, gf_afr_mt_char); - if (NULL == ignorant_subvols) - goto out; - } - - afr_build_pending_matrix (priv->pending_key, pending_matrix, - ignorant_subvols, xattr, type, - priv->child_count); - - if (!ignore_ignorant) - afr_mark_ignorant_subvols_as_pending (pending_matrix, - ignorant_subvols, - priv->child_count); - sh_type = afr_self_heal_type_for_transaction (type); - if (AFR_SELF_HEAL_INVALID == sh_type) - goto out; - - afr_sh_print_pending_matrix (pending_matrix, this); - - nsources = afr_mark_sources (this, sources, pending_matrix, bufs, - sh_type, success_children, subvol_status); -out: - GF_FREE (ignorant_subvols); - return nsources; -} + afr_private_t *priv = NULL; -void -afr_find_character_types (afr_node_character *characters, - int32_t **pending_matrix, int32_t *success_children, - unsigned int child_count) -{ - afr_node_type type = AFR_NODE_INVALID; - int child = 0; - int i = 0; - - for (i = 0; i < child_count; i++) { - child = success_children[i]; - if (child == -1) - break; - type = afr_find_child_character_type (pending_matrix[child], - child, child_count); - characters[child].type = type; - } -} + priv = frame->this->private; -void -afr_mark_success_children_sources (int32_t *sources, int32_t *success_children, - unsigned int child_count) -{ - int i = 0; - for (i = 0; i < child_count; i++) { - if (success_children[i] == -1) - break; - sources[success_children[i]] = 1; - } + return afr_selfheal_unlocked_discover_on (frame, inode, gfid, replies, + priv->child_up); } -/** - * mark_sources: Mark all 'source' nodes and return number of source - * nodes found - * - * A node (a row in the pending matrix) belongs to one of - * three categories: - * - * M is the pending matrix. - * - * 'innocent' - M[i] is all zeroes - * 'fool' - M[i] has i'th element = 1 (self-reference) - * 'wise' - M[i] has i'th element = 0, others are 1 or 0. - * - * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is - * needed. - * - * A 'wise' node can be a source. If two 'wise' nodes conflict, it is - * a split-brain. If one wise node refers to the other but the other doesn't - * refer back, the referrer is a source. - * - * All fools are sinks, unless there are no 'wise' nodes. In that case, - * one of the fools is made a source. - */ + int -afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix, - struct iatt *bufs, afr_self_heal_type type, - int32_t *success_children, int32_t *subvol_status) +afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - /* stores the 'characters' (innocent, fool, wise) of the nodes */ - afr_node_character *characters = NULL; - int nsources = -1; - unsigned int child_count = 0; - afr_private_t *priv = NULL; - - priv = this->private; - child_count = priv->child_count; - characters = GF_CALLOC (sizeof (afr_node_character), - child_count, gf_afr_mt_afr_node_character); - if (!characters) - goto out; - - this = THIS; - - /* start clean */ - memset (sources, 0, sizeof (*sources) * child_count); - nsources = 0; - afr_find_character_types (characters, pending_matrix, success_children, - child_count); - if (afr_sh_all_nodes_innocent (characters, child_count)) { - switch (type) { - case AFR_SELF_HEAL_METADATA: - nsources = afr_sh_mark_lowest_uid_as_source (bufs, - success_children, - child_count, - sources); - break; - case AFR_SELF_HEAL_DATA: - nsources = afr_sh_mark_zero_size_file_as_sink (bufs, - success_children, - child_count, - sources); - if ((nsources < 0) && subvol_status) - *subvol_status |= SPLIT_BRAIN; - break; - default: - break; - } - goto out; - } - - if (afr_sh_wise_nodes_exist (characters, child_count)) { - afr_sh_compute_wisdom (pending_matrix, characters, child_count); - - if (afr_sh_wise_nodes_conflict (characters, child_count)) { - if (subvol_status) - *subvol_status |= SPLIT_BRAIN; - nsources = -1; - } else { - nsources = afr_sh_mark_wisest_as_sources (sources, - characters, - child_count); - } - } else { - if (subvol_status) - *subvol_status |= ALL_FOOLS; - nsources = afr_mark_biggest_of_fools_as_source (sources, - pending_matrix, - characters, - success_children, - child_count, bufs); - } + afr_local_t *local = NULL; + int i = 0; -out: - if (nsources == 0) - afr_mark_success_children_sources (sources, success_children, - child_count); - GF_FREE (characters); + local = frame->local; + i = (long) cookie; - gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources); - return nsources; -} + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; -void -afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr, - int32_t *delta_matrix[], unsigned char success[], - int child_count, afr_transaction_type type) -{ - int tgt = 0; - int src = 0; - int value = 0; - - afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL, - xattr, type, priv->child_count); - - /* - * The algorithm here has two parts. First, for each subvol indexed - * as tgt, we try to figure out what count everyone should have for it. - * If the self-heal succeeded, that's easy; the value is zero. - * Otherwise, the value is the maximum of the succeeding nodes' counts. - * Once we know the value, we loop through (possibly for a second time) - * setting each count to the difference so that when we're done all - * succeeding nodes will have the same count for tgt. - */ - for (tgt = 0; tgt < priv->child_count; ++tgt) { - value = 0; - if (!success[tgt]) { - /* Find the maximum. */ - for (src = 0; src < priv->child_count; ++src) { - if (!success[src]) { - continue; - } - if (delta_matrix[src][tgt] > value) { - value = delta_matrix[src][tgt]; - } - } - } - /* Force everyone who succeeded to the chosen value. */ - for (src = 0; src < priv->child_count; ++src) { - if (success[src]) { - delta_matrix[src][tgt] = value - - delta_matrix[src][tgt]; - } - else { - delta_matrix[src][tgt] = 0; - } - } - } + syncbarrier_wake (&local->barrier); + + return 0; } int -afr_sh_delta_to_xattr (xlator_t *this, - int32_t *delta_matrix[], dict_t *xattr[], - int child_count, afr_transaction_type type) -{ - int i = 0; - int j = 0; - int k = 0; - int ret = 0; - int32_t *pending = NULL; - int32_t *local_pending = NULL; - afr_private_t *priv = NULL; - - priv = this->private; - for (i = 0; i < child_count; i++) { - if (!xattr[i]) - continue; - - local_pending = NULL; - for (j = 0; j < child_count; j++) { - pending = GF_CALLOC (sizeof (int32_t), 3, - gf_afr_mt_int32_t); - - if (!pending) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate pending entry " - "for %s[%d] on %s", - priv->pending_key[j], type, - priv->children[i]->name); - continue; - } - /* 3 = data+metadata+entry */ - - k = afr_index_for_transaction_type (type); - - pending[k] = hton32 (delta_matrix[i][j]); - - if (j == i) { - local_pending = pending; - continue; - } - ret = dict_set_bin (xattr[i], priv->pending_key[j], - pending, - AFR_NUM_CHANGE_LOGS * sizeof (int32_t)); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "Unable to set dict value."); - GF_FREE (pending); - } - } - if (local_pending) { - ret = dict_set_bin (xattr[i], priv->pending_key[i], - local_pending, - AFR_NUM_CHANGE_LOGS * sizeof (int32_t)); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "Unable to set dict value."); - GF_FREE (local_pending); - } - } - } - return 0; +afr_selfheal_locked_fill (call_frame_t *frame, xlator_t *this, + unsigned char *locked_on) +{ + int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int count = 0; + + local = frame->local; + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].valid && local->replies[i].op_ret == 0) { + locked_on[i] = 1; + count++; + } else { + locked_on[i] = 0; + } + } + + return count; } int -afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) +afr_selfheal_tryinodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + unsigned char *locked_on) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - - afr_sh_reset (frame, this); - - if (local->unhealable) { - gf_log (this->name, GF_LOG_DEBUG, - "split brain found, aborting selfheal of %s", - local->loc.path); - } - - if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - sh->completion_cbk (frame, this); - } else { - gf_log (this->name, GF_LOG_TRACE, - "proceeding to metadata check on %s", - local->loc.path); - afr_self_heal_metadata (frame, this); - } - - return 0; -} + loc_t loc = {0,}; + struct gf_flock flock = {0, }; + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); -static int -afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; - local = frame->local; - int_lock = &local->internal_lock; + AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLK, &flock, NULL); - int_lock->lock_cbk = afr_sh_missing_entries_done; - afr_unlock (frame, this); + loc_wipe (&loc); - return 0; + return afr_selfheal_locked_fill (frame, this, locked_on); } + int -afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count) -{ - int ret = -ENOMEM; - sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf), - gf_afr_mt_iatt); - if (!sh->buf) - goto out; - sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs), - gf_afr_mt_iatt); - if (!sh->parentbufs) - goto out; - sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno), - gf_afr_mt_int); - if (!sh->child_errno) - goto out; - sh->success_children = afr_children_create (child_count); - if (!sh->success_children) - goto out; - sh->fresh_children = afr_children_create (child_count); - if (!sh->fresh_children) - goto out; - sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr), - gf_afr_mt_dict_t); - if (!sh->xattr) - goto out; - ret = 0; -out: - return ret; -} +afr_selfheal_inodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + unsigned char *locked_on) +{ + loc_t loc = {0,}; + struct gf_flock flock = {0, }; + afr_local_t *local = NULL; + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + local = frame->local; + + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); + + flock.l_type = F_WRLCK; + flock.l_start = off; + flock.l_len = size; + + AFR_ONALL (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLK, &flock, NULL); + + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == -1 && + local->replies[i].op_errno == EAGAIN) { + afr_selfheal_locked_fill (frame, this, locked_on); + afr_selfheal_uninodelk (frame, this, inode, dom, off, + size, locked_on); + + AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, dom, + &loc, F_SETLKW, &flock, NULL); + break; + } + } -void -afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie, - xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, - dict_t *xattr, struct iatt *postparent, - loc_t *loc) -{ - int child_index = 0; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - priv = this->private; - sh = &local->self_heal; - child_index = (long) cookie; - - LOCK (&frame->lock); - { - if (op_ret == 0) { - sh->buf[child_index] = *buf; - sh->parentbufs[child_index] = *postparent; - sh->success_children[sh->success_count] = child_index; - sh->success_count++; - sh->xattr[child_index] = dict_ref (xattr); - } else { - gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume" - " %s => -1 (%s)", loc->path, - priv->children[child_index]->name, - strerror (op_errno)); - local->self_heal.child_errno[child_index] = op_errno; - } - } - UNLOCK (&frame->lock); - return; -} + loc_wipe (&loc); -gf_boolean_t -afr_valid_ia_type (ia_type_t ia_type) -{ - switch (ia_type) { - case IA_IFSOCK: - case IA_IFREG: - case IA_IFBLK: - case IA_IFCHR: - case IA_IFIFO: - case IA_IFLNK: - case IA_IFDIR: - return _gf_true; - default: - return _gf_false; - } - return _gf_false; + return afr_selfheal_locked_fill (frame, this, locked_on); } + int -afr_impunge_frame_create (call_frame_t *frame, xlator_t *this, - int active_source, call_frame_t **impunge_frame) +afr_selfheal_uninodelk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, off_t off, size_t size, + const unsigned char *locked_on) { - afr_local_t *local = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *impunge_sh = NULL; - int32_t op_errno = 0; - afr_private_t *priv = NULL; - int ret = 0; - call_frame_t *new_frame = NULL; - - op_errno = ENOMEM; - priv = this->private; - new_frame = copy_frame (frame); - if (!new_frame) { - goto out; - } - - AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out); - - local = frame->local; - new_frame->local = impunge_local; - impunge_sh = &impunge_local->self_heal; - impunge_sh->sh_frame = frame; - impunge_sh->active_source = active_source; - impunge_local->child_up = memdup (local->child_up, - sizeof (*local->child_up) * - priv->child_count); - if (!impunge_local->child_up) - goto out; - - impunge_local->pending = afr_matrix_create (priv->child_count, - AFR_NUM_CHANGE_LOGS); - if (!impunge_local->pending) - goto out; - - ret = afr_sh_common_create (impunge_sh, priv->child_count); - if (ret) { - op_errno = -ret; - goto out; - } - op_errno = 0; - *impunge_frame = new_frame; -out: - if (op_errno && new_frame) - AFR_STACK_DESTROY (new_frame); - return -op_errno; -} + loc_t loc = {0,}; + struct gf_flock flock = {0, }; -void -afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this, - struct iatt *buf, - struct iatt *postparent, - afr_impunge_done_cbk_t impunge_done) -{ - call_frame_t *impunge_frame = NULL; - afr_local_t *local = NULL; - afr_local_t *impunge_local = NULL; - afr_self_heal_t *sh = NULL; - afr_self_heal_t *impunge_sh = NULL; - int ret = 0; - unsigned int enoent_count = 0; - afr_private_t *priv = NULL; - int i = 0; - int32_t op_errno = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - enoent_count = afr_errno_count (NULL, sh->child_errno, - priv->child_count, ENOENT); - if (!enoent_count) { - gf_log (this->name, GF_LOG_INFO, - "no missing files - %s. proceeding to metadata check", - local->loc.path); - goto out; - } - sh->impunge_done = impunge_done; - ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame); - if (ret) - goto out; - impunge_local = impunge_frame->local; - impunge_sh = &impunge_local->self_heal; - loc_copy (&impunge_local->loc, &local->loc); - ret = afr_build_parent_loc (&impunge_sh->parent_loc, - &impunge_local->loc, &op_errno); - if (ret) { - ret = -op_errno; - goto out; - } - impunge_local->call_count = enoent_count; - impunge_sh->entrybuf = sh->buf[sh->source]; - impunge_sh->parentbuf = sh->parentbufs[sh->source]; - for (i = 0; i < priv->child_count; i++) { - if (!impunge_local->child_up[i]) { - impunge_sh->child_errno[i] = ENOTCONN; - continue; - } - if (sh->child_errno[i] != ENOENT) { - impunge_sh->child_errno[i] = EEXIST; - continue; - } - } - for (i = 0; i < priv->child_count; i++) { - if (sh->child_errno[i] != ENOENT) - continue; - afr_sh_entry_impunge_create (impunge_frame, this, i); - enoent_count--; - } - GF_ASSERT (!enoent_count); - return; -out: - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, " - "reason: %s", local->loc.path, strerror (-ret)); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - } - afr_sh_missing_entries_finish (frame, this); -} -int -afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - if (op_ret < 0) - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_missing_entries_finish (frame, this); - return 0; -} + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); -static int -sh_missing_entries_create (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - int type = 0; - struct iatt *buf = NULL; - struct iatt *postparent = NULL; - - local = frame->local; - sh = &local->self_heal; - - buf = &sh->buf[sh->source]; - postparent = &sh->parentbufs[sh->source]; - - type = buf->ia_type; - if (!afr_valid_ia_type (type)) { - gf_log (this->name, GF_LOG_ERROR, - "%s: unknown file type: 0%o", local->loc.path, type); - afr_set_local_for_unhealable (local); - afr_sh_missing_entries_finish (frame, this); - goto out; - } - - afr_sh_missing_entry_call_impunge_recreate (frame, this, - buf, postparent, - afr_sh_create_entry_cbk); -out: - return 0; -} + flock.l_type = F_UNLCK; + flock.l_start = off; + flock.l_len = size; -void -afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - ia_type_t ia_type = IA_INVAL; - int32_t nsources = 0; - loc_t *loc = NULL; - int32_t subvol_status = 0; - afr_transaction_type txn_type = AFR_DATA_TRANSACTION; - gf_boolean_t split_brain = _gf_false; - int read_child = -1; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - loc = &local->loc; - - if (op_ret < 0) { - if (op_errno == EIO) { - afr_set_local_for_unhealable (local); - } - // EIO can happen if finding the fresh parent dir failed - goto out; - } - - //now No chance for the ia_type to conflict - ia_type = sh->buf[sh->success_children[0]].ia_type; - txn_type = afr_transaction_type_get (ia_type); - nsources = afr_build_sources (this, sh->xattr, sh->buf, - sh->pending_matrix, sh->sources, - sh->success_children, txn_type, - &subvol_status, _gf_false); - if (nsources < 0) { - gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," - " in missing entry self-heal, continuing with the rest" - " of the self-heals", local->loc.path); - if (subvol_status & SPLIT_BRAIN) { - split_brain = _gf_true; - switch (txn_type) { - case AFR_DATA_TRANSACTION: - nsources = 1; - sh->sources[sh->success_children[0]] = 1; - break; - case AFR_ENTRY_TRANSACTION: - read_child = afr_get_no_xattr_dir_read_child - (this, - sh->success_children, - sh->buf); - sh->sources[read_child] = 1; - nsources = 1; - break; - default: - op_errno = EIO; - goto out; - } - } else { - op_errno = EIO; - goto out; - } - } - - afr_get_fresh_children (sh->success_children, sh->sources, - sh->fresh_children, priv->child_count); - sh->source = sh->fresh_children[0]; - if (sh->source == -1) { - gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); - op_errno = EIO; - goto out; - } - - if (sh->gfid_sh_success_cbk) - sh->gfid_sh_success_cbk (frame, this); - sh->type = sh->buf[sh->source].ia_type; - if (uuid_is_null (loc->inode->gfid)) - uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid); - if (split_brain) { - afr_sh_missing_entries_finish (frame, this); - } else { - sh_missing_entries_create (frame, this); - } - return; -out: - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_set_error (sh, op_errno); - afr_sh_missing_entries_finish (frame, this); - return; -} + AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, inodelk, + dom, &loc, F_SETLK, &flock, NULL); -static int -afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xattr, - struct iatt *postparent) -{ - int call_count = 0; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, - op_errno, inode, buf, xattr, - postparent, &sh->lookup_loc); - call_count = afr_frame_return (frame); - - if (call_count) - goto out; - op_ret = -1; - if (!sh->success_count) { - op_errno = afr_resultant_errno_get (NULL, sh->child_errno, - priv->child_count); - gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, " - "reason %s", sh->lookup_loc.path, - strerror (op_errno)); - goto done; - } - - if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) && - (afr_conflicting_iattrs (sh->buf, sh->success_children, - priv->child_count, - sh->lookup_loc.path, this->name))) { - op_errno = EIO; - gf_log (this->name, GF_LOG_ERROR, "Conflicting entries " - "for %s", sh->lookup_loc.path); - goto done; - } - - if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) && - (afr_gfid_missing_count (this->name, sh->success_children, - sh->buf, priv->child_count, - sh->lookup_loc.path))) { - op_errno = ENODATA; - gf_log (this->name, GF_LOG_ERROR, "Missing Gfids " - "for %s", sh->lookup_loc.path); - goto done; - } - op_ret = 0; - -done: - sh->lookup_done (frame, this, op_ret, op_errno); -out: - return 0; + loc_wipe (&loc); + + return 0; } + int -afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, - int32_t op_ret, int32_t op_errno) +afr_selfheal_tryentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on) { - int call_count = 0; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - sh = &local->self_heal; - - GF_ASSERT (sh->post_remove_call); - if ((op_ret == -1) && (op_errno != ENOENT)) { - gf_log (this->name, GF_LOG_ERROR, - "purge entry %s failed, on child %d reason, %s", - local->loc.path, child, strerror (op_errno)); - LOCK (&frame->lock); - { - afr_sh_set_error (sh, EIO); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - } - UNLOCK (&frame->lock); - } - call_count = afr_frame_return (frame); - if (call_count == 0) - sh->post_remove_call (frame, this); - return 0; -} + loc_t loc = {0,}; -void -afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this, - int child_index, struct iatt *buf, - struct iatt *parentbuf, - afr_expunge_done_cbk_t expunge_done) -{ - call_frame_t *expunge_frame = NULL; - afr_local_t *local = NULL; - afr_local_t *expunge_local = NULL; - afr_self_heal_t *sh = NULL; - afr_self_heal_t *expunge_sh = NULL; - int32_t op_errno = 0; - int ret = 0; - - expunge_frame = copy_frame (frame); - if (!expunge_frame) { - goto out; - } - - AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out); - - local = frame->local; - sh = &local->self_heal; - expunge_frame->local = expunge_local; - expunge_sh = &expunge_local->self_heal; - expunge_sh->sh_frame = frame; - loc_copy (&expunge_local->loc, &local->loc); - ret = afr_build_parent_loc (&expunge_sh->parent_loc, - &expunge_local->loc, &op_errno); - if (ret) { - ret = -op_errno; - goto out; - } - sh->expunge_done = expunge_done; - afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf, - parentbuf); - return; -out: - gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s", - local->loc.path, strerror (op_errno)); - expunge_done (frame, this, child_index, -1, op_errno); -} + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); -void -afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children, - int32_t *fresh_children, - unsigned int child_count) -{ - int i = 0; - - for (i = 0; i < child_count; i++) { - if (afr_is_child_present (success_children, child_count, i) && - !afr_is_child_present (fresh_children, child_count, i)) { - sh->child_errno[i] = ENOENT; - GF_ASSERT (sh->xattr[i]); - dict_unref (sh->xattr[i]); - sh->xattr[i] = NULL; - } - } -} + AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, + &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); -int -afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { - afr_sh_missing_entries_finish (frame, this); - } else { - if (afr_gfid_missing_count (this->name, sh->fresh_children, - sh->buf, priv->child_count, - local->loc.path)) { - afr_sh_common_lookup (frame, this, &local->loc, - afr_sh_missing_entries_lookup_done, - sh->sh_gfid_req, - AFR_LOOKUP_FAIL_CONFLICTS| - AFR_LOOKUP_FAIL_MISSING_GFIDS, - NULL); - } else { - //No need to set gfid so goto missing entries lookup done - //Behave as if you have done the lookup - afr_sh_remove_stale_lookup_info (sh, - sh->success_children, - sh->fresh_children, - priv->child_count); - afr_children_copy (sh->success_children, - sh->fresh_children, - priv->child_count); - afr_sh_missing_entries_lookup_done (frame, this, 0, 0); - } - } - return 0; + loc_wipe (&loc); + + return afr_selfheal_locked_fill (frame, this, locked_on); } -gf_boolean_t -afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv, - int child) + +int +afr_selfheal_entrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on) { - afr_self_heal_t *sh = NULL; + loc_t loc = {0,}; + afr_local_t *local = NULL; + int i = 0; + afr_private_t *priv = NULL; - sh = &local->self_heal; + priv = this->private; + local = frame->local; - if (local->child_up[child] && - (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count, - child)) - && (sh->child_errno[child] != ENOENT)) - return _gf_true; + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); - return _gf_false; -} + AFR_ONALL (frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, + name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL); -gf_boolean_t -afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv, - int child) -{ - afr_self_heal_t *sh = NULL; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == -1 && + local->replies[i].op_errno == EAGAIN) { + afr_selfheal_locked_fill (frame, this, locked_on); + afr_selfheal_unentrylk (frame, this, inode, dom, name, + locked_on); - sh = &local->self_heal; + AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, dom, + &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + break; + } + } - if (local->child_up[child] && - (!afr_is_child_present (sh->fresh_children, priv->child_count, - child)) - && (sh->child_errno[child] != ENOENT)) - return _gf_true; + loc_wipe (&loc); - return _gf_false; + return afr_selfheal_locked_fill (frame, this, locked_on); } -void -afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this, - gf_boolean_t purge_condition (afr_local_t *local, - afr_private_t *priv, - int child)) -{ - afr_local_t *local = NULL; - afr_private_t *priv = NULL; - afr_self_heal_t *sh = NULL; - int i = 0; - int call_count = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - for (i = 0; i < priv->child_count; i++) { - if (purge_condition (local, priv, i)) - call_count++; - } - - if (call_count == 0) { - sh->post_remove_call (frame, this); - goto out; - } - - local->call_count = call_count; - for (i = 0; i < priv->child_count; i++) { - if (!purge_condition (local, priv, i)) - continue; - gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s " - "on %s", local->loc.path, priv->children[i]->name); - afr_sh_call_entry_expunge_remove (frame, this, - (long) i, &sh->buf[i], - &sh->parentbufs[i], - afr_sh_remove_entry_cbk); - } -out: - return; -} -void -afr_sh_purge_entry (call_frame_t *frame, xlator_t *this) +int +afr_selfheal_unentrylk (call_frame_t *frame, xlator_t *this, inode_t *inode, + char *dom, const char *name, unsigned char *locked_on) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; + loc_t loc = {0,}; + + loc.inode = inode_ref (inode); + uuid_copy (loc.gfid, inode->gfid); + + AFR_ONLIST (locked_on, frame, afr_selfheal_lock_cbk, entrylk, + dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL); - local = frame->local; - sh = &local->self_heal; - sh->post_remove_call = afr_sh_missing_entries_finish; + loc_wipe (&loc); - afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition); + return 0; } -void -afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this) + +gf_boolean_t +afr_is_pending_set (xlator_t *this, dict_t *xdata, int type) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int i = 0; + int idx = -1; + afr_private_t *priv = NULL; + void *pending_raw = NULL; + int *pending_int = NULL; + int i = 0; - local = frame->local; - sh = &local->self_heal; - priv = this->private; + priv = this->private; + idx = afr_index_for_transaction_type (type); - sh->post_remove_call = afr_sh_purge_stale_entries_done; + if (dict_get_ptr (xdata, AFR_DIRTY, &pending_raw) == 0) { + if (pending_raw) { + pending_int = pending_raw; - for (i = 0; i < priv->child_count; i++) { - if (afr_is_child_present (sh->fresh_children, - priv->child_count, i)) - continue; + if (ntoh32 (pending_int[idx])) + return _gf_true; + } + } - if ((!local->child_up[i]) || sh->child_errno[i] != 0) - continue; + for (i = 0; i < priv->child_count; i++) { + if (dict_get_ptr (xdata, priv->pending_key[i], + &pending_raw)) + continue; + if (!pending_raw) + continue; + pending_int = pending_raw; - GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) || - uuid_is_null (sh->buf[i].ia_gfid)); + if (ntoh32 (pending_int[idx])) + return _gf_true; + } - if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) || - (uuid_compare (sh->buf[i].ia_gfid, - sh->entrybuf.ia_gfid))) - continue; + return _gf_false; +} - afr_children_add_child (sh->fresh_children, i, - priv->child_count); - } - afr_sh_purge_entry_common (frame, this, - afr_sh_purge_stale_entry_condition); +gf_boolean_t +afr_is_data_set (xlator_t *this, dict_t *xdata) +{ + return afr_is_pending_set (this, xdata, AFR_DATA_TRANSACTION); } -void -afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs, - struct iatt *save, - unsigned int child_count) +gf_boolean_t +afr_is_metadata_set (xlator_t *this, dict_t *xdata) { - int i = 0; - int child = 0; - gf_boolean_t saved = _gf_false; - - GF_ASSERT (save); - //if iatt buf with gfid exists sets it - for (i = 0; i < child_count; i++) { - child = children[i]; - if (child == -1) - break; - *save = bufs[child]; - saved = _gf_true; - if (!uuid_is_null (save->ia_gfid)) - break; - } - GF_ASSERT (saved); + return afr_is_pending_set (this, xdata, AFR_METADATA_TRANSACTION); } -void -afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh, - unsigned int child_count) +gf_boolean_t +afr_is_entry_set (xlator_t *this, dict_t *xdata) { - afr_children_intersection_get (sh->success_children, - sh->fresh_parent_dirs, - sh->sources, child_count); - afr_get_fresh_children (sh->success_children, sh->sources, - sh->fresh_children, child_count); - memset (sh->sources, 0, sizeof (*sh->sources) * child_count); + return afr_is_pending_set (this, xdata, AFR_ENTRY_TRANSACTION); } + void -afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) +afr_inode_link (inode_t *inode, struct iatt *iatt) { - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int32_t fresh_child_enoents = 0; - int32_t fresh_parent_count = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - if (op_ret < 0) - goto fail; - afr_get_children_of_fresh_parent_dirs (sh, priv->child_count); - fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs, - priv->child_count); - //we need the enoent count of the subvols present in fresh_parent_dirs - fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs, - sh->child_errno, - priv->child_count, ENOENT); - if (fresh_child_enoents == fresh_parent_count) { - afr_sh_set_error (sh, ENOENT); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_purge_entry (frame, this); - } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, - priv->child_count, local->loc.path, - this->name)) { - afr_sh_save_child_iatts_from_policy (sh->fresh_children, - sh->buf, &sh->entrybuf, - priv->child_count); - afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf, - sh->fresh_children, - priv->child_count); - afr_sh_purge_stale_entry (frame, this); - } else { - op_errno = EIO; - afr_set_local_for_unhealable (local); - goto fail; - } - - return; - -fail: - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_set_error (sh, op_errno); - afr_sh_missing_entries_finish (frame, this); - return; -} + inode_t *linked_inode = NULL; -static void -afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int enoent_count = 0; - int nsources = 0; - int source = -1; - int32_t subvol_status = 0; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - if (op_ret < 0) - goto out; - enoent_count = afr_errno_count (NULL, sh->child_errno, - priv->child_count, ENOENT); - if (enoent_count > 0) { - gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s," - " in missing entry self-heal, aborting missing-entry " - "self-heal", - local->loc.path); - afr_sh_missing_entries_finish (frame, this); - return; - } - - nsources = afr_build_sources (this, sh->xattr, sh->buf, - sh->pending_matrix, sh->sources, - sh->success_children, - AFR_ENTRY_TRANSACTION, &subvol_status, - _gf_true); - if ((subvol_status & ALL_FOOLS) || - (subvol_status & SPLIT_BRAIN)) { - gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative " - "merge", sh->parent_loc.path); - afr_mark_success_children_sources (sh->sources, - sh->success_children, - priv->child_count); - } else if (nsources < 0) { - gf_log (this->name, GF_LOG_ERROR, "No sources for dir " - "of %s, in missing entry self-heal, aborting " - "self-heal", local->loc.path); - op_errno = EIO; - goto out; - } - - source = afr_sh_select_source (sh->sources, priv->child_count); - if (source == -1) { - GF_ASSERT (0); - gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); - op_errno = EIO; - goto out; - } - afr_get_fresh_children (sh->success_children, sh->sources, - sh->fresh_parent_dirs, priv->child_count); - afr_sh_common_lookup (frame, this, &local->loc, - afr_sh_children_lookup_done, NULL, 0, - NULL); - return; + linked_inode = inode_link (inode, NULL, NULL, iatt); -out: - afr_sh_set_error (sh, op_errno); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_missing_entries_finish (frame, this); - return; -} + uuid_copy (inode->gfid, iatt->ia_gfid); + inode->ia_type = iatt->ia_type; -void -afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count) -{ - int i = 0; - - for (i = 0; i < child_count; i++) { - memset (&sh->buf[i], 0, sizeof (sh->buf[i])); - memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i])); - sh->child_errno[i] = 0; - } - memset (&sh->parentbuf, 0, sizeof (sh->parentbuf)); - sh->success_count = 0; - afr_reset_children (sh->success_children, child_count); - afr_reset_children (sh->fresh_children, child_count); - afr_reset_xattr (sh->xattr, child_count); - loc_wipe (&sh->lookup_loc); + if (linked_inode) { + inode_lookup (linked_inode); + inode_unref (linked_inode); + } } -/* afr self-heal state will be lost if this call is made - * please check the afr_sh_common_reset that is called in this function + +/* + * This function inspects the looked up replies (in an unlocked manner) + * and decides whether a locked verification and possible healing is + * required or not. It updates the three booleans for each type + * of healing. If the boolean flag gets set to FALSE, then we are sure + * no healing is required. If the boolean flag gets set to TRUE then + * we have to proceed with locked reinspection. */ + int -afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - afr_lookup_done_cbk_t lookup_done , uuid_t gfid, - int32_t flags, dict_t *xdata) +afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, + inode_t *inode, uuid_t gfid, + gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, + gf_boolean_t *entry_selfheal) { - afr_local_t *local = NULL; - int i = 0; - int call_count = 0; - afr_private_t *priv = NULL; - dict_t *xattr_req = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - priv = this->private; - sh = &local->self_heal; - - call_count = afr_up_children_count (local->child_up, priv->child_count); - - local->call_count = call_count; - - xattr_req = dict_new(); - - if (xattr_req) { - afr_xattr_req_prepare (this, xattr_req, loc->path); - if (gfid) { - gf_log (this->name, GF_LOG_DEBUG, - "looking up %s with gfid: %s", - loc->path, uuid_utoa (gfid)); - GF_ASSERT (!uuid_is_null (gfid)); - afr_set_dict_gfid (xattr_req, gfid); - } - } - - afr_sh_common_reset (sh, priv->child_count); - sh->lookup_done = lookup_done; - loc_copy (&sh->lookup_loc, loc); - sh->lookup_flags = flags; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - gf_log (this->name, GF_LOG_DEBUG, - "looking up %s on subvolume %s", - loc->path, priv->children[i]->name); - - STACK_WIND_COOKIE (frame, - afr_sh_common_lookup_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->lookup, - loc, xattr_req); - - if (!--call_count) - break; - } - } - - if (xattr_req) - dict_unref (xattr_req); - - return 0; -} + afr_private_t *priv = NULL; + int i = 0; + int valid_cnt = 0; + struct iatt first = {0, }; + struct afr_reply *replies = NULL; + int ret = -1; + priv = this->private; + replies = alloca0 (sizeof (*replies) * priv->child_count); -int -afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame, - xlator_t *this) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - - local = frame->local; - int_lock = &local->internal_lock; - sh = &local->self_heal; - - if (int_lock->lock_op_ret < 0) { - gf_log (this->name, GF_LOG_INFO, - "Non blocking entrylks failed."); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - afr_sh_missing_entries_done (frame, this); - } else { - - gf_log (this->name, GF_LOG_DEBUG, - "Non blocking entrylks done. Proceeding to FOP"); - afr_sh_common_lookup (frame, this, &sh->parent_loc, - afr_sh_find_fresh_parents, - NULL, AFR_LOOKUP_FAIL_CONFLICTS, - NULL); - } - - return 0; -} + ret = afr_selfheal_unlocked_discover (frame, inode, gfid, replies); + if (ret) + return ret; -int -afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, - char *base_name, afr_lock_cbk_t lock_cbk) -{ - afr_internal_lock_t *int_lock = NULL; - afr_local_t *local = NULL; - afr_private_t *priv = NULL; + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; + if (replies[i].op_ret == -1) + continue; - priv = this->private; - local = frame->local; - int_lock = &local->internal_lock; + if (afr_is_data_set (this, replies[i].xdata)) + *data_selfheal = _gf_true; - int_lock->transaction_lk_type = AFR_SELFHEAL_LK; - int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK; + if (afr_is_metadata_set (this, replies[i].xdata)) + *metadata_selfheal = _gf_true; - afr_set_lock_number (frame, this); + if (afr_is_entry_set (this, replies[i].xdata)) + *entry_selfheal = _gf_true; - int_lock->lk_basename = base_name; - int_lock->lk_loc = loc; - int_lock->lock_cbk = lock_cbk; - int_lock->domain = this->name; + valid_cnt ++; + if (valid_cnt == 1) { + first = replies[i].poststat; + continue; + } - int_lock->lockee_count = 0; - afr_init_entry_lockee (&int_lock->lockee[0], local, loc, - base_name, priv->child_count); - int_lock->lockee_count++; - afr_nonblocking_entrylk (frame, this); + if (!IA_EQUAL (first, replies[i].poststat, type)) { + gf_log (this->name, GF_LOG_ERROR, + "TYPE mismatch %d vs %d on %s for gfid:%s", + (int) first.ia_type, + (int) replies[i].poststat.ia_type, + priv->children[i]->name, + uuid_utoa (replies[i].poststat.ia_gfid)); + return -EIO; + } - return 0; -} + if (!IA_EQUAL (first, replies[i].poststat, uid)) { + gf_log (this->name, GF_LOG_DEBUG, + "UID mismatch %d vs %d on %s for gfid:%s", + (int) first.ia_uid, + (int) replies[i].poststat.ia_uid, + priv->children[i]->name, + uuid_utoa (replies[i].poststat.ia_gfid)); -static int -afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this, - afr_lock_cbk_t lock_cbk) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_internal_lock_t *int_lock = NULL; - int ret = -1; - int32_t op_errno = 0; - - local = frame->local; - sh = &local->self_heal; - - gf_log (this->name, GF_LOG_TRACE, - "attempting to recreate missing entries for path=%s", - local->loc.path); - - ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno); - if (ret) - goto out; - - afr_sh_entrylk (frame, this, &sh->parent_loc, NULL, - lock_cbk); - return 0; -out: - int_lock = &local->internal_lock; - int_lock->lock_op_ret = -1; - lock_cbk (frame, this); - return 0; -} + *metadata_selfheal = _gf_true; + } -static int -afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; + if (!IA_EQUAL (first, replies[i].poststat, gid)) { + gf_log (this->name, GF_LOG_DEBUG, + "GID mismatch %d vs %d on %s for gfid:%s", + (int) first.ia_uid, + (int) replies[i].poststat.ia_uid, + priv->children[i]->name, + uuid_utoa (replies[i].poststat.ia_gfid)); - local = frame->local; - sh = &local->self_heal; + *metadata_selfheal = _gf_true; + } - sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY; + if (!IA_EQUAL (first, replies[i].poststat, prot)) { + gf_log (this->name, GF_LOG_DEBUG, + "MODE mismatch %d vs %d on %s for gfid:%s", + (int) st_mode_from_ia (first.ia_prot, 0), + (int) st_mode_from_ia (replies[i].poststat.ia_prot, 0), + priv->children[i]->name, + uuid_utoa (replies[i].poststat.ia_gfid)); - afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + *metadata_selfheal = _gf_true; + } - afr_self_heal_parent_entrylk (frame, this, - afr_sh_post_nb_entrylk_missing_entry_sh_cbk); - return 0; -} + if (IA_ISREG(first.ia_type) && + !IA_EQUAL (first, replies[i].poststat, size)) { + gf_log (this->name, GF_LOG_DEBUG, + "SIZE mismatch %lld vs %lld on %s for gfid:%s", + (long long) first.ia_size, + (long long) replies[i].poststat.ia_size, + priv->children[i]->name, + uuid_utoa (replies[i].poststat.ia_gfid)); -afr_local_t* -afr_self_heal_local_init (afr_local_t *l, xlator_t *this) -{ - afr_private_t *priv = NULL; - afr_local_t *lc = NULL; - afr_self_heal_t *sh = NULL; - afr_self_heal_t *shc = NULL; - int ret = 0; - - priv = this->private; - - sh = &l->self_heal; - - lc = mem_get0 (this->local_pool); - if (!lc) - goto out; - - shc = &lc->self_heal; - - shc->unwind = sh->unwind; - shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk; - shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal; - shc->do_gfid_self_heal = sh->do_gfid_self_heal; - shc->do_data_self_heal = sh->do_data_self_heal; - shc->do_metadata_self_heal = sh->do_metadata_self_heal; - shc->do_entry_self_heal = sh->do_entry_self_heal; - shc->force_confirm_spb = sh->force_confirm_spb; - shc->forced_merge = sh->forced_merge; - shc->background = sh->background; - shc->type = sh->type; - shc->data_sh_info = ""; - shc->metadata_sh_info = ""; - - uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req); - if (l->loc.path) { - ret = loc_copy (&lc->loc, &l->loc); - if (ret < 0) - goto out; - } - - lc->child_up = memdup (l->child_up, - sizeof (*lc->child_up) * priv->child_count); - if (!lc->child_up) { - ret = -1; - goto out; - } - - if (l->xattr_req) - lc->xattr_req = dict_ref (l->xattr_req); - - if (l->cont.lookup.inode) - lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode); - if (l->cont.lookup.xattr) - lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr); - - lc->internal_lock.locked_nodes = - GF_CALLOC (sizeof (*l->internal_lock.locked_nodes), - priv->child_count, gf_afr_mt_char); - if (!lc->internal_lock.locked_nodes) { - ret = -1; - goto out; - } - - ret = afr_inodelk_init (&lc->internal_lock.inodelk[0], - this->name, priv->child_count); - if (ret) - goto out; + *data_selfheal = _gf_true; + } + } -out: - if (ret) { - afr_local_cleanup (lc, this); - lc = NULL; - } - return lc; -} + if (valid_cnt > 0) + afr_inode_link (inode, &first); -int -afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_local_t * orig_frame_local = NULL; - afr_self_heal_t * orig_frame_sh = NULL; - char sh_type_str[256] = {0,}; - gf_loglevel_t loglevel = 0; - - priv = this->private; - local = bgsh_frame->local; - sh = &local->self_heal; - - if (local->unhealable) { - afr_set_split_brain (this, sh->inode, SPB, SPB); - } - - afr_self_heal_type_str_get (sh, sh_type_str, - sizeof(sh_type_str)); - if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) { - loglevel = GF_LOG_ERROR; - } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) { - loglevel = GF_LOG_INFO; - } else { - loglevel = GF_LOG_DEBUG; - } - - afr_log_self_heal_completion_status (local, loglevel); - - FRAME_SU_UNDO (bgsh_frame, afr_local_t); - - if (!sh->unwound && sh->unwind) { - orig_frame_local = sh->orig_frame->local; - orig_frame_sh = &orig_frame_local->self_heal; - orig_frame_sh->actual_sh_started = _gf_true; - sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, - is_self_heal_failed (sh, AFR_CHECK_ALL)); - } - - if (sh->background) { - LOCK (&priv->lock); - { - priv->background_self_heals_started--; - } - UNLOCK (&priv->lock); - } - - AFR_STACK_DESTROY (bgsh_frame); - - return 0; -} - -int -afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int32_t op_errno = 0; - int ret = 0; - afr_self_heal_t *orig_sh = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t *sh_local = NULL; - loc_t *loc = NULL; - - local = frame->local; - orig_sh = &local->self_heal; - priv = this->private; - - GF_ASSERT (local->loc.path); - - gf_log (this->name, GF_LOG_TRACE, - "performing self heal on %s (metadata=%d data=%d entry=%d)", - local->loc.path, - local->self_heal.do_metadata_self_heal, - local->self_heal.do_data_self_heal, - local->self_heal.do_entry_self_heal); - - op_errno = ENOMEM; - sh_frame = copy_frame (frame); - if (!sh_frame) - goto out; - afr_set_lk_owner (sh_frame, this, sh_frame->root); - afr_set_low_priority (sh_frame); - - sh_local = afr_self_heal_local_init (local, this); - if (!sh_local) - goto out; - sh_frame->local = sh_local; - sh = &sh_local->self_heal; - - sh->inode = inode_ref (inode); - sh->orig_frame = frame; - - sh->completion_cbk = afr_self_heal_completion_cbk; - - sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success), - gf_afr_mt_char); - if (!sh->success) - goto out; - sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count, - gf_afr_mt_int); - if (!sh->sources) - goto out; - sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes), - priv->child_count, - gf_afr_mt_int); - if (!sh->locked_nodes) - goto out; - - sh->pending_matrix = afr_matrix_create (priv->child_count, - priv->child_count); - if (!sh->pending_matrix) - goto out; - - sh->delta_matrix = afr_matrix_create (priv->child_count, - priv->child_count); - if (!sh->delta_matrix) - goto out; - - sh->fresh_parent_dirs = afr_children_create (priv->child_count); - if (!sh->fresh_parent_dirs) - goto out; - ret = afr_sh_common_create (sh, priv->child_count); - if (ret) { - op_errno = -ret; - goto out; - } - - if (local->self_heal.background) { - LOCK (&priv->lock); - { - if (priv->background_self_heals_started - < priv->background_self_heal_count) { - priv->background_self_heals_started++; - - - } else { - local->self_heal.background = _gf_false; - sh->background = _gf_false; - } - } - UNLOCK (&priv->lock); - } - - if (!local->loc.parent) { - sh->do_missing_entry_self_heal = _gf_false; - sh->do_gfid_self_heal = _gf_false; - } - - sh->sh_type_in_action = AFR_SELF_HEAL_INVALID; - - FRAME_SU_DO (sh_frame, afr_local_t); - if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) { - afr_self_heal_missing_entries (sh_frame, this); - } else { - loc = &sh_local->loc; - if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) { - if (!uuid_is_null (inode->gfid)) - GF_ASSERT (!uuid_compare (inode->gfid, - sh->sh_gfid_req)); - uuid_copy (loc->gfid, sh->sh_gfid_req); - } - gf_log (this->name, GF_LOG_TRACE, - "proceeding to metadata check on %s", - local->loc.path); - - afr_sh_missing_entries_done (sh_frame, this); - } - op_errno = 0; + if (valid_cnt < 2) + return -ENOTCONN; -out: - if (op_errno) { - orig_sh->unwind (frame, this, -1, op_errno, 1); - if (sh_frame) - AFR_STACK_DESTROY (sh_frame); - } - return 0; + return 0; } -void -afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str, - size_t size) + +inode_t * +afr_inode_find (xlator_t *this, uuid_t gfid) { - GF_ASSERT (str && (size > strlen (" missing-entry gfid " - "meta-data data entry"))); + inode_table_t *table = NULL; + inode_t *inode = NULL; - if (self_heal_p->do_metadata_self_heal) { - snprintf (str, size, " meta-data"); - } + table = this->itable; + if (!table) + return NULL; - if (self_heal_p->do_data_self_heal) { - snprintf (str + strlen(str), size - strlen(str), " data"); - } + inode = inode_find (table, gfid); + if (inode) + return inode; - if (self_heal_p->do_entry_self_heal) { - snprintf (str + strlen(str), size - strlen(str), " entry"); - } + inode = inode_new (table); + if (!inode) + return NULL; - if (self_heal_p->do_missing_entry_self_heal) { - snprintf (str + strlen(str), size - strlen(str), - " missing-entry"); - } + uuid_copy (inode->gfid, gfid); - if (self_heal_p->do_gfid_self_heal) { - snprintf (str + strlen(str), size - strlen(str), " gfid"); - } + return inode; } -afr_self_heal_type -afr_self_heal_type_for_transaction (afr_transaction_type type) -{ - afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID; - - switch (type) { - case AFR_DATA_TRANSACTION: - sh_type = AFR_SELF_HEAL_DATA; - break; - case AFR_METADATA_TRANSACTION: - sh_type = AFR_SELF_HEAL_METADATA; - break; - case AFR_ENTRY_TRANSACTION: - sh_type = AFR_SELF_HEAL_ENTRY; - break; - case AFR_ENTRY_RENAME_TRANSACTION: - GF_ASSERT (0); - break; - } - return sh_type; -} -int -afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name) +call_frame_t * +afr_frame_create (xlator_t *this) { - int ret = -1; - uuid_t pargfid = {0}; - - if (!child) - goto out; - - if (!uuid_is_null (parent->inode->gfid)) - uuid_copy (pargfid, parent->inode->gfid); - else if (!uuid_is_null (parent->gfid)) - uuid_copy (pargfid, parent->gfid); - - if (uuid_is_null (pargfid)) - goto out; - - if (strcmp (parent->path, "/") == 0) - ret = gf_asprintf ((char **)&child->path, "/%s", name); - else - ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path, - name); + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int op_errno = 0; + pid_t pid = -1; - if (-1 == ret) { - gf_log (this->name, GF_LOG_ERROR, - "asprintf failed while setting child path"); - } + frame = create_frame (this, this->ctx->pool); + if (!frame) + return NULL; - child->name = strrchr (child->path, '/'); - if (child->name) - child->name++; + local = AFR_FRAME_INIT (frame, op_errno); + if (!local) { + STACK_DESTROY (frame->root); + return NULL; + } - child->parent = inode_ref (parent->inode); - child->inode = inode_new (parent->inode->table); - uuid_copy (child->pargfid, pargfid); + syncopctx_setfspid (&pid); - if (!child->inode) { - ret = -1; - goto out; - } + frame->root->pid = pid; - ret = 0; -out: - if ((ret == -1) && child) - loc_wipe (child); + afr_set_lk_owner (frame, this, frame->root); - return ret; + return frame; } -int -afr_sh_erase_pending (call_frame_t *frame, xlator_t *this, - afr_transaction_type type, afr_fxattrop_cbk_t cbk, - int (*finish)(call_frame_t *frame, xlator_t *this)) -{ - afr_local_t *local = NULL; - afr_self_heal_t *sh = NULL; - afr_private_t *priv = NULL; - int call_count = 0; - int i = 0; - dict_t **erase_xattr = NULL; - int ret = -1; - - local = frame->local; - sh = &local->self_heal; - priv = this->private; - - afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, - sh->success, priv->child_count, type); - - erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count, - gf_afr_mt_dict_t); - if (!erase_xattr) - goto out; - - for (i = 0; i < priv->child_count; i++) { - if (sh->xattr[i]) { - call_count++; - erase_xattr[i] = dict_new (); - if (!erase_xattr[i]) - goto out; - } - } - - afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr, - priv->child_count, type); - - gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s", - lkowner_utoa (&frame->root->lk_owner)); - afr_sh_print_pending_matrix (sh->delta_matrix, this); - local->call_count = call_count; - if (call_count == 0) { - ret = 0; - finish (frame, this); - goto out; - } - - for (i = 0; i < priv->child_count; i++) { - if (!erase_xattr[i]) - continue; - - if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction - STACK_WIND_COOKIE (frame, cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->fxattrop, - sh->healing_fd, - GF_XATTROP_ADD_ARRAY, erase_xattr[i], - NULL); - } else { - STACK_WIND_COOKIE (frame, cbk, (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->loc, - GF_XATTROP_ADD_ARRAY, erase_xattr[i], - NULL); - } - } - - ret = 0; -out: - if (erase_xattr) { - for (i = 0; i < priv->child_count; i++) { - if (erase_xattr[i]) { - dict_unref (erase_xattr[i]); - } - } - } - - GF_FREE (erase_xattr); - - if (ret < 0) { - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); - finish (frame, this); - } - - return 0; -} -void -afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status) -{ - xlator_t *this = NULL; - afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status); - afr_self_heal_type sh_type_in_action = sh->sh_type_in_action; - this = THIS; - - if (!sh) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal" - "Structure"); - goto out; - } - - switch (sh_type_in_action) { - case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY: - sh_status->gfid_or_missing_entry_self_heal = status; - break; - case AFR_SELF_HEAL_METADATA: - sh_status->metadata_self_heal = status; - break; - case AFR_SELF_HEAL_DATA: - sh_status->data_self_heal = status; - break; - case AFR_SELF_HEAL_ENTRY: - sh_status->entry_self_heal = status; - break; - case AFR_SELF_HEAL_INVALID: - gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid" - "self heal type in action"); - break; - } -out: - return; -} +/* + * This is the entry point for healing a given GFID + */ -void -afr_set_local_for_unhealable (afr_local_t *local) +int +afr_selfheal (xlator_t *this, uuid_t gfid) { - afr_self_heal_t *sh = NULL; - - sh = &local->self_heal; + inode_t *inode = NULL; + call_frame_t *frame = NULL; + int ret = -1; + gf_boolean_t data_selfheal = _gf_false; + gf_boolean_t metadata_selfheal = _gf_false; + gf_boolean_t entry_selfheal = _gf_false; - local->unhealable = 1; - afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); -} + inode = afr_inode_find (this, gfid); + if (!inode) + goto out; -int -is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type) -{ - afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status; - afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID; - afr_self_heal_status status = AFR_SELF_HEAL_FAILED; - xlator_t *this = NULL; - int sh_failed = 0; - - this = THIS; - - if (!sh) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal " - "structure"); - sh_failed = 1; - goto out; - } - - if (type == AFR_CHECK_ALL) { - if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED)) - sh_failed = 1; - } else if (type == AFR_CHECK_SPECIFIC) { - sh_type_in_action = sh->sh_type_in_action; - switch (sh_type_in_action) { - case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY: - status = sh_status.gfid_or_missing_entry_self_heal; - break; - case AFR_SELF_HEAL_METADATA: - status = sh_status.metadata_self_heal; - break; - case AFR_SELF_HEAL_ENTRY: - status = sh_status.entry_self_heal; - break; - case AFR_SELF_HEAL_DATA: - status = sh_status.data_self_heal; - break; - case AFR_SELF_HEAL_INVALID: - status = AFR_SELF_HEAL_NOT_ATTEMPTED; - break; - } - if (status == AFR_SELF_HEAL_FAILED) - sh_failed = 1; - - } + frame = afr_frame_create (this); + if (!frame) + goto out; -out: - return sh_failed; -} + ret = afr_selfheal_unlocked_inspect (frame, this, inode, gfid, + &data_selfheal, + &metadata_selfheal, + &entry_selfheal); + if (ret) + goto out; -char * -get_sh_completion_status (afr_self_heal_status status) -{ + if (data_selfheal) + afr_selfheal_data (frame, this, inode); - char *not_attempted = " is not attempted"; - char *failed = " failed"; - char *started = " is started"; - char *sync_begin = " is successfully completed"; - char *result = " has unknown status"; - - switch (status) - { - case AFR_SELF_HEAL_NOT_ATTEMPTED: - result = not_attempted; - break; - case AFR_SELF_HEAL_FAILED: - result = failed; - break; - case AFR_SELF_HEAL_STARTED: - result = started; - break; - case AFR_SELF_HEAL_SYNC_BEGIN: - result = sync_begin; - break; - } - - return result; + if (metadata_selfheal) + afr_selfheal_metadata (frame, this, inode); -} + if (entry_selfheal) + afr_selfheal_entry (frame, this, inode); -void -afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl) -{ + inode_forget (inode, 1); +out: + if (inode) + inode_unref (inode); + if (frame) + AFR_STACK_DESTROY (frame); - char sh_log[4096] = {0}; - afr_self_heal_t *sh = &local->self_heal; - afr_sh_status_for_all_type all_status = sh->afr_all_sh_status; - xlator_t *this = NULL; - size_t off = 0; - int data_sh = 0; - int metadata_sh = 0; - int print_log = 0; - - this = THIS; - - ADD_FMT_STRING (sh_log, off, "gfid or missing entry", - all_status.gfid_or_missing_entry_self_heal, print_log); - ADD_FMT_STRING_SYNC (sh_log, off, "metadata", - all_status.metadata_self_heal, print_log); - if (sh->background) { - ADD_FMT_STRING_SYNC (sh_log, off, "backgroung data", - all_status.data_self_heal, print_log); - } else { - ADD_FMT_STRING_SYNC (sh_log, off, "foreground data", - all_status.data_self_heal, print_log); - } - ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal, - print_log); - - if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal && - strcmp (sh->data_sh_info, "") && sh->data_sh_info ) - data_sh = 1; - if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal && - strcmp (sh->metadata_sh_info, "") && sh->metadata_sh_info) - metadata_sh = 1; - - if (!print_log) - return; - - gf_log (this->name, loglvl, "%s %s %s on %s", sh_log, - ((data_sh == 1) ? sh->data_sh_info : ""), - ((metadata_sh == 1) ? sh->metadata_sh_info : ""), - local->loc.path); + return ret; } -- cgit