diff options
author | Anand Avati <avati@redhat.com> | 2014-01-16 16:14:36 -0800 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-03-22 05:25:57 -0700 |
commit | 6d3739292b7b51d2ddbab75b5f884fb38925b943 (patch) | |
tree | cf332a881a49c0904a7e023935750c2d080fc1c5 /xlators/cluster/afr/src/afr-self-heald.c | |
parent | eb87c96f49b3dd2c7460e58c54ce909c706cd475 (diff) |
cluster/afr: refactor
- Remove client side self-healing completely (opendir, openfd, lookup)
- Re-work readdir-failover to work reliably in case of NFS
- Remove unused/dead lock recovery code
- Consistently use xdata in both calls and callbacks in all FOPs
- Per-inode event generation, used to force inode ctx refresh
- Implement dirty flag support (in place of pending counts)
- Eliminate inode ctx structure, use read subvol bits + event_generation
- Implement inode ctx refreshing based on event generation
- Provide backward compatibility in transactions
- Remove unused variables and functions
- Make code more consistent in style and pattern
- Regularize and clean up inode-write transaction code
- Regularize and clean up dir-write transaction code
- Regularize and clean up common FOPs
- Reorganize transaction framework code
- Skip setting xattrs in pending dict if nothing is pending
- Re-write self-healing code using syncops
- Re-write self-heal-daemon in a simpler form
Change-Id: I1e4080c9796c8a2815c2dab4be3073f389d614a8
BUG: 1021686
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/6010
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heald.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 2605 |
1 files changed, 1013 insertions, 1592 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 9e5c1b3e79f..4bfe909bcb9 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -8,1828 +8,1249 @@ cases as published by the Free Software Foundation. */ + #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif + #include "afr.h" -#include "syncop.h" +#include "afr-self-heal.h" #include "afr-self-heald.h" -#include "afr-self-heal-common.h" #include "protocol-common.h" -#include "event-history.h" - -typedef enum { - STOP_CRAWL_ON_SINGLE_SUBVOL = 1, - STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL = 2 -} afr_crawl_flags_t; - -typedef enum { - HEAL = 1, - INFO, - STATISTICS_TO_BE_HEALED, -} shd_crawl_op; - -typedef struct shd_dump { - dict_t *dict; - xlator_t *this; - int child; -} shd_dump_t; - -typedef struct shd_event_ { - int child; - char *path; -} shd_event_t; - -typedef struct shd_pos_ { - int child; - xlator_t *this; - afr_child_pos_t pos; -} shd_pos_t; - -typedef int -(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data); -void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, - process_entry_cbk_t process_entry, void *op_data, - gf_boolean_t exclusive, int crawl_flags, - afr_crawl_done_cbk_t crawl_done); +#define SHD_INODE_LRU_LIMIT 2048 +#define AFR_EH_HEALED_LIMIT 1024 +#define AFR_EH_HEAL_FAIL_LIMIT 1024 +#define AFR_EH_SPLIT_BRAIN_LIMIT 1024 +#define AFR_STATISTICS_HISTORY_SIZE 50 -static int -_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data); -/* For calling straight through (e.g. already in a synctask). 
*/ -int -afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos); +#define ASSERT_LOCAL(this, healer) \ + if (!afr_shd_is_subvol_local(this, healer->subvol)) { \ + healer->local = _gf_false; \ + if (safe_break (healer)) { \ + break; \ + } else { \ + continue; \ + } \ + } else { \ + healer->local = _gf_true; \ + } -/* For deferring through a new synctask. */ -int -afr_syncop_find_child_position (void *data); -static int -_loc_assign_gfid_path (loc_t *loc) -{ - int ret = -1; - char gfid_path[64] = {0}; - - if (loc->inode && !uuid_is_null (loc->inode->gfid)) { - ret = inode_path (loc->inode, NULL, (char**)&loc->path); - } else if (!uuid_is_null (loc->gfid)) { - snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>", - uuid_utoa (loc->gfid)); - loc->path = gf_strdup (gfid_path); - if (loc->path) - ret = 0; - } - return ret; -} +#define NTH_INDEX_HEALER(this, n) &((((afr_private_t *)this->private))->shd.index_healers[n]) +#define NTH_FULL_HEALER(this, n) &((((afr_private_t *)this->private))->shd.full_healers[n]) -void -_destroy_crawl_event_data (void *data) -{ - shd_crawl_event_t *crawl_event = NULL; +int afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); - if (!data) - goto out; +char * +afr_subvol_name (xlator_t *this, int subvol) +{ + afr_private_t *priv = NULL; - crawl_event = (shd_crawl_event_t *)data; - GF_FREE (crawl_event->start_time_str); - GF_FREE (crawl_event->end_time_str); + priv = this->private; + if (subvol < 0 || subvol > priv->child_count) + return NULL; -out: - return; + return priv->children[subvol]->name; } + void -_destroy_shd_event_data (void *data) +afr_destroy_crawl_event_data (void *data) { - shd_event_t *event = NULL; - if (!data) - goto out; - event = (shd_event_t*)data; - GF_FREE (event->path); -out: return; } + + void -shd_cleanup_event (void *event) +afr_destroy_shd_event_data (void *data) { - shd_event_t *shd_event = event; + shd_event_t *shd_event = data; + + if (!shd_event) + return; + 
GF_FREE (shd_event->path); - if (!shd_event) - goto out; - GF_FREE (shd_event->path); - GF_FREE (shd_event); -out: return; } -int -afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count) -{ - int i = 0; - int ret = -1; - for (i = 0; i < child_count; i++) { - if (shd->pos[i] == AFR_POS_LOCAL) { - ret = i; - break; - } - } - return ret; -} -static int -_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent) +gf_boolean_t +afr_shd_is_subvol_local (xlator_t *this, int subvol) { - int ret = 0; + char *pathinfo = NULL; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + gf_boolean_t is_local = _gf_false; + loc_t loc = {0, }; - uuid_copy (loc->pargfid, parent->inode->gfid); - loc->path = ""; - loc->name = name; - loc->parent = inode_ref (parent->inode); - if (!loc->parent) { - loc->path = NULL; - loc_wipe (loc); - ret = -1; - } - return ret; -} + priv = this->private; -int -_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child, - shd_crawl_event_t *shd_event, struct timeval *tv) -{ - int ret = 0; - uint64_t count = 0; - char key[256] = {0}; - int xl_id = 0; - uint64_t healed_count = 0; - uint64_t split_brain_count = 0; - uint64_t heal_failed_count = 0; - char *start_time_str = NULL; - char *end_time_str = NULL; - char *crawl_type = NULL; - int progress = -1; + loc.inode = this->itable->root; + uuid_copy (loc.gfid, loc.inode->gfid); - healed_count = shd_event->healed_count; - split_brain_count = shd_event->split_brain_count; - heal_failed_count = shd_event->heal_failed_count; - start_time_str = shd_event->start_time_str; - end_time_str = shd_event->end_time_str; - crawl_type = shd_event->crawl_type; + ret = syncop_getxattr (priv->children[subvol], &loc, &xattr, + GF_XATTR_PATHINFO_KEY); + if (ret) + return _gf_false; + if (!xattr) + return _gf_false; - if (!start_time_str) { - ret = -1; - goto out; - } + ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret) + return _gf_false; + 
afr_local_pathinfo (pathinfo, &is_local); - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); - goto out; - } + gf_log (this->name, GF_LOG_DEBUG, "subvol %s is %slocal", + priv->children[subvol]->name, is_local? "" : "not "); - snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); - ret = dict_get_uint64 (output, key, &count); + return is_local; +} - snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64(output, key, healed_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "healed_count to outout"); - goto out; - } - snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64 (output, key, split_brain_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "split_brain_count to outout"); - goto out; - } - snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_dynstr (output, key, gf_strdup (crawl_type)); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "crawl_type to output"); - goto out; - } - snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_uint64 (output, key, heal_failed_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "healed_failed_count to outout"); - goto out; - } - snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64, - xl_id, child, count); - ret = dict_set_dynstr (output, key, gf_strdup(start_time_str)); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "crawl_start_time to outout"); - goto out; - } +int +__afr_shd_healer_wait (struct subvol_healer *healer) +{ + afr_private_t *priv = NULL; + struct timespec wait_till = {0, }; + int ret = 0; - snprintf (key, sizeof 
(key), "statistics_end_time-%d-%d-%"PRIu64, - xl_id, child, count); + priv = healer->this->private; - if (!end_time_str) - end_time_str = "Could not determine the end time"; - ret = dict_set_dynstr (output, key, gf_strdup(end_time_str)); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "crawl_end_time to outout"); - goto out; - } - snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64, - xl_id, child, count); +disabled_loop: + wait_till.tv_sec = time (NULL) + 60; - if (shd_event->crawl_inprogress == _gf_true) - progress = 1; - else - progress = 0; + while (!healer->rerun) { + ret = pthread_cond_timedwait (&healer->cond, + &healer->mutex, + &wait_till); + if (ret == ETIMEDOUT) + break; + } - ret = dict_set_int32 (output, key, progress); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_" - "inprogress to outout"); - goto out; - } + ret = healer->rerun; + healer->rerun = 0; - snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child); - ret = dict_set_uint64 (output, key, count + 1); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not increment the " - "counter."); - goto out; - } -out: - return ret; + if (!priv->shd.enabled) + goto disabled_loop; + + return ret; } + int -_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path, - struct timeval *tv, gf_boolean_t dyn) +afr_shd_healer_wait (struct subvol_healer *healer) { - //subkey not used for now - int ret = -1; - uint64_t count = 0; - char key[256] = {0}; - int xl_id = 0; + int ret = 0; - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); - goto out; - } + pthread_mutex_lock (&healer->mutex); + { + ret = __afr_shd_healer_wait (healer); + } + pthread_mutex_unlock (&healer->mutex); - snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); - ret = dict_get_uint64 (output, key, &count); + return ret; +} - snprintf (key, sizeof (key), 
"%d-%d-%"PRIu64, xl_id, child, count); - if (dyn) - ret = dict_set_dynstr (output, key, path); - else - ret = dict_set_str (output, key, path); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output", - path); - goto out; - } - if (!tv) - goto inc_count; - snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id, - child, count); - ret = dict_set_uint32 (output, key, tv->tv_sec); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time", - path); - goto out; - } +gf_boolean_t +safe_break (struct subvol_healer *healer) +{ + gf_boolean_t ret = _gf_false; -inc_count: - snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); - ret = dict_set_uint64 (output, key, count + 1); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Could not increment count"); - goto out; - } - ret = 0; -out: - return ret; -} + pthread_mutex_lock (&healer->mutex); + { + if (healer->rerun) + goto unlock; -int -_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child, - char **fpath, gf_boolean_t *missing) -{ - dict_t *xattr = NULL; - char *path = NULL; - int ret = -1; + healer->running = _gf_false; + ret = _gf_true; + } +unlock: + pthread_mutex_unlock (&healer->mutex); - ret = syncop_getxattr (readdir_xl, child, &xattr, GFID_TO_PATH_KEY); - if (ret < 0) { - if ((-ret == ENOENT || -ret == ESTALE) && missing) - *missing = _gf_true; - ret = -1; - goto out; - } - ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to get path for " - "gfid %s", uuid_utoa (child->gfid)); - goto out; - } - path = gf_strdup (path); - if (!path) { - ret = -1; - goto out; - } - ret = 0; -out: - if (!ret) - *fpath = path; - if (xattr) - dict_unref (xattr); - return ret; + return ret; } -int -_add_event_to_dict (circular_buffer_t *cb, void *data) -{ - int ret = 0; - shd_dump_t *dump_data = NULL; - shd_event_t *shd_event = NULL; - dump_data = data; - shd_event = cb->data; - if (shd_event->child != 
dump_data->child) - goto out; - ret = _add_path_to_dict (dump_data->this, dump_data->dict, - dump_data->child, shd_event->path, &cb->tv, - _gf_false); +inode_t * +afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) +{ + inode_t *inode = NULL; + int ret = 0; + loc_t loc = {0, }; + struct iatt iatt = {0, }; + + inode = inode_find (this->itable, gfid); + if (inode) + goto out; + + loc.inode = inode_new (this->itable); + if (!loc.inode) + goto out; + uuid_copy (loc.gfid, gfid); + + ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL); + if (ret < 0) + goto out; + + inode = inode_link (loc.inode, NULL, NULL, &iatt); + if (inode) + inode_lookup (inode); out: - return ret; + loc_wipe (&loc); + return inode; } -int -_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data) -{ - int ret = 0; - shd_dump_t *dump_data = NULL; - shd_crawl_event_t *shd_event = NULL; - - dump_data = data; - shd_event = cb->data; - ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict, - dump_data->child, shd_event, &cb->tv); - return ret; -} -int -_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child) +fd_t * +afr_shd_index_opendir (xlator_t *this, int child) { - shd_dump_t dump_data = {0}; - - dump_data.this = this; - dump_data.dict = dict; - dump_data.child = child; - eh_dump (eh, &dump_data, _add_event_to_dict); - return 0; + fd_t *fd = NULL; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + loc_t rootloc = {0, }; + inode_t *inode = NULL; + int ret = 0; + dict_t *xattr = NULL; + void *index_gfid = NULL; + + priv = this->private; + subvol = priv->children[child]; + + rootloc.inode = inode_ref (this->itable->root); + uuid_copy (rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr (subvol, &rootloc, &xattr, + GF_XATTROP_INDEX_GFID); + if (ret || !xattr) { + errno = -ret; + goto out; + } + + ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); + if (ret) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, 
"index-dir gfid for %s: %s", + subvol->name, uuid_utoa (index_gfid)); + + inode = afr_shd_inode_find (this, subvol, index_gfid); + if (!inode) + goto out; + fd = fd_anonymous (inode); +out: + loc_wipe (&rootloc); + if (xattr) + dict_unref (xattr); + return fd; } int -_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child) +afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name) { - shd_dump_t dump_data = {0}; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; + loc_t loc = {0, }; + int ret = 0; - priv = this->private; - shd = &priv->shd; + loc.parent = inode_ref (inode); + loc.name = name; - dump_data.this = this; - dump_data.dict = dict; - dump_data.child = child; - eh_dump (shd->statistics[child], &dump_data, - _add_crawl_event_statistics_to_dict); - return 0; + ret = syncop_unlink (subvol, &loc); + loc_wipe (&loc); + return ret; } -void -_remove_stale_index (xlator_t *this, xlator_t *readdir_xl, - loc_t *parent, char *fname) -{ - int ret = 0; - loc_t index_loc = {0}; - - ret = _build_index_loc (this, &index_loc, fname, parent); - if (ret) - goto out; - gf_log (this->name, GF_LOG_DEBUG, "Removing stale index " - "for %s on %s", index_loc.name, readdir_xl->name); - ret = syncop_unlink (readdir_xl, &index_loc); - if((ret < 0) && (-ret != ENOENT)) { - gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index " - "on %s - %s",index_loc.name, readdir_xl->name, - strerror (-ret)); - } - index_loc.path = NULL; - loc_wipe (&index_loc); -out: - return; -} int -_count_hard_links_under_base_indices_dir (xlator_t *this, - afr_crawl_data_t *crawl_data, - gf_dirent_t *entry, loc_t *childloc, - loc_t *parentloc, struct iatt *iattr) +afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, + const char *bname) { - xlator_t *readdir_xl = crawl_data->readdir_xl; - struct iatt parent = {0}; - int ret = 0; - dict_t *output = NULL; - int xl_id = 0; - char key[256] = {0}; - int child = -1; - uint64_t hardlinks = 0; - - output = 
crawl_data->op_data; - child = crawl_data->child; - - ret = syncop_lookup (readdir_xl, childloc, NULL, iattr, NULL, &parent); - if (ret) { - ret = -1; - goto out; - } - - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) - goto out; + int ret = -1; - snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id, child); - ret = dict_get_uint64 (output, key, &hardlinks); + ret = afr_selfheal_name (THIS, parent, bname); - /*Removing the count of base_entry under indices/base_indicies and - * entry under indices/xattrop */ - hardlinks = hardlinks + iattr->ia_nlink - 2; - ret = dict_set_uint64 (output, key, hardlinks); - if (ret) - goto out; - -out: - return ret; + return ret; } int -_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data, - gf_dirent_t *entry, - loc_t *childloc, loc_t *parentloc, struct iatt *iattr) +afr_shd_selfheal (struct subvol_healer *healer, int child, uuid_t gfid) { - dict_t *output = NULL; - xlator_t *readdir_xl = NULL; - int ret = -1; - char *path = NULL; - gf_boolean_t missing = _gf_false; - char gfid_str[64] = {0}; + int ret = 0; + eh_t *eh = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + shd_event_t *shd_event = NULL; + char *path = NULL; + xlator_t *subvol = NULL; + xlator_t *this = NULL; + crawl_event_t *crawl_event = NULL; + + this = healer->this; + priv = this->private; + shd = &priv->shd; + crawl_event = &healer->crawl_event; + + subvol = priv->children[child]; + + ret = afr_selfheal (this, gfid); + + if (ret == -EIO) { + eh = shd->split_brain; + crawl_event->split_brain_count++; + } else if (ret < 0) { + eh = shd->heal_failed; + crawl_event->heal_failed_count++; + } else if (ret == 0) { + eh = shd->healed; + crawl_event->healed_count++; + } + + afr_shd_gfid_to_path (this, subvol, gfid, &path); + if (!path) + return ret; + + if (eh) { + shd_event = GF_CALLOC (1, sizeof(*shd_event), + gf_afr_mt_shd_event_t); + if (!shd_event) { + GF_FREE (path); + return ret; + } + + shd_event->child = child; + 
shd_event->path = path; + + if (eh_save_history (eh, shd_event) < 0) { + GF_FREE (shd_event); + GF_FREE (path); + } + } + return ret; +} - if (uuid_is_null (childloc->gfid)) - goto out; - output = crawl_data->op_data; - readdir_xl = crawl_data->readdir_xl; - - ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path, - &missing); - if (ret == 0) { - ret = _add_path_to_dict (this, output, crawl_data->child, path, - NULL, _gf_true); - } else if (missing) { - _remove_stale_index (this, readdir_xl, parentloc, - uuid_utoa_r (childloc->gfid, gfid_str)); - } +void +afr_shd_sweep_prepare (struct subvol_healer *healer) +{ + crawl_event_t *event = NULL; -out: - if (ret && path) - GF_FREE (path); - return ret; + event = &healer->crawl_event; + + event->healed_count = 0; + event->split_brain_count = 0; + event->heal_failed_count = 0; + + time (&event->start_time); + event->end_time = 0; } + void -_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child, - int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp, - afr_crawl_data_t *crawl_data) +afr_shd_sweep_done (struct subvol_healer *healer) { - int ret = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - eh_t *eh = NULL; - char *path = NULL; - char gfid_str[64] = {0}; - shd_event_t *event = NULL; - int32_t sh_failed = 0; - gf_boolean_t split_brain = 0; - int32_t actual_sh_done = 0; - shd_crawl_event_t **shd_crawl_event = NULL; - - priv = this->private; - shd = &priv->shd; - if (crawl_data->crawl == INDEX) { - if ((op_ret < 0) && (op_errno == ENOENT)) { - _remove_stale_index (this, crawl_data->readdir_xl, - parent, uuid_utoa_r (child->gfid, - gfid_str)); - goto out; - } - ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl, - child, &path, NULL); - if (ret) - goto out; - } else { - path = gf_strdup (child->path); - if (!path) { - ret = -1; - goto out; - } - } + crawl_event_t *event = NULL; + crawl_event_t *history = NULL; + afr_self_heald_t *shd = NULL; - if (xattr_rsp) { - ret = 
dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed); - ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done); - } + event = &healer->crawl_event; + shd = &(((afr_private_t *)healer->this->private)->shd); - shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events); - - split_brain = afr_is_split_brain (this, child->inode); - if ((op_ret < 0 && op_errno == EIO) || split_brain) { - eh = shd->split_brain; - shd_crawl_event[crawl_data->child]->split_brain_count += 1; - } else if ((op_ret < 0) || sh_failed) { - eh = shd->heal_failed; - shd_crawl_event[crawl_data->child]->heal_failed_count += 1; - } else if (actual_sh_done == 1) { - eh = shd->healed; - shd_crawl_event[crawl_data->child]->healed_count += 1; - } - ret = -1; + time (&event->end_time); + history = memdup (event, sizeof (*event)); + event->start_time = 0; - if (eh != NULL) { - event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t); - if (!event) - goto out; - event->child = crawl_data->child; - event->path = path; + if (!history) + return; - ret = eh_save_history (eh, event); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save " - "to event history, (%d, %s)", path, op_ret, - strerror (op_errno)); + if (eh_save_history (shd->statistics[healer->subvol], history) < 0) + GF_FREE (history); +} - goto out; - } - } else { - gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ", - path); - } - ret = 0; -out: - if (ret && path) - GF_FREE (path); - return; +int +afr_shd_index_sweep (struct subvol_healer *healer) +{ + xlator_t *this = NULL; + int child = -1; + fd_t *fd = NULL; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + uuid_t gfid; + int ret = 0; + int count = 0; + + this = healer->this; + child = healer->subvol; + priv = this->private; + subvol = priv->children[child]; + + fd = afr_shd_index_opendir (this, child); + if (!fd) { + gf_log (this->name, GF_LOG_WARNING, + "unable to 
opendir index-dir on %s", subvol->name); + return -errno; + } + + INIT_LIST_HEAD (&entries.list); + + while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) { + if (ret > 0) + ret = 0; + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + + if (!priv->shd.enabled) { + ret = -EBUSY; + break; + } + + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; + + gf_log (this->name, GF_LOG_DEBUG, "got entry: %s", + entry->d_name); + + ret = uuid_parse (entry->d_name, gfid); + if (ret) + continue; + + ret = afr_shd_selfheal (healer, child, gfid); + if (ret == 0) + count++; + + if (ret == -ENOENT || ret == -ESTALE) { + afr_shd_index_purge (subvol, fd->inode, + entry->d_name); + ret = 0; + } + } + + gf_dirent_free (&entries); + if (ret) + break; + } + + if (fd) + fd_unref (fd); + if (!ret) + ret = count; + return ret; } + int -_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr) +afr_shd_full_sweep (struct subvol_healer *healer, inode_t *inode) { - inode_t *link_inode = NULL; - int ret = -1; + fd_t *fd = NULL; + xlator_t *this = NULL; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + int ret = 0; + + this = healer->this; + priv = this->private; + subvol = priv->children[healer->subvol]; + + fd = fd_anonymous (inode); + if (!fd) + return -errno; + + INIT_LIST_HEAD (&entries.list); + + while ((ret = syncop_readdirp (subvol, fd, 131072, offset, 0, &entries))) { + if (ret < 0) + break; + + ret = gf_link_inodes_from_dirent (this, fd->inode, &entries); + if (ret) + break; + + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + + if (!priv->shd.enabled) { + ret = -EBUSY; + break; + } + + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; + + afr_shd_selfheal_name (healer, healer->subvol, + inode->gfid, entry->d_name); + + afr_shd_selfheal (healer, healer->subvol, 
+ entry->d_stat.ia_gfid); + + if (entry->d_stat.ia_type == IA_IFDIR) { + ret = afr_shd_full_sweep (healer, entry->inode); + if (ret) + break; + } + } + + gf_dirent_free (&entries); + if (ret) + break; + } + + if (fd) + fd_unref (fd); + return ret; +} - link_inode = inode_link (loc->inode, NULL, NULL, iattr); - if (link_inode == NULL) { - gf_log (this->name, GF_LOG_ERROR, "inode link failed " - "on the inode (%s)", uuid_utoa (iattr->ia_gfid)); - goto out; - } - inode_unref (loc->inode); - loc->inode = link_inode; - ret = 0; -out: - return ret; + +void * +afr_shd_index_healer (void *data) +{ + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + int ret = 0; + + healer = data; + THIS = this = healer->this; + + for (;;) { + afr_shd_healer_wait (healer); + + ASSERT_LOCAL(this, healer); + + do { + gf_log (this->name, GF_LOG_DEBUG, + "starting index sweep on subvol %s", + afr_subvol_name (this, healer->subvol)); + + afr_shd_sweep_prepare (healer); + + ret = afr_shd_index_sweep (healer); + + afr_shd_sweep_done (healer); + /* + As long as at least one gfid was + healed, keep retrying. We may have + just healed a directory and thereby + created entries for other gfids which + could not be healed thus far. + */ + + gf_log (this->name, GF_LOG_DEBUG, + "finished index sweep on subvol %s", + afr_subvol_name (this, healer->subvol)); + /* + Give a pause before retrying to avoid a busy loop + in case the only entry in index is because of + an ongoing I/O. 
+ */ + sleep (1); + } while (ret > 0); + } + + return NULL; } -int -_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry, - loc_t *child, loc_t *parent, struct iatt *iattr) + +void * +afr_shd_full_healer (void *data) { - struct iatt parentbuf = {0}; - int ret = 0; - dict_t *xattr_rsp = NULL; - dict_t *xattr_req = NULL; + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + int run = 0; - xattr_req = dict_new (); - if (!xattr_req) { - errno = ENOMEM; - ret = -1; - goto out; - } + healer = data; + THIS = this = healer->this; - ret = dict_set_int32 (xattr_req, "attempt-self-heal", 1); + for (;;) { + pthread_mutex_lock (&healer->mutex); + { + run = __afr_shd_healer_wait (healer); + if (!run) + healer->running = _gf_false; + } + pthread_mutex_unlock (&healer->mutex); - gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path); + if (!run) + break; - ret = syncop_lookup (this, child, xattr_req, - iattr, &xattr_rsp, &parentbuf); - _crawl_post_sh_action (this, parent, child, ret, -ret, xattr_rsp, - crawl_data); - if (ret < 0) - ret = -1; - if (xattr_rsp) - dict_unref (xattr_rsp); - if (ret == 0) - ret = _link_inode_update_loc (this, child, iattr); + ASSERT_LOCAL(this, healer); -out: - if (xattr_req) - dict_unref(xattr_req); - return ret; -} + gf_log (this->name, GF_LOG_INFO, + "starting full sweep on subvol %s", + afr_subvol_name (this, healer->subvol)); -static int -afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) -{ - GF_FREE (data); - STACK_DESTROY (sync_frame->root); - return 0; -} + afr_shd_sweep_prepare (healer); -int -_get_heal_op_flags (shd_crawl_op op, afr_crawl_type_t crawl) -{ - int crawl_flags = 0; + afr_shd_full_sweep (healer, this->itable->root); - if (HEAL == op) { - crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL; + afr_shd_sweep_done (healer); - if (crawl == INDEX) - crawl_flags |= STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL; - } + gf_log (this->name, GF_LOG_INFO, + "finished full sweep on subvol %s", + 
afr_subvol_name (this, healer->subvol)); + } - return crawl_flags; + return NULL; } -void -_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) -{ - afr_start_crawl (this, child, crawl, _self_heal_entry, - NULL, _gf_true, _get_heal_op_flags (HEAL, crawl), - afr_crawl_done); -} -gf_boolean_t -_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason) +int +afr_shd_healer_init (xlator_t *this, struct subvol_healer *healer) { - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - gf_boolean_t proceed = _gf_false; - char *msg = NULL; - - priv = this->private; - shd = &priv->shd; - if (!shd->enabled) { - msg = "Self-heal daemon is not enabled"; - gf_log (this->name, GF_LOG_DEBUG, "%s", msg); - goto out; - } + int ret = 0; - if (!priv->child_up[child]) { - gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , " - "subvol went down", priv->children[child]->name); - msg = "Brick is Not connected"; - goto out; - } - - if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) { - if (afr_up_children_count (priv->child_up, - priv->child_count) < 2) { - gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as " - "< 2 children are up"); - msg = "< 2 bricks in replica are running"; - goto out; - } - } + ret = pthread_mutex_init (&healer->mutex, NULL); + if (ret) + goto out; - if (crawl_flags & STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL) { - if (shd->pending[child] == FULL) { - gf_log (this->name, GF_LOG_INFO, "Stopping index " - "self-heal as Full self-heal is pending on %s", - priv->children[child]->name); - msg = "Full crawl is pending"; - goto out; - } - } + ret = pthread_cond_init (&healer->cond, NULL); + if (ret) + goto out; - proceed = _gf_true; + healer->this = this; + healer->running = _gf_false; + healer->rerun = _gf_false; + healer->local = _gf_false; out: - if (reason) - *reason = msg; - return proceed; + return ret; } -int -_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, - shd_crawl_op op, dict_t *output) -{ - 
afr_private_t *priv = NULL; - char *status = NULL; - char *subkey = NULL; - char key[256] = {0}; - shd_pos_t pos_data = {0}; - int op_ret = -1; - int xl_id = -1; - int i = 0; - int ret = 0; - int crawl_flags = 0; - - priv = this->private; - crawl_flags = _get_heal_op_flags (op, crawl); - - if (output) { - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Invalid input, " - "translator-id is not available"); - goto out; - } - } - pos_data.this = this; - subkey = "status"; - for (i = 0; i < priv->child_count; i++) { - if (_crawl_proceed (this, i, crawl_flags, &status)) { - pos_data.child = i; - /* - * We're already in a synctask in this case, so we - * don't need to defer through a second (and in fact - * that can cause deadlock). Just call straight - * through instead. - */ - ret = afr_find_child_position(pos_data.this, - pos_data.child, - &pos_data.pos); - if (ret) { - status = "Not able to find brick location"; - } else if (pos_data.pos == AFR_POS_REMOTE) { - status = "brick is remote"; - } else { - op_ret = 0; - if (op == HEAL) { - status = "Started self-heal"; - _do_self_heal_on_subvol (this, i, - crawl); - } else if (output && (op == INFO)) { - status = ""; - afr_start_crawl (this, i, INDEX, - _add_summary_to_dict, - output, _gf_false, 0, - NULL); - } else if (output && - (op == STATISTICS_TO_BE_HEALED)) { - status = ""; - afr_start_crawl (this, i, - INDEX_TO_BE_HEALED, - _count_hard_links_under_base_indices_dir, - output, _gf_false, - 0, NULL); - } - } - if (output) { - snprintf (key, sizeof (key), "%d-%d-%s", xl_id, - i, subkey); - ret = dict_set_str (output, key, status); - } - if (!op_ret && (crawl == FULL)) - break; - } - if (output) { - snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, - subkey); - ret = dict_set_str (output, key, status); - } - } -out: - return op_ret; -} int -_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, - dict_t *output) +afr_shd_healer_spawn (xlator_t *this, 
struct subvol_healer *healer, + void *(threadfn)(void *)) { - return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output); + int ret = 0; + + pthread_mutex_lock (&healer->mutex); + { + if (healer->running) { + pthread_cond_signal (&healer->cond); + } else { + ret = gf_thread_create (&healer->thread, NULL, + threadfn, healer); + if (ret) + goto unlock; + healer->running = 1; + } + + healer->rerun = 1; + } +unlock: + pthread_mutex_unlock (&healer->mutex); + + return ret; } + int -_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output) +afr_shd_full_healer_spawn (xlator_t *this, int subvol) { - return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output); + return afr_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol), + afr_shd_full_healer); } -void -afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) -{ - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int i = 0; - priv = this->private; - shd= &priv->shd; - for (i = 0; i < priv->child_count; i++) { - if (shd->pos[i] != AFR_POS_LOCAL) - continue; - _add_statistics_to_dict (this, dict, i); - } - - return ; -} -static void -reset_crawl_event (shd_crawl_event_t *crawl_event) +int +afr_shd_index_healer_spawn (xlator_t *this, int subvol) { - crawl_event->healed_count = 0; - crawl_event->split_brain_count = 0; - crawl_event->heal_failed_count = 0; - GF_FREE (crawl_event->start_time_str); - crawl_event->start_time_str = NULL; - crawl_event->end_time_str = NULL; - crawl_event->crawl_type = NULL; - crawl_event->crawl_inprogress = _gf_false; - return; + return afr_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol), + afr_shd_index_healer); } -static void -afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst) -{ - dst->healed_count = src->healed_count; - dst->split_brain_count = src->split_brain_count; - dst->heal_failed_count = src->heal_failed_count; - dst->start_time_str = gf_strdup (src->start_time_str); - dst->end_time_str = "Crawl 
is already in progress"; - dst->crawl_type = src->crawl_type; - dst->crawl_inprogress = _gf_true; - return; -} -static int -afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict) +int +afr_shd_dict_add_crawl_event (xlator_t *this, dict_t *output, + crawl_event_t *crawl_event) { - shd_crawl_event_t *evnt = NULL; - int ret = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int i = 0; - priv = this->private; - shd = &priv->shd; - - evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t), - gf_afr_mt_shd_crawl_event_t); - if (!evnt) { - ret = -1; - goto out; - } - LOCK (&priv->lock); - { - for (i = 0; i < priv->child_count; i++) { - if (shd->pos[i] != AFR_POS_LOCAL) - continue; - - reset_crawl_event (evnt); - - if (!shd->crawl_events[i]) { - continue; - } - - afr_copy_crawl_event_struct (shd->crawl_events[i], - evnt); - _add_crawl_stats_to_dict (this, dict, i, evnt, NULL); + int ret = 0; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; + uint64_t healed_count = 0; + uint64_t split_brain_count = 0; + uint64_t heal_failed_count = 0; + char *start_time_str = 0; + char *end_time_str = NULL; + char *crawl_type = NULL; + int progress = -1; + int child = -1; - } - } - UNLOCK (&priv->lock); - reset_crawl_event (evnt); - GF_FREE (evnt); + child = crawl_event->child; + healed_count = crawl_event->healed_count; + split_brain_count = crawl_event->split_brain_count; + heal_failed_count = crawl_event->heal_failed_count; + crawl_type = crawl_event->crawl_type; -out: - return ret; -} + if (!crawl_event->start_time) + goto out; -static int -_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict) -{ - int ret = 0; - afr_fill_completed_crawl_statistics_to_dict (this, dict); - ret = afr_fill_crawl_statistics_of_running_crawl (this, dict); - return ret; -} -int -_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict) -{ - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int i = 0; + start_time_str = 
gf_strdup (ctime (&crawl_event->start_time)); - priv = this->private; - shd = &priv->shd; + if (crawl_event->end_time) + end_time_str = gf_strdup (ctime (&crawl_event->end_time)); - for (i = 0; i < priv->child_count; i++) { - if (shd->pos[i] != AFR_POS_LOCAL) - continue; - _add_eh_to_dict (this, eh, dict, i); + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); + goto out; } - return 0; -} -int -afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) -{ - gf_xl_afr_op_t op = GF_AFR_OP_INVALID; - int ret = 0; - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int xl_id = 0; + snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); + ret = dict_get_uint64 (output, key, &count); - priv = this->private; - shd = &priv->shd; - ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); - if (ret) - goto out; - ret = dict_get_int32 (input, this->name, &xl_id); - if (ret) + snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64(output, key, healed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_healed_count to outout"); goto out; - ret = dict_set_int32 (output, this->name, xl_id); - if (ret) + } + + snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, split_brain_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_split_brain_count to outout"); goto out; - switch (op) { - case GF_AFR_OP_HEAL_INDEX: - ret = _do_self_heal_on_local_subvols (this, INDEX, output); - break; - case GF_AFR_OP_HEAL_FULL: - ret = _do_self_heal_on_local_subvols (this, FULL, output); - break; - case GF_AFR_OP_INDEX_SUMMARY: - (void)_get_index_summary_on_local_subvols (this, output); - ret = 0; - break; - case GF_AFR_OP_HEALED_FILES: - ret = _add_local_subvols_eh_to_dict (this, shd->healed, output); - break; 
- case GF_AFR_OP_HEAL_FAILED_FILES: - ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed, - output); - break; - case GF_AFR_OP_SPLIT_BRAIN_FILES: - ret = _add_local_subvols_eh_to_dict (this, shd->split_brain, - output); - break; - case GF_AFR_OP_STATISTICS: - ret = _add_local_subvols_crawl_statistics_to_dict (this, output); - break; - case GF_AFR_OP_STATISTICS_HEAL_COUNT: - case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: - ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED, - STATISTICS_TO_BE_HEALED, - output); - break; - default: - gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); - break; } -out: - dict_del (output, this->name); - return ret; -} -void -afr_poll_self_heal (void *data) -{ - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - struct timespec timeout = {0}; - xlator_t *this = NULL; - long child = (long)data; - gf_timer_t *old_timer = NULL; - gf_timer_t *new_timer = NULL; - shd_pos_t pos_data = {0}; - int ret = 0; - - this = THIS; - priv = this->private; - shd = &priv->shd; - - if (shd->pos[child] == AFR_POS_UNKNOWN) { - pos_data.this = this; - pos_data.child = child; - ret = synctask_new (this->ctx->env, - afr_syncop_find_child_position, - NULL, NULL, &pos_data); - if (!ret) - shd->pos[child] = pos_data.pos; - } - if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL)) - _do_self_heal_on_subvol (this, child, INDEX); - timeout.tv_sec = shd->timeout; - timeout.tv_nsec = 0; - //notify and previous timer should be synchronized. 
- LOCK (&priv->lock); - { - old_timer = shd->timer[child]; - if (shd->pos[child] == AFR_POS_REMOTE) - goto unlock; - shd->timer[child] = gf_timer_call_after (this->ctx, timeout, - afr_poll_self_heal, - data); - new_timer = shd->timer[child]; - } -unlock: - UNLOCK (&priv->lock); - - if (old_timer) - gf_timer_call_cancel (this->ctx, old_timer); - if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) { - gf_log (this->name, GF_LOG_WARNING, - "Could not create self-heal polling timer for %s", - priv->children[child]->name); + snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_str (output, key, crawl_type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_crawl_type to output"); + goto out; } - return; -} - -static int -afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data) -{ - afr_self_heald_t *shd = NULL; - shd_pos_t *pos_data = data; - afr_private_t *priv = NULL; - if (ret) + snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_uint64 (output, key, heal_failed_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_healed_failed_count to outout"); goto out; + } - priv = pos_data->this->private; - shd = &priv->shd; - shd->pos[pos_data->child] = pos_data->pos; - if (pos_data->pos != AFR_POS_REMOTE) - afr_poll_self_heal ((void*)(long)pos_data->child); - _do_self_heal_on_local_subvols (THIS, INDEX, NULL); -out: - GF_FREE (data); - return 0; -} - -void -afr_proactive_self_heal (void *data) -{ - xlator_t *this = NULL; - long child = (long)data; - shd_pos_t *pos_data = NULL; - int ret = 0; + snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64, + xl_id, child, count); + ret = dict_set_dynstr (output, key, start_time_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_crawl_start_time to outout"); + goto out; + } else { + start_time_str = NULL; 
+ } - this = THIS; + if (!end_time_str) + progress = 1; + else + progress = 0; - //Position of brick could have changed and it could be local now. - //Compute the position again - pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t); - if (!pos_data) - goto out; - pos_data->this = this; - pos_data->child = child; - ret = synctask_new (this->ctx->env, afr_syncop_find_child_position, - afr_handle_child_up, NULL, pos_data); - if (ret) + snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64, + xl_id, child, count); + if (!end_time_str) + end_time_str = gf_strdup ("Could not determine the end time"); + ret = dict_set_dynstr (output, key, end_time_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_crawl_end_time to outout"); goto out; -out: - return; -} + } else { + end_time_str = NULL; + } -static int -get_pathinfo_host (char *pathinfo, char *hostname, size_t size) -{ - char *start = NULL; - char *end = NULL; - int ret = -1; - int i = 0; + snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64, + xl_id, child, count); - if (!pathinfo) + ret = dict_set_int32 (output, key, progress); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not add statistics_inprogress to outout"); goto out; + } - start = strchr (pathinfo, ':'); - if (!start) + snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child); + ret = dict_set_uint64 (output, key, count + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not increment the counter."); goto out; - end = strrchr (pathinfo, ':'); - if (start == end) - goto out; - - memset (hostname, 0, size); - i = 0; - while (++start != end) - hostname[i++] = *start; - ret = 0; + } out: + GF_FREE (start_time_str); + GF_FREE (end_time_str); return ret; } + int -afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +afr_shd_dict_add_path (xlator_t *this, dict_t *output, int child, char *path, + struct timeval *tv) { - int ret = 0; - char pathinfohost[1024] 
= {0}; - char localhost[1024] = {0}; - xlator_t *this = THIS; + int ret = -1; + uint64_t count = 0; + char key[256] = {0}; + int xl_id = 0; - *local = _gf_false; - ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); + ret = dict_get_int32 (output, this->name, &xl_id); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", - pathinfo); + gf_log (this->name, GF_LOG_ERROR, "xl does not have id"); goto out; } - ret = gethostname (localhost, sizeof (localhost)); + snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); + ret = dict_get_uint64 (output, key, &count); + + snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count); + ret = dict_set_dynstr (output, key, path); + if (ret) { - gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " - "reason: %s", strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output", + path); goto out; } - if (!strcmp (localhost, pathinfohost)) - *local = _gf_true; -out: - return ret; -} + if (tv) { + snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id, + child, count); + ret = dict_set_uint32 (output, key, tv->tv_sec); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time", + path); + goto out; + } + } -int -afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data, - loc_t *dirloc) -{ - afr_private_t *priv = NULL; - dict_t *xattr = NULL; - void *index_gfid = NULL; - void *base_indices_holder_vgfid = NULL; - loc_t rootloc = {0}; - struct iatt iattr = {0}; - struct iatt parent = {0}; - int ret = 0; - xlator_t *readdir_xl = crawl_data->readdir_xl; - - priv = this->private; - if (crawl_data->crawl == FULL) { - afr_build_root_loc (this, dirloc); - } else if (crawl_data->crawl == INDEX) { - afr_build_root_loc (this, &rootloc); - ret = syncop_getxattr (readdir_xl, &rootloc, &xattr, - GF_XATTROP_INDEX_GFID); - if (ret < 0) { - ret = -1; - goto out; - } - ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); - if (ret < 
0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get index " - "dir gfid on %s", readdir_xl->name); - goto out; - } - if (!index_gfid) { - gf_log (this->name, GF_LOG_ERROR, "index gfid empty " - "on %s", readdir_xl->name); - ret = -1; - goto out; - } - uuid_copy (dirloc->gfid, index_gfid); - dirloc->path = ""; - dirloc->inode = inode_new (priv->root_inode->table); - ret = syncop_lookup (readdir_xl, dirloc, NULL, - &iattr, NULL, &parent); - if (ret < 0) { - if (-ret != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, "lookup " - "failed on index dir on %s - (%s)", - readdir_xl->name, strerror (-ret)); - } - ret = -1; - goto out; - } - ret = _link_inode_update_loc (this, dirloc, &iattr); - if (ret) - goto out; - } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) { - afr_build_root_loc (this, &rootloc); - ret = syncop_getxattr (readdir_xl, &rootloc, &xattr, - GF_BASE_INDICES_HOLDER_GFID); - if (ret < 0) { - ret = -1; - goto out; - } - ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID, - &base_indices_holder_vgfid); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "index gfid empty " - "on %s", readdir_xl->name); - ret = -1; - goto out; - } - if (!base_indices_holder_vgfid) { - gf_log (this->name, GF_LOG_ERROR, "Base indices holder" - "virtual gfid is null on %s", readdir_xl->name); - ret = -1; - goto out; - } - uuid_copy (dirloc->gfid, base_indices_holder_vgfid); - dirloc->path = ""; - dirloc->inode = inode_new (priv->root_inode->table); - ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL, - &parent); - if (ret < 0) { - if (-ret != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, "lookup " - "failed for base_indices_holder dir" - " on %s - (%s)", readdir_xl->name, - strerror (-ret)); - - } else { - gf_log (this->name, GF_LOG_ERROR, "base_indices" - "_holder is not yet created."); - } - ret = -1; - goto out; - } - ret = _link_inode_update_loc (this, dirloc, &iattr); - if (ret) - goto out; + snprintf (key, sizeof (key), "%d-%d-count", xl_id, child); + + 
ret = dict_set_uint64 (output, key, count + 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not increment count"); + goto out; } + ret = 0; out: - if (xattr) - dict_unref (xattr); - loc_wipe (&rootloc); return ret; } + int -afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd, - loc_t *dirloc) +afr_shd_gfid_to_path (xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p) { - fd_t *fd = NULL; - int ret = 0; - - if (crawl_data->crawl == FULL) { - fd = fd_create (dirloc->inode, crawl_data->pid); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create fd for %s", dirloc->path); - ret = -1; - goto out; - } - - ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "opendir failed on %s", dirloc->path); - ret = -1; - goto out; - } - } else { - fd = fd_anonymous (dirloc->inode); - } - ret = 0; -out: - if (!ret) - *dirfd = fd; - return ret; + loc_t loc = {0,}; + char *path = NULL; + dict_t *xattr = NULL; + int ret = 0; + + uuid_copy (loc.gfid, gfid); + loc.inode = inode_new (this->itable); + + ret = syncop_getxattr (subvol, &loc, &xattr, GFID_TO_PATH_KEY); + loc_wipe (&loc); + if (ret) + return ret; + + ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path); + if (ret || !path) + return -EINVAL; + + *path_p = gf_strdup (path); + if (!*path_p) + return -ENOMEM; + return 0; } -xlator_t* -afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data) -{ - afr_private_t *priv = this->private; - - if (crawl_data->crawl == FULL) { - return this; - } else { - return priv->children[crawl_data->child]; - } - return NULL; -} int -afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, - gf_dirent_t *entry, afr_crawl_data_t *crawl_data) +afr_shd_gather_index_entries (xlator_t *this, int child, dict_t *output) { - int ret = -1; - afr_private_t *priv = NULL; - - priv = this->private; - if (crawl_data->crawl == FULL) { - ret = afr_build_child_loc 
(this, child, parent, entry->d_name); - } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) { - ret = _build_index_loc (this, child, entry->d_name, parent); - if (ret) - goto out; - child->inode = inode_new (priv->root_inode->table); - if (!child->inode) { - ret = -1; - goto out; - } - child->path = NULL; - } else { - child->inode = inode_new (priv->root_inode->table); - if (!child->inode) - goto out; - uuid_parse (entry->d_name, child->gfid); - ret = _loc_assign_gfid_path (child); - } -out: - return ret; + fd_t *fd = NULL; + xlator_t *subvol = NULL; + afr_private_t *priv = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + uuid_t gfid; + int ret = 0; + int count = 0; + char *path = NULL; + + priv = this->private; + subvol = priv->children[child]; + + fd = afr_shd_index_opendir (this, child); + if (!fd) { + gf_log (this->name, GF_LOG_WARNING, + "unable to opendir index-dir on %s", subvol->name); + return -errno; + } + + INIT_LIST_HEAD (&entries.list); + + while ((ret = syncop_readdir (subvol, fd, 131072, offset, &entries))) { + if (ret > 0) + ret = 0; + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; + + gf_log (this->name, GF_LOG_DEBUG, "got entry: %s", + entry->d_name); + + ret = uuid_parse (entry->d_name, gfid); + if (ret) + continue; + + path = NULL; + ret = afr_shd_gfid_to_path (this, subvol, gfid, &path); + + if (ret == -ENOENT || ret == -ESTALE) { + afr_shd_index_purge (subvol, fd->inode, + entry->d_name); + ret = 0; + continue; + } + + ret = afr_shd_dict_add_path (this, output, child, path, + NULL); + } + + gf_dirent_free (&entries); + if (ret) + break; + } + + if (fd) + fd_unref (fd); + if (!ret) + ret = count; + return ret; } -static int -_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries, - off_t *offset, afr_crawl_data_t *crawl_data) -{ - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; 
- int ret = 0; - loc_t entry_loc = {0}; - fd_t *fd = NULL; - struct iatt iattr = {0}; - - list_for_each_entry_safe (entry, tmp, &entries->list, list) { - if (!_crawl_proceed (this, crawl_data->child, - crawl_data->crawl_flags, NULL)) { - ret = -1; - goto out; - } - *offset = entry->d_off; - if (IS_ENTRY_CWD (entry->d_name) || - IS_ENTRY_PARENT (entry->d_name)) - continue; - if ((crawl_data->crawl == FULL) && - uuid_is_null (entry->d_stat.ia_gfid)) { - gf_log (this->name, GF_LOG_WARNING, "%s/%s: No " - "gfid present skipping", - parentloc->path, entry->d_name); - continue; - } - - loc_wipe (&entry_loc); - ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc, - entry, crawl_data); - if (ret) - goto out; - ret = crawl_data->process_entry (this, crawl_data, entry, - &entry_loc, parentloc, &iattr); - - if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) { - goto out; - } else if (ret) { - continue; - } - - if ((crawl_data->crawl == INDEX) || - (crawl_data->crawl == INDEX_TO_BE_HEALED)) - continue; - - if (!IA_ISDIR (iattr.ia_type)) - continue; - fd = NULL; - ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc); - if (ret) - continue; - ret = _crawl_directory (fd, &entry_loc, crawl_data); - if (fd) - fd_unref (fd); - } - ret = 0; -out: - if ((crawl_data->crawl == INDEX_TO_BE_HEALED) && ret) { - gf_log (this->name, GF_LOG_ERROR,"Failed to get the hardlink " - "count"); - } - loc_wipe (&entry_loc); - return ret; +int +afr_add_shd_event (circular_buffer_t *cb, void *data) +{ + dict_t *output = NULL; + xlator_t *this = THIS; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + shd_event_t *shd_event = NULL; + char *path = NULL; + + output = data; + priv = this->private; + shd = &priv->shd; + shd_event = cb->data; + + if (!shd->index_healers[shd_event->child].local) + return 0; + + path = gf_strdup (shd_event->path); + if (!path) + return -ENOMEM; + + afr_shd_dict_add_path (this, output, shd_event->child, path, + &cb->tv); + return 0; } -static int 
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data) +int +afr_add_crawl_event (circular_buffer_t *cb, void *data) { - xlator_t *this = NULL; - off_t offset = 0; - gf_dirent_t entries; - int ret = 0; - gf_boolean_t free_entries = _gf_false; - xlator_t *readdir_xl = crawl_data->readdir_xl; + dict_t *output = NULL; + xlator_t *this = THIS; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + crawl_event_t *crawl_event = NULL; - INIT_LIST_HEAD (&entries.list); - this = THIS; + output = data; + priv = this->private; + shd = &priv->shd; + crawl_event = cb->data; - GF_ASSERT (loc->inode); + if (!shd->index_healers[crawl_event->child].local) + return 0; - if (crawl_data->crawl == FULL) - gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); - else - gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s", - uuid_utoa (loc->gfid)); - - while (1) { - if (crawl_data->crawl == FULL) - ret = syncop_readdirp (readdir_xl, fd, 131072, offset, - NULL, &entries); - else - ret = syncop_readdir (readdir_xl, fd, 131072, offset, - &entries); - if (ret < 0) { - ret = -1; - break; - } else if (ret == 0) { - break; - } - - ret = 0; - free_entries = _gf_true; - - if (!_crawl_proceed (this, crawl_data->child, - crawl_data->crawl_flags, NULL)) { - ret = -1; - goto out; - } - if (list_empty (&entries.list)) - goto out; + afr_shd_dict_add_crawl_event (this, output, crawl_event); - ret = _process_entries (this, loc, &entries, &offset, - crawl_data); - if ((ret < 0) && (crawl_data->crawl == INDEX_TO_BE_HEALED)) { - goto out; - } - gf_dirent_free (&entries); - free_entries = _gf_false; - } - ret = 0; -out: - if (free_entries) - gf_dirent_free (&entries); - return ret; + return 0; } -static char* -position_str_get (afr_child_pos_t pos) -{ - switch (pos) { - case AFR_POS_UNKNOWN: - return "unknown"; - case AFR_POS_LOCAL: - return "local"; - case AFR_POS_REMOTE: - return "remote"; - } - return NULL; -} int -afr_find_child_position (xlator_t *this, int child, 
afr_child_pos_t *pos) +afr_selfheal_daemon_init (xlator_t *this) { - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - dict_t *xattr_rsp = NULL; - loc_t loc = {0}; - int ret = 0; - char *node_uuid = NULL; - - priv = this->private; - shd = &priv->shd; - - afr_build_root_loc (this, &loc); - - ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, - GF_XATTR_NODE_UUID_KEY); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - " - "(%s)", priv->children[child]->name, strerror (-ret)); - ret = -1; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = -1; + int i = 0; + + priv = this->private; + shd = &priv->shd; + + this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); + if (!this->itable) + goto out; + + shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers), + priv->child_count, + gf_afr_mt_subvol_healer_t); + if (!shd->index_healers) + goto out; + + for (i = 0; i < priv->child_count; i++) { + shd->index_healers[i].subvol = i; + ret = afr_shd_healer_init (this, &shd->index_healers[i]); + if (ret) + goto out; + } + + shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers), + priv->child_count, + gf_afr_mt_subvol_healer_t); + if (!shd->full_healers) + goto out; + for (i = 0; i < priv->child_count; i++) { + shd->full_healers[i].subvol = i; + ret = afr_shd_healer_init (this, &shd->full_healers[i]); + if (ret) + goto out; + } + + shd->healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false, + afr_destroy_shd_event_data); + if (!shd->healed) + goto out; + + shd->heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false, + afr_destroy_shd_event_data); + if (!shd->heal_failed) + goto out; + + shd->split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false, + afr_destroy_shd_event_data); + if (!shd->split_brain) + goto out; + + shd->statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count, + gf_common_mt_eh_t); + if (!shd->statistics) goto out; - } - ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, 
&node_uuid); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on " - "child %s", priv->children[child]->name); - goto out; + for (i = 0; i < priv->child_count ; i++) { + shd->statistics[i] = eh_new (AFR_STATISTICS_HISTORY_SIZE, + _gf_false, + afr_destroy_crawl_event_data); + if (!shd->statistics[i]) + goto out; + shd->full_healers[i].crawl_event.child = i; + shd->full_healers[i].crawl_event.crawl_type = "FULL"; + shd->index_healers[i].crawl_event.child = i; + shd->index_healers[i].crawl_event.crawl_type = "INDEX"; } - if (!strcmp (node_uuid, shd->node_uuid)) - *pos = AFR_POS_LOCAL; - else - *pos = AFR_POS_REMOTE; - - gf_log (this->name, GF_LOG_DEBUG, "child %s is %s", - priv->children[child]->name, position_str_get (*pos)); + ret = 0; out: - if (ret) - *pos = AFR_POS_UNKNOWN; - loc_wipe (&loc); - return ret; + return ret; } + int -afr_syncop_find_child_position (void *data) +afr_selfheal_childup (xlator_t *this, int subvol) { - shd_pos_t *pos_data = data; - int ret = 0; + afr_shd_index_healer_spawn (this, subvol); - ret = afr_find_child_position (pos_data->this, pos_data->child, - &pos_data->pos); - return ret; + return 0; } -static int -afr_dir_crawl (void *data) -{ - xlator_t *this = NULL; - int ret = -1; - xlator_t *readdir_xl = NULL; - fd_t *fd = NULL; - loc_t dirloc = {0}; - afr_crawl_data_t *crawl_data = data; - - this = THIS; - - if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags, - NULL)) - goto out; - - readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data); - if (!readdir_xl) - goto out; - crawl_data->readdir_xl = readdir_xl; - ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc); - if (ret) - goto out; - - ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc); - if (ret) { - if (crawl_data->crawl == INDEX_TO_BE_HEALED) { - gf_log (this->name, GF_LOG_ERROR, "Failed to open base_" - "indices_holder"); - } - goto out; - } - - ret = _crawl_directory (fd, &dirloc, crawl_data); - if (ret) - gf_log 
(this->name, GF_LOG_ERROR, "Crawl failed on %s", - readdir_xl->name); - else - gf_log (this->name, GF_LOG_DEBUG, "Crawl completed " - "on %s", readdir_xl->name); - if (crawl_data->crawl == INDEX) - dirloc.path = NULL; -out: - if (fd) - fd_unref (fd); - if ((crawl_data->crawl == INDEX) || - (crawl_data->crawl == INDEX_TO_BE_HEALED )) - dirloc.path = NULL; - loc_wipe (&dirloc); - return ret; -} - -char * -get_crawl_type_in_string (afr_crawl_type_t crawl) +int64_t +afr_shd_get_index_count (xlator_t *this, int i) { - char *index = "INDEX"; - char *full = "FULL"; - char *crawl_type = NULL; - - if (crawl == INDEX){ - crawl_type = index; - } else if (crawl == FULL) { - crawl_type = full; - } - - return crawl_type; -} - -static int -afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl) -{ - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int ret = 0; - shd_crawl_event_t *crawl_event = NULL; - time_t get_time = 0; - - priv = this->private; - shd = &priv->shd; - - crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1, - gf_afr_mt_shd_crawl_event_t); - if (!crawl_event) { - ret = -1; - goto out; - } - - get_time = time(NULL); - if (get_time == ((time_t)-1)) { - ret = -1; - goto out; - } - - crawl_event->start_time_str = gf_strdup (ctime(&get_time)); - - crawl_event->crawl_type = get_crawl_type_in_string (crawl); - if (!crawl_event->crawl_type) { - ret = -1; - goto out; - } - LOCK (&priv->lock); - { - shd->crawl_events[child] = crawl_event; - } - UNLOCK (&priv->lock); - ret = 0; -out: - return ret; - + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + uint64_t count = 0; + loc_t rootloc = {0, }; + dict_t *xattr = NULL; + int ret = -1; + + priv = this->private; + subvol = priv->children[i]; + + rootloc.inode = inode_ref (this->itable->root); + uuid_copy (rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr (subvol, &rootloc, &xattr, + GF_XATTROP_INDEX_COUNT); + loc_wipe (&rootloc); + + if (ret < 0) + return -1; + + ret = 
dict_get_uint64 (xattr, GF_XATTROP_INDEX_COUNT, &count); + if (ret) + return -1; + return count; } -static int -afr_put_crawl_event_in_eh (xlator_t *this, int child) -{ - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - int ret = 0; - time_t get_time = 0; - shd_crawl_event_t **crawl_event = NULL; - - priv = this->private; - shd = &priv->shd; - - get_time = time(NULL); - if (get_time == ((time_t)-1)) { - ret = -1; - goto out; - } - crawl_event = (shd_crawl_event_t**)shd->crawl_events; - LOCK (&priv->lock); - { - crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time)); - ret = eh_save_history (shd->statistics[child], - crawl_event[child]); - crawl_event[child] = NULL; - } - UNLOCK (&priv->lock); -out: - return ret; -} -static int -afr_dir_exclusive_crawl (void *data) +int +afr_xl_op (xlator_t *this, dict_t *input, dict_t *output) { - afr_private_t *priv = NULL; - afr_self_heald_t *shd = NULL; - gf_boolean_t crawl = _gf_false; + gf_xl_afr_op_t op = GF_AFR_OP_INVALID; int ret = 0; - int child = -1; - xlator_t *this = NULL; - afr_crawl_data_t *crawl_data = data; - - this = THIS; - priv = this->private; - shd = &priv->shd; - child = crawl_data->child; - - LOCK (&priv->lock); - { - if (shd->inprogress[child]) { - if (shd->pending[child] != FULL) - shd->pending[child] = crawl_data->crawl; - } else { - shd->inprogress[child] = _gf_true; - crawl = _gf_true; - } - } - UNLOCK (&priv->lock); - - if (!crawl) { - gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress " - "for %s while attempting %s heal on %s", - priv->children[child]->name, - get_crawl_type_in_string (crawl_data->crawl), - priv->children[child]->name); - goto out; - } - - do { - ret = afr_allocate_crawl_event (this, child, crawl_data->crawl); - if (ret) - goto out; - afr_dir_crawl (data); - - ret = afr_put_crawl_event_in_eh (this, child); - if (ret < 0) - goto out; - - LOCK (&priv->lock); - { - if (shd->pending[child] != NONE) { - crawl_data->crawl = shd->pending[child]; - 
shd->pending[child] = NONE; - } else { - shd->inprogress[child] = _gf_false; - crawl = _gf_false; - } - } - UNLOCK (&priv->lock); - } while (crawl); -out: - return ret; -} + int xl_id = 0; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + struct subvol_healer *healer = NULL; + int i = 0; + char key[64]; + int op_ret = 0; + int64_t cnt = 0; -void -afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl, - process_entry_cbk_t process_entry, void *op_data, - gf_boolean_t exclusive, int crawl_flags, - afr_crawl_done_cbk_t crawl_done) -{ - afr_private_t *priv = NULL; - call_frame_t *frame = NULL; - afr_crawl_data_t *crawl_data = NULL; - int ret = 0; - int (*crawler) (void*) = NULL; + priv = this->private; + shd = &priv->shd; - priv = this->private; + for (i = 0; i < priv->child_count; i++) + if (priv->child_up[i] == -1) + goto out; - frame = create_frame (this, this->ctx->pool); - if (!frame) + ret = dict_get_int32 (input, "xl-op", (int32_t*)&op); + if (ret) goto out; - - afr_set_lk_owner (frame, this, frame->root); - afr_set_low_priority (frame); - crawl_data = GF_CALLOC (1, sizeof (*crawl_data), - gf_afr_mt_crawl_data_t); - if (!crawl_data) + ret = dict_get_int32 (input, this->name, &xl_id); + if (ret) goto out; - crawl_data->process_entry = process_entry; - crawl_data->child = idx; - crawl_data->pid = frame->root->pid; - crawl_data->crawl = crawl; - crawl_data->op_data = op_data; - crawl_data->crawl_flags = crawl_flags; - gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s", - crawl_data->crawl, priv->children[idx]->name); - - if (exclusive) - crawler = afr_dir_exclusive_crawl; - else - crawler = afr_dir_crawl; - ret = synctask_new (this->ctx->env, crawler, - crawl_done, frame, crawl_data); + ret = dict_set_int32 (output, this->name, xl_id); if (ret) - gf_log (this->name, GF_LOG_ERROR, "afr crawl failed for child" - " %d with ret %d", idx, ret); -out: - return; -} - -void -afr_build_root_loc (xlator_t *this, loc_t *loc) -{ - 
afr_private_t *priv = NULL; - - priv = this->private; - loc->path = gf_strdup ("/"); - loc->name = ""; - loc->inode = inode_ref (priv->root_inode); - uuid_copy (loc->gfid, loc->inode->gfid); -} - -int -afr_set_root_gfid (dict_t *dict) -{ - uuid_t gfid; - int ret = 0; - - memset (gfid, 0, 16); - gfid[15] = 1; - - ret = afr_set_dict_gfid (dict, gfid); + goto out; + switch (op) { + case GF_AFR_OP_HEAL_INDEX: + op_ret = -1; + + for (i = 0; i < priv->child_count; i++) { + healer = &shd->index_healers[i]; + snprintf (key, 64, "%d-%d-status", xl_id, i); + + if (!priv->child_up[i]) { + ret = dict_set_str (output, key, + "Brick is not connected"); + } else if (AFR_COUNT (priv->child_up, + priv->child_count) < 2) { + ret = dict_set_str (output, key, + "< 2 bricks in replica are up"); + } else if (!afr_shd_is_subvol_local (this, healer->subvol)) { + ret = dict_set_str (output, key, + "Brick is remote"); + } else { + ret = dict_set_str (output, key, + "Started self-heal"); + afr_shd_index_healer_spawn (this, i); + op_ret = 0; + } + } + break; + case GF_AFR_OP_HEAL_FULL: + op_ret = -1; + + for (i = 0; i < priv->child_count; i++) { + healer = &shd->full_healers[i]; + snprintf (key, 64, "%d-%d-status", xl_id, i); + + if (!priv->child_up[i]) { + ret = dict_set_str (output, key, + "Brick is not connected"); + } else if (AFR_COUNT (priv->child_up, + priv->child_count) < 2) { + ret = dict_set_str (output, key, + "< 2 bricks in replica are up"); + } else if (!afr_shd_is_subvol_local (this, healer->subvol)) { + ret = dict_set_str (output, key, + "Brick is remote"); + } else { + ret = dict_set_str (output, key, + "Started self-heal"); + afr_shd_full_healer_spawn (this, i); + op_ret = 0; + } + } + break; + case GF_AFR_OP_INDEX_SUMMARY: + for (i = 0; i < priv->child_count; i++) + if (shd->index_healers[i].local) + afr_shd_gather_index_entries (this, i, output); + break; + case GF_AFR_OP_HEALED_FILES: + eh_dump (shd->healed, output, afr_add_shd_event); + break; + case 
GF_AFR_OP_HEAL_FAILED_FILES: + eh_dump (shd->heal_failed, output, afr_add_shd_event); + break; + case GF_AFR_OP_SPLIT_BRAIN_FILES: + eh_dump (shd->split_brain, output, afr_add_shd_event); + break; + case GF_AFR_OP_STATISTICS: + for (i = 0; i < priv->child_count; i++) { + eh_dump (shd->statistics[i], output, + afr_add_crawl_event); + afr_shd_dict_add_crawl_event (this, output, + &shd->index_healers[i].crawl_event); + afr_shd_dict_add_crawl_event (this, output, + &shd->full_healers[i].crawl_event); + } + break; + case GF_AFR_OP_STATISTICS_HEAL_COUNT: + case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + op_ret = -1; + + for (i = 0; i < priv->child_count; i++) { + if (!priv->child_up[i]) { + snprintf (key, 64, "%d-%d-status", xl_id, i); + ret = dict_set_str (output, key, + "Brick is not connected"); + } else { + snprintf (key, 64, "%d-%d-hardlinks", xl_id, i); + cnt = afr_shd_get_index_count (this, i); + if (cnt >= 0) { + ret = dict_set_uint64 (output, key, cnt); + } + op_ret = 0; + } + } + +// ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED, +// STATISTICS_TO_BE_HEALED, +// output); + break; - return ret; + default: + gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op); + break; + } +out: + dict_del (output, this->name); + return op_ret; } |