From 45f03a58a0fbfc1d5e647c764b10e37d0a9ebb26 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 8 Sep 2011 14:06:32 +0530 Subject: Proactive self heal process implementation Change-Id: I96db0d94566ceabf1649f890318363f738c06553 BUG: 2458 Reviewed-on: http://review.gluster.com/403 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/Makefile.am | 4 +- xlators/cluster/afr/src/afr-common.c | 33 +- xlators/cluster/afr/src/afr-mem-types.h | 2 + xlators/cluster/afr/src/afr-self-heal-common.c | 6 - xlators/cluster/afr/src/afr-self-heald.c | 512 +++++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heald.h | 44 +++ xlators/cluster/afr/src/afr.c | 15 + xlators/cluster/afr/src/afr.h | 17 +- xlators/cluster/afr/src/pump.c | 115 +----- xlators/cluster/afr/src/pump.h | 4 - 10 files changed, 631 insertions(+), 121 deletions(-) create mode 100644 xlators/cluster/afr/src/afr-self-heald.c create mode 100644 xlators/cluster/afr/src/afr-self-heald.h (limited to 'xlators/cluster/afr/src') diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index e192b599bf4..16ed25af10b 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = afr.la pump.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c +afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c afr_la_LDFLAGS = -module -avoidversion afr_la_SOURCES = $(afr_common_source) afr.c @@ -11,7 +11,7 @@ pump_la_LDFLAGS = -module -avoidversion pump_la_SOURCES = $(afr_common_source) pump.c pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2d5e981967e..0e4e9735503 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -54,6 +54,7 @@ #include "afr-transaction.h" #include "afr-self-heal.h" #include "afr-self-heal-common.h" +#include "afr-self-heald.h" #include "pump.h" #define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL @@ -132,7 +133,7 @@ afr_set_dict_gfid (dict_t *dict, uuid_t gfid) ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t)); if (ret) - gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed"); + gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed"); out: if (ret && pgfid) @@ -1961,15 +1962,15 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc->path, GLUSTERFS_ENTRYLK_COUNT); } - ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); + ret = dict_get_ptr (local->xattr_req, "gfid-req", &gfid_req); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "failed to get the gfid from dict"); } else { uuid_copy (local->cont.lookup.gfid_req, gfid_req); + if (local->loc.parent) + dict_del (local->xattr_req, "gfid-req"); } - if (local->loc.parent != NULL) - dict_del (xattr_req, "gfid-req"); for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { @@ -3395,11 +3396,12 @@ afr_notify (xlator_t *this, int32_t event, int up_children = 0; int down_children = 0; int propagate = 0; - int had_heard_from_all = 0; int have_heard_from_all = 0; int idx = -1; int ret = -1; + int call_psh = 0; + int up_child = AFR_ALL_CHILDREN; priv = this->private; @@ -3445,6 +3447,12 @@ afr_notify (xlator_t *this, int32_t event, "going online.", ((xlator_t *)data)->name); } else { event = GF_EVENT_CHILD_MODIFIED; + gf_log (this->name, GF_LOG_INFO, "subvol %d came up, " + "start crawl", idx); + if (had_heard_from_all) { + call_psh = 1; + up_child = idx; + } } priv->last_event[idx] = event; @@ -3509,6 +3517,8 @@ afr_notify (xlator_t *this, int32_t event, LOCK (&priv->lock); { + up_children = afr_up_children_count (priv->child_up, + priv->child_count); for (i = 0; i < priv->child_count; i++) { if (priv->last_event[i] == GF_EVENT_CHILD_UP) { event = GF_EVENT_CHILD_UP; @@ -3523,11 +3533,18 @@ afr_notify (xlator_t *this, int32_t event, } } UNLOCK (&priv->lock); + if (up_children > 1) { + gf_log (this->name, GF_LOG_INFO, "All subvolumes came " + "up, start crawl"); + call_psh = 1; + } } ret = 0; if (propagate) ret = default_notify (this, event, data); + if (call_psh) + afr_proactive_self_heal (this, up_child); out: return ret; @@ -3767,3 +3784,9 @@ afr_get_children_count (int32_t *children, unsigned int child_count) } return count; } + +void +afr_set_low_priority (call_frame_t *frame) +{ + frame->root->pid = LOW_PRIO_PROC_PID; +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index d5a988708b8..ebe189c3575 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,6 +44,8 @@ enum gf_afr_mem_types_ { gf_afr_mt_locked_fd, gf_afr_mt_inode_ctx_t, gf_afr_fd_paused_call_t, + gf_afr_mt_afr_crawl_data_t, + gf_afr_mt_afr_brick_pos_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index eeca62724b4..8f50c625136 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2031,12 +2031,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) return 0; } -static inline void -afr_set_low_priority (call_frame_t *frame) -{ - frame->root->pid = -1; -} - int afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) { diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c new file mode 100644 index 00000000000..d27d9e09b5b --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -0,0 +1,512 @@ +/* + Copyright (c) 2010-2011 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include "afr.h" +#include "syncop.h" +#include "afr-self-heald.h" + +static int +get_pathinfo_host (char *pathinfo, char *hostname, size_t size) +{ + char *start = NULL; + char *end = NULL; + int ret = -1; + int i = 0; + + if (!pathinfo) + goto out; + + start = strchr (pathinfo, ':'); + if (!start) + goto out; + end = strrchr (pathinfo, ':'); + if (start == end) + goto out; + + memset (hostname, 0, size); + i = 0; + while (++start != end) + hostname[i++] = *start; + ret = 0; +out: + return ret; +} + +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +{ + int ret = 0; + char pathinfohost[1024] = {0}; + char localhost[1024] = {0}; + xlator_t *this = THIS; + + *local = _gf_false; + ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", + pathinfo); + goto out; + } + + ret = gethostname (localhost, sizeof (localhost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " + "reason: %s", strerror (errno)); + goto out; + } + + if (!strcmp (localhost, pathinfohost)) + *local = _gf_true; +out: + return ret; +} + +static int +_crawl_directory (loc_t *loc, pid_t pid) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + off_t offset = 0; + loc_t entry_loc = {0}; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + gf_dirent_t entries; + struct iatt iatt = {0}; + struct iatt parent = {0};; + char *file_path = NULL; + int ret = 0; + gf_boolean_t free_entries = _gf_false; + + INIT_LIST_HEAD (&entries.list); + this = THIS; + priv = this->private; + + GF_ASSERT (loc->inode); + + gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); + fd = fd_create (loc->inode, pid); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create fd for %s", loc->path); + goto out; + } + + if (!loc->parent) { + ret = syncop_lookup (this, loc, NULL, + &iatt, NULL, &parent); + } + + ret = syncop_opendir (this, loc, fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s", loc->path); + goto out; + } + + while (syncop_readdirp (this, fd, 131072, offset, &entries)) { + ret = 0; + free_entries = _gf_true; + if (afr_up_children_count (priv->child_up, + priv->child_count) < 2) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " + "< 2 children are up"); + ret = -1; + goto out; + } + + if (list_empty (&entries.list)) + goto out; + + list_for_each_entry_safe (entry, tmp, &entries.list, list) { + offset = entry->d_off; + if (IS_ENTRY_CWD (entry->d_name) || + IS_ENTRY_PARENT (entry->d_name)) + continue; + + file_path = afr_build_file_path (loc, entry); + if (!file_path) { + ret = -1; + goto out; + } + + loc_wipe (&entry_loc); + afr_build_child_loc (loc, &entry_loc, + file_path, entry->d_name); + + gf_log (this->name, GF_LOG_DEBUG, + "found readdir entry=%s", entry->d_name); + + ret = syncop_lookup (this, &entry_loc, NULL, + &iatt, NULL, &parent); + + //Don't fail the crawl if lookup fails as it + //could be because of split-brain + if (ret || (!IA_ISDIR (iatt.ia_type))) + continue; + ret = _crawl_directory (&entry_loc, pid); + } + + gf_dirent_free (&entries); + free_entries = _gf_false; + } + ret = 0; +out: + if (entry_loc.path) + loc_wipe (&entry_loc); + if (free_entries) + gf_dirent_free (&entries); + return ret; +} + +int +afr_find_child_position (xlator_t *this, int child) +{ + afr_private_t *priv = NULL; + dict_t *xattr_rsp = NULL; + loc_t loc = {0}; + int ret = 0; + gf_boolean_t local = _gf_false; + char *pathinfo = NULL; + afr_child_pos_t *pos = NULL; + inode_table_t *itable = NULL; + + priv = this->private; + pos = &priv->shd.pos[child]; + + if (*pos != AFR_POS_UNKNOWN) { + goto out; + } + + //TODO: Hack to make the root_loc hack work + LOCK (&priv->lock); + { + if (!priv->root_inode) { + itable = inode_table_new (0, this); + if (!itable) + goto unlock; + priv->root_inode = inode_new (itable); + if (!priv->root_inode) + goto unlock; + } + } +unlock: + UNLOCK (&priv->lock); + + if (!priv->root_inode) { + ret = -1; + goto out; + } + afr_build_root_loc (priv->root_inode, &loc); + + ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, + GF_XATTR_PATHINFO_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child " + "%d", child); + goto out; + } + + ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on " + "child %d", child); + goto out; + } + + ret = afr_local_pathinfo (pathinfo, &local); + if (ret) + goto out; + if (local) + *pos = AFR_POS_LOCAL; + else + *pos = AFR_POS_REMOTE; + + gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos); +out: + return ret; +} + +static int +afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) +{ + GF_FREE (data); + STACK_DESTROY (sync_frame->root); + return 0; +} + +static int +afr_find_all_children_postions (xlator_t *this) +{ + int ret = -1; + int i = 0; + gf_boolean_t succeeded = _gf_false; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (priv->child_up[i] != 1) + continue; + ret = afr_find_child_position (this, i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to determine if the " + "child %s is local.", + priv->children[i]->name); + continue; + } + succeeded = _gf_true; + } + if (succeeded) + ret = 0; + return ret; +} + +static gf_boolean_t +afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count) +{ + int i = 0; + gf_boolean_t local = _gf_false; + + for (i = 0; i < child_count; i++, pos++) { + if (*pos == AFR_POS_LOCAL) { + local = _gf_true; + break; + } + } + return local; +} + +int +afr_init_child_position (xlator_t *this, int child) +{ + int ret = 0; + + if (child == AFR_ALL_CHILDREN) { + ret = afr_find_all_children_postions (this); + } else { + ret = afr_find_child_position (this, child); + } + return ret; +} + +int +afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) +{ + gf_boolean_t local = _gf_false; + + if (child == AFR_ALL_CHILDREN) + local = afr_local_child_exists (shd->pos, child_count); + else + local = (shd->pos[child] == AFR_POS_LOCAL); + + return local; +} + +static int +afr_crawl_directory (xlator_t *this, pid_t pid) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + loc_t loc = {0}; + gf_boolean_t crawl = _gf_false; + int ret = 0; + + priv = this->private; + shd = &priv->shd; + + + LOCK (&priv->lock); + { + if (shd->inprogress) { + shd->pending = _gf_true; + } else { + shd->inprogress = _gf_true; + crawl = _gf_true; + } + } + UNLOCK (&priv->lock); + + if (!priv->root_inode) { + ret = -1; + goto out; + } + + if (!crawl) + goto out; + + afr_build_root_loc (priv->root_inode, &loc); + while (crawl) { + ret = _crawl_directory (&loc, pid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Crawl failed"); + else + gf_log (this->name, GF_LOG_INFO, "Crawl completed"); + LOCK (&priv->lock); + { + if (shd->pending) { + shd->pending = _gf_false; + } else { + shd->inprogress = _gf_false; + crawl = _gf_false; + } + } + UNLOCK (&priv->lock); + } +out: + return ret; +} + +static int +afr_crawl (void *data) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = -1; + afr_crawl_data_t *crawl_data = data; + + this = THIS; + priv = this->private; + shd = &priv->shd; + + ret = afr_init_child_position (this, crawl_data->child); + if (ret) + goto out; + + if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) + goto out; + + ret = afr_crawl_directory (this, crawl_data->pid); +out: + return ret; +} + +void +afr_proactive_self_heal (xlator_t *this, int idx) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + call_frame_t *frame = NULL; + afr_crawl_data_t *crawl_data = NULL; + int ret = 0; + + priv = this->private; + shd = &priv->shd; + if (!shd->enabled) + goto out; + + if ((idx != AFR_ALL_CHILDREN) && + (shd->pos[idx] == AFR_POS_REMOTE)) + goto out; + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + afr_set_lk_owner (frame, this); + afr_set_low_priority (frame); + crawl_data = GF_CALLOC (1, sizeof (*crawl_data), + gf_afr_mt_afr_crawl_data_t); + if (!crawl_data) + goto out; + crawl_data->child = idx; + crawl_data->pid = frame->root->pid; + gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx); + ret = synctask_new (this->ctx->env, afr_crawl, + afr_crawl_done, frame, crawl_data); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Could not create the " + "task for %d ret %d", idx, ret); +out: + return; +} + +//TODO: This is a hack +void +afr_build_root_loc (inode_t *inode, loc_t *loc) +{ + loc->path = "/"; + loc->name = ""; + loc->inode = inode; + loc->ino = 1; + loc->inode->ino = 1; + loc->inode->ia_type = IA_IFDIR; + memset (loc->inode->gfid, 0, 16); + loc->inode->gfid[15] = 1; + +} + +int +afr_set_root_gfid (dict_t *dict) +{ + uuid_t gfid; + int ret = 0; + + memset (gfid, 0, 16); + gfid[15] = 1; + + ret = afr_set_dict_gfid (dict, gfid); + + return ret; +} + +char * +afr_build_file_path (loc_t *loc, gf_dirent_t *entry) +{ + xlator_t *this = NULL; + char *file_path = NULL; + int pathlen = 0; + size_t total_size = 0; + char *fmt = NULL; + + this = THIS; + + pathlen = STRLEN_0 (loc->path); + + if (IS_ROOT_PATH (loc->path)) { + total_size = pathlen + entry->d_len; + fmt = "%s%s"; + } else { + total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ + fmt = "%s/%s"; + } + + file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); + if (!file_path) + goto out; + + snprintf(file_path, total_size, fmt, loc->path, entry->d_name); +out: + return file_path; +} + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) +{ + child->path = path; + child->name = name; + + child->parent = inode_ref (parent->inode); + child->inode = inode_new (parent->inode->table); +} diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h new file mode 100644 index 00000000000..c85c97b25e4 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -0,0 +1,44 @@ +/* + Copyright (c) 2010-2011 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#ifndef __AFR_SELF_HEALD_H__ +#define __AFR_SELF_HEALD_H__ +#include "xlator.h" + +#define IS_ROOT_PATH(path) (!strcmp (path, "/")) +#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) +#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) +#define AFR_ALL_CHILDREN -1 + +typedef struct afr_crawl_data_ { + int child; + pid_t pid; +} afr_crawl_data_t; + +void afr_proactive_self_heal (xlator_t *this, int idx); + +void afr_build_root_loc (inode_t *inode, loc_t *loc); + +int afr_set_root_gfid (dict_t *dict); + +char * afr_build_file_path (loc_t *loc, gf_dirent_t *entry); + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name); +#endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 044213e0710..8bb94e2053e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -140,6 +140,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("data-self-heal-algorithm", priv->data_self_heal_algorithm, options, str, out); + GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out); + GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out); if (read_subvol) { @@ -240,6 +242,8 @@ init (xlator_t *this) GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out); + GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out); + GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out); GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool, @@ -320,6 +324,13 @@ init (xlator_t *this) goto out; } + priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, + gf_afr_mt_afr_brick_pos_t); + if (!priv->shd.pos) { + ret = -ENOMEM; + goto out; + } + LOCK_INIT (&priv->root_inode_lk); priv->first_lookup = 1; priv->root_inode = NULL; @@ -475,5 +486,9 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "off", }, + { .key = {"self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index b9a11c486fd..92ccf607f10 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -49,6 +49,12 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); +typedef enum { + AFR_POS_UNKNOWN, + AFR_POS_LOCAL, + AFR_POS_REMOTE +} afr_child_pos_t; + typedef enum { AFR_INODE_SET_READ_CTX = 1, AFR_INODE_RM_STALE_CHILDREN, @@ -75,6 +81,13 @@ typedef struct afr_inode_ctx_ { int32_t *fresh_children;//increasing order of latency } afr_inode_ctx_t; +typedef struct afr_self_heald_ { + gf_boolean_t enabled; + gf_boolean_t pending; + gf_boolean_t inprogress; + afr_child_pos_t *pos; +} afr_self_heald_t; + typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ @@ -134,6 +147,7 @@ typedef struct _afr_private { char vol_uuid[UUID_SIZE + 1]; int32_t *last_event; + afr_self_heald_t shd; } afr_private_t; typedef struct { @@ -241,7 +255,6 @@ typedef struct { call_frame_t *sh_frame; } afr_self_heal_t; - typedef enum { AFR_DATA_TRANSACTION, /* truncate, write, ... */ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */ @@ -1001,4 +1014,6 @@ afr_open_only_data_self_heal (char *data_self_heal); gf_boolean_t afr_data_self_heal_enabled (char *data_self_heal); +void +afr_set_low_priority (call_frame_t *frame); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index ede9f3b498c..0623b817a78 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -149,71 +149,6 @@ pump_set_resume_path (xlator_t *this, const char *path) return ret; } -static void -build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) -{ - child->path = path; - child->name = name; - - child->parent = inode_ref (parent->inode); - child->inode = inode_new (parent->inode->table); -} - -static char * -build_file_path (loc_t *loc, gf_dirent_t *entry) -{ - xlator_t *this = NULL; - char *file_path = NULL; - int pathlen = 0; - int total_size = 0; - - this = THIS; - - pathlen = STRLEN_0 (loc->path); - - if (IS_ROOT_PATH (loc->path)) { - total_size = pathlen + entry->d_len; - file_path = GF_CALLOC (1, total_size, gf_afr_mt_char); - if (!file_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return NULL; - } - - gf_log (this->name, GF_LOG_TRACE, - "constructing file path of size=%d" - "pathlen=%d, d_len=%d", - total_size, pathlen, - entry->d_len); - - snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name); - - } else { - total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ - file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); - if (!file_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return NULL; - } - - gf_log (this->name, GF_LOG_TRACE, - "constructing file path of size=%d" - "pathlen=%d, d_len=%d", - total_size, pathlen, - entry->d_len); - - snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name); - } - - gf_log (this->name, GF_LOG_TRACE, - "path=%s and d_name=%s", loc->path, entry->d_name); - gf_log (this->name, GF_LOG_TRACE, - "constructed file_path=%s of size=%d", file_path, total_size); - - return file_path; -} - static int pump_save_path (xlator_t *this, const char *path) { @@ -232,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); dict = dict_new (); dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path); @@ -450,14 +385,15 @@ gf_pump_traverse_directory (loc_t *loc) gf_log (this->name, GF_LOG_DEBUG, "found readdir entry=%s", entry->d_name); - file_path = build_file_path (loc, entry); + file_path = afr_build_file_path (loc, entry); if (!file_path) { gf_log (this->name, GF_LOG_DEBUG, "file path construction failed"); goto out; } - build_child_loc (loc, &entry_loc, file_path, entry->d_name); + afr_build_child_loc (loc, &entry_loc, file_path, + entry->d_name); if (!IS_ENTRY_CWD (entry->d_name) && !IS_ENTRY_PARENT (entry->d_name)) { @@ -530,19 +466,6 @@ out: } -void -build_root_loc (inode_t *inode, loc_t *loc) -{ - loc->path = "/"; - loc->name = ""; - loc->inode = inode; - loc->ino = 1; - loc->inode->ino = 1; - memset (loc->inode->gfid, 0, 16); - loc->inode->gfid[15] = 1; - -} - static int pump_update_resume_path (xlator_t *this) { @@ -583,7 +506,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this, priv = this->private; - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); ret = syncop_removexattr (priv->children[source], &loc, PUMP_PATH); @@ -618,7 +541,7 @@ pump_complete_migration (xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); dict = dict_new (); @@ -655,20 +578,6 @@ pump_complete_migration (xlator_t *this) return 0; } -static int -pump_set_root_gfid (dict_t *dict) -{ - uuid_t gfid; - int ret = 0; - - memset (gfid, 0, 16); - gfid[15] = 1; - - ret = afr_set_dict_gfid (dict, gfid); - - return ret; -} - static int pump_lookup_sink (loc_t *loc) { @@ -682,7 +591,7 @@ pump_lookup_sink (loc_t *loc) xattr_req = dict_new (); - ret = pump_set_root_gfid (xattr_req); + ret = afr_set_root_gfid (xattr_req); if (ret) goto out; @@ -721,7 +630,7 @@ pump_task (void *data) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); xattr_req = dict_new (); if (!xattr_req) { gf_log (this->name, GF_LOG_DEBUG, @@ -730,7 +639,7 @@ pump_task (void *data) goto out; } - pump_set_root_gfid (xattr_req); + afr_set_root_gfid (xattr_req); ret = syncop_lookup (this, &loc, xattr_req, &iatt, &xattr_rsp, &parent); @@ -746,7 +655,7 @@ pump_task (void *data) pump_update_resume_path (this); - pump_set_root_gfid (xattr_req); + afr_set_root_gfid (xattr_req); ret = pump_lookup_sink (&loc); if (ret) { pump_update_resume_path (this); @@ -894,7 +803,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); data = data_ref (dict_get (local->dict, PUMP_CMD_START)); if (!data) { @@ -1132,7 +1041,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); STACK_WIND (frame, pump_cmd_start_getxattr_cbk, diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h index 02752422796..02eede49cf5 100644 --- a/xlators/cluster/afr/src/pump.h +++ b/xlators/cluster/afr/src/pump.h @@ -26,10 +26,6 @@ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" -#define IS_ROOT_PATH(path) (!strcmp (path, "/")) -#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) -#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) - #define PUMP_CMD_START "trusted.glusterfs.pump.start" #define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit" #define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort" -- cgit