summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
author Pranith Kumar K <pranithk@gluster.com> 2011-09-08 14:06:32 +0530
committer Vijay Bellur <vijay@gluster.com> 2011-09-14 05:36:24 -0700
commit 45f03a58a0fbfc1d5e647c764b10e37d0a9ebb26 (patch)
tree 92b1848d39c867733c3c1876840b2b5f6a9c219e /xlators/cluster/afr
parent 3bea46c1f232a4480e57ac482f92f7673af7034f (diff)
Proactive self heal process implementation
Change-Id: I96db0d94566ceabf1649f890318363f738c06553
BUG: 2458
Reviewed-on: http://review.gluster.com/403
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- xlators/cluster/afr/src/Makefile.am 4
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 33
-rw-r--r-- xlators/cluster/afr/src/afr-mem-types.h 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 6
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 512
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.h 44
-rw-r--r-- xlators/cluster/afr/src/afr.c 15
-rw-r--r-- xlators/cluster/afr/src/afr.h 17
-rw-r--r-- xlators/cluster/afr/src/pump.c 115
-rw-r--r-- xlators/cluster/afr/src/pump.h 4
10 files changed, 631 insertions, 121 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index e192b599b..16ed25af1 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,7 +1,7 @@
xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c
afr_la_LDFLAGS = -module -avoidversion
afr_la_SOURCES = $(afr_common_source) afr.c
@@ -11,7 +11,7 @@ pump_la_LDFLAGS = -module -avoidversion
pump_la_SOURCES = $(afr_common_source) pump.c
pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h
AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2d5e98196..0e4e97355 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -54,6 +54,7 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
#include "pump.h"
#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
@@ -132,7 +133,7 @@ afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
if (ret)
- gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed");
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
out:
if (ret && pgfid)
@@ -1961,15 +1962,15 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
loc->path, GLUSTERFS_ENTRYLK_COUNT);
}
- ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req);
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", &gfid_req);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"failed to get the gfid from dict");
} else {
uuid_copy (local->cont.lookup.gfid_req, gfid_req);
+ if (local->loc.parent)
+ dict_del (local->xattr_req, "gfid-req");
}
- if (local->loc.parent != NULL)
- dict_del (xattr_req, "gfid-req");
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
@@ -3395,11 +3396,12 @@ afr_notify (xlator_t *this, int32_t event,
int up_children = 0;
int down_children = 0;
int propagate = 0;
-
int had_heard_from_all = 0;
int have_heard_from_all = 0;
int idx = -1;
int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
priv = this->private;
@@ -3445,6 +3447,12 @@ afr_notify (xlator_t *this, int32_t event,
"going online.", ((xlator_t *)data)->name);
} else {
event = GF_EVENT_CHILD_MODIFIED;
+ gf_log (this->name, GF_LOG_INFO, "subvol %d came up, "
+ "start crawl", idx);
+ if (had_heard_from_all) {
+ call_psh = 1;
+ up_child = idx;
+ }
}
priv->last_event[idx] = event;
@@ -3509,6 +3517,8 @@ afr_notify (xlator_t *this, int32_t event,
LOCK (&priv->lock);
{
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
@@ -3523,11 +3533,18 @@ afr_notify (xlator_t *this, int32_t event,
}
}
UNLOCK (&priv->lock);
+ if (up_children > 1) {
+ gf_log (this->name, GF_LOG_INFO, "All subvolumes came "
+ "up, start crawl");
+ call_psh = 1;
+ }
}
ret = 0;
if (propagate)
ret = default_notify (this, event, data);
+ if (call_psh)
+ afr_proactive_self_heal (this, up_child);
out:
return ret;
@@ -3767,3 +3784,9 @@ afr_get_children_count (int32_t *children, unsigned int child_count)
}
return count;
}
+
+/*
+ * Mark the call stack that owns 'frame' as low priority by storing
+ * LOW_PRIO_PROC_PID in frame->root->pid.
+ *
+ * 'frame' and 'frame->root' are dereferenced without a NULL check.
+ * NOTE(review): LOW_PRIO_PROC_PID is defined outside this file; how lower
+ * layers interpret it is not visible here -- confirm at its definition.
+ */
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index d5a988708..ebe189c35 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -44,6 +44,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_locked_fd,
gf_afr_mt_inode_ctx_t,
gf_afr_fd_paused_call_t,
+ gf_afr_mt_afr_crawl_data_t,
+ gf_afr_mt_afr_brick_pos_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index eeca62724..8f50c6251 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2031,12 +2031,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
return 0;
}
-static inline void
-afr_set_low_priority (call_frame_t *frame)
-{
- frame->root->pid = -1;
-}
-
int
afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 000000000..d27d9e09b
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,512 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "afr.h"
+#include "syncop.h"
+#include "afr-self-heald.h"
+
+/*
+ * Extract the host component of a pathinfo string.
+ *
+ * The host is everything strictly between the first ':' and the last ':'
+ * of 'pathinfo'.  On success it is copied into 'hostname' (a buffer of
+ * 'size' bytes), which is zero-filled first and therefore NUL-terminated.
+ *
+ * Returns 0 on success.  Returns -1, leaving 'hostname' untouched, when
+ * 'pathinfo' or 'hostname' is NULL, when 'size' is 0, when 'pathinfo'
+ * contains fewer than two ':' characters, or when the host does not fit in
+ * 'size' bytes including the terminating NUL.
+ */
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+        char    *start = NULL;
+        char    *end   = NULL;
+        size_t   len   = 0;
+        int      ret   = -1;
+
+        if (!pathinfo || !hostname || !size)
+                goto out;
+
+        start = strchr (pathinfo, ':');
+        if (!start)
+                goto out;
+        end = strrchr (pathinfo, ':');
+        if (start == end)
+                goto out;
+
+        /* Number of bytes strictly between the two separators. */
+        len = (size_t)(end - start) - 1;
+
+        /* Fail instead of overflowing or truncating: a shortened host name
+         * could compare equal to the wrong machine. */
+        if (len >= size)
+                goto out;
+
+        memset (hostname, 0, size);
+        memcpy (hostname, start + 1, len);
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Decide whether the brick described by 'pathinfo' lives on this machine.
+ *
+ * The host part of 'pathinfo' (see get_pathinfo_host) is compared, as an
+ * exact string, with the name returned by gethostname().
+ *
+ * '*local' is set to _gf_false on entry and to _gf_true only when the two
+ * names are equal.  Returns 0 when the comparison could be made, non-zero
+ * when 'pathinfo' could not be parsed or gethostname() failed.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+        int             ret = 0;
+        char            pathinfohost[1024] = {0};
+        char            localhost[1024] = {0};
+        xlator_t        *this = THIS;
+
+        *local = _gf_false;
+        ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+        if (ret) {
+                /* pathinfo may be NULL here; never hand NULL to %s */
+                gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+                        pathinfo ? pathinfo : "(null)");
+                goto out;
+        }
+
+        /* gethostname() need not NUL-terminate a truncated name.  Holding
+         * back the last byte of the zero-initialised buffer guarantees the
+         * strcmp below always sees a terminated string. */
+        ret = gethostname (localhost, sizeof (localhost) - 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+                        "reason: %s", strerror (errno));
+                goto out;
+        }
+
+        if (!strcmp (localhost, pathinfohost))
+                *local = _gf_true;
+out:
+        return ret;
+}
+
+/*
+ * Recursively walk the directory named by 'loc', issuing a lookup through
+ * this xlator on every entry found and descending into sub-directories.
+ *
+ * 'pid' is passed to fd_create for the directory fd.
+ *
+ * Returns 0 when the walk of this directory ran to the end of the readdirp
+ * stream, -1 when the fd could not be created, a path could not be built,
+ * or fewer than two children are up; a negative value from syncop_opendir
+ * is returned as is.  Failures of individual entry lookups and of
+ * recursive crawls are not propagated.
+ */
+static int
+_crawl_directory (loc_t *loc, pid_t pid)
+{
+        xlator_t        *this = NULL;
+        afr_private_t   *priv = NULL;
+        fd_t            *fd = NULL;
+        off_t           offset = 0;
+        loc_t           entry_loc = {0};
+        gf_dirent_t     *entry = NULL;
+        gf_dirent_t     *tmp = NULL;
+        gf_dirent_t     entries;
+        struct iatt     iatt = {0};
+        struct iatt     parent = {0};
+        char            *file_path = NULL;
+        int             ret = 0;
+        gf_boolean_t    free_entries = _gf_false;
+
+        INIT_LIST_HEAD (&entries.list);
+        this = THIS;
+        priv = this->private;
+
+        GF_ASSERT (loc->inode);
+
+        gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+        fd = fd_create (loc->inode, pid);
+        if (!fd) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to create fd for %s", loc->path);
+                /* report the failure instead of returning success */
+                ret = -1;
+                goto out;
+        }
+
+        /* A loc without a parent is looked up first.  The result is
+         * overwritten by the opendir call below, which is what decides
+         * whether the crawl can proceed. */
+        if (!loc->parent) {
+                ret = syncop_lookup (this, loc, NULL,
+                                     &iatt, NULL, &parent);
+        }
+
+        ret = syncop_opendir (this, loc, fd);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "opendir failed on %s", loc->path);
+                goto out;
+        }
+
+        while (syncop_readdirp (this, fd, 131072, offset, &entries)) {
+                ret = 0;
+                free_entries = _gf_true;
+                if (afr_up_children_count (priv->child_up,
+                                           priv->child_count) < 2) {
+                        gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
+                                "< 2 children are up");
+                        ret = -1;
+                        goto out;
+                }
+
+                if (list_empty (&entries.list))
+                        goto out;
+
+                list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+                        /* remember where to resume the next readdirp */
+                        offset = entry->d_off;
+                        if (IS_ENTRY_CWD (entry->d_name) ||
+                            IS_ENTRY_PARENT (entry->d_name))
+                                continue;
+
+                        file_path = afr_build_file_path (loc, entry);
+                        if (!file_path) {
+                                ret = -1;
+                                goto out;
+                        }
+
+                        /* NOTE(review): file_path becomes entry_loc.path;
+                         * presumably loc_wipe releases it -- confirm. */
+                        loc_wipe (&entry_loc);
+                        afr_build_child_loc (loc, &entry_loc,
+                                             file_path, entry->d_name);
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "found readdir entry=%s", entry->d_name);
+
+                        ret = syncop_lookup (this, &entry_loc, NULL,
+                                             &iatt, NULL, &parent);
+
+                        //Don't fail the crawl if lookup fails as it
+                        //could be because of split-brain
+                        if (ret || (!IA_ISDIR (iatt.ia_type)))
+                                continue;
+                        ret = _crawl_directory (&entry_loc, pid);
+                }
+
+                gf_dirent_free (&entries);
+                free_entries = _gf_false;
+        }
+        ret = 0;
+out:
+        if (entry_loc.path)
+                loc_wipe (&entry_loc);
+        if (free_entries)
+                gf_dirent_free (&entries);
+        /* drop the reference taken by fd_create; without this every
+         * crawled directory leaks one fd */
+        if (fd)
+                fd_unref (fd);
+        return ret;
+}
+
+/*
+ * Work out whether child number 'child' is a brick on this machine and
+ * record the answer in priv->shd.pos[child].
+ *
+ * Nothing is done when the position is already known.  Otherwise the
+ * pathinfo xattr is fetched from the child on the root loc and its host
+ * part is compared with the local host name (afr_local_pathinfo).
+ * priv->root_inode is created under priv->lock if it does not exist yet.
+ *
+ * Returns 0 on success (position recorded or already known), non-zero when
+ * the root inode could not be created, the getxattr failed, the pathinfo
+ * key was missing, or the host comparison failed.
+ */
+int
+afr_find_child_position (xlator_t *this, int child)
+{
+        afr_private_t   *priv = NULL;
+        dict_t          *xattr_rsp = NULL;
+        loc_t           loc = {0};
+        int             ret = 0;
+        gf_boolean_t    local = _gf_false;
+        char            *pathinfo = NULL;
+        afr_child_pos_t *pos = NULL;
+        inode_table_t   *itable = NULL;
+
+        priv = this->private;
+        pos = &priv->shd.pos[child];
+
+        if (*pos != AFR_POS_UNKNOWN) {
+                goto out;
+        }
+
+        //TODO: Hack to make the root_loc hack work
+        LOCK (&priv->lock);
+        {
+                if (!priv->root_inode) {
+                        itable = inode_table_new (0, this);
+                        if (!itable)
+                                goto unlock;
+                        /* NOTE(review): itable is not released if
+                         * inode_new fails -- confirm whether that matters */
+                        priv->root_inode = inode_new (itable);
+                        if (!priv->root_inode)
+                                goto unlock;
+                }
+        }
+unlock:
+        UNLOCK (&priv->lock);
+
+        if (!priv->root_inode) {
+                ret = -1;
+                goto out;
+        }
+        afr_build_root_loc (priv->root_inode, &loc);
+
+        ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
+                               GF_XATTR_PATHINFO_KEY);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child "
+                        "%d", child);
+                goto out;
+        }
+
+        /* pathinfo points into xattr_rsp; it is only used before the dict
+         * is released at 'out' */
+        ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on "
+                        "child %d", child);
+                goto out;
+        }
+
+        ret = afr_local_pathinfo (pathinfo, &local);
+        if (ret)
+                goto out;
+        if (local)
+                *pos = AFR_POS_LOCAL;
+        else
+                *pos = AFR_POS_REMOTE;
+
+        gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos);
+out:
+        /* release the getxattr reply; it was leaked on every call before */
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+        return ret;
+}
+
+/*
+ * Completion callback handed to synctask_new for the crawl task.
+ *
+ * Frees the crawl argument block ('data') and destroys the call stack that
+ * owns 'sync_frame'.  The task's result ('ret') is not examined.
+ * Always returns 0.
+ */
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ return 0;
+}
+
+/*
+ * Resolve the local/remote position of every child that is currently up.
+ *
+ * Children whose child_up flag is not 1 are skipped.  A failure on one
+ * child is logged and does not stop the scan.
+ *
+ * Returns 0 if at least one child was resolved.  Otherwise returns the
+ * result of the last failed attempt, or -1 if no child was up at all.
+ */
+static int
+afr_find_all_children_postions (xlator_t *this)
+{
+        afr_private_t   *priv     = this->private;
+        int             resolved  = 0;
+        int             ret       = -1;
+        int             child     = 0;
+
+        for (child = 0; child < priv->child_count; child++) {
+                if (priv->child_up[child] != 1)
+                        continue;
+
+                ret = afr_find_child_position (this, child);
+                if (!ret) {
+                        resolved++;
+                        continue;
+                }
+
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to determine if the "
+                        "child %s is local.",
+                        priv->children[child]->name);
+        }
+
+        return resolved ? 0 : ret;
+}
+
+/*
+ * Report whether any of the first 'child_count' entries of the position
+ * array 'pos' is AFR_POS_LOCAL.
+ */
+static gf_boolean_t
+afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count)
+{
+        unsigned int idx = 0;
+
+        for (idx = 0; idx < child_count; idx++) {
+                if (pos[idx] == AFR_POS_LOCAL)
+                        return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+/*
+ * Resolve the position of one child, or of every up child when 'child' is
+ * AFR_ALL_CHILDREN.  Returns whatever the selected helper returns (0 on
+ * success).
+ */
+int
+afr_init_child_position (xlator_t *this, int child)
+{
+        if (child != AFR_ALL_CHILDREN)
+                return afr_find_child_position (this, child);
+
+        return afr_find_all_children_postions (this);
+}
+
+/*
+ * Non-zero when 'child' is recorded as AFR_POS_LOCAL in shd->pos.  With
+ * AFR_ALL_CHILDREN the answer is whether any of the 'child_count' children
+ * is local.
+ */
+int
+afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
+{
+        if (child == AFR_ALL_CHILDREN)
+                return afr_local_child_exists (shd->pos, child_count);
+
+        return (shd->pos[child] == AFR_POS_LOCAL);
+}
+
+/*
+ * Run a full crawl from the root, making sure only one crawl is active per
+ * xlator instance.
+ *
+ * If a crawl is already in progress, shd->pending is set and the function
+ * returns at once; the running crawl repeats itself when it sees the flag.
+ * Otherwise this caller crawls, and keeps crawling for as long as
+ * shd->pending was raised during the previous pass.
+ *
+ * Returns -1 when priv->root_inode is not set, 0 when another crawl is
+ * already running, otherwise the result of the last _crawl_directory pass.
+ */
+static int
+afr_crawl_directory (xlator_t *this, pid_t pid)
+{
+        afr_private_t    *priv = NULL;
+        afr_self_heald_t *shd = NULL;
+        loc_t            loc = {0};
+        gf_boolean_t     crawl = _gf_false;
+        int              ret = 0;
+
+        priv = this->private;
+        shd = &priv->shd;
+
+        /* Check the precondition before claiming shd->inprogress.  Bailing
+         * out after the flag was set would leave it set forever and block
+         * every later crawl. */
+        if (!priv->root_inode) {
+                ret = -1;
+                goto out;
+        }
+
+        LOCK (&priv->lock);
+        {
+                if (shd->inprogress) {
+                        shd->pending = _gf_true;
+                } else {
+                        shd->inprogress = _gf_true;
+                        crawl = _gf_true;
+                }
+        }
+        UNLOCK (&priv->lock);
+
+        if (!crawl)
+                goto out;
+
+        afr_build_root_loc (priv->root_inode, &loc);
+        while (crawl) {
+                ret = _crawl_directory (&loc, pid);
+                if (ret)
+                        gf_log (this->name, GF_LOG_ERROR, "Crawl failed");
+                else
+                        gf_log (this->name, GF_LOG_INFO, "Crawl completed");
+                LOCK (&priv->lock);
+                {
+                        if (shd->pending) {
+                                /* a request arrived meanwhile: go again */
+                                shd->pending = _gf_false;
+                        } else {
+                                shd->inprogress = _gf_false;
+                                crawl = _gf_false;
+                        }
+                }
+                UNLOCK (&priv->lock);
+        }
+out:
+        return ret;
+}
+
+/*
+ * Synctask body for a proactive self-heal crawl.
+ *
+ * 'data' is an afr_crawl_data_t.  The position of the requested child (or
+ * of all children) is resolved first; the crawl itself is started only if
+ * that child is local.
+ *
+ * Returns the position-lookup error if there was one, 0 when the child is
+ * not local, otherwise the result of afr_crawl_directory.
+ */
+static int
+afr_crawl (void *data)
+{
+        afr_crawl_data_t *crawl_data = data;
+        xlator_t         *this       = THIS;
+        afr_private_t    *priv       = this->private;
+        int              ret         = -1;
+
+        ret = afr_init_child_position (this, crawl_data->child);
+        if (ret)
+                return ret;
+
+        /* ret is 0 here: a remote child is not an error */
+        if (!afr_is_local_child (&priv->shd, crawl_data->child,
+                                 priv->child_count))
+                return ret;
+
+        return afr_crawl_directory (this, crawl_data->pid);
+}
+
+/*
+ * Schedule a background crawl that triggers self-heal.
+ *
+ * 'idx' is the index of the child that just came up, or AFR_ALL_CHILDREN.
+ * Nothing happens when the self-heal daemon is disabled, or when 'idx'
+ * names a child already known to be remote.
+ *
+ * A new frame is created, given an lk-owner and low priority, and handed
+ * to a synctask together with a heap-allocated afr_crawl_data_t.  Once the
+ * task has been created both are released by afr_crawl_done; if creation
+ * fails they are released here.
+ */
+void
+afr_proactive_self_heal (xlator_t *this, int idx)
+{
+        afr_private_t    *priv = NULL;
+        afr_self_heald_t *shd = NULL;
+        call_frame_t     *frame = NULL;
+        afr_crawl_data_t *crawl_data = NULL;
+        int              ret = 0;
+
+        priv = this->private;
+        shd = &priv->shd;
+        if (!shd->enabled)
+                goto out;
+
+        if ((idx != AFR_ALL_CHILDREN) &&
+            (shd->pos[idx] == AFR_POS_REMOTE))
+                goto out;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (!frame)
+                goto out;
+
+        afr_set_lk_owner (frame, this);
+        afr_set_low_priority (frame);
+        crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
+                                gf_afr_mt_afr_crawl_data_t);
+        if (!crawl_data)
+                goto cleanup;
+        crawl_data->child = idx;
+        crawl_data->pid = frame->root->pid;
+        gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx);
+        ret = synctask_new (this->ctx->env, afr_crawl,
+                            afr_crawl_done, frame, crawl_data);
+        if (!ret)
+                goto out;       /* the task now owns frame and crawl_data */
+
+        gf_log (this->name, GF_LOG_ERROR, "Could not create the "
+                "task for %d ret %d", idx, ret);
+cleanup:
+        /* NOTE(review): assumes synctask_new does not invoke
+         * afr_crawl_done when it returns non-zero -- confirm in syncop.c.
+         * Without this cleanup the frame and crawl_data leak. */
+        if (crawl_data)
+                GF_FREE (crawl_data);
+        STACK_DESTROY (frame->root);
+out:
+        return;
+}
+
+//TODO: This is a hack
+/*
+ * Fill 'loc' so that it describes the volume root, using 'inode' as the
+ * root inode.
+ *
+ * Besides populating 'loc' this also writes to 'inode' itself: ino is set
+ * to 1, ia_type to IA_IFDIR and gfid to fifteen zero bytes followed by 1.
+ * loc->path and loc->name are pointed at string literals.
+ */
+void
+afr_build_root_loc (inode_t *inode, loc_t *loc)
+{
+        /* the loc side */
+        loc->path  = "/";
+        loc->name  = "";
+        loc->inode = inode;
+        loc->ino   = 1;
+
+        /* stamp the inode as the root directory */
+        inode->ino     = 1;
+        inode->ia_type = IA_IFDIR;
+        memset (inode->gfid, 0, 16);
+        inode->gfid[15] = 1;
+}
+
+/*
+ * Store the root gfid (fifteen zero bytes followed by 1) in 'dict' through
+ * afr_set_dict_gfid, and return that call's result.
+ */
+int
+afr_set_root_gfid (dict_t *dict)
+{
+        uuid_t root_gfid;
+
+        memset (root_gfid, 0, 16);
+        root_gfid[15] = 1;
+
+        return afr_set_dict_gfid (dict, root_gfid);
+}
+
+/*
+ * Build the full path of directory entry 'entry' inside directory 'loc'.
+ *
+ * The result is loc->path followed by entry->d_name, with a '/' inserted
+ * between them unless loc->path is already "/".
+ *
+ * Returns a string allocated with GF_CALLOC (type gf_afr_mt_char), or NULL
+ * when the allocation fails.  Releasing the string is the caller's job.
+ */
+char *
+afr_build_file_path (loc_t *loc, gf_dirent_t *entry)
+{
+        char    *file_path = NULL;
+        size_t  total_size = 0;
+        char    *fmt = NULL;
+
+        if (IS_ROOT_PATH (loc->path))
+                fmt = "%s%s";
+        else
+                fmt = "%s/%s";
+
+        /* Size the buffer from the strings that are actually formatted
+         * (parent + optional '/' + name + NUL), not from entry->d_len, so
+         * the result cannot be truncated if the two ever disagree. */
+        total_size = strlen (loc->path) + 1 + strlen (entry->d_name) + 1;
+
+        file_path = GF_CALLOC (1, total_size, gf_afr_mt_char);
+        if (!file_path)
+                goto out;
+
+        snprintf (file_path, total_size, fmt, loc->path, entry->d_name);
+out:
+        return file_path;
+}
+
+/*
+ * Fill 'child' as a loc for an entry called 'name', with full path 'path',
+ * inside directory 'parent'.
+ *
+ * 'path' and 'name' are stored as given, not copied.  child->parent takes
+ * a new reference on the parent's inode, and child->inode is a fresh inode
+ * from the parent inode's table.  The result of inode_new is not checked.
+ */
+void
+afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name)
+{
+        inode_t *dir = parent->inode;
+
+        child->name   = name;
+        child->path   = path;
+
+        child->parent = inode_ref (dir);
+        child->inode  = inode_new (dir->table);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
new file mode 100644
index 000000000..c85c97b25
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __AFR_SELF_HEALD_H__
+#define __AFR_SELF_HEALD_H__
+#include "xlator.h"
+
+/* String predicates used while walking directories.  Each argument is a
+ * NUL-terminated string and is evaluated exactly once. */
+#define IS_ROOT_PATH(path) (!strcmp ((path), "/"))
+#define IS_ENTRY_CWD(entry) (!strcmp ((entry), "."))
+#define IS_ENTRY_PARENT(entry) (!strcmp ((entry), ".."))
+/* Pseudo child index meaning "every child".  Parenthesized so the minus
+ * sign cannot bind to neighbouring tokens at the point of use. */
+#define AFR_ALL_CHILDREN (-1)
+
+/* Argument block handed to the crawl synctask by afr_proactive_self_heal
+ * and freed by the task's completion callback. */
+typedef struct afr_crawl_data_ {
+ int child; /* index of the child that came up, or AFR_ALL_CHILDREN */
+ pid_t pid; /* copied from frame->root->pid; used when creating dir fds */
+} afr_crawl_data_t;
+
+/* Schedule a background crawl for child 'idx' (or AFR_ALL_CHILDREN).  Does
+ * nothing when the self-heal daemon option is off or the child is already
+ * known to be remote. */
+void afr_proactive_self_heal (xlator_t *this, int idx);
+
+/* Fill 'loc' to describe "/" using 'inode'; also stamps ino, ia_type and
+ * gfid on 'inode' itself. */
+void afr_build_root_loc (inode_t *inode, loc_t *loc);
+
+/* Set the root gfid (all zero except a trailing 1) in 'dict'; returns the
+ * result of afr_set_dict_gfid. */
+int afr_set_root_gfid (dict_t *dict);
+
+/* Return a newly allocated "<loc->path>/<entry->d_name>" string (no extra
+ * '/' when loc->path is "/"), or NULL if the allocation fails. */
+char * afr_build_file_path (loc_t *loc, gf_dirent_t *entry);
+
+/* Fill 'child' for entry 'name' with path 'path' under 'parent': stores the
+ * two strings as given, refs the parent inode and creates a new inode. */
+void
+afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name);
+#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 044213e07..8bb94e205 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -140,6 +140,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("data-self-heal-algorithm",
priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
if (read_subvol) {
@@ -240,6 +242,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -320,6 +324,13 @@ init (xlator_t *this)
goto out;
}
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_afr_brick_pos_t);
+ if (!priv->shd.pos) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
LOCK_INIT (&priv->root_inode_lk);
priv->first_lookup = 1;
priv->root_inode = NULL;
@@ -475,5 +486,9 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
},
+ { .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index b9a11c486..92ccf607f 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -50,6 +50,12 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+/* Where a child (brick) lives relative to this process, as recorded in
+ * afr_self_heald_t.pos[]. */
typedef enum {
+ AFR_POS_UNKNOWN, /* not determined yet; value 0, so a zeroed array starts here */
+ AFR_POS_LOCAL, /* pathinfo host equals the local gethostname() result */
+ AFR_POS_REMOTE /* pathinfo host differs from the local host name */
+} afr_child_pos_t;
+
+typedef enum {
AFR_INODE_SET_READ_CTX = 1,
AFR_INODE_RM_STALE_CHILDREN,
AFR_INODE_SET_OPENDIR_DONE,
@@ -75,6 +81,13 @@ typedef struct afr_inode_ctx_ {
int32_t *fresh_children;//increasing order of latency
} afr_inode_ctx_t;
+/* Per-xlator state of the proactive self-heal crawler. */
+typedef struct afr_self_heald_ {
+ gf_boolean_t enabled; /* value of the "self-heal-daemon" option */
+ gf_boolean_t pending; /* a crawl was requested while one was running */
+ gf_boolean_t inprogress; /* a crawl is currently running */
+ afr_child_pos_t *pos; /* one entry per child: unknown/local/remote */
+} afr_self_heald_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -134,6 +147,7 @@ typedef struct _afr_private {
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
+ afr_self_heald_t shd;
} afr_private_t;
typedef struct {
@@ -241,7 +255,6 @@ typedef struct {
call_frame_t *sh_frame;
} afr_self_heal_t;
-
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */
AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
@@ -1001,4 +1014,6 @@ afr_open_only_data_self_heal (char *data_self_heal);
gf_boolean_t
afr_data_self_heal_enabled (char *data_self_heal);
+void
+afr_set_low_priority (call_frame_t *frame);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index ede9f3b49..0623b817a 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -149,71 +149,6 @@ pump_set_resume_path (xlator_t *this, const char *path)
return ret;
}
-static void
-build_child_loc (loc_t *parent, loc_t *child, char *path, char *name)
-{
- child->path = path;
- child->name = name;
-
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
-}
-
-static char *
-build_file_path (loc_t *loc, gf_dirent_t *entry)
-{
- xlator_t *this = NULL;
- char *file_path = NULL;
- int pathlen = 0;
- int total_size = 0;
-
- this = THIS;
-
- pathlen = STRLEN_0 (loc->path);
-
- if (IS_ROOT_PATH (loc->path)) {
- total_size = pathlen + entry->d_len;
- file_path = GF_CALLOC (1, total_size, gf_afr_mt_char);
- if (!file_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return NULL;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "constructing file path of size=%d"
- "pathlen=%d, d_len=%d",
- total_size, pathlen,
- entry->d_len);
-
- snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name);
-
- } else {
- total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */
- file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char);
- if (!file_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return NULL;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "constructing file path of size=%d"
- "pathlen=%d, d_len=%d",
- total_size, pathlen,
- entry->d_len);
-
- snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name);
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "path=%s and d_name=%s", loc->path, entry->d_name);
- gf_log (this->name, GF_LOG_TRACE,
- "constructed file_path=%s of size=%d", file_path, total_size);
-
- return file_path;
-}
-
static int
pump_save_path (xlator_t *this, const char *path)
{
@@ -232,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
dict = dict_new ();
dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
@@ -450,14 +385,15 @@ gf_pump_traverse_directory (loc_t *loc)
gf_log (this->name, GF_LOG_DEBUG,
"found readdir entry=%s", entry->d_name);
- file_path = build_file_path (loc, entry);
+ file_path = afr_build_file_path (loc, entry);
if (!file_path) {
gf_log (this->name, GF_LOG_DEBUG,
"file path construction failed");
goto out;
}
- build_child_loc (loc, &entry_loc, file_path, entry->d_name);
+ afr_build_child_loc (loc, &entry_loc, file_path,
+ entry->d_name);
if (!IS_ENTRY_CWD (entry->d_name) &&
!IS_ENTRY_PARENT (entry->d_name)) {
@@ -530,19 +466,6 @@ out:
}
-void
-build_root_loc (inode_t *inode, loc_t *loc)
-{
- loc->path = "/";
- loc->name = "";
- loc->inode = inode;
- loc->ino = 1;
- loc->inode->ino = 1;
- memset (loc->inode->gfid, 0, 16);
- loc->inode->gfid[15] = 1;
-
-}
-
static int
pump_update_resume_path (xlator_t *this)
{
@@ -583,7 +506,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
ret = syncop_removexattr (priv->children[source], &loc,
PUMP_PATH);
@@ -618,7 +541,7 @@ pump_complete_migration (xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
dict = dict_new ();
@@ -656,20 +579,6 @@ pump_complete_migration (xlator_t *this)
}
static int
-pump_set_root_gfid (dict_t *dict)
-{
- uuid_t gfid;
- int ret = 0;
-
- memset (gfid, 0, 16);
- gfid[15] = 1;
-
- ret = afr_set_dict_gfid (dict, gfid);
-
- return ret;
-}
-
-static int
pump_lookup_sink (loc_t *loc)
{
xlator_t *this = NULL;
@@ -682,7 +591,7 @@ pump_lookup_sink (loc_t *loc)
xattr_req = dict_new ();
- ret = pump_set_root_gfid (xattr_req);
+ ret = afr_set_root_gfid (xattr_req);
if (ret)
goto out;
@@ -721,7 +630,7 @@ pump_task (void *data)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
xattr_req = dict_new ();
if (!xattr_req) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -730,7 +639,7 @@ pump_task (void *data)
goto out;
}
- pump_set_root_gfid (xattr_req);
+ afr_set_root_gfid (xattr_req);
ret = syncop_lookup (this, &loc, xattr_req,
&iatt, &xattr_rsp, &parent);
@@ -746,7 +655,7 @@ pump_task (void *data)
pump_update_resume_path (this);
- pump_set_root_gfid (xattr_req);
+ afr_set_root_gfid (xattr_req);
ret = pump_lookup_sink (&loc);
if (ret) {
pump_update_resume_path (this);
@@ -894,7 +803,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
data = data_ref (dict_get (local->dict, PUMP_CMD_START));
if (!data) {
@@ -1132,7 +1041,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this)
GF_ASSERT (priv->root_inode);
- build_root_loc (priv->root_inode, &loc);
+ afr_build_root_loc (priv->root_inode, &loc);
STACK_WIND (frame,
pump_cmd_start_getxattr_cbk,
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
index 027524227..02eede49c 100644
--- a/xlators/cluster/afr/src/pump.h
+++ b/xlators/cluster/afr/src/pump.h
@@ -26,10 +26,6 @@
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-
#define PUMP_CMD_START "trusted.glusterfs.pump.start"
#define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit"
#define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort"