summaryrefslogtreecommitdiffstats
path: root/heal
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2013-12-15 08:05:04 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2014-10-15 01:22:02 -0700
commit7e8eefca2caaaa61e1b31b747384f660c595d9c9 (patch)
treea6e862f2caad4097a41dd8b260a2333e90db3b4a /heal
parent7808d4b7892ccd80bcabb1253ee359a64c51c612 (diff)
heal: glfs-heal implementation
Thanks a lot to Niels for helping me to get build stuff right. Change-Id: I634f24d90cd856ceab3cc0c6e9a91003f443403e BUG: 1147462 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/6529 Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'heal')
-rw-r--r--heal/Makefile.am3
-rw-r--r--heal/src/Makefile.am35
-rw-r--r--heal/src/glfs-heal.c610
3 files changed, 648 insertions, 0 deletions
diff --git a/heal/Makefile.am b/heal/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/heal/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/heal/src/Makefile.am b/heal/src/Makefile.am
new file mode 100644
index 00000000000..65093df35cc
--- /dev/null
+++ b/heal/src/Makefile.am
@@ -0,0 +1,35 @@
+sbin_PROGRAMS = glfsheal
+
+glfsheal_SOURCES = glfs-heal.c
+
+glfsheal_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(top_builddir)/xlators/cluster/afr/src/afr.la \
+ $(GF_GLUSTERFS_LIBS) $(XML_LIBS)
+
+glfsheal_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/xlators/lib/src\
+ -I$(top_srcdir)/xlators/cluster/afr/src\
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
+ -I$(top_srcdir)/rpc/xdr/src\
+ -I$(top_srcdir)/api/src\
+ -I$(top_srcdir)/contrib/argp-standalone\
+ -DDATADIR=\"$(localstatedir)\" \
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
+ $(XML_CPPFLAGS)
+
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+
+$(top_builddir)/xlators/cluster/afr/src/afr.la:
+ $(MAKE) -C $(top_builddir)/xlators/cluster/afr/src/ all
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
new file mode 100644
index 00000000000..d02858642ab
--- /dev/null
+++ b/heal/src/glfs-heal.c
@@ -0,0 +1,610 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "glfs.h"
+#include "glfs-handles.h"
+#include "glfs-internal.h"
+#include "syncop.h"
+#include <string.h>
+#include <time.h>
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-self-heald.h"
+
+#define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+
+int
+glfsh_link_inode_update_loc (loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link (loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL)
+ goto out;
+
+ inode_unref (loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glfsh_get_index_dir_loc (loc_t *rootloc, xlator_t *xl, loc_t *dirloc,
+ int32_t *op_errno)
+{
+ void *index_gfid = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+
+ ret = syncop_getxattr (xl, rootloc, &xattr, GF_XATTROP_INDEX_GFID);
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret < 0) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ uuid_copy (dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (rootloc->inode->table);
+ ret = syncop_lookup (xl, dirloc, NULL,
+ &iattr, NULL, &parent);
+ dirloc->path = NULL;
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+ ret = glfsh_link_inode_update_loc (dirloc, &iattr);
+ if (ret)
+ goto out;
+ glfs_loc_touchup (dirloc);
+
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+int
+afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ gf_boolean_t *metadata_selfheal)
+{
+ int ret = -1;
+ unsigned char *locked_on = NULL;
+ afr_private_t *priv = this->private;
+
+ locked_on = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+ if (ret == 0) {
+ /* Not a single lock */
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ goto out;
+ }
+ ret = afr_selfheal_unlocked_inspect (frame, this, inode->gfid,
+ NULL, NULL,
+ metadata_selfheal, NULL);
+ }
+ afr_selfheal_uninodelk (frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+out:
+ return ret;
+}
+
+int
+afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, gf_boolean_t *data_selfheal)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *data_lock = NULL;
+
+ priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
+ 0, 0, locked_on);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ goto out;
+ }
+ ret = afr_selfheal_inodelk (frame, this, inode, this->name,
+ 0, 0, data_lock);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;
+ /* all invalid responses */
+ goto unlock;
+ }
+ ret = afr_selfheal_unlocked_inspect (frame, this,
+ inode->gfid, NULL,
+ data_selfheal,
+ NULL, NULL);
+ }
+ afr_selfheal_uninodelk (frame, this, inode, this->name, 0, 0,
+ data_lock);
+ }
+unlock:
+ afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
+out:
+ return ret;
+}
+
+int
+afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, gf_boolean_t *entry_selfheal)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *data_lock = NULL;
+
+ priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ data_lock = alloca0 (priv->child_count);
+
+ ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;/* all invalid responses */
+ goto out;
+ }
+
+ ret = afr_selfheal_entrylk (frame, this, inode, this->name,
+ NULL, data_lock);
+ {
+ if (ret == 0) {
+ ret = -afr_final_errno (frame->local, priv);
+ if (ret == 0)
+ ret = -ENOTCONN;
+ /* all invalid responses */
+ goto unlock;
+ }
+ ret = afr_selfheal_unlocked_inspect (frame, this,
+ inode->gfid,
+ NULL, NULL, NULL,
+ entry_selfheal);
+ }
+ afr_selfheal_unentrylk (frame, this, inode, this->name, NULL,
+ data_lock);
+ }
+unlock:
+ afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
+ locked_on);
+out:
+ return ret;
+}
+
+int
+afr_selfheal_locked_inspect (call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal)
+{
+ int ret = -1;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
+
+ ret = afr_selfheal_unlocked_inspect (frame, this, gfid, inode,
+ &dsh, &msh, &esh);
+ if (ret)
+ goto out;
+
+ /* For every heal type hold locks and check if it indeed needs heal */
+
+ if (msh) {
+ msh = _gf_false;
+ ret = afr_selfheal_locked_metadata_inspect (frame, this,
+ *inode, &msh);
+ if (msh || ret < 0)
+ goto out;
+ }
+
+ if (dsh) {
+ dsh = _gf_false;
+ ret = afr_selfheal_locked_data_inspect (frame, this, *inode,
+ &dsh);
+ if (dsh || ret < 0)
+ goto out;
+ }
+
+ if (esh) {
+ esh = _gf_false;
+ ret = afr_selfheal_locked_entry_inspect (frame, this, *inode,
+ &esh);
+ }
+
+out:
+ if (entry_selfheal)
+ *entry_selfheal = esh;
+ if (data_selfheal)
+ *data_selfheal = dsh;
+ if (metadata_selfheal)
+ *metadata_selfheal = msh;
+ return ret;
+}
+
+static xlator_t*
+_get_afr_ancestor (xlator_t *xl)
+{
+ if (!xl || !xl->parents)
+ return NULL;
+
+ while (xl->parents) {
+ xl = xl->parents->xlator;
+ if (!xl)
+ break;
+ if (strcmp (xl->type, "cluster/replicate") == 0)
+ return xl;
+ }
+
+ return NULL;
+}
+
+static int
+glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+ off_t *offset, uint64_t *num_entries)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ char *path = NULL;
+ uuid_t gfid = {0};
+ inode_t *inode = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+
+ this = THIS;
+ frame = afr_frame_create (this);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ *offset = entry->d_off;
+ if ((strcmp (entry->d_name, ".") == 0) ||
+ (strcmp (entry->d_name, "..") == 0))
+ continue;
+
+ uuid_clear (gfid);
+ GF_FREE (path);
+ path = NULL;
+ if (inode) {
+ inode_forget (inode, 1);
+ inode_unref (inode);
+ inode = NULL;
+ }
+
+ AFR_STACK_RESET (frame);
+ uuid_parse (entry->d_name, gfid);
+ ret = afr_selfheal_locked_inspect (frame, this, gfid, &inode,
+ &data_selfheal,
+ &metadata_selfheal,
+ &entry_selfheal);
+ if (ret == 0) {
+ if (!entry_selfheal && !metadata_selfheal &&
+ !data_selfheal)
+ continue;
+ }
+
+ ret = afr_shd_gfid_to_path (this, xl, gfid, &path);
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ afr_shd_index_purge (xl, fd->inode, entry->d_name);
+ ret = 0;
+ continue;
+ }
+
+ (*num_entries)++;
+ printf ("%s\n", path ? path : uuid_utoa (inode->gfid));
+ }
+ ret = 0;
+out:
+ if (inode) {
+ inode_forget (inode, 1);
+ inode_unref (inode);
+ }
+
+ GF_FREE (path);
+ return ret;
+}
+
+static int
+glfsh_crawl_directory (xlator_t *readdir_xl, fd_t *fd, loc_t *loc)
+{
+ off_t offset = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ gf_boolean_t free_entries = _gf_false;
+ uint64_t num_entries = 0;
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while (1) {
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset, &entries);
+ if (ret <= 0)
+ break;
+ ret = 0;
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list))
+ goto out;
+
+ ret = glfsh_process_entries (readdir_xl, fd, &entries, &offset,
+ &num_entries);
+ if (ret < 0)
+ goto out;
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+ if (ret < 0) {
+ printf ("Failed to complete gathering info. "
+ "Number of entries so far: %"PRIu64"\n", num_entries);
+ }
+ else {
+ printf ("Number of entries: %"PRIu64"\n", num_entries);
+ }
+ return ret;
+}
+
+static int
+glfsh_print_brick (xlator_t *xl, loc_t *rootloc)
+{
+ int ret = 0;
+ dict_t *xattr = NULL;
+ char *pathinfo = NULL;
+ char *brick_start = NULL;
+ char *brick_end = NULL;
+
+ ret = syncop_getxattr (xl, rootloc, &xattr, GF_XATTR_PATHINFO_KEY);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret < 0)
+ goto out;
+
+ brick_start = strchr (pathinfo, ':') + 1;
+ brick_end = pathinfo + strlen (pathinfo) - 1;
+ *brick_end = 0;
+ printf ("Brick %s\n", brick_start);
+
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+void
+glfsh_print_brick_from_xl (xlator_t *xl)
+{
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+ int ret = 0;
+
+ ret = dict_get_str (xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str (xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto out;
+out:
+ if (ret < 0)
+ printf ("Brick - Not able to get brick information\n");
+ else
+ printf ("Brick %s:%s\n", remote_host, remote_subvol);
+}
+
+void
+glfsh_print_pending_heals (xlator_t *xl, loc_t *rootloc)
+{
+ int ret = 0;
+ loc_t dirloc = {0};
+ fd_t *fd = NULL;
+ int32_t op_errno = 0;
+
+ ret = glfsh_print_brick (xl, rootloc);
+ if (ret < 0) {
+ glfsh_print_brick_from_xl (xl);
+ printf ("Status: %s\n", strerror (-ret));
+ goto out;
+ }
+
+ ret = glfsh_get_index_dir_loc (rootloc, xl, &dirloc, &op_errno);
+ if (ret < 0) {
+ if (op_errno == ESTALE || op_errno == ENOENT)
+ printf ("Number of entries: 0\n");
+ else
+ printf ("Status: %s\n", strerror (op_errno));
+ goto out;
+ }
+
+ fd = fd_anonymous (dirloc.inode);
+ ret = glfsh_crawl_directory (xl, fd, &dirloc);
+ if (fd)
+ fd_unref (fd);
+ if (ret < 0)
+ printf ("Failed to find entries with pending self-heal\n");
+out:
+ loc_wipe (&dirloc);
+ return;
+}
+
+static int
+glfsh_validate_replicate_volume (xlator_t *xl)
+{
+ xlator_t *afr_xl = NULL;
+ int ret = -1;
+
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (strcmp (xl->type, "protocol/client") == 0) {
+ afr_xl = _get_afr_ancestor (xl);
+ if (afr_xl) {
+ ret = 0;
+ break;
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+ return ret;
+}
+
+int
+main (int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ char *volname = NULL;
+ xlator_t *top_subvol = NULL;
+ xlator_t *xl = NULL;
+ loc_t rootloc = {0};
+ char logfilepath[PATH_MAX] = {0};
+ xlator_t *old_THIS = NULL;
+ xlator_t *afr_xl = NULL;
+
+ if (argc != 2) {
+ printf ("Usage: %s <volname>\n", argv[0]);
+ ret = -1;
+ goto out;
+ }
+ volname = argv[1];
+
+ fs = glfs_new (volname);
+ if (!fs) {
+ ret = -1;
+ printf ("Not able to initialize volume '%s'\n", volname);
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server (fs, "tcp", "localhost", 24007);
+ snprintf (logfilepath, sizeof (logfilepath),
+ DEFAULT_HEAL_LOG_FILE_DIRECTORY"/glfsheal-%s.log", volname);
+ ret = glfs_set_logging(fs, logfilepath, GF_LOG_INFO);
+ if (ret < 0) {
+ ret = -1;
+ printf ("Not able to initialize volume '%s'\n", volname);
+ goto out;
+ }
+
+ ret = glfs_init (fs);
+ if (ret < 0) {
+ ret = -1;
+ if (errno == ENOENT) {
+ printf ("Volume %s does not exist\n", volname);
+ }
+ else {
+ printf ("%s: Not able to fetch volfile from "
+ "glusterd\n", volname);
+ }
+ goto out;
+ }
+
+ sleep (2);
+ __glfs_entry_fs (fs);
+ top_subvol = glfs_active_subvol (fs);
+ if (!top_subvol) {
+ ret = -1;
+ if (errno == ENOTCONN) {
+ printf ("Volume %s is not started (Or) All the bricks "
+ "are not running.\n", volname);
+ }
+ else {
+ printf ("%s: Not able to mount the volume, %s\n",
+ volname, strerror (errno));
+ }
+ goto out;
+ }
+
+ ret = glfsh_validate_replicate_volume (top_subvol);
+ if (ret < 0) {
+ printf ("Volume %s is not of type replicate\n", volname);
+ goto out;
+ }
+ rootloc.inode = inode_ref (top_subvol->itable->root);
+ glfs_loc_touchup (&rootloc);
+
+ xl = top_subvol;
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (strcmp (xl->type, "protocol/client") == 0) {
+ afr_xl = _get_afr_ancestor (xl);
+ if (afr_xl) {
+ old_THIS = THIS;
+ THIS = afr_xl;
+ glfsh_print_pending_heals (xl, &rootloc);
+ THIS = old_THIS;
+ printf("\n");
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+ loc_wipe (&rootloc);
+ glfs_subvol_done (fs, top_subvol);
+ glfs_fini (fs);
+
+ return 0;
+out:
+ if (fs && top_subvol)
+ glfs_subvol_done (fs, top_subvol);
+ loc_wipe (&rootloc);
+ if (fs)
+ glfs_fini (fs);
+
+ return ret;
+}