diff options
author | Pranith Kumar K <pranithk@gluster.com> | 2011-09-08 14:06:32 +0530 |
---|---|---|
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-14 05:36:24 -0700 |
commit | 45f03a58a0fbfc1d5e647c764b10e37d0a9ebb26 (patch) | |
tree | 92b1848d39c867733c3c1876840b2b5f6a9c219e /xlators | |
parent | 3bea46c1f232a4480e57ac482f92f7673af7034f (diff) |
Proactive self heal process implementation
Change-Id: I96db0d94566ceabf1649f890318363f738c06553
BUG: 2458
Reviewed-on: http://review.gluster.com/403
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators')
23 files changed, 1592 insertions, 523 deletions
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am index e192b599bf4..16ed25af10b 100644 --- a/xlators/cluster/afr/src/Makefile.am +++ b/xlators/cluster/afr/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = afr.la pump.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster -afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c $(top_builddir)/xlators/lib/src/libxlator.c +afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c $(top_builddir)/xlators/lib/src/libxlator.c afr_la_LDFLAGS = -module -avoidversion afr_la_SOURCES = $(afr_common_source) afr.c @@ -11,7 +11,7 @@ pump_la_LDFLAGS = -module -avoidversion pump_la_SOURCES = $(afr_common_source) pump.c pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h $(top_builddir)/glusterfsd/src/glusterfsd.h AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/contrib/md5 -shared -nostartfiles $(GF_CFLAGS) \ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2d5e981967e..0e4e9735503 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -54,6 +54,7 @@ #include "afr-transaction.h" #include "afr-self-heal.h" #include "afr-self-heal-common.h" +#include "afr-self-heald.h" #include "pump.h" #define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL @@ -132,7 +133,7 @@ afr_set_dict_gfid (dict_t *dict, uuid_t gfid) ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t)); if (ret) - gf_log (THIS->name, GF_LOG_DEBUG, "gfid set failed"); + gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed"); out: if (ret && pgfid) @@ -1961,15 +1962,15 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc->path, GLUSTERFS_ENTRYLK_COUNT); } - ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); + ret = dict_get_ptr (local->xattr_req, "gfid-req", &gfid_req); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "failed to get the gfid from dict"); } else { uuid_copy (local->cont.lookup.gfid_req, gfid_req); + if (local->loc.parent) + dict_del (local->xattr_req, "gfid-req"); } - if (local->loc.parent != NULL) - dict_del (xattr_req, "gfid-req"); for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { @@ -3395,11 +3396,12 @@ afr_notify (xlator_t *this, int32_t event, int up_children = 0; int down_children = 0; int propagate = 0; - int had_heard_from_all = 0; int have_heard_from_all = 0; int idx = -1; int ret = -1; + int call_psh = 0; + int up_child = AFR_ALL_CHILDREN; priv = this->private; @@ -3445,6 +3447,12 @@ afr_notify (xlator_t *this, int32_t event, "going online.", ((xlator_t *)data)->name); } else { event = GF_EVENT_CHILD_MODIFIED; + gf_log (this->name, GF_LOG_INFO, "subvol %d came up, " + "start crawl", idx); + if (had_heard_from_all) { + call_psh = 1; + up_child = idx; + } } priv->last_event[idx] = event; @@ -3509,6 +3517,8 @@ afr_notify (xlator_t *this, int32_t event, LOCK (&priv->lock); { + up_children = afr_up_children_count (priv->child_up, + priv->child_count); for (i = 0; i < priv->child_count; i++) { if (priv->last_event[i] == GF_EVENT_CHILD_UP) { event = GF_EVENT_CHILD_UP; @@ -3523,11 +3533,18 @@ afr_notify (xlator_t *this, int32_t event, } } UNLOCK (&priv->lock); + if (up_children > 1) { + gf_log (this->name, GF_LOG_INFO, "All subvolumes came " + "up, start crawl"); + call_psh = 1; + } } ret = 0; if (propagate) ret = default_notify (this, event, data); + if (call_psh) + afr_proactive_self_heal (this, up_child); out: return ret; @@ -3767,3 +3784,9 @@ afr_get_children_count (int32_t *children, unsigned int child_count) } return count; } + +void +afr_set_low_priority (call_frame_t *frame) +{ + frame->root->pid = LOW_PRIO_PROC_PID; +} diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h index d5a988708b8..ebe189c3575 100644 --- a/xlators/cluster/afr/src/afr-mem-types.h +++ b/xlators/cluster/afr/src/afr-mem-types.h @@ -44,6 +44,8 @@ enum gf_afr_mem_types_ { gf_afr_mt_locked_fd, gf_afr_mt_inode_ctx_t, gf_afr_fd_paused_call_t, + gf_afr_mt_afr_crawl_data_t, + gf_afr_mt_afr_brick_pos_t, gf_afr_mt_end }; #endif diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index eeca62724b4..8f50c625136 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2031,12 +2031,6 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) return 0; } -static inline void -afr_set_low_priority (call_frame_t *frame) -{ - frame->root->pid = -1; -} - int afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) { diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c new file mode 100644 index 00000000000..d27d9e09b5b --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -0,0 +1,512 @@ +/* + Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include "afr.h" +#include "syncop.h" +#include "afr-self-heald.h" + +static int +get_pathinfo_host (char *pathinfo, char *hostname, size_t size) +{ + char *start = NULL; + char *end = NULL; + int ret = -1; + int i = 0; + + if (!pathinfo) + goto out; + + start = strchr (pathinfo, ':'); + if (!start) + goto out; + end = strrchr (pathinfo, ':'); + if (start == end) + goto out; + + memset (hostname, 0, size); + i = 0; + while (++start != end) + hostname[i++] = *start; + ret = 0; +out: + return ret; +} + +int +afr_local_pathinfo (char *pathinfo, gf_boolean_t *local) +{ + int ret = 0; + char pathinfohost[1024] = {0}; + char localhost[1024] = {0}; + xlator_t *this = THIS; + + *local = _gf_false; + ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s", + pathinfo); + goto out; + } + + ret = gethostname (localhost, sizeof (localhost)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, " + "reason: %s", strerror (errno)); + goto out; + } + + if (!strcmp (localhost, pathinfohost)) + *local = _gf_true; +out: + return ret; +} + +static int +_crawl_directory (loc_t *loc, pid_t pid) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + fd_t *fd = NULL; + off_t offset = 0; + loc_t entry_loc = {0}; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + gf_dirent_t entries; + struct iatt iatt = {0}; + struct iatt parent = {0};; + char *file_path = NULL; + int ret = 0; + gf_boolean_t free_entries = _gf_false; + + INIT_LIST_HEAD (&entries.list); + this = THIS; + priv = this->private; + + GF_ASSERT (loc->inode); + + gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path); + fd = fd_create (loc->inode, pid); + if (!fd) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create fd for %s", loc->path); + goto out; + } + + if (!loc->parent) { + ret = syncop_lookup (this, loc, NULL, + &iatt, NULL, &parent); + } + + ret = syncop_opendir (this, loc, fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "opendir failed on %s", loc->path); + goto out; + } + + while (syncop_readdirp (this, fd, 131072, offset, &entries)) { + ret = 0; + free_entries = _gf_true; + if (afr_up_children_count (priv->child_up, + priv->child_count) < 2) { + gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as " + "< 2 children are up"); + ret = -1; + goto out; + } + + if (list_empty (&entries.list)) + goto out; + + list_for_each_entry_safe (entry, tmp, &entries.list, list) { + offset = entry->d_off; + if (IS_ENTRY_CWD (entry->d_name) || + IS_ENTRY_PARENT (entry->d_name)) + continue; + + file_path = afr_build_file_path (loc, entry); + if (!file_path) { + ret = -1; + goto out; + } + + loc_wipe (&entry_loc); + afr_build_child_loc (loc, &entry_loc, + file_path, entry->d_name); + + gf_log (this->name, GF_LOG_DEBUG, + "found readdir entry=%s", entry->d_name); + + ret = syncop_lookup (this, &entry_loc, NULL, + &iatt, NULL, &parent); + + //Don't fail the crawl if lookup fails as it + //could be because of split-brain + if (ret || (!IA_ISDIR (iatt.ia_type))) + continue; + ret = _crawl_directory (&entry_loc, pid); + } + + gf_dirent_free (&entries); + free_entries = _gf_false; + } + ret = 0; +out: + if (entry_loc.path) + loc_wipe (&entry_loc); + if (free_entries) + gf_dirent_free (&entries); + return ret; +} + +int +afr_find_child_position (xlator_t *this, int child) +{ + afr_private_t *priv = NULL; + dict_t *xattr_rsp = NULL; + loc_t loc = {0}; + int ret = 0; + gf_boolean_t local = _gf_false; + char *pathinfo = NULL; + afr_child_pos_t *pos = NULL; + inode_table_t *itable = NULL; + + priv = this->private; + pos = &priv->shd.pos[child]; + + if (*pos != AFR_POS_UNKNOWN) { + goto out; + } + + //TODO: Hack to make the root_loc hack work + LOCK (&priv->lock); + { + if (!priv->root_inode) { + itable = inode_table_new (0, this); + if (!itable) + goto unlock; + priv->root_inode = inode_new (itable); + if (!priv->root_inode) + goto unlock; + } + } +unlock: + UNLOCK (&priv->lock); + + if (!priv->root_inode) { + ret = -1; + goto out; + } + afr_build_root_loc (priv->root_inode, &loc); + + ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp, + GF_XATTR_PATHINFO_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child " + "%d", child); + goto out; + } + + ret = dict_get_str (xattr_rsp, GF_XATTR_PATHINFO_KEY, &pathinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pathinfo key not found on " + "child %d", child); + goto out; + } + + ret = afr_local_pathinfo (pathinfo, &local); + if (ret) + goto out; + if (local) + *pos = AFR_POS_LOCAL; + else + *pos = AFR_POS_REMOTE; + + gf_log (this->name, GF_LOG_INFO, "child %d is %d", child, *pos); +out: + return ret; +} + +static int +afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) +{ + GF_FREE (data); + STACK_DESTROY (sync_frame->root); + return 0; +} + +static int +afr_find_all_children_postions (xlator_t *this) +{ + int ret = -1; + int i = 0; + gf_boolean_t succeeded = _gf_false; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if (priv->child_up[i] != 1) + continue; + ret = afr_find_child_position (this, i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to determine if the " + "child %s is local.", + priv->children[i]->name); + continue; + } + succeeded = _gf_true; + } + if (succeeded) + ret = 0; + return ret; +} + +static gf_boolean_t +afr_local_child_exists (afr_child_pos_t *pos, unsigned int child_count) +{ + int i = 0; + gf_boolean_t local = _gf_false; + + for (i = 0; i < child_count; i++, pos++) { + if (*pos == AFR_POS_LOCAL) { + local = _gf_true; + break; + } + } + return local; +} + +int +afr_init_child_position (xlator_t *this, int child) +{ + int ret = 0; + + if (child == AFR_ALL_CHILDREN) { + ret = afr_find_all_children_postions (this); + } else { + ret = afr_find_child_position (this, child); + } + return ret; +} + +int +afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count) +{ + gf_boolean_t local = _gf_false; + + if (child == AFR_ALL_CHILDREN) + local = afr_local_child_exists (shd->pos, child_count); + else + local = (shd->pos[child] == AFR_POS_LOCAL); + + return local; +} + +static int +afr_crawl_directory (xlator_t *this, pid_t pid) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + loc_t loc = {0}; + gf_boolean_t crawl = _gf_false; + int ret = 0; + + priv = this->private; + shd = &priv->shd; + + + LOCK (&priv->lock); + { + if (shd->inprogress) { + shd->pending = _gf_true; + } else { + shd->inprogress = _gf_true; + crawl = _gf_true; + } + } + UNLOCK (&priv->lock); + + if (!priv->root_inode) { + ret = -1; + goto out; + } + + if (!crawl) + goto out; + + afr_build_root_loc (priv->root_inode, &loc); + while (crawl) { + ret = _crawl_directory (&loc, pid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Crawl failed"); + else + gf_log (this->name, GF_LOG_INFO, "Crawl completed"); + LOCK (&priv->lock); + { + if (shd->pending) { + shd->pending = _gf_false; + } else { + shd->inprogress = _gf_false; + crawl = _gf_false; + } + } + UNLOCK (&priv->lock); + } +out: + return ret; +} + +static int +afr_crawl (void *data) +{ + xlator_t *this = NULL; + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + int ret = -1; + afr_crawl_data_t *crawl_data = data; + + this = THIS; + priv = this->private; + shd = &priv->shd; + + ret = afr_init_child_position (this, crawl_data->child); + if (ret) + goto out; + + if (!afr_is_local_child (shd, crawl_data->child, priv->child_count)) + goto out; + + ret = afr_crawl_directory (this, crawl_data->pid); +out: + return ret; +} + +void +afr_proactive_self_heal (xlator_t *this, int idx) +{ + afr_private_t *priv = NULL; + afr_self_heald_t *shd = NULL; + call_frame_t *frame = NULL; + afr_crawl_data_t *crawl_data = NULL; + int ret = 0; + + priv = this->private; + shd = &priv->shd; + if (!shd->enabled) + goto out; + + if ((idx != AFR_ALL_CHILDREN) && + (shd->pos[idx] == AFR_POS_REMOTE)) + goto out; + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + afr_set_lk_owner (frame, this); + afr_set_low_priority (frame); + crawl_data = GF_CALLOC (1, sizeof (*crawl_data), + gf_afr_mt_afr_crawl_data_t); + if (!crawl_data) + goto out; + crawl_data->child = idx; + crawl_data->pid = frame->root->pid; + gf_log (this->name, GF_LOG_INFO, "starting crawl for %d", idx); + ret = synctask_new (this->ctx->env, afr_crawl, + afr_crawl_done, frame, crawl_data); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Could not create the " + "task for %d ret %d", idx, ret); +out: + return; +} + +//TODO: This is a hack +void +afr_build_root_loc (inode_t *inode, loc_t *loc) +{ + loc->path = "/"; + loc->name = ""; + loc->inode = inode; + loc->ino = 1; + loc->inode->ino = 1; + loc->inode->ia_type = IA_IFDIR; + memset (loc->inode->gfid, 0, 16); + loc->inode->gfid[15] = 1; + +} + +int +afr_set_root_gfid (dict_t *dict) +{ + uuid_t gfid; + int ret = 0; + + memset (gfid, 0, 16); + gfid[15] = 1; + + ret = afr_set_dict_gfid (dict, gfid); + + return ret; +} + +char * +afr_build_file_path (loc_t *loc, gf_dirent_t *entry) +{ + xlator_t *this = NULL; + char *file_path = NULL; + int pathlen = 0; + size_t total_size = 0; + char *fmt = NULL; + + this = THIS; + + pathlen = STRLEN_0 (loc->path); + + if (IS_ROOT_PATH (loc->path)) { + total_size = pathlen + entry->d_len; + fmt = "%s%s"; + } else { + total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ + fmt = "%s/%s"; + } + + file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); + if (!file_path) + goto out; + + snprintf(file_path, total_size, fmt, loc->path, entry->d_name); +out: + return file_path; +} + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) +{ + child->path = path; + child->name = name; + + child->parent = inode_ref (parent->inode); + child->inode = inode_new (parent->inode->table); +} diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h new file mode 100644 index 00000000000..c85c97b25e4 --- /dev/null +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -0,0 +1,44 @@ +/* + Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef __AFR_SELF_HEALD_H__ +#define __AFR_SELF_HEALD_H__ +#include "xlator.h" + +#define IS_ROOT_PATH(path) (!strcmp (path, "/")) +#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) +#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) +#define AFR_ALL_CHILDREN -1 + +typedef struct afr_crawl_data_ { + int child; + pid_t pid; +} afr_crawl_data_t; + +void afr_proactive_self_heal (xlator_t *this, int idx); + +void afr_build_root_loc (inode_t *inode, loc_t *loc); + +int afr_set_root_gfid (dict_t *dict); + +char * afr_build_file_path (loc_t *loc, gf_dirent_t *entry); + +void +afr_build_child_loc (loc_t *parent, loc_t *child, char *path, char *name); +#endif /* __AFR_SELF_HEALD_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 044213e0710..8bb94e2053e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -140,6 +140,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("data-self-heal-algorithm", priv->data_self_heal_algorithm, options, str, out); + GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out); + GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out); if (read_subvol) { @@ -240,6 +242,8 @@ init (xlator_t *this) GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out); + GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out); + GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out); GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool, @@ -320,6 +324,13 @@ init (xlator_t *this) goto out; } + priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count, + gf_afr_mt_afr_brick_pos_t); + if (!priv->shd.pos) { + ret = -ENOMEM; + goto out; + } + LOCK_INIT (&priv->root_inode_lk); priv->first_lookup = 1; priv->root_inode = NULL; @@ -475,5 +486,9 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "off", }, + { .key = {"self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index b9a11c486fd..92ccf607f10 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -50,6 +50,12 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); typedef enum { + AFR_POS_UNKNOWN, + AFR_POS_LOCAL, + AFR_POS_REMOTE +} afr_child_pos_t; + +typedef enum { AFR_INODE_SET_READ_CTX = 1, AFR_INODE_RM_STALE_CHILDREN, AFR_INODE_SET_OPENDIR_DONE, @@ -75,6 +81,13 @@ typedef struct afr_inode_ctx_ { int32_t *fresh_children;//increasing order of latency } afr_inode_ctx_t; +typedef struct afr_self_heald_ { + gf_boolean_t enabled; + gf_boolean_t pending; + gf_boolean_t inprogress; + afr_child_pos_t *pos; +} afr_self_heald_t; + typedef struct _afr_private { gf_lock_t lock; /* to guard access to child_count, etc */ unsigned int child_count; /* total number of children */ @@ -134,6 +147,7 @@ typedef struct _afr_private { char vol_uuid[UUID_SIZE + 1]; int32_t *last_event; + afr_self_heald_t shd; } afr_private_t; typedef struct { @@ -241,7 +255,6 @@ typedef struct { call_frame_t *sh_frame; } afr_self_heal_t; - typedef enum { AFR_DATA_TRANSACTION, /* truncate, write, ... */ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */ @@ -1001,4 +1014,6 @@ afr_open_only_data_self_heal (char *data_self_heal); gf_boolean_t afr_data_self_heal_enabled (char *data_self_heal); +void +afr_set_low_priority (call_frame_t *frame); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index ede9f3b498c..0623b817a78 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -149,71 +149,6 @@ pump_set_resume_path (xlator_t *this, const char *path) return ret; } -static void -build_child_loc (loc_t *parent, loc_t *child, char *path, char *name) -{ - child->path = path; - child->name = name; - - child->parent = inode_ref (parent->inode); - child->inode = inode_new (parent->inode->table); -} - -static char * -build_file_path (loc_t *loc, gf_dirent_t *entry) -{ - xlator_t *this = NULL; - char *file_path = NULL; - int pathlen = 0; - int total_size = 0; - - this = THIS; - - pathlen = STRLEN_0 (loc->path); - - if (IS_ROOT_PATH (loc->path)) { - total_size = pathlen + entry->d_len; - file_path = GF_CALLOC (1, total_size, gf_afr_mt_char); - if (!file_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return NULL; - } - - gf_log (this->name, GF_LOG_TRACE, - "constructing file path of size=%d" - "pathlen=%d, d_len=%d", - total_size, pathlen, - entry->d_len); - - snprintf(file_path, total_size, "%s%s", loc->path, entry->d_name); - - } else { - total_size = pathlen + entry->d_len + 1; /* for the extra '/' in the path */ - file_path = GF_CALLOC (1, total_size + 1, gf_afr_mt_char); - if (!file_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - return NULL; - } - - gf_log (this->name, GF_LOG_TRACE, - "constructing file path of size=%d" - "pathlen=%d, d_len=%d", - total_size, pathlen, - entry->d_len); - - snprintf(file_path, total_size, "%s/%s", loc->path, entry->d_name); - } - - gf_log (this->name, GF_LOG_TRACE, - "path=%s and d_name=%s", loc->path, entry->d_name); - gf_log (this->name, GF_LOG_TRACE, - "constructed file_path=%s of size=%d", file_path, total_size); - - return file_path; -} - static int pump_save_path (xlator_t *this, const char *path) { @@ -232,7 +167,7 @@ pump_save_path (xlator_t *this, const char *path) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); dict = dict_new (); dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path); @@ -450,14 +385,15 @@ gf_pump_traverse_directory (loc_t *loc) gf_log (this->name, GF_LOG_DEBUG, "found readdir entry=%s", entry->d_name); - file_path = build_file_path (loc, entry); + file_path = afr_build_file_path (loc, entry); if (!file_path) { gf_log (this->name, GF_LOG_DEBUG, "file path construction failed"); goto out; } - build_child_loc (loc, &entry_loc, file_path, entry->d_name); + afr_build_child_loc (loc, &entry_loc, file_path, + entry->d_name); if (!IS_ENTRY_CWD (entry->d_name) && !IS_ENTRY_PARENT (entry->d_name)) { @@ -530,19 +466,6 @@ out: } -void -build_root_loc (inode_t *inode, loc_t *loc) -{ - loc->path = "/"; - loc->name = ""; - loc->inode = inode; - loc->ino = 1; - loc->inode->ino = 1; - memset (loc->inode->gfid, 0, 16); - loc->inode->gfid[15] = 1; - -} - static int pump_update_resume_path (xlator_t *this) { @@ -583,7 +506,7 @@ pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this, priv = this->private; - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); ret = syncop_removexattr (priv->children[source], &loc, PUMP_PATH); @@ -618,7 +541,7 @@ pump_complete_migration (xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); dict = dict_new (); @@ -656,20 +579,6 @@ pump_complete_migration (xlator_t *this) } static int -pump_set_root_gfid (dict_t *dict) -{ - uuid_t gfid; - int ret = 0; - - memset (gfid, 0, 16); - gfid[15] = 1; - - ret = afr_set_dict_gfid (dict, gfid); - - return ret; -} - -static int pump_lookup_sink (loc_t *loc) { xlator_t *this = NULL; @@ -682,7 +591,7 @@ pump_lookup_sink (loc_t *loc) xattr_req = dict_new (); - ret = pump_set_root_gfid (xattr_req); + ret = afr_set_root_gfid (xattr_req); if (ret) goto out; @@ -721,7 +630,7 @@ pump_task (void *data) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); xattr_req = dict_new (); if (!xattr_req) { gf_log (this->name, GF_LOG_DEBUG, @@ -730,7 +639,7 @@ pump_task (void *data) goto out; } - pump_set_root_gfid (xattr_req); + afr_set_root_gfid (xattr_req); ret = syncop_lookup (this, &loc, xattr_req, &iatt, &xattr_rsp, &parent); @@ -746,7 +655,7 @@ pump_task (void *data) pump_update_resume_path (this); - pump_set_root_gfid (xattr_req); + afr_set_root_gfid (xattr_req); ret = pump_lookup_sink (&loc); if (ret) { pump_update_resume_path (this); @@ -894,7 +803,7 @@ pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); data = data_ref (dict_get (local->dict, PUMP_CMD_START)); if (!data) { @@ -1132,7 +1041,7 @@ pump_execute_start (call_frame_t *frame, xlator_t *this) GF_ASSERT (priv->root_inode); - build_root_loc (priv->root_inode, &loc); + afr_build_root_loc (priv->root_inode, &loc); STACK_WIND (frame, pump_cmd_start_getxattr_cbk, diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h index 02752422796..02eede49cf5 100644 --- a/xlators/cluster/afr/src/pump.h +++ b/xlators/cluster/afr/src/pump.h @@ -26,10 +26,6 @@ #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" #define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect" -#define IS_ROOT_PATH(path) (!strcmp (path, "/")) -#define IS_ENTRY_CWD(entry) (!strcmp (entry, ".")) -#define IS_ENTRY_PARENT(entry) (!strcmp (entry, "..")) - #define PUMP_CMD_START "trusted.glusterfs.pump.start" #define PUMP_CMD_COMMIT "trusted.glusterfs.pump.commit" #define PUMP_CMD_ABORT "trusted.glusterfs.pump.abort" diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 66467373b5c..faf4960dfac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -961,7 +961,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); out: return ret; @@ -1133,7 +1133,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } else { if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); } out: diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 6dca708f7d1..19975a689f4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -1310,7 +1310,7 @@ glusterd_marker_create_volfile (glusterd_volinfo_t *volinfo) goto out; if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); ret = 0; out: return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 25b1e669570..8331a91f669 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -53,10 +53,17 @@ build_volfile_path (const char *volname, char *path, char *free_ptr = NULL; char *tmp = NULL; glusterd_volinfo_t *volinfo = NULL; + char *server = NULL; priv = THIS->private; - if (volname[0] != '/') { + if (strstr (volname, "gluster/")) { + server = strchr (volname, '/') + 1; + glusterd_get_nodesvc_volfile (server, priv->workdir, + path, path_len); + ret = 1; + goto out; + } else if (volname[0] != '/') { /* Normal behavior */ dup_volname = gf_strdup (volname); } else { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 53556984a33..b9d4606c72a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -96,11 +96,6 @@ static char *glusterd_op_sm_event_names[] = { "GD_OP_EVENT_INVALID" }; - -static int -glusterd_restart_brick_servers (glusterd_volinfo_t *); - - char* glusterd_op_sm_state_name_get (int state) { @@ -271,12 +266,12 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) { int ret = 0; char *volname = NULL; - int exists = 0; - char *key = NULL; + int exists = 0; + char *key = NULL; char *key_fixed = NULL; char *value = NULL; - char str[100] = {0, }; - int count = 0; + char str[100] = {0, }; + int count = 0; int dict_count = 0; char errstr[2048] = {0, }; glusterd_volinfo_t *volinfo = NULL; @@ -352,21 +347,21 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) goto out; } - for ( count = 1; ret != 1 ; count++ ) { + for ( count = 1; ret != 1 ; count++ ) { global_opt = _gf_false; - sprintf (str, "key%d", count); - ret = dict_get_str (dict, str, &key); + sprintf (str, "key%d", count); + ret = dict_get_str (dict, str, &key); - if (ret) + if (ret) break; - exists = glusterd_check_option_exists (key, &key_fixed); + exists = glusterd_check_option_exists (key, &key_fixed); if (exists == -1) { ret = -1; goto out; } - if (!exists) { + if (!exists) { gf_log ("", GF_LOG_ERROR, "Option with name: %s " "does not exist", key); ret = snprintf (errstr, 2048, @@ -378,7 +373,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) *op_errstr = gf_strdup (errstr); ret = -1; goto out; - } + } sprintf (str, "value%d", count); ret = dict_get_str (dict, str, &value); @@ -734,10 +729,11 @@ glusterd_options_reset (glusterd_volinfo_t *volinfo, int32_t is_force) if (ret) goto out; - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); - if (ret) - goto out; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); + if (ret) + goto out; + } ret = 0; @@ -807,25 +803,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo) } static int -glusterd_restart_brick_servers (glusterd_volinfo_t *volinfo) -{ - if (!volinfo) - return -1; - if (glusterd_stop_bricks (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " - "stop brick servers"); - return -1; - } - usleep (500000); - if (glusterd_start_bricks (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " - "start brick servers"); - return -1; - } - return 0; -} - -static int glusterd_volset_help (dict_t *dict) { int ret = -1; @@ -853,11 +830,10 @@ glusterd_op_set_volume (dict_t *dict) xlator_t *this = NULL; glusterd_conf_t *priv = NULL; int count = 1; - int restart_flag = 0; - char *key = NULL; - char *key_fixed = NULL; - char *value = NULL; - char str[50] = {0, }; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char str[50] = {0, }; gf_boolean_t global_opt = _gf_false; glusterd_volinfo_t *voliter = NULL; int32_t dict_count = 0; @@ -894,7 +870,7 @@ glusterd_op_set_volume (dict_t *dict) goto out; } - for ( count = 1; ret != -1 ; count++ ) { + for ( count = 1; ret != -1 ; count++ ) { global_opt = _gf_false; sprintf (str, "key%d", count); @@ -976,19 +952,12 @@ glusterd_op_set_volume (dict_t *dict) goto out; } - if (restart_flag) { - if (glusterd_restart_brick_servers (volinfo)) { - ret = -1; - goto out; - } - } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) goto out; if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); if (ret) { gf_log ("", GF_LOG_WARNING, "Unable to restart NFS-Server"); @@ -1008,20 +977,13 @@ glusterd_op_set_volume (dict_t *dict) goto out; } - if (restart_flag) { - if (glusterd_restart_brick_servers (volinfo)) { - ret = -1; - goto out; - } - } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) goto out; if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); if (ret) { gf_log ("", GF_LOG_WARNING, "Unable to restart NFS-Server"); @@ -1212,13 +1174,13 @@ glusterd_op_stats_volume (dict_t *dict, char **op_errstr, goto out; break; } - ret = glusterd_create_volfiles_and_notify_services (volinfo); + ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) { + if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; + " 'volume set'"); + ret = -1; + goto out; } ret = glusterd_store_volinfo (volinfo, @@ -1227,7 +1189,7 @@ glusterd_op_stats_volume (dict_t *dict, char **op_errstr, goto out; if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); ret = 0; @@ -1249,7 +1211,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, glusterd_brickinfo_t *brickinfo = NULL; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; - int32_t brick_index = 0; + int32_t brick_index = 0; this = THIS; GF_ASSERT (this); @@ -1286,7 +1248,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, count++; brick_count = count; } - brick_index++; + brick_index++; } } @@ -1908,12 +1870,12 @@ glusterd_op_brick_disconnect (void *data) brickinfo = ev_ctx->brickinfo; GF_ASSERT (brickinfo); - if (brickinfo->timer) { - gf_timer_call_cancel (THIS->ctx, brickinfo->timer); - brickinfo->timer = NULL; + if (brickinfo->timer) { + gf_timer_call_cancel (THIS->ctx, brickinfo->timer); + brickinfo->timer = NULL; gf_log ("", GF_LOG_DEBUG, "Cancelled timer thread"); - } + } glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_ACC, ev_ctx); glusterd_op_sm (); diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 4a428991061..1f424f6c653 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -528,7 +528,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo, gf_log ("", GF_LOG_DEBUG, "Attempting to kill src"); - ret = glusterd_nfs_server_stop (); + ret = glusterd_nfs_server_stop (volinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d", @@ -570,7 +570,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo, } out: - ret = glusterd_nfs_server_start (); + ret = glusterd_nfs_server_start (volinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d", ret); @@ -1678,7 +1678,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } - ret = glusterd_nfs_server_stop (); + ret = glusterd_nodesvcs_stop (volinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to stop nfs server, ret: %d", ret); @@ -1690,13 +1690,13 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) gf_log ("", GF_LOG_CRITICAL, "Unable to add " "dst-brick: %s to volume: %s", dst_brick, volinfo->volname); - (void) glusterd_check_generate_start_nfs (); + (void) glusterd_nodesvcs_handle_graph_change (volinfo); goto out; } volinfo->defrag_status = 0; - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); if (ret) { gf_log ("", GF_LOG_CRITICAL, "Failed to generate nfs volume file"); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 5b247b6a901..18cda46ebb2 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -822,6 +822,23 @@ out: return ret; } +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, + char *path) +{ + gf_boolean_t decommissioned = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + + ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, + &brickinfo); + if (ret) + goto out; + decommissioned = brickinfo->decommissioned; +out: + return decommissioned; +} + int32_t glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) { @@ -2204,6 +2221,7 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) int i = 1; gf_boolean_t update = _gf_false; gf_boolean_t stale_nfs = _gf_false; + gf_boolean_t stale_shd = _gf_false; GF_ASSERT (vols); GF_ASSERT (status); @@ -2228,16 +2246,20 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) } if (update) { - if (glusterd_is_nfs_started ()) + if (glusterd_is_nodesvc_running ("nfs")) stale_nfs = _gf_true; + if (glusterd_is_nodesvc_running ("glustershd")) + stale_shd = _gf_true; ret = glusterd_import_friend_volumes (vols); if (ret) goto out; if (_gf_false == glusterd_are_all_volumes_stopped ()) { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (NULL); } else { if (stale_nfs) glusterd_nfs_server_stop (); + if (stale_shd) + glusterd_shd_stop (); } } @@ -2249,29 +2271,81 @@ out: } gf_boolean_t -glusterd_is_nfs_started () +glusterd_is_service_running (char *pidfile) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + FILE *file = NULL; + gf_boolean_t running = _gf_false; + gf_boolean_t locked = _gf_false; + int ret = 0; + int fno = 0; - this = THIS; - GF_ASSERT(this); + file = fopen (pidfile, "r+"); + if (!file) + goto out; - priv = this->private; + fno = fileno (file); + ret = lockf (fno, F_TLOCK, 0); + if (!ret) { + locked = _gf_true; + goto out; + } - GLUSTERD_GET_NFS_PIDFILE(pidfile); - ret = access (pidfile, F_OK); + running = _gf_true; +out: + if (locked) { + GF_ASSERT (file); + if (lockf (fno, F_ULOCK, 0) < 0) + gf_log ("", GF_LOG_WARNING, "Cannot unlock pidfile: %s" + " reason: %s", pidfile, strerror(errno)); + } + if (file) + fclose (file); + return running; +} - if (ret == 0) - return _gf_true; - else - return _gf_false; +void +glusterd_get_nodesvc_dir (char *server, char *workdir, + char *path, size_t len) +{ + GF_ASSERT (len == PATH_MAX); + snprintf (path, len, "%s/%s", workdir, server); +} + +void +glusterd_get_nodesvc_rundir (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/run", dir); +} + +void +glusterd_get_nodesvc_pidfile (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_rundir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/%s.pid", dir, server); +} + +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, + char *volfile, size_t len) +{ + char dir[PATH_MAX] = {0,}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (volfile, len, "%s/%s-server.vol", dir, server); } int32_t -glusterd_nfs_server_start () +glusterd_nodesvc_start (char *server, gf_boolean_t pmap_signin) { int32_t ret = -1; xlator_t *this = NULL; @@ -2279,16 +2353,16 @@ glusterd_nfs_server_start () char pidfile[PATH_MAX] = {0,}; char logfile[PATH_MAX] = {0,}; char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; char rundir[PATH_MAX] = {0,}; + char volfileid[256] = {0}; this = THIS; GF_ASSERT(this); priv = this->private; - GLUSTERD_GET_NFS_DIR(path, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); + glusterd_get_nodesvc_rundir (server, priv->workdir, + rundir, sizeof (rundir)); ret = mkdir (rundir, 0777); if ((ret == -1) && (EEXIST != errno)) { @@ -2297,25 +2371,72 @@ glusterd_nfs_server_start () goto out; } - GLUSTERD_GET_NFS_PIDFILE(pidfile); - glusterd_get_nfs_filepath (volfile); - + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + glusterd_get_nodesvc_volfile (server, priv->workdir, + volfile, sizeof (volfile)); ret = access (volfile, F_OK); if (ret) { - gf_log ("", GF_LOG_ERROR, "Nfs Volfile %s is not present", - volfile); + gf_log ("", GF_LOG_ERROR, "%s Volfile %s is not present", + server, volfile); goto out; } - snprintf (logfile, PATH_MAX, "%s/nfs.log", DEFAULT_LOG_FILE_DIRECTORY); + snprintf (logfile, PATH_MAX, "%s/%s.log", DEFAULT_LOG_FILE_DIRECTORY, + server); + snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); - ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-f", volfile, - "-p", pidfile, "-l", logfile, NULL); + if (pmap_signin) + ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-s", "localhost", + "--volfile-id", volfileid, + "-p", pidfile, "-l", logfile, NULL); + else + ret = runcmd (GFS_PREFIX"/sbin/glusterfs", "-f", volfile, + "-p", pidfile, "-l", logfile, NULL); out: return ret; } +int +glusterd_nfs_server_start () +{ + return glusterd_nodesvc_start ("nfs", _gf_false); +} + +int +glusterd_shd_start () +{ + return glusterd_nodesvc_start ("glustershd", _gf_true); +} + +gf_boolean_t +glusterd_is_nodesvc_running (char *server) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + return glusterd_is_service_running (pidfile); +} + +int32_t +glusterd_nodesvc_stop (char *server, int sig) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + int ret = 0; + + if (!glusterd_is_nodesvc_running (server)) + goto out; + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + ret = glusterd_service_stop (server, pidfile, sig, _gf_true); +out: + return ret; +} + void glusterd_nfs_pmap_deregister () { @@ -2336,26 +2457,27 @@ glusterd_nfs_pmap_deregister () } -int32_t +int glusterd_nfs_server_stop () { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - - this = THIS; - GF_ASSERT(this); - - priv = this->private; - - GLUSTERD_GET_NFS_DIR(path, priv); - GLUSTERD_GET_NFS_PIDFILE(pidfile); + int ret = 0; + gf_boolean_t deregister = _gf_false; - glusterd_service_stop ("nfsd", pidfile, SIGKILL, _gf_true); - glusterd_nfs_pmap_deregister (); + if (glusterd_is_nodesvc_running ("nfs")) + deregister = _gf_true; + ret = glusterd_nodesvc_stop ("nfs", SIGKILL); + if (ret) + goto out; + if (deregister) + glusterd_nfs_pmap_deregister (); +out: + return ret; +} - return 0; +int +glusterd_shd_stop () +{ + return glusterd_nodesvc_stop ("glustershd", SIGTERM); } int @@ -2392,26 +2514,122 @@ out: } int -glusterd_check_generate_start_nfs () +glusterd_check_generate_start_service (int (*create_volfile) (), + int (*stop) (), int (*start) ()) { int ret = -1; - ret = glusterd_create_nfs_volfile (); + ret = create_volfile (); if (ret) goto out; - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; - } + ret = stop (); + if (ret) + goto out; + + ret = start (); +out: + return ret; +} + +int +glusterd_reconfigure_nodesvc (int (*create_volfile) ()) +{ + int ret = -1; + + ret = create_volfile (); + if (ret) + goto out; + + ret = glusterd_fetchspec_notify (THIS); +out: + return ret; +} + +int +glusterd_reconfigure_shd () +{ + int (*create_volfile) () = glusterd_create_shd_volfile; + return glusterd_reconfigure_nodesvc (create_volfile); +} + +int +glusterd_check_generate_start_nfs () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_nfs_volfile, + glusterd_nfs_server_stop, + glusterd_nfs_server_start); + return ret; +} + +int +glusterd_check_generate_start_shd () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_shd_volfile, + glusterd_shd_stop, + glusterd_shd_start); + if (ret == -EINVAL) + ret = 0; + return ret; +} + +int +glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo, + int (*nfs_op) (), int (*shd_op) ()) +{ + int ret = 0; + + ret = nfs_op (); + if (ret) + goto out; + + if (volinfo && !glusterd_is_volume_replicate (volinfo)) + goto out; - ret = glusterd_nfs_server_start (); + ret = shd_op (); + if (ret) + goto out; out: return ret; } int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_start, + glusterd_shd_start); +} + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_stop, + glusterd_shd_stop); +} + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_check_generate_start_nfs, + glusterd_check_generate_start_shd); +} + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_check_generate_start_nfs, + glusterd_reconfigure_shd); +} + +int glusterd_volume_count_get (void) { glusterd_volinfo_t *tmp_volinfo = NULL; @@ -2510,7 +2728,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf) glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; int ret = 0; - gf_boolean_t start_nfs = _gf_false; + gf_boolean_t start_nodesvcs = _gf_false; GF_ASSERT (conf); @@ -2521,11 +2739,11 @@ glusterd_restart_bricks (glusterd_conf_t *conf) brick_list) { glusterd_brick_start (volinfo, brickinfo); } - start_nfs = _gf_true; + start_nodesvcs = _gf_true; } } - if (start_nfs) - glusterd_check_generate_start_nfs (); + if (start_nodesvcs) + glusterd_nodesvcs_handle_graph_change (NULL); return ret; } @@ -3740,3 +3958,12 @@ out: return ret; } +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t replicates = _gf_false; + if (volinfo && ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || + (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))) + replicates = _gf_true; + return replicates; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 2ee36936ade..f06a1ce1710 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -142,15 +142,31 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status); int glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo); +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, + char *volfile, size_t len); + +gf_boolean_t +glusterd_is_nodesvc_running (); + gf_boolean_t -glusterd_is_nfs_started (); +glusterd_is_nodesvc_running (); +void +glusterd_get_nodesvc_dir (char *server, char *workdir, + char *path, size_t len); int32_t glusterd_nfs_server_start (); int32_t glusterd_nfs_server_stop (); +int32_t +glusterd_shd_start (); + +int32_t +glusterd_shd_stop (); + int glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len); @@ -161,6 +177,22 @@ glusterd_set_volume_status (glusterd_volinfo_t *volinfo, glusterd_volume_status status); int glusterd_check_generate_start_nfs (void); + +int +glusterd_check_generate_start_shd (void); + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo); + int32_t glusterd_volume_count_get (void); int32_t @@ -290,4 +322,9 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, gf_boolean_t glusterd_is_fuse_available (); +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, + char *path); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index d0533b1fccc..fe7cfc1d14b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -116,6 +116,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.metadata-self-heal", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.data-self-heal", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.entry-self-heal", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, + {"cluster.self-heal-daemon", "cluster/replicate", "!self-heal-daemon" , NULL, NO_DOC, 0 }, {"cluster.strict-readdir", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.self-heal-window-size", "cluster/replicate", "data-self-heal-window-size", NULL, DOC, 0}, {"cluster.data-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, @@ -382,6 +383,13 @@ xlator_set_option (xlator_t *xl, char *key, char *value) return dict_set_dynstr (xl->options, key, dval); } +static int +xlator_get_option (xlator_t *xl, char *key, char **value) +{ + GF_ASSERT (xl); + return dict_get_str (xl->options, key, value); +} + static inline xlator_t * first_of (volgen_graph_t *graph) { @@ -685,25 +693,35 @@ volgen_graph_set_options_generic (volgen_graph_t *graph, dict_t *dict, } static int -basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, - void *param) +no_filter_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { xlator_t *trav; int ret = 0; - if (vme->option[0] == '!') - return 0; - for (trav = first_of (graph); trav; trav = trav->next) { if (strcmp (trav->type, vme->voltype) != 0) continue; ret = xlator_set_option (trav, vme->option, vme->value); if (ret) - return -1; + break; } + return ret; +} - return 0; +static int +basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + int ret = 0; + + if (vme->option[0] == '!') + goto out; + + ret = no_filter_option_handler (graph, vme, param); +out: + return ret; } static int @@ -991,14 +1009,39 @@ glusterd_get_trans_type_rb (gf_transport_type ttype) } static int -volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph) +_xl_link_children (xlator_t *parent, xlator_t *children, size_t child_count) +{ + xlator_t *trav = NULL; + size_t seek = 0; + int ret = -1; + + if (child_count == 0) + goto out; + seek = child_count; + for (trav = children; --seek; trav = trav->next); + for (; child_count--; trav = trav->prev) { + ret = volgen_xlator_link (parent, trav); + if (ret) + goto out; + } + ret = 0; +out: + return ret; +} + +static int +volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph, + size_t child_count) { xlator_t *trav = NULL; + int ret = 0; GF_ASSERT (dgraph->graph.first); - if (volgen_xlator_link (first_of (dgraph), first_of (sgraph)) == -1) - return -1; + ret = _xl_link_children (first_of (dgraph), first_of (sgraph), + child_count); + if (ret) + goto out; for (trav = first_of (dgraph); trav->next; trav = trav->next); @@ -1006,7 +1049,8 @@ volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph) trav->next->prev = trav; dgraph->graph.xl_count += sgraph->graph.xl_count; - return 0; +out: + return ret; } static int @@ -1082,10 +1126,11 @@ build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, set_dict = dict_copy (volinfo->dict, NULL); if (!set_dict) return -1; - dict_copy (mod_dict, set_dict); - /* XXX dict_copy swallows errors */ - } else + dict_copy (mod_dict, set_dict); + /* XXX dict_copy swallows errors */ + } else { set_dict = volinfo->dict; + } ret = builder (graph, volinfo, set_dict, param); if (!ret) @@ -1097,10 +1142,24 @@ build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, return ret; } +static gf_transport_type +transport_str_to_type (char *tt) +{ + gf_transport_type type = GF_TRANSPORT_TCP; + + if (!strcmp ("tcp", tt)) + type = GF_TRANSPORT_TCP; + else if (!strcmp ("rdma", tt)) + type = GF_TRANSPORT_RDMA; + else if (!strcmp ("tcp,rdma", tt)) + type = GF_TRANSPORT_BOTH_TCP_RDMA; + return type; +} + static void -get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) +transport_type_to_str (gf_transport_type type, char *tt) { - switch (volinfo->transport_type) { + switch (type) { case GF_TRANSPORT_RDMA: strcpy (tt, "rdma"); break; @@ -1114,20 +1173,20 @@ get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) } static void +get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) +{ + transport_type_to_str (volinfo->transport_type, tt); +} + +static void get_vol_nfs_transport_type (glusterd_volinfo_t *volinfo, char *tt) { - switch (volinfo->nfs_transport_type) { - case GF_TRANSPORT_RDMA: - strcpy (tt, "rdma"); - break; - case GF_TRANSPORT_TCP: - strcpy (tt, "tcp"); - break; - case GF_TRANSPORT_BOTH_TCP_RDMA: + if (volinfo->nfs_transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { gf_log ("", GF_LOG_ERROR, "%s:nfs transport cannot be both" " tcp and rdma", volinfo->volname); GF_ASSERT (0); } + transport_type_to_str (volinfo->nfs_transport_type, tt); } /* gets the volinfo, dict, a character array for filling in @@ -1795,35 +1854,17 @@ glusterd_get_volopt_content (gf_boolean_t xml_out) } static int -client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param) +volgen_graph_build_clients (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) { - int sub_count = 0; - int dist_count = 0; + int i = 0; + int ret = -1; char transt[16] = {0,}; char *volname = NULL; - dict_t *dict = NULL; glusterd_brickinfo_t *brick = NULL; - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; - char *stripe_args[] = {"cluster/stripe", - "%s-stripe-%d"}; - char **cluster_args = NULL; - int i = 0; - int j = 0; - int ret = -1; - xlator_t *xl = NULL; - xlator_t *txl = NULL; - xlator_t *trav = NULL; - int removed_bricks = 0; - int index_of_removed_brick = 0; - char *removed_bricklist = NULL; - char volume_name[1024] = {0,}; - int idx = 0; + xlator_t *xl = NULL; volname = volinfo->volname; - dict = volinfo->dict; - GF_ASSERT (dict); if (volinfo->brick_count == 0) { gf_log ("", GF_LOG_ERROR, @@ -1848,6 +1889,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, strcpy (transt, "tcp"); i = 0; + ret = -1; list_for_each_entry (brick, &volinfo->bricks, brick_list) { ret = -1; xl = volgen_graph_add_nolink (graph, "protocol/client", @@ -1863,19 +1905,6 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, ret = xlator_set_option (xl, "transport-type", transt); if (ret) goto out; - if (brick->decommissioned) { - if (!removed_bricklist) { - removed_bricklist = GF_CALLOC (16 * GF_UNIT_KB, - 1, gf_common_mt_char); - index_of_removed_brick = i; - } - if (removed_bricks) - strcat (removed_bricklist, ","); - snprintf (volume_name, 1024, "%s-client-%d", volname, i); - strcat (removed_bricklist, volume_name); - removed_bricks++; - } - i++; } if (i != volinfo->brick_count) { @@ -1884,138 +1913,283 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, "differs from brick count (%d)", i, volinfo->brick_count); + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +static int +volgen_graph_build_clusters (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + int i = 0; + int j = 0; + xlator_t *txl = NULL; + xlator_t *xl = NULL; + xlator_t *trav = NULL; + char *volname = NULL; + int ret = -1; + + if (child_count == 0) + goto out; + volname = volinfo->volname; + txl = first_of (graph); + for (trav = txl; --child_count; trav = trav->next); + for (;; trav = trav->prev) { + if (i % sub_count == 0) { + xl = volgen_graph_add_nolink (graph, xl_type, + xl_namefmt, volname, j); + if (!xl) { + ret = -1; + goto out; + } + j++; + } + + ret = volgen_xlator_link (xl, trav); + if (ret) + goto out; + + if (trav == txl) + break; + i++; + } + + ret = j; +out: + return ret; +} + +gf_boolean_t +_xl_is_client_decommissioned (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + int ret = 0; + gf_boolean_t decommissioned = _gf_false; + char *hostname = NULL; + char *path = NULL; + + GF_ASSERT (!strcmp (xl->type, "protocol/client")); + ret = xlator_get_option (xl, "remote-host", &hostname); + if (ret) { + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " + "from client %s", xl->name); + goto out; + } + ret = xlator_get_option (xl, "remote-subvolume", &path); + if (ret) { + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " + "from client %s", xl->name); + goto out; + } + + decommissioned = glusterd_is_brick_decommissioned (volinfo, hostname, + path); +out: + return decommissioned; +} + +gf_boolean_t +_xl_has_decommissioned_clients (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + xlator_list_t *xl_child = NULL; + gf_boolean_t decommissioned = _gf_false; + xlator_t *cxl = NULL; + + if (!xl) + goto out; + + if (!strcmp (xl->type, "protocol/client")) { + decommissioned = _xl_is_client_decommissioned (xl, volinfo); goto out; } - sub_count = volinfo->sub_count; - if (sub_count > 1) { + xl_child = xl->children; + while (xl_child) { + cxl = xl_child->xlator; + decommissioned = _xl_is_client_decommissioned (cxl, volinfo); + if (decommissioned) + break; + + xl_child = xl_child->next; + } +out: + return decommissioned; +} + +static int +_graph_get_decommissioned_children (xlator_t *dht, glusterd_volinfo_t *volinfo, + char **children) +{ + int ret = -1; + xlator_list_t *xl_child = NULL; + xlator_t *cxl = NULL; + gf_boolean_t comma = _gf_false; + + *children = NULL; + xl_child = dht->children; + while (xl_child) { + cxl = xl_child->xlator; + if (_xl_has_decommissioned_clients (cxl, volinfo)) { + if (!*children) { + *children = GF_CALLOC (16 * GF_UNIT_KB, 1, + gf_common_mt_char); + if (!*children) + goto out; + } + + if (comma) + strcat (*children, ","); + strcat (*children, cxl->name); + comma = _gf_true; + } + + xl_child = xl_child->next; + } + ret = 0; +out: + return ret; +} + +static int +volgen_graph_build_dht_cluster (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, size_t child_count) +{ + int32_t clusters = 0; + int ret = -1; + char *decommissioned_children = NULL; + xlator_t *dht = NULL; + + GF_ASSERT (child_count > 1); + clusters = volgen_graph_build_clusters (graph, volinfo, + "cluster/distribute", "%s-dht", + child_count, child_count); + if (clusters < 0) + goto out; + dht = first_of (graph); + ret = _graph_get_decommissioned_children (dht, volinfo, + &decommissioned_children); + if (ret) + goto out; + if (decommissioned_children) { + ret = xlator_set_option (dht, "decommissioned-bricks", + decommissioned_children); + if (ret) + goto out; + } + ret = 0; +out: + if (decommissioned_children) + GF_FREE (decommissioned_children); + return ret; +} + +static int +volume_volgen_graph_build_clusters (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) +{ + char *replicate_args[] = {"cluster/replicate", + "%s-replicate-%d"}; + char *stripe_args[] = {"cluster/stripe", + "%s-stripe-%d"}; + int rclusters = 0; + int clusters = 0; + int dist_count = 0; + int ret = -1; + + if (volinfo->sub_count > 1) { switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: - cluster_args = replicate_args; + clusters = volgen_graph_build_clusters (graph, volinfo, + replicate_args[0], + replicate_args[1], + volinfo->brick_count, + volinfo->sub_count); + if (clusters < 0) + goto out; break; case GF_CLUSTER_TYPE_STRIPE: - cluster_args = stripe_args; + clusters = volgen_graph_build_clusters (graph, volinfo, + stripe_args[0], + stripe_args[1], + volinfo->brick_count, + volinfo->sub_count); + if (clusters < 0) + goto out; break; case GF_CLUSTER_TYPE_STRIPE_REPLICATE: /* Replicate after the clients, then stripe */ - if (volinfo->replica_count == 0) { - ret = -1; + if (volinfo->replica_count == 0) + return -1; + clusters = volgen_graph_build_clusters (graph, volinfo, + replicate_args[0], + replicate_args[1], + volinfo->brick_count, + volinfo->replica_count); + if (clusters < 0) + goto out; + + rclusters = volinfo->brick_count/volinfo->replica_count; + GF_ASSERT (rclusters == clusters); + clusters = volgen_graph_build_clusters (graph, volinfo, + stripe_args[0], + stripe_args[1], + rclusters, + volinfo->stripe_count); + if (clusters < 0) goto out; - } - sub_count = volinfo->replica_count; - cluster_args = replicate_args; break; default: gf_log ("", GF_LOG_ERROR, "volume inconsistency: " "unrecognized clustering type"); - ret = -1; goto out; } - - i = 0; - j = 0; - txl = first_of (graph); - for (trav = txl; trav->next; trav = trav->next); - for (;; trav = trav->prev) { - if (i % sub_count == 0) { - xl = volgen_graph_add_nolink (graph, - cluster_args[0], - cluster_args[1], - volname, j); - if (!xl) { - ret = -1; - goto out; - } - j++; - } - - ret = volgen_xlator_link (xl, trav); - if (ret) - goto out; - - if (trav == txl) - break; - i++; - } - - if (GF_CLUSTER_TYPE_STRIPE_REPLICATE == volinfo->type) { - sub_count = volinfo->stripe_count; - cluster_args = stripe_args; - - i = 0; - txl = first_of (graph); - for (trav = txl; --j; trav = trav->next); - for (;; trav = trav->prev) { - if (i % sub_count == 0) { - xl = volgen_graph_add_nolink (graph, - cluster_args[0], - cluster_args[1], - volname, j); - if (!xl) { - ret = -1; - goto out; - } - j++; - } - - ret = volgen_xlator_link (xl, trav); - if (ret) - goto out; - - if (trav == txl) - break; - i++; - } - - } } - - if (volinfo->sub_count) + if (volinfo->sub_count) { dist_count = volinfo->brick_count / volinfo->sub_count; - else + GF_ASSERT (dist_count == clusters); + } else { dist_count = volinfo->brick_count; + } + if (dist_count > 1) { - xl = volgen_graph_add_nolink (graph, "cluster/distribute", - "%s-dht", volname); - if (!xl) { - ret = -1; + ret = volgen_graph_build_dht_cluster (graph, volinfo, + dist_count); + if (ret) goto out; - } + } + ret = 0; +out: + return ret; +} - trav = xl; - for (i = 0; i < dist_count; i++) - trav = trav->next; - for (; trav != xl; trav = trav->prev) { - ret = volgen_xlator_link (xl, trav); - if (ret) - goto out; - } +static int +client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int ret = 0; + xlator_t *xl = NULL; + char *volname = NULL; - if (removed_bricks) { - if (volinfo->sub_count) { - idx = index_of_removed_brick / volinfo->sub_count; - if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { - snprintf (volume_name, 1024, "%s-replicate-%d", - volname, idx); - strcpy (removed_bricklist, volume_name); - } else if (volinfo->type != GF_CLUSTER_TYPE_NONE) { - snprintf (volume_name, 1024, "%s-stripe-%d ", - volname, idx); - strcpy (removed_bricklist, volume_name); - } - } - ret = xlator_set_option (xl, "decommissioned-bricks", - removed_bricklist); - if (ret) - goto out; - } - } + volname = volinfo->volname; + ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); + if (ret) + goto out; + + ret = volume_volgen_graph_build_clusters (graph, volinfo); + if (ret) + goto out; ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA); if (ret == -1) goto out; - if (ret) { xl = volgen_graph_add (graph, "features/quota", volname); @@ -2030,6 +2204,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) goto out; + ret = -1; xl = volgen_graph_add_as (graph, "debug/io-stats", volname); if (!xl) goto out; @@ -2040,11 +2215,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (!ret) ret = volgen_graph_set_options_generic (graph, set_dict, "client", &sys_loglevel_option_handler); - out: - if (removed_bricklist) - GF_FREE (removed_bricklist); - return ret; } @@ -2059,8 +2230,28 @@ build_client_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } static int +shd_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + int ret = 0; + struct volopt_map_entry new_vme = {0}; + int shd = 0; + + shd = !strcmp (vme->option, "!self-heal-daemon"); + if ((vme->option[0] == '!') && !shd) + goto out; + new_vme = *vme; + if (shd) + new_vme.option = "self-heal-daemon"; + + ret = no_filter_option_handler (graph, &new_vme, param); +out: + return ret; +} + +static int nfs_option_handler (volgen_graph_t *graph, - struct volopt_map_entry *vme, void *param) + struct volopt_map_entry *vme, void *param) { xlator_t *xl = NULL; char *aa = NULL; @@ -2234,6 +2425,93 @@ nfs_option_handler (volgen_graph_t *graph, return 0; } +static int +build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict) +{ + volgen_graph_t cgraph = {0}; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; + gf_boolean_t valid_config = _gf_false; + xlator_t *iostxl = NULL; + int rclusters = 0; + int replica_count = 0; + + this = THIS; + priv = this->private; + + set_dict = dict_new (); + if (!set_dict) { + ret = -ENOMEM; + goto out; + } + + iostxl = volgen_graph_add_as (graph, "debug/io-stats", "glustershd"); + if (!iostxl) { + ret = -1; + goto out; + } + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; + + if (voliter->type == GF_CLUSTER_TYPE_REPLICATE) + replica_count = voliter->sub_count; + else if (voliter->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) + replica_count = voliter->replica_count; + else + continue; + + valid_config = _gf_true; + + ret = dict_set_str (set_dict, "cluster.self-heal-daemon", "on"); + if (ret) + goto out; + + dict_copy (voliter->dict, set_dict); + if (mod_dict) + dict_copy (mod_dict, set_dict); + + memset (&cgraph, 0, sizeof (cgraph)); + ret = volgen_graph_build_clients (&cgraph, voliter, set_dict, + NULL); + if (ret) + goto out; + + rclusters = volgen_graph_build_clusters (&cgraph, voliter, + "cluster/replicate", + "%s-replicate-%d", + voliter->brick_count, + replica_count); + if (rclusters < 0) { + ret = -1; + goto out; + } + + ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter, + shd_option_handler); + if (ret) + goto out; + + ret = volgen_graph_merge_sub (graph, &cgraph, rclusters); + if (ret) + goto out; + + ret = dict_reset (set_dict); + if (ret) + goto out; + } +out: + if (set_dict) + dict_unref (set_dict); + if (!valid_config) + ret = -EINVAL; + return ret; +} + /* builds a graph for nfs server role, with option overrides in mod_dict */ static int build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) @@ -2259,14 +2537,6 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) return -1; } - ret = dict_set_str (set_dict, VKEY_PERF_STAT_PREFETCH, "off"); - if (ret) - goto out; - - ret = dict_set_str (set_dict, "performance.client-io-threads", "off"); - if (ret) - goto out; - nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server"); if (!nfsxl) { ret = -1; @@ -2274,7 +2544,7 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) } ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on"); if (ret) - goto out;; + goto out; list_for_each_entry (voliter, &priv->volumes, vol_list) { if (voliter->status != GLUSTERD_STATUS_STARTED) @@ -2313,11 +2583,19 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) else get_transport_type (voliter, voliter->dict, nfs_xprt, _gf_true); + ret = dict_set_str (set_dict, VKEY_PERF_STAT_PREFETCH, "off"); + if (ret) + goto out; + + ret = dict_set_str (set_dict, "performance.client-io-threads", "off"); + if (ret) + goto out; + ret = dict_set_str (set_dict, "client-transport-type", nfs_xprt); ret = build_client_graph (&cgraph, voliter, set_dict); if (ret) - goto out;; + goto out; if (mod_dict) { dict_copy (mod_dict, set_dict); @@ -2328,7 +2606,13 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) basic_option_handler); } - ret = volgen_graph_merge_sub (graph, &cgraph); + if (ret) + goto out; + + ret = volgen_graph_merge_sub (graph, &cgraph, 1); + if (ret) + goto out; + ret = dict_reset (set_dict); if (ret) goto out; } @@ -2336,8 +2620,7 @@ build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) list_for_each_entry (voliter, &priv->volumes, vol_list) { if (mod_dict) { - dict_copy (mod_dict, set_dict); - ret = volgen_graph_set_options_generic (graph, set_dict, voliter, + ret = volgen_graph_set_options_generic (graph, mod_dict, voliter, nfs_option_handler); } else { ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter, @@ -2509,22 +2792,24 @@ out: return ret; } -static void -get_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +static int +generate_single_transport_client_volfile (glusterd_volinfo_t *volinfo, + char *filepath, dict_t *dict) { - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + volgen_graph_t graph = {0,}; + int ret = -1; - priv = THIS->private; + ret = build_client_graph (&graph, volinfo, dict); + if (!ret) + ret = volgen_write_volfile (&graph, filepath); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + volgen_graph_free (&graph); - snprintf (filename, PATH_MAX, "%s/%s-fuse.vol", - path, volinfo->volname); + return ret; } -static void -get_rdma_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +void +get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo, gf_transport_type type) { char path[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; @@ -2533,55 +2818,68 @@ get_rdma_client_filepath (char *filename, glusterd_volinfo_t *volinfo) GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - snprintf (filename, PATH_MAX, "%s/%s.rdma-fuse.vol", - path, volinfo->volname); + switch (type) { + case GF_TRANSPORT_TCP: + snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol", + path, volinfo->volname); + break; + case GF_TRANSPORT_RDMA: + snprintf (filepath, PATH_MAX, "%s/%s.rdma-fuse.vol", + path, volinfo->volname); + break; + default: + GF_ASSERT (0); + break; + } } -static int -generate_client_volfile (glusterd_volinfo_t *volinfo) +static void +enumerate_transport_reqs (gf_transport_type type, char **types) { - volgen_graph_t graph = {0,}; - char filename[PATH_MAX] = {0,}; - int ret = -1; - dict_t *dict = NULL; - - get_client_filepath (filename, volinfo); + switch (type) { + case GF_TRANSPORT_TCP: + types[0] = "tcp"; + break; + case GF_TRANSPORT_RDMA: + types[0] = "rdma"; + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + types[0] = "tcp"; + types[1] = "rdma"; + break; + } +} - if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { - dict = dict_new (); - if (!dict) - goto out; - ret = dict_set_str (dict, "client-transport-type", "tcp"); +static int +generate_client_volfiles (glusterd_volinfo_t *volinfo) +{ + char filepath[PATH_MAX] = {0,}; + int ret = -1; + char *types[] = {NULL, NULL, NULL}; + int i = 0; + dict_t *dict = NULL; + gf_transport_type type = GF_TRANSPORT_TCP; + + enumerate_transport_reqs (volinfo->transport_type, types); + dict = dict_new (); + if (!dict) + goto out; + for (i = 0; types[i]; i++) { + memset (filepath, 0, sizeof (filepath)); + ret = dict_set_str (dict, "client-transport-type", types[i]); if (ret) goto out; - } - - ret = build_client_graph (&graph, volinfo, dict); - if (!ret) - ret = volgen_write_volfile (&graph, filename); - - volgen_graph_free (&graph); - - if (dict) { - /* This means, transport type is both RDMA and TCP */ - - memset (&graph, 0, sizeof (graph)); - get_rdma_client_filepath (filename, volinfo); - - ret = dict_set_str (dict, "client-transport-type", "rdma"); + type = transport_str_to_type (types[i]); + get_client_filepath (filepath, volinfo, type); + ret = generate_single_transport_client_volfile (volinfo, + filepath, + dict); if (ret) goto out; - - ret = build_client_graph (&graph, volinfo, dict); - if (!ret) - ret = volgen_write_volfile (&graph, filename); - - volgen_graph_free (&graph); - - dict_unref (dict); } - out: + if (dict) + dict_unref (dict); return ret; } @@ -2593,7 +2891,7 @@ glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, ret = glusterd_generate_brick_volfile (volinfo, brickinfo); if (!ret) - ret = generate_client_volfile (volinfo); + ret = generate_client_volfiles (volinfo); if (!ret) ret = glusterd_fetchspec_notify (THIS); @@ -2612,7 +2910,7 @@ glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo) goto out; } - ret = generate_client_volfile (volinfo); + ret = generate_client_volfiles (volinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "Could not generate volfile for client"); @@ -2625,34 +2923,62 @@ out: return ret; } -void -glusterd_get_nfs_filepath (char *filename) +int +glusterd_create_global_volfile (int (*builder) (volgen_graph_t *graph, + dict_t *set_dict), + char *filepath, dict_t *mod_dict) { - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + volgen_graph_t graph = {0,}; + int ret = -1; - priv = THIS->private; + ret = builder (&graph, mod_dict); + if (!ret) + ret = volgen_write_volfile (&graph, filepath); - GLUSTERD_GET_NFS_DIR (path, priv); + volgen_graph_free (&graph); - snprintf (filename, PATH_MAX, "%s/nfs-server.vol", path); + return ret; } int glusterd_create_nfs_volfile () { - volgen_graph_t graph = {0,}; - char filename[PATH_MAX] = {0,}; - int ret = -1; + char filepath[PATH_MAX] = {0,}; + glusterd_conf_t *conf = THIS->private; - glusterd_get_nfs_filepath (filename); + glusterd_get_nodesvc_volfile ("nfs", conf->workdir, + filepath, sizeof (filepath)); + return glusterd_create_global_volfile (build_nfs_graph, + filepath, NULL); +} - ret = build_nfs_graph (&graph, NULL); - if (!ret) - ret = volgen_write_volfile (&graph, filename); +int +glusterd_create_shd_volfile () +{ + char filepath[PATH_MAX] = {0,}; + int ret = -1; + glusterd_conf_t *conf = THIS->private; + dict_t *mod_dict = NULL; - volgen_graph_free (&graph); + mod_dict = dict_new (); + if (!mod_dict) + goto out; + + ret = dict_set_uint32 (mod_dict, "cluster.background-self-heal-count", 0); + if (ret) + goto out; + + ret = dict_set_str (mod_dict, "cluster.data-self-heal", "on"); + if (ret) + goto out; + glusterd_get_nodesvc_volfile ("glustershd", conf->workdir, + filepath, sizeof (filepath)); + ret = glusterd_create_global_volfile (build_shd_graph, filepath, + mod_dict); +out: + if (mod_dict) + dict_unref (mod_dict); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 3fd8a8351df..974aed934ba 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -68,7 +68,10 @@ int glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo); void glusterd_get_nfs_filepath (char *filename); +void glusterd_get_shd_filepath (char *filename); + int glusterd_create_nfs_volfile (); +int glusterd_create_shd_volfile (); int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 90d3f16bc45..81ef4c60560 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -955,7 +955,7 @@ glusterd_op_start_volume (dict_t *dict, char **op_errstr) if (ret) goto out; - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); out: gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); @@ -994,13 +994,9 @@ glusterd_op_stop_volume (dict_t *dict) goto out; if (glusterd_are_all_volumes_stopped ()) { - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; - } + ret = glusterd_nodesvcs_stop (volinfo); } else { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_nodesvcs_handle_graph_change (volinfo); } out: diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 69923256c0a..b9e09254ba8 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -852,6 +852,15 @@ init (xlator_t *this) exit (1); } + snprintf (voldir, PATH_MAX, "%s/glustershd", dirname); + ret = mkdir (voldir, 0777); + if ((-1 == ret) && (errno != EEXIST)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create glustershd directory %s" + " ,errno = %d", voldir, errno); + exit (1); + } + ret = glusterd_rpcsvc_options_build (this->options); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index c8fa8281903..45890a5d8b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -263,11 +263,6 @@ enum glusterd_vol_comp_status_ { typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); -#define GLUSTERD_GET_NFS_DIR(path, priv) \ - do { \ - snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);\ - } while (0); \ - #define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\ volinfo->volname); @@ -277,10 +272,6 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ GLUSTERD_BRICK_INFO_DIR); -#define GLUSTERD_GET_NFS_PIDFILE(pidfile) \ - snprintf (pidfile, PATH_MAX, "%s/nfs/run/nfs.pid", \ - priv->workdir); \ - #define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do { \ int i = 0; \ for (i = 1; i < strlen (path); i++) { \ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index a8b7b67a46d..7a9bdbcb9e5 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2549,6 +2549,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) { snprintf (host_buf, 1024, "<POSIX:%s:%s>", priv->hostname, real_path); + size = strlen (host_buf) + 1; ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY, host_buf); if (ret < 0) { |