diff options
Diffstat (limited to 'xlators/cluster/ec')
-rw-r--r-- | xlators/cluster/ec/src/Makefile.am | 3 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heald.c | 598 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heald.h | 47 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 77 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.h | 8 |
6 files changed, 707 insertions, 27 deletions
diff --git a/xlators/cluster/ec/src/Makefile.am b/xlators/cluster/ec/src/Makefile.am index e2a9330a944..12d87f99e4d 100644 --- a/xlators/cluster/ec/src/Makefile.am +++ b/xlators/cluster/ec/src/Makefile.am @@ -15,6 +15,7 @@ ec_sources += ec-combine.c ec_sources += ec-gf.c ec_sources += ec-method.c ec_sources += ec-heal.c +ec_sources += ec-heald.c ec_headers := ec.h ec_headers += ec-mem-types.h @@ -25,6 +26,7 @@ ec_headers += ec-common.h ec_headers += ec-combine.h ec_headers += ec-gf.h ec_headers += ec-method.h +ec_headers += ec-heald.h ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c @@ -37,6 +39,7 @@ ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la AM_CPPFLAGS = $(GF_CPPFLAGS) AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src +AM_CPPFLAGS += -I$(top_srcdir)/rpc/rpc-lib/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c new file mode 100644 index 00000000000..6b899414d4d --- /dev/null +++ b/xlators/cluster/ec/src/ec-heald.c @@ -0,0 +1,598 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "xlator.h" +#include "defaults.h" +#include "compat-errno.h" +#include "ec.h" +#include "ec-heald.h" +#include "ec-mem-types.h" +#include "syncop.h" +#include "syncop-utils.h" +#include "protocol-common.h" + +#define SHD_INODE_LRU_LIMIT 2048 +#define ASSERT_LOCAL(this, healer) \ + do { \ + if (!ec_shd_is_subvol_local (this, healer->subvol)) { \ + healer->local = _gf_false; \ + if (safe_break (healer)) { \ + break; \ + } else { \ + continue; \ + } \ + } else { \ + healer->local = _gf_true; \ + } \ + } while (0); + + +#define NTH_INDEX_HEALER(this, n) (&((((ec_t *)this->private))->shd.index_healers[n])) +#define NTH_FULL_HEALER(this, n) (&((((ec_t *)this->private))->shd.full_healers[n])) + +gf_boolean_t +ec_shd_is_subvol_local (xlator_t *this, int subvol) +{ + ec_t *ec = NULL; + gf_boolean_t is_local = _gf_false; + loc_t loc = {0, }; + + ec = this->private; + loc.inode = this->itable->root; + syncop_is_subvol_local (ec->xl_list[subvol], &loc, &is_local); + return is_local; +} + +char * +ec_subvol_name (xlator_t *this, int subvol) +{ + ec_t *ec = NULL; + + ec = this->private; + if (subvol < 0 || subvol > ec->nodes) + return NULL; + + return ec->xl_list[subvol]->name; +} + +int +__ec_shd_healer_wait (struct subvol_healer *healer) +{ + ec_t *ec = NULL; + struct timespec wait_till = {0, }; + int ret = 0; + + ec = healer->this->private; + +disabled_loop: + wait_till.tv_sec = time (NULL) + 60; + + while (!healer->rerun) { + ret = pthread_cond_timedwait (&healer->cond, + &healer->mutex, + &wait_till); + if (ret == ETIMEDOUT) + break; + } + + ret = healer->rerun; + healer->rerun = 0; + + if (!ec->shd.enabled || !ec->up) + goto disabled_loop; + + return ret; +} + + +int +ec_shd_healer_wait (struct subvol_healer *healer) +{ + int ret = 0; + + pthread_mutex_lock (&healer->mutex); + { + ret = __ec_shd_healer_wait (healer); + } + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + + +gf_boolean_t +safe_break (struct subvol_healer *healer) +{ + gf_boolean_t ret = _gf_false; + + pthread_mutex_lock (&healer->mutex); + { + if (healer->rerun) + goto unlock; + + healer->running = _gf_false; + ret = _gf_true; + } +unlock: + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + + +inode_t * +ec_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid) +{ + inode_t *inode = NULL; + int ret = 0; + loc_t loc = {0, }; + struct iatt iatt = {0, }; + + inode = inode_find (this->itable, gfid); + if (inode) { + inode_lookup (inode); + goto out; + } + + loc.inode = inode_new (this->itable); + if (!loc.inode) + goto out; + uuid_copy (loc.gfid, gfid); + + ret = syncop_lookup (subvol, &loc, NULL, &iatt, NULL, NULL); + if (ret < 0) + goto out; + + inode = inode_link (loc.inode, NULL, NULL, &iatt); + if (inode) + inode_lookup (inode); +out: + loc_wipe (&loc); + return inode; +} + + +inode_t* +ec_shd_index_inode (xlator_t *this, xlator_t *subvol) +{ + loc_t rootloc = {0, }; + inode_t *inode = NULL; + int ret = 0; + dict_t *xattr = NULL; + void *index_gfid = NULL; + + rootloc.inode = inode_ref (this->itable->root); + uuid_copy (rootloc.gfid, rootloc.inode->gfid); + + ret = syncop_getxattr (subvol, &rootloc, &xattr, + GF_XATTROP_INDEX_GFID, NULL); + if (ret || !xattr) { + errno = -ret; + goto out; + } + + ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid); + if (ret) + goto out; + + gf_log (this->name, GF_LOG_DEBUG, "index-dir gfid for %s: %s", + subvol->name, uuid_utoa (index_gfid)); + + inode = ec_shd_inode_find (this, subvol, index_gfid); + +out: + loc_wipe (&rootloc); + + if (xattr) + dict_unref (xattr); + + return inode; +} + +int +ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name) +{ + loc_t loc = {0, }; + int ret = 0; + + loc.parent = inode_ref (inode); + loc.name = name; + + ret = syncop_unlink (subvol, &loc); + + loc_wipe (&loc); + return ret; +} + +int +ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc) +{ + return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL); +} + + +int +ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = healer->this->private; + if (!ec->shd.enabled) + return -EBUSY; + + gf_log (healer->this->name, GF_LOG_DEBUG, "got entry: %s", + entry->d_name); + + ret = uuid_parse (entry->d_name, loc.gfid); + if (ret) + return 0; + + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path (healer->this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret == -ENOENT || ret == -ESTALE) { + ec_shd_index_purge (subvol, parent->inode, entry->d_name); + goto out; + } + + loc.inode = ec_shd_inode_find (healer->this, healer->this, loc.gfid); + if (!loc.inode) + goto out; + + ec_shd_selfheal (healer, healer->subvol, &loc); + +out: + loc_wipe (&loc); + + return 0; +} + +int +ec_shd_index_sweep (struct subvol_healer *healer) +{ + loc_t loc = {0}; + ec_t *ec = NULL; + int ret = 0; + xlator_t *subvol = NULL; + + ec = healer->this->private; + subvol = ec->xl_list[healer->subvol]; + + loc.inode = ec_shd_index_inode (healer->this, subvol); + if (!loc.inode) { + gf_log (healer->this->name, GF_LOG_WARNING, + "unable to get index-dir on %s", subvol->name); + return -errno; + } + + ret = syncop_dir_scan (subvol, &loc, GF_CLIENT_PID_AFR_SELF_HEALD, + healer, ec_shd_index_heal); + + inode_forget (loc.inode, 1); + loc_wipe (&loc); + + return ret; +} + +int +ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + xlator_t *this = healer->this; + ec_t *ec = NULL; + loc_t loc = {0}; + int ret = 0; + + ec = this->private; + if (!ec->shd.enabled) + return -EBUSY; + + loc.parent = inode_ref (parent->inode); + loc.name = entry->d_name; + uuid_copy (loc.gfid, entry->d_stat.ia_gfid); + + /* If this fails with ENOENT/ESTALE index is stale */ + ret = syncop_gfid_to_path (this->itable, subvol, loc.gfid, + (char **)&loc.path); + if (ret < 0) + goto out; + + loc.inode = ec_shd_inode_find (this, this, loc.gfid); + if (!loc.inode) { + ret = -EINVAL; + goto out; + } + + ec_shd_selfheal (healer, healer->subvol, &loc); + + loc_wipe (&loc); + ret = 0; + +out: + return ret; +} + +int +ec_shd_full_sweep (struct subvol_healer *healer, inode_t *inode) +{ + ec_t *ec = NULL; + loc_t loc = {0}; + + ec = healer->this->private; + loc.inode = inode; + return syncop_ftw (ec->xl_list[healer->subvol], &loc, + GF_CLIENT_PID_AFR_SELF_HEALD, healer, + ec_shd_full_heal); +} + + +void * +ec_shd_index_healer (void *data) +{ + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + + healer = data; + THIS = this = healer->this; + + for (;;) { + ec_shd_healer_wait (healer); + + ASSERT_LOCAL(this, healer); + + gf_log (this->name, GF_LOG_DEBUG, + "starting index sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + + ec_shd_index_sweep (healer); + + gf_log (this->name, GF_LOG_DEBUG, + "finished index sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + } + + return NULL; +} + + +void * +ec_shd_full_healer (void *data) +{ + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; + + int run = 0; + + healer = data; + THIS = this = healer->this; + + for (;;) { + pthread_mutex_lock (&healer->mutex); + { + run = __ec_shd_healer_wait (healer); + if (!run) + healer->running = _gf_false; + } + pthread_mutex_unlock (&healer->mutex); + + if (!run) + break; + + ASSERT_LOCAL(this, healer); + + gf_log (this->name, GF_LOG_INFO, + "starting full sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + + ec_shd_full_sweep (healer, this->itable->root); + + gf_log (this->name, GF_LOG_INFO, + "finished full sweep on subvol %s", + ec_subvol_name (this, healer->subvol)); + } + + return NULL; +} + + +int +ec_shd_healer_init (xlator_t *this, struct subvol_healer *healer) +{ + int ret = 0; + + ret = pthread_mutex_init (&healer->mutex, NULL); + if (ret) + goto out; + + ret = pthread_cond_init (&healer->cond, NULL); + if (ret) + goto out; + + healer->this = this; + healer->running = _gf_false; + healer->rerun = _gf_false; + healer->local = _gf_false; +out: + return ret; +} + + +int +ec_shd_healer_spawn (xlator_t *this, struct subvol_healer *healer, + void *(threadfn)(void *)) +{ + int ret = 0; + + pthread_mutex_lock (&healer->mutex); + { + if (healer->running) { + pthread_cond_signal (&healer->cond); + } else { + ret = gf_thread_create (&healer->thread, NULL, + threadfn, healer); + if (ret) + goto unlock; + healer->running = 1; + } + + healer->rerun = 1; + } +unlock: + pthread_mutex_unlock (&healer->mutex); + + return ret; +} + +int +ec_shd_full_healer_spawn (xlator_t *this, int subvol) +{ + return ec_shd_healer_spawn (this, NTH_FULL_HEALER (this, subvol), + ec_shd_full_healer); +} + + +int +ec_shd_index_healer_spawn (xlator_t *this, int subvol) +{ + return ec_shd_healer_spawn (this, NTH_INDEX_HEALER (this, subvol), + ec_shd_index_healer); +} + +void +ec_selfheal_childup (ec_t *ec, int child) +{ + if (!ec->shd.iamshd) + return; + ec_shd_index_healer_spawn (ec->xl, child); +} + +int +ec_selfheal_daemon_init (xlator_t *this) +{ + ec_t *ec = NULL; + ec_self_heald_t *shd = NULL; + int ret = -1; + int i = 0; + + ec = this->private; + shd = &ec->shd; + + this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this); + if (!this->itable) + goto out; + + shd->index_healers = GF_CALLOC (sizeof(*shd->index_healers), + ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->index_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->index_healers[i].subvol = i; + ret = ec_shd_healer_init (this, &shd->index_healers[i]); + if (ret) + goto out; + } + + shd->full_healers = GF_CALLOC (sizeof(*shd->full_healers), + ec->nodes, + ec_mt_subvol_healer_t); + if (!shd->full_healers) + goto out; + + for (i = 0; i < ec->nodes; i++) { + shd->full_healers[i].subvol = i; + ret = ec_shd_healer_init (this, &shd->full_healers[i]); + if (ret) + goto out; + } + + ret = 0; +out: + return ret; +} + + +int +ec_heal_op (xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id) +{ + char key[64] = {0}; + int op_ret = 0; + ec_t *ec = NULL; + int i = 0; + GF_UNUSED int ret = 0; + + ec = this->private; + + for (i = 0; i < ec->nodes; i++) { + snprintf (key, sizeof (key), "%d-%d-status", xl_id, i); + + op_ret = -1; + if (((ec->xl_up >> i) & 1) == 0) { + ret = dict_set_str (output, key, "Brick is not connected"); + } else if (!ec->up) { + ret = dict_set_str (output, key, + "Disperse subvolume is not up"); + } else if (!ec_shd_is_subvol_local (this, i)) { + ret = dict_set_str (output, key, "Brick is remote"); + } else { + ret = dict_set_str (output, key, "Started self-heal"); + if (op == GF_SHD_OP_HEAL_FULL) { + ec_shd_full_healer_spawn (this, i); + } else if (op == GF_SHD_OP_HEAL_INDEX) { + ec_shd_index_healer_spawn (this, i); + } + op_ret = 0; + } + } + return op_ret; +} + +int +ec_xl_op (xlator_t *this, dict_t *input, dict_t *output) +{ + gf_xl_afr_op_t op = GF_SHD_OP_INVALID; + int ret = 0; + int xl_id = 0; + + ret = dict_get_int32 (input, "xl-op", (int32_t *)&op); + if (ret) + goto out; + + ret = dict_get_int32 (input, this->name, &xl_id); + if (ret) + goto out; + + ret = dict_set_int32 (output, this->name, xl_id); + if (ret) + goto out; + + switch (op) { + case GF_SHD_OP_HEAL_FULL: + ret = ec_heal_op (this, output, op, xl_id); + break; + + case GF_SHD_OP_HEAL_INDEX: + ret = ec_heal_op (this, output, op, xl_id); + break; + + default: + ret = -1; + break; + } +out: + dict_del (output, this->name); + return ret; +} diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h new file mode 100644 index 00000000000..0f27a8ec776 --- /dev/null +++ b/xlators/cluster/ec/src/ec-heald.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __EC_HEALD_H__ +#define __EC_HEALD_H__ + +#include "xlator.h" + +struct _ec; +typedef struct _ec ec_t; + +struct subvol_healer { + xlator_t *this; + int subvol; + gf_boolean_t local; + gf_boolean_t running; + gf_boolean_t rerun; + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_t thread; +}; + +struct _ec_self_heald; +typedef struct _ec_self_heald ec_self_heald_t; + +struct _ec_self_heald { + gf_boolean_t iamshd; + gf_boolean_t enabled; + int timeout; + struct subvol_healer *index_healers; + struct subvol_healer *full_healers; +}; + +int +ec_xl_op (xlator_t *this, dict_t *input, dict_t *output); + +int +ec_selfheal_daemon_init (xlator_t *this); +void ec_selfheal_childup (ec_t *ec, int child); +#endif /* __EC_HEALD_H__ */ diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h index 8a66fb912a5..df65a031590 100644 --- a/xlators/cluster/ec/src/ec-mem-types.h +++ b/xlators/cluster/ec/src/ec-mem-types.h @@ -20,6 +20,7 @@ enum gf_ec_mem_types_ ec_mt_ec_inode_t, ec_mt_ec_fd_t, ec_mt_ec_heal_t, + ec_mt_subvol_healer_t, ec_mt_end }; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 219494b961e..9fecde4c495 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -18,6 +18,7 @@ #include "ec-fops.h" #include "ec-method.h" #include "ec.h" +#include "ec-heald.h" #define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS /* The maximum number of nodes is derived from the maximum allowed fragments @@ -186,8 +187,8 @@ reconfigure (xlator_t *this, dict_t *options) { ec_t *ec = this->private; - GF_OPTION_RECONF ("self-heal-daemon", ec->shd, options, bool, failed); - GF_OPTION_RECONF ("iam-self-heal-daemon", ec->iamshd, options, + GF_OPTION_RECONF ("self-heal-daemon", ec->shd.enabled, options, bool, failed); + GF_OPTION_RECONF ("iam-self-heal-daemon", ec->shd.iamshd, options, bool, failed); return 0; @@ -329,13 +330,35 @@ ec_handle_down (xlator_t *this, ec_t *ec, int32_t idx) } int32_t -notify (xlator_t *this, int32_t event, void *data, ...) -{ - ec_t * ec = this->private; - int32_t idx = 0; - int32_t error = 0; - glusterfs_event_t old_event = GF_EVENT_MAXVAL; - glusterfs_event_t new_event = GF_EVENT_MAXVAL; +ec_notify (xlator_t *this, int32_t event, void *data, void *data2) +{ + ec_t *ec = this->private; + int32_t idx = 0; + int32_t error = 0; + glusterfs_event_t old_event = GF_EVENT_MAXVAL; + glusterfs_event_t new_event = GF_EVENT_MAXVAL; + dict_t *input = NULL; + dict_t *output = NULL; + + if (event == GF_EVENT_TRANSLATOR_OP) { + if (!ec->up) { + error = -1; + goto out; + } else { + input = data; + output = data2; + error = ec_xl_op (this, input, output); + } + goto out; + } + + for (idx = 0; idx < ec->nodes; idx++) { + if (ec->xl_list[idx] == data) { + if (event == GF_EVENT_CHILD_UP) + ec_selfheal_childup (ec, idx); + break; + } + } LOCK (&ec->lock); @@ -348,11 +371,6 @@ notify (xlator_t *this, int32_t event, void *data, ...) goto unlock; } - for (idx = 0; idx < ec->nodes; idx++) { - if (ec->xl_list[idx] == data) - break; - } - gf_log (this->name, GF_LOG_TRACE, "NOTIFY(%d): %p, %d", event, data, idx); @@ -381,13 +399,28 @@ notify (xlator_t *this, int32_t event, void *data, ...) if (new_event != GF_EVENT_MAXVAL) error = default_notify (this, new_event, data); } -unlock: - UNLOCK (&ec->lock); + unlock: + UNLOCK (&ec->lock); + + if (event != GF_EVENT_MAXVAL) + return default_notify (this, event, data); +out: + return error; +} + +int32_t +notify (xlator_t *this, int32_t event, void *data, ...) +{ + int ret = -1; + va_list ap; + void *data2 = NULL; - if (event != GF_EVENT_MAXVAL) - return default_notify (this, event, data); + va_start (ap, data); + data2 = va_arg (ap, dict_t*); + va_end (ap); + ret = ec_notify (this, event, data, data2); - return error; + return ret; } int32_t @@ -440,9 +473,11 @@ init (xlator_t *this) } ec_method_initialize(); - GF_OPTION_INIT ("self-heal-daemon", ec->shd, bool, failed); - GF_OPTION_INIT ("iam-self-heal-daemon", ec->iamshd, bool, failed); + GF_OPTION_INIT ("self-heal-daemon", ec->shd.enabled, bool, failed); + GF_OPTION_INIT ("iam-self-heal-daemon", ec->shd.iamshd, bool, failed); + if (ec->shd.iamshd) + ec_selfheal_daemon_init (this); gf_log(this->name, GF_LOG_DEBUG, "Disperse translator initialized."); return 0; diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index b6a95a11b18..1c740187757 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -13,6 +13,7 @@ #include "xlator.h" #include "timer.h" +#include "ec-heald.h" #define EC_XATTR_PREFIX "trusted.ec." #define EC_XATTR_CONFIG EC_XATTR_PREFIX"config" @@ -21,9 +22,6 @@ #define EC_XATTR_HEAL EC_XATTR_PREFIX"heal" #define EC_XATTR_DIRTY EC_XATTR_PREFIX"dirty" -struct _ec; -typedef struct _ec ec_t; - struct _ec { xlator_t * xl; @@ -46,8 +44,6 @@ struct _ec struct mem_pool * fop_pool; struct mem_pool * cbk_pool; struct mem_pool * lock_pool; - gf_boolean_t shd; - gf_boolean_t iamshd; + ec_self_heald_t shd; }; - #endif /* __EC_H__ */ |