summary refs log tree commit diff stats
path: root/xlators/cluster/ec/src/ec-heald.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-heald.c')
-rw-r--r-- xlators/cluster/ec/src/ec-heald.c 165
1 files changed, 141 insertions, 24 deletions
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 130790c66ac..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -8,15 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
#include "ec.h"
#include "ec-messages.h"
#include "ec-heald.h"
#include "ec-mem-types.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "protocol-common.h"
#define NTH_INDEX_HEALER(this, n) \
@@ -63,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + 60;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -71,6 +70,11 @@ disabled_loop:
break;
}
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
+
ret = healer->rerun;
healer->rerun = 0;
@@ -152,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+/* Decide, from a heal-status string, whether the heal has completed.
+ *
+ * The status is expected to look like "Good: <binary>, Bad: <binary>".
+ * The test is purely positional: the heal counts as completed only when
+ * the first '0' character of the string is located after the last ':'
+ * character, i.e. the first zero belongs to the part following "Bad:".
+ *
+ * Returns _gf_false when status is NULL, or when the string contains no
+ * '0' or no ':' (malformed status); _gf_true only in the case above. */
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+    char *first_zero = NULL;
+    char *last_colon = NULL;
+
+    if (status == NULL) {
+        return _gf_false;
+    }
+
+    last_colon = strrchr(status, ':');
+    first_zero = strchr(status, '0');
+
+    if ((first_zero == NULL) || (last_colon == NULL)) {
+        /* Malformed status: one of the two markers is missing. */
+        return _gf_false;
+    }
+
+    return (first_zero > last_colon) ? _gf_true : _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
@@ -241,9 +304,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
goto out;
}
+ _mask_cancellation();
ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
healer, ec_shd_index_heal, xdata,
ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
out:
if (xdata)
dict_unref(xdata);
@@ -263,6 +328,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int ret = 0;
ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (ec->xl_up_count <= ec->fragments) {
return -ENOTCONN;
}
@@ -305,11 +375,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
ec_t *ec = NULL;
loc_t loc = {0};
+ int ret = -1;
ec = healer->this->private;
loc.inode = inode;
- return syncop_ftw(ec->xl_list[healer->subvol], &loc,
- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
}
void *
@@ -317,13 +391,16 @@ ec_shd_index_healer(void *data)
{
struct subvol_healer *healer = NULL;
xlator_t *this = NULL;
+ int run = 0;
healer = data;
THIS = this = healer->this;
ec_t *ec = this->private;
for (;;) {
- ec_shd_healer_wait(healer);
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
if (ec->xl_up_count > ec->fragments) {
gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +429,12 @@ ec_shd_full_healer(void *data)
rootloc.inode = this->itable->root;
for (;;) {
- pthread_mutex_lock(&healer->mutex);
- {
- run = __ec_shd_healer_wait(healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock(&healer->mutex);
-
- if (!run)
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
break;
+ } else if (run == 0) {
+ continue;
+ }
if (ec->xl_up_count > ec->fragments) {
gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -429,6 +502,9 @@ unlock:
/* Spawn the full healer for the given subvolume index.
 *
 * Returns -1 without spawning anything when xlator_is_cleanup_starting(this)
 * reports true. Otherwise returns whatever ec_shd_healer_spawn() returns when
 * called with the NTH_FULL_HEALER(this, subvol) healer and the
 * ec_shd_full_healer thread function. */
int
ec_shd_full_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1; /* cleanup has begun: do not start a new healer */
+
return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
ec_shd_full_healer);
}
@@ -436,6 +512,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
/* Spawn the index healer for the given subvolume index.
 *
 * Returns -1 without spawning anything when xlator_is_cleanup_starting(this)
 * reports true. Otherwise returns whatever ec_shd_healer_spawn() returns when
 * called with the NTH_INDEX_HEALER(this, subvol) healer and the
 * ec_shd_index_healer thread function. */
int
ec_shd_index_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1; /* cleanup has begun: do not start a new healer */
+
return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
ec_shd_index_healer);
}
@@ -562,3 +641,41 @@ out:
dict_del(output, this->name);
return ret;
}
+
+/* Release the synchronization primitives embedded in a healer object.
+ *
+ * Destroys healer->cond and then healer->mutex. A NULL healer is ignored.
+ * The healer structure itself is not freed here, and the 'this' argument
+ * is not used by this function. */
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (healer != NULL) {
+        pthread_cond_destroy(&healer->cond);
+        pthread_mutex_destroy(&healer->mutex);
+    }
+}
+
+/* Tear down the self-heal daemon state stored in this xlator's private data.
+ *
+ * Does nothing when this->private is NULL or when shd->iamshd is not set.
+ * Otherwise, for each of the priv->nodes slots, the index healer and the
+ * full healer are passed to ec_destroy_healer_object(), and then both
+ * healer arrays are released with GF_FREE().
+ *
+ * An array that is NULL is skipped entirely: indexing a NULL array with
+ * i > 0 would produce a non-NULL bogus pointer that gets past the NULL
+ * check in ec_destroy_healer_object(). The array pointers are reset to
+ * NULL after being freed so that a repeated call cannot touch or free the
+ * released memory again. */
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        if (shd->index_healers)
+            ec_destroy_healer_object(this, &shd->index_healers[i]);
+
+        if (shd->full_healers)
+            ec_destroy_healer_object(this, &shd->full_healers[i]);
+    }
+
+    GF_FREE(shd->index_healers);
+    shd->index_healers = NULL;
+
+    GF_FREE(shd->full_healers);
+    shd->full_healers = NULL;
+}