summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-heal.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
-rw-r--r--xlators/cluster/ec/src/ec-heal.c375
1 files changed, 288 insertions, 87 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 9eb5b856932..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,16 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syncop-utils.h"
-#include "cluster-syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
#include "ec.h"
-#include "ec-mem-types.h"
#include "ec-types.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -72,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -103,6 +102,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
}
/* FOP: heal */
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+}
+
uintptr_t
ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
{
@@ -325,16 +366,16 @@ ec_heal_data_block(ec_heal_t *heal)
/* FOP: fheal */
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata)
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
{
ec_fd_t *ctx = ec_fd_get(fd, this);
if (ctx != NULL) {
gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
ctx->open);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
xdata);
}
}
@@ -954,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -977,6 +1019,7 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
int estale_count = 0;
int i = 0;
call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
ec = name_data->frame->this->private;
EC_REPLIES_ALLOC(replies, ec->nodes);
@@ -985,12 +1028,16 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
goto out;
}
+ loc.parent = inode_ref(name_data->parent);
loc.inode = inode_new(name_data->parent->table);
if (!loc.inode) {
ret = -ENOMEM;
goto out;
}
- gf_uuid_parse(key, loc.gfid);
+
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
output = alloca0(ec->nodes);
ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
replies, output, name_data->frame, ec->xl, &loc, NULL);
@@ -1003,6 +1050,11 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
estale_count++;
else
name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
}
}
@@ -1122,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1366,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1443,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1467,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1486,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1493,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1538,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1559,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1590,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1909,16 +1974,16 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
case EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
- 0, 0, NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -1933,7 +1998,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
/*Takes lock */
void
ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -1944,7 +2009,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, ec_manager_heal_block, callback, heal);
if (fop == NULL)
goto out;
@@ -1955,19 +2020,21 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ ec_heal_t *heal = cookie;
- fop->heal = NULL;
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
heal->fop = NULL;
heal->error = op_ret < 0 ? op_errno : 0;
syncbarrier_wake(heal->data);
@@ -2259,9 +2326,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
loc.inode = inode_ref(fd->inode);
gf_uuid_copy(loc.gfid, fd->inode->gfid);
- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
- output, frame, ec->xl, &loc, &source_buf,
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ret = -ENOTCONN;
@@ -2429,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2446,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2481,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2499,32 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
- !ec->shd.iamshd, &need_heal);
-
- if (need_heal == EC_HEAL_NONEED) {
- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
- "Heal is not required for : %s ", uuid_utoa(loc->gfid));
- goto out;
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
}
- msources = alloca0(ec->nodes);
- mhealed_sinks = alloca0(ec->nodes);
- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
- if (ret == 0) {
- mgood = ec_char_array_to_mask(msources, ec->nodes);
- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
- } else {
- op_ret = -1;
- op_errno = -ret;
- }
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2538,15 +2661,27 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
op_ret = -1;
op_errno = -ret;
}
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
out:
+ ec_reset_entry_healing(fop);
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2593,8 +2728,8 @@ void
ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
- NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2603,13 +2738,31 @@ void
ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
{
int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
- NULL, fop);
+ frame, fop);
+out:
if (ret < 0) {
ec_fop_set_error(fop, ENOMEM);
ec_heal_fail(ec, fop);
}
+
+ if (frame)
+ STACK_DESTROY(frame->root);
}
void
@@ -2650,11 +2803,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
ec_launch_heal(ec, heal_fop);
}
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
+
+ loc = &fop->loc[0];
+
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
+
void
ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
{
gf_boolean_t can_heal = _gf_true;
ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
if (fop->req_frame == NULL) {
LOCK(&ec->lock);
@@ -2662,8 +2837,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
if ((ec->background_heals > 0) &&
(ec->heal_wait_qlen + ec->background_heals) >
(ec->heal_waiters + ec->healers)) {
- list_add_tail(&fop->healer, &ec->heal_waiting);
- ec->heal_waiters++;
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
fop = __ec_dequeue_heals(ec);
} else {
can_heal = _gf_false;
@@ -2673,8 +2853,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
}
if (can_heal) {
- if (fop)
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
ec_launch_heal(ec, fop);
+ }
} else {
gf_msg_debug(this->name, 0,
"Max number of heals are "
@@ -2682,12 +2866,15 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
ec_fop_set_error(fop, EBUSY);
ec_heal_fail(ec, fop);
}
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
}
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata)
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -2703,7 +2890,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
if (frame && frame->local)
goto fail;
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, NULL, callback, data);
err = ENOMEM;
@@ -2729,15 +2916,27 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
{
ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+ if (last_fop)
+ ec_pending_fops_completed(ec);
+
return 0;
}
@@ -2777,6 +2976,10 @@ ec_replace_brick_heal_wrap(void *opaque)
itable = ec->xl->itable;
else
goto out;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
+
ret = ec_replace_heal(ec, itable->root);
out:
return ret;
@@ -2787,14 +2990,15 @@ ec_launch_replace_heal(ec_t *ec)
{
int ret = -1;
- if (!ec)
- return ret;
ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
ec_replace_heal_done, NULL, ec);
+
if (ret < 0) {
gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
ret);
+ ec_replace_heal_done(-1, NULL, ec);
}
+
return ret;
}
@@ -2826,7 +3030,7 @@ out:
static int32_t
_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
gf_boolean_t self_locked, int32_t lock_count,
- ec_heal_need_t *need_heal)
+ ec_heal_need_t *need_heal, uint64_t *versions)
{
int i = 0;
int source_count = 0;
@@ -2836,11 +3040,18 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_NONEED;
if (self_locked || lock_count == 0) {
for (i = 0; i < ec->nodes; i++) {
- if (dirty[i]) {
+ if (dirty[i] || (versions[i] != versions[0])) {
*need_heal = EC_HEAL_MUST;
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
@@ -2852,6 +3063,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_MUST;
goto out;
}
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
}
}
} else {
@@ -2872,7 +3086,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
unsigned char *healed_sinks = NULL;
uint64_t *meta_versions = NULL;
int ret = 0;
- int i = 0;
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
@@ -2885,15 +3098,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
- for (i = 1; i < ec->nodes; i++) {
- if (meta_versions[i] != meta_versions[0]) {
- *need_heal = EC_HEAL_MUST;
- goto out;
- }
- }
- }
+ need_heal, meta_versions);
out:
return ret;
}
@@ -2929,7 +3134,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -2957,7 +3162,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -3055,10 +3260,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
need_heal:
ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
need_heal);
-
- if (!self_locked && *need_heal == EC_HEAL_MUST) {
- *need_heal = EC_HEAL_MAYBE;
- }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -3144,7 +3345,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
_gf_false, &need_heal);
- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
goto set_heal;
}
need_heal = EC_HEAL_NONEED;