Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 375 |
1 file changed, 288 insertions, 87 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 9eb5b856932..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,16 +8,14 @@
   cases as published by the Free Software Foundation.
 */
 
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syncop-utils.h"
-#include "cluster-syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
 #include "ec.h"
-#include "ec-mem-types.h"
 #include "ec-types.h"
 #include "ec-messages.h"
 #include "ec-helpers.h"
@@ -72,6 +70,7 @@ struct ec_name_data {
     char *name;
     inode_t *parent;
     default_args_cbk_t *replies;
+    uint32_t heal_pending;
 };
 
 static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -103,6 +102,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
 }
 /* FOP: heal */
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+    ec_inode_t *ctx = NULL;
+    loc_t *loc = NULL;
+
+    if (!fop)
+        return;
+
+    loc = &fop->loc[0];
+    LOCK(&loc->inode->lock);
+    {
+        ctx = __ec_inode_get(loc->inode, fop->xl);
+        if (ctx) {
+            ctx->heal_count += 1;
+        }
+    }
+    UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+    ec_inode_t *ctx = NULL;
+    loc_t *loc = NULL;
+    int32_t heal_count = 0;
+    if (!fop)
+        return;
+
+    loc = &fop->loc[0];
+    LOCK(&loc->inode->lock);
+    {
+        ctx = __ec_inode_get(loc->inode, fop->xl);
+        if (ctx) {
+            ctx->heal_count += -1;
+            heal_count = ctx->heal_count;
+        }
+    }
+    UNLOCK(&loc->inode->lock);
+    GF_ASSERT(heal_count >= 0);
+}
+
 uintptr_t
 ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
 {
@@ -325,16 +366,16 @@ ec_heal_data_block(ec_heal_t *heal)
 /* FOP: fheal */
 void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
-         fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
-         dict_t *xdata)
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+         uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+         int32_t partial, dict_t *xdata)
 {
     ec_fd_t *ctx = ec_fd_get(fd, this);
     if (ctx != NULL) {
         gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
                      ctx->open);
-        ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+        ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
                 xdata);
     }
 }
@@ -954,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
         ret = -ENOTCONN;
         goto out;
     }
+
 out:
     if (xattr)
         dict_unref(xattr);
@@ -977,6 +1019,7 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
     int estale_count = 0;
     int i = 0;
     call_frame_t *frame = name_data->frame;
+    uuid_t gfid;
 
     ec = name_data->frame->this->private;
     EC_REPLIES_ALLOC(replies, ec->nodes);
@@ -985,12 +1028,16 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
         goto out;
     }
+    loc.parent = inode_ref(name_data->parent);
     loc.inode = inode_new(name_data->parent->table);
     if (!loc.inode) {
         ret = -ENOMEM;
         goto out;
     }
-    gf_uuid_parse(key, loc.gfid);
+
+    gf_uuid_parse(key, gfid);
+    gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+    loc.name = name_data->name;
     output = alloca0(ec->nodes);
     ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
                          replies, output, name_data->frame, ec->xl, &loc, NULL);
@@ -1003,6 +1050,11 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
                 estale_count++;
             else
                 name_data->participants[i] = 0;
+        } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+            estale_count++;
+            gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+                         uuid_utoa(name_data->parent->gfid), name_data->name,
+                         key);
         }
     }
@@ -1122,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
     dict_t *xdata = NULL;
     char *linkname = NULL;
     ec_config_t config;
+
     /* There should be just one gfid key */
     EC_REPLIES_ALLOC(replies, ec->nodes);
     if (gfid_db->count != 1) {
@@ -1366,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
     ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
                          participants);
+    if (ret >= 0) {
+        /* If ec_create_name() succeeded we return 1 to indicate that a new
+         * file has been created and it will need to be healed. */
+        ret = 1;
+    }
 out:
     cluster_replies_wipe(replies, ec->nodes);
     loc_wipe(&loc);
@@ -1443,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
                        name_on);
-    if (ret < 0)
+    if (ret < 0) {
         memset(name_on, 0, ec->nodes);
+    } else {
+        name_data->heal_pending += ret;
+    }
     for (i = 0; i < ec->nodes; i++)
         if (name_data->participants[i] && !name_on[i])
             name_data->failed_on[i] = 1;
+
     return 0;
 }
 int
 ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
-              unsigned char *participants)
+              unsigned char *participants, uint32_t *pending)
 {
     int i = 0;
     int j = 0;
@@ -1467,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
     name_data.frame = frame;
     name_data.participants = participants;
     name_data.failed_on = alloca0(ec->nodes);
-    ;
+    name_data.heal_pending = 0;
     for (i = 0; i < ec->nodes; i++) {
         if (!participants[i])
@@ -1486,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
             break;
         }
     }
+    *pending += name_data.heal_pending;
+
     loc_wipe(&loc);
     return ret;
 }
@@ -1493,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
 int
 __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
                 unsigned char *heal_on, unsigned char *sources,
-                unsigned char *healed_sinks)
+                unsigned char *healed_sinks, uint32_t *pending)
 {
     unsigned char *locked_on = NULL;
     unsigned char *output = NULL;
@@ -1538,7 +1602,7 @@ unlock:
         if (sources[i] || healed_sinks[i])
             participants[i] = 1;
     }
-    ret = ec_heal_names(frame, ec, inode, participants);
+    ret = ec_heal_names(frame, ec, inode, participants, pending);
 
     if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
         goto out;
@@ -1559,7 +1623,8 @@ out:
 int
 ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
-              unsigned char *sources, unsigned char *healed_sinks)
+              unsigned char *sources, unsigned char *healed_sinks,
+              uint32_t *pending)
 {
     unsigned char *locked_on = NULL;
     unsigned char *up_subvols = NULL;
@@ -1590,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
             goto unlock;
         }
         ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
-                              healed_sinks);
+                              healed_sinks, pending);
     }
 unlock:
     cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1909,16 +1974,16 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
         case EC_STATE_REPORT:
             if (fop->cbks.heal) {
-                fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+                fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
                                (heal->good | heal->bad), heal->good, heal->bad,
-                               NULL);
+                               0, NULL);
             }
             return EC_STATE_END;
 
         case -EC_STATE_REPORT:
             if (fop->cbks.heal) {
-                fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
-                               0, 0, NULL);
+                fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+                               fop->error, 0, 0, 0, 0, NULL);
             }
             return EC_STATE_END;
@@ -1933,7 +1998,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
 /*Takes lock */
 void
 ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
-              int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+              uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
 {
     ec_cbk_t callback = {.heal = func};
     ec_fop_data_t *fop = NULL;
@@ -1944,7 +2009,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
     VALIDATE_OR_GOTO(this, out);
     GF_VALIDATE_OR_GOTO(this->name, this->private, out);
 
-    fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+    fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
                                NULL, ec_manager_heal_block, callback, heal);
     if (fop == NULL)
         goto out;
@@ -1955,19 +2020,21 @@ out:
     if (fop != NULL) {
         ec_manager(fop, error);
     } else {
-        func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+        func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
     }
 }
 
 int32_t
 ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
                    int32_t op_ret, int32_t op_errno, uintptr_t mask,
-                   uintptr_t good, uintptr_t bad, dict_t *xdata)
+                   uintptr_t good, uintptr_t bad, uint32_t pending,
+                   dict_t *xdata)
 {
-    ec_fop_data_t *fop = cookie;
-    ec_heal_t *heal = fop->data;
+    ec_heal_t *heal = cookie;
 
-    fop->heal = NULL;
+    if (heal->fop) {
+        heal->fop->heal = NULL;
+    }
     heal->fop = NULL;
     heal->error = op_ret < 0 ? op_errno : 0;
     syncbarrier_wake(heal->data);
@@ -2259,9 +2326,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
         loc.inode = inode_ref(fd->inode);
         gf_uuid_copy(loc.gfid, fd->inode->gfid);
-        ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
-                              output, frame, ec->xl, &loc, &source_buf,
-                              GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+        ret = cluster_setattr(
+            ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+            ec->xl, &loc, &source_buf,
+            GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
         EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
         if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
             ret = -ENOTCONN;
@@ -2429,6 +2497,58 @@ out:
     return ret;
 }
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+    int i = 0;
+    int ret = 0;
+    dict_t **xattr = NULL;
+    loc_t loc = {0};
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+    unsigned char *on = NULL;
+    default_args_cbk_t *replies = NULL;
+    dict_t *dict = NULL;
+
+    /* Allocate the required memory */
+    loc.inode = inode_ref(inode);
+    gf_uuid_copy(loc.gfid, inode->gfid);
+    on = alloca0(ec->nodes);
+    EC_REPLIES_ALLOC(replies, ec->nodes);
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+    if (!xattr) {
+        ret = -ENOMEM;
+        goto out;
+    }
+    dict = dict_new();
+    if (!dict) {
+        ret = -ENOMEM;
+        goto out;
+    }
+    for (i = 0; i < ec->nodes; i++) {
+        xattr[i] = dict;
+        on[i] = 1;
+    }
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+    if (ret < 0) {
+        ret = -ENOMEM;
+        goto out;
+    }
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+                        xattr, NULL);
+out:
+    if (dict) {
+        dict_unref(dict);
+    }
+    if (xattr) {
+        GF_FREE(xattr);
+    }
+    cluster_replies_wipe(replies, ec->nodes);
+    loc_wipe(&loc);
+    return ret;
+}
+
 void
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
 {
@@ -2446,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
     intptr_t mbad = 0;
     intptr_t good = 0;
     intptr_t bad = 0;
+    uint32_t pending = 0;
     ec_fop_data_t *fop = data;
     gf_boolean_t blocking = _gf_false;
     ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2481,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
     if (loc->name && strlen(loc->name)) {
         ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
                            participants);
-        if (ret == 0) {
+        if (ret >= 0) {
            gf_msg_debug(this->name, 0,
                         "%s: name heal "
                         "successful on %" PRIXPTR,
@@ -2499,32 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
     /* Mount triggers heal only when it detects that it must need heal, shd
      * triggers heals periodically which need not be thorough*/
-    ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
-                    !ec->shd.iamshd, &need_heal);
-
-    if (need_heal == EC_HEAL_NONEED) {
-        gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
-               "Heal is not required for : %s ", uuid_utoa(loc->gfid));
-        goto out;
+    if (ec->shd.iamshd && (ret <= 0)) {
+        ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+                        &need_heal);
+
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+                   "Index entry needs to be purged for: %s ",
+                   uuid_utoa(loc->gfid));
+            /* We need to send zero-xattrop so that stale index entry could be
+             * removed. We need not take lock on this entry to do so as
+             * xattrop on a brick is atomic. */
+            ec_heal_purge_stale_index(frame, ec, loc->inode);
+            goto out;
+        } else if (need_heal == EC_HEAL_NONEED) {
+            gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+                   "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+            goto out;
+        }
     }
-    msources = alloca0(ec->nodes);
-    mhealed_sinks = alloca0(ec->nodes);
-    ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
-    if (ret == 0) {
-        mgood = ec_char_array_to_mask(msources, ec->nodes);
-        mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
-    } else {
-        op_ret = -1;
-        op_errno = -ret;
-    }
 
     sources = alloca0(ec->nodes);
     healed_sinks = alloca0(ec->nodes);
     if (IA_ISREG(loc->inode->ia_type)) {
         ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
                            healed_sinks);
     } else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
-        ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+        ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+                            &pending);
     } else {
         ret = 0;
         memcpy(sources, participants, ec->nodes);
@@ -2538,15 +2661,27 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
         op_ret = -1;
         op_errno = -ret;
     }
+    msources = alloca0(ec->nodes);
+    mhealed_sinks = alloca0(ec->nodes);
+    ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+    if (ret == 0) {
+        mgood = ec_char_array_to_mask(msources, ec->nodes);
+        mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+    } else {
+        op_ret = -1;
+        op_errno = -ret;
+    }
 
 out:
+    ec_reset_entry_healing(fop);
     if (fop->cbks.heal) {
-        fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+        fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
                        ec_char_array_to_mask(participants, ec->nodes),
-                       mgood & good, mbad & bad, NULL);
+                       mgood & good, mbad & bad, pending, NULL);
     }
     if (frame)
         STACK_DESTROY(frame->root);
+
     return;
 }
@@ -2593,8 +2728,8 @@ void
 ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
 {
     if (fop->cbks.heal) {
-        fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
-                       NULL);
+        fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+                       0, 0, NULL);
    }
    ec_fop_data_release(fop);
 }
@@ -2603,13 +2738,31 @@ void
 ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
 {
     int ret = 0;
+    call_frame_t *frame = NULL;
+
+    frame = create_frame(ec->xl, ec->xl->ctx->pool);
+    if (!frame) {
+        ret = -1;
+        goto out;
+    }
+
+    ec_owner_set(frame, frame->root);
+    /*Do heal as root*/
+    frame->root->uid = 0;
+    frame->root->gid = 0;
+    /*Mark the fops as internal*/
+    frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
 
     ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
-                       NULL, fop);
+                       frame, fop);
+out:
     if (ret < 0) {
         ec_fop_set_error(fop, ENOMEM);
         ec_heal_fail(ec, fop);
     }
+
+    if (frame)
+        STACK_DESTROY(frame->root);
 }
 
 void
@@ -2650,11 +2803,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
         ec_launch_heal(ec, heal_fop);
 }
 
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+    ec_inode_t *ctx = NULL;
+    int32_t heal_count = 0;
+    loc_t *loc = NULL;
+
+    loc = &fop->loc[0];
+
+    LOCK(&loc->inode->lock);
+    {
+        ctx = __ec_inode_get(loc->inode, fop->xl);
+        if (ctx) {
+            heal_count = ctx->heal_count;
+        }
+    }
+    UNLOCK(&loc->inode->lock);
+    GF_ASSERT(heal_count >= 0);
+    return heal_count;
+}
+
 void
 ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
 {
     gf_boolean_t can_heal = _gf_true;
     ec_t *ec = this->private;
+    ec_fop_data_t *fop_rel = NULL;
 
     if (fop->req_frame == NULL) {
         LOCK(&ec->lock);
         {
@@ -2662,8 +2837,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
             if ((ec->background_heals > 0) &&
                 (ec->heal_wait_qlen + ec->background_heals) >
                     (ec->heal_waiters + ec->healers)) {
-                list_add_tail(&fop->healer, &ec->heal_waiting);
-                ec->heal_waiters++;
+                if (!ec_is_entry_healing(fop)) {
+                    list_add_tail(&fop->healer, &ec->heal_waiting);
+                    ec->heal_waiters++;
+                    ec_set_entry_healing(fop);
+                } else {
+                    fop_rel = fop;
+                }
                 fop = __ec_dequeue_heals(ec);
             } else {
                 can_heal = _gf_false;
@@ -2673,8 +2853,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
     }
 
     if (can_heal) {
-        if (fop)
+        if (fop) {
+            if (fop->req_frame != NULL) {
+                ec_set_entry_healing(fop);
+            }
             ec_launch_heal(ec, fop);
+        }
     } else {
         gf_msg_debug(this->name, 0,
                      "Max number of heals are "
@@ -2682,12 +2866,15 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
         ec_fop_set_error(fop, EBUSY);
         ec_heal_fail(ec, fop);
     }
+    if (fop_rel) {
+        ec_heal_done(0, NULL, fop_rel);
+    }
 }
 
 void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
-        fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
-        dict_t *xdata)
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+        uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+        int32_t partial, dict_t *xdata)
 {
     ec_cbk_t callback = {.heal = func};
     ec_fop_data_t *fop = NULL;
@@ -2703,7 +2890,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
     if (frame && frame->local)
         goto fail;
 
-    fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+    fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
                                NULL, NULL, callback, data);
 
     err = ENOMEM;
@@ -2729,15 +2916,27 @@ fail:
     if (fop)
         ec_fop_data_release(fop);
     if (func)
-        func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+        func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
 }
 
 int
 ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
 {
     ec_t *ec = opaque;
+    gf_boolean_t last_fop = _gf_false;
+    if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+        LOCK(&ec->lock);
+        {
+            last_fop = __ec_is_last_fop(ec);
+        }
+        UNLOCK(&ec->lock);
+    }
     gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+    if (last_fop)
+        ec_pending_fops_completed(ec);
+
     return 0;
 }
@@ -2777,6 +2976,10 @@ ec_replace_brick_heal_wrap(void *opaque)
         itable = ec->xl->itable;
     else
        goto out;
+
+    if (xlator_is_cleanup_starting(ec->xl))
+        goto out;
+
     ret = ec_replace_heal(ec, itable->root);
 out:
     return ret;
 }
@@ -2787,14 +2990,15 @@ ec_launch_replace_heal(ec_t *ec)
 {
     int ret = -1;
 
-    if (!ec)
-        return ret;
     ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
                        ec_replace_heal_done, NULL, ec);
+
     if (ret < 0) {
         gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
                      ret);
+        ec_replace_heal_done(-1, NULL, ec);
     }
+
     return ret;
 }
@@ -2826,7 +3030,7 @@ out:
 static int32_t
 _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
                      gf_boolean_t self_locked, int32_t lock_count,
-                     ec_heal_need_t *need_heal)
+                     ec_heal_need_t *need_heal, uint64_t *versions)
 {
     int i = 0;
     int source_count = 0;
@@ -2836,11 +3040,18 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
         *need_heal = EC_HEAL_NONEED;
         if (self_locked || lock_count == 0) {
             for (i = 0; i < ec->nodes; i++) {
-                if (dirty[i]) {
+                if (dirty[i] || (versions[i] != versions[0])) {
                     *need_heal = EC_HEAL_MUST;
                     goto out;
                 }
            }
+            /* If lock count is 0, all dirty flags are 0 and all the
+             * versions are macthing then why are we here. It looks
+             * like something went wrong while removing the index entries
+             * after completing a successful heal or fop. In this case
+             * we need to remove this index entry to avoid triggering heal
+             * in a loop and causing lookups again and again*/
+            *need_heal = EC_HEAL_PURGE_INDEX;
        } else {
            for (i = 0; i < ec->nodes; i++) {
                /* Since each lock can only increment the dirty
@@ -2852,6 +3063,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
                    *need_heal = EC_HEAL_MUST;
                    goto out;
                }
+                if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+                    *need_heal = EC_HEAL_MAYBE;
+                }
            }
        }
    } else {
@@ -2872,7 +3086,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
     unsigned char *healed_sinks = NULL;
     uint64_t *meta_versions = NULL;
     int ret = 0;
-    int i = 0;
 
     sources = alloca0(ec->nodes);
     healed_sinks = alloca0(ec->nodes);
@@ -2885,15 +3098,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
     }
 
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
-                               need_heal);
-    if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
-        for (i = 1; i < ec->nodes; i++) {
-            if (meta_versions[i] != meta_versions[0]) {
-                *need_heal = EC_HEAL_MUST;
-                goto out;
-            }
-        }
-    }
+                               need_heal, meta_versions);
 out:
     return ret;
 }
@@ -2929,7 +3134,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
     }
 
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
-                               need_heal);
+                               need_heal, data_versions);
 out:
     return ret;
 }
@@ -2957,7 +3162,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
     }
 
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
-                               need_heal);
+                               need_heal, data_versions);
 out:
     return ret;
 }
@@ -3055,10 +3260,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
 need_heal:
     ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
                        need_heal);
-
-    if (!self_locked && *need_heal == EC_HEAL_MUST) {
-        *need_heal = EC_HEAL_MAYBE;
-    }
 out:
     cluster_replies_wipe(replies, ec->nodes);
     loc_wipe(&loc);
@@ -3144,7 +3345,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
     ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
                           _gf_false, &need_heal);
-    if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
+    if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
         goto set_heal;
     }
     need_heal = EC_HEAL_NONEED;
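
The throttling part of this patch (ec_set_entry_healing, ec_reset_entry_healing, ec_is_entry_healing and the changes in ec_heal_throttle) boils down to a per-inode counter of heals that are already queued or running: a background heal request for an entry that is already being healed is released instead of being queued a second time. The following is a minimal standalone sketch of that pattern in plain C with pthreads; it is not GlusterFS code, every name in it is made up, and, unlike the patch (which keeps the check and the mark as separate calls under the inode lock), it folds both into a single critical section.

/* Sketch only: per-object "heal in progress" counter used to de-duplicate
 * heal requests. Hypothetical names; not part of the GlusterFS API. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct inode_ctx {
    pthread_mutex_t lock;
    int heal_count; /* heals queued or running for this inode */
};

/* Returns true if the caller should enqueue a heal, false if one is already
 * pending and this request can simply be dropped. */
static bool try_mark_healing(struct inode_ctx *ctx)
{
    bool enqueue = false;

    pthread_mutex_lock(&ctx->lock);
    if (ctx->heal_count == 0) {
        ctx->heal_count++; /* analogous to ec_set_entry_healing() */
        enqueue = true;
    }
    pthread_mutex_unlock(&ctx->lock);
    return enqueue;
}

/* Called when a heal finishes; analogous to ec_reset_entry_healing(). */
static void unmark_healing(struct inode_ctx *ctx)
{
    pthread_mutex_lock(&ctx->lock);
    if (ctx->heal_count > 0)
        ctx->heal_count--;
    pthread_mutex_unlock(&ctx->lock);
}

int main(void)
{
    struct inode_ctx ctx = {.lock = PTHREAD_MUTEX_INITIALIZER, .heal_count = 0};

    printf("first request enqueued: %d\n", try_mark_healing(&ctx)); /* 1 */
    printf("duplicate enqueued:     %d\n", try_mark_healing(&ctx)); /* 0 */
    unmark_healing(&ctx);
    printf("after completion:       %d\n", try_mark_healing(&ctx)); /* 1 */
    return 0;
}

Built with `cc -pthread`, the program prints 1, 0, 1: the duplicate request is dropped while a heal is pending and accepted again once unmark_healing() has run, which is the behaviour the patch wants for repeated heal triggers on the same entry.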
