diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/ec/src/ec-data.c | 21 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 59 |
2 files changed, 52 insertions, 28 deletions
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c index 9d64280eb35..a869735794e 100644 --- a/xlators/cluster/ec/src/ec-data.c +++ b/xlators/cluster/ec/src/ec-data.c @@ -104,19 +104,6 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk) mem_put(cbk); } -/* PARENT_DOWN will be notified to children only after these fops are complete - * when graph switch happens. We do not want graph switch to be waiting on - * heal to complete as healing big file/directory could take a while. Which - * will lead to hang on the mount. - */ -static gf_boolean_t -ec_needs_graceful_completion (ec_fop_data_t *fop) -{ - if ((fop->id != EC_FOP_HEAL) && (fop->id != EC_FOP_FHEAL)) - return _gf_true; - return _gf_false; -} - ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, int32_t id, uint32_t flags, uintptr_t target, int32_t minimum, @@ -203,13 +190,11 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this, fop->parent = parent; } - if (ec_needs_graceful_completion (fop)) { - LOCK(&ec->lock); + LOCK(&ec->lock); - list_add_tail(&fop->pending_list, &ec->pending_fops); + list_add_tail(&fop->pending_list, &ec->pending_fops); - UNLOCK(&ec->lock); - } + UNLOCK(&ec->lock); return fop; } diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 8d62b01ac8a..6562adf9e24 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -1428,6 +1428,12 @@ ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, int i = 0; int ret = 0; + if (ec->shutdown) { + gf_msg_debug(this->name, 0, "Cancelling directory heal " + "because EC is stopping."); + return -ENOTCONN; + } + memcpy (name_on, name_data->participants, ec->nodes); ret = ec_heal_name (name_data->frame, ec, parent->inode, entry->d_name, name_on); @@ -1449,6 +1455,7 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode, int j = 0; loc_t loc = {0}; struct ec_name_data name_data = {0}; + int ret = 0; loc.inode = inode_ref (inode); gf_uuid_copy (loc.gfid, inode->gfid); @@ -1459,18 +1466,23 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode, for (i = 0; i < ec->nodes; i++) { if (!participants[i]) continue; - syncop_dir_scan (ec->xl_list[i], &loc, - GF_CLIENT_PID_SELF_HEALD, &name_data, - ec_name_heal_handler); + ret = syncop_dir_scan (ec->xl_list[i], &loc, + GF_CLIENT_PID_SELF_HEALD, &name_data, + ec_name_heal_handler); + if (ret < 0) { + break; + } for (j = 0; j < ec->nodes; j++) if (name_data.failed_on[j]) participants[j] = 0; - if (EC_COUNT (participants, ec->nodes) <= ec->fragments) - return -ENOTCONN; + if (EC_COUNT (participants, ec->nodes) <= ec->fragments) { + ret = -ENOTCONN; + break; + } } loc_wipe (&loc); - return 0; + return ret; } int @@ -2009,6 +2021,17 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, for (heal->offset = 0; (heal->offset < size) && !heal->done; heal->offset += heal->size) { + /* We immediately abort any heal if a shutdown request has been + * received to avoid delays. The healing of this file will be + * restarted by another SHD or other client that accesses the + * file. */ + if (ec->shutdown) { + gf_msg_debug(ec->xl->name, 0, "Cancelling heal because " + "EC is stopping."); + ret = -ENOTCONN; + break; + } + gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: " "%d, offset: %"PRIu64" bsize: %"PRIu64, uuid_utoa (fd->inode->gfid), @@ -2612,16 +2635,32 @@ ec_handle_healers_done (ec_fop_data_t *fop) return; LOCK (&ec->lock); - { - list_del_init (&fop->healer); + + list_del_init (&fop->healer); + + do { ec->healers--; heal_fop = __ec_dequeue_heals (ec); - } + + if ((heal_fop != NULL) && ec->shutdown) { + /* This will prevent ec_handle_healers_done() to be + * called recursively. That would be problematic if + * the queue is too big. */ + list_del_init(&heal_fop->healer); + + UNLOCK(&ec->lock); + + ec_fop_set_error(fop, ENOTCONN); + ec_heal_fail(ec, heal_fop); + + LOCK(&ec->lock); + } + } while ((heal_fop != NULL) && ec->shutdown); + UNLOCK (&ec->lock); if (heal_fop) ec_launch_heal (ec, heal_fop); - } void |