summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorXavier Hernandez <jahernan@redhat.com>2017-11-22 11:10:32 +0100
committerPranith Kumar Karampuri <pkarampu@redhat.com>2017-11-28 09:11:45 +0000
commitc471636264040d84e0f21f5a26f61746aa65975a (patch)
tree66649538dc7ecf1c7a5791a483a95974bcbd186b /xlators/cluster
parenteba88aed7d2813bfccd1455d4148c4f25d9d0e48 (diff)
cluster/ec: Prevent self-heal from working after PARENT_DOWN
When the volume is being stopped, a PARENT_DOWN event is received. This instructs EC to wait until all pending operations have completed before declaring itself down. However, heal operations are not counted as pending and are allowed to continue even after EC has declared itself down. This may cause unexpected results and crashes. To solve this, heal operations are now treated exactly like any other operation, and EC won't propagate PARENT_DOWN until all operations, including heals, are complete. To avoid long delays if this happens in the middle of a big heal, a check has been added to abort the current heal when a shutdown is detected. Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034 BUG: 1515266 Signed-off-by: Xavier Hernandez <jahernan@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/ec/src/ec-data.c21
-rw-r--r--xlators/cluster/ec/src/ec-heal.c59
2 files changed, 52 insertions, 28 deletions
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index 9d64280eb35..a869735794e 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -104,19 +104,6 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
mem_put(cbk);
}
-/* PARENT_DOWN will be notified to children only after these fops are complete
- * when graph switch happens. We do not want graph switch to be waiting on
- * heal to complete as healing big file/directory could take a while. Which
- * will lead to hang on the mount.
- */
-static gf_boolean_t
-ec_needs_graceful_completion (ec_fop_data_t *fop)
-{
- if ((fop->id != EC_FOP_HEAL) && (fop->id != EC_FOP_FHEAL))
- return _gf_true;
- return _gf_false;
-}
-
ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
int32_t id, uint32_t flags,
uintptr_t target, int32_t minimum,
@@ -203,13 +190,11 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
fop->parent = parent;
}
- if (ec_needs_graceful_completion (fop)) {
- LOCK(&ec->lock);
+ LOCK(&ec->lock);
- list_add_tail(&fop->pending_list, &ec->pending_fops);
+ list_add_tail(&fop->pending_list, &ec->pending_fops);
- UNLOCK(&ec->lock);
- }
+ UNLOCK(&ec->lock);
return fop;
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 8d62b01ac8a..6562adf9e24 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1428,6 +1428,12 @@ ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int i = 0;
int ret = 0;
+ if (ec->shutdown) {
+ gf_msg_debug(this->name, 0, "Cancelling directory heal "
+ "because EC is stopping.");
+ return -ENOTCONN;
+ }
+
memcpy (name_on, name_data->participants, ec->nodes);
ret = ec_heal_name (name_data->frame, ec, parent->inode,
entry->d_name, name_on);
@@ -1449,6 +1455,7 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
int j = 0;
loc_t loc = {0};
struct ec_name_data name_data = {0};
+ int ret = 0;
loc.inode = inode_ref (inode);
gf_uuid_copy (loc.gfid, inode->gfid);
@@ -1459,18 +1466,23 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
continue;
- syncop_dir_scan (ec->xl_list[i], &loc,
- GF_CLIENT_PID_SELF_HEALD, &name_data,
- ec_name_heal_handler);
+ ret = syncop_dir_scan (ec->xl_list[i], &loc,
+ GF_CLIENT_PID_SELF_HEALD, &name_data,
+ ec_name_heal_handler);
+ if (ret < 0) {
+ break;
+ }
for (j = 0; j < ec->nodes; j++)
if (name_data.failed_on[j])
participants[j] = 0;
- if (EC_COUNT (participants, ec->nodes) <= ec->fragments)
- return -ENOTCONN;
+ if (EC_COUNT (participants, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ break;
+ }
}
loc_wipe (&loc);
- return 0;
+ return ret;
}
int
@@ -2009,6 +2021,17 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
for (heal->offset = 0; (heal->offset < size) && !heal->done;
heal->offset += heal->size) {
+ /* We immediately abort any heal if a shutdown request has been
+ * received to avoid delays. The healing of this file will be
+ * restarted by another SHD or other client that accesses the
+ * file. */
+ if (ec->shutdown) {
+ gf_msg_debug(ec->xl->name, 0, "Cancelling heal because "
+ "EC is stopping.");
+ ret = -ENOTCONN;
+ break;
+ }
+
gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
"%d, offset: %"PRIu64" bsize: %"PRIu64,
uuid_utoa (fd->inode->gfid),
@@ -2612,16 +2635,32 @@ ec_handle_healers_done (ec_fop_data_t *fop)
return;
LOCK (&ec->lock);
- {
- list_del_init (&fop->healer);
+
+ list_del_init (&fop->healer);
+
+ do {
ec->healers--;
heal_fop = __ec_dequeue_heals (ec);
- }
+
+ if ((heal_fop != NULL) && ec->shutdown) {
+ /* This will prevent ec_handle_healers_done() to be
+ * called recursively. That would be problematic if
+ * the queue is too big. */
+ list_del_init(&heal_fop->healer);
+
+ UNLOCK(&ec->lock);
+
+ ec_fop_set_error(fop, ENOTCONN);
+ ec_heal_fail(ec, heal_fop);
+
+ LOCK(&ec->lock);
+ }
+ } while ((heal_fop != NULL) && ec->shutdown);
+
UNLOCK (&ec->lock);
if (heal_fop)
ec_launch_heal (ec, heal_fop);
-
}
void