diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2015-01-07 12:29:48 +0100 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2015-05-09 17:29:46 -0700 |
commit | b46e65db722c14985db62a1679e0388d217b713b (patch) | |
tree | fac847af8f529505bf364ed54df25658552fbae3 /xlators/cluster/ec/src/ec-heal.c | |
parent | 73b61d709712692b48a1c9b18b4547410fb9078c (diff) |
ec: Fix failures with missing files
When a file does not exist on a brick but does exist on others, there
could be problems trying to access it because some loc_t structures
had a null 'pargfid' while 'name' was still set. This forced inode
resolution based on <pargfid>/name instead of <gfid>, which would be
the correct one. To solve this problem, 'name' is always set to NULL
when 'pargfid' is not present.
Another problem was caused by incorrect handling of errors during
incremental locking. The only error allowed during incremental
locking was ENOTCONN, but a file that is missing on a brick can be
reported as ESTALE. This caused an EIO on the operation.
This patch ignores errors during incremental locking. At the end of
the operation it checks whether there are enough successfully locked
bricks to continue.
Change-Id: I9360ebf8d819d219cea2d173c09bd37679a6f15a
BUG: 1176062
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/9407
Tested-by: NetBSD Build System
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 64 |
1 files changed, 32 insertions, 32 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 315de8765ad..ceddfeb6ac7 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -486,16 +486,6 @@ ec_heal_init (ec_fop_data_t * fop) ec_heal_t * heal = NULL; int32_t error = 0; - inode = fop->loc[0].inode; - if (inode == NULL) - { - gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing " - "because there is not enough " - "information"); - - return ENODATA; - } - heal = GF_MALLOC(sizeof(ec_heal_t), ec_mt_ec_heal_t); if (heal == NULL) { @@ -509,6 +499,16 @@ ec_heal_init (ec_fop_data_t * fop) goto out; } + inode = heal->loc.inode; + if (inode == NULL) { + gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing " + "because there is not enough " + "information"); + + error = ENODATA; + goto out; + } + LOCK_INIT(&heal->lock); heal->xl = fop->xl; @@ -532,26 +532,20 @@ ec_heal_init (ec_fop_data_t * fop) gf_log("ec", GF_LOG_INFO, "Healing '%s', gfid %s", heal->loc.path, uuid_utoa(heal->loc.gfid)); } else { - error = EEXIST; - } - - list_add_tail(&heal->list, &ctx->heal); - heal = NULL; - -unlock: - UNLOCK(&inode->lock); - - if (error == EEXIST) { LOCK(&fop->lock); fop->jobs++; fop->refs++; UNLOCK(&fop->lock); - - error = 0; } + list_add_tail(&heal->list, &ctx->heal); + heal = NULL; + +unlock: + UNLOCK(&inode->lock); + out: GF_FREE(heal); @@ -563,6 +557,7 @@ void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd) loc_t loc; if (ec_loc_parent(heal->xl, &heal->loc, &loc) != 0) { + gf_log("ec", GF_LOG_NOTICE, "ec_loc_parent() failed"); ec_fop_set_error(heal->fop, EIO); return; @@ -1164,10 +1159,11 @@ void ec_heal_dispatch(ec_heal_t *heal) LOCK(&inode->lock); - /* A heal object not belonging to any list means that it has not been fully - * executed. It got its information from a previous heal that was executing - * when this heal started. 
*/ - if (!list_empty(&heal->list)) { + /* done == 0 means that self-heal is still running (it shouldn't happen) + * done == 1 means that self-heal has just completed + * done == 2 means that self-heal has completed and reported */ + if (heal->done == 1) { + heal->done = 2; list_del_init(&heal->list); ctx = __ec_inode_get(inode, heal->xl); if (ctx != NULL) { @@ -1182,6 +1178,11 @@ void ec_heal_dispatch(ec_heal_t *heal) if (!next->partial) { break; } + + /* Setting 'done' to 2 avoids executing all heal logic and + * directly reports the result to the caller. */ + next->done = 2; + list_move_tail(&next->list, &list); } if (list_empty(&ctx->heal)) { @@ -1241,10 +1242,6 @@ void ec_heal_dispatch(ec_heal_t *heal) heal->good = cbk->uintptr[1]; heal->fixed = cbk->uintptr[2]; - /* Setting 'done' to 1 avoids executing all heal logic and directly - * reports the result to the caller. */ - heal->done = 1; - ec_resume(heal->fop, error); } @@ -1304,11 +1301,14 @@ ec_manager_heal (ec_fop_data_t * fop, int32_t state) } case EC_STATE_DISPATCH: - if (heal->done) { + if (heal->done != 0) { + gf_log("ec", GF_LOG_NOTICE, "heal already done"); return EC_STATE_HEAL_DISPATCH; } + gf_log("ec", GF_LOG_NOTICE, "heal before entrylk"); ec_heal_entrylk(heal, ENTRYLK_LOCK); + gf_log("ec", GF_LOG_NOTICE, "heal after entrylk"); return EC_STATE_HEAL_ENTRY_LOOKUP; @@ -1403,7 +1403,7 @@ ec_manager_heal (ec_fop_data_t * fop, int32_t state) return EC_STATE_HEAL_DISPATCH; case EC_STATE_HEAL_DATA_LOCK: - if (heal->done) + if (heal->done != 0) { return EC_STATE_HEAL_POST_INODELK_LOCK; } |