diff options
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 96 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-data.h | 50 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-write.c | 150 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-generic.c | 7 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 182 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 271 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 96 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.h | 1 |
10 files changed, 548 insertions, 313 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 2d69ac0f384..894d2f552f3 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -116,20 +116,28 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this, int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, uintptr_t bad, dict_t * xdata) { - if (op_ret < 0) - { - gf_log(this->name, GF_LOG_WARNING, "Heal failed (error %d)", op_errno); - } - else - { - gf_log(this->name, GF_LOG_INFO, "Heal succeeded on %d/%d subvolumes", - ec_bits_count(mask & ~ (good | bad)), - ec_bits_count(mask & ~good)); + if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "Heal failed (error %d)", + op_errno); + } else { + if ((mask & ~good) != 0) { + gf_log(this->name, GF_LOG_INFO, "Heal succeeded on %d/%d " + "subvolumes", + ec_bits_count(mask & ~(good | bad)), + ec_bits_count(mask & ~good)); + } } return 0; } +int32_t ec_fop_needs_heal(ec_fop_data_t *fop) +{ + ec_t *ec = fop->xl->private; + + return (ec->xl_up & ~(fop->remaining | fop->good)) != 0; +} + void ec_check_status(ec_fop_data_t * fop) { ec_t * ec = fop->xl->private; @@ -144,8 +152,7 @@ void ec_check_status(ec_fop_data_t * fop) } } - if ((ec->xl_up & ~(fop->remaining | fop->good)) == 0) - { + if (!ec_fop_needs_heal(fop)) { return; } @@ -157,19 +164,19 @@ void ec_check_status(ec_fop_data_t * fop) if (fop->use_fd) { if (fop->fd != NULL) { - ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, - NULL, fop->fd, partial, NULL); + ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, + fop->fd, partial, NULL); } } else { - ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, + ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, &fop->loc[0], partial, NULL); if (fop->loc[1].inode != NULL) { - ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, - NULL, &fop->loc[1], partial, NULL); + ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, + &fop->loc[1], partial, NULL); } } } @@ -320,16 +327,12 @@ void ec_complete(ec_fop_data_t * fop) ec_trace("COMPLETE", fop, ""); - if (--fop->winds == 0) - { - if (fop->answer == NULL) - { - if (!list_empty(&fop->cbk_list)) - { + if (--fop->winds == 0) { + if (fop->answer == NULL) { + if (!list_empty(&fop->cbk_list)) { cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list); if ((cbk->count >= fop->minimum) && - ((cbk->op_ret >= 0) || (cbk->op_errno != ENOTCONN))) - { + ((cbk->op_ret >= 0) || (cbk->op_errno != ENOTCONN))) { fop->answer = cbk; ec_update_bad(fop, cbk->mask); @@ -600,7 +603,7 @@ ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc) lock->kind = kind; lock->good_mask = -1ULL; INIT_LIST_HEAD(&lock->waiting); - if (!ec_loc_from_loc(xl, &lock->loc, loc)) + if (ec_loc_from_loc(xl, &lock->loc, loc) != 0) { mem_put(lock); lock = NULL; @@ -665,7 +668,6 @@ void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update) ec_inode_t * ctx = NULL; ec_lock_link_t *link = NULL; loc_t tmp; - int32_t error; if ((fop->parent != NULL) || (fop->error != 0)) { @@ -677,14 +679,13 @@ void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update) */ if (update) { - error = ec_loc_parent(fop->xl, loc, &tmp); - if (error != 0) { - ec_fop_set_error(fop, error); + if (ec_loc_parent(fop->xl, loc, &tmp) != 0) { + ec_fop_set_error(fop, EIO); return; } } else { - if (!ec_loc_from_loc(fop->xl, &tmp, loc)) { + if (ec_loc_from_loc(fop->xl, &tmp, loc) != 0) { ec_fop_set_error(fop, EIO); return; @@ -805,7 +806,7 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update) return; } - if (ec_loc_from_fd(fop->xl, &loc, fd)) + if (ec_loc_from_fd(fop->xl, &loc, fd) == 0) { ec_lock_prepare_inode(fop, &loc, update); @@ -1074,7 +1075,7 @@ void ec_get_size_version(ec_fop_data_t * fop) if (!fop->use_fd) { - if (!ec_loc_from_loc(fop->xl, &loc, &fop->loc[0])) + if (ec_loc_from_loc(fop->xl, &loc, &fop->loc[0]) != 0) { goto out; } @@ -1089,9 +1090,7 @@ void ec_get_size_version(ec_fop_data_t * fop) loc.path = NULL; loc.name = NULL; } - } - else if (!ec_loc_from_fd(fop->xl, &loc, fop->fd)) - { + } else if (ec_loc_from_fd(fop->xl, &loc, fop->fd) != 0) { goto out; } @@ -1317,11 +1316,6 @@ void ec_unlock_timer_add(ec_lock_link_t *link) UNLOCK(&lock->loc.inode->lock); } else { - ec_trace("UNLOCK_DELAY", fop, "lock=%p", lock); - - delay.tv_sec = 1; - delay.tv_nsec = 0; - LOCK(&fop->lock); fop->jobs++; @@ -1329,11 +1323,23 @@ void ec_unlock_timer_add(ec_lock_link_t *link) UNLOCK(&fop->lock); - lock->timer = gf_timer_call_after(fop->xl->ctx, delay, - ec_unlock_timer_cbk, link); - if (lock->timer == NULL) { - gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to delay an unlock"); - + /* If healing is needed, do not delay lock release to let self-heal + * start working as soon as possible. */ + if (!ec_fop_needs_heal(fop)) { + ec_trace("UNLOCK_DELAY", fop, "lock=%p", lock); + + delay.tv_sec = 1; + delay.tv_nsec = 0; + lock->timer = gf_timer_call_after(fop->xl->ctx, delay, + ec_unlock_timer_cbk, link); + if (lock->timer == NULL) { + gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to delay an " + "unlock"); + + *lock->plock = NULL; + refs = 0; + } + } else { *lock->plock = NULL; refs = 0; } diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h index d22a20090df..35c84254550 100644 --- a/xlators/cluster/ec/src/ec-data.h +++ b/xlators/cluster/ec/src/ec-data.h @@ -66,10 +66,11 @@ struct _ec_fd struct _ec_inode { - uintptr_t bad; - ec_lock_t *entry_lock; - ec_lock_t *inode_lock; - ec_heal_t *heal; + uintptr_t bad; + ec_lock_t *entry_lock; + ec_lock_t *inode_lock; + struct list_head heal; + }; typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *, @@ -199,6 +200,7 @@ struct _ec_fop_data ec_resume_f resume; ec_cbk_t cbks; void * data; + ec_heal_t *heal; uint64_t user_size; uint32_t head; @@ -255,25 +257,27 @@ struct _ec_cbk_data struct _ec_heal { - gf_lock_t lock; - xlator_t * xl; - ec_fop_data_t * fop; - ec_fop_data_t * lookup; - loc_t loc; - struct iatt iatt; - char * symlink; - fd_t * fd; - int32_t partial; - int32_t done; - uintptr_t available; - uintptr_t good; - uintptr_t bad; - uintptr_t open; - uintptr_t fixed; - uint64_t offset; - uint64_t size; - uint64_t version; - uint64_t raw_size; + struct list_head list; + gf_lock_t lock; + xlator_t *xl; + ec_fop_data_t *fop; + void *data; + ec_fop_data_t *lookup; + loc_t loc; + struct iatt iatt; + char *symlink; + fd_t *fd; + int32_t partial; + int32_t done; + uintptr_t available; + uintptr_t good; + uintptr_t bad; + uintptr_t open; + uintptr_t fixed; + uint64_t offset; + uint64_t size; + uint64_t version; + uint64_t raw_size; }; ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this, diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index 0e91d5f416e..95d80efdf8b 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -114,8 +114,8 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) LOCK(&fop->fd->lock); ctx = __ec_fd_get(fop->fd, fop->xl); - if ((ctx == NULL) || !ec_loc_from_loc(fop->xl, &ctx->loc, - &fop->loc[0])) { + if ((ctx == NULL) || + (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) { UNLOCK(&fop->fd->lock); fop->error = EIO; diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index 3d8055c40a7..2b1064a98f0 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -149,9 +149,8 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) LOCK(&fop->fd->lock); ctx = __ec_fd_get(fop->fd, fop->xl); - if ((ctx == NULL) || !ec_loc_from_loc(fop->xl, &ctx->loc, - &fop->loc[0])) - { + if ((ctx == NULL) || + (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) { UNLOCK(&fop->fd->lock); fop->error = EIO; @@ -194,6 +193,18 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) return EC_STATE_REPORT; } + if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) { + fop->error = EIO; + + return EC_STATE_REPORT; + } + + if (ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0) != 0) { + fop->error = EIO; + + return EC_STATE_REPORT; + } + fop->int32 &= ~O_ACCMODE; fop->int32 |= O_RDWR; @@ -222,27 +233,27 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { + if (cbk->op_ret >= 0) { ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, - &cbk->iatt[0]); + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, + &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } else { + LOCK(&fop->fd->lock); - LOCK(&fop->fd->lock); + ctx = __ec_fd_get(fop->fd, fop->xl); + if (ctx != NULL) { + ctx->open |= cbk->mask; + } - ctx = __ec_fd_get(fop->fd, fop->xl); - if (ctx != NULL) - { - ctx->open |= cbk->mask; + UNLOCK(&fop->fd->lock); } - - UNLOCK(&fop->fd->lock); + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else @@ -511,22 +522,21 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { + if (cbk->op_ret >= 0) { ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); - - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, - &cbk->iatt[0]); - - if (cbk->iatt[0].ia_type == IA_IFREG) - { + if (cbk->iatt[0].ia_type == IA_IFREG) { cbk->iatt[0].ia_size = fop->pre_size; } + + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, + &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else @@ -754,6 +764,23 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) switch (state) { case EC_STATE_INIT: + if (fop->xdata == NULL) { + fop->xdata = dict_new(); + if (fop->xdata == NULL) { + fop->error = EIO; + + return EC_STATE_REPORT; + } + } + + if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) { + fop->error = EIO; + + return EC_STATE_REPORT; + } + + /* Fall through */ + case EC_STATE_LOCK: ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); @@ -777,17 +804,18 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { + if (cbk->op_ret >= 0) { ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, - &cbk->iatt[0]); + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, + &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else @@ -1037,6 +1065,18 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) return EC_STATE_REPORT; } + + if (ec_dict_set_number(fop->xdata, EC_XATTR_VERSION, 0) != 0) { + fop->error = EIO; + + return EC_STATE_REPORT; + } + + if (ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0) != 0) { + fop->error = EIO; + + return EC_STATE_REPORT; + } } /* Fall through */ @@ -1064,17 +1104,18 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { + if (cbk->op_ret >= 0) { ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, - &cbk->iatt[0]); + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, + &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else @@ -1822,17 +1863,18 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { + if (cbk->op_ret >= 0) { ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count); - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, - &cbk->iatt[0]); + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, + &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index 63edf7da132..ffc40f01de1 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -711,7 +711,12 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) ec_dict_del_number(cbk->xdata, EC_XATTR_VERSION, &cbk->version); - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]); + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + + return; + } LOCK(&cbk->inode->lock); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 042f24e7d4a..da5f5947de3 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -62,8 +62,8 @@ void ec_heal_lookup_resume(ec_fop_data_t * fop) heal->fop->post_size = cbk->iatt[0].ia_size; heal->fop->have_size = 1; - if (!ec_loc_prepare(heal->xl, &heal->loc, cbk->inode, - &cbk->iatt[0])) + if (ec_loc_update(heal->xl, &heal->loc, cbk->inode, + &cbk->iatt[0]) != 0) { fop->answer = NULL; fop->error = EIO; @@ -383,6 +383,41 @@ int32_t ec_heal_create(ec_heal_t * heal, uintptr_t mask, int32_t try_link) return 0; } +int32_t ec_heal_parent_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, + uintptr_t good, uintptr_t bad, dict_t *xdata) +{ + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; + + /* Even if parent self-heal has failed, we try to heal the current entry */ + ec_heal_create(heal, fop->mask, 0); + + return 0; +} + +void ec_heal_parent(ec_heal_t *heal, uintptr_t mask) +{ + loc_t parent; + int32_t healing = 0; + + /* First we try to do a partial heal of the parent directory to avoid + * ENOENT/ENOTDIR errors caused by missing parents */ + if (ec_loc_parent(heal->xl, &heal->loc, &parent) == 0) { + if (!__is_root_gfid(parent.gfid)) { + ec_heal(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_parent_cbk, heal, &parent, 1, NULL); + + healing = 1; + } + loc_wipe(&parent); + } + + if (!healing) { + ec_heal_create(heal, mask, 0); + } +} + void ec_heal_recreate(ec_fop_data_t * fop) { ec_cbk_data_t * cbk; @@ -405,7 +440,7 @@ void ec_heal_recreate(ec_fop_data_t * fop) if (mask != 0) { - ec_heal_create(heal, mask, 0); + ec_heal_parent(heal, mask); } } @@ -458,8 +493,7 @@ int32_t ec_heal_init(ec_fop_data_t * fop) memset(heal, 0, sizeof(ec_heal_t)); - if (!ec_loc_from_loc(fop->xl, &heal->loc, &fop->loc[0])) - { + if (ec_loc_from_loc(fop->xl, &heal->loc, &fop->loc[0]) != 0) { error = ENOMEM; goto out; @@ -472,6 +506,7 @@ int32_t ec_heal_init(ec_fop_data_t * fop) pool = fop->xl->ctx->iobuf_pool; heal->size = iobpool_default_pagesize(pool) * ec->fragments; heal->partial = fop->int32; + fop->heal = heal; LOCK(&inode->lock); @@ -483,20 +518,30 @@ int32_t ec_heal_init(ec_fop_data_t * fop) goto unlock; } - if (ctx->heal != NULL) - { + if (list_empty(&ctx->heal)) { + gf_log("ec", GF_LOG_INFO, "Healing '%s', gfid %s", heal->loc.path, + uuid_utoa(heal->loc.gfid)); + } else { error = EEXIST; - - goto unlock; } - fop->data = heal; - - ctx->heal = heal; + list_add_tail(&heal->list, &ctx->heal); heal = NULL; unlock: UNLOCK(&inode->lock); + + if (error == EEXIST) { + LOCK(&fop->lock); + + fop->jobs++; + fop->refs++; + + UNLOCK(&fop->lock); + + error = 0; + } + out: GF_FREE(heal); @@ -506,12 +551,9 @@ out: void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd) { loc_t loc; - int32_t error; - error = ec_loc_parent(heal->xl, &heal->loc, &loc); - if (error != 0) - { - ec_fop_set_error(heal->fop, error); + if (ec_loc_parent(heal->xl, &heal->loc, &loc) != 0) { + ec_fop_set_error(heal->fop, EIO); return; } @@ -605,7 +647,8 @@ void ec_heal_remove_others(ec_heal_t * heal) if (cbk->op_ret < 0) { - if ((cbk->op_errno != ENOENT) && (cbk->op_errno != ENOTDIR)) + if ((cbk->op_errno != ENOENT) && (cbk->op_errno != ENOTDIR) && + (cbk->op_errno != ESTALE)) { gf_log(heal->xl->name, GF_LOG_WARNING, "Don't know how to " "remove inode with " @@ -635,7 +678,7 @@ void ec_heal_prepare_others(ec_heal_t * heal) if (cbk->op_ret < 0) { - if (cbk->op_errno == ENOENT) + if ((cbk->op_errno == ENOENT) || (cbk->op_errno == ESTALE)) { ec_heal_create(heal, cbk->mask, 1); } @@ -1061,35 +1104,61 @@ void ec_heal_data(ec_heal_t * heal) } } -void ec_heal_dispatch(ec_heal_t * heal) +void ec_heal_dispatch(ec_heal_t *heal) { - ec_fop_data_t * fop = heal->fop; - ec_cbk_data_t * cbk; - inode_t * inode; - ec_inode_t * ctx; + ec_fop_data_t *fop; + ec_cbk_data_t *cbk; + inode_t *inode; + ec_inode_t *ctx; + ec_heal_t *next = NULL; + struct list_head list; int32_t error; inode = heal->loc.inode; + INIT_LIST_HEAD(&list); + LOCK(&inode->lock); - ctx = __ec_inode_get(inode, heal->xl); - if (ctx != NULL) - { - ctx->bad &= ~heal->good; - ctx->heal = NULL; - } + /* A heal object not belonging to any list means that it has not been fully + * executed. It got its information from a previous heal that was executing + * when this heal started. */ + if (!list_empty(&heal->list)) { + list_del_init(&heal->list); + ctx = __ec_inode_get(inode, heal->xl); + if (ctx != NULL) { + ctx->bad &= ~heal->good; - fop->data = NULL; + if (heal->partial) { + /* Collect all partial heal requests. All of them will receive + * the same answer. 'next' will contain a pointer to the first + * full request (if any) after this partial heal request.*/ + while (!list_empty(&ctx->heal)) { + next = list_entry(ctx->heal.next, ec_heal_t, list); + if (!next->partial) { + break; + } + list_move_tail(&next->list, &list); + } + if (list_empty(&ctx->heal)) { + next = NULL; + } + } else { + /* This is a full heal request, so take all received heal + * requests to answer them now. */ + list_splice_init(&ctx->heal, &list); + } + } + } UNLOCK(&inode->lock); + fop = heal->fop; error = fop->error; cbk = ec_cbk_data_allocate(fop->frame, heal->xl, fop, fop->id, 0, error == 0 ? 0 : -1, error); - if (cbk != NULL) - { + if (cbk != NULL) { cbk->uintptr[0] = heal->available; cbk->uintptr[1] = heal->good; cbk->uintptr[2] = heal->fixed; @@ -1097,9 +1166,7 @@ void ec_heal_dispatch(ec_heal_t * heal) ec_combine(cbk, NULL); fop->answer = cbk; - } - else if (error == 0) - { + } else if (error == 0) { error = ENOMEM; } @@ -1119,16 +1186,38 @@ void ec_heal_dispatch(ec_heal_t * heal) GF_FREE(heal); ec_fop_set_error(fop, error); + + /* Resume all pending heal requests, setting the same data obtained by + * this heal execution. */ + while (!list_empty(&list)) { + heal = list_entry(list.next, ec_heal_t, list); + list_del_init(&heal->list); + + heal->available = cbk->uintptr[0]; + heal->good = cbk->uintptr[1]; + heal->fixed = cbk->uintptr[2]; + + /* Setting 'done' to 1 avoids executing all heal logic and directly + * reports the result to the caller. */ + heal->done = 1; + + ec_resume(heal->fop, error); + } + + /* If there is a pending full request, resume it. */ + if (next != NULL) { + ec_resume(next->fop, 0); + } } void ec_wind_heal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) { ec_cbk_data_t * cbk; - ec_heal_t * heal = fop->data; + ec_heal_t *heal = fop->heal; ec_trace("WIND", fop, "idx=%d", idx); - cbk = ec_cbk_data_allocate(fop->req_frame, fop->xl, fop, EC_FOP_HEAL, idx, + cbk = ec_cbk_data_allocate(fop->frame, fop->xl, fop, EC_FOP_HEAL, idx, fop->error == 0 ? 0 : -1, fop->error); if (cbk != NULL) { @@ -1145,7 +1234,7 @@ void ec_wind_heal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) { ec_cbk_data_t * cbk; - ec_heal_t * heal = fop->data; + ec_heal_t *heal = fop->heal; switch (state) { @@ -1158,10 +1247,14 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) return EC_STATE_REPORT; } - /* Fall through */ + return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: - ec_heal_entrylk(fop->data, ENTRYLK_LOCK); + if (heal->done) { + return EC_STATE_HEAL_DISPATCH; + } + + ec_heal_entrylk(heal, ENTRYLK_LOCK); return EC_STATE_HEAL_ENTRY_LOOKUP; @@ -1405,10 +1498,9 @@ void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target, gf_log("ec", GF_LOG_TRACE, "EC(HEAL) %p", frame); VALIDATE_OR_GOTO(this, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); - fop = ec_fop_data_allocate(NULL, this, EC_FOP_HEAL, + fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, EC_FLAG_UPDATE_LOC_INODE, target, minimum, ec_wind_heal, ec_manager_heal, callback, data); if (fop == NULL) @@ -1457,11 +1549,11 @@ out: void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) { ec_cbk_data_t * cbk; - ec_heal_t * heal = fop->data; + ec_heal_t *heal = fop->heal; ec_trace("WIND", fop, "idx=%d", idx); - cbk = ec_cbk_data_allocate(fop->req_frame, fop->xl, fop, EC_FOP_FHEAL, idx, + cbk = ec_cbk_data_allocate(fop->frame, fop->xl, fop, EC_FOP_FHEAL, idx, fop->error == 0 ? 0 : -1, fop->error); if (cbk != NULL) { diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 6dae0232a01..3c3e2302e53 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -324,218 +324,242 @@ int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src) return 1; } -int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) +int32_t ec_loc_setup_inode(xlator_t *xl, loc_t *loc) { - char * str = NULL; - int32_t error = 0; - - memset(parent, 0, sizeof(loc_t)); + int32_t ret = -1; - if (loc->inode == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Invalid loc"); - - error = EINVAL; - - goto out; + if (loc->inode != NULL) { + if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) { + goto out; + } + } else if (loc->parent != NULL) { + if (!uuid_is_null(loc->gfid)) { + loc->inode = inode_find(loc->parent->table, loc->gfid); + } else if (loc->path != NULL) { + loc->inode = inode_resolve(loc->parent->table, (char *)loc->path); + } } - if (__is_root_gfid(loc->inode->gfid) || __is_root_gfid(loc->gfid) || - ((loc->path != NULL) && (strcmp(loc->path, "/") == 0))) - { - parent->path = gf_strdup("/"); - if (parent->path == NULL) { - gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '/'"); + ret = 0; - error = ENOMEM; +out: + return ret; +} +int32_t ec_loc_setup_parent(xlator_t *xl, loc_t *loc) +{ + char *path, *parent; + int32_t ret = -1; + + if (loc->parent != NULL) { + if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) { goto out; } - - parent->gfid[15] = 1; - parent->inode = inode_find(loc->inode->table, parent->gfid); - - return 0; + } else if (loc->inode != NULL) { + if (!uuid_is_null(loc->pargfid)) { + loc->parent = inode_find(loc->inode->table, loc->pargfid); + } else if (loc->path != NULL) { + path = gf_strdup(loc->path); + if (path == NULL) { + gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'", + loc->path); + + goto out; + } + parent = dirname(path); + loc->parent = inode_resolve(loc->inode->table, parent); + GF_FREE(path); + } } - if (loc->path != NULL) { - str = gf_strdup(loc->path); - if (str == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path " - "'%s'", loc->path); + ret = 0; - error = ENOMEM; +out: + return ret; +} - goto out; - } - parent->path = gf_strdup(dirname(str)); - if (parent->path == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of " - "'%s'", loc->path); +int32_t ec_loc_setup_path(xlator_t *xl, loc_t *loc) +{ + uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char *name; + int32_t ret = -1; - error = ENOMEM; + if (loc->path != NULL) { + name = strrchr(loc->path, '/'); + if (name == NULL) { + gf_log(xl->name, GF_LOG_ERROR, "Invalid path '%s' in loc", + loc->path); goto out; } - parent->name = strrchr(parent->path, '/'); - if (parent->name == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Invalid path name (%s)", - parent->path); + if (name == loc->path) { + if (name[1] == 0) { + if (!ec_loc_gfid_check(xl, loc->gfid, root)) { + goto out; + } + } else { + if (!ec_loc_gfid_check(xl, loc->pargfid, root)) { + goto out; + } + } + } + name++; - error = EINVAL; + if (loc->name != NULL) { + if (strcmp(loc->name, name) != 0) { + gf_log(xl->name, GF_LOG_ERROR, "Invalid name '%s' in loc", + loc->name); - goto out; + goto out; + } + } else { + loc->name = name; } - parent->name++; } + + ret = 0; + +out: + return ret; +} + +int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) +{ + char *str = NULL; + int32_t ret = -1; + + memset(parent, 0, sizeof(loc_t)); + if (loc->parent != NULL) { parent->inode = inode_ref(loc->parent); - uuid_copy(parent->gfid, loc->parent->gfid); } - if (!uuid_is_null(loc->pargfid) && uuid_is_null(parent->gfid)) { + if (!uuid_is_null(loc->pargfid)) { uuid_copy(parent->gfid, loc->pargfid); } - - if ((parent->inode == NULL) && (parent->path != NULL)) - { - if (strcmp(parent->path, "/") == 0) { - parent->inode = inode_ref(loc->inode->table->root); + if (loc->path != NULL) { + str = gf_strdup(loc->path); + if (str == NULL) { + gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'", + loc->path); goto out; } - parent->inode = inode_resolve(loc->inode->table, (char *)parent->path); - if (parent->inode != NULL) { - goto out; - } - - gf_log(xl->name, GF_LOG_WARNING, "Unable to resolve parent inode"); - } - - if ((parent->inode == NULL) && !uuid_is_null(parent->gfid)) { - if (__is_root_gfid(parent->gfid)) { - parent->inode = inode_ref(loc->inode->table->root); + parent->path = gf_strdup(dirname(str)); + if (parent->path == NULL) { + gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '%s'", + dirname(str)); goto out; } - parent->inode = inode_find(loc->inode->table, parent->gfid); - if (parent->inode != NULL) { - goto out; - } + } - gf_log(xl->name, GF_LOG_WARNING, "Unable to find parent inode"); + if ((ec_loc_setup_path(xl, parent) != 0) || + (ec_loc_setup_inode(xl, parent) != 0) || + (ec_loc_setup_parent(xl, parent) != 0)) { + goto out; } if ((parent->inode == NULL) && (parent->path == NULL) && uuid_is_null(parent->gfid)) { gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for loc_t"); - error = EINVAL; - goto out; } + ret = 0; + out: GF_FREE(str); - if (error != 0) + if (ret != 0) { loc_wipe(parent); } - return error; + return ret; } -int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode, - struct iatt * iatt) +int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, + struct iatt *iatt) { - if ((inode != NULL) && (loc->inode != inode)) - { - if (loc->inode != NULL) - { + int32_t ret = -1; + + if ((inode != NULL) && (loc->inode != inode)) { + if (loc->inode != NULL) { inode_unref(loc->inode); } loc->inode = inode_ref(inode); - uuid_copy(loc->gfid, inode->gfid); } - else if (loc->inode != NULL) - { - if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) - { - return 0; - } - } - if (iatt != NULL) - { - if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) - { - return 0; + if (iatt != NULL) { + if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) { + goto out; } } - if (loc->parent != NULL) - { - if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) - { - return 0; - } - + if ((ec_loc_setup_path(xl, loc) != 0) || + (ec_loc_setup_inode(xl, loc) != 0) || + (ec_loc_setup_parent(xl, loc) != 0)) { + goto out; } - if (uuid_is_null(loc->gfid)) - { - gf_log(xl->name, GF_LOG_WARNING, "GFID not available for inode"); - } + ret = 0; - return 1; +out: + return ret; } int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd) { ec_fd_t * ctx; + int32_t ret = -1; memset(loc, 0, sizeof(*loc)); ctx = ec_fd_get(fd, xl); - if (ctx != NULL) - { - if (loc_copy(loc, &ctx->loc) != 0) - { - return 0; + if (ctx != NULL) { + if (loc_copy(loc, &ctx->loc) != 0) { + goto out; } } - if (ec_loc_prepare(xl, loc, fd->inode, NULL)) - { - return 1; + if (ec_loc_update(xl, loc, fd->inode, NULL) != 0) { + goto out; } - loc_wipe(loc); + ret = 0; - return 0; +out: + if (ret != 0) { + loc_wipe(loc); + } + + return ret; } int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src) { + int32_t ret = -1; + memset(dst, 0, sizeof(*dst)); - if (loc_copy(dst, src) != 0) - { - return 0; + if (loc_copy(dst, src) != 0) { + goto out; } - if (ec_loc_prepare(xl, dst, NULL, NULL)) - { - return 1; + if (ec_loc_update(xl, dst, NULL, NULL) != 0) { + goto out; } - loc_wipe(dst); + ret = 0; - return 0; +out: + if (ret != 0) { + loc_wipe(dst); + } + + return ret; } void ec_owner_set(call_frame_t * frame, void * owner) @@ -560,6 +584,7 @@ ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) if (ctx != NULL) { memset(ctx, 0, sizeof(*ctx)); + INIT_LIST_HEAD(&ctx->heal); value = (uint64_t)(uintptr_t)ctx; if (__inode_ctx_set(inode, xl, &value) != 0) diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index d654a49f34c..46791041a3f 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -30,8 +30,8 @@ int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config); int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config); int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent); -int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode, - struct iatt * iatt); +int32_t ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, + struct iatt *iatt); int32_t ec_loc_from_fd(xlator_t * xl, loc_t * loc, fd_t * fd); int32_t ec_loc_from_loc(xlator_t * xl, loc_t * dst, loc_t * src); diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 88e9661743d..f9d1bcb052c 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -344,6 +344,59 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) } } +int32_t ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, + uintptr_t good, uintptr_t bad, dict_t *xdata) +{ + ec_fop_data_t *fop = cookie; + fop_getxattr_cbk_t func = fop->data; + ec_t *ec = xl->private; + dict_t *dict = NULL; + char *str; + char bin1[65], bin2[65]; + + if (op_ret >= 0) { + dict = dict_new(); + if (dict == NULL) { + op_ret = -1; + op_errno = ENOMEM; + } else { + if (gf_asprintf(&str, "Good: %s, Bad: %s", + ec_bin(bin1, sizeof(bin1), good, ec->nodes), + ec_bin(bin2, sizeof(bin2), mask & ~(good | bad), + ec->nodes)) < 0) { + dict_unref(dict); + dict = NULL; + + op_ret = -1; + op_errno = ENOMEM; + + goto out; + } + + if (dict_set_str(dict, EC_XATTR_HEAL, str) != 0) { + GF_FREE(str); + dict_unref(dict); + dict = NULL; + + op_ret = -1; + op_errno = ENOMEM; + + goto out; + } + } + } + +out: + func(frame, NULL, xl, op_ret, op_errno, dict, NULL); + + if (dict != NULL) { + dict_unref(dict); + } + + return 0; +} + void ec_getxattr(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_getxattr_cbk_t func, void * data, loc_t * loc, const char * name, dict_t * xdata) @@ -358,6 +411,14 @@ void ec_getxattr(call_frame_t * frame, xlator_t * this, uintptr_t target, GF_VALIDATE_OR_GOTO(this->name, frame, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); + /* Special handling of an explicit self-heal request */ + if ((name != NULL) && (strcmp(name, EC_XATTR_HEAL) == 0)) { + ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk, + func, loc, 0, NULL); + + return; + } + fop = ec_fop_data_allocate(frame, this, GF_FOP_GETXATTR, EC_FLAG_UPDATE_LOC_INODE, target, minimum, ec_wind_getxattr, ec_manager_getxattr, callback, @@ -650,9 +711,8 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) LOCK(&fop->fd->lock); ctx = __ec_fd_get(fop->fd, fop->xl); - if ((ctx == NULL) || !ec_loc_from_loc(fop->xl, &ctx->loc, - &fop->loc[0])) - { + if ((ctx == NULL) || + (ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0])) != 0) { UNLOCK(&fop->fd->lock); fop->error = EIO; @@ -692,24 +752,24 @@ int32_t ec_manager_open(ec_fop_data_t * fop, int32_t state) cbk->op_errno = EIO; } } - if (cbk->op_ret < 0) - { - ec_fop_set_error(fop, cbk->op_errno); - } - else - { - ec_loc_prepare(fop->xl, &fop->loc[0], cbk->fd->inode, - NULL); + if (cbk->op_ret >= 0) { + if (ec_loc_update(fop->xl, &fop->loc[0], cbk->fd->inode, + NULL) != 0) { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } else { + LOCK(&fop->fd->lock); - LOCK(&fop->fd->lock); + ctx = __ec_fd_get(fop->fd, fop->xl); + if (ctx != NULL) { + ctx->open |= cbk->mask; + } - ctx = __ec_fd_get(fop->fd, fop->xl); - if (ctx != NULL) - { - ctx->open |= cbk->mask; + UNLOCK(&fop->fd->lock); } - - UNLOCK(&fop->fd->lock); + } + if (cbk->op_ret < 0) { + ec_fop_set_error(fop, cbk->op_errno); } } else diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index 37ce3fac089..c7db6226e25 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -17,6 +17,7 @@ #define EC_XATTR_CONFIG "trusted.ec.config" #define EC_XATTR_SIZE "trusted.ec.size" #define EC_XATTR_VERSION "trusted.ec.version" +#define EC_XATTR_HEAL "trusted.ec.heal" struct _ec; typedef struct _ec ec_t; |