diff options
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 1109 | 
1 files changed, 1109 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c new file mode 100644 index 00000000000..a4423d94aa9 --- /dev/null +++ b/xlators/cluster/ec/src/ec-common.c @@ -0,0 +1,1109 @@ +/* +  Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es> + +  This file is part of the cluster/ec translator for GlusterFS. + +  The cluster/ec translator for GlusterFS is free software: you can +  redistribute it and/or modify it under the terms of the GNU General +  Public License as published by the Free Software Foundation, either +  version 3 of the License, or (at your option) any later version. + +  The cluster/ec translator for GlusterFS is distributed in the hope +  that it will be useful, but WITHOUT ANY WARRANTY; without even the +  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +  PURPOSE. See the GNU General Public License for more details. + +  You should have received a copy of the GNU General Public License +  along with the cluster/ec translator for GlusterFS. If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#include "byte-order.h" + +#include "ec-mem-types.h" +#include "ec-data.h" +#include "ec-helpers.h" +#include "ec-combine.h" +#include "ec-common.h" +#include "ec-fops.h" +#include "ec.h" + +int32_t ec_child_valid(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +{ +    return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1); +} + +int32_t ec_child_next(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +{ +    while (!ec_child_valid(ec, fop, idx)) +    { +        if (++idx >= ec->nodes) +        { +            idx = 0; +        } +        if (idx == fop->first) +        { +            return -1; +        } +    } + +    return idx; +} + +uintptr_t ec_inode_good(inode_t * inode, xlator_t * xl) +{ +    ec_inode_t * ctx; +    uintptr_t bad = 0; + +    ctx = ec_inode_get(inode, xl); +    if (ctx != NULL) +    { +        bad = ctx->bad; +    } + +    return ~bad; +} + +uintptr_t ec_fd_good(fd_t * fd, xlator_t * xl) +{ +    ec_fd_t * ctx; +    uintptr_t bad = 0; + +    ctx = ec_fd_get(fd, xl); +    if ((ctx != NULL) && (ctx->loc.inode != NULL)) +    { +        bad = ctx->bad; +    } + +    return ~bad; +} + +uintptr_t ec_update_inode(ec_fop_data_t * fop, inode_t * inode, uintptr_t good, +                          uintptr_t bad) +{ +    ec_inode_t * ctx = NULL; + +    if (inode != NULL) +    { +        LOCK(&inode->lock); + +        ctx = __ec_inode_get(inode, fop->xl); +        if (ctx != NULL) +        { +            ctx->bad &= ~good; +            bad |= ctx->bad; +            ctx->bad = bad; +        } + +        UNLOCK(&inode->lock); +    } + +    return bad; +} + +uintptr_t ec_update_fd(ec_fop_data_t * fop, fd_t * fd, uintptr_t good, +                       uintptr_t bad) +{ +    ec_fd_t * ctx = NULL; + +    LOCK(&fd->lock); + +    ctx = __ec_fd_get(fd, fop->xl); +    if ((ctx != NULL) && (ctx->loc.inode != NULL)) +    { +        ctx->bad &= ~good; +        bad |= ctx->bad; +        ctx->bad = bad; +    } + +    UNLOCK(&fd->lock); + +    return bad; +} + +int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this, +                       int32_t op_ret, int32_t op_errno, uintptr_t mask, +                       uintptr_t good, uintptr_t bad, dict_t * xdata) +{ +    if (op_ret < 0) +    { +        gf_log(this->name, GF_LOG_WARNING, "Heal failed (error %d)", op_errno); +    } +    else +    { +        gf_log(this->name, GF_LOG_INFO, "Heal succeeded on %d/%d subvolumes", +               ec_bits_count(mask & ~ (good | bad)), +               ec_bits_count(mask & ~good)); +    } + +    return 0; +} + +void ec_check_status(ec_fop_data_t * fop) +{ +    ec_t * ec = fop->xl->private; + +    if ((ec->xl_up & ~(fop->remaining | fop->good)) == 0) +    { +        return; +    } + +    gf_log(fop->xl->name, GF_LOG_WARNING, "Operation failed on some " +                                          "subvolumes (up=%lX, mask=%lX, " +                                          "remaining=%lX, good=%lX, bad=%lX)", +           ec->xl_up, fop->mask, fop->remaining, fop->good, fop->bad); + +    if (fop->fd != NULL) +    { +        ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, +                 fop->fd, NULL); +    } +    else +    { +        ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, +                &fop->loc[0], NULL); + +        if (fop->loc[1].inode != NULL) +        { +            ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, +                    NULL, &fop->loc[1], NULL); +        } +    } +} + +void ec_update_bad(ec_fop_data_t * fop, uintptr_t good) +{ +    uintptr_t bad; +    int32_t update = 0; + +    bad = fop->mask & ~(fop->remaining | good); +    if ((fop->bad & bad) != bad) +    { +        fop->bad |= bad; +        update = 1; +    } +    if ((fop->good & good) != good) +    { +        fop->good |= good; +        update = 1; +    } + +    if (update && (fop->parent == NULL)) +    { +        if ((fop->flags & EC_FLAG_UPDATE_LOC_PARENT) != 0) +        { +            ec_update_inode(fop, fop->loc[0].parent, good, bad); +        } +        if ((fop->flags & EC_FLAG_UPDATE_LOC_INODE) != 0) +        { +            ec_update_inode(fop, fop->loc[0].inode, good, bad); +        } +        ec_update_inode(fop, fop->loc[1].inode, good, bad); +        if ((fop->flags & EC_FLAG_UPDATE_FD_INODE) != 0) +        { +            ec_update_inode(fop, fop->fd->inode, good, bad); +        } +        if ((fop->flags & EC_FLAG_UPDATE_FD) != 0) +        { +            ec_update_fd(fop, fop->fd, good, bad); +        } + +        ec_check_status(fop); +    } +} + + +void __ec_fop_set_error(ec_fop_data_t * fop, int32_t error) +{ +    if ((error != 0) && (fop->error == 0)) +    { +        fop->error = error; +    } +} + +void ec_fop_set_error(ec_fop_data_t * fop, int32_t error) +{ +    LOCK(&fop->lock); + +    __ec_fop_set_error(fop, error); + +    UNLOCK(&fop->lock); +} + +int32_t ec_check_complete(ec_fop_data_t * fop, ec_resume_f resume) +{ +    int32_t error = -1; + +    LOCK(&fop->lock); + +    GF_ASSERT(fop->resume == NULL); + +    if (fop->jobs != 0) +    { +        ec_trace("WAIT", fop, "resume=%p", resume); + +        fop->resume = resume; +    } +    else +    { +        error = fop->error; +        fop->error = 0; +    } + +    UNLOCK(&fop->lock); + +    return error; +} + +void ec_wait_winds(ec_fop_data_t * fop) +{ +    LOCK(&fop->lock); + +    if (fop->winds > 0) +    { +        fop->jobs++; +        fop->refs++; + +        fop->flags |= EC_FLAG_WAITING_WINDS; +    } + +    UNLOCK(&fop->lock); +} + +void ec_resume(ec_fop_data_t * fop, int32_t error) +{ +    ec_resume_f resume = NULL; + +    LOCK(&fop->lock); + +    __ec_fop_set_error(fop, error); + +    if (--fop->jobs == 0) +    { +        resume = fop->resume; +        fop->resume = NULL; +        if (resume != NULL) +        { +            ec_trace("RESUME", fop, "error=%d", error); + +            if (fop->error != 0) +            { +                error = fop->error; +            } +            fop->error = 0; +        } +    } + +    UNLOCK(&fop->lock); + +    if (resume != NULL) +    { +        resume(fop, error); +    } + +    ec_fop_data_release(fop); +} + +void ec_resume_parent(ec_fop_data_t * fop, int32_t error) +{ +    ec_fop_data_t * parent; + +    parent = fop->parent; +    if (parent != NULL) +    { +        fop->parent = NULL; +        ec_resume(parent, error); +    } +} + +void ec_report(ec_fop_data_t * fop, int32_t error) +{ +    if (!list_empty(&fop->lock_list)) +    { +        ec_owner_set(fop->frame, fop->frame->root); +    } + +    ec_resume(fop, error); +} + +void ec_complete(ec_fop_data_t * fop) +{ +    ec_cbk_data_t * cbk = NULL; +    int32_t ready = 0, report = 0; + +    LOCK(&fop->lock); + +    ec_trace("COMPLETE", fop, ""); + +    if (--fop->winds == 0) +    { +        if ((fop->answer == NULL) && (fop->expected != 1)) +        { +            if (!list_empty(&fop->cbk_list)) +            { +                cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list); +                if ((cbk->count >= fop->minimum) && +                    ((cbk->op_ret >= 0) || (cbk->op_errno != ENOTCONN))) +                { +                    fop->answer = cbk; + +                    ec_update_bad(fop, cbk->mask); +                } +            } + +            report = 1; +        } +        else if ((fop->flags & EC_FLAG_WAITING_WINDS) != 0) +        { +            ready = 1; +        } +    } + +    UNLOCK(&fop->lock); + +    if (report) +    { +        ec_report(fop, 0); +    } +    if (ready) +    { +        ec_resume(fop, 0); +    } + +    ec_fop_data_release(fop); +} + +int32_t ec_child_select(ec_fop_data_t * fop) +{ +    ec_t * ec = fop->xl->private; +    uintptr_t mask = 0; +    int32_t first = 0, num = 0; + +    fop->mask &= ec->node_mask; + +    mask = ec->xl_up; +    if (fop->loc[0].inode != NULL) +    { +        mask |= ec_inode_good(fop->loc[0].inode, fop->xl); +    } +    if (fop->loc[1].inode != NULL) +    { +        mask |= ec_inode_good(fop->loc[1].inode, fop->xl); +    } +    if (fop->fd != NULL) +    { +        if (fop->fd->inode != NULL) +        { +            mask |= ec_inode_good(fop->fd->inode, fop->xl); +        } +        mask |= ec_fd_good(fop->fd, fop->xl); +    } +    if ((fop->mask & ~mask) != 0) +    { +        gf_log(fop->xl->name, GF_LOG_WARNING, "Executing operation with " +                                              "some subvolumes unavailable " +                                              "(%lX)", fop->mask & ~mask); + +        fop->mask &= mask; +    } + +    switch (fop->minimum) +    { +        case EC_MINIMUM_ALL: +            fop->minimum = ec_bits_count(fop->mask); +            if (fop->minimum >= ec->fragments) +            { +                break; +            } +        case EC_MINIMUM_MIN: +            fop->minimum = ec->fragments; +            break; +        case EC_MINIMUM_ONE: +            fop->minimum = 1; +    } + +    first = ec->idx; +    if (++first >= ec->nodes) +    { +        first = 0; +    } +    ec->idx = first; + +    fop->remaining = fop->mask; + +    ec_trace("SELECT", fop, ""); + +    num = ec_bits_count(fop->mask); +    if ((num < fop->minimum) && (num < ec->fragments)) +    { +        gf_log(ec->xl->name, GF_LOG_ERROR, "Insufficient available childs " +                                           "for this request (have %d, need " +                                           "%d)", num, fop->minimum); + +        return 0; +    } + +    LOCK(&fop->lock); + +    fop->jobs++; +    fop->refs++; + +    UNLOCK(&fop->lock); + +    return 1; +} + +int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx) +{ +    ec_t * ec = fop->xl->private; + +    LOCK(&fop->lock); + +    idx = ec_child_next(ec, fop, idx); +    if (idx >= 0) +    { +        fop->remaining ^= 1ULL << idx; + +        ec_trace("EXECUTE", fop, "idx=%d", idx); + +        fop->winds++; +        fop->refs++; +    } + +    UNLOCK(&fop->lock); + +    if (idx >= 0) +    { +        fop->wind(ec, fop, idx); +    } + +    return idx; +} + +void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask) +{ +    ec_t * ec = fop->xl->private; +    int32_t count, idx; + +    count = ec_bits_count(mask); + +    LOCK(&fop->lock); + +    ec_trace("EXECUTE", fop, "mask=%lX", mask); + +    fop->remaining ^= mask; + +    fop->winds += count; +    fop->refs += count; + +    UNLOCK(&fop->lock); + +    idx = 0; +    while (mask != 0) +    { +        if ((mask & 1) != 0) +        { +            fop->wind(ec, fop, idx); +        } +        idx++; +        mask >>= 1; +    } +} + +void ec_dispatch_start(ec_fop_data_t * fop) +{ +    fop->answer = NULL; +    fop->good = 0; +    fop->bad = 0; + +    INIT_LIST_HEAD(&fop->cbk_list); + +    if (!list_empty(&fop->lock_list)) +    { +        ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner); +    } +} + +void ec_dispatch_one(ec_fop_data_t * fop) +{ +    ec_t * ec = fop->xl->private; + +    ec_dispatch_start(fop); + +    if (ec_child_select(fop)) +    { +        fop->expected = 1; +        fop->first = ec->idx; + +        ec_dispatch_next(fop, fop->first); +    } +} + +int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret, +                              int32_t op_errno) +{ +    if ((op_ret < 0) && (op_errno == ENOTCONN)) +    { +        return (ec_dispatch_next(fop, idx) >= 0); +    } + +    return 0; +} + +void ec_dispatch_inc(ec_fop_data_t * fop) +{ +    ec_dispatch_start(fop); + +    if (ec_child_select(fop)) +    { +        fop->expected = ec_bits_count(fop->remaining); +        fop->first = 0; + +        ec_dispatch_next(fop, 0); +    } +} + +void ec_dispatch_all(ec_fop_data_t * fop) +{ +    ec_dispatch_start(fop); + +    if (ec_child_select(fop)) +    { +        fop->expected = ec_bits_count(fop->remaining); +        fop->first = 0; + +        ec_dispatch_mask(fop, fop->remaining); +    } +} + +void ec_dispatch_min(ec_fop_data_t * fop) +{ +    ec_t * ec = fop->xl->private; +    uintptr_t mask; +    int32_t idx, count; + +    ec_dispatch_start(fop); + +    if (ec_child_select(fop)) +    { +        fop->expected = count = ec->fragments; +        fop->first = ec->idx; +        idx = fop->first - 1; +        mask = 0; +        while (count-- > 0) +        { +            idx = ec_child_next(ec, fop, idx + 1); +            mask |= 1ULL << idx; +        } + +        ec_dispatch_mask(fop, mask); +    } +} + +ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc) +{ +    ec_lock_t * lock; + +    if ((loc->inode == NULL) || +        (uuid_is_null(loc->gfid) && uuid_is_null(loc->inode->gfid))) +    { +        gf_log(xl->name, GF_LOG_ERROR, "Trying to lock based on an invalid " +                                       "inode"); + +        return NULL; +    } + +    lock = GF_MALLOC(sizeof(*lock), ec_mt_ec_lock_t); +    if (lock != NULL) +    { +        memset(lock, 0, sizeof(*lock)); + +        lock->kind = kind; +        if (!ec_loc_from_loc(xl, &lock->loc, loc)) +        { +            GF_FREE(lock); +            lock = NULL; +        } +    } + +    return lock; +} + +void ec_lock_destroy(ec_lock_t * lock) +{ +    GF_FREE(lock->basename); +    loc_wipe(&lock->loc); + +    GF_FREE(lock); +} + +int32_t ec_locked(call_frame_t * frame, void * cookie, xlator_t * this, +                  int32_t op_ret, int32_t op_errno, dict_t * xdata) +{ +    ec_fop_data_t * fop = cookie; +    ec_lock_t * lock = NULL; + +    if (op_ret >= 0) +    { +        lock = fop->data; +        lock->mask = fop->good; +        fop->parent->mask &= fop->good; + +        ec_trace("LOCKED", fop->parent, "lock=%p", lock); +    } +    else +    { +        gf_log(this->name, GF_LOG_WARNING, "Failed to complete preop lock"); +    } + +    return 0; +} + +void ec_lock_entry(ec_fop_data_t * fop, loc_t * loc) +{ +    ec_lock_t * lock = NULL; +    char * name = NULL; +    loc_t tmp; +    int32_t error; + +    if ((fop->parent != NULL) || (fop->error != 0)) +    { +        return; +    } + +    error = ec_loc_parent(fop->xl, loc, &tmp, &name); +    if (error != 0) +    { +        ec_fop_set_error(fop, error); + +        return; +    } + +    LOCK(&fop->lock); + +    list_for_each_entry(lock, &fop->lock_list, list) +    { +        if ((lock->kind == EC_LOCK_ENTRY) && +            (lock->loc.inode == tmp.inode) && +            (strcmp(lock->basename, name) == 0)) +        { +            ec_trace("LOCK_ENTRYLK", fop, "lock=%p, parent=%p, path=%s, " +                                          "name=%s. Lock already acquired", +                     lock, loc->parent, loc->path, name); + +            lock = NULL; + +            goto unlock; +        } +    } + +    lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp); +    if (lock != NULL) +    { +        lock->type = ENTRYLK_WRLCK; +        lock->basename = name; + +        if (list_empty(&fop->lock_list)) +        { +            ec_owner_set(fop->frame, fop->frame->root); +        } +        list_add_tail(&lock->list, &fop->lock_list); +    } +    else +    { +        __ec_fop_set_error(fop, EIO); +    } + +unlock: +    UNLOCK(&fop->lock); + +    loc_wipe(&tmp); + +    if (lock != NULL) +    { +        ec_trace("LOCK_ENTRYLK", fop, "lock=%p, parent=%p, path=%s, " +                                      "basename=%s", lock, lock->loc.inode, +                 lock->loc.path, lock->basename); + +        ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, lock, +                   fop->xl->name, &lock->loc, lock->basename, ENTRYLK_LOCK, +                   lock->type, NULL); +    } +    else +    { +        GF_FREE(name); +    } +} + +void ec_lock_inode(ec_fop_data_t * fop, loc_t * loc) +{ +    ec_lock_t * lock; + +    if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL)) +    { +        return; +    } + +    LOCK(&fop->lock); + +    list_for_each_entry(lock, &fop->lock_list, list) +    { +        if ((lock->kind == EC_LOCK_INODE) && (lock->loc.inode == loc->inode)) +        { +            UNLOCK(&fop->lock); + +            ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already " +                                          "acquired", lock, loc->inode); + +            return; +        } +    } + +    lock = ec_lock_allocate(fop->xl, EC_LOCK_INODE, loc); +    if (lock != NULL) +    { +        lock->flock.l_type = F_WRLCK; +        lock->flock.l_whence = SEEK_SET; + +        if (list_empty(&fop->lock_list)) +        { +            ec_owner_set(fop->frame, fop->frame->root); +        } +        list_add_tail(&lock->list, &fop->lock_list); +    } +    else +    { +        __ec_fop_set_error(fop, EIO); +    } + +    UNLOCK(&fop->lock); + +    if (lock != NULL) +    { +        ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p, owner=%p", lock, +                 lock->loc.inode, fop->frame->root); + +        ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, lock, +                   fop->xl->name, &lock->loc, F_SETLKW, &lock->flock, NULL); +    } +} + +void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd) +{ +    loc_t loc; + +    if ((fop->parent != NULL) || (fop->error != 0)) +    { +        return; +    } + +    if (ec_loc_from_fd(fop->xl, &loc, fd)) +    { +        ec_lock_inode(fop, &loc); + +        loc_wipe(&loc); +    } +    else +    { +        ec_fop_set_error(fop, EIO); +    } +} + +int32_t ec_unlocked(call_frame_t * frame, void * cookie, xlator_t * this, +                    int32_t op_ret, int32_t op_errno, dict_t * xdata) +{ +    ec_fop_data_t * fop = cookie; + +    if (op_ret < 0) +    { +        gf_log(this->name, GF_LOG_WARNING, "entry/inode unlocking failed (%s)", +               ec_fop_name(fop->parent->id)); +    } +    else +    { +        ec_trace("UNLOCKED", fop->parent, "lock=%p", fop->data); +    } + +    return 0; +} + +void ec_unlock(ec_fop_data_t * fop) +{ +    ec_lock_t * lock, * item; + +    ec_trace("UNLOCK", fop, ""); + +    list_for_each_entry_safe(lock, item, &fop->lock_list, list) +    { +        list_del(&lock->list); + +        if (lock->mask != 0) +        { +            switch (lock->kind) +            { +                case EC_LOCK_ENTRY: +                    ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, parent=%p, " +                                                    "path=%s, basename=%s", +                             lock, lock->loc.inode, lock->loc.path, +                             lock->basename); + +                    ec_entrylk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL, +                               ec_unlocked, lock, fop->xl->name, &lock->loc, +                               lock->basename, ENTRYLK_UNLOCK, lock->type, +                               NULL); + +                    break; + +                case EC_LOCK_INODE: +                    lock->flock.l_type = F_UNLCK; +                    ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock, +                             lock->loc.inode); + +                    ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL, +                               ec_unlocked, lock, fop->xl->name, &lock->loc, +                               F_SETLK, &lock->flock, NULL); + +                    break; + +                default: +                    gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid lock type"); +            } +        } + +        loc_wipe(&lock->loc); + +        GF_FREE(lock); +    } +} + +int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie, +                                xlator_t * this, int32_t op_ret, +                                int32_t op_errno, inode_t * inode, +                                struct iatt * buf, dict_t * xdata, +                                struct iatt * postparent) +{ +    ec_fop_data_t * fop = cookie; + +    if (op_ret >= 0) +    { +        fop->parent->mask &= fop->good; +        fop->parent->pre_size = fop->parent->post_size = buf->ia_size; +    } +    else +    { +        gf_log(this->name, GF_LOG_WARNING, "Failed to get size and version " +                                           "(error %d)", op_errno); +        ec_fop_set_error(fop, op_errno); +    } + +    return 0; +} + +void ec_get_size_version(ec_fop_data_t * fop) +{ +    loc_t loc; +    dict_t * xdata; +    uid_t uid; +    gid_t gid; +    int32_t error = ENOMEM; + +    if (fop->parent != NULL) +    { +        fop->pre_size = fop->parent->pre_size; +        fop->post_size = fop->parent->post_size; + +        return; +    } + +    memset(&loc, 0, sizeof(loc)); + +    xdata = dict_new(); +    if (xdata == NULL) +    { +        goto out; +    } +    if ((dict_set_uint64(xdata, EC_XATTR_VERSION, 0) != 0) || +        (dict_set_uint64(xdata, EC_XATTR_SIZE, 0) != 0)) +    { +        goto out; +    } + +    uid = fop->frame->root->uid; +    gid = fop->frame->root->gid; + +    fop->frame->root->uid = 0; +    fop->frame->root->gid = 0; + +    error = EIO; + +    if (fop->fd == NULL) +    { +        if (!ec_loc_from_loc(fop->xl, &loc, &fop->loc[0])) +        { +            goto out; +        } +        if (uuid_is_null(loc.pargfid)) +        { +            if (loc.parent != NULL) +            { +                inode_unref(loc.parent); +                loc.parent = NULL; +            } +            GF_FREE((char *)loc.path); +            loc.path = NULL; +            loc.name = NULL; +        } +    } +    else if (!ec_loc_from_fd(fop->xl, &loc, fop->fd)) +    { +        goto out; +    } + +    ec_lookup(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, +              ec_get_size_version_set, NULL, &loc, xdata); + +    fop->frame->root->uid = uid; +    fop->frame->root->gid = gid; + +    error = 0; + +out: +    loc_wipe(&loc); + +    if (xdata != NULL) +    { +        dict_unref(xdata); +    } + +    ec_fop_set_error(fop, error); +} + +int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, +                                    xlator_t * this, int32_t op_ret, +                                    int32_t op_errno, dict_t * xattr, +                                    dict_t * xdata) +{ +    ec_fop_data_t * fop = cookie; + +    if (op_ret < 0) +    { +        gf_log(fop->xl->name, GF_LOG_ERROR, "Failed to update version and " +                                            "size (error %d)", op_errno); +    } +    else +    { +        fop->parent->mask &= fop->good; +    } + +    return 0; +} + +void ec_update_size_version(ec_fop_data_t * fop) +{ +    dict_t * dict; +    size_t size; +    uid_t uid; +    gid_t gid; + +    if (fop->parent != NULL) +    { +        fop->parent->post_size = fop->post_size; + +        return; +    } + +    dict = dict_new(); +    if (dict == NULL) +    { +        goto out; +    } + +    if (ec_dict_set_number(dict, EC_XATTR_VERSION, 1) != 0) +    { +        goto out; +    } +    size = fop->post_size; +    if (fop->pre_size != size) +    { +        size -= fop->pre_size; +        if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0) +        { +            goto out; +        } +    } + +    uid = fop->frame->root->uid; +    gid = fop->frame->root->gid; + +    fop->frame->root->uid = 0; +    fop->frame->root->gid = 0; + +    if (fop->fd == NULL) +    { +        ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, +                   ec_update_size_version_done, NULL, &fop->loc[0], +                   GF_XATTROP_ADD_ARRAY64, dict, NULL); +    } +    else +    { +        ec_fxattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, +                    ec_update_size_version_done, NULL, fop->fd, +                    GF_XATTROP_ADD_ARRAY64, dict, NULL); +    } + +    fop->frame->root->uid = uid; +    fop->frame->root->gid = gid; + +    dict_unref(dict); + +    return; + +out: +    if (dict != NULL) +    { +        dict_unref(dict); +    } + +    ec_fop_set_error(fop, EIO); + +    gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to update version and size"); +} + +void __ec_manager(ec_fop_data_t * fop, int32_t error) +{ +    do +    { +        ec_trace("MANAGER", fop, "error=%d", error); + +        if (fop->state == EC_STATE_END) +        { +            ec_fop_data_release(fop); + +            break; +        } + +        if (error != 0) +        { +            fop->error = error; +            fop->state = -fop->state; +        } + +        fop->state = fop->handler(fop, fop->state); + +        error = ec_check_complete(fop, __ec_manager); +    } while (error >= 0); +} + +void ec_manager(ec_fop_data_t * fop, int32_t error) +{ +    GF_ASSERT(fop->jobs == 0); +    GF_ASSERT(fop->winds == 0); +    GF_ASSERT(fop->error == 0); + +    if (fop->state == EC_STATE_START) +    { +        fop->state = EC_STATE_INIT; +    } + +    __ec_manager(fop, error); +}  | 
