diff options
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 283 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 1 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-data.h | 2 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-generic.c | 30 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 93 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-write.c | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.h | 1 |
8 files changed, 297 insertions, 121 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index fcae083aa84..92f7657cd75 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -936,78 +936,77 @@ void ec_lock(ec_fop_data_t * fop) } } +gf_boolean_t +ec_config_check (ec_fop_data_t *fop, dict_t *xdata) +{ + ec_t *ec; + + if (ec_dict_del_config(xdata, EC_XATTR_CONFIG, &fop->config) < 0) { + gf_log(fop->xl->name, GF_LOG_ERROR, "Failed to get a valid " + "config"); + + ec_fop_set_error(fop, EIO); + + return _gf_false; + } + + ec = fop->xl->private; + if ((fop->config.version != EC_CONFIG_VERSION) || + (fop->config.algorithm != EC_CONFIG_ALGORITHM) || + (fop->config.gf_word_size != EC_GF_BITS) || + (fop->config.bricks != ec->nodes) || + (fop->config.redundancy != ec->redundancy) || + (fop->config.chunk_size != EC_METHOD_CHUNK_SIZE)) { + uint32_t data_bricks; + + /* This combination of version/algorithm requires the following + values. Incorrect values for these fields are a sign of + corruption: + + redundancy > 0 + redundancy * 2 < bricks + gf_word_size must be a power of 2 + chunk_size (in bits) must be a multiple of gf_word_size * + (bricks - redundancy) */ + + data_bricks = fop->config.bricks - fop->config.redundancy; + if ((fop->config.redundancy < 1) || + (fop->config.redundancy * 2 >= fop->config.bricks) || + !ec_is_power_of_2(fop->config.gf_word_size) || + ((fop->config.chunk_size * 8) % (fop->config.gf_word_size * + data_bricks) != 0)) { + gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid or corrupted config"); + } else { + gf_log(fop->xl->name, GF_LOG_ERROR, "Unsupported config " + "(V=%u, A=%u, W=%u, " + "N=%u, R=%u, S=%u)", + fop->config.version, fop->config.algorithm, + fop->config.gf_word_size, fop->config.bricks, + fop->config.redundancy, fop->config.chunk_size); + } + + ec_fop_set_error(fop, EIO); + + return _gf_false; + } + + return _gf_true; +} + int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie, xlator_t * this, int32_t op_ret, int32_t op_errno, inode_t * inode, struct iatt * buf, dict_t * xdata, struct iatt * postparent) { - ec_t * ec; ec_fop_data_t * fop = cookie; ec_inode_t * ctx; ec_lock_t *lock = NULL; if (op_ret >= 0) { - if (buf->ia_type == IA_IFREG) - { - if (ec_dict_del_config(xdata, EC_XATTR_CONFIG, &fop->config) < 0) - { - gf_log(this->name, GF_LOG_ERROR, "Failed to get a valid " - "config"); - - ec_fop_set_error(fop, EIO); - - return 0; - } - ec = this->private; - if ((fop->config.version != EC_CONFIG_VERSION) || - (fop->config.algorithm != EC_CONFIG_ALGORITHM) || - (fop->config.gf_word_size != EC_GF_BITS) || - (fop->config.bricks != ec->nodes) || - (fop->config.redundancy != ec->redundancy) || - (fop->config.chunk_size != EC_METHOD_CHUNK_SIZE)) - { - uint32_t data_bricks; - - // This combination of version/algorithm requires the following - // values. Incorrect values for these fields are a sign of - // corruption: - // - // redundancy > 0 - // redundancy * 2 < bricks - // gf_word_size must be a power of 2 - // chunk_size (in bits) must be a multiple of gf_word_size * - // (bricks - redundancy) - - data_bricks = fop->config.bricks - fop->config.redundancy; - if ((fop->config.redundancy < 1) || - (fop->config.redundancy * 2 >= fop->config.bricks) || - !ec_is_power_of_2(fop->config.gf_word_size) || - ((fop->config.chunk_size * 8) % (fop->config.gf_word_size * - data_bricks) != 0)) - { - gf_log(this->name, GF_LOG_ERROR, "Invalid or corrupted " - "config (V=%u, A=%u, " - "W=%u, N=%u, R=%u, S=%u)", - fop->config.version, fop->config.algorithm, - fop->config.gf_word_size, fop->config.bricks, - fop->config.redundancy, fop->config.chunk_size); - } - else - { - gf_log(this->name, GF_LOG_ERROR, "Unsupported config " - "(V=%u, A=%u, W=%u, " - "N=%u, R=%u, S=%u)", - fop->config.version, fop->config.algorithm, - fop->config.gf_word_size, fop->config.bricks, - fop->config.redundancy, fop->config.chunk_size); - } - - ec_fop_set_error(fop, EIO); - - return 0; - } + if ((buf->ia_type == IA_IFREG) && !ec_config_check(fop, xdata)) { + return 0; } LOCK(&inode->lock); @@ -1053,6 +1052,58 @@ int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie, return 0; } +int32_t ec_prepare_update_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + ec_fop_data_t *fop = cookie, *parent; + ec_lock_t *lock = NULL; + + if (op_ret >= 0) { + parent = fop->parent; + while ((parent != NULL) && (parent->locks[0].lock == NULL)) { + parent = parent->parent; + } + if (parent == NULL) { + return 0; + } + + lock = parent->locks[0].lock; + lock->is_dirty = _gf_true; + + if (!ec_config_check(fop, dict)) { + return 0; + } + + LOCK(&lock->loc.inode->lock); + + if ((ec_dict_del_number(dict, EC_XATTR_VERSION, &lock->version) != 0) || + (ec_dict_del_number(dict, EC_XATTR_SIZE, &lock->size) != 0)) { + UNLOCK(&lock->loc.inode->lock); + + ec_fop_set_error(fop, EIO); + + return 0; + } + + lock->have_size = 1; + + UNLOCK(&lock->loc.inode->lock); + + fop->parent->mask &= fop->good; + + fop->parent->pre_size = fop->parent->post_size = lock->size; + fop->parent->have_size = 1; + } else { + gf_log(this->name, GF_LOG_WARNING, + "Failed to get size and version (error %d: %s)", op_errno, + strerror (op_errno)); + ec_fop_set_error(fop, op_errno); + } + + return 0; +} + void ec_get_size_version(ec_fop_data_t * fop) { loc_t loc; @@ -1085,7 +1136,8 @@ void ec_get_size_version(ec_fop_data_t * fop) } if ((dict_set_uint64(xdata, EC_XATTR_VERSION, 0) != 0) || (dict_set_uint64(xdata, EC_XATTR_SIZE, 0) != 0) || - (dict_set_uint64(xdata, EC_XATTR_CONFIG, 0) != 0)) + (dict_set_uint64(xdata, EC_XATTR_CONFIG, 0) != 0) || + (dict_set_uint64(xdata, EC_XATTR_DIRTY, 0) != 0)) { goto out; } @@ -1141,7 +1193,84 @@ out: dict_unref(xdata); } - ec_fop_set_error(fop, error); + if (error != 0) { + ec_fop_set_error(fop, error); + } +} + +void ec_prepare_update(ec_fop_data_t *fop) +{ + loc_t loc; + dict_t *xdata; + ec_fop_data_t *tmp; + ec_lock_t *lock; + uid_t uid; + gid_t gid; + int32_t error = ENOMEM; + + tmp = fop; + while ((tmp != NULL) && (tmp->locks[0].lock == NULL)) { + tmp = tmp->parent; + } + if ((tmp != NULL) && tmp->locks[0].lock->is_dirty) { + lock = tmp->locks[0].lock; + + fop->pre_size = fop->post_size = lock->size; + fop->have_size = 1; + + return; + } + + memset(&loc, 0, sizeof(loc)); + + xdata = dict_new(); + if (xdata == NULL) { + goto out; + } + if ((ec_dict_set_number(xdata, EC_XATTR_VERSION, 0) != 0) || + (ec_dict_set_number(xdata, EC_XATTR_SIZE, 0) != 0) || + (ec_dict_set_number(xdata, EC_XATTR_CONFIG, 0) != 0) || + (ec_dict_set_number(xdata, EC_XATTR_DIRTY, 1) != 0)) { + goto out; + } + + uid = fop->frame->root->uid; + gid = fop->frame->root->gid; + + fop->frame->root->uid = 0; + fop->frame->root->gid = 0; + + error = EIO; + + if (!fop->use_fd) { + if (ec_loc_from_loc(fop->xl, &loc, &fop->loc[0]) != 0) { + goto out; + } + + ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum, + ec_prepare_update_cbk, NULL, &loc, GF_XATTROP_ADD_ARRAY64, + xdata, NULL); + } else { + ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum, + ec_prepare_update_cbk, NULL, fop->fd, + GF_XATTROP_ADD_ARRAY64, xdata, NULL); + } + + fop->frame->root->uid = uid; + fop->frame->root->gid = gid; + + error = 0; + +out: + loc_wipe(&loc); + + if (xdata != NULL) { + dict_unref(xdata); + } + + if (error != 0) { + ec_fop_set_error(fop, error); + } } int32_t ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, @@ -1221,7 +1350,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, } void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version, - uint64_t size, ec_lock_t *lock) + uint64_t size, gf_boolean_t dirty, ec_lock_t *lock) { dict_t * dict; uid_t uid; @@ -1234,7 +1363,8 @@ void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version, return; } - ec_trace("UPDATE", fop, "version=%ld, size=%ld", version, size); + ec_trace("UPDATE", fop, "version=%ld, size=%ld, dirty=%u", version, size, + dirty); dict = dict_new(); if (dict == NULL) @@ -1242,14 +1372,18 @@ void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version, goto out; } - if (ec_dict_set_number(dict, EC_XATTR_VERSION, version) != 0) - { - goto out; + if (version != 0) { + if (ec_dict_set_number(dict, EC_XATTR_VERSION, version) != 0) { + goto out; + } } - if (size != 0) - { - if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0) - { + if (size != 0) { + if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0) { + goto out; + } + } + if (dirty) { + if (ec_dict_set_number(dict, EC_XATTR_DIRTY, -1) != 0) { goto out; } } @@ -1286,9 +1420,9 @@ void ec_unlock_now(ec_fop_data_t *fop, ec_lock_t *lock) { ec_trace("UNLOCK_NOW", fop, "lock=%p", lock); - if (lock->version_delta != 0) { + if ((lock->version_delta != 0) || lock->is_dirty) { ec_update_size_version(fop, &lock->loc, lock->version_delta, - lock->size_delta, lock); + lock->size_delta, lock->is_dirty, lock); } else { ec_unlock_lock(fop, lock); } @@ -1413,7 +1547,8 @@ void ec_flush_size_version(ec_fop_data_t * fop) if (version > 0) { - ec_update_size_version(fop, &lock->loc, version, delta, NULL); + ec_update_size_version(fop, &lock->loc, version, delta, _gf_false, + NULL); } } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 2b1d9574cdf..aaae16e71c3 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -84,6 +84,7 @@ void ec_lock_reuse(ec_fop_data_t *fop); void ec_unlock(ec_fop_data_t * fop); void ec_get_size_version(ec_fop_data_t * fop); +void ec_prepare_update(ec_fop_data_t *fop); void ec_flush_size_version(ec_fop_data_t * fop); void ec_dispatch_all(ec_fop_data_t * fop); diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h index 35c84254550..b17f197837b 100644 --- a/xlators/cluster/ec/src/ec-data.h +++ b/xlators/cluster/ec/src/ec-data.h @@ -152,6 +152,7 @@ struct _ec_lock entrylk_type type; struct gf_flock flock; }; + gf_boolean_t is_dirty; }; struct _ec_lock_link @@ -253,6 +254,7 @@ struct _ec_cbk_data struct gf_flock flock; struct iovec * vector; struct iobref * buffers; + gf_boolean_t dirty; }; struct _ec_heal diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index bccbc009457..73df0d89db8 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -753,16 +753,17 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) for (i = 0, ans = cbk; (ans != NULL) && (i < ec->fragments); ans = ans->next) { - data = dict_get(ans->xdata, GF_CONTENT_KEY); - if (data != NULL) - { - values[i] = ans->idx; - blocks[i] = (uint8_t *)data->data; - if (size > data->len) + if (!ans->dirty) { + data = dict_get(ans->xdata, GF_CONTENT_KEY); + if (data != NULL) { - size = data->len; + values[i] = ans->idx; + blocks[i] = (uint8_t *)data->data; + if (size > data->len) { + size = data->len; + } + i++; } - i++; } } @@ -871,6 +872,8 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } if (xdata != NULL) { + uint64_t dirty; + cbk->xdata = dict_ref(xdata); if (cbk->xdata == NULL) { @@ -879,6 +882,9 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, goto out; } + if (ec_dict_del_number(cbk->xdata, EC_XATTR_DIRTY, &dirty) == 0) { + cbk->dirty = dirty != 0; + } } ec_combine(cbk, ec_combine_lookup); @@ -938,7 +944,8 @@ int32_t ec_manager_lookup(ec_fop_data_t * fop, int32_t state) } } if ((dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0) != 0) || - (dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0) != 0)) + (dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0) != 0) || + (dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0) != 0)) { gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare lookup " "request"); @@ -1339,6 +1346,8 @@ int32_t ec_xattrop_cbk(call_frame_t * frame, void * cookie, xlator_t * this, } if (xdata != NULL) { + uint64_t dirty; + cbk->xdata = dict_ref(xdata); if (cbk->xdata == NULL) { @@ -1347,6 +1356,9 @@ int32_t ec_xattrop_cbk(call_frame_t * frame, void * cookie, xlator_t * this, goto out; } + if (ec_dict_del_number(cbk->xdata, EC_XATTR_DIRTY, &dirty) == 0) { + cbk->dirty = dirty != 0; + } } ec_combine(cbk, ec_combine_xattrop); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index c49ccf3fd72..a121bb43e5c 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -868,6 +868,28 @@ int32_t ec_heal_open_others(ec_heal_t * heal) return (open != 0); } +uintptr_t ec_heal_needs_data_rebuild(ec_heal_t *heal) +{ + ec_fop_data_t *fop = heal->lookup; + ec_cbk_data_t *cbk = NULL; + uintptr_t bad = 0; + + if ((heal->fop->error != 0) || (heal->good == 0) || + (heal->iatt.ia_type != IA_IFREG)) { + return 0; + } + + list_for_each_entry(cbk, &fop->cbk_list, list) { + if ((cbk->op_ret >= 0) && + ((cbk->size != heal->raw_size) || + (cbk->version != heal->version))) { + bad |= cbk->mask; + } + } + + return bad; +} + void ec_heal_setxattr_others(ec_heal_t * heal) { ec_cbk_data_t * cbk; @@ -892,9 +914,12 @@ void ec_heal_setxattr_others(ec_heal_t * heal) } if (cbk->iatt[0].ia_type == IA_IFREG) { - if (ec_dict_set_number(xdata, EC_XATTR_SIZE, - cbk->iatt[0].ia_size) != 0) - { + uint64_t dirty; + + dirty = ec_heal_needs_data_rebuild(heal) != 0; + if ((ec_dict_set_number(xdata, EC_XATTR_SIZE, + cbk->iatt[0].ia_size) != 0) || + (ec_dict_set_number(xdata, EC_XATTR_DIRTY, dirty) != 0)) { goto out; } } @@ -968,40 +993,10 @@ void ec_heal_attr(ec_heal_t * heal) } } -int32_t ec_heal_needs_data_rebuild(ec_heal_t * heal) -{ - ec_fop_data_t * fop = heal->lookup; - ec_cbk_data_t * cbk = NULL; - uintptr_t bad = 0; - - if ((heal->fop->error != 0) || (heal->good == 0) || - (heal->iatt.ia_type != IA_IFREG)) - { - return 0; - } - - list_for_each_entry(cbk, &fop->cbk_list, list) - { - if ((cbk->op_ret >= 0) && - ((cbk->size != heal->raw_size) || (cbk->version != heal->version))) - { - bad |= cbk->mask; - } - } - - /* This function can only be called concurrently with entrylk, which do - * not modify heal structure, so it's safe to access heal->bad without - * acquiring any lock. - */ - heal->bad = bad; - - return (bad != 0); -} - void ec_heal_open(ec_heal_t * heal) { - if (!ec_heal_needs_data_rebuild(heal)) - { + heal->bad = ec_heal_needs_data_rebuild(heal); + if (heal->bad == 0) { return; } @@ -1115,6 +1110,30 @@ void ec_heal_data(ec_heal_t * heal) } } +void ec_heal_update_dirty(ec_heal_t *heal, uintptr_t mask) +{ + dict_t *dict; + + dict = dict_new(); + if (dict == NULL) { + ec_fop_set_error(heal->fop, EIO); + + return; + } + + if (ec_dict_set_number(dict, EC_XATTR_DIRTY, -1) != 0) { + dict_unref(dict); + ec_fop_set_error(heal->fop, EIO); + + return; + } + + ec_fxattrop(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, NULL, NULL, + heal->fd, GF_XATTROP_ADD_ARRAY64, dict, NULL); + + dict_unref(dict); +} + void ec_heal_dispatch(ec_heal_t *heal) { ec_fop_data_t *fop; @@ -1347,7 +1366,8 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) case EC_STATE_HEAL_UNLOCK_ENTRY: ec_heal_entrylk(heal, ENTRYLK_UNLOCK); - if (ec_heal_needs_data_rebuild(heal)) + heal->bad = ec_heal_needs_data_rebuild(heal); + if (heal->bad != 0) { return EC_STATE_HEAL_DATA_LOCK; } @@ -1385,6 +1405,7 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) case EC_STATE_HEAL_POST_INODE_LOOKUP: heal->fixed = heal->bad; + ec_heal_update_dirty(heal, heal->bad); ec_heal_lookup(heal, heal->good); return EC_STATE_HEAL_SETATTR; diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 3483dfb3354..d126d48eb12 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -1182,6 +1182,10 @@ out: int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst, ec_cbk_data_t * src) { + if (src->dirty) { + return 0; + } + if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32)) { gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching vector in " diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c index 140d59f5f20..ffca89c5f14 100644 --- a/xlators/cluster/ec/src/ec-inode-write.c +++ b/xlators/cluster/ec/src/ec-inode-write.c @@ -1374,7 +1374,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) return EC_STATE_GET_SIZE_AND_VERSION; case EC_STATE_GET_SIZE_AND_VERSION: - ec_get_size_version(fop); + ec_prepare_update(fop); return EC_STATE_DISPATCH; @@ -2016,7 +2016,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state) return EC_STATE_GET_SIZE_AND_VERSION; case EC_STATE_GET_SIZE_AND_VERSION: - ec_get_size_version(fop); + ec_prepare_update(fop); return EC_STATE_DISPATCH; diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h index e5b54b02966..b6a95a11b18 100644 --- a/xlators/cluster/ec/src/ec.h +++ b/xlators/cluster/ec/src/ec.h @@ -19,6 +19,7 @@ #define EC_XATTR_SIZE EC_XATTR_PREFIX"size" #define EC_XATTR_VERSION EC_XATTR_PREFIX"version" #define EC_XATTR_HEAL EC_XATTR_PREFIX"heal" +#define EC_XATTR_DIRTY EC_XATTR_PREFIX"dirty" struct _ec; typedef struct _ec ec_t; |