diff options
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 49 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-generic.c | 14 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 22 |
4 files changed, 83 insertions, 6 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 823922542a0..3ae7f110d99 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -932,16 +932,19 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config) } gf_boolean_t -ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty) +ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, + uint64_t *dirty) { gf_boolean_t set_dirty = _gf_false; if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) { + if (!link->optimistic_changelog) dirty[EC_DATA_TXN] = 1; } if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) { + if (!link->optimistic_changelog) dirty[EC_METADATA_TXN] = 1; } @@ -962,6 +965,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie, ec_lock_link_t *link = fop->data; ec_lock_t *lock = NULL; ec_inode_t *ctx; + gf_boolean_t release = _gf_false; lock = link->lock; parent = link->fop; @@ -1055,6 +1059,26 @@ unlock: UNLOCK(&lock->loc.inode->lock); if (op_errno == 0) { + /* If the fop fails on any of the good bricks, it is important to mark + * it dirty and update versions right away if dirty was not set before. + */ + if (lock->good_mask & ~(fop->good | fop->remaining)) { + release = _gf_true; + } + + /* lock->release is a critical field that is checked and modified most + * of the time inside a locked region. This use here is safe because we + * are in a modifying fop and we currently don't allow two modifying + * fops to be processed concurrently, so no one else could be checking + * or modifying it.*/ + if (link->update[0] && !link->dirty[0]) { + lock->release |= release; + } + + if (link->update[1] && !link->dirty[1]) { + lock->release |= release; + } + /* We don't allow the main fop to be executed on bricks that have not * succeeded the initial xattrop. */ parent->mask &= fop->good; @@ -1097,6 +1121,7 @@ void ec_get_size_version(ec_lock_link_t *link) ec_inode_t *ctx; ec_fop_data_t *fop; dict_t *dict = NULL; + ec_t *ec = NULL; int32_t error = 0; gf_boolean_t getting_xattr; gf_boolean_t set_dirty = _gf_false; @@ -1105,6 +1130,17 @@ void ec_get_size_version(ec_lock_link_t *link) lock = link->lock; ctx = lock->ctx; fop = link->fop; + ec = fop->xl->private; + + if (ec->optimistic_changelog && + !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id)) + link->optimistic_changelog = _gf_true; + + /* If ctx->have_info is false and lock->query is true, it means that we'll + * send the xattrop anyway, so we can use it to update dirty counts, even + * if it's not necessary to do it right now. */ + if (!ctx->have_info && lock->query) + link->optimistic_changelog = _gf_false; set_dirty = ec_set_dirty_flag (link, ctx, dirty); @@ -1714,6 +1750,13 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk, if (link->update[1]) { ctx->post_version[1]++; } + /* If the fop fails on any of the good bricks, it is important to mark + * it dirty and update versions right away. */ + if (link->update[0] || link->update[1]) { + if (lock->good_mask & ~(fop->good | fop->remaining)) { + lock->release = _gf_true; + } + } } ec_lock_update_good(lock, fop); @@ -2028,9 +2071,13 @@ ec_update_info(ec_lock_link_t *link) if (ctx->dirty[1] != 0) { dirty[1] = -1; } + } else { + link->optimistic_changelog = _gf_false; + ec_set_dirty_flag (link, ctx, dirty); } memset(ctx->dirty, 0, sizeof(ctx->dirty)); } + if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || (dirty[1] != 0)) { ec_update_size_version(link, version, size, dirty); diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index 3ce3c2ab02a..ddb90ce39cc 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -697,6 +697,7 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, ec_fop_data_t * fop = NULL; ec_cbk_data_t * cbk = NULL; int32_t idx = (int32_t)(uintptr_t)cookie; + uint64_t dirty[2] = {0}; VALIDATE_OR_GOTO(this, out); GF_VALIDATE_OR_GOTO(this->name, frame, out); @@ -746,8 +747,7 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this, goto out; } - ec_dict_del_array (xdata, EC_XATTR_DIRTY, cbk->dirty, - EC_VERSION_SIZE); + ec_dict_del_array (xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); } ec_combine(cbk, ec_combine_lookup); @@ -1142,7 +1142,9 @@ ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dict_t *xdata) { ec_fop_data_t *fop = NULL; + ec_lock_link_t *link = NULL; ec_cbk_data_t *cbk = NULL; + uint64_t dirty[2] = {0}; data_t *data; uint64_t *version; int32_t idx = (int32_t)(uintptr_t)cookie; @@ -1178,8 +1180,14 @@ ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } } - ec_dict_del_array (xattr, EC_XATTR_DIRTY, cbk->dirty, + ec_dict_del_array (xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE); + link = fop->data; + if (link) { + /*Keep a note of if the dirty is already set or not*/ + link->dirty[0] |= (dirty[0] != 0); + link->dirty[1] |= (dirty[1] != 0); + } } if (xdata) diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index d701fe7d25e..f184f459c2e 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -246,6 +246,8 @@ struct _ec_lock_link { struct list_head owner_list; struct list_head wait_list; gf_boolean_t update[2]; + gf_boolean_t dirty[2]; + gf_boolean_t optimistic_changelog; loc_t *base; uint64_t size; }; @@ -331,7 +333,6 @@ struct _ec_cbk_data { int32_t op_errno; int32_t count; uintptr_t mask; - uint64_t dirty[2]; dict_t *xdata; dict_t *dict; @@ -561,6 +562,7 @@ struct _ec { gf_timer_t *timer; gf_boolean_t shutdown; gf_boolean_t eager_lock; + gf_boolean_t optimistic_changelog; uint32_t background_heals; uint32_t heal_wait_qlen; struct list_head pending_fops; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index e467fea28b8..01f1473f96d 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -301,6 +301,8 @@ reconfigure (xlator_t *this, dict_t *options) ret = -1; } + GF_OPTION_RECONF ("optimistic-change-log", ec->optimistic_changelog, + options, bool, failed); failed: return ret; } @@ -611,6 +613,7 @@ init (xlator_t *this) this->private = ec; ec->xl = this; + ec->optimistic_changelog = _gf_true; LOCK_INIT(&ec->lock); INIT_LIST_HEAD(&ec->pending_fops); @@ -669,6 +672,7 @@ init (xlator_t *this) GF_OPTION_INIT ("shd-max-threads", ec->shd.max_threads, uint32, failed); GF_OPTION_INIT ("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed); + GF_OPTION_INIT ("optimistic-change-log", ec->optimistic_changelog, bool, failed); this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this); if (!this->itable) @@ -1463,5 +1467,21 @@ struct volume_options options[] = .description = "force the cpu extensions to be used to accelerate the " "galois field computations." }, - { } + { .key = {"optimistic-change-log"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "Set/Unset dirty flag for every update fop at the start" + "of the fop. If OFF, this option impacts performance of" + "entry operations or metadata operations as it will" + "set dirty flag at the start and unset it at the end of" + "ALL update fop. If ON and all the bricks are good," + "dirty flag will be set at the start only for file fops" + "For metadata and entry fops dirty flag will not be set" + "at the start, if all the bricks are good. This does" + "not impact performance for metadata operations and" + "entry operation but has a very small window to miss" + "marking entry as dirty in case it is required to be" + "healed" + }, + { .key = {NULL} } }; |