diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2014-10-08 09:20:11 +0200 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-10-21 11:38:45 -0700 |
commit | 56caf4349c8824fde70783fe404cc6f646dce149 (patch) | |
tree | 02bef10cd75c1422c3ed272d5ecd01edcb404033 /xlators/cluster | |
parent | d57ecca6322a451242f4a2b7b5978de7c8f9088e (diff) |
ec: Fix self-heal issues
Problem: Doing an 'ls' of a directory that has been modified while one
of the bricks was down, sometimes returns the old directory
contents.
Cause: Directories are not marked when they are modified as files are.
The ec xlator balances requests amongst available and healthy
bricks. Since there is no way to detect that a directory is
out of date in one of the bricks, it is used from time to time
to return the directory contents.
Solution: Basically the solution consists of using versioning information
for directories as well; however, some additional changes have
been necessary.
Changes:
* Use directory versioning:
This required locking the full directory instead of a single entry for
all requests that add or remove entries from it. This is needed to
allow atomic version updates. This affects the following fops:
create, mkdir, mknod, link, symlink, rename, unlink, rmdir
Another side effect is that opendir needs to do a prior
lookup to get versioning information and discard out-of-date
bricks for subsequent readdir(p) calls.
* Restrict directory self-heal:
Until now, when a discrepancy was found in lookup, a self-heal
was automatically started. This caused the versioning information
of a bad directory to be healed instantly, making the original
problem reappear.
To solve this, when a missing directory is detected in one or more
bricks on lookup or opendir fops, only a partial self-heal is
performed on it. A partial self-heal basically creates the
directory but does not restore any additional information.
This prevents an 'ls' from repairing the directory and causing the
problem to happen again. With this change, the output of 'ls' is
always consistent. However, since the directory has been created
in the brick, this allows any other operation on it (create new
files, for example) to succeed on all bricks and not add additional
work to the self-heal process.
To force a self-heal of a directory, any other operation must be
done on it. For example a getxattr.
With these changes, the correct healing procedure that would avoid
inconsistent directory browsing consists of a post-order traversal
of the directories being healed. This way, the directory contents will
be healed before healing the directory itself.
* Additional changes to fix self-heal errors
- Don't use fop->fd to decide between fd/loc.
open, opendir and create have an fd, but the correct data is in
loc.
- Fix incorrect management of bad bricks per inode/fd.
- Fix incorrect selection of fop's target bricks when there are bad
bricks involved.
- Improved ec_loc_parent() to always return a parent loc as
complete as possible.
Change-Id: Iaf3df174d7857da57d4a87b4a8740a7048b366ad
BUG: 1149726
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/8916
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 271 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 25 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-data.h | 46 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 48 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-write.c | 34 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-fops.h | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-generic.c | 30 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 48 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 118 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 3 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 22 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-write.c | 38 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-locks.c | 6 |
13 files changed, 391 insertions, 302 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 0ba7bce7728..561871cee93 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -71,7 +71,7 @@ uintptr_t ec_fd_good(fd_t * fd, xlator_t * xl) uintptr_t bad = 0; ctx = ec_fd_get(fd, xl); - if ((ctx != NULL) && (ctx->loc.inode != NULL)) + if (ctx != NULL) { bad = ctx->bad; } @@ -110,7 +110,7 @@ uintptr_t ec_update_fd(ec_fop_data_t * fop, fd_t * fd, uintptr_t good, LOCK(&fd->lock); ctx = __ec_fd_get(fd, fop->xl); - if ((ctx != NULL) && (ctx->loc.inode != NULL)) + if (ctx != NULL) { ctx->bad &= ~good; bad |= ctx->bad; @@ -143,6 +143,15 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this, void ec_check_status(ec_fop_data_t * fop) { ec_t * ec = fop->xl->private; + int32_t partial = 0; + + if (fop->answer->op_ret >= 0) { + if (fop->id == GF_FOP_LOOKUP) { + partial = fop->answer->iatt[0].ia_type == IA_IFDIR; + } else if (fop->id == GF_FOP_OPENDIR) { + partial = 1; + } + } if ((ec->xl_up & ~(fop->remaining | fop->good)) == 0) { @@ -154,42 +163,36 @@ void ec_check_status(ec_fop_data_t * fop) "remaining=%lX, good=%lX, bad=%lX)", ec->xl_up, fop->mask, fop->remaining, fop->good, fop->bad); - if (fop->fd != NULL) + if (fop->use_fd) { - ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, - fop->fd, NULL); + if (fop->fd != NULL) { + ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, + NULL, fop->fd, partial, NULL); + } } else { ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, - &fop->loc[0], NULL); + &fop->loc[0], partial, NULL); if (fop->loc[1].inode != NULL) { ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, - NULL, &fop->loc[1], NULL); + NULL, &fop->loc[1], partial, NULL); } } } void ec_update_bad(ec_fop_data_t * fop, uintptr_t good) { + ec_t *ec = fop->xl->private; uintptr_t bad; - int32_t update = 0; - bad = fop->mask & ~(fop->remaining | good); - if 
((fop->bad & bad) != bad) - { - fop->bad |= bad; - update = 1; - } - if ((fop->good & good) != good) - { - fop->good |= good; - update = 1; - } + bad = ec->xl_up & ~(fop->remaining | good); + fop->bad |= bad; + fop->good |= good; - if (update && (fop->parent == NULL)) + if (fop->parent == NULL) { if ((fop->flags & EC_FLAG_UPDATE_LOC_PARENT) != 0) { @@ -369,21 +372,20 @@ int32_t ec_child_select(ec_fop_data_t * fop) fop->mask &= ec->node_mask; mask = ec->xl_up; - if (fop->loc[0].inode != NULL) - { - mask |= ec_inode_good(fop->loc[0].inode, fop->xl); - } - if (fop->loc[1].inode != NULL) - { - mask |= ec_inode_good(fop->loc[1].inode, fop->xl); - } - if (fop->fd != NULL) + if (fop->parent == NULL) { - if (fop->fd->inode != NULL) - { - mask |= ec_inode_good(fop->fd->inode, fop->xl); + if (fop->loc[0].inode != NULL) { + mask &= ec_inode_good(fop->loc[0].inode, fop->xl); + } + if (fop->loc[1].inode != NULL) { + mask &= ec_inode_good(fop->loc[1].inode, fop->xl); + } + if (fop->fd != NULL) { + if (fop->fd->inode != NULL) { + mask &= ec_inode_good(fop->fd->inode, fop->xl); + } + mask &= ec_fd_good(fop->fd, fop->xl); } - mask |= ec_fd_good(fop->fd, fop->xl); } if ((fop->mask & ~mask) != 0) { @@ -619,7 +621,6 @@ ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc) void ec_lock_destroy(ec_lock_t * lock) { - GF_FREE(lock->basename); loc_wipe(&lock->loc); mem_put(lock); @@ -627,31 +628,13 @@ void ec_lock_destroy(ec_lock_t * lock) int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2) { - int32_t res; - - res = uuid_compare(lock1->loc.gfid, lock2->loc.gfid); - if (res != 0) - { - return res; - } - if (lock1->basename == NULL) - { - if (lock2->basename == NULL) - { - return 0; - } - return 1; - } - if (lock2->basename == NULL) - { - return -1; - } - return strcmp(lock1->basename, lock2->basename); + return uuid_compare(lock1->loc.gfid, lock2->loc.gfid); } -void ec_lock_insert(ec_fop_data_t * fop, ec_lock_t * lock) +void ec_lock_insert(ec_fop_data_t *fop, 
ec_lock_t *lock, int32_t update) { ec_lock_t * tmp; + int32_t tmp_update; if ((fop->lock_count > 0) && (ec_lock_compare(fop->locks[0].lock, lock) > 0)) @@ -659,19 +642,25 @@ void ec_lock_insert(ec_fop_data_t * fop, ec_lock_t * lock) tmp = fop->locks[0].lock; fop->locks[0].lock = lock; lock = tmp; + + tmp_update = fop->locks_update; + fop->locks_update = update; + update = tmp_update; } fop->locks[fop->lock_count].lock = lock; fop->locks[fop->lock_count].fop = fop; + + fop->locks_update |= update << fop->lock_count; + fop->lock_count++; lock->refs++; } -void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc) +void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update) { ec_lock_t * lock = NULL; ec_inode_t * ctx = NULL; - char * name = NULL; loc_t tmp; int32_t error; @@ -680,12 +669,23 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc) return; } - error = ec_loc_parent(fop->xl, loc, &tmp, &name); - if (error != 0) + /* update is only 0 for 'opendir', which needs to lock the entry pointed + * by loc instead of its parent. + */ + if (update) { - ec_fop_set_error(fop, error); + error = ec_loc_parent(fop->xl, loc, &tmp); + if (error != 0) { + ec_fop_set_error(fop, error); - return; + return; + } + } else { + if (!ec_loc_from_loc(fop->xl, &tmp, loc)) { + ec_fop_set_error(fop, EIO); + + return; + } } LOCK(&tmp.inode->lock); @@ -698,16 +698,14 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc) goto unlock; } - list_for_each_entry(lock, &ctx->entry_locks, list) + if (ctx->entry_lock != NULL) { - if (strcmp(lock->basename, name) == 0) - { - ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s, " - "name=%s. 
Lock already acquired", - lock, tmp.inode, tmp.path, name); + lock = ctx->entry_lock; + ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s" + "Lock already acquired", + lock, tmp.inode, tmp.path); - goto insert; - } + goto insert; } lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp); @@ -721,22 +719,20 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc) ec_trace("LOCK_CREATE", fop, "lock=%p", lock); lock->type = ENTRYLK_WRLCK; - lock->basename = name; - name = NULL; - list_add_tail(&lock->list, &ctx->entry_locks); + lock->plock = &ctx->entry_lock; + ctx->entry_lock = lock; insert: - ec_lock_insert(fop, lock); + ec_lock_insert(fop, lock, update); unlock: UNLOCK(&tmp.inode->lock); loc_wipe(&tmp); - GF_FREE(name); } -void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc) +void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update) { ec_lock_t * lock; ec_inode_t * ctx; @@ -756,9 +752,9 @@ void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc) goto unlock; } - if (!list_empty(&ctx->inode_locks)) + if (ctx->inode_lock != NULL) { - lock = list_entry(ctx->inode_locks.next, ec_lock_t, list); + lock = ctx->inode_lock; ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. 
Lock already " "acquired", lock, loc->inode); @@ -778,16 +774,17 @@ void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc) lock->flock.l_type = F_WRLCK; lock->flock.l_whence = SEEK_SET; - list_add_tail(&lock->list, &ctx->inode_locks); + lock->plock = &ctx->inode_lock; + ctx->inode_lock = lock; insert: - ec_lock_insert(fop, lock); + ec_lock_insert(fop, lock, update); unlock: UNLOCK(&loc->inode->lock); } -void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd) +void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update) { loc_t loc; @@ -798,7 +795,7 @@ void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd) if (ec_loc_from_fd(fop->xl, &loc, fd)) { - ec_lock_prepare_inode(fop, &loc); + ec_lock_prepare_inode(fop, &loc, update); loc_wipe(&loc); } @@ -868,12 +865,11 @@ void ec_lock(ec_fop_data_t * fop) if (lock->kind == EC_LOCK_ENTRY) { - ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s, " - "name=%s", lock, lock->loc.inode, lock->loc.path, - lock->basename); + ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s", + lock, lock->loc.inode, lock->loc.path); ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, - lock, fop->xl->name, &lock->loc, lock->basename, + lock, fop->xl->name, &lock->loc, NULL, ENTRYLK_LOCK, lock->type, NULL); } else @@ -936,7 +932,7 @@ void ec_unlock(ec_fop_data_t * fop) refs = --lock->refs; if (refs == 0) { - list_del_init(&lock->list); + *lock->plock = NULL; } UNLOCK(&lock->loc.inode->lock); @@ -951,13 +947,12 @@ void ec_unlock(ec_fop_data_t * fop) { case EC_LOCK_ENTRY: ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, inode=%p, " - "path=%s, basename=%s", - lock, lock->loc.inode, lock->loc.path, - lock->basename); + "path=%s", + lock, lock->loc.inode, lock->loc.path); ec_entrylk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL, ec_unlocked, lock, - fop->xl->name, &lock->loc, lock->basename, + fop->xl->name, &lock->loc, NULL, ENTRYLK_UNLOCK, lock->type, NULL); break; @@ -1061,17 +1056,23 @@ int32_t 
ec_get_size_version_set(call_frame_t * frame, void * cookie, } } - LOCK(&inode->lock); ctx = __ec_inode_get(inode, this); - if ((ctx != NULL) && !list_empty(&ctx->inode_locks)) - { - lock = list_entry(ctx->inode_locks.next, ec_lock_t, list); - - lock->have_size = 1; - lock->size = buf->ia_size; - lock->version = fop->answer->version; + if (ctx != NULL) { + if (ctx->inode_lock != NULL) { + lock = ctx->inode_lock; + lock->version = fop->answer->version; + + if (buf->ia_type == IA_IFREG) { + lock->have_size = 1; + lock->size = buf->ia_size; + } + } + if (ctx->entry_lock != NULL) { + lock = ctx->entry_lock; + lock->version = fop->answer->version; + } } UNLOCK(&inode->lock); @@ -1083,10 +1084,10 @@ int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie, fop->parent->mask &= fop->good; } - fop->parent->pre_size = fop->parent->post_size = buf->ia_size; - - fop->parent->have_size = 1; - + if (buf->ia_type == IA_IFREG) { + fop->parent->pre_size = fop->parent->post_size = buf->ia_size; + fop->parent->have_size = 1; + } } else { @@ -1143,7 +1144,7 @@ void ec_get_size_version(ec_fop_data_t * fop) error = EIO; - if (fop->fd == NULL) + if (!fop->use_fd) { if (!ec_loc_from_loc(fop->xl, &loc, &fop->loc[0])) { @@ -1211,7 +1212,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie, return 0; } -void ec_update_size_version(ec_fop_data_t * fop, uint64_t version, +void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version, uint64_t size) { dict_t * dict; @@ -1251,18 +1252,9 @@ void ec_update_size_version(ec_fop_data_t * fop, uint64_t version, fop->frame->root->uid = 0; fop->frame->root->gid = 0; - if (fop->fd == NULL) - { - ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, - ec_update_size_version_done, NULL, &fop->loc[0], - GF_XATTROP_ADD_ARRAY64, dict, NULL); - } - else - { - ec_fxattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, - ec_update_size_version_done, NULL, fop->fd, - GF_XATTROP_ADD_ARRAY64, dict, NULL); - } 
+ ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN, + ec_update_size_version_done, NULL, loc, + GF_XATTROP_ADD_ARRAY64, dict, NULL); fop->frame->root->uid = uid; fop->frame->root->gid = gid; @@ -1291,8 +1283,6 @@ void ec_flush_size_version(ec_fop_data_t * fop) lock = fop->locks[0].lock; - GF_ASSERT(lock->kind == EC_LOCK_INODE); - LOCK(&lock->loc.inode->lock); GF_ASSERT(lock->owner == fop); @@ -1306,11 +1296,11 @@ void ec_flush_size_version(ec_fop_data_t * fop) if (version > 0) { - ec_update_size_version(fop, version, delta); + ec_update_size_version(fop, &lock->loc, version, delta); } } -void ec_lock_reuse(ec_fop_data_t * fop, int32_t update) +void ec_lock_reuse(ec_fop_data_t *fop) { ec_fop_data_t * wait_fop; ec_lock_t * lock; @@ -1321,6 +1311,10 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update) for (i = 0; i < fop->lock_count; i++) { + refs = 0; + delta = 0; + version = 0; + wait_fop = NULL; lock = fop->locks[i].lock; @@ -1332,28 +1326,26 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update) GF_ASSERT(lock->owner == fop); lock->owner = NULL; - if (lock->kind == EC_LOCK_INODE) - { - if (update && (fop->error == 0)) + if (((fop->locks_update >> i) & 1) != 0) { + if (fop->error == 0) { lock->version_delta++; lock->size_delta += fop->post_size - fop->pre_size; + if (fop->have_size) { + lock->size = fop->post_size; + lock->have_size = 1; + } } - version = lock->version_delta; - delta = lock->size_delta; - refs = lock->refs; - if (refs == 1) - { - lock->version_delta = 0; - lock->size_delta = 0; - } + } - if (fop->have_size) - { - lock->size = fop->post_size; - lock->have_size = 1; - } + version = lock->version_delta; + delta = lock->size_delta; + refs = lock->refs; + if (refs == 1) { + lock->version_delta = 0; + lock->size_delta = 0; } + lock->good_mask &= fop->mask; if (!list_empty(&lock->waiting)) @@ -1379,11 +1371,10 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update) ec_resume(wait_fop, 0); } - } - if ((refs == 1) && (version > 0)) - { 
- ec_update_size_version(fop, version, delta); + if ((refs == 1) && (version > 0)) { + ec_update_size_version(fop, &lock->loc, version, delta); + } } } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 79263e2d884..30bdb53605c 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -66,14 +66,15 @@ #define EC_STATE_HEAL_OPEN 207 #define EC_STATE_HEAL_REOPEN_FD 208 #define EC_STATE_HEAL_UNLOCK 209 -#define EC_STATE_HEAL_DATA_LOCK 210 -#define EC_STATE_HEAL_DATA_COPY 211 -#define EC_STATE_HEAL_DATA_UNLOCK 212 -#define EC_STATE_HEAL_POST_INODELK_LOCK 213 -#define EC_STATE_HEAL_POST_INODE_LOOKUP 214 -#define EC_STATE_HEAL_SETATTR 215 -#define EC_STATE_HEAL_POST_INODELK_UNLOCK 216 -#define EC_STATE_HEAL_DISPATCH 217 +#define EC_STATE_HEAL_UNLOCK_ENTRY 210 +#define EC_STATE_HEAL_DATA_LOCK 211 +#define EC_STATE_HEAL_DATA_COPY 212 +#define EC_STATE_HEAL_DATA_UNLOCK 213 +#define EC_STATE_HEAL_POST_INODELK_LOCK 214 +#define EC_STATE_HEAL_POST_INODE_LOOKUP 215 +#define EC_STATE_HEAL_SETATTR 216 +#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217 +#define EC_STATE_HEAL_DISPATCH 218 int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret, int32_t op_errno); @@ -85,11 +86,11 @@ void ec_update_bad(ec_fop_data_t * fop, uintptr_t good); void ec_fop_set_error(ec_fop_data_t * fop, int32_t error); -void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc); -void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc); -void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd); +void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update); +void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update); +void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update); void ec_lock(ec_fop_data_t * fop); -void ec_lock_reuse(ec_fop_data_t * fop, int32_t update); +void ec_lock_reuse(ec_fop_data_t *fop); void ec_unlock(ec_fop_data_t * fop); void 
ec_get_size_version(ec_fop_data_t * fop); diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h index 532ca95444a..ac197fe7f0b 100644 --- a/xlators/cluster/ec/src/ec-data.h +++ b/xlators/cluster/ec/src/ec-data.h @@ -76,10 +76,10 @@ struct _ec_fd struct _ec_inode { - uintptr_t bad; - struct list_head entry_locks; - struct list_head inode_locks; - ec_heal_t * heal; + uintptr_t bad; + ec_lock_t *entry_lock; + ec_lock_t *inode_lock; + ec_heal_t *heal; }; typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *, @@ -141,27 +141,23 @@ union _ec_cbk struct _ec_lock { - struct list_head list; - struct list_head waiting; - uintptr_t mask; - uintptr_t good_mask; - int32_t kind; - int32_t refs; - int32_t acquired; - int32_t have_size; - uint64_t size; - uint64_t size_delta; - uint64_t version; - uint64_t version_delta; - ec_fop_data_t * owner; - loc_t loc; + ec_lock_t **plock; + struct list_head waiting; + uintptr_t mask; + uintptr_t good_mask; + int32_t kind; + int32_t refs; + int32_t acquired; + int32_t have_size; + uint64_t size; + uint64_t size_delta; + uint64_t version; + uint64_t version_delta; + ec_fop_data_t *owner; + loc_t loc; union { - struct - { - entrylk_type type; - char * basename; - }; + entrylk_type type; struct gf_flock flock; }; }; @@ -193,6 +189,7 @@ struct _ec_fop_data int32_t lock_count; int32_t locked; ec_lock_link_t locks[2]; + int32_t locks_update; int32_t have_size; uint64_t pre_size; uint64_t post_size; @@ -215,6 +212,8 @@ struct _ec_fop_data uint64_t user_size; uint32_t head; + int32_t use_fd; + dict_t * xdata; dict_t * dict; int32_t int32; @@ -273,6 +272,7 @@ struct _ec_heal struct iatt iatt; char * symlink; fd_t * fd; + int32_t partial; int32_t done; uintptr_t available; uintptr_t good; diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index 3a8455101e8..7ff8df5e397 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ 
-116,10 +116,38 @@ void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx) int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) { ec_cbk_data_t * cbk; + ec_fd_t *ctx; switch (state) { case EC_STATE_INIT: + LOCK(&fop->fd->lock); + + ctx = __ec_fd_get(fop->fd, fop->xl); + if ((ctx == NULL) || !ec_loc_from_loc(fop->xl, &ctx->loc, + &fop->loc[0])) { + UNLOCK(&fop->fd->lock); + + fop->error = EIO; + + return EC_STATE_REPORT; + } + + UNLOCK(&fop->fd->lock); + + /* Fall through */ + + case EC_STATE_LOCK: + ec_lock_prepare_entry(fop, &fop->loc[0], 0); + ec_lock(fop); + + return EC_STATE_GET_SIZE_AND_VERSION; + + case EC_STATE_GET_SIZE_AND_VERSION: + ec_get_size_version(fop); + + return EC_STATE_DISPATCH; + case EC_STATE_DISPATCH: ec_dispatch_all(fop); @@ -160,8 +188,10 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) cbk->op_errno, cbk->fd, cbk->xdata); } - return EC_STATE_END; + return EC_STATE_LOCK_REUSE; + case -EC_STATE_LOCK: + case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_DISPATCH: case -EC_STATE_PREPARE_ANSWER: case -EC_STATE_REPORT: @@ -173,6 +203,18 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state) NULL, NULL); } + return EC_STATE_LOCK_REUSE; + + case -EC_STATE_LOCK_REUSE: + case EC_STATE_LOCK_REUSE: + ec_lock_reuse(fop); + + return EC_STATE_UNLOCK; + + case -EC_STATE_UNLOCK: + case EC_STATE_UNLOCK: + ec_unlock(fop); + return EC_STATE_END; default: @@ -421,6 +463,8 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->size = size; fop->offset = offset; @@ -533,6 +577,8 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->size = size; fop->offset = offset; diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index 92012855087..02961acb8bd 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -210,7 +210,7 @@ 
int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_DISPATCH; @@ -293,7 +293,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -494,7 +494,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) // Parent entry of fop->loc[0] should be locked, but I don't // receive enough information to do it (fop->loc[0].parent is // NULL). - ec_lock_prepare_entry(fop, &fop->loc[1]); + ec_lock_prepare_entry(fop, &fop->loc[1], 1); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -577,7 +577,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -765,7 +765,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_DISPATCH; @@ -837,7 +837,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1022,7 +1022,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_DISPATCH; @@ -1094,7 +1094,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1277,8 +1277,8 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t 
state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); - ec_lock_prepare_entry(fop, &fop->loc[1]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); + ec_lock_prepare_entry(fop, &fop->loc[1], 1); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -1359,7 +1359,7 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1533,7 +1533,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_DISPATCH; @@ -1597,7 +1597,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1780,7 +1780,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_DISPATCH; @@ -1852,7 +1852,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -2031,7 +2031,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_entry(fop, &fop->loc[0]); + ec_lock_prepare_entry(fop, &fop->loc[0], 1); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -2101,7 +2101,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h index 
2b6e03f723f..ae70da54886 100644 --- a/xlators/cluster/ec/src/ec-fops.h +++ b/xlators/cluster/ec/src/ec-fops.h @@ -70,11 +70,11 @@ void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target, void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_heal_cbk_t func, void *data, loc_t * loc, - dict_t * xdata); + int32_t partial, dict_t *xdata); void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd, - dict_t * xdata); + int32_t partial, dict_t *xdata); void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, void *data, diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c index 286425e07c9..7e0576fa697 100644 --- a/xlators/cluster/ec/src/ec-generic.c +++ b/xlators/cluster/ec/src/ec-generic.c @@ -91,7 +91,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); ec_lock(fop); return EC_STATE_DISPATCH; @@ -159,7 +159,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -199,6 +199,8 @@ void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + if (fd != NULL) { fop->fd = fd_ref(fd); @@ -325,7 +327,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -408,7 +410,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -448,6 +450,8 @@ void 
ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = datasync; if (fd != NULL) @@ -550,7 +554,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) { case EC_STATE_INIT: case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); ec_lock(fop); return EC_STATE_DISPATCH; @@ -618,7 +622,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -658,6 +662,8 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = datasync; if (fd != NULL) @@ -720,9 +726,9 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) LOCK(&cbk->inode->lock); ctx = __ec_inode_get(cbk->inode, fop->xl); - if ((ctx != NULL) && !list_empty(&ctx->inode_locks)) + if ((ctx != NULL) && (ctx->inode_lock != NULL)) { - lock = list_entry(ctx->inode_locks.next, ec_lock_t, list); + lock = ctx->inode_lock; cbk->version = lock->version; if (lock->have_size) { @@ -1374,11 +1380,11 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 1); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); } ec_lock(fop); @@ -1468,7 +1474,7 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1652,6 +1658,8 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->xattrop_flags = optype; if (fd != NULL) diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index dc11e6d2e87..a208330e68c 100644 --- 
a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -310,8 +310,7 @@ int32_t ec_heal_reopen_cbk(call_frame_t * frame, void * cookie, LOCK(&fd->lock); ctx = __ec_fd_get(fd, fop->xl); - if ((ctx != NULL) && (ctx->loc.inode != NULL)) - { + if (ctx != NULL) { ctx->bad &= ~good; ctx->open |= good; } @@ -482,6 +481,7 @@ int32_t ec_heal_init(ec_fop_data_t * fop) heal->fop = fop; pool = fop->xl->ctx->iobuf_pool; heal->size = iobpool_default_pagesize(pool) * ec->fragments; + heal->partial = fop->int32; LOCK(&inode->lock); @@ -516,10 +516,9 @@ out: void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd) { loc_t loc; - char * name; int32_t error; - error = ec_loc_parent(heal->xl, &heal->loc, &loc, &name); + error = ec_loc_parent(heal->xl, &heal->loc, &loc); if (error != 0) { ec_fop_set_error(heal->fop, error); @@ -528,10 +527,9 @@ void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd) } ec_entrylk(heal->fop->frame, heal->xl, -1, EC_MINIMUM_ALL, NULL, NULL, - heal->xl->name, &loc, name, cmd, ENTRYLK_WRLCK, NULL); + heal->xl->name, &loc, NULL, cmd, ENTRYLK_WRLCK, NULL); loc_wipe(&loc); - GF_FREE(name); } void ec_heal_inodelk(ec_heal_t * heal, int32_t type, int32_t use_fd, @@ -970,7 +968,8 @@ void ec_heal_reopen_fd(ec_heal_t * heal) { inode_t * inode; fd_t * fd; - ec_fd_t * ctx; + ec_fd_t *ctx_fd; + ec_inode_t *ctx_inode; uintptr_t mask; int32_t flags; @@ -978,12 +977,16 @@ void ec_heal_reopen_fd(ec_heal_t * heal) LOCK(&inode->lock); + ctx_inode = __ec_inode_get(inode, heal->xl); + if (ctx_inode != NULL) { + ctx_inode->bad &= ~(heal->good | heal->bad); + } + list_for_each_entry(fd, &inode->fd_list, inode_list) { - ctx = ec_fd_get(fd, heal->xl); - if ((ctx != NULL) && (ctx->loc.inode != NULL)) - { - mask = heal->bad & ~ctx->open; + ctx_fd = ec_fd_get(fd, heal->xl); + if (ctx_fd != NULL) { + mask = heal->bad & ~ctx_fd->open; if (mask != 0) { UNLOCK(&inode->lock); @@ -996,7 +999,7 @@ void ec_heal_reopen_fd(ec_heal_t * heal) } else { - flags = ctx->flags & 
~O_TRUNC; + flags = ctx_fd->flags & ~O_TRUNC; if ((flags & O_ACCMODE) == O_WRONLY) { flags &= ~O_ACCMODE; @@ -1179,7 +1182,13 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) return EC_STATE_HEAL_ENTRY_PREPARE; case EC_STATE_HEAL_ENTRY_PREPARE: - ec_heal_prepare(heal); + if (!heal->partial || (heal->iatt.ia_type == IA_IFDIR)) { + ec_heal_prepare(heal); + } + + if (heal->partial) { + return EC_STATE_HEAL_UNLOCK_ENTRY; + } return EC_STATE_HEAL_PRE_INODELK_LOCK; @@ -1240,6 +1249,8 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) case -EC_STATE_HEAL_ENTRY_PREPARE: case -EC_STATE_HEAL_PRE_INODELK_LOCK: case -EC_STATE_HEAL_PRE_INODE_LOOKUP: + case -EC_STATE_HEAL_UNLOCK_ENTRY: + case EC_STATE_HEAL_UNLOCK_ENTRY: ec_heal_entrylk(heal, ENTRYLK_UNLOCK); if (ec_heal_needs_data_rebuild(heal)) @@ -1395,7 +1406,7 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_heal_cbk_t func, void * data, loc_t * loc, - dict_t * xdata) + int32_t partial, dict_t *xdata) { ec_cbk_t callback = { .heal = func }; ec_fop_data_t * fop = NULL; @@ -1415,6 +1426,8 @@ void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->int32 = partial; + if (loc != NULL) { if (loc_copy(&fop->loc[0], loc) != 0) @@ -1474,14 +1487,15 @@ void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd, - dict_t * xdata) + int32_t partial, dict_t *xdata) { ec_fd_t * ctx = ec_fd_get(fd, this); - if ((ctx != NULL) && (ctx->loc.inode != NULL)) + if (ctx != NULL) { gf_log("ec", GF_LOG_DEBUG, "FHEAL ctx: flags=%X, open=%lX, bad=%lX", ctx->flags, ctx->open, ctx->bad); - ec_heal(frame, this, target, minimum, func, data, &ctx->loc, xdata); + ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial, + xdata); } } diff --git 
a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 534d694cc52..00ff5f63d47 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -334,83 +334,56 @@ int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src) return 1; } -int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name) +int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent) { char * str = NULL; int32_t error = 0; memset(parent, 0, sizeof(loc_t)); - if (loc->path == NULL) + if (loc->inode == NULL) { - gf_log(xl->name, GF_LOG_ERROR, "inode path missing in loc_t: %p", loc->parent); + gf_log(xl->name, GF_LOG_ERROR, "Invalid loc"); - return EINVAL; + error = EINVAL; + + goto out; } - if (loc->parent == NULL) + if (__is_root_gfid(loc->inode->gfid) || __is_root_gfid(loc->gfid) || + ((loc->path != NULL) && (strcmp(loc->path, "/") == 0))) { - if ((loc->inode == NULL) || !__is_root_gfid(loc->inode->gfid) || - (strcmp(loc->path, "/") != 0)) - { - gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for " - "loc_t (path=%s, name=%s)", - loc->path, loc->name); + parent->path = gf_strdup("/"); + if (parent->path == NULL) { + gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '/'"); - return EINVAL; - } + error = ENOMEM; - if (loc_copy(parent, loc) != 0) - { - return ENOMEM; + goto out; } - parent->name = NULL; + parent->gfid[15] = 1; + parent->inode = inode_find(loc->inode->table, parent->gfid); - if (name != NULL) - { - *name = NULL; - } + return 0; } - else - { - if (uuid_is_null(loc->parent->gfid) && (uuid_is_null(loc->pargfid))) - { - gf_log(xl->name, GF_LOG_ERROR, "Invalid parent inode " - "(path=%s, name=%s)", - loc->path, loc->name); - - return EINVAL; - } - uuid_copy(parent->gfid, loc->pargfid); + if (loc->path != NULL) { str = gf_strdup(loc->path); if (str == NULL) { gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path " - "'%s'", str); + "'%s'", loc->path); - return ENOMEM; - } - if (name != 
NULL) - { - *name = gf_strdup(basename(str)); - if (*name == NULL) - { - gf_log(xl->name, GF_LOG_ERROR, "Unable to get basename " - "of '%s'", str); - - error = ENOMEM; + error = ENOMEM; - goto out; - } - strcpy(str, loc->path); + goto out; } parent->path = gf_strdup(dirname(str)); if (parent->path == NULL) { gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of " - "'%s'", str); + "'%s'", loc->path); error = ENOMEM; @@ -427,19 +400,57 @@ int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name) goto out; } parent->name++; + } + if (loc->parent != NULL) { parent->inode = inode_ref(loc->parent); + uuid_copy(parent->gfid, loc->parent->gfid); + } + if (!uuid_is_null(loc->pargfid) && uuid_is_null(parent->gfid)) { + uuid_copy(parent->gfid, loc->pargfid); } - if ((loc->inode == NULL) || - ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) + if ((parent->inode == NULL) && (parent->path != NULL)) { - parent = NULL; + if (strcmp(parent->path, "/") == 0) { + parent->inode = inode_ref(loc->inode->table->root); + + goto out; + } + parent->inode = inode_resolve(loc->inode->table, (char *)parent->path); + if (parent->inode != NULL) { + goto out; + } + + gf_log(xl->name, GF_LOG_WARNING, "Unable to resolve parent inode"); + } + + if ((parent->inode == NULL) && !uuid_is_null(parent->gfid)) { + if (__is_root_gfid(parent->gfid)) { + parent->inode = inode_ref(loc->inode->table->root); + + goto out; + } + parent->inode = inode_find(loc->inode->table, parent->gfid); + if (parent->inode != NULL) { + goto out; + } + + gf_log(xl->name, GF_LOG_WARNING, "Unable to find parent inode"); + } + + if ((parent->inode == NULL) && (parent->path == NULL) && + uuid_is_null(parent->gfid)) { + gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for loc_t"); + + error = EINVAL; + + goto out; } out: GF_FREE(str); - if (parent != NULL) + if (error != 0) { loc_wipe(parent); } @@ -567,9 +578,6 @@ ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) return NULL; } - - 
INIT_LIST_HEAD(&ctx->entry_locks); - INIT_LIST_HEAD(&ctx->inode_locks); } } else diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 044e6239e8b..6fef9eb827b 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -39,8 +39,7 @@ int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value); int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config); int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config); -int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, - char ** name); +int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent); int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode, struct iatt * iatt); diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index bc3d94c7e61..78a189bc325 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -254,11 +254,11 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 0); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); } ec_lock(fop); @@ -337,7 +337,7 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -516,6 +516,8 @@ void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + if (fd != NULL) { fop->fd = fd_ref(fd); @@ -1230,7 +1232,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -1310,7 +1312,7 @@ int32_t ec_manager_readv(ec_fop_data_t 
* fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1350,6 +1352,8 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->size = size; fop->offset = offset; fop->uint32 = flags; @@ -1478,11 +1482,11 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 0); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 0); } ec_lock(fop); @@ -1581,7 +1585,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 0); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1741,6 +1745,8 @@ void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + if (fd != NULL) { fop->fd = fd_ref(fd); diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c index 7e557f12aa3..aa3682516ed 100644 --- a/xlators/cluster/ec/src/ec-inode-write.c +++ b/xlators/cluster/ec/src/ec-inode-write.c @@ -94,11 +94,11 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 1); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); } ec_lock(fop); @@ -186,7 +186,7 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -351,6 +351,8 @@ void ec_fremovexattr(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + if (fd != NULL) { fop->fd = fd_ref(fd); @@ -490,11 +492,11 @@ int32_t 
ec_manager_setattr(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 1); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); } ec_lock(fop); @@ -598,7 +600,7 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -772,6 +774,8 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = valid; if (fd != NULL) @@ -880,11 +884,11 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->fd == NULL) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 1); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); } ec_lock(fop); @@ -971,7 +975,7 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1138,6 +1142,8 @@ void ec_fsetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = flags; if (fd != NULL) @@ -1380,11 +1386,11 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) case EC_STATE_LOCK: if (fop->id == GF_FOP_TRUNCATE) { - ec_lock_prepare_inode(fop, &fop->loc[0]); + ec_lock_prepare_inode(fop, &fop->loc[0], 1); } else { - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); } ec_lock(fop); @@ -1497,7 +1503,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -1666,6 +1672,8 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target, goto 
out; } + fop->use_fd = 1; + fop->offset = offset; if (fd != NULL) @@ -2019,7 +2027,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state) /* Fall through */ case EC_STATE_LOCK: - ec_lock_prepare_fd(fop, fop->fd); + ec_lock_prepare_fd(fop, fop->fd, 1); ec_lock(fop); return EC_STATE_GET_SIZE_AND_VERSION; @@ -2125,7 +2133,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state) case -EC_STATE_LOCK_REUSE: case EC_STATE_LOCK_REUSE: - ec_lock_reuse(fop, 1); + ec_lock_reuse(fop); return EC_STATE_UNLOCK; @@ -2171,6 +2179,8 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target, fop->offset = offset; fop->uint32 = flags; + fop->use_fd = 1; + if (fd != NULL) { fop->fd = fd_ref(fd); diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index 71fd48c05cd..469bf77e5fd 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -497,6 +497,8 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->entrylk_cmd = cmd; fop->entrylk_type = type; @@ -951,6 +953,8 @@ void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = cmd; if (volume != NULL) @@ -1245,6 +1249,8 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target, goto out; } + fop->use_fd = 1; + fop->int32 = cmd; if (fd != NULL) |