diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2014-05-05 12:57:34 +0200 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-07-11 10:33:40 -0700 |
commit | ad112305a1c7452b13c92238b40ded80361838f3 (patch) | |
tree | 82dbf9aa0b77eb76d43c8b1ccb3ba58e61bc4e2a /xlators/cluster/ec/src/ec-heal.c | |
parent | 6b4702897bd56e29db4db06f8cf896f89df1133c (diff) |
cluster/ec: Added erasure code translator
Change-Id: I293917501d5c2ca4cdc6303df30cf0b568cea361
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7749
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src/ec-heal.c')
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 1470 |
1 files changed, 1470 insertions, 0 deletions
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c new file mode 100644 index 00000000000..37264f598b9 --- /dev/null +++ b/xlators/cluster/ec/src/ec-heal.c @@ -0,0 +1,1470 @@ +/* + Copyright (c) 2012 DataLab, s.l. <http://www.datalab.es> + + This file is part of the cluster/ec translator for GlusterFS. + + The cluster/ec translator for GlusterFS is free software: you can + redistribute it and/or modify it under the terms of the GNU General + Public License as published by the Free Software Foundation, either + version 3 of the License, or (at your option) any later version. + + The cluster/ec translator for GlusterFS is distributed in the hope + that it will be useful, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the cluster/ec translator for GlusterFS. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include "xlator.h" +#include "defaults.h" + +#include "ec-helpers.h" +#include "ec-common.h" +#include "ec-combine.h" +#include "ec-method.h" +#include "ec-fops.h" + +#include "ec-mem-types.h" +#include "ec-data.h" + +/* FOP: heal */ + +void ec_heal_exclude(ec_heal_t * heal, uintptr_t mask) +{ + LOCK(&heal->lock); + + heal->bad &= ~mask; + + UNLOCK(&heal->lock); +} + +void ec_heal_lookup_resume(ec_fop_data_t * fop) +{ + ec_heal_t * heal = fop->data; + ec_cbk_data_t * cbk; + uintptr_t good = 0, bad = 0; + + if (heal->lookup != NULL) + { + ec_fop_data_release(heal->lookup); + } + ec_fop_data_acquire(fop); + + list_for_each_entry(cbk, &fop->cbk_list, list) + { + if ((cbk->op_ret < 0) && (cbk->op_errno == ENOTCONN)) + { + continue; + } + + if (cbk == fop->answer) + { + if (cbk->op_ret >= 0) + { + heal->iatt = cbk->iatt[0]; + heal->version = cbk->version; + heal->raw_size = cbk->size; + heal->fop->pre_size = cbk->iatt[0].ia_size; + heal->fop->post_size = cbk->iatt[0].ia_size; + + if (!ec_loc_prepare(heal->xl, &heal->loc, cbk->inode, + &cbk->iatt[0])) + { + fop->answer = NULL; + fop->error = EIO; + + bad |= cbk->mask; + + continue; + } + } + + good |= cbk->mask; + } + else + { + bad |= cbk->mask; + } + } + + heal->good = good; + heal->bad = bad; + + heal->lookup = fop; + + ec_resume_parent(fop, fop->answer != NULL ? 
0 : fop->error); +} + +int32_t ec_heal_entry_lookup_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, + int32_t op_errno, inode_t * inode, + struct iatt * buf, dict_t * xdata, + struct iatt * postparent) +{ + ec_heal_lookup_resume(cookie); + + return 0; +} + +int32_t ec_heal_inode_lookup_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, + int32_t op_errno, inode_t * inode, + struct iatt * buf, dict_t * xdata, + struct iatt * postparent) +{ + ec_heal_lookup_resume(cookie); + + return 0; +} + +uintptr_t ec_heal_check(ec_fop_data_t * fop, uintptr_t * pgood) +{ + ec_cbk_data_t * cbk; + uintptr_t mask[2] = { 0, 0 }; + + list_for_each_entry(cbk, &fop->cbk_list, list) + { + mask[cbk->op_ret >= 0] |= cbk->mask; + } + + if (pgood != NULL) + { + *pgood = mask[1]; + } + + return mask[0]; +} + +void ec_heal_update(ec_fop_data_t * fop, int32_t is_open) +{ + ec_heal_t * heal = fop->data; + uintptr_t good, bad; + + bad = ec_heal_check(fop, &good); + + LOCK(&heal->lock); + + heal->bad &= ~bad; + if (is_open) + { + heal->open |= good; + } + + UNLOCK(&heal->lock); + + fop->error = 0; +} + +void ec_heal_avoid(ec_fop_data_t * fop) +{ + ec_heal_t * heal = fop->data; + uintptr_t bad; + + bad = ec_heal_check(fop, NULL); + + LOCK(&heal->lock); + + heal->good &= ~bad; + + UNLOCK(&heal->lock); +} + +int32_t ec_heal_mkdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, + int32_t op_ret, int32_t op_errno, inode_t * inode, + struct iatt * buf, struct iatt * preparent, + struct iatt * postparent, dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_mknod_cbk(call_frame_t * frame, void * cookie, xlator_t * this, + int32_t op_ret, int32_t op_errno, inode_t * inode, + struct iatt * buf, struct iatt * preparent, + struct iatt * postparent, dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_symlink_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, 
int32_t op_errno, + inode_t * inode, struct iatt * buf, + struct iatt * preparent, struct iatt * postparent, + dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_create_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + fd_t * fd, inode_t * inode, struct iatt * buf, + struct iatt * preparent, struct iatt * postparent, + dict_t * xdata) +{ + ec_heal_update(cookie, 1); + + return 0; +} + +int32_t ec_heal_setattr_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + struct iatt * preop_stbuf, + struct iatt * postop_stbuf, + dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_setxattr_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_removexattr_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, + int32_t op_errno, dict_t * xdata) +{ + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_link_cbk(call_frame_t * frame, void * cookie, xlator_t * this, + int32_t op_ret, int32_t op_errno, inode_t * inode, + struct iatt * buf, struct iatt * preparent, + struct iatt * postparent, dict_t * xdata) +{ + ec_fop_data_t * fop = cookie; + ec_heal_t * heal = fop->data; + uintptr_t good, bad; + + bad = ec_heal_check(fop, &good); + ec_heal_exclude(heal, good); + + if (bad != 0) + { + fop->error = 0; + + xdata = fop->xdata; + fop = fop->parent; + + ec_create(fop->frame, fop->xl, bad, EC_MINIMUM_ONE, + ec_heal_create_cbk, heal, &heal->loc, 0, + st_mode_from_ia(heal->iatt.ia_prot, IA_INVAL), + 0, heal->fd, xdata); + } + + return 0; +} + +int32_t ec_heal_target_open_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, + int32_t op_errno, fd_t * fd, dict_t * xdata) +{ + ec_heal_update(cookie, 1); + + return 0; +} + +int32_t 
ec_heal_source_open_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, + int32_t op_errno, fd_t * fd, dict_t * xdata) +{ + ec_heal_avoid(cookie); + + return 0; +} + +int32_t ec_heal_reopen_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + fd_t * fd, dict_t * xdata) +{ + ec_fop_data_t * fop = cookie; + ec_fd_t * ctx; + uintptr_t good; + + ec_heal_check(fop, &good); + + if (good != 0) + { + LOCK(&fd->lock); + + ctx = __ec_fd_get(fd, fop->xl); + if ((ctx != NULL) && (ctx->loc.inode != NULL)) + { + ctx->bad &= ~good; + ctx->open |= good; + } + + UNLOCK(&fd->lock); + } + + return 0; +} + +int32_t ec_heal_create(ec_heal_t * heal, uintptr_t mask, int32_t try_link) +{ + loc_t loc; + dict_t * xdata; + + xdata = dict_new(); + if (xdata == NULL) + { + return ENOMEM; + } + + if (dict_set_static_bin(xdata, "gfid-req", heal->iatt.ia_gfid, + sizeof(uuid_t)) != 0) + { + dict_unref(xdata); + + return ENOMEM; + } + + if ((heal->iatt.ia_type == IA_IFREG) && try_link) + { + memset(&loc, 0, sizeof(loc)); + loc.inode = heal->loc.inode; + uuid_copy(loc.gfid, heal->iatt.ia_gfid); + + ec_link(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_link_cbk, heal, &loc, &heal->loc, xdata); + + dict_unref(xdata); + + return 0; + } + + switch (heal->iatt.ia_type) + { + case IA_IFDIR: + ec_mkdir(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_mkdir_cbk, heal, &heal->loc, + st_mode_from_ia(heal->iatt.ia_prot, IA_INVAL), + 0, xdata); + + break; + + case IA_IFLNK: + ec_symlink(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_symlink_cbk, heal, heal->symlink, &heal->loc, + 0, xdata); + + break; + + case IA_IFREG: + ec_create(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_create_cbk, heal, &heal->loc, 0, + st_mode_from_ia(heal->iatt.ia_prot, IA_INVAL), + 0, heal->fd, xdata); + + break; + + default: + ec_mknod(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_mknod_cbk, heal, 
&heal->loc, + st_mode_from_ia(heal->iatt.ia_prot, IA_INVAL), + heal->iatt.ia_rdev, 0, xdata); + + break; + } + + dict_unref(xdata); + + return 0; +} + +void ec_heal_recreate(ec_fop_data_t * fop) +{ + ec_cbk_data_t * cbk; + ec_heal_t * heal = fop->data; + uintptr_t mask = 0; + + if (heal->iatt.ia_type == IA_INVAL) + { + return; + } + + list_for_each_entry(cbk, &fop->cbk_list, list) + { + if ((cbk->op_ret >= 0) || (cbk->op_errno == ENOENT) || + (cbk->op_errno == ENOTDIR)) + { + mask |= cbk->mask; + } + } + + if (mask != 0) + { + ec_heal_create(heal, mask, 0); + } +} + +int32_t ec_heal_rmdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this, + int32_t op_ret, int32_t op_errno, + struct iatt * preparent, struct iatt * postparent, + dict_t * xdata) +{ + ec_heal_update(cookie, 0); + ec_heal_recreate(cookie); + + return 0; +} + +int32_t ec_heal_unlink_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + struct iatt * preparent, struct iatt * postparent, + dict_t * xdata) +{ + ec_heal_update(cookie, 0); + ec_heal_recreate(cookie); + + return 0; +} + +int32_t ec_heal_init(ec_fop_data_t * fop) +{ + ec_t * ec = fop->xl->private; + struct iobuf_pool * pool; + inode_t * inode; + ec_inode_t * ctx; + ec_heal_t * heal = NULL; + int32_t error = 0; + + inode = fop->loc[0].inode; + if (inode == NULL) + { + gf_log(fop->xl->name, GF_LOG_WARNING, "Unable to start inode healing " + "because there is not enough " + "information"); + + return ENODATA; + } + + LOCK(&inode->lock); + + ctx = __ec_inode_get(inode, fop->xl); + if (ctx == NULL) + { + error = EIO; + + goto out; + } + + if (ctx->heal != NULL) + { + error = EEXIST; + + goto out; + } + + heal = GF_MALLOC(sizeof(ec_heal_t), ec_mt_ec_heal_t); + if (heal == NULL) + { + error = ENOMEM; + + goto out; + } + + memset(heal, 0, sizeof(ec_heal_t)); + + if (loc_copy(&heal->loc, &fop->loc[0]) != 0) + { + error = ENOMEM; + + goto out; + } + if (uuid_is_null(heal->loc.gfid)) + { + 
uuid_copy(heal->loc.gfid, heal->loc.inode->gfid); + } + + LOCK_INIT(&heal->lock); + + heal->xl = fop->xl; + heal->fop = fop; + pool = fop->xl->ctx->iobuf_pool; + heal->size = iobpool_default_pagesize(pool) * ec->fragments; + + fop->data = heal; + + ctx->heal = heal; + heal = NULL; + +out: + UNLOCK(&inode->lock); + + GF_FREE(heal); + + return error; +} + +void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd) +{ + loc_t loc; + char * name; + int32_t error; + + error = ec_loc_parent(heal->xl, &heal->loc, &loc, &name); + if (error != 0) + { + ec_fop_set_error(heal->fop, error); + + return; + } + + ec_entrylk(heal->fop->frame, heal->xl, -1, EC_MINIMUM_ALL, NULL, NULL, + heal->xl->name, &loc, name, cmd, ENTRYLK_WRLCK, NULL); + + loc_wipe(&loc); + GF_FREE(name); +} + +void ec_heal_inodelk(ec_heal_t * heal, int32_t type, int32_t use_fd, + off_t offset, size_t size) +{ + struct gf_flock flock; + + flock.l_type = type; + flock.l_whence = SEEK_SET; + flock.l_start = offset; + flock.l_len = size; + flock.l_pid = 0; + flock.l_owner.len = 0; + + if (use_fd) + { + ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask, + EC_MINIMUM_ALL, NULL, NULL, heal->xl->name, heal->fd, + F_SETLKW, &flock, NULL); + } + else + { + ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL, + NULL, NULL, heal->xl->name, &heal->loc, F_SETLKW, &flock, + NULL); + } +} + +void ec_heal_lookup(ec_heal_t * heal) +{ + dict_t * xdata; + int32_t error = ENOMEM; + + xdata = dict_new(); + if (xdata == NULL) + { + goto out; + } + if (dict_set_uint64(xdata, "list-xattr", 0) != 0) + { + goto out; + } + + ec_lookup(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_MIN, + ec_heal_inode_lookup_cbk, heal, &heal->loc, xdata); + + error = 0; + +out: + if (xdata != NULL) + { + dict_unref(xdata); + } + + ec_fop_set_error(heal->fop, error); +} + +void ec_heal_remove(ec_heal_t * heal, ec_cbk_data_t * cbk) +{ + if (cbk->iatt[0].ia_type == IA_IFDIR) + { + // TODO: Remove directory recursively ? 
+ ec_rmdir(heal->fop->frame, heal->xl, cbk->mask, EC_MINIMUM_ONE, + ec_heal_rmdir_cbk, heal, &heal->loc, 0, NULL); + } + else + { + ec_unlink(heal->fop->frame, heal->xl, cbk->mask, EC_MINIMUM_ONE, + ec_heal_unlink_cbk, heal, &heal->loc, 0, NULL); + } +} + +void ec_heal_remove_others(ec_heal_t * heal) +{ + struct list_head * item; + ec_cbk_data_t * cbk; + + item = heal->lookup->cbk_list.next; + do + { + item = item->next; + cbk = list_entry(item, ec_cbk_data_t, list); + + if (cbk->op_ret < 0) + { + if ((cbk->op_errno != ENOENT) && (cbk->op_errno != ENOTDIR)) + { + gf_log(heal->xl->name, GF_LOG_WARNING, "Don't know how to " + "remove inode with " + "error %d", + cbk->op_errno); + } + + ec_heal_exclude(heal, cbk->mask); + + continue; + } + + ec_heal_remove(heal, cbk); + } while (item->next != &heal->lookup->cbk_list); +} + +void ec_heal_prepare_others(ec_heal_t * heal) +{ + struct list_head * item; + ec_cbk_data_t * cbk; + + item = heal->lookup->cbk_list.next; + while (item->next != &heal->lookup->cbk_list) + { + item = item->next; + cbk = list_entry(item, ec_cbk_data_t, list); + + if (cbk->op_ret < 0) + { + if (cbk->op_errno == ENOENT) + { + ec_heal_create(heal, cbk->mask, 1); + } + else + { + gf_log(heal->xl->name, GF_LOG_ERROR, "Don't know how to " + "heal error %d", + cbk->op_errno); + + ec_heal_exclude(heal, cbk->mask); + } + } + else + { + if ((heal->iatt.ia_type != cbk->iatt[0].ia_type) || + (uuid_compare(heal->iatt.ia_gfid, cbk->iatt[0].ia_gfid) != 0)) + { + ec_heal_remove(heal, cbk); + } + } + } +} + +int32_t ec_heal_readlink_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + const char * path, struct iatt * buf, + dict_t * xdata) +{ + ec_fop_data_t * fop = cookie; + ec_heal_t * heal = fop->data; + + if (op_ret >= 0) + { + heal->symlink = gf_strdup(path); + if (heal->symlink != NULL) + { + ec_heal_prepare_others(heal); + } + else + { + ec_fop_set_error(fop, EIO); + } + } + + return 0; +} + +ec_cbk_data_t * 
/*
 * Prepares the entry heal using the answer of the entry lookup.
 *
 * heal->available is set to heal->good. Then:
 *  - If the lookup answer failed with ENOENT or ENOTDIR, the entry is
 *    removed from the other answers (ec_heal_remove_others()). Any other
 *    error is only logged.
 *  - If the lookup answer succeeded and the object is a regular file, a
 *    new fd is created on heal->loc.inode, heal->loc is copied into its ec
 *    context and the context flags are set to O_RDWR. A failure in any of
 *    these steps reports ENOMEM.
 *  - For a symbolic link, a readlink request is sent to the mask of the
 *    answer (the remaining work is done from ec_heal_readlink_cbk). For any
 *    other type ec_heal_prepare_others() is called directly.
 *
 * The resulting error (0 on success) is reported with ec_fop_set_error()
 * on heal->fop.
 */
void ec_heal_prepare(ec_heal_t * heal)
{
    ec_cbk_data_t * answer;
    ec_fd_t * fd_ctx;
    int32_t error = 0;

    heal->available = heal->good;

    answer = heal->lookup->answer;
    if (answer->op_ret < 0)
    {
        switch (answer->op_errno)
        {
            case ENOENT:
            case ENOTDIR:
                ec_heal_remove_others(heal);

                break;

            default:
                gf_log(heal->xl->name, GF_LOG_ERROR,
                       "Don't know how to heal error %d", answer->op_errno);

                break;
        }

        goto out;
    }

    if (heal->iatt.ia_type == IA_IFREG)
    {
        heal->fd = fd_create(heal->loc.inode, heal->fop->frame->root->pid);
        if (heal->fd == NULL)
        {
            gf_log(heal->xl->name, GF_LOG_ERROR,
                   "Unable to create a new file descriptor");

            error = ENOMEM;

            goto out;
        }

        fd_ctx = ec_fd_get(heal->fd, heal->xl);
        if ((fd_ctx == NULL) || (loc_copy(&fd_ctx->loc, &heal->loc) != 0))
        {
            error = ENOMEM;

            goto out;
        }

        fd_ctx->flags = O_RDWR;
    }

    if (heal->iatt.ia_type != IA_IFLNK)
    {
        ec_heal_prepare_others(heal);
    }
    else
    {
        ec_readlink(heal->fop->frame, heal->xl, answer->mask, EC_MINIMUM_ONE,
                    ec_heal_readlink_cbk, heal, &heal->loc,
                    heal->iatt.ia_size, NULL);
    }

out:
    ec_fop_set_error(heal->fop, error);
}
(uuid_compare(heal->iatt.ia_gfid, cbk->iatt[0].ia_gfid) != 0)) + { + ec_heal_exclude(heal, cbk->mask); + } + else + { + mask |= cbk->mask & ~heal->open; + } + } + + if (mask != 0) + { + ec_open(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_target_open_cbk, heal, &heal->loc, O_RDWR | O_TRUNC, + heal->fd, NULL); + + open |= mask; + } + + return (open != 0); +} + +void ec_heal_setxattr_others(ec_heal_t * heal) +{ + ec_cbk_data_t * cbk; + dict_t * xdata; + int32_t error = ENOMEM; + + if ((heal->good != 0) && (heal->bad != 0)) + { + cbk = heal->lookup->answer; + xdata = cbk->xdata; + + if ((cbk->iatt[0].ia_type == IA_IFREG) || + (cbk->iatt[0].ia_type == IA_IFDIR)) + { + if (ec_dict_set_number(xdata, EC_XATTR_VERSION, cbk->version) != 0) + { + goto out; + } + if (cbk->iatt[0].ia_type == IA_IFREG) + { + if (ec_dict_set_number(xdata, EC_XATTR_SIZE, + cbk->iatt[0].ia_size) != 0) + { + goto out; + } + } + } + + ec_setxattr(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE, + ec_heal_setxattr_cbk, heal, &heal->loc, xdata, 0, NULL); + } + + error = 0; + +out: + ec_fop_set_error(heal->fop, error); +} + +int32_t ec_heal_xattr_clean(dict_t * dict, char * key, data_t * data, + void * arg) +{ + dict_t * base = arg; + + if (dict_get(base, key) == NULL) + { + if (dict_set_static_bin(dict, key, dict, 0) != 0) + { + return -1; + } + } + else + { + dict_del(dict, key); + } + + return 0; +} + +void ec_heal_removexattr_others(ec_heal_t * heal) +{ + struct list_head * item; + ec_cbk_data_t * cbk; + dict_t * xdata; + + if ((heal->good == 0) || (heal->bad == 0)) + { + return; + } + + xdata = heal->lookup->answer->xdata; + item = heal->lookup->cbk_list.next; + while (item->next != &heal->lookup->cbk_list) + { + item = item->next; + cbk = list_entry(item, ec_cbk_data_t, list); + + if (cbk->op_ret >= 0) + { + if (dict_foreach(cbk->xdata, ec_heal_xattr_clean, xdata) == 0) + { + ec_removexattr(heal->fop->frame, heal->xl, cbk->mask, + EC_MINIMUM_ONE, ec_heal_removexattr_cbk, 
heal, + &heal->loc, "", cbk->xdata); + } + } + } +} + +void ec_heal_attr(ec_heal_t * heal) +{ + if ((heal->good != 0) && (heal->bad != 0)) + { + ec_setattr(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE, + ec_heal_setattr_cbk, heal, &heal->loc, &heal->iatt, + GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID | + GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL); + } +} + +int32_t ec_heal_needs_data_rebuild(ec_heal_t * heal) +{ + ec_fop_data_t * fop = heal->lookup; + ec_cbk_data_t * cbk = NULL; + uintptr_t bad = 0; + + if ((heal->fop->error != 0) || (heal->good == 0) || + (heal->iatt.ia_type != IA_IFREG)) + { + return 0; + } + + list_for_each_entry(cbk, &fop->cbk_list, list) + { + if ((cbk->op_ret >= 0) && + ((cbk->size != heal->raw_size) || (cbk->version != heal->version))) + { + bad |= cbk->mask; + } + } + + heal->bad = bad; + + return (bad != 0); +} + +void ec_heal_open(ec_heal_t * heal) +{ + if (!ec_heal_needs_data_rebuild(heal)) + { + return; + } + + if (ec_heal_open_others(heal)) + { + ec_open(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN, + ec_heal_source_open_cbk, heal, &heal->loc, O_RDONLY, heal->fd, + NULL); + } +} + +void ec_heal_reopen_fd(ec_heal_t * heal) +{ + inode_t * inode; + fd_t * fd; + ec_fd_t * ctx; + uintptr_t mask; + int32_t flags; + + inode = heal->loc.inode; + + LOCK(&inode->lock); + + list_for_each_entry(fd, &inode->fd_list, inode_list) + { + ctx = ec_fd_get(fd, heal->xl); + if ((ctx != NULL) && (ctx->loc.inode != NULL)) + { + mask = heal->bad & ~ctx->open; + if (mask != 0) + { + UNLOCK(&inode->lock); + + if (heal->iatt.ia_type == IA_IFDIR) + { + ec_opendir(heal->fop->frame, heal->xl, mask, + EC_MINIMUM_ONE, ec_heal_reopen_cbk, NULL, + &heal->loc, fd, NULL); + } + else + { + flags = ctx->flags & ~O_TRUNC; + if ((flags & O_ACCMODE) == O_WRONLY) + { + flags &= ~O_ACCMODE; + flags |= O_RDWR; + } + + ec_open(heal->fop->frame, heal->xl, mask, EC_MINIMUM_ONE, + ec_heal_reopen_cbk, NULL, &heal->loc, flags, fd, + NULL); + } + + 
LOCK(&inode->lock); + } + } + } + + UNLOCK(&inode->lock); +} + +int32_t ec_heal_writev_cbk(call_frame_t * frame, void * cookie, + xlator_t * this, int32_t op_ret, int32_t op_errno, + struct iatt * prebuf, struct iatt * postbuf, + dict_t * xdata) +{ + ec_trace("WRITE_CBK", cookie, "ret=%d, errno=%d", op_ret, op_errno); + + ec_heal_update(cookie, 0); + + return 0; +} + +int32_t ec_heal_readv_cbk(call_frame_t * frame, void * cookie, xlator_t * this, + int32_t op_ret, int32_t op_errno, + struct iovec * vector, int32_t count, + struct iatt * stbuf, struct iobref * iobref, + dict_t * xdata) +{ + ec_fop_data_t * fop = cookie; + ec_heal_t * heal = fop->data; + + ec_trace("READ_CBK", fop, "ret=%d, errno=%d", op_ret, op_errno); + + ec_heal_avoid(fop); + + if (op_ret > 0) + { + ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE, + ec_heal_writev_cbk, heal, heal->fd, vector, count, + heal->offset, 0, iobref, NULL); + } + else + { + heal->done = 1; + } + + return 0; +} + +void ec_heal_data(ec_heal_t * heal) +{ + ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad); + + if ((heal->good != 0) && (heal->bad != 0) && + (heal->iatt.ia_type == IA_IFREG)) + { + ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN, + ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset, + 0, NULL); + } +} + +void ec_heal_dispatch(ec_heal_t * heal) +{ + ec_fop_data_t * fop = heal->fop; + ec_cbk_data_t * cbk; + inode_t * inode; + ec_inode_t * ctx; + int32_t error; + + inode = heal->loc.inode; + + LOCK(&inode->lock); + + ctx = __ec_inode_get(inode, heal->xl); + if (ctx != NULL) + { + ctx->bad &= ~heal->good; + ctx->heal = NULL; + } + + fop->data = NULL; + + UNLOCK(&inode->lock); + + error = fop->error; + + cbk = ec_cbk_data_allocate(fop->frame, heal->xl, fop, fop->id, 0, + error == 0 ? 
0 : -1, error); + if (cbk != NULL) + { + cbk->uintptr[0] = heal->available; + cbk->uintptr[1] = heal->good; + cbk->uintptr[2] = heal->bad; + + ec_combine(cbk, NULL); + + fop->answer = cbk; + } + else if (error == 0) + { + error = ENOMEM; + } + + if (heal->lookup != NULL) + { + ec_fop_data_release(heal->lookup); + } + if (heal->fd != NULL) + { + fd_unref(heal->fd); + } + GF_FREE(heal->symlink); + loc_wipe(&heal->loc); + + LOCK_DESTROY(&heal->lock); + + GF_FREE(heal); + + ec_fop_set_error(heal->fop, error); +} + +void ec_wind_heal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +{ + ec_cbk_data_t * cbk; + ec_heal_t * heal = fop->data; + + ec_trace("WIND", fop, "idx=%d", idx); + + cbk = ec_cbk_data_allocate(fop->req_frame, fop->xl, fop, EC_FOP_HEAL, idx, + fop->error == 0 ? 0 : -1, fop->error); + if (cbk != NULL) + { + cbk->uintptr[0] = heal->available; + cbk->uintptr[1] = heal->good; + cbk->uintptr[2] = heal->bad; + + ec_combine(cbk, NULL); + } + + ec_complete(fop); +} + +int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state) +{ + ec_cbk_data_t * cbk; + ec_heal_t * heal = fop->data; + + switch (state) + { + case EC_STATE_INIT: + ec_owner_set(fop->frame, fop->frame->root); + + fop->error = ec_heal_init(fop); + if (fop->error != 0) + { + return EC_STATE_REPORT; + } + + case EC_STATE_DISPATCH: + ec_heal_entrylk(fop->data, ENTRYLK_LOCK); + + return EC_STATE_HEAL_ENTRY_LOOKUP; + + case EC_STATE_HEAL_ENTRY_LOOKUP: + ec_lookup(fop->frame, heal->xl, fop->mask, EC_MINIMUM_MIN, + ec_heal_entry_lookup_cbk, heal, &heal->loc, NULL); + + return EC_STATE_HEAL_ENTRY_PREPARE; + + case EC_STATE_HEAL_ENTRY_PREPARE: + ec_heal_prepare(heal); + + return EC_STATE_HEAL_PRE_INODELK_LOCK; + + case EC_STATE_HEAL_PRE_INODELK_LOCK: + ec_heal_inodelk(heal, F_WRLCK, 0, 0, 0); + + return EC_STATE_HEAL_PRE_INODE_LOOKUP; + + case EC_STATE_HEAL_PRE_INODE_LOOKUP: + ec_heal_lookup(heal); + + return EC_STATE_HEAL_XATTRIBUTES_REMOVE; + + case EC_STATE_HEAL_XATTRIBUTES_REMOVE: + 
ec_heal_removexattr_others(heal); + + return EC_STATE_HEAL_XATTRIBUTES_SET; + + case EC_STATE_HEAL_XATTRIBUTES_SET: + ec_heal_setxattr_others(heal); + + return EC_STATE_HEAL_ATTRIBUTES; + + case EC_STATE_HEAL_ATTRIBUTES: + ec_heal_attr(heal); + + return EC_STATE_HEAL_OPEN; + + case EC_STATE_HEAL_OPEN: + ec_heal_open(heal); + + return EC_STATE_HEAL_REOPEN_FD; + + case EC_STATE_HEAL_REOPEN_FD: + ec_heal_reopen_fd(heal); + + return EC_STATE_HEAL_UNLOCK; + + case -EC_STATE_HEAL_XATTRIBUTES_REMOVE: + case -EC_STATE_HEAL_XATTRIBUTES_SET: + case -EC_STATE_HEAL_ATTRIBUTES: + case -EC_STATE_HEAL_OPEN: + case -EC_STATE_HEAL_REOPEN_FD: + case -EC_STATE_HEAL_UNLOCK: + case EC_STATE_HEAL_UNLOCK: + ec_heal_inodelk(heal, F_UNLCK, 0, 0, 0); + + case -EC_STATE_HEAL_ENTRY_PREPARE: + case -EC_STATE_HEAL_PRE_INODELK_LOCK: + case -EC_STATE_HEAL_PRE_INODE_LOOKUP: + ec_heal_entrylk(heal, ENTRYLK_UNLOCK); + + if (ec_heal_needs_data_rebuild(heal)) + { + return EC_STATE_HEAL_DATA_LOCK; + } + + return EC_STATE_HEAL_DISPATCH; + + case EC_STATE_HEAL_DATA_LOCK: + if (heal->done) + { + return EC_STATE_HEAL_POST_INODELK_LOCK; + } + + ec_heal_inodelk(heal, F_WRLCK, 1, heal->offset, heal->size); + + return EC_STATE_HEAL_DATA_COPY; + + case EC_STATE_HEAL_DATA_COPY: + ec_heal_data(heal); + + return EC_STATE_HEAL_DATA_UNLOCK; + + case -EC_STATE_HEAL_DATA_COPY: + case -EC_STATE_HEAL_DATA_UNLOCK: + case EC_STATE_HEAL_DATA_UNLOCK: + ec_heal_inodelk(heal, F_UNLCK, 1, heal->offset, heal->size); + + heal->offset += heal->size; + + return EC_STATE_HEAL_DATA_LOCK; + + case EC_STATE_HEAL_POST_INODELK_LOCK: + ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0); + + return EC_STATE_HEAL_POST_INODE_LOOKUP; + + case EC_STATE_HEAL_POST_INODE_LOOKUP: + ec_heal_lookup(heal); + + return EC_STATE_HEAL_SETATTR; + + case EC_STATE_HEAL_SETATTR: + ec_setattr(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE, + ec_heal_setattr_cbk, heal, &heal->loc, &heal->iatt, + GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID | + 
GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL); + + return EC_STATE_HEAL_POST_INODELK_UNLOCK; + + case -EC_STATE_HEAL_SETATTR: + case -EC_STATE_HEAL_POST_INODELK_UNLOCK: + case EC_STATE_HEAL_POST_INODELK_UNLOCK: + ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0); + + return EC_STATE_HEAL_DISPATCH; + + case -EC_STATE_HEAL_POST_INODELK_LOCK: + case -EC_STATE_HEAL_POST_INODE_LOOKUP: + case -EC_STATE_HEAL_ENTRY_LOOKUP: + case -EC_STATE_HEAL_DATA_LOCK: + case -EC_STATE_HEAL_DISPATCH: + case EC_STATE_HEAL_DISPATCH: + ec_heal_dispatch(heal); + + return EC_STATE_PREPARE_ANSWER; + + case EC_STATE_PREPARE_ANSWER: + cbk = fop->answer; + if (cbk != NULL) + { + if (!ec_dict_combine(cbk, EC_COMBINE_XDATA)) + { + if (cbk->op_ret >= 0) + { + cbk->op_ret = -1; + cbk->op_errno = EIO; + } + } + if (cbk->op_ret < 0) + { + ec_fop_set_error(fop, cbk->op_errno); + } + } + else + { + ec_fop_set_error(fop, EIO); + } + + return EC_STATE_REPORT; + + case EC_STATE_REPORT: + cbk = fop->answer; + + GF_ASSERT(cbk != NULL); + + if (fop->fd == NULL) + { + if (fop->cbks.heal != NULL) + { + fop->cbks.heal(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->uintptr[0], + cbk->uintptr[1], cbk->uintptr[2], + cbk->xdata); + } + } + else + { + if (fop->cbks.fheal != NULL) + { + fop->cbks.fheal(fop->req_frame, fop, fop->xl, cbk->op_ret, + cbk->op_errno, cbk->uintptr[0], + cbk->uintptr[1], cbk->uintptr[2], + cbk->xdata); + } + } + + return EC_STATE_END; + + case -EC_STATE_DISPATCH: + case -EC_STATE_PREPARE_ANSWER: + case -EC_STATE_REPORT: + GF_ASSERT(fop->error != 0); + + if (fop->fd == NULL) + { + if (fop->cbks.heal != NULL) + { + fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, + fop->error, 0, 0, 0, NULL); + } + } + else + { + if (fop->cbks.fheal != NULL) + { + fop->cbks.fheal(fop->req_frame, fop, fop->xl, -1, + fop->error, 0, 0, 0, NULL); + } + } + + return EC_STATE_END; + + default: + gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s", + state, ec_fop_name(fop->id)); + + return 
EC_STATE_END; + } +} + +void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target, + int32_t minimum, fop_heal_cbk_t func, void * data, loc_t * loc, + dict_t * xdata) +{ + ec_cbk_t callback = { .heal = func }; + ec_fop_data_t * fop = NULL; + int32_t error = EIO; + + gf_log("ec", GF_LOG_TRACE, "EC(HEAL) %p", frame); + + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(NULL, this, EC_FOP_HEAL, + EC_FLAG_UPDATE_LOC_INODE, target, minimum, + ec_wind_heal, ec_manager_heal, callback, data); + if (fop == NULL) + { + goto out; + } + + if (loc != NULL) + { + if (loc_copy(&fop->loc[0], loc) != 0) + { + gf_log(this->name, GF_LOG_ERROR, "Failed to copy a location."); + + goto out; + } + } + if (xdata != NULL) + { + fop->xdata = dict_ref(xdata); + if (fop->xdata == NULL) + { + gf_log(this->name, GF_LOG_ERROR, "Failed to reference a " + "dictionary."); + + goto out; + } + } + + error = 0; + +out: + if (fop != NULL) + { + ec_manager(fop, error); + } + else + { + func(frame, NULL, this, -1, EIO, 0, 0, 0, NULL); + } +} + +/* FOP: fheal */ + +void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx) +{ + ec_cbk_data_t * cbk; + ec_heal_t * heal = fop->data; + + ec_trace("WIND", fop, "idx=%d", idx); + + cbk = ec_cbk_data_allocate(fop->req_frame, fop->xl, fop, EC_FOP_FHEAL, idx, + fop->error == 0 ? 
0 : -1, fop->error); + if (cbk != NULL) + { + cbk->uintptr[0] = heal->available; + cbk->uintptr[1] = heal->good; + cbk->uintptr[2] = heal->bad; + + ec_combine(cbk, NULL); + } + + ec_complete(fop); +} + +void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target, + int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd, + dict_t * xdata) +{ + ec_fd_t * ctx = ec_fd_get(fd, this); + + if ((ctx != NULL) && (ctx->loc.inode != NULL)) + { + gf_log("ec", GF_LOG_DEBUG, "FHEAL ctx: flags=%X, open=%lX, bad=%lX", + ctx->flags, ctx->open, ctx->bad); + ec_heal(frame, this, target, minimum, func, data, &ctx->loc, xdata); + } +} |