Diffstat (limited to 'xlators/features/shard/src/shard.c')
-rw-r--r--  xlators/features/shard/src/shard.c | 914
1 file changed, 716 insertions(+), 198 deletions(-)
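One of the first hunks in the diff below replaces the pointer-aliasing cast handed to __inode_ctx_set() with an explicit round-trip through uintptr_t into a uint64_t value. A minimal, self-contained sketch of that pattern follows; demo_inode_t, demo_ctx_t, ctx_store() and ctx_fetch() are hypothetical stand-ins for the GlusterFS inode-ctx helpers, not the real API.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the uint64_t-wide context slot that
 * __inode_ctx_set() writes into. */
typedef struct {
    uint64_t ctx_slot;
} demo_inode_t;

typedef struct {
    int block_num;
} demo_ctx_t;

/* Store a heap pointer in the 64-bit slot. Converting through uintptr_t
 * makes the pointer-to-integer conversion explicit, instead of
 * reinterpreting the address of the pointer variable as a uint64_t *,
 * which is what the old (uint64_t *)&ctx_p cast did. */
static void
ctx_store(demo_inode_t *inode, demo_ctx_t *ctx)
{
    inode->ctx_slot = (uint64_t)(uintptr_t)ctx;
}

static demo_ctx_t *
ctx_fetch(demo_inode_t *inode)
{
    return (demo_ctx_t *)(uintptr_t)inode->ctx_slot;
}

int
main(void)
{
    demo_inode_t inode = {0};
    demo_ctx_t *ctx = calloc(1, sizeof(*ctx));

    if (!ctx)
        return 1;
    ctx->block_num = 4;
    ctx_store(&inode, ctx);
    printf("block_num=%d\n", ctx_fetch(&inode)->block_num);
    free(ctx);
    return 0;
}
```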
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 5ffeaa63628..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -12,9 +12,9 @@ #include "shard.h" #include "shard-mem-types.h" -#include "byte-order.h" -#include "defaults.h" -#include "statedump.h" +#include <glusterfs/byte-order.h> +#include <glusterfs/defaults.h> +#include <glusterfs/statedump.h> static gf_boolean_t __is_shard_dir(uuid_t gfid) @@ -80,7 +80,8 @@ __shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) INIT_LIST_HEAD(&ctx_p->ilist); INIT_LIST_HEAD(&ctx_p->to_fsync_list); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); if (ret < 0) { GF_FREE(ctx_p); return ret; @@ -273,6 +274,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, * of the to_fsync_list. */ inode_ref(base_inode); + inode_ref(shard_inode); LOCK(&base_inode->lock); LOCK(&shard_inode->lock); @@ -286,8 +288,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, /* Unref the base inode corresponding to the ref above, if the shard is * found to be already part of the fsync list. */ - if (ret != 0) + if (ret != 0) { inode_unref(base_inode); + inode_unref(shard_inode); + } return ret; } @@ -509,6 +513,9 @@ shard_local_wipe(shard_local_t *local) loc_wipe(&local->int_entrylk.loc); loc_wipe(&local->newloc); + if (local->name) + GF_FREE(local->name); + if (local->int_entrylk.basename) GF_FREE(local->int_entrylk.basename); if (local->fd) @@ -686,8 +693,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ctx->block_num = block_num; list_add_tail(&ctx->ilist, &priv->ilist_head); priv->inode_count++; - if (base_inode) - ctx->base_inode = inode_ref(base_inode); + ctx->base_inode = inode_ref(base_inode); } else { /*If on the other hand there is no available slot for this inode * in the list, delete the lru inode from the head of the list, @@ -734,6 +740,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); inode_forget(lru_inode, 0); } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. + */ + inode_unref(lru_inode); fsync_inode = lru_inode; if (lru_base_inode) inode_unref(lru_base_inode); @@ -758,8 +768,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, else gf_uuid_copy(ctx->base_gfid, gfid); ctx->block_num = block_num; - if (base_inode) - ctx->base_inode = inode_ref(base_inode); + ctx->base_inode = inode_ref(base_inode); list_add_tail(&ctx->ilist, &priv->ilist_head); } } else { @@ -879,26 +888,34 @@ int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, call_frame_t *frame, int32_t op_ret) { - shard_local_t *local = NULL; + shard_local_t *local = frame->local; - local = frame->local; + /* the below 3 variables are required because, in SHARD_STACK_UNWIND() + macro, there is a check for local being null. So many static analyzers + backtrace the code with assumption of possible (local == NULL) case, + and complains for below lines. By handling it like below, we overcome + the warnings */ + + struct iatt *prebuf = ((local) ? &local->prebuf : NULL); + struct iatt *postbuf = ((local) ? &local->postbuf : NULL); + dict_t *xattr_rsp = ((local) ? 
local->xattr_rsp : NULL); switch (fop) { case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); + SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); break; default: gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -987,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) } int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); + +int shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, shard_post_resolve_fop_handler_t post_res_handler) { @@ -1003,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; shard_local_t *local = NULL; + uint64_t resolve_count = 0; priv = this->private; local = frame->local; local->call_count = 0; shard_idx_iter = local->first_block; res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exists + * and directly issue mknod to crete shards. + * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + if (res_inode) gf_uuid_copy(gfid, res_inode->gfid); else gf_uuid_copy(gfid, local->base_gfid); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { + while (shard_idx_iter <= resolve_count) { i++; if (shard_idx_iter == 0) { local->inode_list[i] = inode_ref(res_inode); @@ -1130,6 +1177,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, { int ret = -1; int64_t *size_attr = NULL; + int64_t delta_blocks = 0; inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; @@ -1151,13 +1199,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, /* If both size and block count have not changed, then skip the xattrop. 
*/ - if ((local->delta_size + local->hole_size == 0) && - (local->delta_blocks == 0)) { + delta_blocks = GF_ATOMIC_GET(local->delta_blocks); + if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { goto out; } ret = shard_set_size_attrs(local->delta_size + local->hole_size, - local->delta_blocks, &size_attr); + delta_blocks, &size_attr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); @@ -1461,16 +1509,45 @@ int shard_start_background_deletion(xlator_t *this) { int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; call_frame_t *cleanup_frame = NULL; + priv = this->private; + + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; + cleanup_frame = create_frame(this, this->ctx->pool); if (!cleanup_frame) { gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, "Failed to create " "new frame to delete shards"); - return -ENOMEM; + ret = -ENOMEM; + goto err; } + set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + ret = synctask_new(this->ctx->env, shard_delete_shards, shard_delete_shards_cbk, cleanup_frame, cleanup_frame); if (ret < 0) { @@ -1479,7 +1556,16 @@ shard_start_background_deletion(xlator_t *this) "failed to create task to do background " "cleanup of shards"); STACK_DESTROY(cleanup_frame->root); + goto err; } + return 0; + +err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); return ret; } @@ -1488,7 +1574,7 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int ret = 0; + int ret = -1; shard_priv_t *priv = NULL; gf_boolean_t i_start_cleanup = _gf_false; @@ -1521,23 +1607,25 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, LOCK(&priv->lock); { - if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) { - priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS; + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; i_start_cleanup = _gf_true; } } UNLOCK(&priv->lock); - if (i_start_cleanup) { - ret = shard_start_background_deletion(this); - if (ret) { - LOCK(&priv->lock); - { - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } - UNLOCK(&priv->lock); + if (!i_start_cleanup) + goto unwind; + + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { + priv->first_lookup_done = _gf_false; } + UNLOCK(&priv->lock); } + unwind: SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, postparent); @@ -1553,7 +1641,8 @@ shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) shard_local_t *local = NULL; this->itable = loc->inode->table; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && + (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); } @@ -1603,26 +1692,24 @@ err: } int -shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - 
int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) { int ret = -1; int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; + shard_local_t *local = frame->local; shard_inode_ctx_t ctx = { 0, }; - local = frame->local; - if (op_ret < 0) { gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" " failed : %s", - loc_gfid_utoa(&(local->loc))); + uuid_utoa(inode->gfid)); local->op_ret = op_ret; local->op_errno = op_errno; goto unwind; @@ -1656,18 +1743,57 @@ unwind: } int -shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) +shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + shard_local_t *local = frame->local; + + shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, + op_errno, buf, xdata); + return 0; +} + +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + /* In case of op_ret < 0, inode passed to this function will be NULL + ex: in case of op_errno = ENOENT. So refer prefilled inode data + which is part of local. + Note: Reassigning/overriding the inode passed to this cbk with inode + which is part of *struct shard_local_t* won't cause any issue as + both inodes have same reference/address as of the inode passed */ + inode = ((shard_local_t *)frame->local)->loc.inode; + + shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, + buf, xdata); + return 0; +} + +/* This function decides whether to make file based lookup or + * fd based lookup (fstat) depending on the 3rd and 4th arg. + * If fd != NULL and loc == NULL then call is for fstat + * If fd == NULL and loc != NULL then call is for file based + * lookup. Please pass args based on the requirement. + */ +int +shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, shard_post_fop_handler_t handler) { int ret = -1; + inode_t *inode = NULL; shard_local_t *local = NULL; dict_t *xattr_req = NULL; gf_boolean_t need_refresh = _gf_false; local = frame->local; local->handler = handler; + inode = fd ? fd->inode : loc->inode; - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, &need_refresh); /* By this time, inode ctx should have been created either in create, * mknod, readdirp or lookup. If not it is a bug! 
@@ -1676,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); + uuid_utoa(inode->gfid)); goto out; } @@ -1687,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, goto out; } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + if (fd) + STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xattr_req); + else + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); dict_unref(xattr_req); return 0; @@ -1902,6 +2032,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dict_t *xdata) { inode_t *inode = NULL; + int64_t delta_blocks = 0; shard_local_t *local = NULL; local = frame->local; @@ -1922,14 +2053,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks); /* Let the delta be negative. We want xattrop to do subtraction */ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks; + delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, + postbuf->ia_blocks - prebuf->ia_blocks); + GF_ASSERT(delta_blocks <= 0); + local->postbuf.ia_blocks += delta_blocks; local->hole_size = 0; - shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES); - + shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); return 0; @@ -1957,10 +2089,9 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) */ if (!inode) { gf_msg_debug(this->name, 0, - "Last shard to be truncated absent" - " in backend: %s. Directly proceeding to update " - "file size", - uuid_utoa(inode->gfid)); + "Last shard to be truncated absent in backend: %" PRIu64 + " of gfid %s. Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); shard_update_file_size(frame, this, NULL, &local->loc, shard_post_update_size_truncate_handler); return 0; @@ -1989,8 +2120,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + int ret = 0; int call_count = 0; int shard_block_num = (long)cookie; + uint64_t block_count = 0; shard_local_t *local = NULL; local = frame->local; @@ -2000,6 +2133,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; goto done; } + ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); + if (!ret) { + GF_ATOMIC_SUB(local->delta_blocks, block_count); + } else { + /* dict_get failed possibly due to a heterogeneous cluster? 
*/ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get key %s from dict during truncate of gfid %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + } shard_unlink_block_inode(local, shard_block_num); done: @@ -2029,6 +2172,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) gf_boolean_t wind_failed = _gf_false; shard_local_t *local = NULL; shard_priv_t *priv = NULL; + dict_t *xdata_req = NULL; local = frame->local; priv = this->private; @@ -2056,7 +2200,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) local->postbuf.ia_size = local->offset; local->postbuf.ia_blocks = local->prebuf.ia_blocks; local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->hole_size = 0; shard_update_file_size(frame, this, local->fd, &local->loc, shard_post_update_size_truncate_handler); @@ -2065,6 +2209,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) local->call_count = call_count; i = 1; + xdata_req = dict_new(); + if (!xdata_req) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict during truncate of %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + dict_unref(xdata_req); + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } SHARD_SET_ROOT_FS_ID(frame, local); while (cur_block <= last_block) { @@ -2103,7 +2262,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL); + FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); loc_wipe(&loc); next: i++; @@ -2111,6 +2270,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) if (!--call_count) break; } + dict_unref(xdata_req); return 0; } @@ -2166,13 +2326,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, xlator_t *this = NULL; inode_t *fsync_inode = NULL; shard_priv_t *priv = NULL; + inode_t *base_inode = NULL; this = THIS; priv = this->private; - if (local->loc.inode) + if (local->loc.inode) { gf_uuid_copy(gfid, local->loc.inode->gfid); - else + base_inode = local->loc.inode; + } else if (local->resolver_base_inode) { + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + base_inode = local->resolver_base_inode; + } else { gf_uuid_copy(gfid, local->base_gfid); + } shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); @@ -2185,7 +2351,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, LOCK(&priv->lock); { fsync_inode = __shard_update_shards_inode_list( - linked_inode, this, local->loc.inode, block_num, gfid); + linked_inode, this, base_inode, block_num, gfid); } UNLOCK(&priv->lock); if (fsync_inode) @@ -2307,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, int count = 0; int call_count = 0; int32_t shard_idx_iter = 0; - int last_block = 0; + int lookup_count = 0; char path[PATH_MAX] = { 0, }; @@ -2327,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, local = frame->local; count = call_count = local->call_count; 
shard_idx_iter = local->first_block; - last_block = local->last_block; + lookup_count = local->last_block - local->create_count; local->pls_fop_handler = handler; if (local->lookup_shards_barriered) local->barrier.waitfor = local->call_count; @@ -2337,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, else gf_uuid_copy(gfid, local->base_gfid); - while (shard_idx_iter <= last_block) { + while (shard_idx_iter <= lookup_count) { if (local->inode_list[i]) { i++; shard_idx_iter++; @@ -2482,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; @@ -2557,7 +2724,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) */ local->hole_size = local->offset - local->prebuf.ia_size; local->delta_size = 0; - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->postbuf.ia_size = local->offset; tmp_stbuf.ia_size = local->offset; shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, @@ -2573,7 +2740,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) */ local->hole_size = 0; local->delta_size = (local->offset - local->prebuf.ia_size); - local->delta_blocks = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); tmp_stbuf.ia_size = local->offset; shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, SHARD_INODE_WRITE_MASK); @@ -2629,9 +2796,10 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (!local->xattr_req) goto err; local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_truncate_handler); return 0; err: @@ -2684,9 +2852,10 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_truncate_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); @@ -2830,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, if (!local->xattr_req) goto err; - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_link_handler); return 0; err: shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); @@ -2845,13 +3014,20 @@ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) { shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, - "failed to delete shards of %s", - uuid_utoa(local->resolver_base_inode->gfid)); + "failed to delete shards of %s", uuid_utoa(gfid)); return 0; } local->op_ret = 0; @@ -2892,8 +3068,8 @@ 
shard_unlink_block_inode(shard_local_t *local, int shard_block_num) shard_priv_t *priv = NULL; shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; - gf_boolean_t unlink_unref_forget = _gf_false; int unref_base_inode = 0; + int unref_shard_inode = 0; this = THIS; priv = this->private; @@ -2918,26 +3094,27 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) list_del_init(&ctx->ilist); priv->inode_count--; unref_base_inode++; + unref_shard_inode++; GF_ASSERT(priv->inode_count >= 0); - unlink_unref_forget = _gf_true; } if (ctx->fsync_needed) { unref_base_inode++; + unref_shard_inode++; list_del_init(&ctx->to_fsync_list); - if (base_inode) + if (base_inode) { __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (base_ictx) base_ictx->fsync_count--; + } } } UNLOCK(&inode->lock); if (base_inode) UNLOCK(&base_inode->lock); - if (unlink_unref_forget) { - inode_unlink(inode, priv->dot_shard_inode, block_bname); - inode_unref(inode); - inode_forget(inode, 0); - } + + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); + if (base_inode && unref_base_inode) inode_ref_reduce_by_n(base_inode, unref_base_inode); UNLOCK(&priv->lock); @@ -3339,9 +3516,13 @@ shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, loc.inode = inode_ref(priv->dot_shard_rm_inode); ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL); - if (ret) + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } goto out; + } { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } @@ -3355,20 +3536,6 @@ out: int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - - this = frame->this; - priv = this->private; - - if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Background deletion of shards failed"); - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } else { - priv->first_lookup = SHARD_FIRST_LOOKUP_DONE; - } SHARD_STACK_DESTROY(frame); return 0; } @@ -3490,6 +3657,7 @@ shard_delete_shards(void *opaque) gf_dirent_t entries; gf_dirent_t *entry = NULL; call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; this = THIS; priv = this->private; @@ -3506,6 +3674,7 @@ shard_delete_shards(void *opaque) goto err; } cleanup_frame->local = local; + local->fop = GF_FOP_UNLINK; local->xattr_req = dict_new(); if (!local->xattr_req) { @@ -3543,51 +3712,76 @@ shard_delete_shards(void *opaque) goto err; } - while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, - &entries, local->xattr_req, NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry(entry, &entries.list, list) + for (;;) { + offset = 0; + LOCK(&priv->lock); { - offset = entry->d_off; - - if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) - continue; + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } + } + UNLOCK(&priv->lock); + if (done) + break; + while ( + (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, + &entries, local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) + { + offset = 
entry->d_off; - if (!entry->inode) { - ret = shard_lookup_marker_entry(this, local, entry); - if (ret < 0) + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - } - link_inode = inode_link(entry->inode, local->fd->inode, - entry->d_name, &entry->d_stat); - gf_msg_debug(this->name, 0, - "Initiating deletion of " - "shards of gfid %s", - entry->d_name); - ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, - link_inode); - inode_unlink(link_inode, local->fd->inode, entry->d_name); - inode_unref(link_inode); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to clean up shards of gfid %s", entry->d_name); - continue; + if (!entry->inode) { + ret = shard_lookup_marker_entry(this, local, entry); + if (ret < 0) + continue; + } + link_inode = inode_link(entry->inode, local->fd->inode, + entry->d_name, &entry->d_stat); + + gf_msg_debug(this->name, 0, + "Initiating deletion of " + "shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, + link_inode); + inode_unlink(link_inode, local->fd->inode, entry->d_name); + inode_unref(link_inode); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of gfid %s", + entry->d_name); + continue; + } + gf_msg(this->name, GF_LOG_INFO, 0, + SHARD_MSG_SHARD_DELETION_COMPLETED, + "Deleted " + "shards of gfid=%s from backend", + entry->d_name); } - gf_msg(this->name, GF_LOG_INFO, 0, - SHARD_MSG_SHARD_DELETION_COMPLETED, - "Deleted " - "shards of gfid=%s from backend", - entry->d_name); + gf_dirent_free(&entries); + if (ret) + break; } - gf_dirent_free(&entries); - if (ret) - break; } ret = 0; + loc_wipe(&loc); + return ret; + err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); loc_wipe(&loc); return ret; } @@ -3925,6 +4119,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret = op_ret; local->op_errno = op_errno; } else { + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); local->preoldparent = *preparent; local->postoldparent = *postparent; if (xdata) @@ -4134,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) switch (local->fop) { case GF_FOP_UNLINK: case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); + shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, + shard_post_lookup_base_shard_rm_handler); break; default: gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, @@ -4390,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->block_size) { local->tmp_loc.inode = inode_new(this->itable); gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); + shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, + shard_post_rename_lookup_handler); } else { shard_rename_cbk(frame, this); } @@ -4648,6 +4843,8 @@ out: if (xdata) local->xattr_rsp = dict_ref(xdata); vec.iov_base = local->iobuf->ptr; + if (local->offset + local->req_size > local->prebuf.ia_size) + local->total_size = local->prebuf.ia_size - local->offset; vec.iov_len = local->total_size; local->op_ret = local->total_size; SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, @@ -5028,6 +5225,7 @@ 
shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->resolver_base_inode = local->loc.inode; local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), @@ -5124,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_readv_handler); return 0; err: shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); @@ -5226,7 +5424,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, local->op_errno = op_errno; } else { local->written_size += op_ret; - local->delta_blocks += (post->ia_blocks - pre->ia_blocks); + GF_ATOMIC_ADD(local->delta_blocks, + post->ia_blocks - pre->ia_blocks); local->delta_size += (post->ia_size - pre->ia_size); shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); @@ -5365,21 +5564,17 @@ shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) remaining_size -= shard_write_size; if (local->fop == GF_FOP_WRITE) { + vec = NULL; count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, NULL); - - vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); - if (!vec) { + shard_write_size, &vec, 0); + if (count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; wind_failed = _gf_true; - GF_FREE(vec); shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, NULL, NULL); goto next; } - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, vec); } if (cur_block == 0) { @@ -5491,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame, shard_common_lookup_shards( frame, this, local->resolver_base_inode, shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); } else { shard_common_inode_write_do(frame, this); } @@ -5521,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, local->last_block = get_highest_block(local->offset, local->total_size, local->block_size); local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); if (!local->inode_list) { @@ -5529,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame, } gf_msg_trace(this->name, 0, - "%s: gfid=%s first_block=%" PRIu32 + "%s: gfid=%s first_block=%" PRIu64 " " - "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 + "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), @@ -5736,6 +5934,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; local = frame->local; base_inode = local->fd->inode; @@ -5769,11 +5968,16 @@ out: if (ctx->fsync_needed != 0) { list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); base_ictx->fsync_count++; + } else { + unref_shard_inode = 
_gf_true; } } UNLOCK(&anon_fd->inode->lock); UNLOCK(&base_inode->lock); } + + if (unref_shard_inode) + inode_unref(anon_fd->inode); if (anon_fd) fd_unref(anon_fd); @@ -5920,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_fsync_handler); return 0; err: shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); @@ -6113,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } int32_t -shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, + char *key) { - int op_errno = EINVAL; + int ret = 0; + struct iatt *tmpbuf = NULL; + struct iatt *stbuf = NULL; + data_t *data = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + if (!xdata) + return 0; + + data = dict_get(xdata, key); + if (!data) + return 0; + + tmpbuf = data_to_iatt(data, key); + stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); + if (stbuf == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; } + *stbuf = *tmpbuf; + stbuf->ia_size = local->prebuf.ia_size; + stbuf->ia_blocks = local->prebuf.ia_blocks; + ret = dict_set_iatt(xdata, key, stbuf, false); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + return 0; - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +err: + GF_FREE(stbuf); + return -1; +} + +int32_t +shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + if (local->fd) + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, local->fd, + local->name, local->xattr_req); + else + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, 
&local->loc, + local->name, local->xattr_req); + return 0; +} + +int32_t +shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + /* If shard's special xattrs are attempted to be removed, + * fail the fop with EPERM (except if the client is gsyncd). + */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); } + /* Repeat the same check for bulk-removexattr */ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + + if (name) { + local->name = gf_strdup(name); + if (!local->name) + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_remove_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, + xdata); + return 0; +} + +int32_t +shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, + xdata); return 0; } @@ -6235,38 +6601,164 @@ out: } int32_t -shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + local = frame->local; + + if (op_ret < 0) { + local->op_ret = 
op_ret; + local->op_errno = op_errno; + goto err; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); return 0; -out: - shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); + +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; } int32_t -shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->fd) + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, local->fd, + local->xattr_req, local->flags, local->xattr_rsp); + else + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, + local->xattr_req, local->flags, local->xattr_rsp); + return 0; +} + +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + /* Sharded or not, if shard's special xattrs are attempted to be set, + * fail the fop with EPERM (except if the client is gsyncd. 
+ */ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); + } + + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + local->flags = flags; + /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict + * and the xdata dict + */ + if (dict) + local->xattr_req = dict_ref(dict); + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_set_xattr_handler); return 0; -out: - shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; +} + +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); return 0; } @@ -6524,12 +7016,13 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, local->fd = fd_ref(fd); local->block_size = block_size; local->resolver_base_inode = local->fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); local->loc.inode = inode_ref(fd->inode); gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); + shard_refresh_base_file(frame, this, NULL, fd, + shard_common_inode_write_post_lookup_handler); return 0; out: shard_common_failure_unwind(fop, frame, -1, ENOMEM); @@ -6674,6 +7167,9 @@ fini(xlator_t *this) GF_VALIDATE_OR_GOTO("shard", this, out); + /*Itable was not created by shard, hence setting to NULL.*/ + this->itable = NULL; + mem_pool_destroy(this->local_pool); this->local_pool = NULL; @@ -6821,6 +7317,14 @@ struct xlator_dumpops dumpops = { struct volume_options options[] = { { + .key = {"shard"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable shard", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { .key = {"shard-block-size"}, .type = GF_OPTION_TYPE_SIZET, .op_version = {GD_OP_VERSION_3_7_0}, @@ -6862,3 +7366,17 @@ struct volume_options options[] = { }, {.key = {NULL}}, }; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + 
.op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "shard", + .category = GF_MAINTAINED, +}; |
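The patch also turns local->delta_blocks from a plain int64_t into a GF_ATOMIC counter, so that per-shard callbacks such as shard_common_inode_write_do_cbk() and shard_truncate_last_shard_cbk() can accumulate block-count deltas concurrently before shard_update_file_size() reads the total with GF_ATOMIC_GET(). The sketch below shows that accumulation pattern using C11 atomics as a stand-in for GlusterFS's GF_ATOMIC_* macros; the threads are only illustrative of concurrent shard callbacks, not the translator's actual call flow.

```c
#include <inttypes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for local->delta_blocks, which the patch updates with
 * GF_ATOMIC_ADD() from each shard's write/truncate callback. */
static _Atomic int64_t delta_blocks;

/* Each "callback" adds the difference between post- and pre-op block
 * counts for one shard; several may complete in parallel. */
static void *
shard_write_cbk(void *arg)
{
    int64_t blocks_added = *(int64_t *)arg;

    atomic_fetch_add_explicit(&delta_blocks, blocks_added,
                              memory_order_relaxed);
    return NULL;
}

int
main(void)
{
    pthread_t t[4];
    int64_t per_shard[4] = {8, 16, -8, 24};

    for (int i = 0; i < 4; i++)
        pthread_create(&t[i], NULL, shard_write_cbk, &per_shard[i]);
    for (int i = 0; i < 4; i++)
        pthread_join(t[i], NULL);

    /* The aggregated delta later feeds the size/block-count xattrop,
     * analogous to GF_ATOMIC_GET(local->delta_blocks) in
     * shard_update_file_size(). */
    printf("delta_blocks=%" PRId64 "\n",
           atomic_load_explicit(&delta_blocks, memory_order_relaxed));
    return 0;
}
```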
