summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/shard/bug-shard-fallocate.t47
-rw-r--r--xlators/features/shard/src/shard-messages.h9
-rw-r--r--xlators/features/shard/src/shard.c499
-rw-r--r--xlators/storage/posix/src/posix.c30
4 files changed, 396 insertions, 189 deletions
diff --git a/tests/bugs/shard/bug-shard-fallocate.t b/tests/bugs/shard/bug-shard-fallocate.t
new file mode 100644
index 00000000000..8d41507c4a5
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-fallocate.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+
+gfid_foo=`getfattr -n glusterfs.gfid.string $M0/foo 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2`
+
+TEST fallocate -l 17M $M0/foo
+EXPECT '17825792' stat -c %s $M0/foo
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %s`
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.2 | xargs stat -c %s`
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.3 | xargs stat -c %s`
+EXPECT "1048576" echo `find $B0 -name $gfid_foo.4 | xargs stat -c %s`
+
+TEST fallocate -o 102400 -l 17M $M0/foo
+EXPECT '17928192' stat -c %s $M0/foo
+
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %s`
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.2 | xargs stat -c %s`
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.3 | xargs stat -c %s`
+EXPECT "1150976" echo `find $B0 -name $gfid_foo.4 | xargs stat -c %s`
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
index be019a7ca5e..588cb687d5d 100644
--- a/xlators/features/shard/src/shard-messages.h
+++ b/xlators/features/shard/src/shard-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD
-#define GLFS_NUM_MESSAGES 17
+#define GLFS_NUM_MESSAGES 18
#define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1)
#define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages"
@@ -173,5 +173,12 @@
*/
#define SHARD_MSG_FOP_NOT_SUPPORTED (GLFS_COMP_BASE_SHARD + 17)
+/*!
+ * @messageid 133018
+ * @diagnosis
+ * @recommendedaction
+*/
+#define SHARD_MSG_INVALID_FOP (GLFS_COMP_BASE_SHARD + 18)
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_SHARD_MESSAGES_H_ */
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index b641f182fdb..89a47ef2ce2 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -485,6 +485,53 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,
}
int
+shard_common_inode_write_failure_unwind (glusterfs_fop_t fop,
+ call_frame_t *frame, int32_t op_ret,
+ int32_t op_errno)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ SHARD_STACK_UNWIND (writev, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+ break;
+ case GF_FOP_FALLOCATE:
+ SHARD_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+ break;
+ default:
+ gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+}
+
+int
+shard_common_inode_write_success_unwind (glusterfs_fop_t fop,
+ call_frame_t *frame, int32_t op_ret)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ SHARD_STACK_UNWIND (writev, frame, op_ret, 0, &local->prebuf,
+ &local->postbuf, local->xattr_rsp);
+ break;
+ case GF_FOP_FALLOCATE:
+ SHARD_STACK_UNWIND (fallocate, frame, op_ret, 0, &local->prebuf,
+ &local->postbuf, local->xattr_rsp);
+ break;
+ default:
+ gf_msg (THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+}
+
+int
shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
inode_t *res_inode,
shard_post_resolve_fop_handler_t post_res_handler)
@@ -500,6 +547,9 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
local = frame->local;
shard_idx_iter = local->first_block;
+ if (local->op_ret < 0)
+ goto out;
+
while (shard_idx_iter <= local->last_block) {
i++;
if (shard_idx_iter == 0) {
@@ -539,6 +589,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,
}
}
+out:
post_res_handler (frame, this);
return 0;
}
@@ -3352,19 +3403,20 @@ err:
}
int
-shard_post_update_size_writev_handler (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_post_update_size_handler (call_frame_t *frame,
+ xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (local->fop, frame,
+ local->op_ret,
+ local->op_errno);
} else {
- SHARD_STACK_UNWIND (writev, frame, local->written_size,
- local->op_errno, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ shard_common_inode_write_success_unwind (local->fop, frame,
+ local->written_size);
}
return 0;
}
@@ -3412,15 +3464,18 @@ shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,
}
int
-shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
int call_count = 0;
fd_t *anon_fd = cookie;
shard_local_t *local = NULL;
+ glusterfs_fop_t fop = 0;
local = frame->local;
+ fop = local->fop;
LOCK (&frame->lock);
{
@@ -3429,9 +3484,10 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
} else {
local->written_size += op_ret;
- local->delta_blocks += (postbuf->ia_blocks - prebuf->ia_blocks);
- local->delta_size += (postbuf->ia_size - prebuf->ia_size);
- shard_inode_ctx_set (local->fd->inode, this, postbuf, 0,
+ local->delta_blocks += (post->ia_blocks -
+ pre->ia_blocks);
+ local->delta_size += (post->ia_size - pre->ia_size);
+ shard_inode_ctx_set (local->fd->inode, this, post, 0,
SHARD_MASK_TIMES);
}
}
@@ -3444,8 +3500,9 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0) {
SHARD_UNSET_ROOT_FS_ID (frame, local);
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (fop, frame,
+ local->op_ret,
+ local->op_errno);
} else {
shard_get_delta_size_from_inode_ctx (local,
local->fd->inode,
@@ -3454,7 +3511,7 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata)
local->xattr_rsp = dict_ref (xdata);
shard_update_file_size (frame, this, local->fd, NULL,
- shard_post_update_size_writev_handler);
+ shard_common_inode_write_post_update_size_handler);
}
}
@@ -3462,7 +3519,39 @@ shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-shard_writev_do (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_wind (call_frame_t *frame, xlator_t *this,
+ fd_t *anon_fd, struct iovec *vec, int count,
+ off_t shard_offset, size_t size)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (local->fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk,
+ anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, anon_fd,
+ vec, count, shard_offset, local->flags,
+ local->iobref, local->xattr_req);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE (frame, shard_common_inode_write_do_cbk,
+ anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, anon_fd,
+ local->flags, shard_offset, size,
+ local->xattr_req);
+ break;
+ default:
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", local->fop);
+ break;
+ }
+ return 0;
+}
+
+int
+shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
{
int i = 0;
int count = 0;
@@ -3478,7 +3567,7 @@ shard_writev_do (call_frame_t *frame, xlator_t *this)
off_t shard_offset = 0;
off_t vec_offset = 0;
size_t remaining_size = 0;
- size_t write_size = 0;
+ size_t shard_write_size = 0;
local = frame->local;
fd = local->fd;
@@ -3499,43 +3588,52 @@ shard_writev_do (call_frame_t *frame, xlator_t *this)
local->op_ret = -1;
local->op_errno = ENOMEM;
local->call_count = 1;
- shard_writev_do_cbk (frame, (void *)(long)0, this, -1, ENOMEM,
- NULL, NULL, NULL);
+ shard_common_inode_write_do_cbk (frame, (void *)(long)0, this,
+ -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
while (cur_block <= last_block) {
if (wind_failed) {
- shard_writev_do_cbk (frame, (void *) (long) 0, this, -1,
- ENOMEM, NULL, NULL, NULL);
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) 0,
+ this, -1, ENOMEM, NULL,
+ NULL, NULL);
goto next;
}
shard_offset = orig_offset % local->block_size;
- write_size = local->block_size - shard_offset;
- if (write_size > remaining_size)
- write_size = remaining_size;
+ shard_write_size = local->block_size - shard_offset;
+ if (shard_write_size > remaining_size)
+ shard_write_size = remaining_size;
- remaining_size -= write_size;
+ remaining_size -= shard_write_size;
- count = iov_subset (local->vector, local->count, vec_offset,
- vec_offset + write_size, NULL);
+ if (local->fop == GF_FOP_WRITE) {
+ count = iov_subset (local->vector, local->count,
+ vec_offset,
+ vec_offset + shard_write_size,
+ NULL);
- vec = GF_CALLOC (count, sizeof (struct iovec),
- gf_shard_mt_iovec);
- if (!vec) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- wind_failed = _gf_true;
- GF_FREE (vec);
- shard_writev_do_cbk (frame, (void *) (long) 0, this, -1,
- ENOMEM, NULL, NULL, NULL);
- goto next;
+ vec = GF_CALLOC (count, sizeof (struct iovec),
+ gf_shard_mt_iovec);
+ if (!vec) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ GF_FREE (vec);
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) 0,
+ this, -1,
+ ENOMEM, NULL,
+ NULL, NULL);
+ goto next;
+ }
+ count = iov_subset (local->vector, local->count,
+ vec_offset,
+ vec_offset + shard_write_size, vec);
}
- count = iov_subset (local->vector, local->count, vec_offset,
- vec_offset + write_size, vec);
-
if (cur_block == 0) {
anon_fd = fd_ref (fd);
} else {
@@ -3545,23 +3643,23 @@ shard_writev_do (call_frame_t *frame, xlator_t *this)
local->op_errno = ENOMEM;
wind_failed = _gf_true;
GF_FREE (vec);
- shard_writev_do_cbk (frame,
- (void *) (long) anon_fd,
- this, -1, ENOMEM, NULL,
- NULL, NULL);
+ shard_common_inode_write_do_cbk (frame,
+ (void *) (long) anon_fd,
+ this, -1,
+ ENOMEM, NULL,
+ NULL, NULL);
goto next;
}
}
- STACK_WIND_COOKIE (frame, shard_writev_do_cbk, anon_fd,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, anon_fd,
- vec, count, shard_offset, local->flags,
- local->iobref, local->xattr_req);
+ shard_common_inode_write_wind (frame, this, anon_fd,
+ vec, count, shard_offset,
+ shard_write_size);
+ if (vec)
+ vec_offset += shard_write_size;
+ orig_offset += shard_write_size;
GF_FREE (vec);
vec = NULL;
- orig_offset += write_size;
- vec_offset += write_size;
next:
cur_block++;
i++;
@@ -3571,57 +3669,63 @@ next:
}
int
-shard_post_lookup_shards_writev_handler (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
+ xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (local->fop, frame,
+ local->op_ret,
+ local->op_errno);
return 0;
}
- shard_writev_do (frame, this);
+ shard_common_inode_write_do (frame, this);
return 0;
}
int
-shard_post_mknod_writev_handler (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
+ xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (local->fop, frame,
+ local->op_ret,
+ local->op_errno);
return 0;
}
if (!local->eexist_count) {
- shard_writev_do (frame, this);
+ shard_common_inode_write_do (frame, this);
} else {
local->call_count = local->eexist_count;
shard_common_lookup_shards (frame, this, local->loc.inode,
- shard_post_lookup_shards_writev_handler);
+ shard_common_inode_write_post_lookup_shards_handler);
}
return 0;
}
int
-shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
+ xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (local->fop, frame,
+ local->op_ret,
+ local->op_errno);
return 0;
}
@@ -3629,39 +3733,41 @@ shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this)
if (local->create_count)
shard_common_resume_mknod (frame, this,
- shard_post_mknod_writev_handler);
+ shard_common_inode_write_post_mknod_handler);
else
- shard_writev_do (frame, this);
+ shard_common_inode_write_do (frame, this);
return 0;
}
int
-shard_post_resolve_writev_handler (call_frame_t *frame, xlator_t *this)
+shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
+ xlator_t *this)
{
shard_local_t *local = NULL;
local = frame->local;
if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, NULL, NULL, NULL);
+ shard_common_inode_write_failure_unwind (local->fop, frame,
+ local->op_ret,
+ local->op_errno);
return 0;
}
local->create_count = local->call_count;
shard_lookup_base_file (frame, this, &local->loc,
- shard_post_lookup_writev_handler);
+ shard_common_inode_write_post_lookup_handler);
return 0;
}
int
-shard_writev_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+shard_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
shard_local_t *local = NULL;
@@ -3671,28 +3777,29 @@ shard_writev_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie,
if (op_ret == -1) {
if (op_errno != EEXIST) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
goto unwind;
} else {
gf_msg_debug (this->name, 0, "mkdir on /.shard failed "
"with EEXIST. Attempting lookup now");
shard_lookup_dot_shard (frame, this,
- shard_post_resolve_writev_handler);
+ local->post_res_handler);
return 0;
}
}
shard_link_dot_shard_inode (local, inode, buf);
- shard_common_resolve_shards (frame, this, local->loc.inode,
- shard_post_resolve_writev_handler);
- return 0;
unwind:
- SHARD_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ shard_common_resolve_shards (frame, this, local->loc.inode,
+ local->post_res_handler);
return 0;
}
int
-shard_writev_mkdir_dot_shard (call_frame_t *frame, xlator_t *this)
+shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this,
+ shard_post_resolve_fop_handler_t handler)
{
int ret = -1;
shard_local_t *local = NULL;
@@ -3702,6 +3809,8 @@ shard_writev_mkdir_dot_shard (call_frame_t *frame, xlator_t *this)
local = frame->local;
priv = this->private;
+ local->post_res_handler = handler;
+
xattr_req = dict_new ();
if (!xattr_req)
goto err;
@@ -3720,106 +3829,18 @@ shard_writev_mkdir_dot_shard (call_frame_t *frame, xlator_t *this)
SHARD_SET_ROOT_FS_ID (frame, local);
- STACK_WIND (frame, shard_writev_mkdir_dot_shard_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, &local->dot_shard_loc,
- 0755, 0, xattr_req);
+ STACK_WIND (frame, shard_mkdir_dot_shard_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &local->dot_shard_loc, 0755, 0, xattr_req);
dict_unref (xattr_req);
return 0;
err:
if (xattr_req)
dict_unref (xattr_req);
- SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
-}
-
-int
-shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- int ret = 0;
- int i = 0;
- uint64_t block_size = 0;
- shard_local_t *local = NULL;
- shard_priv_t *priv = NULL;
-
- priv = this->private;
-
- ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
- "size for %s from its inode ctx",
- uuid_utoa (fd->inode->gfid));
- goto out;
- }
-
- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
- /* block_size = 0 means that the file was created before
- * sharding was enabled on the volume.
- */
- STACK_WIND (frame, default_writev_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
- }
-
- if (!this->itable)
- this->itable = fd->inode->table;
-
- local = mem_get0 (this->local_pool);
- if (!local)
- goto out;
-
- frame->local = local;
-
- local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
- if (!local->xattr_req)
- goto out;
-
- local->vector = iov_dup (vector, count);
- if (!local->vector)
- goto out;
-
- for (i = 0; i < count; i++)
- local->total_size += vector[i].iov_len;
-
- local->count = count;
- local->offset = offset;
- local->flags = flags;
- local->iobref = iobref_ref (iobref);
- local->fd = fd_ref (fd);
- local->block_size = block_size;
- local->first_block = get_lowest_block (offset, local->block_size);
- local->last_block = get_highest_block (offset, local->total_size,
- local->block_size);
- local->num_blocks = local->last_block - local->first_block + 1;
- local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
- gf_shard_mt_inode_list);
- if (!local->inode_list)
- goto out;
-
- local->loc.inode = inode_ref (fd->inode);
- gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
-
- gf_msg_trace (this->name, 0, "gfid=%s first_block=%"PRIu32" "
- "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64" "
- "total_size=%lu", uuid_utoa (fd->inode->gfid),
- local->first_block, local->last_block, local->num_blocks,
- offset, local->total_size);
-
- local->dot_shard_loc.inode = inode_find (this->itable,
- priv->dot_shard_gfid);
- if (!local->dot_shard_loc.inode)
- shard_writev_mkdir_dot_shard (frame, this);
- else
- shard_common_resolve_shards (frame, this, local->loc.inode,
- shard_post_resolve_writev_handler);
-
- return 0;
-out:
- SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ handler (frame, this);
return 0;
}
@@ -4419,13 +4440,135 @@ err:
}
int
+shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags, size_t len,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int ret = 0;
+ int i = 0;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block "
+ "size for %s from its inode ctx",
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ /* block_size = 0 means that the file was created before
+ * sharding was enabled on the volume.
+ */
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd,
+ vector, count, offset, flags, iobref,
+ xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd,
+ flags, offset, len, xdata);
+ break;
+ default:
+ gf_msg (this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0 (this->local_pool);
+ if (!local)
+ goto out;
+
+ frame->local = local;
+
+ local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
+ if (!local->xattr_req)
+ goto out;
+
+ if (vector) {
+ local->vector = iov_dup (vector, count);
+ if (!local->vector)
+ goto out;
+ for (i = 0; i < count; i++)
+ local->total_size += vector[i].iov_len;
+ local->count = count;
+ } else {
+ local->total_size = len;
+ }
+
+ local->fop = fop;
+ local->offset = offset;
+ local->flags = flags;
+ if (iobref)
+ local->iobref = iobref_ref (iobref);
+ local->fd = fd_ref (fd);
+ local->block_size = block_size;
+ local->first_block = get_lowest_block (offset, local->block_size);
+ local->last_block = get_highest_block (offset, local->total_size,
+ local->block_size);
+ local->num_blocks = local->last_block - local->first_block + 1;
+ local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ gf_uuid_copy (local->loc.gfid, fd->inode->gfid);
+
+ gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" "
+ "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64""
+ " total_size=%lu flags=%"PRId32"", gf_fop_list[fop],
+ uuid_utoa (fd->inode->gfid), local->first_block,
+ local->last_block, local->num_blocks, offset,
+ local->total_size, local->flags);
+
+ local->dot_shard_loc.inode = inode_find (this->itable,
+ priv->dot_shard_gfid);
+
+ if (!local->dot_shard_loc.inode)
+ shard_mkdir_dot_shard (frame, this,
+ shard_common_inode_write_post_resolve_handler);
+ else
+ shard_common_resolve_shards (frame, this, local->loc.inode,
+ shard_common_inode_write_post_resolve_handler);
+
+ return 0;
+out:
+ shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ shard_common_inode_write_begin (frame, this, GF_FOP_WRITE, fd, vector,
+ count, offset, flags, 0, iobref, xdata);
+ return 0;
+}
+
+int
shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
{
- /* TBD */
- gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
- "fallocate called on %s.", uuid_utoa (fd->inode->gfid));
- SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL, NULL);
+ shard_common_inode_write_begin (frame, this, GF_FOP_FALLOCATE, fd, NULL,
+ 0, offset, keep_size, len, NULL, xdata);
return 0;
}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 272d08f8654..e834fc09752 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -653,12 +653,13 @@ out:
}
static int32_t
-posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- off_t offset, size_t len, struct iatt *statpre,
- struct iatt *statpost)
+posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, off_t offset, size_t len,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
{
- struct posix_fd *pfd = NULL;
- int32_t ret = -1;
+ int32_t ret = -1;
+ struct posix_fd *pfd = NULL;
+ gf_boolean_t locked = _gf_false;
DECLARE_OLD_FS_ID_VAR;
@@ -674,6 +675,11 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
goto out;
}
+ if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+ locked = _gf_true;
+ LOCK(&fd->inode->lock);
+ }
+
ret = posix_fdstat (this, pfd->fd, statpre);
if (ret == -1) {
ret = -errno;
@@ -682,7 +688,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
goto out;
}
- ret = sys_fallocate(pfd->fd, flags, offset, len);
+ ret = sys_fallocate (pfd->fd, flags, offset, len);
if (ret == -1) {
ret = -errno;
goto out;
@@ -697,6 +703,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
}
out:
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
SET_TO_OLD_FS_ID ();
return ret;
@@ -874,8 +884,8 @@ _posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_siz
flags = FALLOC_FL_KEEP_SIZE;
#endif /* FALLOC_FL_KEEP_SIZE */
- ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
- &statpre, &statpost);
+ ret = posix_do_fallocate (frame, this, fd, flags, offset, len,
+ &statpre, &statpost, xdata);
if (ret < 0)
goto err;
@@ -900,8 +910,8 @@ posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
struct iatt statpre = {0,};
struct iatt statpost = {0,};
- ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
- &statpre, &statpost);
+ ret = posix_do_fallocate (frame, this, fd, flags, offset, len,
+ &statpre, &statpost, xdata);
if (ret < 0)
goto err;