summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorN Balachandran <nbalacha@redhat.com>2017-07-14 15:18:19 +0530
committerRaghavendra G <rgowdapp@redhat.com>2017-12-26 05:25:33 +0000
commit85a389cfced2c299faa37b6f930c2cd0ca8c72b9 (patch)
tree09099228c814ae9f4ab5f48b9b62f00dea72d297 /xlators/cluster
parentf13023862edc868d4da87609412341aecd041ed8 (diff)
cluster/dht: Add migration checks to dht_(f)xattrop
The dht_(f)xattrop implementation did not implement migration phase1/phase2 checks which could cause issues with rebalance on sharded volumes. This does not solve the issue where fops may reach the target out of order. Change-Id: I2416fc35115e60659e35b4b717fd51f20746586c BUG: 1471031 Signed-off-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/dht/src/dht-common.c47
-rw-r--r--xlators/cluster/dht/src/dht-common.h9
-rw-r--r--xlators/cluster/dht/src/dht-helper.c3
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c241
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c73
5 files changed, 326 insertions, 47 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 2bee8bb1e65..4a16714177a 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -45,6 +45,11 @@ int
dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
+int
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
/* Sets the blocks and size values to fixed values. This is to be called
* only for dirs. The caller is responsible for checking the type
@@ -60,6 +65,48 @@ int32_t dht_set_fixed_dir_stat (struct iatt *stat)
}
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ */
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req)
+{
+ int ret = -1;
+
+ ret = dict_set_int8 (xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+ ret = dict_set_int8 (xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ /* At least one call succeeded */
+ return ret;
+}
+
+
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ * This will return a dummy iatt with only the mode and type set
+ */
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
+ struct iatt *stbuf)
+{
+ int ret = -1;
+ int32_t mode = 0;
+
+ ret = dict_get_int32 (xdata, DHT_MODE_IN_XDATA_KEY, &mode);
+
+ if (ret) {
+ ret = dict_get_bin (xdata, DHT_IATT_IN_XDATA_KEY,
+ (void **)&stbuf);
+ } else {
+ stbuf->ia_prot = ia_prot_from_st_mode (mode);
+ stbuf->ia_type = ia_type_from_st_mode (mode);
+ }
+
+ return ret;
+}
+
+
+
int
dht_rmdir_unlock (call_frame_t *frame, xlator_t *this);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index cc7d41b3bfa..577970835ae 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -149,6 +149,7 @@ struct dht_rebalance_ {
dht_defrag_cbk_fn_t target_op_fn;
dict_t *xdata;
dict_t *xattr;
+ dict_t *dict;
int32_t set;
struct gf_flock flock;
int lock_cmd;
@@ -1466,4 +1467,12 @@ int
dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
int32_t valid, dht_layout_t *layout);
+/* Abstract out the DHT-IATT-IN-DICT */
+
+
+int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req);
+
+int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata,
+ struct iatt *stbuf);
+
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 0c249f0e84c..381856f2455 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -801,6 +801,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local)
if (local->rebalance.xattr)
dict_unref (local->rebalance.xattr);
+ if (local->rebalance.dict)
+ dict_unref (local->rebalance.dict);
+
GF_FREE (local->rebalance.vector);
if (local->rebalance.iobref)
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index a9e47664a81..fa63fefb903 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -24,8 +24,9 @@ int dht_lk2 (xlator_t *this, xlator_t *dst_node,
call_frame_t *frame, int ret);
int dht_fsync2 (xlator_t *this, xlator_t *dst_node,
call_frame_t *frame, int ret);
-
-
+int
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
int
dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -1246,13 +1247,163 @@ err:
return 0;
}
-/* Currently no translators on top of 'distribute' will be using
- * below fops, hence not implementing 'migration' related checks
- */
+
+int
+dht_common_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *call_frame = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ struct iatt stbuf = {0,};
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+ call_frame = cookie;
+ prev = call_frame->this;
+
+ local->op_errno = op_errno;
+
+ if ((op_ret == -1) && !dht_inode_missing (op_errno)) {
+ gf_msg_debug (this->name, op_errno,
+ "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ ret = dht_read_iatt_from_xdata (this, xdata, &stbuf);
+
+ if ((!op_ret) && (ret)) {
+ /* This is a potential problem and can cause corruption
+ * with sharding.
+ * Oh well. We tried.
+ */
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_common_xattrop2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref (xdata);
+
+ if (dict)
+ local->rebalance.dict = dict_ref (dict);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (&stbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (&stbuf)) {
+
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
+ &dst_subvol);
+
+ if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_common_xattrop2 (this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+
+
+out:
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno,
+ dict, xdata);
+ } else {
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno,
+ dict, xdata);
+ }
+
+ return 0;
+}
+
+
+int
+dht_common_xattrop2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating (ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND (xattrop, frame, local->op_ret,
+ op_errno, local->rebalance.dict,
+ local->rebalance.xdata);
+ } else {
+ DHT_STACK_UNWIND (fxattrop, frame, local->op_ret,
+ op_errno, local->rebalance.dict,
+ local->rebalance.xdata);
+ }
+
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_XATTROP) {
+ STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->xattrop, &local->loc,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ } else {
+ STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
+
+ return 0;
+
+out:
+
+ /* If local is unavailable we could be unwinding the wrong
+ * function here */
+
+ if (local && (local->fop == GF_FOP_XATTROP)) {
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ } else {
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ }
+ return 0;
+}
+
int
dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
return 0;
@@ -1263,9 +1414,10 @@ int
dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1287,11 +1439,33 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
}
- local->call_cnt = 1;
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ STACK_WIND (frame, dht_xattrop_cbk, subvol,
+ subvol->fops->xattrop, loc, flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
+ local->call_cnt = 1;
- STACK_WIND (frame, dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict, xdata);
+ local->rebalance.xattr = dict_ref (dict);
+ local->rebalance.flags = flags;
+
+ ret = dht_request_iatt_in_xdata (this, local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set dictionary key %s file=%s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
+
+ STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->xattrop, loc,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
return 0;
@@ -1318,6 +1492,8 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1331,10 +1507,39 @@ dht_fxattrop (call_frame_t *frame, xlator_t *this,
goto err;
}
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict, xdata);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FXATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR (fd->inode->ia_type)) {
+ STACK_WIND (frame, dht_fxattrop_cbk, subvol,
+ subvol->fops->fxattrop, fd, flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new ();
+ local->call_cnt = 1;
+
+ local->rebalance.xattr = dict_ref (dict);
+ local->rebalance.flags = flags;
+
+ ret = dht_request_iatt_in_xdata (this, local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Failed to set dictionary key %s fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
+
+ STACK_WIND (frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
return 0;
@@ -1345,6 +1550,9 @@ err:
return 0;
}
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
int
dht_inodelk_cbk (call_frame_t *frame, void *cookie,
@@ -1406,7 +1614,6 @@ dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
-
dht_lk_inode_unref (frame, op_ret);
DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
return 0;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index c1c8a8a6ecf..23383c528bc 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -175,7 +175,7 @@ dht_strip_out_acls (dict_t *dict)
{
if (dict) {
dict_del (dict, "trusted.SGI_ACL_FILE");
- dict_del (dict, "POSIX_ACL_ACCESS_XATTR");
+ dict_del (dict, POSIX_ACL_ACCESS_XATTR);
}
}
@@ -672,7 +672,7 @@ out:
static int
__dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- dict_t *xattr, int *fop_errno)
+ int *fop_errno)
{
int ret = -1;
fd_t *fd = NULL;
@@ -817,16 +817,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- ret = syncop_fsetxattr (to, fd, xattr, 0, NULL, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- gf_msg (this->name, GF_LOG_WARNING, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "%s: failed to set xattr on %s",
- loc->path, to->name);
-
- }
-
ret = syncop_fsetattr (to, fd, stbuf,
(GF_SET_ATTR_UID | GF_SET_ATTR_GID),
NULL, NULL, NULL, NULL);
@@ -1650,24 +1640,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
-
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed:"
- "%s: failed to get xattr from %s",
- loc->path, from->name);
- }
-
- /* Copying posix acls to the linkto file messes up the permissions*/
- dht_strip_out_acls (xattr);
-
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
- &dst_fd, xattr, fop_errno);
+ &dst_fd, fop_errno);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed"
" on - %s for file - %s", to->name, loc->path);
@@ -1712,7 +1687,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* as in case of failure the linkto needs to point to the source
* subvol */
ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
- &dst_fd, xattr, fop_errno);
+ &dst_fd, fop_errno);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Create dst failed"
" on - %s for file - %s", to->name, loc->path);
@@ -1738,6 +1713,42 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path, from->name);
goto out;
}
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copying posix acls to the linkto file messes up the permissions*/
+ dht_strip_out_acls (xattr);
+
+ /* Remove the linkto xattr as we don't want to overwrite the value
+ * set on the dst.
+ */
+ dict_del (xattr, conf->link_xattr_name);
+
+ /* We need to error out if this fails as having the wrong shard xattrs
+ * set on the dst could cause data corruption
+ */
+ ret = syncop_fsetxattr (to, dst_fd, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to set xattr on %s",
+ loc->path, to->name);
+ ret = -1;
+ goto out;
+ }
+
if (xattr_rsp) {
/* we no more require this key */
dict_del (dict, conf->link_xattr_name);
@@ -2029,7 +2040,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xattr = NULL;
}
- ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
+ /* Set only the Posix ACLs this time */
+ ret = syncop_getxattr (from, loc, &xattr, POSIX_ACL_ACCESS_XATTR,
+ NULL, NULL);
if (ret < 0) {
gf_msg (this->name, GF_LOG_WARNING, -ret,
DHT_MSG_MIGRATE_FILE_FAILED,