diff options
author | Susant Palai <spalai@redhat.com> | 2018-01-18 13:06:12 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2018-02-02 15:24:38 +0000 |
commit | 545a7ce6762a1b3a7b989b43a9d18b5b1b299df0 (patch) | |
tree | 0f2c3015697553914cb520dbda107f3843521f53 /xlators/cluster/dht/src | |
parent | d9f773ba719397c12860f494a8cd38109e4b2fe3 (diff) |
cluster/dht: avoid overwriting client writes during migration
For more details on this issue see
https://github.com/gluster/glusterfs/issues/308
Solution:
This is a restrictive solution: a file will not be migrated
if a client writes to it during the migration. It does not
check whether the writes from rebalance and from the client
actually overlap.
If dht_writev_cbk finds that the file is being migrated (PHASE1),
it sets an xattr on the destination file indicating that the file
was updated by a non-rebalance client.
Rebalance checks whether any other client has written to the
destination file and aborts the file migration if it finds the xattr.
Enabling the new force-migration option (default: off) skips this check.
updates gluster/glusterfs#308
Change-Id: I73aec28bc9dbb8da57c7425ec88c6b6af0fbc9dd
Signed-off-by: Susant Palai <spalai@redhat.com>
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 2 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 28 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 103 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 17 |
4 files changed, 138 insertions, 12 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 91ba3418643..9671bbe1cbe 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -695,6 +695,8 @@ struct dht_conf { synclock_t link_lock; gf_boolean_t use_fallocate; + + gf_boolean_t force_migration; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 7c596b1c099..226ee95e8b3 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -95,6 +95,33 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { + if (!dht_is_tier_xlator (this)) { + if (!local->xattr_req) { + local->xattr_req = dict_new (); + if (!local->xattr_req) { + gf_msg (this->name, GF_LOG_ERROR, + DHT_MSG_NO_MEMORY, + ENOMEM, "insufficient memory"); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + } + + ret = dict_set_uint32 (local->xattr_req, + GF_PROTECT_FROM_EXTERNAL_WRITES, + 1); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, + DHT_MSG_DICT_SET_FAILED, 0, + "Failed to set key %s in dictionary", + GF_PROTECT_FROM_EXTERNAL_WRITES); + local->op_errno = ENOMEM; + local->op_ret = -1; + goto out; + } + } + dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); @@ -146,7 +173,6 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) return 0; } - if (subvol == NULL) goto out; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 70d5a5f316f..d9c3149049c 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -26,7 +26,8 @@ #define MAX_REBAL_TYPE_SIZE 16 #define FILE_CNT_INTERVAL 600 /* 10 mins */ #define ESTIMATE_START_INTERVAL 600 /* 10 mins */ - 
+#define HARDLINK_MIG_INPROGRESS -2 +#define SKIP_MIGRATION_FD_POSITIVE -3 #ifndef MAX #define MAX(a, b) (((a) > (b))?(a):(b)) #endif @@ -680,6 +681,7 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, struct iatt check_stbuf= {0,}; dht_conf_t *conf = NULL; dict_t *dict = NULL; + dict_t *xdata = NULL; conf = this->private; @@ -725,7 +727,31 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from, goto out; } - ret = syncop_lookup (to, loc, &new_stbuf, NULL, NULL, NULL); + if (!!dht_is_tier_xlator (this)) { + xdata = dict_new (); + if (!xdata) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: dict_new failed)", + loc->path); + goto out; + } + + ret = dict_set_int32 (xdata, GF_CLEAN_WRITE_PROTECTION, 1); + if (ret) { + *fop_errno = ENOMEM; + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_DICT_SET_FAILED, + "%s: failed to set dictionary value: key = %s ", + loc->path, GF_CLEAN_WRITE_PROTECTION); + goto out; + } + } + + ret = syncop_lookup (to, loc, &new_stbuf, NULL, xdata, NULL); if (!ret) { /* File exits in the destination, check if gfid matches */ if (gf_uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) { @@ -875,6 +901,10 @@ out: if (dict) dict_unref (dict); + if (xdata) + dict_unref (dict); + + return ret; } @@ -1090,9 +1120,9 @@ out: } static int -__dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, - xlator_t *to, fd_t *src, fd_t *dst, - uint64_t ia_size, int hole_exists, +__dht_rebalance_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, + xlator_t *from, xlator_t *to, fd_t *src, + fd_t *dst, uint64_t ia_size, int hole_exists, int *fop_errno) { int ret = 0; @@ -1102,7 +1132,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, struct iobref *iobref = NULL; uint64_t total = 0; size_t read_size = 0; + dict_t *xdata = NULL; + dht_conf_t *conf = NULL; + conf = this->private; /* 
if file size is '0', no need to enter this loop */ while (total < ia_size) { read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ? @@ -1121,8 +1154,42 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, ret, offset, iobref, fop_errno); } else { + if (!conf->force_migration && + !dht_is_tier_xlator (this)) { + xdata = dict_new (); + if (!xdata) { + gf_msg ("dht", GF_LOG_ERROR, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "insufficient memory"); + ret = -1; + *fop_errno = ENOMEM; + break; + } + + /* Fail this write and abort rebalance if we + * detect a write from client since migration of + * this file started. This is done to avoid + * potential data corruption due to out of order + * writes from rebalance and client to the same + * region (as compared between src and dst + * files). See + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ + ret = dict_set_int32 (xdata, + GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg ("dht", GF_LOG_ERROR, 0, + ENOMEM, "failed to set dict"); + ret = -1; + *fop_errno = ENOMEM; + break; + } + + } + ret = syncop_writev (to, dst, vector, count, - offset, iobref, 0, NULL, NULL); + offset, iobref, 0, xdata, NULL); if (ret < 0) { *fop_errno = -ret; } @@ -1158,6 +1225,10 @@ __dht_rebalance_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, else ret = -1; + if (xdata) { + dict_unref (xdata); + } + return ret; } @@ -1575,7 +1646,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* Do not migrate file in case lock migration is not enabled on the * volume*/ if (!conf->lock_migration_enabled) { @@ -1642,7 +1712,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf, fop_errno); if (ret) { - if (ret == -2) + if (ret == HARDLINK_MIG_INPROGRESS) ret = 0; goto out; } @@ -1785,7 +1855,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, 
xlator_t *to, ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp, flag, defrag, conf, fop_errno); if (ret) { - if (ret == -2) + if (ret == HARDLINK_MIG_INPROGRESS) ret = 0; goto out; } @@ -1794,8 +1864,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, file_has_holes = 1; - ret = __dht_rebalance_migrate_data (defrag, from, to, src_fd, dst_fd, - stbuf.ia_size, + ret = __dht_rebalance_migrate_data (this, defrag, from, to, + src_fd, dst_fd, stbuf.ia_size, file_has_holes, fop_errno); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2280,6 +2350,17 @@ out: } } + if (!dht_is_tier_xlator (this)) { + lk_ret = syncop_removexattr (to, loc, + GF_PROTECT_FROM_EXTERNAL_WRITES, + NULL, NULL); + if (lk_ret) { + gf_msg (this->name, GF_LOG_WARNING, -lk_ret, 0, + "%s: removexattr failed key %s", loc->path, + GF_CLEAN_WRITE_PROTECTION); + } + } + if (dict) dict_unref (dict); diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 8c011f72530..51c9d9cb3cf 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -524,6 +524,10 @@ dht_reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled, options, bool, out); + GF_OPTION_RECONF ("force-migration", conf->force_migration, + options, bool, out); + + if (conf->defrag) { if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) { ret = dht_configure_throttle (this, conf, temp_str); @@ -810,6 +814,10 @@ dht_init (xlator_t *this) GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled, bool, err); + GF_OPTION_INIT ("force-migration", conf->force_migration, + bool, err); + + if (defrag) { defrag->lock_migration_enabled = conf->lock_migration_enabled; @@ -1203,5 +1211,14 @@ struct volume_options options[] = { .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC }, + { .key = {"force-migration"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = 
"off", + .description = "If disabled, rebalance will not migrate files that " + "are being written to by an application", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {NULL} }, }; |