From 21c282ef311d3d7385bba37ddb0a26fb12178409 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 25 Mar 2014 11:07:31 +0530 Subject: cluster/afr: Sparse file self-heal changes - Fix boundary condition for offset - Honour data-self-heal-algorithm option - Added tests for sparse file self-healing Change-Id: I14bb1c9d04118a3df4072f962fc8f2f197391d95 BUG: 1080707 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/7339 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-self-heal-data.c | 37 +++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index c0385153f..c0548d995 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -159,7 +159,7 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, */ #define is_last_block(o,b,s) ((s >= o) && (s <= (o + b))) if (HAS_HOLES ((&replies[source].poststat)) && - offset > replies[i].poststat.ia_size && + offset >= replies[i].poststat.ia_size && !is_last_block (offset, size, replies[source].poststat.ia_size) && (iov_0filled (iovec, count) == 0)) @@ -267,6 +267,31 @@ afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this, return 0; } +static int +afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks, + int source, struct afr_reply *replies) +{ + int type = AFR_SELFHEAL_DATA_FULL; + int i = 0; + + if (priv->data_self_heal_algorithm == NULL) { + type = AFR_SELFHEAL_DATA_FULL; + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] && i != source) + continue; + if (replies[i].poststat.ia_size) { + type = AFR_SELFHEAL_DATA_DIFF; + break; + } + } + } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) { + type = AFR_SELFHEAL_DATA_FULL; + } else if (strcmp 
(priv->data_self_heal_algorithm, "diff") == 0) { + type = AFR_SELFHEAL_DATA_DIFF; + } + return type; +} + static int afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, int source, unsigned char *healed_sinks, @@ -296,14 +321,8 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, "source=%d sinks=%s", uuid_utoa (fd->inode->gfid), source, sinks_str); - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i] && i != source) - continue; - if (replies[i].poststat.ia_size) { - type = AFR_SELFHEAL_DATA_DIFF; - break; - } - } + type = afr_data_self_heal_type_get (priv, healed_sinks, source, + replies); iter_frame = afr_copy_frame (frame); if (!iter_frame) -- cgit From 9a3de81fe5c42c0495dccc5877cecbc2edb81f3c Mon Sep 17 00:00:00 2001 From: Susant Palai Date: Tue, 18 Feb 2014 13:03:50 +0000 Subject: DHT/Rebalance : Hard link Migration Failure Problem : __is_file_migratable used to return ENOTSUP for all the cases. Hence, it will add to the failure count. And the remove-brick status will show failure for all the files. Solution : Added 'ret = -2' to gf_defrag_handle_hardlink to be deemed as success. Otherwise dht_migrate_file will try to migrate each of the hard links, which is not intended. Change-Id: Iff74f6634fb64e4b91fc5d016e87ff1290b7a0d6 BUG: 1066798 Signed-off-by: Susant Palai Reviewed-on: http://review.gluster.org/7124 Reviewed-by: Raghavendra G Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-rebalance.c | 62 +++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a17319ba6..4f78f5203 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -94,6 +94,41 @@ out: } +/* + return values: + -1 : failure + -2 : success + +Hard link migration is carried out in three stages. 
+ +(Say there are n hardlinks) +Stage 1: Setting the new hashed subvol information on the 1st hardlink + encountered (linkto setxattr) + +Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th + hardlink + +Stage 3: Physical migration of the data file for nth hardlink + +Why to deem "-2" as success and not "0": + + dht_migrate_file expects return value "0" from _is_file_migratable if +the file has to be migrated. + + _is_file_migratable returns zero only when it is called with the +flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS". + + gf_defrag_handle_hardlink calls dht_migrate_file for physical migration +of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS" + +Hence, gf_defrag_handle_hardlink returning "0" for success will force +"dht_migrate_file" to migrate each of the hardlink which is not intended. + +For each of the three stage mentioned above "-2" will be returned and will +be converted to "0" in dht_migrate_file. + +*/ + int32_t gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, struct iatt *stbuf) @@ -164,6 +199,7 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, ret = -1; goto out; } + ret = -2; goto out; } else { linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs); @@ -200,12 +236,19 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, if (ret) goto out; } - ret = 0; + ret = -2; out: return ret; } - +/* + return values + 0 : File will be migrated + -2 : File will not be migrated + (This is the return value from gf_defrag_handle_hardlink. 
Checkout + gf_defrag_handle_hardlink for description of "returning -2") + -1 : failure +*/ static inline int __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf, dict_t *xattrs, int flags) @@ -228,7 +271,12 @@ __is_file_migratable (xlator_t *this, loc_t *loc, if (flags == GF_DHT_MIGRATE_HARDLINK) { ret = gf_defrag_handle_hardlink (this, loc, xattrs, stbuf); - if (ret) { + + /* + Returning zero will force the file to be remigrated. + Checkout gf_defrag_handle_hardlink for more information. + */ + if (ret && ret != -2) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to migrate file with link", loc->path); @@ -236,8 +284,8 @@ __is_file_migratable (xlator_t *this, loc_t *loc, } else { gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks", loc->path); + ret = -ENOTSUP; } - ret = ENOTSUP; goto out; } @@ -743,9 +791,11 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* Check if file can be migrated */ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag); - if (ret) + if (ret) { + if (ret == -2) + ret = 0; goto out; - + } /* Take care of the special files */ if (!IA_ISREG (stbuf.ia_type)) { /* Special files */ -- cgit From e75be8977ede9b9174d20b39c427e6fb4ccde567 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 24 Mar 2014 22:54:03 +0530 Subject: cluster/afr: Remove eager-lock stub on finodelk failure Problem: For write fops afr's transaction eager-lock init adds transactions that can share eager-lock to fdctx list. But if eager-lock finodelk fop fails the stub remains in the list. This could later lead to corruption of the list and lead to infinite loop on the list leading to a mount hang. Fix: Remove the stub when finodelk fails. 
Change-Id: I0ed4bc6b62f26c5e891c1181a6871ee6e4f4f5fd BUG: 1063190 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6944 Tested-by: Gluster Build System Reviewed-by: Ravishankar N Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-common.c | 19 +++++++++++++++++++ xlators/cluster/afr/src/afr-transaction.c | 8 ++------ xlators/cluster/afr/src/afr.h | 2 ++ 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2bab0f853..6bd231600 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -889,6 +889,15 @@ afr_replies_wipe (afr_local_t *local, afr_private_t *priv) memset (local->replies, 0, sizeof(*local->replies) * priv->child_count); } +void +afr_remove_eager_lock_stub (afr_local_t *local) +{ + LOCK (&local->fd->lock); + { + list_del_init (&local->transaction.eager_locked); + } + UNLOCK (&local->fd->lock); +} void afr_local_cleanup (afr_local_t *local, xlator_t *this) @@ -900,6 +909,10 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) syncbarrier_destroy (&local->barrier); + if (local->transaction.eager_lock_on && + !list_empty (&local->transaction.eager_locked)) + afr_remove_eager_lock_stub (local); + afr_local_transaction_cleanup (local, this); priv = this->private; @@ -2106,6 +2119,12 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) fd_ctx = (afr_fd_ctx_t *)(long) ctx; if (fd_ctx) { + //no need to take any locks + if (!list_empty (&fd_ctx->eager_locked)) + gf_log (this->name, GF_LOG_WARNING, "%s: Stale " + "Eager-lock stubs found", + uuid_utoa (fd->inode->gfid)); + for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) GF_FREE (fd_ctx->pre_op_done[i]); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index f974fdb59..205ff759e 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ 
-1544,7 +1544,7 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) } - int +int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; @@ -1555,11 +1555,7 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this) /* We don't need to retain "local" in the fd list anymore, writes to all subvols are finished by now */ - LOCK (&local->fd->lock); - { - list_del_init (&local->transaction.eager_locked); - } - UNLOCK (&local->fd->lock); + afr_remove_eager_lock_stub (local); } afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e1b78d1c..36042f7b2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -971,4 +971,6 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this); int afr_local_pathinfo (char *pathinfo, gf_boolean_t *is_local); +void +afr_remove_eager_lock_stub (afr_local_t *local); #endif /* __AFR_H__ */ -- cgit From 50b33f4050e11876ecb8e3512880334de25e3f21 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 4 Apr 2014 12:59:58 +0000 Subject: afr: Simple 1-liner fix for crash in Rackspace BUG: 1084485 Change-Id: I89ddf10add041638ef70baebbce0ec2807ef4b6d Signed-off-by: Justin Clift Reviewed-on: http://review.gluster.org/7402 Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-inode-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 3dacfc8dd..1a5c51cb7 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1588,7 +1588,7 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, if (!transaction_frame) goto out; - local = AFR_FRAME_INIT (frame, op_errno); + local = AFR_FRAME_INIT (transaction_frame, op_errno); if (!local) goto out; -- cgit From 5216e53dedcb74079aaeaec65a2af9486690fea6 Mon Sep 17 
00:00:00 2001 From: Pranith Kumar K Date: Mon, 7 Apr 2014 10:19:37 +0530 Subject: cluster/afr: Init local on txn-frame for zerofill Change-Id: I516f4fb0237dd0b3e512117bf987cea69f8678b8 BUG: 1084485 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/7407 Tested-by: Gluster Build System Reviewed-by: Brian Foster Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-inode-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 1a5c51cb7..3013ae730 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1702,7 +1702,7 @@ afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, if (!transaction_frame) goto out; - local = AFR_FRAME_INIT (frame, op_errno); + local = AFR_FRAME_INIT (transaction_frame, op_errno); if (!local) goto out; -- cgit From 19d9a31263e16c51bd4cba09b83becd8c43ddc1f Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 8 Apr 2014 23:45:31 +0530 Subject: cluster/afr: Mem leak fixes found in valgrind for iozone Change-Id: I869d191dc3470b2208c17343bbf772f01ef744cb BUG: 1085511 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/7424 Tested-by: Gluster Build System Reviewed-by: Ravishankar N Reviewed-by: Anand Avati --- xlators/cluster/afr/src/afr-common.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6bd231600..164a651ba 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -861,6 +861,8 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->transaction.pre_op); GF_FREE (local->transaction.eager_lock); + GF_FREE (local->transaction.fop_subvols); + GF_FREE (local->transaction.failed_subvols); GF_FREE 
(local->transaction.basename); GF_FREE (local->transaction.new_basename); @@ -982,6 +984,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* writev */ GF_FREE (local->cont.writev.vector); + if (local->cont.writev.iobref) + iobref_unref (local->cont.writev.iobref); } { /* setxattr */ -- cgit From 8d7dde6b322483389c25cc0f056c8b27c79c160e Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Tue, 8 Apr 2014 14:37:33 +0530 Subject: cluster/afr: Set right argument order for STACK_WIND_COOKIE Change-Id: Ia26e17a7147ed825319c7c29880b9cf4ae80a48c BUG: 1085259 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/7416 Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Jeff Darcy Tested-by: Gluster Build System --- xlators/cluster/afr/src/afr-inode-read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 01e078c13..4cb219246 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1487,7 +1487,7 @@ afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) return 0; } - STACK_WIND_COOKIE (frame, (void *) (long) subvol, afr_fgetxattr_cbk, + STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol, priv->children[subvol], priv->children[subvol]->fops->fgetxattr, local->fd, local->cont.getxattr.name, -- cgit From 02ac5fa4b5ed4c1cb531724c85ff10c83f221541 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 1 Apr 2014 17:28:55 -0700 Subject: cluster/dht: force set dir inode ctx cached time in setattr() In setattr, the inode times may have been explicitly set "back in time". 
In such cases, if the inode ctx times are not force set, then they continue to be higher and continue serving the higher/older value in future calls to dht_inode_ctx_time_update() Change-Id: I9cbfa7cf7c4069b0106d1f462de08c5d59bc91b5 BUG: 1083324 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/7378 Reviewed-by: Harshavardhana Tested-by: Harshavardhana Tested-by: Gluster Build System Reviewed-by: Raghavendra G Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-common.h | 1 + xlators/cluster/dht/src/dht-helper.c | 28 ++++++++++++++++++++++++++++ xlators/cluster/dht/src/dht-inode-write.c | 6 +++++- 3 files changed, 34 insertions(+), 1 deletion(-) (limited to 'xlators/cluster') diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index d391b87d5..d74d0dfd4 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -753,6 +753,7 @@ dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, int dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, int32_t update_ctx); +void dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat); int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx); int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 18a501f04..f1dc5072f 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -1109,6 +1109,34 @@ dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, return ret; } + +void +dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat) +{ + dht_inode_ctx_t *ctx = NULL; + dht_stat_time_t *time = 0; + int ret = -1; + + ret = dht_inode_ctx_get (inode, this, &ctx); + + if (ret) + return; + + time = &ctx->time; + + time->mtime = stat->ia_mtime; + time->mtime_nsec = stat->ia_mtime_nsec; + + time->ctime = 
stat->ia_ctime; + time->ctime_nsec = stat->ia_ctime_nsec; + + time->atime = stat->ia_atime; + time->atime_nsec = stat->ia_atime_nsec; + + return; +} + + int dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, int32_t post) diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 363bff3bf..576f007e5 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -862,9 +862,13 @@ unlock: UNLOCK (&frame->lock); this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) + if (is_last_call (this_call_cnt)) { + if (local->op_ret == 0) + dht_inode_ctx_time_set (local->loc.inode, this, + &local->stbuf); DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, &local->prebuf, &local->stbuf, xdata); + } return 0; } -- cgit