summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2015-05-28 16:54:59 +0200
committerPranith Kumar Karampuri <pkarampu@redhat.com>2015-05-30 05:35:33 -0700
commit90ed7128dfd4d10e65efcc866cfd610ba3b0cc30 (patch)
tree446bf4ff0c06fb059dba65edf54bf819a6d4d0ca
parent54b193760ce59569fa5813e64af7753afbf0d500 (diff)
cluster/ec: Ignore differences in non locked inodes
When ec combines iatt structures from multiple bricks, it checks for equality in important fields. This is ok for iatt related to inodes involved in the operation that have been locked before starting execution. However some fops return iatt information from other inodes. For example a rename locks source and destination parent directories, but it also returns an iatt from the entry itself. In these cases we ignore differences in some fields to avoid false detection of inconsistencies and trigger unnecessary self-heals. Another issue is solved in this patch that caused that the real size of the file stored into the inode context was lost during self-heal. Change-Id: I8b8eca30b2a6c39c7b9bbd3b3b6ba95228fcc041 BUG: 1225793 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/10974 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: NetBSD Build System
-rw-r--r--xlators/cluster/ec/src/ec-combine.c75
-rw-r--r--xlators/cluster/ec/src/ec-combine.h3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c6
-rw-r--r--xlators/cluster/ec/src/ec-heal.c35
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c9
5 files changed, 100 insertions, 28 deletions
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 5842a4dd0a8..a8cfabc011e 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -84,7 +84,7 @@ ec_combine_write (ec_fop_data_t *fop, ec_cbk_data_t *dst,
break;
}
- if (!ec_iatt_combine(dst->iatt, src->iatt, valid)) {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of '%s'", gf_fop_list[fop->id]);
@@ -104,23 +104,78 @@ void ec_iatt_time_merge(uint32_t * dst_sec, uint32_t * dst_nsec,
}
}
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count)
+static
+uint64_t
+gfid_to_ino(uuid_t gfid)
{
+ uint64_t ino = 0;
int32_t i;
+ for (i = 8; i < 16; i++) {
+ ino <<= 8;
+ ino += (uint8_t)gfid[i];
+ }
+
+ return ino;
+}
+
+static
+gf_boolean_t
+ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt)
+{
+ uint64_t ino;
+ int32_t i;
+
+ /* Only the top level fop will have fop->locks filled. */
+ while (fop->parent != NULL) {
+ fop = fop->parent;
+ }
+
+ /* Check if the iatt references an inode locked by the current fop */
+ for (i = 0; i < fop->lock_count; i++) {
+ ino = gfid_to_ino(fop->locks[i].lock->loc.inode->gfid);
+ if (iatt->ia_ino == ino) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count)
+{
+ int32_t i;
+ gf_boolean_t failed = _gf_false;
+
for (i = 0; i < count; i++)
{
+ /* Check for basic fields. These fields must be equal always, even if
+ * the inode is not locked because in these cases the parent inode
+ * will be locked and differences in these fields require changes in
+ * the parent directory. */
if ((dst[i].ia_ino != src[i].ia_ino) ||
- (dst[i].ia_uid != src[i].ia_uid) ||
- (dst[i].ia_gid != src[i].ia_gid) ||
(((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
(dst[i].ia_rdev != src[i].ia_rdev)) ||
- ((dst[i].ia_type == IA_IFREG) &&
- (dst[i].ia_size != src[i].ia_size)) ||
- (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
- st_mode_from_ia(src[i].ia_prot, src[i].ia_type)) ||
- (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0))
- {
+ (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0)) {
+ failed = _gf_true;
+ }
+ /* Check for not so stable fields. These fields can change if the
+ * inode is not locked. */
+ if (!failed && ((dst[i].ia_uid != src[i].ia_uid) ||
+ (dst[i].ia_gid != src[i].ia_gid) ||
+ ((dst[i].ia_type == IA_IFREG) &&
+ (dst[i].ia_size != src[i].ia_size)) ||
+ (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
+ st_mode_from_ia(src[i].ia_prot, src[i].ia_type)))) {
+ if (!ec_iatt_is_trusted(fop, dst)) {
+ /* If the iatt contains information from an inode that is not
+ * locked, we ignore these differences and don't care which
+ * data is returned. */
+ failed = _gf_false;
+ }
+ }
+ if (failed) {
gf_log(THIS->name, GF_LOG_WARNING,
"Failed to combine iatt (inode: %lu-%lu, links: %u-%u, "
"uid: %u-%u, gid: %u-%u, rdev: %lu-%lu, size: %lu-%lu, "
diff --git a/xlators/cluster/ec/src/ec-combine.h b/xlators/cluster/ec/src/ec-combine.h
index cae2bb9274f..19a42ded706 100644
--- a/xlators/cluster/ec/src/ec-combine.h
+++ b/xlators/cluster/ec/src/ec-combine.h
@@ -20,7 +20,8 @@ typedef int32_t (* ec_combine_f)(ec_fop_data_t * fop, ec_cbk_data_t * dst,
void ec_iatt_rebuild(ec_t * ec, struct iatt * iatt, int32_t count,
int32_t answers);
-int32_t ec_iatt_combine(struct iatt * dst, struct iatt * src, int32_t count);
+int32_t ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count);
int32_t ec_dict_compare(dict_t * dict1, dict_t * dict2);
int32_t ec_vector_compare(struct iovec * dst_vector, int32_t dst_count,
struct iovec * src_vector, int32_t src_count);
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index f50c7a70560..a455dad9f6b 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -235,8 +235,7 @@ out:
int32_t ec_combine_fsync(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_FSYNC'");
@@ -813,8 +812,7 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
int32_t ec_combine_lookup(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 2))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_LOOKUP'");
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 80725e5a9fa..af75c9551b8 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -546,10 +546,25 @@ out:
return error;
}
+int32_t ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ if (op_ret >= 0) {
+ GF_ASSERT(ec_set_inode_size(heal->fop, heal->fd->inode,
+ heal->total_size));
+ }
+
+ return 0;
+}
+
void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,
off_t offset, size_t size)
{
struct gf_flock flock;
+ fop_inodelk_cbk_t cbk = NULL;
flock.l_type = type;
flock.l_whence = SEEK_SET;
@@ -558,22 +573,28 @@ void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,
flock.l_pid = 0;
flock.l_owner.len = 0;
- /* Remove inode size information before unlocking it. */
- if ((type == F_UNLCK) && (heal->loc.inode != NULL)) {
- ec_clear_inode_info(heal->fop, heal->loc.inode);
+ if (type == F_UNLCK) {
+ /* Remove inode size information before unlocking it. */
+ if (fd == NULL) {
+ ec_clear_inode_info(heal->fop, heal->loc.inode);
+ } else {
+ ec_clear_inode_info(heal->fop, heal->fd->inode);
+ }
+ } else {
+ /* Otherwise use the callback to update size information. */
+ cbk = ec_heal_lock_cbk;
}
if (fd != NULL)
{
ec_finodelk(heal->fop->frame, heal->xl, heal->fop->mask,
- EC_MINIMUM_ALL, NULL, NULL, heal->xl->name, fd,
- F_SETLKW, &flock, NULL);
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW,
+ &flock, NULL);
}
else
{
ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL,
- NULL, NULL, heal->xl->name, loc, F_SETLKW, &flock,
- NULL);
+ cbk, heal, heal->xl->name, loc, F_SETLKW, &flock, NULL);
}
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 853d914148b..ae02e964c3d 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -936,8 +936,7 @@ out:
int32_t ec_combine_readlink(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_READLINK'");
@@ -1210,8 +1209,7 @@ int32_t ec_combine_readv(ec_fop_data_t * fop, ec_cbk_data_t * dst,
return 0;
}
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_READ'");
@@ -1493,8 +1491,7 @@ out:
int32_t ec_combine_stat(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
{
- if (!ec_iatt_combine(dst->iatt, src->iatt, 1))
- {
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
gf_log(fop->xl->name, GF_LOG_NOTICE, "Mismatching iatt in "
"answers of 'GF_FOP_STAT'");