From 25e581d42e6e064718bb902d8819ed458d333a4d Mon Sep 17 00:00:00 2001 From: Ashish Pandey Date: Sun, 30 Aug 2015 21:24:32 +0530 Subject: cluster/ec : Mark new entry changelog in entry self-heal Problem : When a new entry is created, the dirty-mark xattrs are not created for it, so a full heal needs to be performed even when there are only partial failures. Solution : Mark the new entry's changelog in self-heal. PS: Also fixed erasing of dirty markers when no data heal is required. BUG: 1258313 Signed-off-by: Ashish Pandey Change-Id: I156e3d3201afa77efe118e1aaace1d91c90a9613 Reviewed-on: http://review.gluster.org/12306 Tested-by: NetBSD Build System Reviewed-by: Pranith Kumar Karampuri --- libglusterfs/src/cluster-syncop.h | 6 +++ tests/basic/ec/ec-new-entry.t | 62 +++++++++++++++++++++++++++++ xlators/cluster/ec/src/ec-data.h | 2 +- xlators/cluster/ec/src/ec-heal.c | 84 ++++++++++++++++++++++++++++++++++++--- xlators/storage/posix/src/posix.c | 2 +- 5 files changed, 148 insertions(+), 8 deletions(-) create mode 100644 tests/basic/ec/ec-new-entry.t diff --git a/libglusterfs/src/cluster-syncop.h b/libglusterfs/src/cluster-syncop.h index a681951c27d..799576c3c52 100644 --- a/libglusterfs/src/cluster-syncop.h +++ b/libglusterfs/src/cluster-syncop.h @@ -127,6 +127,12 @@ cluster_fxattrop (xlator_t **subvols, unsigned char *on, int numsubvols, call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); +int32_t +cluster_xattrop (xlator_t **subvols, unsigned char *on, int numsubvols, + default_args_cbk_t *replies, unsigned char *output, + call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); + int32_t cluster_fstat (xlator_t **subvols, unsigned char *on, int numsubvols, default_args_cbk_t *replies, unsigned char *output, diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t new file mode 100644 index 00000000000..a08bae7a810 --- /dev/null +++ b/tests/basic/ec/ec-new-entry.t 
@@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup +function num_entries { + ls -l $1 | wc -l +} + +function get_md5sum { + md5sum $1 | awk '{print $1}' +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +touch $M0/{1..10} +touch $M0/11 +for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done +TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}6 +#simulate pending heal on just the root directory +TEST touch $M0/a +TEST rm -f $M0/a +EXPECT_WITHIN $HEAL_TIMEOUT "^5$" get_pending_heal_count $V0 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +#ls -l gives "Total" line so number of lines will be 1 more +EXPECT "^12$" num_entries $B0/${V0}6 +ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0) +EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1 +EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2 +EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3 +EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4 +EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}6 +file_md5sum=$(get_md5sum $M0/1) +empty_md5sum=$(get_md5sum $M0/11) +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo $file_md5sum +EXPECT "$file_md5sum" get_md5sum $M0/1 +EXPECT "$file_md5sum" get_md5sum $M0/2 +EXPECT "$file_md5sum" 
get_md5sum $M0/3 +EXPECT "$file_md5sum" get_md5sum $M0/4 +EXPECT "$file_md5sum" get_md5sum $M0/5 +EXPECT "$file_md5sum" get_md5sum $M0/6 +EXPECT "$file_md5sum" get_md5sum $M0/7 +EXPECT "$file_md5sum" get_md5sum $M0/8 +EXPECT "$file_md5sum" get_md5sum $M0/9 +EXPECT "$file_md5sum" get_md5sum $M0/10 +EXPECT "$empty_md5sum" get_md5sum $M0/11 + +cleanup; diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h index 8a48a7ca824..d845bf5022e 100644 --- a/xlators/cluster/ec/src/ec-data.h +++ b/xlators/cluster/ec/src/ec-data.h @@ -265,8 +265,8 @@ struct _ec_cbk_data struct gf_flock flock; struct iovec * vector; struct iobref * buffers; - gf_dirent_t entries; char *str; + gf_dirent_t entries; }; struct _ec_heal diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index f76839db38f..10dc9f158e7 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -450,13 +450,13 @@ out: loc_wipe (&loc); return op_ret; } - int ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies, uint64_t *versions, uint64_t *dirty, unsigned char *sources, unsigned char *healed_sinks) { uint64_t xattr[EC_VERSION_SIZE] = {0}; + uint64_t max_version = 0; int same_count = 0; int max_same_count = 0; int same_source = -1; @@ -527,11 +527,18 @@ ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies, else if (replies[i].valid && replies[i].op_ret >= 0) healed_sinks[i] = 1; } + for (i = 0; i < ec->nodes; i++) { + if (sources[i] && (versions[i] > max_version)) { + same_source = i; + max_version = versions[i]; + } + } ret = same_source; out: return ret; } + int __ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode, unsigned char *locked_on, default_args_cbk_t *replies, @@ -545,7 +552,6 @@ __ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode, int source = 0; default_args_cbk_t *greplies = NULL; int i = 0; - EC_REPLIES_ALLOC (greplies, ec->nodes); loc.inode = 
inode_ref (inode); @@ -814,6 +820,51 @@ out: cluster_replies_wipe (replies, ec->nodes); return ret; } +int32_t +ec_set_new_entry_dirty (ec_t *ec, loc_t *loc, struct iatt *ia, + call_frame_t *frame, xlator_t *this, unsigned char *on) +{ + dict_t *xattr = NULL; + int32_t ret = -1; + default_args_cbk_t *replies = NULL; + unsigned char *output = NULL; + uint64_t dirty[EC_VERSION_SIZE] = {1, 1}; + loc_t newloc = {0}; + + /*Symlinks don't have any data to be healed*/ + if (ia->ia_type == IA_IFLNK) + dirty[EC_DATA_TXN] = 0; + + newloc.inode = inode_ref (loc->inode); + gf_uuid_copy (newloc.gfid, ia->ia_gfid); + EC_REPLIES_ALLOC (replies, ec->nodes); + output = alloca0 (ec->nodes); + xattr = dict_new(); + if (!xattr) { + ret = -ENOMEM; + goto out; + } + + ret = ec_dict_set_array (xattr, EC_XATTR_DIRTY, dirty, + EC_VERSION_SIZE); + if (ret) + goto out; + + ret = cluster_xattrop (ec->xl_list, on, ec->nodes, replies, output, + frame, ec->xl, &newloc, + GF_XATTROP_ADD_ARRAY64, xattr, NULL); + + if (ret < ec->fragments) { + ret = -ENOTCONN; + goto out; + } +out: + if (xattr) + dict_unref (xattr); + cluster_replies_wipe (replies, ec->nodes); + loc_wipe (&newloc); + return ret; +} /*Name heal*/ int @@ -960,6 +1011,7 @@ ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, struct iatt *ia = NULL; unsigned char *output = 0; unsigned char *output1 = 0; + unsigned char *on = NULL; default_args_cbk_t *replies = NULL; loc_t loc = {0}; loc_t srcloc = {0}; @@ -1005,10 +1057,20 @@ ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, loc.name = name; link = alloca0 (ec->nodes); create = alloca0 (ec->nodes); + on = alloca0 (ec->nodes); output = alloca0 (ec->nodes); output1 = alloca0 (ec->nodes); + + for (i = 0; i < ec->nodes; i++) { + if (!lookup_replies[i].valid) + continue; + if (lookup_replies[i].op_ret) + continue; + on[i] = 1; + } switch (ia->ia_type) { case IA_IFDIR: + ec_set_new_entry_dirty (ec, &loc, ia, frame, ec->xl, on); ret = 
cluster_mkdir (ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl, &loc, st_mode_from_ia (ia->ia_prot, @@ -1053,6 +1115,8 @@ ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, } linkname = alloca0 (strlen(replies[i].buf) + 1); strcpy (linkname, replies[i].buf); + ec_set_new_entry_dirty (ec, &loc, ia, frame, + ec->xl, on); cluster_symlink (ec->xl_list, create, ec->nodes, replies, output, frame, ec->xl, linkname, &loc, 0, xdata); @@ -1062,6 +1126,8 @@ ec_create_name (call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, output[i] = 1; break; case IA_IFREG: + ec_set_new_entry_dirty (ec, &loc, ia, + frame, ec->xl, on); config.version = EC_CONFIG_VERSION; config.algorithm = EC_CONFIG_ALGORITHM; config.gf_word_size = EC_GF_BITS; @@ -1597,10 +1663,6 @@ __ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd, goto out; } - if (EC_COUNT(healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } ret = source; out: if (xattrs) @@ -2115,6 +2177,13 @@ __ec_heal_data (call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on, if (ret < 0) goto unlock; + if (EC_COUNT(healed_sinks, ec->nodes) == 0) { + ret = __ec_fd_data_adjust_versions (frame, ec, fd, + sources, + healed_sinks, versions, dirty, size); + goto unlock; + } + source = ret; ret = __ec_heal_mark_sinks (frame, ec, fd, versions, healed_sinks); @@ -2129,6 +2198,9 @@ unlock: if (ret < 0) goto out; + if (EC_COUNT(healed_sinks, ec->nodes) == 0) + goto out; + gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: " "%d", uuid_utoa (fd->inode->gfid), EC_COUNT (sources, ec->nodes), diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 0e22a6346ac..de5f3a8b423 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -5214,7 +5214,7 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, out: - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xdata, xdata); + 
STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xdata, NULL); if (xdata) dict_unref (xdata); -- cgit