diff options
-rw-r--r-- | tests/basic/ec/ec-read-mask.t | 114 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 4 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 1 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 77 |
4 files changed, 196 insertions, 0 deletions
diff --git a/tests/basic/ec/ec-read-mask.t b/tests/basic/ec/ec-read-mask.t new file mode 100644 index 00000000000..ddb556f2973 --- /dev/null +++ b/tests/basic/ec/ec-read-mask.t @@ -0,0 +1,114 @@ + #!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../ec.rc + +cleanup +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 + +#Empty read-mask should fail +TEST ! $GFS --xlator-option=*.ec-read-mask="" -s $H0 --volfile-id $V0 $M0 + +#Fewer than 4 brick ids should fail (the volume is disperse 6 redundancy 2) +TEST ! $GFS --xlator-option="*.ec-read-mask=0" -s $H0 --volfile-id $V0 $M0 +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1" -s $H0 --volfile-id $V0 $M0 +TEST ! $GFS --xlator-option=*.ec-read-mask="0:1:2" -s $H0 --volfile-id $V0 $M0 + +#ids greater than 5 should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:6" -s $H0 --volfile-id $V0 $M0 + +#ids less than 0 should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:-1:2:5" -s $H0 --volfile-id $V0 $M0 + +#read-mask containing an id that is not a valid integer should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5:abc" -s $H0 --volfile-id $V0 $M0 +TEST ! 
$GFS --xlator-option="*.ec-read-mask=0:1:2:5a" -s $H0 --volfile-id $V0 $M0 + +#mount with at least 4 read-mask-ids and all of them valid should pass +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5:4:3" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^111111$" ec_option_value $V0 $M0 0 read-mask +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask + +TEST dd if=/dev/urandom of=$M0/a bs=1M count=1 +md5=$(md5sum $M0/a | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +#Read on the file should fail if any of the read-mask is down when number of +#ids is data-count +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! 
dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}5 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +#Read on file should succeed when non-read-mask bricks are down +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}3 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +#Deliberately corrupt chunks 3: 4 and check that reads still give correct data +TEST dd if=/dev/zero of=$B0/${V0}3/a bs=256k count=1 +TEST dd if=/dev/zero of=$B0/${V0}4/a bs=256k count=1 +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +cleanup; diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 503149cf840..8f6c2aab1f1 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -1330,6 +1330,7 @@ int32_t ec_manager_readv(ec_fop_data_t *fop, int32_t state) { ec_cbk_data_t *cbk; + ec_t *ec = fop->xl->private; switch (state) { case EC_STATE_INIT: @@ -1349,6 +1350,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state) return EC_STATE_DISPATCH; case EC_STATE_DISPATCH: + if (ec->read_mask) { /* read_mask is 0 when the ec-read-mask option was not given; the fop mask is then left as is */ + fop->mask &= ec->read_mask; /* NOTE(review): fop->mask presumably holds the bricks this fop may be dispatched to - confirm */ + } ec_dispatch_min(fop); return EC_STATE_PREPARE_ANSWER; diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 2568b6b3223..7829b8c27b3 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -646,6 +646,7 @@ struct _ec { uintptr_t xl_notify; /* Bit flag representing notification for bricks. */ uintptr_t node_mask; + uintptr_t read_mask; /* User-defined read-mask: bit N is set for every brick id N listed in the ec-read-mask option; 0 when the option is unset. */ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. 
*/ xlator_t **xl_list; gf_lock_t lock; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index d91d77a4962..66b4e634911 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -702,6 +702,67 @@ ec_statistics_init(ec_t *ec) GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0); } +static int +ec_assign_read_mask(ec_t *ec, char *read_mask_str) +{ /* Parses read_mask_str, a colon-separated list of brick ids, into a bitmask and stores it in ec->read_mask. Returns 0 on success. Returns -1 when the string copy fails, when an id is not a valid integer, when an id lies outside 0 .. ec->nodes - 1, or when gf_bits_count() of the resulting mask is below ec->fragments. A NULL read_mask_str stores a zero mask and returns 0. On failure ec->read_mask is not modified. */ + char *mask = NULL; + char *maskptr = NULL; + char *saveptr = NULL; + char *id_str = NULL; + int id = 0; + int ret = 0; + uintptr_t read_mask = 0; + + if (!read_mask_str) { + ec->read_mask = 0; + ret = 0; + goto out; + } + + mask = gf_strdup(read_mask_str); /* strtok_r writes into the string it tokenizes, so work on a private copy */ + if (!mask) { + ret = -1; + goto out; + } + maskptr = mask; + + for (;;) { + id_str = strtok_r(maskptr, ":", &saveptr); + if (id_str == NULL) + break; + if (gf_string2int(id_str, &id)) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, + "In read-mask \"%s\" id %s is not a valid integer", + read_mask_str, id_str); + ret = -1; + goto out; + } + + if ((id < 0) || (id >= ec->nodes)) { + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, + "In read-mask \"%s\" id %d is not in range [0 - %d]", + read_mask_str, id, ec->nodes - 1); + ret = -1; + goto out; + } + read_mask |= (1UL << id); + maskptr = NULL; /* a NULL first argument makes strtok_r continue from saveptr on the next iteration */ + } + + if (gf_bits_count(read_mask) < ec->fragments) { /* NOTE(review): gf_bits_count presumably counts set bits, so a repeated id counts once 
- confirm */ + gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, + "read-mask \"%s\" should contain at least %d ids", read_mask_str, + ec->fragments); + ret = -1; + goto out; + } + ec->read_mask = read_mask; + ret = 0; +out: + GF_FREE(mask); /* mask is still NULL on the early exits above; NOTE(review): presumably GF_FREE accepts NULL - confirm */ + return ret; +} + int32_t init(xlator_t *this) { @@ -709,6 +770,7 @@ init(xlator_t *this) char *read_policy = NULL; char *extensions = NULL; int32_t err; + char *read_mask_str = NULL; if (this->parents == NULL) { gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS, @@ -796,6 +858,10 @@ init(xlator_t *this) GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed); GF_OPTION_INIT("stripe-cache", 
ec->stripe_cache, uint32, failed); GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed); + + if (ec_assign_read_mask(ec, read_mask_str)) /* a rejected read-mask jumps to the same failed label used by the option parsing above */ + goto failed; this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); if (!this->itable) @@ -1471,6 +1537,10 @@ ec_dump_private(xlator_t *this) gf_proc_dump_write("childs_up", "%u", ec->xl_up_count); gf_proc_dump_write("childs_up_mask", "%s", ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes)); + if (ec->read_mask) { /* the read-mask entry is omitted from the dump when the mask is zero */ + gf_proc_dump_write("read-mask", "%s", + ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes)); + } gf_proc_dump_write("background-heals", "%d", ec->background_heals); gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen); gf_proc_dump_write("self-heal-window-size", "%" PRIu32, @@ -1760,6 +1830,13 @@ struct volume_options options[] = { "[disperse-data-count, disperse-count] (inclusive)", }, { + .key = {"ec-read-mask"}, + .type = GF_OPTION_TYPE_STR, + .default_value = NULL, + .description = "This option can be used to choose which bricks can be" + " used for reading data/metadata of a file/directory", /* NOTE(review): within this patch the mask is only applied in ec_manager_readv - confirm that the metadata and directory wording is intended */ + }, + { .key = {NULL}, }, }; |