diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2019-09-10 19:06:54 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-09-27 03:32:27 +0000 | 
| commit | 74e810c602a40d6c316f111cc1333c13bd5aec08 (patch) | |
| tree | 7de6d4d9cb88092b5101e4ef5b995d6d287de2d5 | |
| parent | 61f768eddecc52eb1ba44169526dd674864d0aa1 (diff) | |
cluster/ec: Implement read-mask feature
fixes: #725
Change-Id: Iaaefe6f49c8193c476b987b92df6bab3e2f62601
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
| -rw-r--r-- | tests/basic/ec/ec-read-mask.t | 114 |
| -rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 4 |
| -rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 1 |
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 77 |
4 files changed, 196 insertions, 0 deletions
diff --git a/tests/basic/ec/ec-read-mask.t b/tests/basic/ec/ec-read-mask.t new file mode 100644 index 00000000000..ddb556f2973 --- /dev/null +++ b/tests/basic/ec/ec-read-mask.t @@ -0,0 +1,114 @@ + #!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../ec.rc + +cleanup +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 + +#Empty read-mask should fail +TEST ! $GFS --xlator-option=*.ec-read-mask="" -s $H0 --volfile-id $V0 $M0 + +#Fewer than 4 brick ids (the data-count of this 6-brick, redundancy-2 volume) should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0" -s $H0 --volfile-id $V0 $M0 +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1" -s $H0 --volfile-id $V0 $M0 +TEST ! $GFS --xlator-option=*.ec-read-mask="0:1:2" -s $H0 --volfile-id $V0 $M0 + +#ids greater than 5 should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:6" -s $H0 --volfile-id $V0 $M0 + +#ids less than 0 should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:-1:2:5" -s $H0 --volfile-id $V0 $M0 + +#read-mask containing ids that are not valid integers should fail +TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5:abc" -s $H0 --volfile-id $V0 $M0 +TEST ! 
$GFS --xlator-option="*.ec-read-mask=0:1:2:5a" -s $H0 --volfile-id $V0 $M0 + +#mount with at least 4 read-mask-ids and all of them valid should pass +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5:4:3" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^111111$" ec_option_value $V0 $M0 0 read-mask +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask + +TEST dd if=/dev/urandom of=$M0/a bs=1M count=1 +md5=$(md5sum $M0/a | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +#Read on the file should fail if any brick in the read-mask is down when the number of +#ids equals the data-count +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! 
dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}5 +TEST ! dd if=$M0/a of=/dev/null +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +#Read on file should succeed when non-read-mask bricks are down +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}3 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume start $V0 force + +#Deliberately overwrite the copies of file a on bricks 3 and 4 (not in the read-mask) with zeros and check that reads still give correct data +TEST dd if=/dev/zero of=$B0/${V0}3/a bs=256k count=1 +TEST dd if=/dev/zero of=$B0/${V0}4/a bs=256k count=1 +TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0 
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask +EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}') +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +cleanup; diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 503149cf840..8f6c2aab1f1 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -1330,6 +1330,7 @@ int32_t  ec_manager_readv(ec_fop_data_t *fop, int32_t state)  {      ec_cbk_data_t *cbk; +    ec_t *ec = fop->xl->private;      switch (state) {          case EC_STATE_INIT: @@ -1349,6 +1350,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)              return EC_STATE_DISPATCH;          case EC_STATE_DISPATCH: +            if (ec->read_mask) { +                fop->mask &= ec->read_mask; +            }              ec_dispatch_min(fop);              return EC_STATE_PREPARE_ANSWER; diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 2568b6b3223..7829b8c27b3 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -646,6 +646,7 @@ struct _ec {      uintptr_t xl_notify;      /* Bit flag representing                                   notification for bricks. */      uintptr_t node_mask; +    uintptr_t read_mask;         /*Stores user defined read-mask*/      gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. 
*/      xlator_t **xl_list;      gf_lock_t lock; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index d91d77a4962..66b4e634911 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -702,6 +702,67 @@ ec_statistics_init(ec_t *ec)      GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);  } +/* ec_assign_read_mask: parse read_mask_str, a colon-separated list of integer ids, into a bitmask (bit id set for each listed id) and store it in ec->read_mask. A NULL read_mask_str stores 0 and succeeds. Returns 0 on success and -1 when the working copy cannot be obtained, when a token is rejected by gf_string2int, when an id is outside [0, ec->nodes - 1], or when gf_bits_count(read_mask) is below ec->fragments. On every failure path ec->read_mask is left untouched. */ static int +ec_assign_read_mask(ec_t *ec, char *read_mask_str) +{ +    char *mask = NULL; +    char *maskptr = NULL; +    char *saveptr = NULL; +    char *id_str = NULL; +    int id = 0; +    int ret = 0; +    uintptr_t read_mask = 0; + +    if (!read_mask_str) { /* option not supplied: record an empty (0) mask */ +        ec->read_mask = 0; +        ret = 0; +        goto out; +    } + +    mask = gf_strdup(read_mask_str); /* tokenize a copy, since strtok_r writes into the buffer it scans; read_mask_str stays intact for the log messages below */ +    if (!mask) { +        ret = -1; +        goto out; +    } +    maskptr = mask; + +    for (;;) { +        id_str = strtok_r(maskptr, ":", &saveptr); +        if (id_str == NULL) +            break; +        if (gf_string2int(id_str, &id)) { +            gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, +                   "In read-mask \"%s\" id %s is not a valid integer", +                   read_mask_str, id_str); +            ret = -1; +            goto out; +        } + +        if ((id < 0) || (id >= ec->nodes)) { +            gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, +                   "In read-mask \"%s\" id %d is not in range [0 - %d]", +                   read_mask_str, id, ec->nodes - 1); +            ret = -1; +            goto out; +        } +        read_mask |= (1UL << id); /* a repeated id sets the same bit again, so it is not counted twice below. NOTE(review): 1UL is unsigned long while read_mask is uintptr_t - confirm both have the same width on all supported targets */ +        maskptr = NULL; /* strtok_r resumes from saveptr when its first argument is NULL */ +    } + +    /* the accumulated mask must select at least ec->fragments ids, as the error text below states */ if (gf_bits_count(read_mask) < ec->fragments) { +        gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL, +               "read-mask \"%s\" should contain at least %d ids", read_mask_str, +               ec->fragments); +        ret = -1; +        goto out; +    } +    ec->read_mask = read_mask; /* publish only after every validation step has passed */ +    ret = 0; +out: +    GF_FREE(mask); +    return ret; +} +  int32_t  init(xlator_t *this)  { @@ 
-709,6 +770,7 @@ init(xlator_t *this)      char *read_policy = NULL;      char *extensions = NULL;      int32_t err; +    char *read_mask_str = NULL;      if (this->parents == NULL) {          gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS, @@ -796,6 +858,10 @@ init(xlator_t *this)      GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);      GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);      GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); +    GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed); + +    if (ec_assign_read_mask(ec, read_mask_str)) +        goto failed;      this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);      if (!this->itable) @@ -1471,6 +1537,10 @@ ec_dump_private(xlator_t *this)      gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);      gf_proc_dump_write("childs_up_mask", "%s",                         ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes)); +    if (ec->read_mask) { +        gf_proc_dump_write("read-mask", "%s", +                           ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes)); +    }      gf_proc_dump_write("background-heals", "%d", ec->background_heals);      gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);      gf_proc_dump_write("self-heal-window-size", "%" PRIu32, @@ -1760,6 +1830,13 @@ struct volume_options options[] = {              "[disperse-data-count,  disperse-count] (inclusive)",      },      { +        .key = {"ec-read-mask"}, +        .type = GF_OPTION_TYPE_STR, +        .default_value = NULL, +        .description = "This option can be used to choose which bricks can be" +                       " used for reading data/metadata of a file/directory", +    }, +    {          .key = {NULL},      },  };  | 
