diff options
| -rw-r--r-- | tests/basic/ec/ec-fix-openfd.t | 109 | ||||
| -rwxr-xr-x | tests/bugs/core/bug-908146.t | 12 | ||||
| -rw-r--r-- | tests/volume.rc | 12 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 113 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 8 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-dir-write.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 29 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-types.h | 59 | 
10 files changed, 307 insertions, 43 deletions
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t new file mode 100644 index 00000000000..b62fbf429c8 --- /dev/null +++ b/tests/basic/ec/ec-fix-openfd.t @@ -0,0 +1,109 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc + +# This test checks for open fd heal on EC + +#Create Volume +cleanup +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 performance.read-after-open yes +TEST $CLI volume set $V0 performance.lazy-open no +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 disperse.background-heals 0 +TEST $CLI volume heal $V0 disable +TEST $CLI volume start $V0 + +#Mount the volume +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +#Touch a file +TEST touch "$M0/test_file" + +#Kill a brick +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Open the file in write mode +TEST fd=`fd_available` +TEST fd_open $fd 'rw' "$M0/test_file" + +#Bring up the killed brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +#Test the fd count +EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file +EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file +EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file + +#Write to file +dd iflag=fullblock if=/dev/random bs=1024 count=2 >&$fd 2>/dev/null + +#Test the fd count +EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file + +#Close fd +TEST fd_close $fd + +#Stop the volume +TEST $CLI volume stop $V0 + +#Start the volume +TEST $CLI volume start $V0 + +#Kill brick1 +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Unmount and mount +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0; +TEST $GFS --volfile-id=/$V0 
--volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Calculate md5 sum +md5sum0=`get_md5_sum "$M0/test_file"` + +#Bring up the brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +#Kill brick2 +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Unmount and mount +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Calculate md5 sum +md5sum1=`get_md5_sum "$M0/test_file"` + +#Bring up the brick +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +#Kill brick3 +TEST kill_brick $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Unmount and mount +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 + +#Calculate md5 sum +md5sum2=`get_md5_sum "$M0/test_file"` + +#compare the md5sum +EXPECT "$md5sum0" echo $md5sum1 +EXPECT "$md5sum0" echo $md5sum2 +EXPECT "$md5sum1" echo $md5sum2 + +cleanup diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t index bf34992fee5..327be6e54bc 100755 --- a/tests/bugs/core/bug-908146.t +++ b/tests/bugs/core/bug-908146.t @@ -2,18 +2,8 @@  . $(dirname $0)/../../include.rc  . $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../fileio.rc -function get_fd_count { -        local vol=$1 -        local host=$2 -        local brick=$3 -        local fname=$4 -        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname)) -        local statedump=$(generate_brick_statedump $vol $host $brick) -        local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1) -        rm -f $statedump -        echo $count -}  cleanup;  TEST glusterd diff --git a/tests/volume.rc b/tests/volume.rc index 1cee648993b..1ca17ab3456 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -796,3 +796,15 @@ function count_sh_entries()  {      ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l  } + +function get_fd_count { +        local vol=$1 +        local host=$2 +        local brick=$3 +        local fname=$4 +        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname)) +        local statedump=$(generate_brick_statedump $vol $host $brick) +        local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1) +        rm -f $statedump +        echo $count +} diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 6a15223e0cc..2cb640e455c 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -26,6 +26,114 @@                                     EC_FLAG_WAITING_DATA_DIRTY |\                                     EC_FLAG_WAITING_METADATA_DIRTY) +void +ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx, +                     int32_t ret_status) +{ +        ec_fd_t *fd_ctx; + +        if (fd == NULL) +                return; + +        LOCK (&fd->lock); +        { +                fd_ctx = __ec_fd_get(fd, xl); +                if (fd_ctx) { +                        if (ret_status >= 0) +                                fd_ctx->fd_status[idx] = EC_FD_OPENED; +                        else +                                
fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED; +                } +        } +        UNLOCK (&fd->lock); +} + +static int +ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open) +{ +    int i = 0; +    int count = 0; +    ec_t *ec = NULL; +    ec_fd_t *fd_ctx = NULL; + +    ec = this->private; +    *need_open = 0; + +    fd_ctx = ec_fd_get (fd, this); +    if (!fd_ctx) +        return count; + +    LOCK (&fd->lock); +    { +        for (i = 0; i < ec->nodes; i++) { +                if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) && +                    (ec->xl_up & (1<<i))) { +                        fd_ctx->fd_status[i] = EC_FD_OPENING; +                        *need_open |= (1<<i); +                        count++; +                } +        } +    } +    UNLOCK (&fd->lock); + +    /* If fd needs to open on minimum number of nodes +     * then ignore fixing the fd as it has been +     * requested from heal operation. +     */ +    if (count >= ec->fragments) +        count = 0; + +    return count; +} + +static gf_boolean_t +ec_is_fd_fixable (fd_t *fd) +{ +    if (!fd || !fd->inode) +        return _gf_false; +    else if (fd_is_anonymous (fd)) +        return _gf_false; +    else if (gf_uuid_is_null (fd->inode->gfid)) +        return _gf_false; + +    return _gf_true; +} + +static void +ec_fix_open (ec_fop_data_t *fop) +{ +    int                call_count = 0; +    uintptr_t           need_open = 0; +    int                       ret = 0; +    loc_t                     loc = {0, }; + +    if (!ec_is_fd_fixable (fop->fd)) +        goto out; + +    /* Evaluate how many remote fd's to be opened */ +    call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open); +    if (!call_count) +        goto out; + +    loc.inode = inode_ref (fop->fd->inode); +    gf_uuid_copy (loc.gfid, fop->fd->inode->gfid); +    ret = loc_path (&loc, NULL); +    if (ret < 0) { +        goto out; +    } + +    if (IA_IFDIR == fop->fd->inode->ia_type) { +        
ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, +                   NULL, NULL, &fop->loc[0], fop->fd, NULL); +    } else{ +        ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, +                NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL); +    } + +out: +    loc_wipe (&loc); +} +  off_t  ec_range_end_get (off_t fl_start, size_t fl_size)  { @@ -1652,6 +1760,11 @@ void ec_lock_acquired(ec_lock_link_t *link)      ec_lock_apply(link); +    if (fop->use_fd && +        (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) { +        ec_fix_open(fop); +    } +      ec_lock_resume_shared(&list);  } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 9a35391a781..c3e291585ef 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -139,4 +139,8 @@ ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);  int32_t ec_lock_unlocked(call_frame_t *frame, void *cookie,                           xlator_t *this, int32_t op_ret, int32_t op_errno,                           dict_t *xdata); + +void +ec_update_fd_status (fd_t *fd, xlator_t *xl, +                     int child_index, int32_t ret_status);  #endif /* __EC_COMMON_H__ */ diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index 48afe54460f..b44bb4239b1 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -19,7 +19,11 @@  #include "ec-method.h"  #include "ec-fops.h" -/* FOP: opendir */ +/**************************************************************** + * + * File Operation: opendir + * + ***************************************************************/  int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,                             ec_cbk_data_t * src) @@ -88,6 +92,8 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,          }          ec_combine(cbk, ec_combine_opendir); + +        
ec_update_fd_status (fd, this, idx, op_ret);      }  out: diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index 150dc66f21b..7779d4849f3 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -71,6 +71,7 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,  out:          if (cbk)                  ec_combine (cbk, ec_combine_write); +          if (fop)                  ec_complete (fop);          return 0; diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 122fe24b5d3..8aa663b78c2 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -751,27 +751,32 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)  ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)  { +    int i = 0;      ec_fd_t * ctx = NULL;      uint64_t value = 0; +    ec_t *ec = xl->private; -    if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) -    { -        ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t); -        if (ctx != NULL) -        { +    if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) { +        ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes), +                        ec_mt_ec_fd_t); +        if (ctx != NULL) {              memset(ctx, 0, sizeof(*ctx)); -            value = (uint64_t)(uintptr_t)ctx; -            if (__fd_ctx_set(fd, xl, value) != 0) -            { -                GF_FREE(ctx); +            for (i = 0; i < ec->nodes; i++) { +                if (fd_is_anonymous (fd)) { +                        ctx->fd_status[i] = EC_FD_OPENED; +                } else { +                        ctx->fd_status[i] = EC_FD_NOT_OPENED; +                } +            } +            value = (uint64_t)(uintptr_t)ctx; +            if (__fd_ctx_set(fd, xl, value) != 0) { +                GF_FREE (ctx);                  return NULL;              }          } -    } -    else -    { +    } else {   
       ctx = (ec_fd_t *)(uintptr_t)value;      } diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index 33fd7f549bb..24fcdb9e883 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -739,6 +739,9 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,          }          ec_combine(cbk, ec_combine_open); + +        ec_update_fd_status (fd, this, idx, op_ret); +      }  out: diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h index 23b30548450..e87c1630359 100644 --- a/xlators/cluster/ec/src/ec-types.h +++ b/xlators/cluster/ec/src/ec-types.h @@ -124,6 +124,13 @@ enum _ec_heal_need {          EC_HEAL_MUST  }; +/* Enumerations to indicate FD status. */ +typedef enum { +    EC_FD_NOT_OPENED, +    EC_FD_OPENED, +    EC_FD_OPENING +} ec_fd_status_t; +  struct _ec_config {      uint32_t version;      uint8_t  algorithm; @@ -137,6 +144,7 @@ struct _ec_fd {      loc_t     loc;      uintptr_t open;      int32_t   flags; +    ec_fd_status_t fd_status[0];  };  struct _ec_inode { @@ -263,17 +271,21 @@ struct _ec_lock_link {      off_t             fl_end;  }; +/* EC xlator data structure to collect all the data required to perform + * the file operation.*/  struct _ec_fop_data { -    int32_t            id; +    int32_t            id;           /* ID of the file operation */      int32_t            refs;      int32_t            state; -    int32_t            minimum; +    int32_t            minimum;      /* Minimum number of successful +                                        operation required to conclude a +                                        fop as successful */      int32_t            expected;      int32_t            winds;      int32_t            jobs;      int32_t            error;      ec_fop_data_t     *parent; -    xlator_t          *xl; +    xlator_t          *xl;           /* points to EC xlator */      call_frame_t      
*req_frame;    /* frame of the calling xlator */      call_frame_t      *frame;        /* frame used by this fop */      struct list_head   cbk_list;     /* sorted list of groups of answers */ @@ -299,10 +311,10 @@ struct _ec_fop_data {      uid_t              uid;      gid_t              gid; -    ec_wind_f          wind; -    ec_handler_f       handler; +    ec_wind_f          wind;          /* Function to wind to */ +    ec_handler_f       handler;       /* FOP manager function */      ec_resume_f        resume; -    ec_cbk_t           cbks; +    ec_cbk_t           cbks;          /* Callback function for this FOP */      void              *data;      ec_heal_t         *heal;      struct list_head   healer; @@ -310,7 +322,8 @@ struct _ec_fop_data {      uint64_t           user_size;      uint32_t           head; -    int32_t            use_fd; +    int32_t            use_fd;        /* Indicates whether this FOP uses FD or +                                         not */      dict_t            *xdata;      dict_t            *dict; @@ -324,10 +337,12 @@ struct _ec_fop_data {      gf_xattrop_flags_t xattrop_flags;      dev_t              dev;      inode_t           *inode; -    fd_t              *fd; +    fd_t              *fd;              /* FD of the file on which FOP is +                                           being carried upon */      struct iatt        iatt;      char              *str[2]; -    loc_t              loc[2]; +    loc_t              loc[2];          /* Holds the location details for +                                           the file */      struct gf_flock    flock;      struct iovec      *vector;      struct iobref     *buffers; @@ -555,18 +570,24 @@ struct _ec {      xlator_t          *xl;      int32_t            healers;      int32_t            heal_waiters; -    int32_t            nodes; +    int32_t            nodes;                /* Total number of bricks(n) */      int32_t            bits_for_nodes; -    int32_t            fragments; 
-    int32_t            redundancy; -    uint32_t           fragment_size; -    uint32_t           stripe_size; -    int32_t            up; +    int32_t            fragments;            /* Data bricks(k) */ +    int32_t            redundancy;           /* Redundant bricks(m) */ +    uint32_t           fragment_size;        /* Size of fragment/chunk on a +                                                brick. */ +    uint32_t           stripe_size;          /* (fragment_size * fragments) +                                                maximum size of user data +                                                stored in one stripe. */ +    int32_t            up;                   /* Represents whether EC volume is +                                                up or not. */      uint32_t           idx; -    uint32_t           xl_up_count; -    uintptr_t          xl_up; -    uint32_t           xl_notify_count; -    uintptr_t          xl_notify; +    uint32_t           xl_up_count;          /* Number of UP bricks. */ +    uintptr_t          xl_up;                /* Bit flag representing UP +                                                bricks */ +    uint32_t           xl_notify_count;      /* Number of notifications. */ +    uintptr_t          xl_notify;            /* Bit flag representing +                                                notification for bricks. */      uintptr_t          node_mask;      xlator_t         **xl_list;      gf_lock_t          lock;  | 
