diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 18 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 258 | 
2 files changed, 138 insertions, 138 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 19905394540..687c28e6472 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -14,12 +14,11 @@  #include <pthread.h>  typedef struct { -    int child;      char *path; +    int child;  } shd_event_t;  typedef struct { -    int child;      uint64_t healed_count;      uint64_t split_brain_count;      uint64_t heal_failed_count; @@ -31,32 +30,33 @@ typedef struct {         cralwer is in progress */      time_t end_time;      char *crawl_type; +    int child;  } crawl_event_t;  struct subvol_healer {      xlator_t *this; -    int subvol; -    gf_boolean_t local; -    gf_boolean_t running; -    gf_boolean_t rerun;      crawl_event_t crawl_event;      pthread_mutex_t mutex;      pthread_cond_t cond;      pthread_t thread; +    int subvol; +    gf_boolean_t local; +    gf_boolean_t running; +    gf_boolean_t rerun;  };  typedef struct { -    gf_boolean_t iamshd; -    gf_boolean_t enabled; -    int timeout;      struct subvol_healer *index_healers;      struct subvol_healer *full_healers;      eh_t *split_brain;      eh_t **statistics; +    int timeout;      uint32_t max_threads;      uint32_t wait_qlength;      uint32_t halo_max_latency_msec; +    gf_boolean_t iamshd; +    gf_boolean_t enabled;  } afr_self_heald_t;  int diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a3f2942b317..f86f019e637 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -139,8 +139,8 @@ typedef enum {  } afr_ta_fop_state_t;  struct afr_nfsd { -    gf_boolean_t iamnfsd;      uint32_t halo_max_latency_msec; +    gf_boolean_t iamnfsd;  };  typedef struct _afr_private { @@ -153,14 +153,13 @@ typedef struct _afr_private {      inode_t *root_inode; +    int favorite_child; /* subvolume to be preferred in resolving +                                    split-brain cases */      /* For thin-arbiter. */ -    unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/      uuid_t ta_gfid; -    unsigned char ta_child_up; +    unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/      int ta_bad_child_index;      int ta_event_gen; -    off_t ta_notify_dom_lock_offset; -    gf_boolean_t release_ta_notify_dom_lock;      unsigned int ta_in_mem_txn_count;      unsigned int ta_on_wire_txn_count;      struct list_head ta_waitq; @@ -187,30 +186,31 @@ typedef struct _afr_private {      int32_t healers; /* No. of elements currently undergoing background                        heal*/ +    gf_boolean_t release_ta_notify_dom_lock; +      gf_boolean_t metadata_self_heal; /* on/off */      gf_boolean_t entry_self_heal;    /* on/off */      gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */      int read_child;                               /* read-subvolume */ -    afr_read_hash_mode_t hash_mode; /* for when read_child is not set */ -    gf_atomic_t *pending_reads;     /*No. of pending read cbks per child.*/ -    int favorite_child;             /* subvolume to be preferred in resolving -                                                split-brain cases */ +    gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/ -    afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic -                                              resolution of split-brains.*/ +    gf_timer_t *timer; /* launched when parent up is received */      unsigned int wait_count; /* # of servers to wait for success */ -    gf_timer_t *timer; /* launched when parent up is received */ - +    unsigned char ta_child_up;      gf_boolean_t optimistic_change_log;      gf_boolean_t eager_lock;      gf_boolean_t pre_op_compat; /* on/off */      uint32_t post_op_delay_secs;      unsigned int quorum_count; -    char vol_uuid[UUID_SIZE + 1]; +    off_t ta_notify_dom_lock_offset; +    afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic +                                              resolution of split-brains.*/ +    afr_read_hash_mode_t hash_mode; /* for when read_child is not set */ +      int32_t *last_event;      /* @event_generation: Keeps count of number of events received which can @@ -223,27 +223,28 @@ typedef struct _afr_private {         important as we might have had a network split brain.      */      uint32_t event_generation; +    char vol_uuid[UUID_SIZE + 1];      gf_boolean_t choose_local;      gf_boolean_t did_discovery; -    uint64_t sh_readdir_size;      gf_boolean_t ensure_durability; +    gf_boolean_t halo_enabled; +    gf_boolean_t consistent_metadata; +    gf_boolean_t need_heal; +    gf_boolean_t granular_locks; +    uint64_t sh_readdir_size;      char *sh_domain;      char *afr_dirty; -    gf_boolean_t halo_enabled; -    uint32_t halo_max_latency_msec; -    uint32_t halo_max_replicas; -    uint32_t halo_min_replicas; +    uint64_t spb_choice_timeout;      afr_self_heald_t shd;      struct afr_nfsd nfsd; -    gf_boolean_t consistent_metadata; -    uint64_t spb_choice_timeout; -    gf_boolean_t need_heal; +    uint32_t halo_max_latency_msec; +    uint32_t halo_max_replicas; +    uint32_t halo_min_replicas; -    gf_boolean_t granular_locks;      gf_boolean_t full_lock;      gf_boolean_t esh_granular;      gf_boolean_t consistent_io; @@ -311,18 +312,17 @@ afr_entry_lockee_cmp(const void *l1, const void *l2);  typedef struct {      loc_t *lk_loc; -    int lockee_count;      afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];      const char *lk_basename;      const char *lower_basename;      const char *higher_basename; -    char lower_locked; -    char higher_locked;      unsigned char *lower_locked_nodes; -    int32_t lock_count; +    afr_lock_cbk_t lock_cbk; + +    int lockee_count;      int32_t lk_call_count;      int32_t lk_expected_count; @@ -330,14 +330,15 @@ typedef struct {      int32_t lock_op_ret;      int32_t lock_op_errno; -    afr_lock_cbk_t lock_cbk;      char *domain; /* Domain on which inode/entry lock/unlock in progress.*/ +    int32_t lock_count; +    char lower_locked; +    char higher_locked;  } afr_internal_lock_t;  struct afr_reply {      int valid;      int32_t op_ret; -    int32_t op_errno;      dict_t *xattr; /*For xattrop*/      dict_t *xdata;      struct iatt poststat; @@ -346,6 +347,7 @@ struct afr_reply {      struct iatt preparent;      struct iatt preparent2;      struct iatt postparent2; +    int32_t op_errno;      /* For rchecksum */      uint8_t checksum[SHA256_DIGEST_LENGTH];      gf_boolean_t buf_has_zeroes; @@ -385,8 +387,6 @@ typedef struct _afr_inode_lock_t {      */      int32_t num_inodelks;      unsigned int event_generation; -    gf_boolean_t release; -    gf_boolean_t acquired;      gf_timer_t *delay_timer;      struct list_head owners;  /*Transactions that are performing fop*/      struct list_head post_op; /*Transactions that are done with the fop @@ -395,6 +395,8 @@ typedef struct _afr_inode_lock_t {                                 *conflicting transactions to complete*/      struct list_head frozen;  /*Transactions that need to go as part of                                 * next batch of eager-lock*/ +    gf_boolean_t release; +    gf_boolean_t acquired;  } afr_lock_t;  typedef struct _afr_inode_ctx { @@ -403,15 +405,11 @@ typedef struct _afr_inode_ctx {      int lock_count;      int spb_choice;      gf_timer_t *timer; -    gf_boolean_t need_refresh;      unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];      int inherited[AFR_NUM_CHANGE_LOGS];      int on_disk[AFR_NUM_CHANGE_LOGS]; - -    /* set if any write on this fd was a non stable write -       (i.e, without O_SYNC or O_DSYNC) -    */ -    gf_boolean_t witnessed_unstable_write; +    /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/ +    afr_lock_t lock[2];      /* @open_fd_count:         Number of open FDs queried from the server, as queried through @@ -419,8 +417,12 @@ typedef struct _afr_inode_ctx {         temporarily disabled.      */      uint32_t open_fd_count; -    /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/ -    afr_lock_t lock[2]; +    gf_boolean_t need_refresh; + +    /* set if any write on this fd was a non stable write +       (i.e, without O_SYNC or O_DSYNC) +    */ +    gf_boolean_t witnessed_unstable_write;  } afr_inode_ctx_t;  typedef struct _afr_local { @@ -434,19 +436,15 @@ typedef struct _afr_local {      unsigned int event_generation;      uint32_t open_fd_count; -    gf_boolean_t update_open_fd_count;      int32_t num_inodelks; -    gf_boolean_t update_num_inodelks; - -    gf_lkowner_t saved_lk_owner;      int32_t op_ret;      int32_t op_errno; -    int32_t **pending; -      int dirty[AFR_NUM_CHANGE_LOGS]; +    int32_t **pending; +      loc_t loc;      loc_t newloc; @@ -477,14 +475,6 @@ typedef struct _afr_local {      afr_read_txn_wind_t readfn; -    /* @refreshed: - -       the inode was "refreshed" (i.e, pending xattrs from all subvols -       freshly inspected and inode ctx updated accordingly) as part of -       this transaction already. -    */ -    gf_boolean_t refreshed; -      /* @inode:         the inode on which the read txn is performed on. ref'ed and copied @@ -509,8 +499,6 @@ typedef struct _afr_local {      unsigned char *readable;      unsigned char *readable2; /*For rename transaction*/ -    int read_subvol; /* Current read subvolume */ -      afr_inode_refresh_cbk_t refreshfn;      /* @refreshinode: @@ -519,9 +507,30 @@ typedef struct _afr_local {      */      inode_t *refreshinode; +    dict_t *xattr_req; + +    dict_t *dict; + +    int read_subvol; /* Current read subvolume */ + +    int optimistic_change_log; + +    afr_internal_lock_t internal_lock; +      /*To handle setattr/setxattr on yet to be linked inode from dht*/      uuid_t refreshgfid; +    /* @refreshed: + +       the inode was "refreshed" (i.e, pending xattrs from all subvols +       freshly inspected and inode ctx updated accordingly) as part of +       this transaction already. +    */ +    gf_boolean_t refreshed; + +    gf_boolean_t update_num_inodelks; +    gf_boolean_t update_open_fd_count; +      /*        @pre_op_compat: @@ -531,14 +540,6 @@ typedef struct _afr_local {      gf_boolean_t pre_op_compat; -    dict_t *xattr_req; - -    afr_internal_lock_t internal_lock; - -    dict_t *dict; - -    int optimistic_change_log; -      /* Is the current writev() going to perform a stable write?         i.e, is fd->flags or @flags writev param have O_SYNC or         O_DSYNC? @@ -557,25 +558,20 @@ typedef struct _afr_local {      struct {          struct { -            gf_boolean_t needs_fresh_lookup; -            uuid_t gfid_req; -        } lookup; - -        struct { -            unsigned char buf_set;              struct statvfs buf; +            unsigned char buf_set;          } statfs;          struct { -            int32_t flags;              fd_t *fd; +            int32_t flags;          } open;          struct { -            int32_t cmd;              struct gf_flock user_flock;              struct gf_flock ret_flock;              unsigned char *locked_nodes; +            int32_t cmd;          } lk;          /* inode read */ @@ -600,8 +596,8 @@ typedef struct _afr_local {          struct {              char *name; -            int last_index;              long xattr_len; +            int last_index;          } getxattr;          struct { @@ -614,11 +610,10 @@ typedef struct _afr_local {          /* dir read */          struct { +            uint32_t *checksum;              int success_count;              int32_t op_ret;              int32_t op_errno; - -            uint32_t *checksum;          } opendir;          struct { @@ -627,8 +622,8 @@ typedef struct _afr_local {              size_t size;              off_t offset;              dict_t *dict; -            gf_boolean_t failed;              int last_index; +            gf_boolean_t failed;          } readdir;          /* inode write */ @@ -638,12 +633,11 @@ typedef struct _afr_local {          } inode_wfop;  // common structure for all inode-write-fops          struct { -            int32_t op_ret; -              struct iovec *vector;              struct iobref *iobref; -            int32_t count;              off_t offset; +            int32_t op_ret; +            int32_t count;              uint32_t flags;          } writev; @@ -703,29 +697,25 @@ typedef struct _afr_local {          } create;          struct { +            dict_t *params;              dev_t dev;              mode_t mode; -            dict_t *params;          } mknod;          struct { -            int32_t mode;              dict_t *params; +            int32_t mode;          } mkdir;          struct { -            int flags; -        } rmdir; - -        struct {              dict_t *params;              char *linkpath;          } symlink;          struct { -            int32_t mode;              off_t offset;              size_t len; +            int32_t mode;          } fallocate;          struct { @@ -752,10 +742,10 @@ typedef struct _afr_local {          struct {              char *volume;              char *basename; +            void *xdata;              entrylk_cmd in_cmd;              entrylk_cmd cmd;              entrylk_type type; -            void *xdata;          } entrylk;          struct { @@ -764,31 +754,33 @@ typedef struct _afr_local {          } seek;          struct { -            int32_t datasync; -        } fsync; - -        struct {              struct gf_lease user_lease;              struct gf_lease ret_lease;              unsigned char *locked_nodes;          } lease; -    } cont; +        struct { +            int flags; +        } rmdir; -    struct { -        off_t start, len; +        struct { +            int32_t datasync; +        } fsync; -        gf_boolean_t eager_lock_on; -        gf_boolean_t do_eager_unlock; +        struct { +            uuid_t gfid_req; +            gf_boolean_t needs_fresh_lookup; +        } lookup; + +    } cont; +    struct {          char *basename;          char *new_basename;          loc_t parent_loc;          loc_t new_parent_loc; -        afr_transaction_type type; -          /* stub to resume on destruction             of the transaction frame */          call_stub_t *resume_stub; @@ -806,6 +798,30 @@ typedef struct _afr_local {              FOP failed. */          unsigned char *failed_subvols; +        call_frame_t *main_frame; /*Fop frame*/ +        call_frame_t *frame;      /*Transaction frame*/ + +        int (*wind)(call_frame_t *frame, xlator_t *this, int subvol); + +        int (*unwind)(call_frame_t *frame, xlator_t *this); + +        off_t start, len; + +        afr_transaction_type type; + +        int32_t in_flight_sb_errno; /* This is where the cause of the +                                       failure on the last good copy of +                                       the file is stored. +                                       */ + +        /* @changelog_resume: function to be called after changlogging +           (either pre-op or post-op) is done +        */ +        afr_changelog_resume_t changelog_resume; + +        gf_boolean_t eager_lock_on; +        gf_boolean_t do_eager_unlock; +          /* @dirtied: flag which indicates whether we set dirty flag             in the OP. Typically true when we are performing operation             on more than one subvol and optimistic changelog is disabled @@ -830,6 +846,10 @@ typedef struct _afr_local {          */          gf_boolean_t no_uninherit; +        gf_boolean_t in_flight_sb; /* Indicator for occurrence of +                                      split-brain while in the middle of +                                      a txn. */ +          /* @uninherit_done:             @uninherit_value: @@ -842,26 +862,6 @@ typedef struct _afr_local {          gf_boolean_t uninherit_done;          gf_boolean_t uninherit_value; -        gf_boolean_t in_flight_sb;  /* Indicator for occurrence of -                                       split-brain while in the middle of -                                       a txn. */ -        int32_t in_flight_sb_errno; /* This is where the cause of the -                                       failure on the last good copy of -                                       the file is stored. -                                       */ - -        /* @changelog_resume: function to be called after changlogging -           (either pre-op or post-op) is done -        */ -        afr_changelog_resume_t changelog_resume; - -        call_frame_t *main_frame; /*Fop frame*/ -        call_frame_t *frame;      /*Transaction frame*/ - -        int (*wind)(call_frame_t *frame, xlator_t *this, int subvol); - -        int (*unwind)(call_frame_t *frame, xlator_t *this); -          /* post-op hook */      } transaction; @@ -875,36 +875,36 @@ typedef struct _afr_local {      mode_t umask;      int xflag; -    gf_boolean_t do_discovery;      struct afr_reply *replies;      /* For  client side background heals. */      struct list_head healer;      call_frame_t *heal_frame; -    gf_boolean_t need_full_crawl; -    afr_fop_lock_state_t fop_lock_state; - -    gf_boolean_t is_read_txn;      afr_inode_ctx_t *inode_ctx;      /*For thin-arbiter transactions.*/ -    unsigned char read_txn_query_child; -    unsigned char ta_child_up; +    int ta_failed_subvol; +    int ta_event_gen;      struct list_head ta_waitq;      struct list_head ta_onwireq;      afr_ta_fop_state_t fop_state; -    int ta_failed_subvol; -    int ta_event_gen; +    afr_fop_lock_state_t fop_lock_state; +    gf_lkowner_t saved_lk_owner; +    unsigned char read_txn_query_child; +    unsigned char ta_child_up; +    gf_boolean_t do_discovery; +    gf_boolean_t need_full_crawl; +    gf_boolean_t is_read_txn;      gf_boolean_t is_new_entry;  } afr_local_t;  typedef struct afr_spbc_timeout {      call_frame_t *frame; -    gf_boolean_t d_spb; -    gf_boolean_t m_spb;      loc_t *loc;      int spb_child_index; +    gf_boolean_t d_spb; +    gf_boolean_t m_spb;  } afr_spbc_timeout_t;  typedef struct afr_spb_status { @@ -914,9 +914,9 @@ typedef struct afr_spb_status {  typedef struct afr_empty_brick_args {      call_frame_t *frame; +    char *op_type;      loc_t loc;      int empty_index; -    char *op_type;  } afr_empty_brick_args_t;  typedef struct afr_read_subvol_args {  | 
