/*
  Copyright (c) 2008-2012 Red Hat, Inc.
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/

/* NOTE(review): the header names of the next three #include directives are
 * missing in this copy of the file (presumably angle-bracket system headers
 * lost during extraction) -- restore them from the upstream source. */
#include
#include
#include
#include "dht-mem-types.h"
#include "dht-messages.h"
#include "call-stub.h"
#include "libxlator.h"
#include "syncop.h"
#include "refcount.h"
#include "timer.h"
#include "protocol-common.h"
#include "glusterfs-acl.h"

#ifndef _DHT_H
#define _DHT_H

/* Extended-attribute keys and lock-domain names used by this translator. */
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
/* Layout synchronization */
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1

/* Expands to a sysconf() call, i.e. it is evaluated at every use. */
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)

#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096

/* Array to hold custom xattr keys */
extern char *xattrs_to_heal[];

/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01

/* Callback signatures used by the self-heal, defrag and layout-refresh code. */
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
                                       xlator_t *this, int32_t op_ret,
                                       int32_t op_errno, dict_t *xdata);
typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, xlator_t *dst_node,
                                    call_frame_t *frame, int ret);
typedef int (*dht_refresh_layout_unlock) (call_frame_t *frame, xlator_t *this,
                                          int op_ret, int invoke_cbk);
typedef int (*dht_refresh_layout_done_handle) (call_frame_t *frame);

/* In-memory layout; ends in a flexible array member 'list' (one entry per
 * range, each pointing at an xlator). */
struct dht_layout {
        int spread_cnt; /* layout spread count per directory,
                           is controlled by 'setxattr()'
                           with special key */
        int cnt;
        int preset;
        /*
         * The last *configuration* state for which this directory was known
         * to be in balance.  The corresponding vol_commit_hash changes
         * whenever bricks are added or removed.  This value changes when a
         * (full) rebalance is complete.  If they match, it's safe to assume
         * that every file is where it should be and there's no need to do
         * lookups for files elsewhere.  If they don't, then we have to do a
         * global lookup to be sure.
         */
        uint32_t commit_hash;
        /*
         * The *runtime* state of the volume, changes when connections to
         * bricks are made or lost.
         */
        int gen;
        int type;
        gf_atomic_t ref; /* use with dht_conf_t->layout_lock */
        gf_boolean_t search_unhashed;
        struct {
                int err; /* 0 = normal
                            -1 = dir exists and no xattr
                            >0 = dir lookup failed with errno */
                uint32_t start;
                uint32_t stop;
                uint32_t commit_hash;
                xlator_t *xlator;
        } list[];
};
typedef struct dht_layout dht_layout_t;

/* Access/change/modification times, each split into seconds and nanoseconds. */
struct dht_stat_time {
        uint32_t atime;
        uint32_t atime_nsec;
        uint32_t ctime;
        uint32_t ctime_nsec;
        uint32_t mtime;
        uint32_t mtime_nsec;
};
typedef struct dht_stat_time dht_stat_time_t;

struct dht_inode_ctx {
        dht_layout_t *layout;
        dht_stat_time_t time;
        xlator_t *lock_subvol;
        xlator_t *mds_subvol; /* This is only used for directories */
};
typedef struct dht_inode_ctx dht_inode_ctx_t;

typedef enum {
        DHT_HASH_TYPE_DM,
        DHT_HASH_TYPE_DM_USER,
} dht_hashfn_type_t;

typedef enum {
        DHT_INODELK,
        DHT_ENTRYLK,
} dht_lock_type_t;

/* rebalance related */
struct dht_rebalance_ {
        xlator_t *from_subvol;
        xlator_t *target_node;
        off_t offset;
        size_t size;
        int32_t flags;
        int count;
        struct iobref *iobref;
        struct iovec *vector;
        struct iatt stbuf;
        struct iatt prebuf;
        struct iatt postbuf;
        dht_defrag_cbk_fn_t target_op_fn;
        dict_t *xdata;
        dict_t *xattr;
        dict_t *dict;
        int32_t set;
        struct gf_flock flock;
        int lock_cmd;
};

/**
 * Enum to store decided action based on the qdstatfs (quota-deem-statfs)
 * events
 **/
typedef enum {
        qdstatfs_action_OFF = 0,
qdstatfs_action_REPLACE,
        qdstatfs_action_NEGLECT,
        qdstatfs_action_COMPARE,
} qdstatfs_action_t;

typedef enum {
        FAIL_ON_ANY_ERROR,
        IGNORE_ENOENT_ESTALE
} dht_reaction_type_t;

struct dht_skip_linkto_unlink {
        gf_boolean_t handle_valid_link;
        int opend_fd_count;
        xlator_t *hash_links_to;
        uuid_t cached_gfid;
        uuid_t hashed_gfid;
};

/* A single lock request on one subvolume. */
typedef struct {
        xlator_t *xl;
        loc_t loc; /* contains/points to inode to lock on. */
        short type; /* read/write lock. */
        char *domain; /* Only locks within a single domain
                       * contend with each other
                       */
        char *basename; /* Required for entrylk */
        gf_lkowner_t lk_owner;
        gf_boolean_t locked;
} dht_lock_t;

/* The lock structure represents inodelk. */
typedef struct {
        fop_inodelk_cbk_t inodelk_cbk;
        dht_lock_t **locks;
        int lk_count;
        dht_reaction_type_t reaction;

        /* whether locking failed on _any_ of the "locks" above */
        int op_ret;
        int op_errno;
} dht_ilock_wrap_t;

/* The lock structure represents entrylk. */
typedef struct {
        fop_entrylk_cbk_t entrylk_cbk;
        dht_lock_t **locks;
        int lk_count;
        dht_reaction_type_t reaction;

        /* whether locking failed on _any_ of the "locks" above */
        int op_ret;
        int op_errno;
} dht_elock_wrap_t;

/* The first member of dht_dir_transaction_t should be of type dht_ilock_wrap_t.
 * Otherwise it can result in subtle memory corruption issues as in most of the
 * places we use lock[0].layout.my_layout or lock[0].layout.parent_layout and
 * lock[0].ns.parent_layout (like in dht_local_wipe).
 */
typedef union {
        union {
                dht_ilock_wrap_t my_layout;
                dht_ilock_wrap_t parent_layout;
        } layout;
        struct dht_namespace {
                dht_ilock_wrap_t parent_layout;
                dht_elock_wrap_t directory_ns;
                fop_entrylk_cbk_t ns_cbk;
        } ns;
} dht_dir_transaction_t;

typedef int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
                                     dht_layout_t *layout);

typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
                                        dht_layout_t **inmem,
                                        dht_layout_t **ondisk);

typedef struct {
        uint64_t blocks_used;
        uint64_t pblocks_used;
        uint64_t files_used;
        uint64_t pfiles_used;
        uint64_t unhashed_blocks_used;
        uint64_t unhashed_pblocks_used;
        uint64_t unhashed_files_used;
        uint64_t unhashed_pfiles_used;
        uint64_t unhashed_fsid;
        uint64_t hashed_fsid;
} tier_statvfs_t;

/* Per-call state; the DHT_STACK_UNWIND / DHT_STACK_DESTROY macros in this
 * header take it from frame->local and pass it to dht_local_wipe(). */
struct dht_local {
        int call_cnt;
        loc_t loc;
        loc_t loc2;
        int op_ret;
        int op_errno;
        int layout_mismatch;
        /* Use stbuf as the postbuf, when we require both
         * pre and post attrs */
        struct iatt stbuf;
        struct iatt mds_stbuf;
        struct iatt prebuf;
        struct iatt preoldparent;
        struct iatt postoldparent;
        struct iatt preparent;
        struct iatt postparent;
        struct statvfs statvfs;
        tier_statvfs_t tier_statvfs;
        fd_t *fd;
        inode_t *inode;
        dict_t *params;
        dict_t *xattr;
        dict_t *mds_xattr;
        dict_t *xdata; /* dict used to save xdata response by xattr fop */
        dict_t *xattr_req;
        dht_layout_t *layout;
        size_t size;
        ino_t ia_ino;
        xlator_t *src_hashed, *src_cached;
        xlator_t *dst_hashed, *dst_cached;
        xlator_t *cached_subvol;
        xlator_t *hashed_subvol;
        xlator_t *mds_subvol; /* This is used for directories only */
        char need_selfheal;
        char need_xattr_heal;
        int file_count;
        int dir_count;
        call_frame_t *main_frame;
        int fop_succeeded;
        struct {
                fop_mknod_cbk_t linkfile_cbk;
                struct iatt stbuf;
                loc_t loc;
                inode_t *inode;
                dict_t *xattr;
                xlator_t *srcvol;
        } linkfile;
        struct {
                uint32_t hole_cnt;
                uint32_t overlaps_cnt;
                uint32_t down;
                uint32_t misc;
                dht_selfheal_dir_cbk_t dir_cbk;
                dht_selfheal_layout_t healer;
                dht_need_heal_t should_heal;
                gf_boolean_t force_mkdir;
                dht_layout_t *layout, *refreshed_layout;
        }
selfheal;

        dht_refresh_layout_unlock refresh_layout_unlock;
        dht_refresh_layout_done_handle refresh_layout_done;

        uint32_t uid;
        uint32_t gid;

        /* needed by nufa */
        int32_t flags;
        mode_t mode;
        dev_t rdev;
        mode_t umask;

        /* needed for file-info */
        char *xattr_val;
        char *key;

        /* which xattr request? */
        char xsel[256];
        int32_t alloc_len;

        /* gfid related */
        uuid_t gfid;

        /* flag used to make sure we need to return estale in
           {lookup,revalidate}_cbk */
        char return_estale;
        char need_lookup_everywhere;

        glusterfs_fop_t fop;

        gf_boolean_t linked;
        xlator_t *link_subvol;

        struct dht_rebalance_ rebalance;
        xlator_t *first_up_subvol;

        gf_boolean_t quota_deem_statfs;

        gf_boolean_t added_link;
        gf_boolean_t is_linkfile;

        struct dht_skip_linkto_unlink skip_unlink;

        /* lock[0] is the slot referred to by the comment on
         * dht_dir_transaction_t; 'current' points at one of the slots. */
        dht_dir_transaction_t lock[2], *current;

        short lock_type;

        call_stub_t *stub;
        int32_t parent_disk_layout[4];

        /* To hold dentries of readdir spanning across subvols */
        gf_dirent_t entries;
        size_t filled;

        /* rename rollback */
        int *ret_cache ;

        /* fd open check */
        gf_boolean_t fd_checked;

        /* This is used only for directory operations */
        int32_t valid;
        gf_boolean_t heal_layout;
        int32_t mds_heal_fresh_lookup;
};
typedef struct dht_local dht_local_t;

/* du - disk-usage */
struct dht_du {
        double avail_percent;
        double avail_inodes;
        uint64_t avail_space;
        uint32_t log;
        uint32_t chunks;
        uint32_t total_blocks;
        uint32_t avail_blocks;
        uint32_t frsize; /* fragment size */
};
typedef struct dht_du dht_du_t;

/* Defrag/tier commands.  Values are spelled as "1 + n"; see the note at the
 * end of the enum about keeping them in step with the xdr file. */
enum gf_defrag_type {
        GF_DEFRAG_CMD_NONE = 0,
        GF_DEFRAG_CMD_START = 1,
        GF_DEFRAG_CMD_STOP = 1 + 1,
        GF_DEFRAG_CMD_STATUS = 1 + 2,
        GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
        GF_DEFRAG_CMD_START_FORCE = 1 + 4,
        GF_DEFRAG_CMD_START_TIER = 1 + 5,
        GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
        GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
        GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
        GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
        GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
        GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
        GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
        GF_DEFRAG_CMD_DETACH_START = 1 + 13,
        GF_DEFRAG_CMD_DETACH_COMMIT
= 1 + 14,
        GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
        GF_DEFRAG_CMD_DETACH_STOP = 1 + 16,
        /* new labels are used so it will help
         * while removing old labels by easily differentiating.
         * A few labels are added so that the count remains same
         * between this enum and the ones on the xdr file.
         * different values for the same enum cause errors and
         * confusion.
         */
};
typedef enum gf_defrag_type gf_defrag_type;

enum gf_defrag_status_t {
        GF_DEFRAG_STATUS_NOT_STARTED,
        GF_DEFRAG_STATUS_STARTED,
        GF_DEFRAG_STATUS_STOPPED,
        GF_DEFRAG_STATUS_COMPLETE,
        GF_DEFRAG_STATUS_FAILED,
        GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
        GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
        GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
        GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
};
typedef enum gf_defrag_status_t gf_defrag_status_t;

typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;

/* Singly linked list node (see 'next'). */
struct gf_defrag_pattern_list {
        char path_pattern[256];
        uint64_t size;
        gf_defrag_pattern_list_t *next;
};

struct dht_container {
        /* The list head is overlaid with explicit next/prev pointers. */
        union {
                struct list_head list;
                struct {
                        struct _gf_dirent_t *next;
                        struct _gf_dirent_t *prev;
                };
        };
        gf_dirent_t *df_entry;
        xlator_t *this;
        loc_t *parent_loc;
        dict_t *migrate_data;
        int local_subvol_index;
};

typedef enum tier_mode_ {
        TIER_MODE_NONE = 0,
        TIER_MODE_TEST,
        TIER_MODE_WM
} tier_mode_t;

typedef enum tier_pause_state_ {
        TIER_RUNNING = 0,
        TIER_REQUEST_PAUSE,
        TIER_PAUSED
} tier_pause_state_t;

/* This Structure is only used in tiering fixlayout */
typedef struct gf_tier_fix_layout_arg {
        xlator_t *this;
        dict_t *fix_layout;
        pthread_t thread_id;
} gf_tier_fix_layout_arg_t;

typedef struct gf_tier_conf {
        int is_tier;
        int watermark_hi;
        int watermark_low;
        int watermark_last;
        unsigned long block_size;
        fsblkcnt_t blocks_total;
        fsblkcnt_t blocks_used;
        int percent_full;
        uint64_t max_migrate_bytes;
        int max_migrate_files;
        int query_limit;
        tier_mode_t mode;
        /* These flags are only used for tier-compact */
        gf_boolean_t compact_active;
        /* These 3 flags are set to true when the client changes the */
        /* compaction mode on the command line. */
        /* When they are set, the daemon will trigger compaction as */
        /* soon as possible to activate or deactivate compaction. */
        /* If in the middle of a compaction, then the switches take */
        /* effect on the next compaction, not the current one. */
        /* If the user switches it off, we want to avoid needless */
        /* compactions. */
        /* If the user switches it on, they want to compact as soon */
        /* as possible. */
        gf_boolean_t compact_mode_switched;
        gf_boolean_t compact_mode_switched_hot;
        gf_boolean_t compact_mode_switched_cold;
        int tier_max_promote_size;
        int tier_promote_frequency;
        int tier_demote_frequency;
        int tier_compact_hot_frequency;
        int tier_compact_cold_frequency;
        uint64_t st_last_promoted_size;
        uint64_t st_last_demoted_size;
        tier_pause_state_t pause_state;
        struct synctask *pause_synctask;
        gf_timer_t *pause_timer;
        pthread_mutex_t pause_mutex;
        int promote_in_progress;
        int demote_in_progress;
        /* This Structure is only used in tiering fixlayout */
        gf_tier_fix_layout_arg_t tier_fix_layout_arg;
        /* Indicates the index of the first queryfile picked
         * in the last cycle of promote or demote */
        int32_t last_promote_qfile_index;
        int32_t last_demote_qfile_index;
        char volname[GD_VOLUME_NAME_MAX + 1];
} gf_tier_conf_t;

typedef struct nodeuuid_info {
        char info; /* Set to 1 if this is my node's uuid */
        uuid_t uuid; /* Store the nodeuuid as well for debugging */
} nodeuuid_info_t;

typedef struct subvol_nodeuuids_info {
        nodeuuid_info_t *elements;
        int count;
} subvol_nodeuuids_info_t;

struct gf_defrag_info_ {
        uint64_t total_files;
        uint64_t total_data;
        uint64_t num_files_lookedup;
        uint64_t total_failures;
        uint64_t skipped;
        uint64_t num_dirs_processed;
        uint64_t size_processed;
        gf_lock_t lock;
        int cmd;
        pthread_t th;
        gf_defrag_status_t defrag_status;
        struct rpc_clnt *rpc;
        uint32_t connected;
        uint32_t is_exiting;
        pid_t pid;
        inode_t *root_inode;
        uuid_t node_uuid;
        struct timeval start_time;
        gf_boolean_t stats;
        uint32_t new_commit_hash;
        gf_defrag_pattern_list_t *defrag_pattern;
gf_tier_conf_t tier_conf;

        /* Data Tiering params for scanner */
        uint64_t total_files_promoted;
        uint64_t total_files_demoted;
        int write_freq_threshold;
        int read_freq_threshold;

        pthread_cond_t parallel_migration_cond;
        pthread_mutex_t dfq_mutex;
        pthread_cond_t rebalance_crawler_alarm;
        int32_t q_entry_count;
        int32_t global_error;
        struct dht_container *queue;
        int32_t crawl_done;
        int32_t abort;
        int32_t wakeup_crawler;

        /* Throttle params */
        /* stands for reconfigured thread count */
        int32_t recon_thread_count;
        /* stands for current running thread count */
        int32_t current_thread_count;
        pthread_cond_t df_wakeup_thread;

        /* lock migration flag */
        gf_boolean_t lock_migration_enabled;

        /* backpointer to make it easier to write functions for rebalance */
        xlator_t *this;

        pthread_cond_t fc_wakeup_cond;
        pthread_mutex_t fc_mutex;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;

/* Table of function pointers stored in dht_conf_t.methods. */
struct dht_methods_s {
        int32_t (*migration_get_dst_subvol)(xlator_t *this,
                                            dht_local_t *local);
        int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
        int32_t (*migration_needed)(xlator_t *this);
        xlator_t* (*layout_search)(xlator_t *this, dht_layout_t *layout,
                                   const char *name);
};
typedef struct dht_methods_s dht_methods_t;

struct dht_conf {
        gf_lock_t subvolume_lock;
        int subvolume_cnt;
        xlator_t **subvolumes;
        char *subvolume_status;
        int *last_event;
        dht_layout_t **file_layouts;
        dht_layout_t **dir_layouts;
        unsigned int search_unhashed;
        gf_boolean_t lookup_optimize;
        int gen;
        dht_du_t *du_stats;
        double min_free_disk;
        double min_free_inodes;
        char disk_unit;
        int32_t refresh_interval;
        gf_boolean_t unhashed_sticky_bit;
        struct timeval last_stat_fetch;
        gf_lock_t layout_lock;
        dict_t *leaf_to_subvol;
        void *private; /* Can be used by wrapper xlators over dht */
        gf_boolean_t use_readdirp;
        char vol_uuid[UUID_SIZE + 1];
        gf_boolean_t assert_no_child_down;
        time_t *subvol_up_time;

        /* This is the count used as the distribute layout for a directory */
        /* Will be a global flag to control the layout spread count */
uint32_t dir_spread_cnt; /* to keep track of nodes which are decommissioned */ xlator_t **decommissioned_bricks; int decommission_in_progress; int decommission_subvols_cnt; /* defrag related */ gf_defrag_info_t *defrag; /* Request to filter directory entries in readdir request */ gf_boolean_t readdir_optimize; /* Support regex-based name reinterpretation. */ regex_t rsync_regex; gf_boolean_t rsync_regex_valid; regex_t extra_regex; gf_boolean_t extra_regex_valid; /* Support variable xattr names. */ char *xattr_name; char *mds_xattr_key; char *link_xattr_name; char *commithash_xattr_name; char *wild_xattr_name; /* Support size-weighted rebalancing (heterogeneous bricks). */ gf_boolean_t do_weighting; gf_boolean_t randomize_by_gfid; int dthrottle; dht_methods_t methods; struct mem_pool *lock_pool; /*local subvol storage for rebalance*/ xlator_t **local_subvols; subvol_nodeuuids_info_t *local_nodeuuids; int32_t local_subvols_cnt; /* * "Commit hash" for this volume topology. Changed whenever bricks * are added or removed. 
*/ uint32_t vol_commit_hash; gf_boolean_t vch_forced; /* lock migration */ gf_boolean_t lock_migration_enabled; gf_lock_t lock; /* Hard link handle requirement for migration triggered from client*/ synclock_t link_lock; gf_boolean_t use_fallocate; }; typedef struct dht_conf dht_conf_t; struct dht_dfoffset_ctx { xlator_t *this; off_t offset; int32_t readdir_done; }; typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t; struct dht_disk_layout { uint32_t cnt; uint32_t type; struct { uint32_t start; uint32_t stop; } list[1]; }; typedef struct dht_disk_layout dht_disk_layout_t; typedef enum { GF_DHT_MIGRATE_DATA, GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS, GF_DHT_MIGRATE_HARDLINK, GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS } gf_dht_migrate_data_type_t; typedef enum { GF_DHT_EQUAL_DISTRIBUTION, GF_DHT_WEIGHTED_DISTRIBUTION } dht_distribution_type_t; struct dir_dfmeta { gf_dirent_t *equeue; dht_dfoffset_ctx_t *offset_var; struct list_head **head; struct list_head **iterator; int *fetch_entries; }; typedef struct dht_migrate_info { xlator_t *src_subvol; xlator_t *dst_subvol; GF_REF_DECL; } dht_migrate_info_t; typedef struct dht_fd_ctx { uint64_t opened_on_dst; GF_REF_DECL; } dht_fd_ctx_t; #define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT) #define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0) #define is_last_call(cnt) (cnt == 0) #define DHT_MIGRATION_IN_PROGRESS 1 #define DHT_MIGRATION_COMPLETED 2 #define check_is_linkfile(i,s,x,n) (IS_DHT_LINKFILE_MODE (s) && dict_get (x, n)) #define IS_DHT_MIGRATION_PHASE2(buf) ( \ IA_ISREG ((buf)->ia_type) && \ ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \ ~S_IFMT) == DHT_LINKFILE_MODE)) #define IS_DHT_MIGRATION_PHASE1(buf) ( \ IA_ISREG ((buf)->ia_type) && \ ((buf)->ia_prot.sticky == 1) && \ ((buf)->ia_prot.sgid == 1)) #define DHT_STRIP_PHASE1_FLAGS(buf) do { \ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \ (buf)->ia_prot.sticky = 0; \ (buf)->ia_prot.sgid = 0; \ } \ } while (0) #define 
dht_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE \ || op_errno == EIO) \ /*Bad fix. Please revert the commit after fixing the bug 1329505*/ #define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type)) #define layout_is_sane(layout) ((layout) && (layout->cnt > 0)) #define we_are_not_migrating(x) ((x) == 1) #define DHT_STACK_UNWIND(fop, frame, params ...) do { \ dht_local_t *__local = NULL; \ xlator_t *__xl = NULL; \ if (frame) { \ __xl = frame->this; \ __local = frame->local; \ frame->local = NULL; \ } \ STACK_UNWIND_STRICT (fop, frame, params); \ dht_local_wipe (__xl, __local); \ } while (0) #define DHT_STACK_DESTROY(frame) do { \ dht_local_t *__local = NULL; \ xlator_t *__xl = NULL; \ __xl = frame->this; \ __local = frame->local; \ frame->local = NULL; \ STACK_DESTROY (frame->root); \ dht_local_wipe (__xl, __local); \ } while (0) #define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\ LOCK (&inode->lock); \ { \ if (ctx_sec == new_sec) \ new_nsec = max (new_nsec, ctx_nsec); \ else if (ctx_sec > new_sec) { \ new_sec = ctx_sec; \ new_nsec = ctx_nsec; \ } \ if (post) { \ ctx_sec = new_sec; \ ctx_nsec = new_nsec; \ } \ } \ UNLOCK (&inode->lock); \ } while (0) #define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) #define DHT_MARK_FOP_INTERNAL(xattr) do { \ int tmp = -1; \ if (!xattr) { \ xattr = dict_new (); \ if (!xattr) \ break; \ } \ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \ if (tmp) { \ gf_msg (this->name, GF_LOG_ERROR, 0, \ DHT_MSG_DICT_SET_FAILED, \ "Failed to set dictionary value: key = %s," \ " path = %s", GLUSTERFS_INTERNAL_FOP_KEY, \ local->loc.path); \ } \ } while (0) dht_layout_t *dht_layout_new (xlator_t *this, int cnt); dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode); dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol); xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name); int32_t 
dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local); int32_t dht_migration_needed(xlator_t *this); int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout); int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, uint32_t *no_space_p); int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, loc_t *loc, dict_t *xattr); xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *buf, dict_t *xattr); int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this, xlator_t *subvol, loc_t *loc); int dht_layouts_init (xlator_t *this, dht_conf_t *conf); int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, int op_ret, int op_errno, dict_t *xattr); int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, int pos, int32_t **disk_layout_p); int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, int pos, void *disk_layout_raw, int disk_layout_len); int dht_disk_layout_extract_for_subvol (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, int32_t **disk_layout_p); int dht_frame_return (call_frame_t *frame); int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol); void dht_local_wipe (xlator_t *this, dht_local_t *local); dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop); int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from, xlator_t *subvol); xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc); xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode); xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev); xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev); int dht_subvol_cnt (xlator_t *this, xlator_t *subvol); int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p); int dht_linkfile_create 
(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk, xlator_t *this, xlator_t *tovol, xlator_t *fromvol, loc_t *loc); int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc); int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc); int dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); int dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); int dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, dht_layout_t *layout); int dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); int dht_layout_sort_volname (dht_layout_t *layout); int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, dht_local_t *layout); int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); int dht_layout_index_for_subvol (dht_layout_t *layout, xlator_t *subvol); int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);; void dht_layout_unref (xlator_t *this, dht_layout_t *layout); dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout); xlator_t *dht_first_up_subvol (xlator_t *this); xlator_t *dht_last_up_subvol (xlator_t *this); int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name); int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, xlator_t **subvol); int dht_rename_cleanup (call_frame_t *frame); int dht_rename_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata); int 
dht_update_commit_hash_for_layout (call_frame_t *frame);
int dht_fix_directory_layout (call_frame_t *frame,
                              dht_selfheal_dir_cbk_t dir_cbk,
                              dht_layout_t *layout);

int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);

/* migration/rebalance */
int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);

int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);

int dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf);

/* FOPS */
int32_t dht_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
                    dict_t *xattr_req);

int32_t dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
                  dict_t *xdata);

int32_t dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
                   dict_t *xdata);

int32_t dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
                      off_t offset, dict_t *xdata);

int32_t dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
                       off_t offset, dict_t *xdata);

int32_t dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
                    int32_t mask, dict_t *xdata);

int32_t dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
                      size_t size, dict_t *xdata);

int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
                   mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);

int32_t dht_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
                   mode_t mode, mode_t umask, dict_t *xdata);

int32_t dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
                    int xflag, dict_t *xdata);

int32_t dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
                   int flags, dict_t *xdata);

int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
                     const char *linkpath, loc_t *loc, mode_t umask,
                     dict_t *xdata);

int32_t dht_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
                    loc_t *newloc, dict_t *xdata);

int32_t dht_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
                  loc_t *newloc, dict_t *xdata);

int32_t dht_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
                    int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
                    dict_t *params);

int32_t dht_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
                  int32_t flags, fd_t *fd, dict_t *xdata);

int32_t dht_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
                   size_t size, off_t offset, uint32_t flags, dict_t *xdata);

int32_t dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
                    struct iovec *vector, int32_t count, off_t offset,
                    uint32_t flags, struct iobref *iobref, dict_t *xdata);

int32_t dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd,
                   dict_t *xdata);

int32_t dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
                   int32_t datasync, dict_t *xdata);

int32_t dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
                     fd_t *fd, dict_t *xdata);

int32_t dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
                      int32_t datasync, dict_t *xdata);

int32_t dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
                    dict_t *xdata);

int32_t dht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
                      dict_t *dict, int32_t flags, dict_t *xdata);

int32_t dht_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
                      const char *name, dict_t *xdata);

int32_t dht_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
                       dict_t *dict, int32_t flags, dict_t *xdata);

int32_t dht_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
                       const char *name, dict_t *xdata);

int32_t dht_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
                         const char *name, dict_t *xdata);

int32_t dht_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
                          const char *name, dict_t *xdata);

int32_t dht_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
                int32_t cmd, struct gf_flock *flock, dict_t *xdata);

int32_t dht_lease (call_frame_t *frame, xlator_t *this, loc_t *loc,
                   struct gf_lease *lease, dict_t *xdata);

int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
                     const char *volume, loc_t *loc, int32_t cmd,
                     struct gf_flock *flock, dict_t *xdata);

int32_t
dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata); int32_t dht_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata); int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata); int32_t dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *xdata); int32_t dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, dict_t *dict); int32_t dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); int32_t dht_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); int32_t dht_forget (xlator_t *this, inode_t *inode); int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata); int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata); int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, off_t offset, size_t len, dict_t *xdata); int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, size_t len, dict_t *xdata); int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata); int32_t dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); int dht_set_subvol_range(xlator_t *this); int32_t dht_init (xlator_t *this); void dht_fini (xlator_t *this); int dht_reconfigure (xlator_t *this, dict_t *options); int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...); /* definitions for nufa/switch */ int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int 
op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent); int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent); int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent); int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent); int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata); int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata); int gf_defrag_status_get (dht_conf_t *conf, dict_t *dict); void gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state); tier_pause_state_t gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf); int gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag); tier_pause_state_t gf_defrag_check_pause_tier (gf_tier_conf_t *defrag); int gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag); int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag); int gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status, dict_t *output); void* gf_defrag_start (void *this); int32_t gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, int *fop_errno); int dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, int flag, int *fop_errno); int dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout_int); int dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, 
dht_layout_t* layout_int); int dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, int32_t update_ctx); void dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat); int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx); int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx); int dht_dir_attr_heal (void *data); int dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data); int dht_dir_has_layout (dict_t *xattr, char *name); gf_boolean_t dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator); xlator_t * dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, xlator_t *ignore, dht_layout_t *layout, uint64_t filesize); xlator_t * dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, dht_layout_t *layout); int dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this); void dht_layout_dump (dht_layout_t *layout, const char *prefix); int32_t dht_priv_dump (xlator_t *this); int32_t dht_inodectx_dump (xlator_t *this, inode_t *inode); int dht_inode_ctx_get_mig_info (xlator_t *this, inode_t *inode, xlator_t **src_subvol, xlator_t **dst_subvol); gf_boolean_t dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol, xlator_t *dst_subvol); int dht_subvol_status (dht_conf_t *conf, xlator_t *subvol); void dht_log_new_layout_for_dir_selfheal (xlator_t *this, loc_t *loc, dht_layout_t *layout); int dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this); int dht_fill_dict_to_avoid_unlink_of_migrating_file (dict_t *dict); int dht_layout_sort (dht_layout_t *layout); int dht_heal_full_path (void *data); int dht_heal_full_path_done (int op_ret, call_frame_t *frame, void *data); int dht_layout_missing_dirs (dht_layout_t *layout); int dht_refresh_layout (call_frame_t *frame); gf_boolean_t dht_is_tier_xlator (xlator_t *this); int dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child, int32_t *op_errno); int32_t 
dht_set_local_rebalance (xlator_t *this, dht_local_t *local, struct iatt *stbuf, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); void dht_build_root_loc (inode_t *inode, loc_t *loc); gf_boolean_t dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst); int32_t dht_fd_ctx_destroy (xlator_t *this, fd_t *fd); int32_t dht_release (xlator_t *this, fd_t *fd); int32_t dht_set_fixed_dir_stat (struct iatt *stat); xlator_t* dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock, dht_local_t *local); int dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret); void dht_normalize_stats (struct statvfs *buf, unsigned long bsize, unsigned long frsize); int add_opt(char **optsp, const char *opt); int dht_aggregate_split_brain_xattr (dict_t *dst, char *key, data_t *value); int dht_remove_stale_linkto (void *data); int dht_remove_stale_linkto_cbk (int ret, call_frame_t *sync_frame, void *data); int dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *subvol); int dht_check_and_open_fd_on_subvol (xlator_t *this, call_frame_t *frame); /* FD fop callbacks */ int dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata); int dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_truncate_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); int dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iovec *vector, int count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata); int dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata); int dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata); int dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata); /* All custom xattr heal functions */ int dht_dir_heal_xattrs (void *data); int dht_dir_heal_xattrs_done (int ret, call_frame_t *sync_frame, void *data); void dht_aggregate_xattr (dict_t *dst, dict_t *src); int32_t dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size); int dht_set_user_xattr (dict_t *dict, char *k, data_t *v, void *data); void dht_dir_set_heal_xattr (xlator_t *this, dht_local_t *local, dict_t *dst, dict_t *src, int *uret, int *uflag); int dht_dir_xattr_heal (xlator_t *this, dht_local_t *local); int32_t dht_dict_get_array (dict_t *dict, char *key, int32_t value[], int32_t size, int *errst); xlator_t * dht_inode_get_hashed_subvol (inode_t *inode, xlator_t *this, loc_t *loc); int dht_common_mark_mdsxattr (call_frame_t *frame, int *errst, int flag); int dht_common_mark_mdsxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata); int dht_inode_ctx_mdsvol_set (inode_t *inode, xlator_t *this, xlator_t *mds_subvol); int dht_inode_ctx_mdsvol_get (inode_t *inode, xlator_t *this, xlator_t **mdsvol); int dht_selfheal_dir_setattr (call_frame_t *frame, 
loc_t *loc, struct iatt *stbuf, int32_t valid, dht_layout_t *layout); /* Abstract out the DHT-IATT-IN-DICT */ int dht_request_iatt_in_xdata (xlator_t *this, dict_t *xattr_req); int dht_read_iatt_from_xdata (xlator_t *this, dict_t *xdata, struct iatt *stbuf); #endif/* _DHT_H */