/* Copyright (c) 2008-2012 Red Hat, Inc. This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser General Public License, version 3 or any later version (LGPLv3 or later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ #ifndef __RECON_DRIVER_H__ #define __RECON_DRIVER_H__ #include "api/src/glfs.h" #define MAX_HOSTNAME_LEN 32 #define MAXIMUM_REPLICA_STRENGTH 8 #define MAX_RECONCILIATION_WINDOW_SIZE 10000 #define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd" #define GLUSTERD_VOLUME_DIR_PREFIX "vols" #define GLUSTERD_BRICK_INFO_DIR "bricks" /* * Even with the names fixed, the non-NSR_DEBUG definitions of nsr_*_log don't * work because many callers don't have "this" defined. * * TBD: use gf_log, fix "this" problem, eliminate extra fields and newlines. */ #define NSR_DEBUG typedef enum nsr_recon_work_req_id_t { NSR_WORK_ID_GET_NONE = 0, NSR_WORK_ID_GET_LAST_TERM_INFO = NSR_WORK_ID_GET_NONE + 1, NSR_WORK_ID_GET_GIVEN_TERM_INFO = NSR_WORK_ID_GET_LAST_TERM_INFO + 1, NSR_WORK_ID_RECONCILIATOR_DO_WORK = NSR_WORK_ID_GET_GIVEN_TERM_INFO + 1, NSR_WORK_ID_RESOLUTION_DO_WORK = NSR_WORK_ID_RECONCILIATOR_DO_WORK + 1, NSR_WORK_ID_GET_RECONCILATION_WINDOW = NSR_WORK_ID_RESOLUTION_DO_WORK + 1, NSR_WORK_ID_SINGLE_RECONCILIATION_READ = NSR_WORK_ID_GET_RECONCILATION_WINDOW + 1, NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT = NSR_WORK_ID_SINGLE_RECONCILIATION_READ + 1, NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH = NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT + 1, NSR_WORK_ID_GET_RESOLUTION_WINDOW = NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH + 1, NSR_WORK_ID_END_RECONCILIATION = NSR_WORK_ID_GET_RESOLUTION_WINDOW + 1, NSR_WORK_ID_INI = NSR_WORK_ID_END_RECONCILIATION + 1, NSR_WORK_ID_FINI = NSR_WORK_ID_INI + 1 } nsr_recon_work_req_id_t; typedef enum nsr_recon_queue_type_t { NSR_RECON_QUEUE_TO_CONTROL = 0, NSR_RECON_QUEUE_TO_DATA =NSR_RECON_QUEUE_TO_CONTROL + 1, } nsr_recon_queue_type_t; typedef enum nsr_log_type_t { NSR_LOG_HOLE = 0b0, NSR_LOG_PSEUDO_HOLE = 0b1, NSR_LOG_FILL = 0b11 } nsr_log_type_t; typedef enum nsr_mode_t { NSR_SEQ = 0, NSR_USE_THREADS = 1, NSR_ASYNC = 2 } nsr_mode_t; typedef enum nsr_recon_work_type_t { NSR_RECON_WORK_NONE = 0, NSR_RECON_WORK_HOLE_TO_NOOP = NSR_RECON_WORK_NONE + 1, NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE = NSR_RECON_WORK_HOLE_TO_NOOP + 1, NSR_RECON_WORK_COMPARE_PSEUDO_HOLE = NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE + 1, NSR_RECON_WORK_HOLE_TO_FILL = NSR_RECON_WORK_COMPARE_PSEUDO_HOLE + 1, NSR_RECON_WORK_UNDO_FILL = NSR_RECON_WORK_HOLE_TO_FILL + 1, } nsr_recon_work_type_t; typedef enum nsr_recon_driver_state_t { none = 0, leader = 1, reconciliator = 2, resolutor = 3, joiner = 4, } nsr_recon_driver_state_t; // role structure #pragma pack(push, 1) typedef struct _nsr_recon_role_s { uint32_t role; // leader, reconciliator, resolutor uint32_t num; // required in case state is reconciliator uint32_t current_term; // current term used in case of leader // In case this is reconciliator, num is set to nodes that were part // of previous term. // In case this is resolutor, num is set to 2. // info[0] - information for this node. // info[1] - information of the reconciliator. // In case this is leader, num is set to this term's membership list // set info.name to all members including the leader struct { int32_t last_term; int32_t commited_ops; uint32_t last_index; uint32_t first_index; char name[MAX_HOSTNAME_LEN]; } info[MAXIMUM_REPLICA_STRENGTH]; } nsr_recon_role_t; #pragma pack(pop) #define ENDIAN_CONVERSION_RR(rr, is_true) \ { \ uint32_t i=0; \ uint32_t (*f)(uint32_t) = ((is_true == _gf_true) ? ntohl : htonl); \ if (is_true == _gf_true) rr.num = f(rr.num); \ rr.current_term = f(rr.current_term); \ for (i=0; i < rr.num; i++) { \ rr.info[i].last_term = f(rr.info[i].last_term); \ rr.info[i].commited_ops = f(rr.info[i].commited_ops); \ rr.info[i].last_index = f(rr.info[i].last_index); \ rr.info[i].first_index = f(rr.info[i].first_index); \ } \ if (is_true == _gf_false) rr.num = f(rr.num); \ } // last term info structure #pragma pack(push, 1) typedef struct _nsr_recon_last_term_info_s { int32_t last_term; int32_t commited_ops; uint32_t last_index; uint32_t first_index; } nsr_recon_last_term_info_t; #pragma pack(pop) #define ENDIAN_CONVERSION_LT(lt, is_true) \ { \ uint32_t (*f)(uint32_t) = ((is_true == _gf_true) ? ntohl : htonl); \ lt.last_term = f(lt.last_term); \ lt.commited_ops = f(lt.commited_ops); \ lt.last_index = f(lt.last_index); \ lt.first_index = f(lt.first_index); \ } // log information #pragma pack(push, 1) typedef struct _nsr_recon_log_info_s { uint32_t term; uint32_t first_index; uint32_t last_index; } nsr_recon_log_info_t; #pragma pack(pop) #define ENDIAN_CONVERSION_LI(li, is_true) \ { \ uint32_t (*f)(uint32_t) = ((is_true == _gf_true) ? ntohl : htonl); \ li.term = f(li.term); \ li.first_index = f(li.first_index); \ li.last_index = f(li.last_index); \ } #pragma pack(push, 1) typedef struct nsr_recon_record_details_s { uint32_t type; uint32_t op; char gfid[36+1]; char pargfid[36+1]; char link_path[256]; // should it be PATH_MAX? uint32_t offset; uint32_t len; char entry[128]; char newloc[128]; // for rename. can you overload link_path for this? TBD mode_t mode; } nsr_recon_record_details_t; #pragma pack(pop) #define ENDIAN_CONVERSION_RD(rd, is_true) \ { \ uint32_t (*f)(uint32_t) = ((is_true == _gf_true) ? ntohl : htonl); \ rd.type = f(rd.type); \ rd.op = f(rd.op); \ rd.offset = f(rd.offset); \ rd.len = f(rd.len); \ } typedef struct _nsr_role_work_s { nsr_recon_role_t role; uint32_t term; struct list_head list; } nsr_role_work_t; typedef struct _nsr_recon_work_s { gf_boolean_t in_use; uint32_t index; uint32_t req_id; struct list_head list; } nsr_recon_work_t; typedef struct _nsr_reconciliation_work_s { uint32_t term; uint32_t index; uint32_t type; uint32_t source; void *data; uint32_t num; // used for xattr } nsr_reconciliation_work_t; typedef struct _nsr_reconciliation_record_s { nsr_reconciliation_work_t work; // will store the computed work nsr_recon_record_details_t rec; } nsr_reconciliation_record_t; typedef struct _nsr_reconciliator_info { uint32_t reconcilator_index; int32_t last_term; int32_t commited_ops; uint32_t last_index; uint32_t first_index; //nsr_reconciliation_record_t records[MAX_RECONCILIATION_WINDOW_SIZE]; nsr_reconciliation_record_t *records; } nsr_reconciliator_info_t; typedef struct _nsr_per_node_worker_s { char *id; // identifier char vol_file[256]; //volfile that will be used by this thread glfs_t *fs; glfs_fd_t *aux_fd; uint32_t index; // index into array of workers pthread_t thread_id; // thread id void * context; // thread context struct _nsr_recon_driver_ctxt *driver_ctx; char local; // local data worker //struct list_head list; //list of work items nsr_recon_work_t head; pthread_mutex_t mutex; //mutex to guard the state pthread_cond_t cv; //condition variable for signaling the worker thread gf_boolean_t is_control; #if defined(NSR_DEBUG) FILE *fp; #endif int32_t result; // result of latest work int32_t op_errno; // errno } nsr_per_node_worker_t; typedef struct _nsr_replica_worker_s { char name[256]; nsr_per_node_worker_t *control_worker; nsr_per_node_worker_t *data_worker; gf_boolean_t in_use; nsr_reconciliator_info_t *recon_info; // Bunch of infos kept for this reconciliation } nsr_replica_worker_t; typedef struct _nsr_recon_driver_ctxt { xlator_t *this; uint32_t replica_group_size; // number of static members of replica group nsr_replica_worker_t *workers; // worker info int32_t reconciliator; pthread_mutex_t mutex; pthread_cond_t cv; nsr_role_work_t role_head; volatile int32_t outstanding; uint32_t reconciliator_index; uint32_t term; uint32_t current_term; nsr_mode_t mode; // default set to seq #if defined(NSR_DEBUG) FILE *fp; #endif } nsr_recon_driver_ctx_t; void * nsr_reconciliation_driver(void *); gf_boolean_t nsr_recon_driver_set_role(nsr_recon_driver_ctx_t *ctx, nsr_recon_role_t *rr, uint32_t term); #define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) #define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) #define atomic_fetch_and __sync_fetch_and_and #define atomic_fetch_or __sync_fetch_and_or #if defined(NSR_DEBUG) #define NSR_LOG_DIR "/var/log/nsr-logs" extern int nsr_debug_level; extern FILE *recon_create_log (char *member, char *module); extern void _nsr_driver_log (const char *func, int line, char *member, FILE *fp, char *fmt, ...); #define nsr_driver_log(dom, levl, fmt...) do { \ FMT_WARN (fmt); \ if (levl <= nsr_debug_level) { \ nsr_recon_private_t *priv = ctx->this->private; \ _nsr_driver_log (__FUNCTION__, __LINE__, \ priv->replica_group_members[0], \ ctx->fp, \ ##fmt); \ } \ } while (0) extern void _nsr_worker_log (const char *func, int line, char *member, char *type, uint32_t index, FILE *fp, char *fmt, ...); #define nsr_worker_log(dom, levl, fmt...) do { \ FMT_WARN (fmt); \ if (levl <= nsr_debug_level) { \ nsr_recon_private_t *priv; \ priv = ctx->driver_ctx->this->private; \ _nsr_worker_log (__FUNCTION__, __LINE__, \ priv->replica_group_members[0], \ ctx->is_control ? "recon-con" : \ "recon-data", \ ctx->index, ctx->fp, \ ##fmt); \ } \ } while (0) #else #define nsr_driver_log(dom, levl, fmt...) gf_log(dom, levl, fmt) #define nsr_worker_log(dom, levl, fmt...) gf_log(dom, levl, fmt) #endif #endif /* #ifndef __RECON_DRIVER_H__ */