summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/nsr-recon/src/recon_driver.h
blob: 3efb26269675a9df1ccdb4fef4619a9c25e7f5f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/*
  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/

#ifndef __RECON_DRIVER_H__
#define __RECON_DRIVER_H__


#include "api/src/glfs.h"

/* Size in bytes of the per-member name[] buffer in nsr_recon_role_t.info[]. */
#define MAX_HOSTNAME_LEN 32
/* Number of info[] slots in nsr_recon_role_t. */
#define MAXIMUM_REPLICA_STRENGTH 8
/*
 * Within this header this is only mentioned by the comment on
 * nsr_reconciliator_info_t.records; any other users are outside this file.
 */
#define MAX_RECONCILIATION_WINDOW_SIZE 10000

/*
 * glusterd path components. They are not used anywhere else in this header.
 * NOTE(review): presumably these mirror glusterd's own definitions - confirm
 * they stay in sync.
 */
#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
#define GLUSTERD_VOLUME_DIR_PREFIX "vols"
#define GLUSTERD_BRICK_INFO_DIR "bricks"

/*
 * Even with the names fixed, the non-NSR_DEBUG definitions of nsr_*_log don't
 * work because many callers don't have "this" defined.
 *
 * TBD: use gf_log, fix "this" problem, eliminate extra fields and newlines.
 *
 * In this header NSR_DEBUG does three things: it adds a FILE *fp member to
 * nsr_per_node_worker_t and to the driver context, it selects the
 * file-backed nsr_driver_log/nsr_worker_log macros, and it exposes
 * nsr_debug_level and recon_create_log. It is defined unconditionally, so
 * the gf_log-based fallback macros are currently never compiled.
 */
#define NSR_DEBUG

/*
 * Work request identifiers. The values are consecutive, starting at 0, in
 * declaration order; the numeric value of each enumerator is noted beside it.
 */
typedef enum nsr_recon_work_req_id_t {
        NSR_WORK_ID_GET_NONE = 0,
        NSR_WORK_ID_GET_LAST_TERM_INFO,             /*  1 */
        NSR_WORK_ID_GET_GIVEN_TERM_INFO,            /*  2 */
        NSR_WORK_ID_RECONCILIATOR_DO_WORK,          /*  3 */
        NSR_WORK_ID_RESOLUTION_DO_WORK,             /*  4 */
        NSR_WORK_ID_GET_RECONCILATION_WINDOW,       /*  5 */
        NSR_WORK_ID_SINGLE_RECONCILIATION_READ,     /*  6 */
        NSR_WORK_ID_SINGLE_RECONCILIATION_COMMIT,   /*  7 */
        NSR_WORK_ID_SINGLE_RECONCILIATION_FLUSH,    /*  8 */
        NSR_WORK_ID_GET_RESOLUTION_WINDOW,          /*  9 */
        NSR_WORK_ID_END_RECONCILIATION,             /* 10 */
        NSR_WORK_ID_INI,                            /* 11 */
        NSR_WORK_ID_FINI                            /* 12 */
} nsr_recon_work_req_id_t;

/* Selects which of the two work queues (control or data) a request goes to. */
typedef enum nsr_recon_queue_type_t {
        NSR_RECON_QUEUE_TO_CONTROL = 0,
        NSR_RECON_QUEUE_TO_DATA    = 1,
} nsr_recon_queue_type_t;

/*
 * Log entry classification. The values are bit patterns: FILL (0x3) has the
 * PSEUDO_HOLE bit (0x1) set as well.
 *
 * Written in hex rather than with 0b... literals: binary constants are a GNU
 * extension before C23 and are rejected by strict C99/C11 compilers.
 */
typedef enum nsr_log_type_t {
        NSR_LOG_HOLE        = 0x0,
        NSR_LOG_PSEUDO_HOLE = 0x1,
        NSR_LOG_FILL        = 0x3
} nsr_log_type_t;

/* Execution mode stored in the driver context (nsr_recon_driver_ctx_t.mode). */
typedef enum nsr_mode_t {
        NSR_SEQ = 0,
        NSR_USE_THREADS,        /* 1 */
        NSR_ASYNC               /* 2 */
} nsr_mode_t;

/*
 * Kind of reconciliation work computed for a record. Values are consecutive
 * from 0 in declaration order.
 */
typedef enum nsr_recon_work_type_t {
        NSR_RECON_WORK_NONE = 0,
        NSR_RECON_WORK_HOLE_TO_NOOP,            /* 1 */
        NSR_RECON_WORK_HOLE_TO_PSEUDO_HOLE,     /* 2 */
        NSR_RECON_WORK_COMPARE_PSEUDO_HOLE,     /* 3 */
        NSR_RECON_WORK_HOLE_TO_FILL,            /* 4 */
        NSR_RECON_WORK_UNDO_FILL,               /* 5 */
} nsr_recon_work_type_t;

/*
 * Role/state of the reconciliation driver. Values are consecutive from 0.
 * NOTE(review): these enumerators are short lower-case names in the global
 * namespace (none, leader, ...); they are kept as-is because callers use them.
 */
typedef enum nsr_recon_driver_state_t {
        none = 0,
        leader,                 /* 1 */
        reconciliator,          /* 2 */
        resolutor,              /* 3 */
        joiner,                 /* 4 */
} nsr_recon_driver_state_t;

/*
 * Role structure. Packed to 1-byte alignment; ENDIAN_CONVERSION_RR byte-swaps
 * its integer fields.
 *
 * How num and info[] are filled depends on the role:
 *   - reconciliator: num is set to the nodes that were part of the previous
 *     term.
 *   - resolutor: num is 2; info[0] describes this node and info[1] the
 *     reconciliator.
 *   - leader: num is this term's membership list; info[].name is set for all
 *     members, including the leader.
 */
#pragma pack(push, 1)
typedef struct _nsr_recon_role_s {
        uint32_t role;          /* leader, reconciliator, resolutor */
        uint32_t num;           /* required in case state is reconciliator */
        uint32_t current_term;  /* current term, used in case of leader */
        struct {
                int32_t  last_term;
                int32_t  commited_ops;
                uint32_t last_index;
                uint32_t first_index;
                char     name[MAX_HOSTNAME_LEN];
        } info[MAXIMUM_REPLICA_STRENGTH];
} nsr_recon_role_t;
#pragma pack(pop)

/*
 * Byte-swap, in place, the integer fields of an nsr_recon_role_t.
 *
 *   rr      - the structure itself (an lvalue, not a pointer). It is
 *             evaluated several times, so it must be free of side effects.
 *   is_true - _gf_true converts network to host order (ntohl); _gf_false
 *             converts host to network order (htonl). Evaluated once.
 *
 * rr.num is the loop bound and therefore has to be in host order while the
 * loop runs: it is converted before the loop when decoding and after the
 * loop when encoding. The loop is also capped at the capacity of rr.info[]
 * so a corrupt count cannot index past the array.
 *
 * rr.role and info[].name are not touched.
 * NOTE(review): role is a uint32_t but is never byte-swapped - confirm that
 * this is intentional.
 *
 * The body is a do { } while (0) so the macro behaves as one statement
 * (usable in an unbraced if/else), and the locals carry an nsr_rr_ prefix so
 * that an argument such as roles[i] is not captured by a macro-local name.
 */
#define ENDIAN_CONVERSION_RR(rr, is_true)                                      \
do {                                                                           \
        const int nsr_rr_dir_ = (is_true);                                     \
        uint32_t (*nsr_rr_conv_)(uint32_t) =                                   \
                ((nsr_rr_dir_ == _gf_true) ? ntohl : htonl);                   \
        const uint32_t nsr_rr_cap_ =                                           \
                (uint32_t)(sizeof((rr).info) / sizeof((rr).info[0]));          \
        uint32_t nsr_rr_cnt_ = 0;                                              \
        uint32_t nsr_rr_i_ = 0;                                                \
        if (nsr_rr_dir_ == _gf_true)                                           \
                (rr).num = nsr_rr_conv_((rr).num);                             \
        (rr).current_term = nsr_rr_conv_((rr).current_term);                   \
        nsr_rr_cnt_ = ((rr).num < nsr_rr_cap_) ? (rr).num : nsr_rr_cap_;       \
        for (nsr_rr_i_ = 0; nsr_rr_i_ < nsr_rr_cnt_; nsr_rr_i_++) {            \
                (rr).info[nsr_rr_i_].last_term = (int32_t)                     \
                        nsr_rr_conv_((uint32_t)(rr).info[nsr_rr_i_].last_term);\
                (rr).info[nsr_rr_i_].commited_ops = (int32_t)                  \
                        nsr_rr_conv_((uint32_t)                                \
                                     (rr).info[nsr_rr_i_].commited_ops);       \
                (rr).info[nsr_rr_i_].last_index =                              \
                        nsr_rr_conv_((rr).info[nsr_rr_i_].last_index);         \
                (rr).info[nsr_rr_i_].first_index =                             \
                        nsr_rr_conv_((rr).info[nsr_rr_i_].first_index);        \
        }                                                                      \
        if (nsr_rr_dir_ == _gf_false)                                          \
                (rr).num = nsr_rr_conv_((rr).num);                             \
} while (0)

/*
 * Last term info structure. Packed to 1-byte alignment; the field set matches
 * one info[] entry of nsr_recon_role_t minus the name. All four fields are
 * byte-swapped by ENDIAN_CONVERSION_LT.
 */
#pragma pack(push, 1)
typedef struct _nsr_recon_last_term_info_s {
        int32_t  last_term;
        int32_t  commited_ops;
        uint32_t last_index;
        uint32_t first_index;
} nsr_recon_last_term_info_t;
#pragma pack(pop)

/*
 * Byte-swap, in place, every field of an nsr_recon_last_term_info_t.
 *
 *   lt      - the structure itself (an lvalue, not a pointer); evaluated
 *             several times, so it must be free of side effects.
 *   is_true - _gf_true selects ntohl (network to host); any other value
 *             selects htonl (host to network).
 *
 * Wrapped in do { } while (0) so it acts as one statement, with arguments
 * parenthesized and a prefixed local so a caller variable named f is not
 * captured by the expansion.
 */
#define ENDIAN_CONVERSION_LT(lt, is_true)                                      \
do {                                                                           \
        uint32_t (*nsr_lt_conv_)(uint32_t) =                                   \
                (((is_true) == _gf_true) ? ntohl : htonl);                     \
        (lt).last_term = (int32_t) nsr_lt_conv_((uint32_t)(lt).last_term);     \
        (lt).commited_ops =                                                    \
                (int32_t) nsr_lt_conv_((uint32_t)(lt).commited_ops);           \
        (lt).last_index = nsr_lt_conv_((lt).last_index);                       \
        (lt).first_index = nsr_lt_conv_((lt).first_index);                     \
} while (0)

/*
 * Log information: a term and its first/last index. Packed to 1-byte
 * alignment; all three fields are byte-swapped by ENDIAN_CONVERSION_LI.
 */
#pragma pack(push, 1)
typedef struct _nsr_recon_log_info_s {
        uint32_t term;
        uint32_t first_index;
        uint32_t last_index;
} nsr_recon_log_info_t;
#pragma pack(pop)

/*
 * Byte-swap, in place, every field of an nsr_recon_log_info_t.
 *
 *   li      - the structure itself (an lvalue, not a pointer); evaluated
 *             several times, so it must be free of side effects.
 *   is_true - _gf_true selects ntohl (network to host); any other value
 *             selects htonl (host to network).
 *
 * Wrapped in do { } while (0) so it acts as one statement, with arguments
 * parenthesized and a prefixed local so a caller variable named f is not
 * captured by the expansion.
 */
#define ENDIAN_CONVERSION_LI(li, is_true)                                      \
do {                                                                           \
        uint32_t (*nsr_li_conv_)(uint32_t) =                                   \
                (((is_true) == _gf_true) ? ntohl : htonl);                     \
        (li).term = nsr_li_conv_((li).term);                                   \
        (li).first_index = nsr_li_conv_((li).first_index);                     \
        (li).last_index = nsr_li_conv_((li).last_index);                       \
} while (0)

/*
 * Details of a single record. Packed to 1-byte alignment.
 * ENDIAN_CONVERSION_RD byte-swaps type, op, offset and len only.
 *
 * NOTE(review): mode is a mode_t, whose width is platform-defined, and it is
 * not byte-swapped by ENDIAN_CONVERSION_RD - confirm this is safe wherever
 * the structure crosses hosts.
 */
#pragma pack(push, 1)
typedef struct nsr_recon_record_details_s {
        uint32_t type;
        uint32_t op;
        char     gfid[36 + 1];
        char     pargfid[36 + 1];
        char     link_path[256];        /* should it be PATH_MAX? */
        uint32_t offset;
        uint32_t len;
        char     entry[128];
        /* for rename. can you overload link_path for this? TBD */
        char     newloc[128];
        mode_t   mode;
} nsr_recon_record_details_t;
#pragma pack(pop)

/*
 * Byte-swap, in place, the type, op, offset and len fields of an
 * nsr_recon_record_details_t. The character arrays and the mode field are
 * left untouched.
 *
 *   rd      - the structure itself (an lvalue, not a pointer); evaluated
 *             several times, so it must be free of side effects.
 *   is_true - _gf_true selects ntohl (network to host); any other value
 *             selects htonl (host to network).
 *
 * Wrapped in do { } while (0) so it acts as one statement, with arguments
 * parenthesized and a prefixed local so a caller variable named f is not
 * captured by the expansion.
 */
#define ENDIAN_CONVERSION_RD(rd, is_true)                                      \
do {                                                                           \
        uint32_t (*nsr_rd_conv_)(uint32_t) =                                   \
                (((is_true) == _gf_true) ? ntohl : htonl);                     \
        (rd).type = nsr_rd_conv_((rd).type);                                   \
        (rd).op = nsr_rd_conv_((rd).op);                                       \
        (rd).offset = nsr_rd_conv_((rd).offset);                               \
        (rd).len = nsr_rd_conv_((rd).len);                                     \
} while (0)

/*
 * A role together with a term, linkable into a list. The driver context
 * embeds one of these as role_head.
 */
typedef struct _nsr_role_work_s {
        nsr_recon_role_t  role;
        uint32_t          term;
        struct list_head  list;
} nsr_role_work_t;
        
/*
 * One work item, linkable into a list. Each per-node worker embeds one of
 * these as head. req_id is declared as a plain uint32_t.
 * NOTE(review): presumably req_id holds an nsr_recon_work_req_id_t value -
 * confirm against the callers.
 */
typedef struct _nsr_recon_work_s {
        gf_boolean_t      in_use;
        uint32_t          index;
        uint32_t          req_id;
        struct list_head  list;
} nsr_recon_work_t;

/*
 * Work computed for one record; stored as the work member of
 * nsr_reconciliation_record_t.
 */
typedef struct _nsr_reconciliation_work_s {
        uint32_t  term;
        uint32_t  index;
        uint32_t  type;
        uint32_t  source;
        void     *data;
        uint32_t  num;          /* used for xattr */
} nsr_reconciliation_work_t;

/* A record's details paired with the work computed for it. */
typedef struct _nsr_reconciliation_record_s {
        nsr_reconciliation_work_t   work;   /* will store the computed work */
        nsr_recon_record_details_t  rec;
} nsr_reconciliation_record_t;

/*
 * Reconciliation information for one replica: term/index bookkeeping plus a
 * pointer to its records. nsr_replica_worker_t points at one of these.
 */
typedef struct _nsr_reconciliator_info {
        uint32_t  reconcilator_index;
        int32_t   last_term;
        int32_t   commited_ops;
        uint32_t  last_index;
        uint32_t  first_index;
        /*
         * Pointer to the records. This was formerly an inline array of
         * MAX_RECONCILIATION_WINDOW_SIZE entries.
         */
        nsr_reconciliation_record_t *records;
} nsr_reconciliator_info_t;

/*
 * State of one worker thread. A replica worker refers to two of these: a
 * control worker and a data worker.
 */
typedef struct _nsr_per_node_worker_s {
        char            *id;            /* identifier */
        char             vol_file[256]; /* volfile used by this thread */
        glfs_t          *fs;
        glfs_fd_t       *aux_fd;
        uint32_t         index;         /* index into array of workers */
        pthread_t        thread_id;     /* thread id */
        void            *context;       /* thread context */
        struct _nsr_recon_driver_ctxt *driver_ctx;
        char             local;         /* local data worker */
        nsr_recon_work_t head;          /* head of the list of work items */
        pthread_mutex_t  mutex;         /* mutex to guard the state */
        pthread_cond_t   cv;            /* signals the worker thread */
        gf_boolean_t     is_control;
#ifdef NSR_DEBUG
        FILE            *fp;
#endif
        int32_t          result;        /* result of latest work */
        int32_t          op_errno;      /* errno */
} nsr_per_node_worker_t;

/*
 * Per-replica bookkeeping: the replica's name, its control and data workers,
 * and its reconciliation information.
 */
typedef struct _nsr_replica_worker_s {
        char                      name[256];
        nsr_per_node_worker_t    *control_worker;
        nsr_per_node_worker_t    *data_worker;
        gf_boolean_t              in_use;
        /* Bunch of infos kept for this reconciliation */
        nsr_reconciliator_info_t *recon_info;
} nsr_replica_worker_t;

/*
 * Reconciliation driver context. The nsr_driver_log macro reads the this and
 * fp members through a caller variable named ctx.
 */
typedef struct _nsr_recon_driver_ctxt {
        xlator_t             *this;
        /* number of static members of replica group */
        uint32_t              replica_group_size;
        nsr_replica_worker_t *workers;          /* worker info */
        int32_t               reconciliator;
        pthread_mutex_t       mutex;
        pthread_cond_t        cv;
        nsr_role_work_t       role_head;
        volatile int32_t      outstanding;
        uint32_t              reconciliator_index;
        uint32_t              term;
        uint32_t              current_term;
        nsr_mode_t            mode;             /* default set to seq */
#ifdef NSR_DEBUG
        FILE                 *fp;
#endif
} nsr_recon_driver_ctx_t;

/*
 * Reconciliation driver routine; defined outside this header.
 * NOTE(review): the void *(void *) shape matches a pthread start routine and
 * the argument is presumably the nsr_recon_driver_ctx_t - confirm against
 * the definition.
 */
void *
nsr_reconciliation_driver(void *);

/*
 * Passes a role (rr) and a term to the driver context ctx; defined outside
 * this header.
 * NOTE(review): what the gf_boolean_t result means, and whether rr is copied
 * or retained, is not visible here - confirm against the definition.
 */
gf_boolean_t
nsr_recon_driver_set_role(nsr_recon_driver_ctx_t *ctx, nsr_recon_role_t *rr, uint32_t term);

/*
 * Thin wrappers over the GCC/Clang __sync builtins.
 *
 * atomic_inc / atomic_dec add or subtract 1 at *ptr and discard the result.
 * atomic_fetch_and / atomic_fetch_or are plain aliases, so they take
 * (ptr, value) and return the value *ptr held before the operation.
 *
 * NOTE(review): atomic_fetch_and and atomic_fetch_or are also names used by
 * C11 <stdatomic.h>; including that header alongside this one would clash.
 */
#define atomic_inc(ptr)        ((void) __sync_add_and_fetch((ptr), 1))
#define atomic_dec(ptr)        ((void) __sync_sub_and_fetch((ptr), 1))
#define atomic_fetch_and       __sync_fetch_and_and
#define atomic_fetch_or        __sync_fetch_and_or

#if defined(NSR_DEBUG)

/*
 * Directory name for NSR debug logs. Nothing in this header uses it.
 * NOTE(review): presumably consumed by recon_create_log - confirm.
 */
#define NSR_LOG_DIR "/var/log/nsr-logs"

/*
 * Verbosity threshold: nsr_driver_log and nsr_worker_log emit a message only
 * when its level is <= nsr_debug_level. Defined outside this header.
 */
extern int      nsr_debug_level;
/*
 * Returns a FILE * for the given member/module pair; defined outside this
 * header.
 * NOTE(review): presumably opens a log file under NSR_LOG_DIR and may return
 * NULL on failure - confirm against the definition.
 */
extern FILE     *recon_create_log (char *member, char *module);

/*
 * Back end of nsr_driver_log; defined outside this header. It receives the
 * calling function name and line, a member name, the destination FILE * and
 * a format string with its variadic arguments.
 * NOTE(review): fmt is presumably printf-style - confirm against the
 * definition.
 */
extern void
_nsr_driver_log (const char *func, int line, char *member, FILE *fp,
                 char *fmt, ...);

/*
 * Debug-build logging macro for driver code.
 *
 * Implicit requirements at the call site:
 *   - a variable named ctx must be in scope, pointing to something with
 *     this and fp members (nsr_recon_driver_ctx_t has both);
 *   - nsr_recon_private_t must be declared (it is not declared in this
 *     header) and must have a replica_group_members array.
 *
 * dom is ignored in this variant. The message is emitted only when
 * levl <= nsr_debug_level. The expansion declares a local named priv, which
 * shadows any caller variable of that name inside the macro body.
 * Uses the GNU named-variadic form (fmt... / ##fmt).
 */
#define nsr_driver_log(dom, levl, fmt...) do {                          \
        FMT_WARN (fmt);                                                 \
        if (levl <= nsr_debug_level) {                                  \
                nsr_recon_private_t     *priv   = ctx->this->private;   \
                _nsr_driver_log (__FUNCTION__, __LINE__,                \
                                   priv->replica_group_members[0],      \
                                   ctx->fp,                             \
                                   ##fmt);                              \
        }                                                               \
} while (0)

/*
 * Back end of nsr_worker_log; defined outside this header. In addition to
 * the _nsr_driver_log arguments it takes a worker type string and a worker
 * index.
 * NOTE(review): fmt is presumably printf-style - confirm against the
 * definition.
 */
extern void
_nsr_worker_log (const char *func, int line, char *member,
                 char *type, uint32_t index, FILE *fp,
                 char *fmt, ...);

/*
 * Debug-build logging macro for worker code.
 *
 * Implicit requirements at the call site:
 *   - a variable named ctx must be in scope, pointing to something with
 *     driver_ctx, is_control, index and fp members (nsr_per_node_worker_t
 *     has all four);
 *   - nsr_recon_private_t must be declared (it is not declared in this
 *     header) and must have a replica_group_members array.
 *
 * dom is ignored in this variant. The message is emitted only when
 * levl <= nsr_debug_level. The worker type string is "recon-con" when
 * ctx->is_control is set and "recon-data" otherwise. The expansion declares
 * a local named priv, which shadows any caller variable of that name inside
 * the macro body. Uses the GNU named-variadic form (fmt... / ##fmt).
 */
#define nsr_worker_log(dom, levl, fmt...) do {                          \
        FMT_WARN (fmt);                                                 \
        if (levl <= nsr_debug_level) {                                  \
                nsr_recon_private_t *priv;                              \
                priv = ctx->driver_ctx->this->private;                  \
                _nsr_worker_log (__FUNCTION__, __LINE__,                \
                                 priv->replica_group_members[0],        \
                                 ctx->is_control ? "recon-con" :        \
                                 "recon-data",                          \
                                 ctx->index, ctx->fp,                   \
                                 ##fmt);                                \
        }                                                               \
} while (0)

#else
/*
 * Non-debug variants: forward all arguments unchanged to gf_log.
 * This branch is currently never compiled because NSR_DEBUG is defined
 * unconditionally earlier in this header. Per the comment next to that
 * definition, these forms do not work yet because many callers have no
 * "this" in scope.
 */
#define nsr_driver_log(dom, levl, fmt...) gf_log(dom, levl, fmt)
#define nsr_worker_log(dom, levl, fmt...) gf_log(dom, levl, fmt)
#endif

#endif /* #ifndef __RECON_DRIVER_H__ */