/*
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#ifndef __BIT_ROT_H__
#define __BIT_ROT_H__
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
#include "xlator.h"
#include "defaults.h"
#include "syncop.h"
#include "syncop-utils.h"
#include "changelog.h"
#include "timer-wheel.h"
#include "bit-rot-tbf.h"
#include "bit-rot-common.h"
#include "bit-rot-stub-mem-types.h"
#include <openssl/sha.h>
/**
 * Number of worker threads: this constant sizes the workers[] array in
 * struct br_obj_n_workers.
 *
 * TODO: make this configurable. As a best practice, set this to the
 * number of processor cores.
 */
#define BR_WORKERS 4
/**
 * Throttle setting of the scrubber; held in the "throttle" member of
 * struct br_scrubber.
 *
 * BR_SCRUB_THROTTLE_VOID is the only negative enumerator; the other
 * values count up from zero.
 */
typedef enum scrub_throttle {
        BR_SCRUB_THROTTLE_VOID       = -1,
        BR_SCRUB_THROTTLE_LAZY       =  0,
        BR_SCRUB_THROTTLE_NORMAL     =  1,
        BR_SCRUB_THROTTLE_AGGRESSIVE =  2,
        BR_SCRUB_THROTTLE_STALLED    =  3,
} scrub_throttle_t;
/**
 * Scrub frequency setting; held in the "frequency" member of
 * struct br_scrubber. Enumerators are numbered consecutively,
 * starting at 1.
 */
typedef enum scrub_freq {
        BR_FSSCRUB_FREQ_HOURLY   = 1,
        BR_FSSCRUB_FREQ_DAILY    = 2,
        BR_FSSCRUB_FREQ_WEEKLY   = 3,
        BR_FSSCRUB_FREQ_BIWEEKLY = 4,
        BR_FSSCRUB_FREQ_MONTHLY  = 5,
} scrub_freq_t;
/**
 * Size in bytes of a br_isignature_t plus @hl additional bytes plus
 * one more byte.
 * NOTE(review): the extra byte is presumably room for a terminating
 * NUL -- confirm against the users of this macro.
 *
 * @hl is parenthesised so that arguments built from low-precedence
 * operators (e.g. "a ? b : c" or "x | y") expand as one operand
 * instead of being split by the surrounding additions.
 */
#define signature_size(hl) (sizeof (br_isignature_t) + (hl) + 1)
/**
 * Per-subvolume filesystem scanner state; embedded in struct br_child
 * as its "fsscan" member.
 *
 * NOTE(review): the code using these fields is not visible in this
 * header. Presumably entrylock guards entries/queued/ready and
 * waitlock/waitcond let threads wait for queued work -- confirm
 * against the scrubber implementation.
 */
struct br_scanfs {
        gf_lock_t entrylock;
        pthread_mutex_t waitlock;
        pthread_cond_t waitcond;
        unsigned int entries;
        struct list_head queued;
        struct list_head ready;
        /* scheduler (presumably wakelock/wakecond pair with the kick
           flag and timer is this scanner's timer-wheel entry --
           TODO confirm) */
        uint32_t boot;
        gf_boolean_t kick;
        pthread_mutex_t wakelock;
        pthread_cond_t wakecond;
        struct gf_tw_timer_list *timer;
};
/**
 * State kept for one child (subvolume) of the bit-rot xlator.
 * Instances are referenced through the br_child_t typedef below.
 */
struct br_child {
        char child_up;               /* Indicates whether this child is
                                        up or not */
        xlator_t *xl;                /* client xlator corresponding to
                                        this child */
        inode_table_t *table;        /* inode table for this child */
        char brick_path[PATH_MAX];   /* brick export directory of this
                                        child */
        struct list_head list;       /* hook to attach to the list of
                                        UP children */
        xlator_t *this;              /* Bit rot xlator */
        pthread_t thread;            /* initial crawler for unsigned
                                        object(s) or scrub crawler */
        int threadrunning;           /* active thread */
        struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
        struct timeval tv;           /* NOTE(review): purpose is not
                                        documented in this header --
                                        confirm against its users */
        struct br_scanfs fsscan;     /* per subvolume FS scanner */
};
typedef struct br_child br_child_t;
/**
 * Queue of objects that are ready for signing together with the
 * BR_WORKERS threads that sign them.
 */
struct br_obj_n_workers {
        struct list_head objects;      /* queue of objects expired from the
                                          timer wheel and ready to be picked
                                          up for signing */
        pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
                                          from the above queue and start
                                          signing each object */
};
/**
 * Scrubber state; embedded in struct br_private as its "fsscrub"
 * member.
 *
 * NOTE(review): presumably mutex/cond synchronise access to the
 * scrubbers and scrublist lists -- confirm against the scrubber
 * implementation, which is not visible in this header.
 */
struct br_scrubber {
        xlator_t *this;
        scrub_throttle_t throttle;
        /**
         * frequency of scanning for this subvolume. this should
         * normally be per-child, but since all children follow the
         * same frequency for a volume, this option ends up here
         * instead of br_child_t.
         */
        scrub_freq_t frequency;
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        unsigned int nr_scrubbers;
        struct list_head scrubbers;
        /**
         * list of "rotatable" subvolume(s) undergoing scrubbing
         */
        struct list_head scrublist;
};
/* Short name for struct br_obj_n_workers; used by br_private below. */
typedef struct br_obj_n_workers br_obj_n_workers_t;
/**
 * Top-level state of the bit-rot xlator: the children (subvolumes),
 * the timer wheel, the signing queue and workers, and the scrubber.
 *
 * NOTE(review): presumably this is what the xlator stores as its
 * private data, and "lock" is the mutex paired with "cond" and
 * "object_cond" -- confirm against the implementation.
 */
struct br_private {
        pthread_mutex_t lock;
        struct list_head bricks;       /* list of bricks from which CHILD_UP
                                          has been received */
        struct list_head signing;      /* NOTE(review): not documented in
                                          this header -- confirm usage */
        pthread_cond_t object_cond;    /* handling signing of objects */
        int child_count;
        br_child_t *children;          /* list of subvolumes */
        int up_children;
        pthread_cond_t cond;           /* handling CHILD_UP notifications */
        pthread_t thread;              /* thread for connecting each UP
                                          child with changelog */
        struct tvec_base *timer_wheel; /* timer wheel where the objects which
                                          changelog has sent sits and waits
                                          for expiry */
        br_obj_n_workers_t *obj_queue; /* place holder for all the objects
                                          that are expired from timer wheel
                                          and ready to be picked up for
                                          signing and the workers which sign
                                          the objects */
        int32_t expiry_time;           /* objects "wait" time */
        br_tbf_t *tbf;                 /* token bucket filter */
        gf_boolean_t iamscrubber;      /* function as a fs scrubber */
        struct br_scrubber fsscrub;    /* scrubbers for this subvolume */
};
typedef struct br_private br_private_t;
/**
 * One object (identified by gfid) that is to be signed. It can be
 * linked onto a queue through "list" once it has expired from the
 * timer wheel.
 */
struct br_object {
        xlator_t *this;
        uuid_t gfid;
        unsigned long signedversion; /* version against which this object
                                        will be signed */
        br_child_t *child;           /* object's subvolume */
        int sign_info;               /* NOTE(review): meaning of the value
                                        is not documented here -- confirm */
        struct list_head list;       /* hook to add to the queue once the
                                        object is expired from timer wheel */
        void *data;                  /* NOTE(review): opaque pointer; what
                                        it refers to is not visible here */
};
typedef struct br_object br_object_t;
/**
 * Judging by its name, logs a message about the object identified by
 * the uuid_t argument.
 *
 * NOTE(review): the definition is not in this header. The char *
 * argument is presumably a description of the operation and the
 * int32_t an error number -- confirm against the implementation.
 */
void
br_log_object (xlator_t *, char *, uuid_t, int32_t);
/**
 * Judging by its name, logs a message about an object identified by a
 * path string rather than by a uuid_t.
 *
 * NOTE(review): the definition is not in this header. The char *
 * argument is presumably a description of the operation, the
 * const char * the path, and the int32_t an error number -- confirm
 * against the implementation.
 */
void
br_log_object_path (xlator_t *, char *, const char *, int32_t);
/**
 * Judging by its name, computes the checksum of an object.
 *
 * NOTE(review): the definition is not in this header. Presumably the
 * object is read through the fd_t on the given child and the digest
 * is written to the unsigned char buffer; the required buffer size
 * and the meaning of the int32_t return value must be confirmed
 * against the implementation.
 */
int32_t
br_calculate_obj_checksum (unsigned char *,
                           br_child_t *, fd_t *, struct iatt *);
/**
 * Judging by its name, prepares a loc_t for use by the caller.
 *
 * NOTE(review): the definition is not in this header. Presumably the
 * first loc_t is the parent, the gf_dirent_t the directory entry and
 * the last loc_t the output -- confirm the argument roles and the
 * meaning of the int32_t return value against the implementation.
 */
int32_t
br_prepare_loc (xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
/**
 * Judging by its name, reports whether a file is marked bad.
 *
 * NOTE(review): the definition is not in this header. Whether the
 * file is looked up through the loc_t, the fd_t, or either one must
 * be confirmed against the implementation.
 */
gf_boolean_t
bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *);
#endif /* __BIT_ROT_H__ */