1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
|
/*
Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com>
This file is part of GlusterFS.
GlusterFS is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation; either version 3 of the License,
or (at your option) any later version.
GlusterFS is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see
<http://www.gnu.org/licenses/>.
*/
#ifndef __IO_CACHE_H
#define __IO_CACHE_H
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include <sys/types.h>
#include "compat-errno.h"
#include "glusterfs.h"
#include "logging.h"
#include "dict.h"
#include "xlator.h"
#include "common-utils.h"
#include "call-stub.h"
#include "rbthash.h"
#include "hashfn.h"
#include <sys/time.h>
#include <fnmatch.h>
/*
 * Compile-time constants for the io-cache translator.
 * NOTE(review): these look like the defaults behind ioc_table.page_size and
 * ioc_table.cache_size; the place where they are applied is not visible in
 * this header - confirm at the use sites.
 */
#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
/* 32 * 1024 * 1024 = 33554432; presumably bytes, i.e. 32MB - confirm. */
#define IOC_CACHE_SIZE (32 * 1024 * 1024)
/* NOTE(review): presumably the bucket count passed when a page table is created - confirm. */
#define IOC_PAGE_TABLE_BUCKET_COUNT 1
/*
 * Forward declarations: the structures below refer to each other by pointer
 * before their full definitions appear (e.g. ioc_local holds a
 * struct ioc_inode * ahead of the definition of struct ioc_inode).
 */
struct ioc_table;
struct ioc_local;
struct ioc_page;
struct ioc_inode;
/*
 * ioc_priority - pairs a pattern string with a numeric priority.
 *
 * NOTE(review): <fnmatch.h> is included by this header, so @pattern is
 * presumably an fnmatch(3) glob, and entries are presumably chained on
 * ioc_table.priority_list through @list - confirm against the code that
 * builds and walks that list.
 */
struct ioc_priority {
struct list_head list; /* list linkage for this entry */
char *pattern; /* pattern string */
uint32_t priority; /* priority value paired with the pattern */
};
/*
 * ioc_waitq - node of a singly linked list representing the frames
 * waiting on a page
 *
 * ioc_page, ioc_inode and ioc_local each hold a pointer of this type.
 *
 * @next: pointer to next object in waitq
 * @data: pointer to the frame which is waiting (stored as void *)
 * @pending_offset: offset recorded for this waiter
 * @pending_size: size recorded for this waiter
 *
 * NOTE(review): pending_offset/pending_size presumably mirror the fields of
 * the same name in ioc_local (the region the waiter still needs) - confirm.
 */
struct ioc_waitq {
struct ioc_waitq *next;
void *data;
off_t pending_offset;
size_t pending_size;
};
/*
 * ioc_fill - one entry on a frame's list of fill structures
 * (see ioc_local.fill_list).
 *
 * Each entry carries an offset/size pair together with an iovec array,
 * a count and an iobref.
 * NOTE(review): presumably @count is the number of elements in @vector and
 * @iobref keeps the buffers behind @vector referenced - confirm in the
 * code that fills and consumes these entries.
 */
struct ioc_fill {
struct list_head list; /* list of ioc_fill structures of a frame */
off_t offset;
size_t size;
struct iovec *vector;
int32_t count;
struct iobref *iobref;
};
/*
 * ioc_local - per-frame state kept by the io-cache translator.
 *
 * The ioc_local_lock()/ioc_local_unlock() macros in this header dereference
 * inode->table->xl->name for logging, so @inode (and the table/xlator it
 * points to) must be valid whenever those macros are used on this object.
 *
 * NOTE(review): the precise meaning of mode/flags/wbflags/need_xattr and of
 * the opaque @stub pointer is not visible in this header - confirm in the
 * translator source.
 */
struct ioc_local {
mode_t mode;
int32_t flags;
int32_t wbflags;
loc_t file_loc;
off_t offset;
size_t size;
int32_t op_ret;
int32_t op_errno;
struct list_head fill_list; /* list of ioc_fill structures */
off_t pending_offset; /*
* offset from this frame should
* continue
*/
size_t pending_size; /*
* size of data this frame is waiting
* on
*/
struct ioc_inode *inode; /* dereferenced by the ioc_local_lock/unlock macros */
int32_t wait_count;
pthread_mutex_t local_lock; /* taken by ioc_local_lock(), released by ioc_local_unlock() */
struct ioc_waitq *waitq;
void *stub; /* opaque pointer; NOTE(review): presumably a call stub - confirm */
fd_t *fd;
int32_t need_xattr;
dict_t *xattr_req;
};
/*
 * ioc_page - structure to store page of data from file
 *
 * The ioc_page_lock()/ioc_page_unlock() macros in this header dereference
 * inode->table->xl->name for logging, so @inode must be valid whenever
 * those macros are used on a page.
 *
 * NOTE(review): @dirty and @ready are plain chars, presumably used as
 * boolean flags; @count is presumably the number of elements in @vector -
 * confirm in the page-handling code.
 */
struct ioc_page {
struct list_head page_lru; /* list linkage; ioc_cache has a list head of the same name */
struct ioc_inode *inode; /* inode this page belongs to */
struct ioc_priority *priority;
char dirty;
char ready;
struct iovec *vector;
int32_t count;
off_t offset;
size_t size;
struct ioc_waitq *waitq; /* head of the waiter list for this page */
struct iobref *iobref;
pthread_mutex_t page_lock; /* taken by ioc_page_lock(), released by ioc_page_unlock() */
};
/*
 * ioc_cache - cache state embedded in every ioc_inode (ioc_inode.cache):
 * a page table, an LRU list head and the timestamps recorded for the file.
 */
struct ioc_cache {
rbthash_table_t *page_table; /* hash table of pages; NOTE(review): presumably keyed by page offset - confirm */
struct list_head page_lru; /* list head; ioc_page has a linkage member of the same name */
time_t mtime; /*
* seconds component of file mtime
*/
/* NOTE(review): declared as time_t although it holds a nanosecond count. */
time_t mtime_nsec; /*
* nanosecond component of file mtime
*/
struct timeval tv; /*
* time-stamp at last re-validate
*/
};
/*
 * ioc_inode - io-cache state for one inode.
 *
 * The ioc_inode_lock()/ioc_inode_unlock() macros in this header dereference
 * table->xl->name for logging, so @table must be valid whenever those
 * macros are used on this object.
 */
struct ioc_inode {
struct ioc_table *table; /* owning table; dereferenced by the lock macros */
off_t ia_size; /* NOTE(review): presumably the file size taken from struct iatt - confirm */
struct ioc_cache cache; /* embedded page table, LRU head and timestamps */
struct list_head inode_list; /*
* list of inodes, maintained by
* io-cache translator
*/
struct list_head inode_lru;
struct ioc_waitq *waitq;
pthread_mutex_t inode_lock; /* taken by ioc_inode_lock(), released by ioc_inode_unlock() */
uint32_t weight; /*
* weight of the inode, increases
* on each read
*/
};
/*
 * ioc_table - translator-wide io-cache state: sizing parameters, the lists
 * of inodes, the priority list and the lock protecting the table.
 *
 * The ioc_table_lock()/ioc_table_unlock() macros in this header dereference
 * xl->name for logging, so @xl must be set before they are used.
 *
 * NOTE(review): units of the size fields (presumably bytes) and of
 * cache_timeout (presumably seconds, given time_elapsed() returns seconds)
 * are not stated in this header - confirm.
 */
struct ioc_table {
uint64_t page_size;
uint64_t cache_size;
uint64_t cache_used;
int64_t min_file_size;
int64_t max_file_size;
struct list_head inodes; /* list of inodes cached */
struct list_head active;
/* Pointer, not an embedded head. NOTE(review): presumably an array of LRU heads, one per priority level - confirm. */
struct list_head *inode_lru;
struct list_head priority_list; /* NOTE(review): presumably a list of struct ioc_priority - confirm */
int32_t readv_count;
pthread_mutex_t table_lock; /* taken by ioc_table_lock(), released by ioc_table_unlock() */
xlator_t *xl; /* dereferenced by every lock macro for the log domain name */
uint32_t inode_count;
int32_t cache_timeout;
int32_t max_pri;
struct mem_pool *mem_pool;
};
/*
 * Short "_t" aliases for the structures defined in this header.
 * struct ioc_priority and struct ioc_cache have no alias here.
 */
typedef struct ioc_table ioc_table_t;
typedef struct ioc_local ioc_local_t;
typedef struct ioc_page ioc_page_t;
typedef struct ioc_inode ioc_inode_t;
typedef struct ioc_waitq ioc_waitq_t;
typedef struct ioc_fill ioc_fill_t;
/*
 * Prototypes for string/pointer helpers and page/frame handling.
 * Only the declarations are visible in this header, so the notes below are
 * limited to what the signatures show.
 * NOTE(review): the leading "__" on some names presumably marks functions
 * that expect the caller to already hold the relevant lock - confirm
 * against the definitions.
 */

/* Takes a string and returns a pointer. NOTE(review): encoding not visible here. */
void *
str_to_ptr (char *string);
/* Takes a pointer and returns a string. NOTE(review): ownership of the result not visible here. */
char *
ptr_to_str (void *ptr);
/* Callback with a readv-style argument list (vector/count/stbuf/iobref). */
int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
int32_t count, struct iatt *stbuf,
struct iobref *iobref);
/* Returns a page for (inode, offset). NOTE(review): presumably NULL when absent - confirm. */
ioc_page_t *
__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
/* Returns a page for (inode, offset). NOTE(review): presumably newly allocated - confirm. */
ioc_page_t *
__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
void
ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
off_t offset);
/* Takes a page, a frame and an offset/size pair; returns nothing. */
void
__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
size_t size);
/* Returns a waitq pointer. NOTE(review): presumably the waiters to be resumed by the caller - confirm. */
ioc_waitq_t *
__ioc_page_wakeup (ioc_page_t *page);
void
ioc_page_flush (ioc_page_t *page);
/* Takes the op_ret/op_errno pair for a page and returns a waitq pointer. */
ioc_waitq_t *
__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
void
ioc_frame_return (call_frame_t *frame);
void
ioc_waitq_return (ioc_waitq_t *waitq);
/* Takes a page, a frame and an offset/size pair; returns an int32_t status. */
int32_t
ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
size_t size);
/*
 * ioc_inode_lock - emit a trace log line, then acquire the inode_lock
 * mutex of @ioc_inode.
 *
 * @ioc_inode: pointer to an object with `table->xl->name` and an
 *             `inode_lock` mutex. The argument is parenthesized at each
 *             use so that expressions such as `&obj` or `base + i` expand
 *             correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "locked" line is logged before pthread_mutex_lock() is
 * called, i.e. it marks the attempt rather than the acquisition.
 */
#define ioc_inode_lock(ioc_inode)                                       \
        do {                                                            \
                gf_log ((ioc_inode)->table->xl->name, GF_LOG_TRACE,     \
                        "locked inode(%p)", (ioc_inode));               \
                pthread_mutex_lock (&(ioc_inode)->inode_lock);          \
        } while (0)
/*
 * ioc_inode_unlock - emit a trace log line, then release the inode_lock
 * mutex of @ioc_inode.
 *
 * @ioc_inode: pointer to an object with `table->xl->name` and an
 *             `inode_lock` mutex. The argument is parenthesized at each
 *             use so that expressions such as `&obj` or `base + i` expand
 *             correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "unlocked" line is logged while the mutex is still held.
 */
#define ioc_inode_unlock(ioc_inode)                                     \
        do {                                                            \
                gf_log ((ioc_inode)->table->xl->name, GF_LOG_TRACE,     \
                        "unlocked inode(%p)", (ioc_inode));             \
                pthread_mutex_unlock (&(ioc_inode)->inode_lock);        \
        } while (0)
/*
 * ioc_table_lock - emit a trace log line, then acquire the table_lock
 * mutex of @table.
 *
 * @table: pointer to an object with `xl->name` and a `table_lock` mutex.
 *         The argument is parenthesized at each use so that expressions
 *         such as `&obj` or `base + i` expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "locked" line is logged before pthread_mutex_lock() is
 * called, i.e. it marks the attempt rather than the acquisition.
 */
#define ioc_table_lock(table)                                           \
        do {                                                            \
                gf_log ((table)->xl->name, GF_LOG_TRACE,                \
                        "locked table(%p)", (table));                   \
                pthread_mutex_lock (&(table)->table_lock);              \
        } while (0)
/*
 * ioc_table_unlock - emit a trace log line, then release the table_lock
 * mutex of @table.
 *
 * @table: pointer to an object with `xl->name` and a `table_lock` mutex.
 *         The argument is parenthesized at each use so that expressions
 *         such as `&obj` or `base + i` expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "unlocked" line is logged while the mutex is still held.
 */
#define ioc_table_unlock(table)                                         \
        do {                                                            \
                gf_log ((table)->xl->name, GF_LOG_TRACE,                \
                        "unlocked table(%p)", (table));                 \
                pthread_mutex_unlock (&(table)->table_lock);            \
        } while (0)
/*
 * ioc_local_lock - emit a trace log line, then acquire the local_lock
 * mutex of @local.
 *
 * @local: pointer to an object with `inode->table->xl->name` and a
 *         `local_lock` mutex; `inode` must be valid because it is
 *         dereferenced for the log domain. The argument is parenthesized
 *         at each use so that expressions such as `&obj` or `base + i`
 *         expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "locked" line is logged before pthread_mutex_lock() is
 * called, i.e. it marks the attempt rather than the acquisition.
 */
#define ioc_local_lock(local)                                           \
        do {                                                            \
                gf_log ((local)->inode->table->xl->name, GF_LOG_TRACE,  \
                        "locked local(%p)", (local));                   \
                pthread_mutex_lock (&(local)->local_lock);              \
        } while (0)
/*
 * ioc_local_unlock - emit a trace log line, then release the local_lock
 * mutex of @local.
 *
 * @local: pointer to an object with `inode->table->xl->name` and a
 *         `local_lock` mutex; `inode` must be valid because it is
 *         dereferenced for the log domain. The argument is parenthesized
 *         at each use so that expressions such as `&obj` or `base + i`
 *         expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "unlocked" line is logged while the mutex is still held.
 */
#define ioc_local_unlock(local)                                         \
        do {                                                            \
                gf_log ((local)->inode->table->xl->name, GF_LOG_TRACE,  \
                        "unlocked local(%p)", (local));                 \
                pthread_mutex_unlock (&(local)->local_lock);            \
        } while (0)
/*
 * ioc_page_lock - emit a trace log line, then acquire the page_lock
 * mutex of @page.
 *
 * @page: pointer to an object with `inode->table->xl->name` and a
 *        `page_lock` mutex; `inode` must be valid because it is
 *        dereferenced for the log domain. The argument is parenthesized
 *        at each use so that expressions such as `&obj` or `base + i`
 *        expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "locked" line is logged before pthread_mutex_lock() is
 * called, i.e. it marks the attempt rather than the acquisition.
 */
#define ioc_page_lock(page)                                             \
        do {                                                            \
                gf_log ((page)->inode->table->xl->name, GF_LOG_TRACE,   \
                        "locked page(%p)", (page));                     \
                pthread_mutex_lock (&(page)->page_lock);                \
        } while (0)
/*
 * ioc_page_unlock - emit a trace log line, then release the page_lock
 * mutex of @page.
 *
 * @page: pointer to an object with `inode->table->xl->name` and a
 *        `page_lock` mutex; `inode` must be valid because it is
 *        dereferenced for the log domain. The argument is parenthesized
 *        at each use so that expressions such as `&obj` or `base + i`
 *        expand correctly.
 *
 * The argument is expanded more than once, so it must not have side
 * effects. The "unlocked" line is logged while the mutex is still held.
 */
#define ioc_page_unlock(page)                                           \
        do {                                                            \
                gf_log ((page)->inode->table->xl->name, GF_LOG_TRACE,   \
                        "unlocked page(%p)", (page));                   \
                pthread_mutex_unlock (&(page)->page_lock);              \
        } while (0)
/*
 * time_elapsed - whole seconds between two timestamps.
 *
 * @now:  timestamp to measure up to
 * @then: timestamp to measure from
 *
 * Only the tv_sec members are compared; tv_usec is ignored. The difference
 * is converted to uint64_t, so a @now that lies before @then produces a
 * very large value rather than a negative one.
 */
static inline uint64_t
time_elapsed (struct timeval *now,
              struct timeval *then)
{
        return (uint64_t) (now->tv_sec - then->tv_sec);
}
/*
 * Prototypes for inode-level cache management and pruning.
 * Only the declarations are visible in this header, so the notes below are
 * limited to what the signatures show.
 * NOTE(review): the leading "__" on some names presumably marks functions
 * that expect the caller to already hold the relevant lock - confirm
 * against the definitions.
 */

/* Takes a table and an inode_t; returns an ioc_inode_t pointer. NOTE(review): presumably NULL when not found - confirm. */
ioc_inode_t *
ioc_inode_search (ioc_table_t *table, inode_t *inode);
void
ioc_inode_destroy (ioc_inode_t *ioc_inode);
/* Takes a table, an inode_t and a weight; returns an ioc_inode_t pointer. */
ioc_inode_t *
ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight);
/* Returns an int64_t. NOTE(review): presumably the number of bytes released - confirm. */
int64_t
__ioc_page_destroy (ioc_page_t *page);
/* Returns an int64_t. NOTE(review): presumably the number of bytes released - confirm. */
int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode);
void
ioc_inode_flush (ioc_inode_t *ioc_inode);
void
ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
struct iatt *stbuf);
/* Returns an int8_t. NOTE(review): presumably non-zero when the cache is still valid for @stbuf - confirm. */
int8_t
ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf);
int32_t
ioc_prune (ioc_table_t *table);
/* Returns an int32_t. NOTE(review): presumably non-zero when pruning is required - confirm. */
int32_t
ioc_need_prune (ioc_table_t *table);
/*
 * ioc_hashfn - hash function over @len bytes starting at @data.
 *
 * @data: pointer to the key bytes
 * @len:  length of the key
 *
 * NOTE(review): presumably the hash callback handed to the rbthash page
 * table - confirm at the call site.
 *
 * Declared without `inline`: this header carries no definition, and ISO C
 * (C99 6.7.4) requires a function with external linkage that is declared
 * inline to be defined in the same translation unit. A plain prototype is
 * valid in every including file and still matches a definition that is
 * itself marked inline.
 */
uint32_t
ioc_hashfn (void *data, int len);
#endif /* __IO_CACHE_H */
|