diff options
author | Venky Shankar <vshankar@redhat.com> | 2015-02-03 19:03:30 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-18 21:22:33 -0700 |
commit | 87a9d23627586a5d5cd8a502a08ecbdb6f0f7bb2 (patch) | |
tree | 7e627c0810e151177364f66a9442529d1e95826f /libglusterfs/src/rot-buffs.h | |
parent | d236b01a8bf68b83c76ea1cfa8351833e19695f7 (diff) |
libglusterfs/rot-buffs: rotational buffers
This patch introduces rotational buffers aiming at the classic
multiple producer and multiple consumer problem. A fixed set
of buffer list is allocated during initialization, where each
list consist of a list of buffers. Each buffer is an iovec
pointing to a memory region of fixed allocation size. Multiple
producers write data to these buffers. A buffer list starts with
a single buffer (iovec) and allocates more when required (although
this can be preallocatd in multiples of k).
rot-buffs allow multiple producers to write data parallely with
a bit of extra cost of taking locks. Therefore, it's much suited
for large writes. Multiple producers are allowed to write in the
buffer parallely by "reserving" write space for selected number
of bytes and returning pointer to the start of the reserved area.
The write size is selected by the producer before it starts the
write (which is often known). Therefore, the write itself need not
be serialized -- just the space reservation needs to be done safely.
The other part is when a consumer kicks in to consume what has
been produced. At this point, a buffer list switch is performed.
The "current" buffer list pointer is safely pointed to the next
available buffer list. New writes are now directed to the just
switched buffer list (the old buffer list is now considered out
of rotation). Note that the old buffer still may have producers
in progress (pending writes), so the consumer has to wait till
the writers are drained. Currently this is the slow path for
producers (write completion) and needs to be improved.
Currently, there is special handling for cases where the number
of consumers match (or exceed) the number of producers, which
could result in writer starvation. In this scenario, when a
consumers requests a buffer list for consumption, a check is
performed for writer starvation and consumption is denied
until at least another buffer list is ready of the producer
for writes, i.e., one (or more) consumer(s) completed, thereby
putting the buffer list back in rotation.
[
NOTE:
I've not performance tested this producer-consumer model
yet. It's being used in changelog for event notification.
The list of buffers (iovecs) are directly passed to RPC
layer.
]
Change-Id: I88d235522b05ab82509aba861374a2312bff57f2
BUG: 1170075
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/9706
Tested-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'libglusterfs/src/rot-buffs.h')
-rw-r--r-- | libglusterfs/src/rot-buffs.h | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/libglusterfs/src/rot-buffs.h b/libglusterfs/src/rot-buffs.h new file mode 100644 index 00000000000..aac24a4f571 --- /dev/null +++ b/libglusterfs/src/rot-buffs.h @@ -0,0 +1,121 @@ +/* + Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __ROT_BUFFS_H +#define __ROT_BUFFS_H + +#include "list.h" +#include "locking.h" +#include "common-utils.h" + +typedef struct rbuf_iovec { + struct iovec iov; + + struct list_head list; +} rbuf_iovec_t; + +#define RBUF_IOVEC_SIZE (sizeof (rbuf_iovec_t)) + +typedef struct rbuf_list { + gf_lock_t c_lock; + + pthread_mutex_t b_lock; /* protects this structure */ + pthread_cond_t b_cond; /* signal for writer completion */ + + gf_boolean_t awaiting; + + unsigned long long pending; /* pending writers */ + unsigned long long completed; /* completed writers */ + + rbuf_iovec_t *rvec; /* currently used IO vector */ + + struct list_head veclist; /* list of attached rbuf_iov */ + + unsigned long long used; /* consumable entries + attached in ->veclist */ + unsigned long long total; /* total entries in ->veclist (used + during deallocation) */ + + unsigned long seq[2]; /* if interested, this whould store + the start sequence number and the + range */ + + struct list_head list; /* attachment to rbuf_t */ +} rbuf_list_t; + +struct rlist_iter { + struct list_head veclist; + + unsigned long long iter; +}; + +#define RLIST_ENTRY_COUNT(rlist) rlist->used + +#define rlist_iter_init(riter, rlist) \ + do { \ + (riter)->iter = rlist->used; \ + (riter)->veclist = rlist->veclist; \ + } while (0) + +#define rvec_for_each_entry(pos, riter) \ + for (pos = list_entry \ + ((riter)->veclist.next, typeof(*pos), list); \ + (riter)->iter > 0; \ + pos = list_entry \ + (pos->list.next, typeof(*pos), list), \ + --((riter)->iter)) + +/** + * Sequence number assigment routine is called during buffer + * switch under rbuff ->lock. + */ +typedef void (sequence_fn) (rbuf_list_t *, void *); + +#define RLIST_STORE_SEQ(rlist, start, range) \ + do { \ + rlist->seq[0] = start; \ + rlist->seq[1] = range; \ + } while (0) + +#define RLIST_GET_SEQ(rlist, start, range) \ + do { \ + start = rlist->seq[0]; \ + range = rlist->seq[1]; \ + } while (0) + +typedef struct rbuf { + gf_lock_t lock; /* protects "current" rlist */ + + rbuf_list_t *current; /* cached pointer to first free rlist */ + + struct list_head freelist; +} rbuf_t; + +typedef enum { + RBUF_CONSUMABLE = 1, + RBUF_BUSY, + RBUF_EMPTY, + RBUF_WOULD_STARVE, +} rlist_retval_t; + +/* Initialization/Destruction */ +rbuf_t *rbuf_init (int); +void rbuf_dtor (rbuf_t *); + +/* Producer API */ +char *rbuf_reserve_write_area (rbuf_t *, size_t, void **); +int rbuf_write_complete (void *); + +/* Consumer API */ +int rbuf_get_buffer (rbuf_t *, void **, sequence_fn *, void *); +int rbuf_wait_for_completion (rbuf_t *, void *, + void (*)(rbuf_list_t *, void *), void *); + +#endif |