diff options
author | Venky Shankar <vshankar@redhat.com> | 2015-02-03 19:22:16 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-18 18:22:36 -0700 |
commit | 4737584fffcd25dbe35d17b076c95bf90a422cf2 (patch) | |
tree | 9f30e0e90c88c245787b78af3ca78d7ae05e30f2 /xlators/features/changelog/src | |
parent | 728fcd41eb39f66744d84b979dd8195fd47313ed (diff) |
features/changelog: RPC'fy {libgf}changelog
This patch introduces RPC based communication between the changelog
translator and libgfchangelog. It replaces the old pathetic stream
based interaction that existed earlier (due to time constraints :-/).
Changelog, upon initialization starts a RPC server (rpcsvc) allowing
clients to invoke a probe API as a bootup mechanism to request for
event notifications. During probe, clients can choose an event
filter specifying the type(s) of events they are interested in. As
of now there is no way to change the event notification set once
the probe RPC call is made, but that is easier to implement.
The actual event notifications is done on a separate RPC session.
The client (libgfchangelog) itself starts and RPC server which the
changelog translator "connects back" during probe. Notifications
are dispatched by a bunch of threads from the server (translator)
and the client optionally orders them if ordered notifications
are requried. FOPs fill in their respective event details in a
buffer (rot-buffs to be particular) and a bunch of threads
(consumers) swap the buffers out of roatation and dispatch them
via RPC. To avoid writer starvation, then number of dispatcher
threads is one less than the number of buffer list in rot-buffs.x
libgfchangelog becomes purely callback based -- upon event
notification from the server (and re-ordering them if required)
invoke a callback routine specified by consumer(s).
A major part of the patch is also aimed at providing backward
compatibility for geo-replication, which was one of the main
consumer of the stream based API. Also, this patch does not\
"turn on" event notifications for all fops, just a bunch which
is currently in requirement. Another pain point is that the
server does not filter events before dispatching it to the
clients. That load is taken up by the client itself (although
it's done at the library layer rather than making it hard on
the callback implementor). This needs improvement and care
needs to be taken to not load the server up with expensive
filtering mechanisms.
Change-Id: Ibf60a432b68f2dfa60c6f9add2bcfd37a9c41395
BUG: 1170075
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/9708
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features/changelog/src')
-rw-r--r-- | xlators/features/changelog/src/Makefile.am | 18 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-encoders.c | 2 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-ev-handle.c | 382 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-ev-handle.h | 140 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-helpers.c | 210 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-helpers.h | 107 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-mem-types.h | 25 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-misc.h | 14 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-notifier.c | 314 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-notifier.h | 19 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-rpc-common.c | 334 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-rpc-common.h | 84 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-rpc.c | 300 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog-rpc.h | 29 | ||||
-rw-r--r-- | xlators/features/changelog/src/changelog.c | 473 |
15 files changed, 1846 insertions, 605 deletions
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am index 18c41e7d7d1..8712b9d059f 100644 --- a/xlators/features/changelog/src/Makefile.am +++ b/xlators/features/changelog/src/Makefile.am @@ -3,16 +3,24 @@ xlator_LTLIBRARIES = changelog.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \ - changelog-misc.h changelog-encoders.h changelog-notifier.h + changelog-rpc-common.h changelog-misc.h changelog-encoders.h \ + changelog-rpc-common.h changelog-rpc.h changelog-ev-handle.h changelog_la_LDFLAGS = -module -avoid-version changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \ - changelog-encoders.c changelog-notifier.c changelog-barrier.c -changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + changelog-encoders.c changelog-rpc.c changelog-barrier.c \ + changelog-rpc-common.c changelog-ev-handle.c +changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \ - -D_GNU_SOURCE -D$(GF_HOST_OS) -DDATADIR=\"$(localstatedir)\" +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/rpc-transport/socket/src \ + -I$(top_srcdir)/xlators/features/changelog/lib/src/ \ + -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + -DDATADIR=\"$(localstatedir)\" AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c index 08626ee2f22..ea9db4061ca 100644 --- a/xlators/features/changelog/src/changelog-encoders.c +++ b/xlators/features/changelog/src/changelog-encoders.c @@ -191,7 +191,7 @@ cb_encoder[] = { }; void -changelog_encode_change( changelog_priv_t * priv) +changelog_encode_change(changelog_priv_t *priv) { priv->ce = &cb_encoder[priv->encode_mode]; } diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c new file mode 100644 index 00000000000..ca7443cfd22 --- /dev/null +++ b/xlators/features/changelog/src/changelog-ev-handle.c @@ -0,0 +1,382 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-ev-handle.h" +#include "changelog-rpc-common.h" +#include "changelog-helpers.h" + +struct rpc_clnt_program changelog_ev_program; + +#define NR_IOVEC (MAX_IOVEC - 3) +struct ev_rpc_vec { + int count; + struct iovec vector[NR_IOVEC]; + + /* sequence number */ + unsigned long seq; +}; + +struct ev_rpc { + rbuf_list_t *rlist; + struct rpc_clnt *rpc; + struct ev_rpc_vec vec; +}; + +/** + * As of now this just does the minimal (retval logging). Going further + * un-acknowledges sequence numbers can be retransmitted and other + * intelligence can be built into the server. + */ +int +changelog_event_dispatch_cbk (struct rpc_req *req, + struct iovec *iov, int count, void *myframe) +{ + return 0; +} + +/* dispatcher RPC */ +inline int +changelog_dispatch_vec (call_frame_t *frame, xlator_t *this, + struct rpc_clnt *rpc, struct ev_rpc_vec *vec) +{ + struct timeval tv = {0,}; + changelog_event_req req = {0,}; + + (void) gettimeofday (&tv, NULL); + + /** + * Event dispatch RPC header contains a sequence number for each + * dispatch. This allows the reciever to order the request before + * processing. + */ + req.seq = vec->seq; + req.tv_sec = tv.tv_sec; + req.tv_usec = tv.tv_usec; + + return changelog_rpc_sumbit_req (rpc, (void *)&req, + frame, &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, + vec->vector, vec->count, NULL, + this, changelog_event_dispatch_cbk, + (xdrproc_t) xdr_changelog_event_req); + } + + int + changelog_event_dispatch_rpc (call_frame_t *frame, xlator_t *this, void *data) + { + int idx = 0; + int count = 0; + int ret = 0; + unsigned long range = 0; + unsigned long sequence = 0; + rbuf_iovec_t *rvec = NULL; + struct ev_rpc *erpc = NULL; + struct rlist_iter riter = {{0,},}; + + /* dispatch NR_IOVEC IO vectors at a time. */ + + erpc = data; + RLIST_GET_SEQ (erpc->rlist, sequence, range); + + rlist_iter_init (&riter, erpc->rlist); + + rvec_for_each_entry (rvec, &riter) { + idx = count % NR_IOVEC; + if (++count == NR_IOVEC) { + erpc->vec.vector[idx] = rvec->iov; + erpc->vec.seq = sequence++; + erpc->vec.count = NR_IOVEC; + + ret = changelog_dispatch_vec (frame, this, + erpc->rpc, &erpc->vec); + if (ret) + break; + count = 0; + continue; + } + + erpc->vec.vector[idx] = rvec->iov; + } + + if (ret) + goto error_return; + + idx = count % NR_IOVEC; + if (idx) { + erpc->vec.seq = sequence; + erpc->vec.count = idx; + + ret = changelog_dispatch_vec (frame, this, + erpc->rpc, &erpc->vec); + } + + error_return: + return ret; +} + +int +changelog_rpc_notify (struct rpc_clnt *rpc, + void *mydata, rpc_clnt_event_t event, void *data) +{ + xlator_t *this = NULL; + changelog_rpc_clnt_t *crpc = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_priv_t *priv = NULL; + changelog_ev_selector_t *selection = NULL; + + crpc = mydata; + this = crpc->this; + c_clnt = crpc->c_clnt; + + priv = this->private; + + switch (event) { + case RPC_CLNT_CONNECT: + rpc_clnt_set_connected (&rpc->conn); + selection = &priv->ev_selection; + + LOCK (&c_clnt->wait_lock); + { + LOCK (&c_clnt->active_lock); + { + changelog_select_event (this, selection, + crpc->filter); + list_move_tail (&crpc->list, &c_clnt->active); + } + UNLOCK (&c_clnt->active_lock); + } + UNLOCK (&c_clnt->wait_lock); + + break; + case RPC_CLNT_DISCONNECT: + rpc_clnt_disable (crpc->rpc); + selection = &priv->ev_selection; + + LOCK (&crpc->lock); + { + changelog_deselect_event (this, selection, + crpc->filter); + changelog_set_disconnect_flag (crpc, _gf_true); + } + UNLOCK (&crpc->lock); + + break; + case RPC_CLNT_MSG: + case RPC_CLNT_DESTROY: + break; + } + + return 0; +} + +void * +changelog_ev_connector (void *data) +{ + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + + c_clnt = data; + this = c_clnt->this; + + while (1) { + pthread_mutex_lock (&c_clnt->pending_lock); + { + while (list_empty (&c_clnt->pending)) + pthread_cond_wait (&c_clnt->pending_cond, + &c_clnt->pending_lock); + crpc = list_first_entry (&c_clnt->pending, + changelog_rpc_clnt_t, list); + crpc->rpc = + changelog_rpc_client_init (this, crpc, + crpc->sock, + changelog_rpc_notify); + if (!crpc->rpc) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "connect back.. <%s>", crpc->sock); + crpc->cleanup (crpc); + goto mutex_unlock; + } + + LOCK (&c_clnt->wait_lock); + { + list_move_tail (&crpc->list, &c_clnt->waitq); + } + UNLOCK (&c_clnt->wait_lock); + } + mutex_unlock: + pthread_mutex_unlock (&c_clnt->pending_lock); + } + + return NULL; +} + +void +changelog_ev_cleanup_connections (xlator_t *this, changelog_clnt_t *c_clnt) +{ + int ret = 0; + changelog_rpc_clnt_t *crpc = NULL; + + /* cleanup active connections */ + LOCK (&c_clnt->active_lock); + { + list_for_each_entry (crpc, &c_clnt->active, list) { + rpc_clnt_disable (crpc->rpc); + } + } + UNLOCK (&c_clnt->active_lock); +} + +/** + * TODO: granularize lock + * + * If we have multiple threads dispatching events, doing it this way is + * a performance bottleneck. + */ + +static inline changelog_rpc_clnt_t * +get_client (changelog_clnt_t *c_clnt, struct list_head **next) +{ + changelog_rpc_clnt_t *crpc = NULL; + + LOCK (&c_clnt->active_lock); + { + if (*next == &c_clnt->active) + goto unblock; + crpc = list_entry (*next, changelog_rpc_clnt_t, list); + changelog_rpc_clnt_ref (crpc); + *next = (*next)->next; + } + unblock: + UNLOCK (&c_clnt->active_lock); + + return crpc; +} + +static inline void +put_client (changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc) +{ + LOCK (&c_clnt->active_lock); + { + changelog_rpc_clnt_unref (crpc); + } + UNLOCK (&c_clnt->active_lock); +} + +void +_dispatcher (rbuf_list_t *rlist, void *arg) +{ + int ret = 0; + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + changelog_rpc_clnt_t *tmp = NULL; + struct ev_rpc erpc = {0,}; + struct list_head *next = NULL; + + c_clnt = arg; + this = c_clnt->this; + + erpc.rlist = rlist; + next = c_clnt->active.next; + + while (1) { + crpc = get_client (c_clnt, &next); + if (!crpc) + break; + erpc.rpc = crpc->rpc; + ret = changelog_invoke_rpc (this, crpc->rpc, + &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, &erpc); + put_client (c_clnt, crpc); + } +} + +/** this is called under rotbuff's lock */ +void +sequencer (rbuf_list_t *rlist, void *mydata) +{ + unsigned long range = 0; + changelog_clnt_t *c_clnt = 0; + + c_clnt = mydata; + + range = (RLIST_ENTRY_COUNT (rlist)) / NR_IOVEC; + if ((RLIST_ENTRY_COUNT (rlist)) % NR_IOVEC) + range++; + RLIST_STORE_SEQ (rlist, c_clnt->sequence, range); + + c_clnt->sequence += range; +} + +void * +changelog_ev_dispatch (void *data) +{ + int ret = 0; + void *opaque = NULL; + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + struct timeval tv = {0,}; + + c_clnt = data; + this = c_clnt->this; + + while (1) { + /* TODO: change this to be pthread cond based.. later */ + tv.tv_sec = 1; + tv.tv_usec = 0; + select (0, NULL, NULL, NULL, &tv); + + ret = rbuf_get_buffer (c_clnt->rbuf, + &opaque, sequencer, c_clnt); + if (ret != RBUF_CONSUMABLE) { + if (ret != RBUF_EMPTY) + gf_log (this->name, GF_LOG_WARNING, + "Failed to get buffer for RPC dispatch " + "[rbuf retval: %d]", ret); + continue; + } + + ret = rbuf_wait_for_completion (c_clnt->rbuf, + opaque, _dispatcher, c_clnt); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to put buffer after consumption"); + } + + return NULL; +} + +void +changelog_ev_queue_connection (changelog_clnt_t *c_clnt, + changelog_rpc_clnt_t *crpc) +{ + pthread_mutex_lock (&c_clnt->pending_lock); + { + list_add_tail (&crpc->list, &c_clnt->pending); + pthread_cond_signal (&c_clnt->pending_cond); + } + pthread_mutex_unlock (&c_clnt->pending_lock); +} + +struct rpc_clnt_procedure changelog_ev_procs[CHANGELOG_REV_PROC_MAX] = { + [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL}, + [CHANGELOG_REV_PROC_EVENT] = { + "EVENT DISPATCH", changelog_event_dispatch_rpc + }, +}; + +struct rpc_clnt_program changelog_ev_program = { + .progname = "CHANGELOG EVENT DISPATCHER", + .prognum = CHANGELOG_REV_RPC_PROCNUM, + .progver = CHANGELOG_REV_RPC_PROCVER, + .numproc = CHANGELOG_REV_PROC_MAX, + .proctable = changelog_ev_procs, +}; diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h new file mode 100644 index 00000000000..eef0492a9ee --- /dev/null +++ b/xlators/features/changelog/src/changelog-ev-handle.h @@ -0,0 +1,140 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_EV_HANDLE_H +#define __CHANGELOG_EV_HANDLE_H + +#include "list.h" +#include "xlator.h" +#include "rpc-clnt.h" + +#include "rot-buffs.h" + +struct changelog_clnt; + +typedef struct changelog_rpc_clnt { + xlator_t *this; + + gf_lock_t lock; + + unsigned long ref; + gf_boolean_t disconnected; + + unsigned int filter; + char sock[UNIX_PATH_MAX]; + + struct changelog_clnt *c_clnt; /* back pointer to list holder */ + + struct rpc_clnt *rpc; /* RPC client endpoint */ + + struct list_head list; /* ->pending, ->waitq, ->active */ + + void (*cleanup) + (struct changelog_rpc_clnt *); /* cleanup handler */ +} changelog_rpc_clnt_t; + +static inline void +changelog_rpc_clnt_ref (changelog_rpc_clnt_t *crpc) +{ + LOCK (&crpc->lock); + { + ++crpc->ref; + } + UNLOCK (&crpc->lock); +} + +static inline void +changelog_set_disconnect_flag (changelog_rpc_clnt_t *crpc, gf_boolean_t flag) +{ + crpc->disconnected = flag; +} + +static inline int +changelog_rpc_clnt_is_disconnected (changelog_rpc_clnt_t *crpc) +{ + return (crpc->disconnected == _gf_true); +} + +static inline void +changelog_rpc_clnt_unref (changelog_rpc_clnt_t *crpc) +{ + gf_boolean_t gone = _gf_false; + + LOCK (&crpc->lock); + { + if (!(--crpc->ref) + && changelog_rpc_clnt_is_disconnected (crpc)) { + list_del (&crpc->list); + gone = _gf_true; + } + } + UNLOCK (&crpc->lock); + + if (gone) + crpc->cleanup (crpc); +} + +/** + * This structure holds pending and active clients. On probe RPC all + * an instance of the above structure (@changelog_rpc_clnt) is placed + * in ->pending and gets moved to ->active on a successful connect. + * + * locking rules: + * + * Manipulating ->pending + * ->pending_lock + * ->pending + * + * Manipulating ->active + * ->active_lock + * ->active + * + * Moving object from ->pending to ->active + * ->pending_lock + * ->active_lock + * + * Objects are _never_ moved from ->active to ->pending, i.e., during + * disconnection, the object is destroyed. Well, we could have tried + * to reconnect, but that's pure waste.. let the other end reconnect. + */ + +typedef struct changelog_clnt { + xlator_t *this; + + /* pending connections */ + pthread_mutex_t pending_lock; + pthread_cond_t pending_cond; + struct list_head pending; + + /* current active connections */ + gf_lock_t active_lock; + struct list_head active; + + gf_lock_t wait_lock; + struct list_head waitq; + + /* consumer part of rot-buffs */ + rbuf_t *rbuf; + unsigned long sequence; +} changelog_clnt_t; + +void *changelog_ev_connector (void *); + +void *changelog_ev_dispatch (void *); + +/* APIs */ +void +changelog_ev_queue_connection (changelog_clnt_t *, changelog_rpc_clnt_t *); + +void +changelog_ev_cleanup_connections (xlator_t *, changelog_clnt_t *); + +#endif + diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 3af2938190d..5c755d76d69 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -24,6 +24,7 @@ #include "changelog-mem-types.h" #include "changelog-encoders.h" +#include "changelog-rpc-common.h" #include <pthread.h> static inline void @@ -57,7 +58,7 @@ changelog_cleanup_free_mutex (void *arg_mutex) pthread_mutex_unlock(p_mutex); } -void +int changelog_thread_cleanup (xlator_t *this, pthread_t thr_id) { int ret = 0; @@ -65,7 +66,7 @@ changelog_thread_cleanup (xlator_t *this, pthread_t thr_id) /* send a cancel request to the thread */ ret = pthread_cancel (thr_id); - if (ret) { + if (ret != 0) { gf_log (this->name, GF_LOG_ERROR, "could not cancel thread (reason: %s)", strerror (errno)); @@ -73,14 +74,14 @@ changelog_thread_cleanup (xlator_t *this, pthread_t thr_id) } ret = pthread_join (thr_id, &retval); - if (ret || (retval != PTHREAD_CANCELED)) { + if ((ret != 0) || (retval != PTHREAD_CANCELED)) { gf_log (this->name, GF_LOG_ERROR, "cancel request not adhered as expected" " (reason: %s)", strerror (errno)); } out: - return; + return ret; } inline void * @@ -98,6 +99,145 @@ changelog_get_usable_buffer (changelog_local_t *local) return cld->cld_iobuf->ptr; } +static inline int +changelog_selector_index (unsigned int selector) +{ + return (ffs (selector) - 1); +} + +inline int +changelog_ev_selected (xlator_t *this, + changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + idx = changelog_selector_index (selector); + gf_log (this->name, GF_LOG_DEBUG, + "selector ref count for %d (idx: %d): %d", + selector, idx, selection->ref[idx]); + /* this can be lockless */ + return (idx < CHANGELOG_EV_SELECTION_RANGE + && (selection->ref[idx] > 0)); +} + +inline void +changelog_select_event (xlator_t *this, + changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + LOCK (&selection->reflock); + { + while (selector) { + idx = changelog_selector_index (selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]++; + gf_log (this->name, GF_LOG_DEBUG, + "selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK (&selection->reflock); +} + +inline void +changelog_deselect_event (xlator_t *this, + changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + LOCK (&selection->reflock); + { + while (selector) { + idx = changelog_selector_index (selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]--; + gf_log (this->name, GF_LOG_DEBUG, + "de-selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK (&selection->reflock); +} + +inline int +changelog_init_event_selection (xlator_t *this, + changelog_ev_selector_t *selection) +{ + int ret = 0; + int j = CHANGELOG_EV_SELECTION_RANGE; + + ret = LOCK_INIT (&selection->reflock); + if (ret != 0) + return -1; + + LOCK (&selection->reflock); + { + while (j--) { + selection->ref[j] = 0; + } + } + UNLOCK (&selection->reflock); + + return 0; +} + +inline int +changelog_cleanup_event_selection (xlator_t *this, + changelog_ev_selector_t *selection) +{ + int ret = 0; + int j = CHANGELOG_EV_SELECTION_RANGE; + + LOCK (&selection->reflock); + { + while (j--) { + if (selection->ref[j] > 0) + gf_log (this->name, GF_LOG_WARNING, + "changelog event selection cleaning up " + " on active references"); + } + } + UNLOCK (&selection->reflock); + + return LOCK_DESTROY (&selection->reflock); +} + +static inline void +changelog_perform_dispatch (xlator_t *this, + changelog_priv_t *priv, void *mem, size_t size) +{ + char *buf = NULL; + void *opaque = NULL; + + buf = rbuf_reserve_write_area (priv->rbuf, size, &opaque); + if (!buf) { + gf_log_callingfn (this->name, + GF_LOG_WARNING, "failed to dispatch event"); + return; + } + + memcpy (buf, mem, size); + rbuf_write_complete (opaque); +} + +inline void +changelog_dispatch_event (xlator_t *this, + changelog_priv_t *priv, changelog_event_t *ev) +{ + changelog_ev_selector_t *selection = NULL; + + selection = &priv->ev_selection; + if (changelog_ev_selected (this, selection, ev->ev_type)) { + changelog_perform_dispatch (this, priv, ev, CHANGELOG_EV_SIZE); + } +} + inline void changelog_set_usable_record_and_length (changelog_local_t *local, size_t len, int xr) @@ -206,9 +346,9 @@ changelog_rollover_changelog (xlator_t *this, { int ret = -1; int notify = 0; - char *bname = NULL; char ofile[PATH_MAX] = {0,}; char nfile[PATH_MAX] = {0,}; + changelog_event_t ev = {0,}; if (priv->changelog_fd != -1) { ret = fsync (priv->changelog_fd); @@ -252,40 +392,32 @@ changelog_rollover_changelog (xlator_t *this, } if (notify) { - bname = basename (nfile); - gf_log (this->name, GF_LOG_DEBUG, "notifying: %s", bname); - ret = changelog_write (priv->wfd, bname, strlen (bname) + 1); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to send file name to notify thread" - " (reason: %s)", strerror (errno)); - } else { - /* If this is explicit rollover initiated by snapshot, - * wakeup reconfigure thread waiting for changelog to - * rollover - */ - if (priv->explicit_rollover) { - priv->explicit_rollover = _gf_false; - ret = pthread_mutex_lock ( - &priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->bn.bnotify = _gf_false; - ret = pthread_cond_signal ( - &priv->bn.bnotify_cond); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, - out); - gf_log (this->name, GF_LOG_INFO, - "Changelog published: %s and" - " signalled bnotify", bname); - } - ret = pthread_mutex_unlock ( - &priv->bn.bnotify_mutex); + ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL; + memcpy (ev.u.journal.path, nfile, strlen (nfile) + 1); + changelog_dispatch_event (this, priv, &ev); + + /* If this is explicit rollover initiated by snapshot, + * wakeup reconfigure thread waiting for changelog to + * rollover + */ + if (priv->explicit_rollover) { + priv->explicit_rollover = _gf_false; + + ret = pthread_mutex_lock (&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); + { + priv->bn.bnotify = _gf_false; + ret = pthread_cond_signal + (&priv->bn.bnotify_cond); CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); + gf_log (this->name, GF_LOG_INFO, + "Changelog published: %s signalled" + " bnotify", nfile); } + ret = pthread_mutex_unlock (&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); } } - out: return ret; } @@ -434,8 +566,8 @@ changelog_snap_logging_stop (xlator_t *this, } int -changelog_open (xlator_t *this, - changelog_priv_t *priv) +changelog_open_journal (xlator_t *this, + changelog_priv_t *priv) { int fd = 0; int ret = -1; @@ -490,7 +622,7 @@ changelog_start_next_change (xlator_t *this, ret = changelog_rollover_changelog (this, priv, ts); if (!ret && !finale) - ret = changelog_open (this, priv); + ret = changelog_open_journal (this, priv); return ret; } @@ -975,7 +1107,7 @@ __changelog_inode_ctx_set (xlator_t *this, * one shot routine to get the address and the value of a inode version * for a particular type. */ -static changelog_inode_ctx_t * +changelog_inode_ctx_t * __changelog_inode_ctx_get (xlator_t *this, inode_t *inode, unsigned long **iver, unsigned long *version, changelog_log_type type) diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 03a795369d1..33a99ee4eed 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -15,10 +15,16 @@ #include "timer.h" #include "pthread.h" #include "iobuf.h" +#include "rot-buffs.h" #include "changelog-misc.h" #include "call-stub.h" +#include "rpcsvc.h" +#include "changelog-ev-handle.h" + +#include "changelog.h" + /** * the changelog entry */ @@ -120,29 +126,6 @@ typedef struct changelog_fsync { xlator_t *this; } changelog_fsync_t; -# define CHANGELOG_MAX_CLIENTS 5 -typedef struct changelog_notify { - /* reader end of the pipe */ - int rfd; - - /* notifier thread */ - pthread_t notify_th; - - /* unique socket path */ - char sockpath[UNIX_PATH_MAX]; - - int socket_fd; - - /** - * simple array of accept()'ed fds. Not scalable at all - * for large number of clients, but it's okay as we have - * a ahrd limit in this version (@CHANGELOG_MAX_CLIENTS). - */ - int client_fd[CHANGELOG_MAX_CLIENTS]; - - xlator_t *this; -} changelog_notify_t; - /* Draining during changelog rollover (for geo-rep snapshot dependency): * -------------------------------------------------------------------- * The introduction of draining of in-transit fops during changelog rollover @@ -162,14 +145,14 @@ typedef struct changelog_notify { typedef enum chlog_fop_color { FOP_COLOR_BLACK, FOP_COLOR_WHITE -}chlog_fop_color_t; +} chlog_fop_color_t; /* Barrier notify variable */ typedef struct barrier_notify { pthread_mutex_t bnotify_mutex; pthread_cond_t bnotify_cond; gf_boolean_t bnotify; -}barrier_notify_t; +} barrier_notify_t; /* Two separate mutex and conditional variable set is used * to drain white and black fops. */ @@ -185,15 +168,26 @@ typedef struct drain_mgmt { unsigned long white_fop_cnt; gf_boolean_t drain_wait_black; gf_boolean_t drain_wait_white; -}drain_mgmt_t; +} drain_mgmt_t; /* External barrier as a result of snap on/off indicating flag*/ typedef struct barrier_flags { gf_lock_t lock; gf_boolean_t barrier_ext; -}barrier_flags_t; +} barrier_flags_t; +/* Event selection */ +typedef struct changelog_ev_selector { + gf_lock_t reflock; + /** + * Array of references for each selection bit. + */ + unsigned int ref[CHANGELOG_EV_SELECTION_RANGE]; +} changelog_ev_selector_t; + + +/* changelog's private structure */ struct changelog_priv { gf_boolean_t active; @@ -223,9 +217,6 @@ struct changelog_priv { /* lock to synchronize CSNAP updation */ gf_lock_t c_snap_lock; - /* writen end of the pipe */ - int wfd; - /* rollover time */ int32_t rollover_time; @@ -247,9 +238,6 @@ struct changelog_priv { /* context of fsync thread */ changelog_fsync_t cf; - /* context of the notifier thread */ - changelog_notify_t cn; - /* operation mode */ changelog_mode_t op_mode; @@ -262,7 +250,9 @@ struct changelog_priv { /* encoder */ struct changelog_encoder *ce; - /* snapshot dependency changes */ + /** + * snapshot dependency changes + */ /* Draining of fops*/ drain_mgmt_t dm; @@ -289,6 +279,30 @@ struct changelog_priv { gf_timer_t *timer; struct timespec timeout; + /** + * buffers, RPC, event selection, notifications and other + * beasts. + */ + + /* epoll pthread */ + pthread_t poller; + + /* rotational buffer */ + rbuf_t *rbuf; + + /* changelog RPC server */ + rpcsvc_t *rpc; + + /* event selection */ + changelog_ev_selector_t ev_selection; + + /* client handling (reverse connection) */ + pthread_t connector; + + int nr_dispatchers; + pthread_t *ev_dispatcher; + + changelog_clnt_t connections; }; struct changelog_local { @@ -367,7 +381,7 @@ typedef struct { * helpers routines */ -void +int changelog_thread_cleanup (xlator_t *this, pthread_t thr_id); void * @@ -386,7 +400,7 @@ changelog_start_next_change (xlator_t *this, changelog_priv_t *priv, unsigned long ts, gf_boolean_t finale); int -changelog_open (xlator_t *this, changelog_priv_t *priv); +changelog_open_journal (xlator_t *this, changelog_priv_t *priv); int changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last); int @@ -449,6 +463,7 @@ changelog_snap_handle_ascii_change (xlator_t *this, changelog_log_data_t *cld); int changelog_snap_write_change (changelog_priv_t *priv, char *buffer, size_t len); + /* Changelog barrier routines */ void __chlog_barrier_enqueue (xlator_t *this, call_stub_t *stub); void __chlog_barrier_disable (xlator_t *this, struct list_head *queue); @@ -460,6 +475,24 @@ int32_t changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this, loc_t *loc, changelog_local_t **local); +/* event selection routines */ +inline void changelog_select_event (xlator_t *, + changelog_ev_selector_t *, unsigned int); +inline void changelog_deselect_event (xlator_t *, + changelog_ev_selector_t *, unsigned int); +inline int changelog_init_event_selection (xlator_t *, + changelog_ev_selector_t *); +inline int changelog_cleanup_event_selection (xlator_t *, + changelog_ev_selector_t *); +inline int changelog_ev_selected (xlator_t *, + changelog_ev_selector_t *, unsigned int); +inline void +changelog_dispatch_event (xlator_t *, changelog_priv_t *, changelog_event_t *); + +changelog_inode_ctx_t * +__changelog_inode_ctx_get (xlator_t *, inode_t *, unsigned long **, + unsigned long *, changelog_log_type); + /* macros */ #define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do { \ @@ -471,10 +504,10 @@ changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this, frame->local = NULL; \ } \ STACK_UNWIND_STRICT (fop, frame, params); \ - changelog_local_cleanup (__xl, __local); \ if (__local && __local->prev_entry) \ changelog_local_cleanup (__xl, \ __local->prev_entry); \ + changelog_local_cleanup (__xl, __local); \ } while (0) #define CHANGELOG_IOBUF_REF(iobuf) do { \ diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h index e1fa319a715..1618f722f6c 100644 --- a/xlators/features/changelog/src/changelog-mem-types.h +++ b/xlators/features/changelog/src/changelog-mem-types.h @@ -14,16 +14,21 @@ #include "mem-types.h" enum gf_changelog_mem_types { - gf_changelog_mt_priv_t = gf_common_mt_end + 1, - gf_changelog_mt_str_t = gf_common_mt_end + 2, - gf_changelog_mt_batch_t = gf_common_mt_end + 3, - gf_changelog_mt_rt_t = gf_common_mt_end + 4, - gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, - gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 6, - gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 7, - gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 8, - gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 9, - gf_changelog_mt_history_data_t = gf_common_mt_end + 10, + gf_changelog_mt_priv_t = gf_common_mt_end + 1, + gf_changelog_mt_str_t = gf_common_mt_end + 2, + gf_changelog_mt_batch_t = gf_common_mt_end + 3, + gf_changelog_mt_rt_t = gf_common_mt_end + 4, + gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, + gf_changelog_mt_rpc_clnt_t = gf_common_mt_end + 6, + gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 7, + gf_changelog_mt_libgfchangelog_entry_t = gf_common_mt_end + 8, + gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 9, + gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 10, + gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 11, + gf_changelog_mt_history_data_t = gf_common_mt_end + 12, + gf_changelog_mt_libgfchangelog_call_pool_t = gf_common_mt_end + 13, + gf_changelog_mt_libgfchangelog_event_t = gf_common_mt_end + 14, + gf_changelog_mt_ev_dispatcher_t = gf_common_mt_end + 15, gf_changelog_mt_end }; diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h index 58b10961463..b45302ad099 100644 --- a/xlators/features/changelog/src/changelog-misc.h +++ b/xlators/features/changelog/src/changelog-misc.h @@ -25,6 +25,7 @@ #define CHANGELOG_VERSION_MINOR 1 #define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock" +#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/.%s%lu.sock" /** * header starts with the version and the format of the changelog. @@ -42,6 +43,19 @@ CHANGELOG_UNIX_SOCK, md5_sum); \ } while (0) +#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) do { \ + unsigned long pid = 0; \ + char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ + pid = (unsigned long) getpid (); \ + md5_wrapper((unsigned char *) brick_path, \ + strlen(brick_path), \ + md5_sum); \ + (void) snprintf (sockpath, \ + len, CHANGELOG_TMP_UNIX_SOCK, \ + md5_sum, pid); \ + } while (0) + + /** * ... used by libgfchangelog. */ diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c deleted file mode 100644 index 5f3d063a8ad..00000000000 --- a/xlators/features/changelog/src/changelog-notifier.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "changelog-notifier.h" - -#include <pthread.h> - -inline static void -changelog_notify_clear_fd (changelog_notify_t *cn, int i) -{ - cn->client_fd[i] = -1; -} - -inline static void -changelog_notify_save_fd (changelog_notify_t *cn, int i, int fd) -{ - cn->client_fd[i] = fd; -} - -static int -changelog_notify_insert_fd (xlator_t *this, changelog_notify_t *cn, int fd) -{ - int i = 0; - int ret = 0; - - for (; i < CHANGELOG_MAX_CLIENTS; i++) { - if (cn->client_fd[i] == -1) - break; - } - - if (i == CHANGELOG_MAX_CLIENTS) { - /** - * this case should not be hit as listen() would limit - * the number of completely established connections. - */ - gf_log (this->name, GF_LOG_WARNING, - "hit max client limit (%d)", CHANGELOG_MAX_CLIENTS); - ret = -1; - } - else - changelog_notify_save_fd (cn, i, fd); - - return ret; -} - -static void -changelog_notify_fill_rset (changelog_notify_t *cn, fd_set *rset, int *maxfd) -{ - int i = 0; - - FD_ZERO (rset); - - FD_SET (cn->socket_fd, rset); - *maxfd = cn->socket_fd; - - FD_SET (cn->rfd, rset); - *maxfd = max (*maxfd, cn->rfd); - - for (; i < CHANGELOG_MAX_CLIENTS; i++) { - if (cn->client_fd[i] != -1) { - FD_SET (cn->client_fd[i], rset); - *maxfd = max (*maxfd, cn->client_fd[i]); - } - } - - *maxfd = *maxfd + 1; -} - -static int -changelog_notify_client (changelog_notify_t *cn, char *path, ssize_t len) -{ - int i = 0; - int ret = 0; - - for (; i < CHANGELOG_MAX_CLIENTS; i++) { - if (cn->client_fd[i] == -1) - continue; - - if (changelog_write (cn->client_fd[i], - path, len)) { - ret = -1; - - close (cn->client_fd[i]); - changelog_notify_clear_fd (cn, i); - } - } - - return ret; -} - -static void -changelog_notifier_init (changelog_notify_t *cn) -{ - int i = 0; - - cn->socket_fd = -1; - - for (; i < CHANGELOG_MAX_CLIENTS; i++) { - changelog_notify_clear_fd (cn, i); - } -} - -static void -changelog_close_client_conn (changelog_notify_t *cn) -{ - int i = 0; - - for (; i < CHANGELOG_MAX_CLIENTS; i++) { - if (cn->client_fd[i] == -1) - continue; - - close (cn->client_fd[i]); - changelog_notify_clear_fd (cn, i); - } -} - -static void -changelog_notifier_cleanup (void *arg) -{ - changelog_notify_t *cn = NULL; - - cn = (changelog_notify_t *) arg; - - changelog_close_client_conn (cn); - - if (cn->socket_fd != -1) - close (cn->socket_fd); - - if (cn->rfd) - close (cn->rfd); - - if (unlink (cn->sockpath)) - gf_log ("", GF_LOG_WARNING, - "could not unlink changelog socket file" - " %s (reason: %s", cn->sockpath, strerror (errno)); -} - -void * -changelog_notifier (void *data) -{ - int i = 0; - int fd = 0; - int max_fd = 0; - int len = 0; - ssize_t readlen = 0; - xlator_t *this = NULL; - changelog_priv_t *priv = NULL; - changelog_notify_t *cn = NULL; - struct sockaddr_un local = {0,}; - char path[PATH_MAX] = {0,}; - char abspath[PATH_MAX] = {0,}; - - char buffer; - fd_set rset; - - priv = (changelog_priv_t *) data; - - cn = &priv->cn; - this = cn->this; - - pthread_cleanup_push (changelog_notifier_cleanup, cn); - - changelog_notifier_init (cn); - - cn->socket_fd = socket (AF_UNIX, SOCK_STREAM, 0); - if (cn->socket_fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "changelog socket error (reason: %s)", - strerror (errno)); - goto out; - } - - CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, - cn->sockpath, UNIX_PATH_MAX); - if (unlink (cn->sockpath) < 0) { - if (errno != ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "Could not unlink changelog socket file (%s)" - " (reason: %s)", - CHANGELOG_UNIX_SOCK, strerror (errno)); - goto cleanup; - } - } - - local.sun_family = AF_UNIX; - strcpy (local.sun_path, cn->sockpath); - - len = strlen (local.sun_path) + sizeof (local.sun_family); - - /* bind to the unix domain socket */ - if (bind (cn->socket_fd, (struct sockaddr *) &local, len) < 0) { - gf_log (this->name, GF_LOG_ERROR, - "Could not bind to changelog socket (reason: %s)", - strerror (errno)); - goto cleanup; - } - - /* listen for incoming connections */ - if (listen (cn->socket_fd, CHANGELOG_MAX_CLIENTS) < 0) { - gf_log (this->name, GF_LOG_ERROR, - "listen() error on changelog socket (reason: %s)", - strerror (errno)); - goto cleanup; - } - - /** - * simple select() on all to-be-read file descriptors. This method - * though old school works pretty well when you have a handfull of - * fd's to be watched (clients). - * - * Future TODO: move this to epoll based notification facility if - * number of clients increase. - */ - for (;;) { - changelog_notify_fill_rset (cn, &rset, &max_fd); - - if (select (max_fd, &rset, NULL, NULL, NULL) < 0) { - gf_log (this->name, GF_LOG_ERROR, - "select() returned -1 (reason: %s)", - strerror (errno)); - sleep (2); - continue; - } - - if (FD_ISSET (cn->socket_fd, &rset)) { - fd = accept (cn->socket_fd, NULL, NULL); - if (fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "accept error on changelog socket" - " (reason: %s)", strerror (errno)); - } else if (changelog_notify_insert_fd (this, cn, fd)) { - gf_log (this->name, GF_LOG_ERROR, - "hit max client limit"); - } - } - - if (FD_ISSET (cn->rfd, &rset)) { - /** - * read changelog filename and notify all connected - * clients. - */ - readlen = 0; - while (readlen < PATH_MAX) { - len = read (cn->rfd, &path[readlen++], 1); - if (len == -1) { - break; - } - - if (len == 0) { - gf_log (this->name, GF_LOG_ERROR, - "rollover thread sent EOF" - " on pipe - possibly a crash."); - /* be blunt and close all connections */ - pthread_exit(NULL); - } - - if (path[readlen - 1] == '\0') - break; - } - - /* should we close all client connections here too? */ - if (len < 0 || readlen == PATH_MAX) { - gf_log (this->name, GF_LOG_ERROR, - "Could not get pathname from rollover" - " thread or pathname too long"); - goto process_rest; - } - - (void) snprintf (abspath, PATH_MAX, - "%s/%s", priv->changelog_dir, path); - if (changelog_notify_client (cn, abspath, - strlen (abspath) + 1)) - gf_log (this->name, GF_LOG_ERROR, - "could not notify some clients with new" - " changelogs"); - } - - process_rest: - for (i = 0; i < CHANGELOG_MAX_CLIENTS; i++) { - if ( (fd = cn->client_fd[i]) == -1 ) - continue; - - if (FD_ISSET (fd, &rset)) { - /** - * the only data we accept from the client is a - * disconnect. Anything else is treated as bogus - * and is silently discarded (also warned!!!). - */ - if ( (readlen = read (fd, &buffer, 1)) <= 0 ) { - close (fd); - changelog_notify_clear_fd (cn, i); - } else { - /* silently discard data and log */ - gf_log (this->name, GF_LOG_WARNING, - "misbehaving changelog client"); - } - } - } - - } - - cleanup:; - pthread_cleanup_pop (1); - - out: - return NULL; -} diff --git a/xlators/features/changelog/src/changelog-notifier.h b/xlators/features/changelog/src/changelog-notifier.h deleted file mode 100644 index 55e728356e6..00000000000 --- a/xlators/features/changelog/src/changelog-notifier.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CHANGELOG_NOTIFIER_H -#define _CHANGELOG_NOTIFIER_H - -#include "changelog-helpers.h" - -void * -changelog_notifier (void *data); - -#endif diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c new file mode 100644 index 00000000000..76db6696ae8 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc-common.c @@ -0,0 +1,334 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-rpc-common.h" + +/** +***************************************************** + Client Interface +***************************************************** +*/ + +/** + * Initialize and return an RPC client object for a given unix + * domain socket. + */ + +void * +changelog_rpc_poller (void *arg) +{ + xlator_t *this = arg; + + (void) event_dispatch (this->ctx->event_pool); + return NULL; +} + +struct rpc_clnt * +changelog_rpc_client_init (xlator_t *this, void *cbkdata, + char *sockfile, rpc_clnt_notify_t fn) +{ + int ret = 0; + struct rpc_clnt *rpc = NULL; + dict_t *options = NULL; + + if (!cbkdata) + cbkdata = this; + + options = dict_new (); + if (!options) + goto error_return; + + ret = rpc_transport_unix_options_build (&options, sockfile, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to build rpc options"); + goto dealloc_dict; + } + + rpc = rpc_clnt_new (options, this->ctx, this->name, 16); + if (!rpc) + goto dealloc_dict; + + ret = rpc_clnt_register_notify (rpc, fn, cbkdata); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to register notify"); + goto dealloc_rpc_clnt; + } + + ret = rpc_clnt_start (rpc); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to start rpc"); + goto dealloc_rpc_clnt; + } + + return rpc; + + dealloc_rpc_clnt: + rpc_clnt_unref (rpc); + dealloc_dict: + dict_unref (options); + error_return: + return NULL; +} + +/** + * Generic RPC client routine to dispatch a request to an + * RPC server. + */ +int +changelog_rpc_sumbit_req (struct rpc_clnt *rpc, void *req, + call_frame_t *frame, rpc_clnt_prog_t *prog, + int procnum, struct iovec *payload, int payloadcnt, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +{ + int ret = 0; + int count = 0; + struct iovec iov = {0, }; + struct iobuf *iobuf = NULL; + char new_iobref = 0; + ssize_t xdr_size = 0; + + GF_ASSERT (this); + + if (req) { + xdr_size = xdr_sizeof (xdrproc, req); + + iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size); + if (!iobuf) { + goto out; + }; + + if (!iobref) { + iobref = iobref_new (); + if (!iobref) { + goto out; + } + + new_iobref = 1; + } + + iobref_add (iobref, iobuf); + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_size (iobuf); + + /* Create the xdr payload */ + ret = xdr_serialize_generic (iov, req, xdrproc); + if (ret == -1) { + goto out; + } + + iov.iov_len = ret; + count = 1; + } + + ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, &iov, count, + payload, payloadcnt, iobref, frame, NULL, + 0, NULL, 0, NULL); + + out: + if (new_iobref) + iobref_unref (iobref); + if (iobuf) + iobuf_unref (iobuf); + return ret; +} + +/** + * Entry point to perform a remote procedure call + */ +int +changelog_invoke_rpc (xlator_t *this, struct rpc_clnt *rpc, + rpc_clnt_prog_t *prog, int procidx, void *arg) +{ + int ret = 0; + call_frame_t *frame = NULL; + rpc_clnt_procedure_t *proc = NULL; + + if (!this || !prog) + goto error_return; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, "failed to create frame"); + goto error_return; + } + + proc = &prog->proctable[procidx]; + if (proc->fn) + ret = proc->fn (frame, this, arg); + + STACK_DESTROY (frame->root); + return ret; + + error_return: + return -1; +} + +/** +***************************************************** + Server Interface +***************************************************** +*/ + +struct iobuf * +__changelog_rpc_serialize_reply (rpcsvc_request_t *req, void *arg, + struct iovec *outmsg, xdrproc_t xdrproc) +{ + struct iobuf *iob = NULL; + ssize_t retlen = 0; + ssize_t rsp_size = 0; + + rsp_size = xdr_sizeof (xdrproc, arg); + iob = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size); + if (!iob) + goto error_return; + + iobuf_to_iovec (iob, outmsg); + + retlen = xdr_serialize_generic (*outmsg, arg, xdrproc); + if (retlen == -1) + goto unref_iob; + + outmsg->iov_len = retlen; + return iob; + + unref_iob: + iobuf_unref (iob); + error_return: + return NULL; +} + +int +changelog_rpc_sumbit_reply (rpcsvc_request_t *req, + void *arg, struct iovec *payload, int payloadcount, + struct iobref *iobref, xdrproc_t xdrproc) +{ + int ret = -1; + struct iobuf *iob = NULL; + struct iovec iov = {0,}; + char new_iobref = 0; + + if (!req) + goto return_ret; + + if (!iobref) { + iobref = iobref_new (); + if (!iobref) + goto return_ret; + new_iobref = 1; + } + + iob = __changelog_rpc_serialize_reply (req, arg, &iov, xdrproc); + if (!iob) + gf_log ("", GF_LOG_ERROR, "failed to serialize reply"); + else + iobref_add (iobref, iob); + + ret = rpcsvc_submit_generic (req, &iov, + 1, payload, payloadcount, iobref); + + if (new_iobref) + iobref_unref (iobref); + if (iob) + iobuf_unref (iob); + return_ret: + return ret; +} + +void +changelog_rpc_server_destroy (xlator_t *this, rpcsvc_t *rpc, char *sockfile, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) +{ + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + struct rpcsvc_program *prog = NULL; + + while (*progs) { + prog = *progs; + (void) rpcsvc_program_unregister (rpc, prog); + } + + list_for_each_entry_safe (listener, next, &rpc->listeners, list) { + rpcsvc_listener_destroy (listener); + } + + (void) rpcsvc_unregister_notify (rpc, fn, this); + unlink (sockfile); + + GF_FREE (rpc); +} + +rpcsvc_t * +changelog_rpc_server_init (xlator_t *this, char *sockfile, void *cbkdata, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) +{ + int j = 0; + int ret = 0; + rpcsvc_t *rpc = NULL; + dict_t *options = NULL; + struct rpcsvc_program *prog = NULL; + + if (!cbkdata) + cbkdata = this; + + options = dict_new (); + if (!options) + goto error_return; + + ret = rpcsvc_transport_unix_options_build (&options, sockfile); + if (ret) + goto dealloc_dict; + + rpc = rpcsvc_init (this, this->ctx, options, 8); + if (rpc == NULL) { + gf_log (this->name, GF_LOG_ERROR, "failed to init rpc"); + goto dealloc_dict; + } + + ret = rpcsvc_register_notify (rpc, fn, cbkdata); + if (ret) { + gf_log (this->name, + GF_LOG_ERROR, "failed to register notify function"); + goto dealloc_rpc; + } + + ret = rpcsvc_create_listeners (rpc, options, this->name); + if (ret != 1) { + gf_log (this->name, + GF_LOG_DEBUG, "failed to create listeners"); + goto dealloc_rpc; + } + + while (*progs) { + prog = *progs; + ret = rpcsvc_program_register (rpc, prog); + if (ret) { + gf_log (this->name, + GF_LOG_ERROR, "cannot register program " + "(name: %s, prognum: %d, pogver: %d)", + prog->progname, prog->prognum, prog->progver); + goto dealloc_rpc; + } + + progs++; + } + + dict_unref (options); + return rpc; + + dealloc_rpc: + GF_FREE (rpc); + dealloc_dict: + dict_unref (options); + error_return: + return NULL; +} diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h new file mode 100644 index 00000000000..95c850c9400 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc-common.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_RPC_COMMON_H +#define __CHANGELOG_RPC_COMMON_H + +#include "rpcsvc.h" +#include "rpc-clnt.h" +#include "event.h" +#include "call-stub.h" + +#include "changelog-xdr.h" +#include "xdr-generic.h" + +#include "changelog.h" + +/** + * Let's keep this non-configurable for now. + */ +#define NR_ROTT_BUFFS 4 +#define NR_DISPATCHERS (NR_ROTT_BUFFS - 1) + +enum changelog_rpc_procnum { + CHANGELOG_RPC_PROC_NULL = 0, + CHANGELOG_RPC_PROBE_FILTER = 1, + CHANGELOG_RPC_PROC_MAX = 2, +}; + +#define CHANGELOG_RPC_PROGNUM 1885957735 +#define CHANGELOG_RPC_PROGVER 1 + +/** + * reverse connection: data xfer path + */ +enum changelog_reverse_rpc_procnum { + CHANGELOG_REV_PROC_NULL = 0, + CHANGELOG_REV_PROC_EVENT = 1, + CHANGELOG_REV_PROC_MAX = 2, +}; + +#define CHANGELOG_REV_RPC_PROCNUM 1886350951 +#define CHANGELOG_REV_RPC_PROCVER 1 + +typedef struct changelog_rpc { + rpcsvc_t *svc; + struct rpc_clnt *rpc; + char sock[UNIX_PATH_MAX]; /* tied to server */ +} changelog_rpc_t; + +/* event poller */ +void *changelog_rpc_poller (void *); + +/* CLIENT API */ +struct rpc_clnt * +changelog_rpc_client_init (xlator_t *, void *, char *, rpc_clnt_notify_t); + +int +changelog_rpc_sumbit_req (struct rpc_clnt *, void *, call_frame_t *, + rpc_clnt_prog_t *, int , struct iovec *, int, + struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t); + +int +changelog_invoke_rpc (xlator_t *, struct rpc_clnt *, + rpc_clnt_prog_t *, int , void *); + +/* SERVER API */ +int +changelog_rpc_sumbit_reply (rpcsvc_request_t *, void *, + struct iovec *, int, struct iobref *, xdrproc_t); +rpcsvc_t * +changelog_rpc_server_init (xlator_t *, char *, void*, + rpcsvc_notify_t, struct rpcsvc_program **); +void +changelog_rpc_server_destroy (xlator_t *, rpcsvc_t *, char *, + rpcsvc_notify_t, struct rpcsvc_program **); + +#endif diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c new file mode 100644 index 00000000000..04326456d31 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc.c @@ -0,0 +1,300 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-rpc.h" +#include "changelog-mem-types.h" +#include "changelog-ev-handle.h" + +struct rpcsvc_program *changelog_programs[]; + +static void +changelog_cleanup_dispatchers (xlator_t *this, + changelog_priv_t *priv, int count) +{ + for (; count >= 0; count--) { + (void) changelog_thread_cleanup + (this, priv->ev_dispatcher[count]); + } +} + +static int +changelog_cleanup_rpc_threads (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + if (!conn) + return 0; + + /** terminate RPC thread(s) */ + ret = changelog_thread_cleanup (this, priv->connector); + if (ret != 0) + goto error_return; + /** terminate dispatcher thread(s) */ + changelog_cleanup_dispatchers (this, priv, priv->nr_dispatchers); + + /* TODO: what about pending and waiting connections? */ + changelog_ev_cleanup_connections (this, conn); + + /* destroy locks */ + ret = pthread_mutex_destroy (&conn->pending_lock); + if (ret != 0) + goto error_return; + ret = pthread_cond_destroy (&conn->pending_cond); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY (&conn->active_lock); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY (&conn->wait_lock); + if (ret != 0) + goto error_return; + return 0; + + error_return: + return -1; +} + +static int +changelog_init_rpc_threads (xlator_t *this, changelog_priv_t *priv, + rbuf_t *rbuf, int nr_dispatchers) +{ + int j = 0; + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + + conn->this = this; + conn->rbuf = rbuf; + conn->sequence = 1; /* start with sequence number one */ + + INIT_LIST_HEAD (&conn->pending); + INIT_LIST_HEAD (&conn->active); + INIT_LIST_HEAD (&conn->waitq); + + ret = pthread_mutex_init (&conn->pending_lock, NULL); + if (ret) + goto error_return; + ret = pthread_cond_init (&conn->pending_cond, NULL); + if (ret) + goto cleanup_pending_lock; + + ret = LOCK_INIT (&conn->active_lock); + if (ret) + goto cleanup_pending_cond; + ret = LOCK_INIT (&conn->wait_lock); + if (ret) + goto cleanup_active_lock; + + /* spawn reverse connection thread */ + ret = pthread_create (&priv->connector, + NULL, changelog_ev_connector, conn); + if (ret != 0) + goto cleanup_wait_lock; + + /* spawn dispatcher thread(s) */ + priv->ev_dispatcher = GF_CALLOC (nr_dispatchers, sizeof(pthread_t), + gf_changelog_mt_ev_dispatcher_t); + if (!priv->ev_dispatcher) + goto cleanup_connector; + + /* spawn dispatcher threads */ + for (; j < nr_dispatchers; j++) { + ret = pthread_create (&priv->ev_dispatcher[j], + NULL, changelog_ev_dispatch, conn); + if (ret != 0) { + changelog_cleanup_dispatchers (this, priv, --j); + break; + } + } + + if (ret != 0) + goto cleanup_connector; + + priv->nr_dispatchers = nr_dispatchers; + return 0; + + cleanup_connector: + (void) pthread_cancel (priv->connector); + cleanup_wait_lock: + (void) LOCK_DESTROY (&conn->wait_lock); + cleanup_active_lock: + (void) LOCK_DESTROY (&conn->active_lock); + cleanup_pending_cond: + (void) pthread_cond_destroy (&conn->pending_cond); + cleanup_pending_lock: + (void) pthread_mutex_destroy (&conn->pending_lock); + error_return: + return -1; +} + +int +changelog_rpcsvc_notify (rpcsvc_t *rpc, + void *xl, rpcsvc_event_t event, void *data) +{ + return 0; +} + +void +changelog_destroy_rpc_listner (xlator_t *this, changelog_priv_t *priv) +{ + char sockfile[UNIX_PATH_MAX] = {0,}; + + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, + sockfile, UNIX_PATH_MAX); + changelog_rpc_server_destroy (this, + priv->rpc, sockfile, + changelog_rpcsvc_notify, + changelog_programs); + (void) changelog_cleanup_rpc_threads (this, priv); +} + +rpcsvc_t * +changelog_init_rpc_listner (xlator_t *this, changelog_priv_t *priv, + rbuf_t *rbuf, int nr_dispatchers) +{ + int ret = 0; + char sockfile[UNIX_PATH_MAX] = {0,}; + + ret = changelog_init_rpc_threads (this, priv, rbuf, nr_dispatchers); + if (ret) + return NULL; + + CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, + sockfile, UNIX_PATH_MAX); + return changelog_rpc_server_init (this, sockfile, NULL, + changelog_rpcsvc_notify, + changelog_programs); +} + +void +changelog_rpc_clnt_cleanup (changelog_rpc_clnt_t *crpc) +{ + if (!crpc) + return; + crpc->c_clnt = NULL; + (void) LOCK_DESTROY (&crpc->lock); + GF_FREE (crpc); +} + +inline changelog_rpc_clnt_t * +changelog_rpc_clnt_init (xlator_t *this, + changelog_probe_req *rpc_req, changelog_clnt_t *c_clnt) +{ + int ret = 0; + changelog_rpc_clnt_t *crpc = NULL; + + crpc = GF_CALLOC (1, sizeof (*crpc), gf_changelog_mt_rpc_clnt_t); + if (!crpc) + goto error_return; + INIT_LIST_HEAD (&crpc->list); + + crpc->ref = 0; + changelog_set_disconnect_flag (crpc, _gf_false); + + crpc->filter = rpc_req->filter; + (void) memcpy (crpc->sock, rpc_req->sock, strlen (rpc_req->sock)); + + crpc->this = this; + crpc->c_clnt = c_clnt; + crpc->cleanup = changelog_rpc_clnt_cleanup; + + ret = LOCK_INIT (&crpc->lock); + if (ret != 0) + goto dealloc_crpc; + return crpc; + + dealloc_crpc: + GF_FREE (crpc); + error_return: + return NULL; +} + +/** + * Actor declarations + */ + +/** + * @probe_handler + * A probe RPC call spawns a connect back to the caller. Caller also + * passes an hint which acts as a filter for selecting updates. + */ + +int +changelog_handle_probe (rpcsvc_request_t *req) +{ + int ret = 0; + xlator_t *this = NULL; + rpcsvc_t *svc = NULL; + changelog_priv_t *priv = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + + changelog_probe_req rpc_req = {0,}; + changelog_probe_rsp rpc_rsp = {0,}; + + ret = xdr_to_generic (req->msg[0], + &rpc_req, (xdrproc_t)xdr_changelog_probe_req); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "xdr decoding error"); + req->rpc_err = GARBAGE_ARGS; + goto handle_xdr_error; + } + + /* ->xl hidden in rpcsvc */ + svc = rpcsvc_request_service (req); + this = svc->mydata; + priv = this->private; + c_clnt = &priv->connections; + + crpc = changelog_rpc_clnt_init (this, &rpc_req, c_clnt); + if (!crpc) + goto handle_xdr_error; + + changelog_ev_queue_connection (c_clnt, crpc); + rpc_rsp.op_ret = 0; + + goto submit_rpc; + + handle_xdr_error: + rpc_rsp.op_ret = -1; + submit_rpc: + (void) changelog_rpc_sumbit_reply (req, &rpc_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_changelog_probe_rsp); + return 0; +} + +/** + * RPC declarations + */ + +rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { + [CHANGELOG_RPC_PROBE_FILTER] = { + "CHANGELOG PROBE FILTER", CHANGELOG_RPC_PROBE_FILTER, + changelog_handle_probe, NULL, 0, DRC_NA + }, +}; + +struct rpcsvc_program changelog_svc_prog = { + .progname = CHANGELOG_RPC_PROGNAME, + .prognum = CHANGELOG_RPC_PROGNUM, + .progver = CHANGELOG_RPC_PROGVER, + .numactors = CHANGELOG_RPC_PROC_MAX, + .actors = changelog_svc_actors, + .synctask = _gf_true, +}; + +struct rpcsvc_program *changelog_programs[] = { + &changelog_svc_prog, + NULL, +}; diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h new file mode 100644 index 00000000000..0df96684b6c --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_RPC_H +#define __CHANGELOG_RPC_H + +#include "xlator.h" +#include "changelog-helpers.h" + +/* one time */ +#include "socket.h" +#include "changelog-rpc-common.h" + +#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog" + +rpcsvc_t * +changelog_init_rpc_listner (xlator_t *, changelog_priv_t *, rbuf_t *, int); + +void +changelog_destroy_rpc_listner (xlator_t *, changelog_priv_t *); + +#endif diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 4263a462ad7..e7d8522ae8c 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -19,14 +19,13 @@ #include "iobuf.h" #include "changelog-rt.h" -#include "changelog-helpers.h" #include "changelog-encoders.h" #include "changelog-mem-types.h" #include <pthread.h> -#include "changelog-notifier.h" +#include "changelog-rpc.h" static struct changelog_bootstrap cb_bootstrap[] = { @@ -912,14 +911,30 @@ changelog_create_cbk (call_frame_t *frame, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { + int32_t ret = 0; changelog_priv_t *priv = NULL; changelog_local_t *local = NULL; + changelog_event_t ev = {0,}; priv = this->private; local = frame->local; CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + /* fill the event structure.. similar to open() */ + ev.ev_type = CHANGELOG_OP_TYPE_CREATE; + uuid_copy (ev.u.create.gfid, buf->ia_gfid); + ev.u.create.flags = fd->flags; + changelog_dispatch_event (this, priv, &ev); + + if (changelog_ev_selected + (this, &priv->ev_selection, CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set (fd, this, (uint64_t)(long) 0x1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "could not set fd context (for release cbk)"); + } + changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); unwind: @@ -1633,6 +1648,92 @@ changelog_writev (call_frame_t *frame, /* }}} */ +/* open, release and other beasts */ + +/* {{{ */ + + + +int +changelog_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +{ + int ret = 0; + void *opaque = NULL; + char *buf = NULL; + ssize_t buflen = 0; + changelog_priv_t *priv = NULL; + changelog_event_t ev = {0,}; + gf_boolean_t logopen = _gf_false; + + priv = this->private; + if (frame->local) { + frame->local = NULL; + logopen = _gf_true; + } + + CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !logopen), unwind); + + /* fill the event structure */ + ev.ev_type = CHANGELOG_OP_TYPE_OPEN; + uuid_copy (ev.u.open.gfid, fd->inode->gfid); + ev.u.open.flags = fd->flags; + changelog_dispatch_event (this, priv, &ev); + + if (changelog_ev_selected + (this, &priv->ev_selection, CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set (fd, this, (uint64_t)(long) 0x1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "could not set fd context (for release cbk)"); + } + + unwind: + CHANGELOG_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int +changelog_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, fd_t *fd, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + + frame->local = (void *)0x1; /* do not dereference in ->cbk */ + + wind: + STACK_WIND (frame, changelog_open_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +/* }}} */ + +/* {{{ */ + +int32_t +changelog_release (xlator_t *this, fd_t *fd) +{ + changelog_event_t ev = {0,}; + changelog_priv_t *priv = NULL; + + priv = this->private; + + ev.ev_type = CHANGELOG_OP_TYPE_RELEASE; + uuid_copy (ev.u.release.gfid, fd->inode->gfid); + changelog_dispatch_event (this, priv, &ev); + + (void) fd_ctx_del (fd, this, NULL); + + return 0; +} + + +/* }}} */ + /** * The * - @init () @@ -1679,7 +1780,7 @@ changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv) int ret = 0; if (priv->cr.rollover_th) { - changelog_thread_cleanup (this, priv->cr.rollover_th); + (void) changelog_thread_cleanup (this, priv->cr.rollover_th); priv->cr.rollover_th = 0; ret = close (priv->cr_wfd); if (ret) @@ -1689,7 +1790,7 @@ changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv) } if (priv->cf.fsync_th) { - changelog_thread_cleanup (this, priv->cf.fsync_th); + (void) changelog_thread_cleanup (this, priv->cf.fsync_th); priv->cf.fsync_th = 0; } } @@ -1754,67 +1855,6 @@ changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv) return ret; } -/* cleanup the notifier thread */ -static int -changelog_cleanup_notifier (xlator_t *this, changelog_priv_t *priv) -{ - int ret = 0; - - if (priv->cn.notify_th) { - changelog_thread_cleanup (this, priv->cn.notify_th); - priv->cn.notify_th = 0; - - ret = close (priv->wfd); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "error closing writer end of notifier pipe" - " (reason: %s)", strerror (errno)); - } - - return ret; -} - -/* spawn the notifier thread - nop if already running */ -static int -changelog_spawn_notifier (xlator_t *this, changelog_priv_t *priv) -{ - int ret = 0; - int flags = 0; - int pipe_fd[2] = {0, 0}; - - if (priv->cn.notify_th) - goto out; /* notifier thread already running */ - - ret = pipe (pipe_fd); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Cannot create pipe (reason: %s)", strerror (errno)); - goto out; - } - - /* writer is non-blocking */ - flags = fcntl (pipe_fd[1], F_GETFL); - flags |= O_NONBLOCK; - - ret = fcntl (pipe_fd[1], F_SETFL, flags); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to set O_NONBLOCK flag"); - goto out; - } - - priv->wfd = pipe_fd[1]; - - priv->cn.this = this; - priv->cn.rfd = pipe_fd[0]; - - ret = gf_thread_create (&priv->cn.notify_th, - NULL, changelog_notifier, priv); - - out: - return ret; -} - int notify (xlator_t *this, int event, void *data, ...) { @@ -2054,11 +2094,6 @@ changelog_init (xlator_t *this, changelog_priv_t *priv) if (!priv->active) return ret; - /* spawn the notifier thread */ - ret = changelog_spawn_notifier (this, priv); - if (ret) - goto out; - /** * start with a fresh changelog file every time. this is done * in case there was an encoding change. so... things are kept @@ -2086,9 +2121,11 @@ changelog_init (xlator_t *this, changelog_priv_t *priv) return ret; } -/* Init all pthread condition variables and locks in changelog*/ +/** + * Init barrier related condition variables and locks + */ static int -changelog_pthread_init (xlator_t *this, changelog_priv_t *priv) +changelog_barrier_pthread_init (xlator_t *this, changelog_priv_t *priv) { gf_boolean_t bn_mutex_init = _gf_false; gf_boolean_t bn_cond_init = _gf_false; @@ -2165,9 +2202,9 @@ changelog_pthread_init (xlator_t *this, changelog_priv_t *priv) return ret; } -/* Destroy all pthread condition variables and locks in changelog */ +/* Destroy barrier related condition variables and locks */ static inline void -changelog_pthread_destroy (changelog_priv_t *priv) +changelog_barrier_pthread_destroy (changelog_priv_t *priv) { pthread_mutex_destroy (&priv->bn.bnotify_mutex); pthread_cond_destroy (&priv->bn.bnotify_cond); @@ -2284,17 +2321,13 @@ reconfigure (xlator_t *this, dict_t *options) } htime_open(this, priv, tv.tv_sec); } - ret = changelog_spawn_notifier (this, priv); - if (!ret) - ret = changelog_spawn_helper_threads (this, - priv); - } else - ret = changelog_cleanup_notifier (this, priv); + ret = changelog_spawn_helper_threads (this, priv); + } } out: if (ret) { - ret = changelog_cleanup_notifier (this, priv); + /* TODO */ } else { gf_log (this->name, GF_LOG_DEBUG, "changelog reconfigured"); @@ -2305,67 +2338,40 @@ reconfigure (xlator_t *this, dict_t *options) return ret; } -int32_t -init (xlator_t *this) +static void +changelog_freeup_options (xlator_t *this, changelog_priv_t *priv) { - int ret = -1; - char *tmp = NULL; - changelog_priv_t *priv = NULL; - gf_boolean_t cond_lock_init = _gf_false; - char htime_dir[PATH_MAX] = {0,}; - char csnap_dir[PATH_MAX] = {0,}; - uint32_t timeout = 0; - - GF_VALIDATE_OR_GOTO ("changelog", this, out); - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator needs a single subvolume"); - goto out; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_ERROR, - "dangling volume. please check volfile"); - goto out; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t); - if (!priv) - goto out; + int ret = 0; - this->local_pool = mem_pool_new (changelog_local_t, 64); - if (!this->local_pool) { + ret = priv->cb->dtor (this, &priv->cd); + if (ret) gf_log (this->name, GF_LOG_ERROR, - "failed to create local memory pool"); - goto out; - } - - LOCK_INIT (&priv->lock); - LOCK_INIT (&priv->c_snap_lock); + "could not cleanup bootstrapper"); + GF_FREE (priv->changelog_brick); + GF_FREE (priv->changelog_dir); +} - GF_OPTION_INIT ("changelog-brick", tmp, str, out); - if (!tmp) { - gf_log (this->name, GF_LOG_ERROR, - "\"changelog-brick\" option is not set"); - goto out; - } +static int +changelog_init_options (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + char *tmp = NULL; + uint32_t timeout = 0; + char htime_dir[PATH_MAX] = {0,}; + char csnap_dir[PATH_MAX] = {0,}; + GF_OPTION_INIT ("changelog-brick", tmp, str, error_return); priv->changelog_brick = gf_strdup (tmp); if (!priv->changelog_brick) - goto out; - tmp = NULL; + goto error_return; - GF_OPTION_INIT ("changelog-dir", tmp, str, out); - if (!tmp) { - gf_log (this->name, GF_LOG_ERROR, - "\"changelog-dir\" option is not set"); - goto out; - } + tmp = NULL; + GF_OPTION_INIT ("changelog-dir", tmp, str, dealloc_1); priv->changelog_dir = gf_strdup (tmp); if (!priv->changelog_dir) - goto out; + goto dealloc_1; + tmp = NULL; /** @@ -2375,35 +2381,38 @@ init (xlator_t *this) ret = mkdir_p (priv->changelog_dir, 0600, _gf_true); if (ret) - goto out; + goto dealloc_2; - CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); + CHANGELOG_FILL_HTIME_DIR (priv->changelog_dir, htime_dir); ret = mkdir_p (htime_dir, 0600, _gf_true); if (ret) - goto out; + goto dealloc_2; - CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); + CHANGELOG_FILL_CSNAP_DIR (priv->changelog_dir, csnap_dir); ret = mkdir_p (csnap_dir, 0600, _gf_true); if (ret) - goto out; + goto dealloc_2; - GF_OPTION_INIT ("changelog", priv->active, bool, out); + GF_OPTION_INIT ("changelog", priv->active, bool, dealloc_2); - GF_OPTION_INIT ("op-mode", tmp, str, out); + GF_OPTION_INIT ("op-mode", tmp, str, dealloc_2); changelog_assign_opmode (priv, tmp); tmp = NULL; - GF_OPTION_INIT ("encoding", tmp, str, out); + GF_OPTION_INIT ("encoding", tmp, str, dealloc_2); changelog_assign_encoding (priv, tmp); + changelog_encode_change (priv); - GF_OPTION_INIT ("rollover-time", priv->rollover_time, int32, out); + GF_OPTION_INIT ("rollover-time", + priv->rollover_time, int32, dealloc_2); - GF_OPTION_INIT ("fsync-interval", priv->fsync_interval, int32, out); - GF_OPTION_INIT ("changelog-barrier-timeout", timeout, time, out); - priv->timeout.tv_sec = timeout; + GF_OPTION_INIT ("fsync-interval", + priv->fsync_interval, int32, dealloc_2); - changelog_encode_change(priv); + GF_OPTION_INIT ("changelog-barrier-timeout", + timeout, time, dealloc_2); + changelog_assign_barrier_timeout (priv, timeout); GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode); priv->cb = &cb_bootstrap[priv->op_mode]; @@ -2411,10 +2420,111 @@ init (xlator_t *this) /* ... now bootstrap the logger */ ret = priv->cb->ctor (this, &priv->cd); if (ret) - goto out; + goto dealloc_2; priv->changelog_fd = -1; + return 0; + + dealloc_2: + GF_FREE (priv->changelog_dir); + dealloc_1: + GF_FREE (priv->changelog_brick); + error_return: + return -1; +} + +static void +changelog_cleanup_rpc (xlator_t *this, changelog_priv_t *priv) +{ + /* terminate rpc server */ + changelog_destroy_rpc_listner (this, priv); + + /* cleanup rot buffs */ + rbuf_dtor (priv->rbuf); + + /* cleanup poller thread */ + (void) changelog_thread_cleanup (this, priv->poller); +} + +static int +changelog_init_rpc (xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + rpcsvc_t *rpc = NULL; + changelog_ev_selector_t *selection = NULL; + + selection = &priv->ev_selection; + + /* initialize event selection */ + changelog_init_event_selection (this, selection); + + ret = pthread_create (&priv->poller, NULL, changelog_rpc_poller, this); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to spawn poller thread"); + goto error_return; + } + + priv->rbuf = rbuf_init (NR_ROTT_BUFFS); + if (!priv->rbuf) + goto cleanup_thread; + + rpc = changelog_init_rpc_listner (this, priv, + priv->rbuf, NR_DISPATCHERS); + if (!rpc) + goto cleanup_rbuf; + priv->rpc = rpc; + + return 0; + + cleanup_rbuf: + rbuf_dtor (priv->rbuf); + cleanup_thread: + (void) changelog_thread_cleanup (this, priv->poller); + error_return: + return -1; +} + +int32_t +init (xlator_t *this) +{ + int ret = -1; + char *tmp = NULL; + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("changelog", this, error_return); + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "translator needs a single subvolume"); + goto error_return; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_ERROR, + "dangling volume. please check volfile"); + goto error_return; + } + + priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t); + if (!priv) + goto error_return; + + this->local_pool = mem_pool_new (changelog_local_t, 64); + if (!this->local_pool) { + gf_log (this->name, GF_LOG_ERROR, + "failed to create local memory pool"); + goto cleanup_priv; + } + + LOCK_INIT (&priv->lock); + LOCK_INIT (&priv->c_snap_lock); + + ret = changelog_init_options (this, priv); + if (ret) + goto cleanup_mempool; + /* snap dependency changes */ priv->dm.black_fop_cnt = 0; priv->dm.white_fop_cnt = 0; @@ -2422,67 +2532,68 @@ init (xlator_t *this) priv->dm.drain_wait_white = _gf_false; priv->current_color = FOP_COLOR_BLACK; priv->explicit_rollover = _gf_false; + /* Mutex is not needed as threads are not spawned yet */ priv->bn.bnotify = _gf_false; - ret = changelog_pthread_init (this, priv); + ret = changelog_barrier_pthread_init (this, priv); if (ret) - goto out; - + goto cleanup_options; LOCK_INIT (&priv->bflags.lock); - cond_lock_init = _gf_true; priv->bflags.barrier_ext = _gf_false; /* Changelog barrier init */ INIT_LIST_HEAD (&priv->queue); priv->barrier_enabled = _gf_false; - ret = changelog_init (this, priv); + /* RPC ball rolling.. */ + ret = changelog_init_rpc (this, priv); if (ret) - goto out; + goto cleanup_barrier; + ret = changelog_init (this, priv); + if (ret) + goto cleanup_rpc; gf_log (this->name, GF_LOG_DEBUG, "changelog translator loaded"); - out: - if (ret) { - if (this && this->local_pool) - mem_pool_destroy (this->local_pool); - if (priv) { - if (priv->cb) { - ret = priv->cb->dtor (this, &priv->cd); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "error in cleanup during init()"); - } - GF_FREE (priv->changelog_brick); - GF_FREE (priv->changelog_dir); - if (cond_lock_init) - changelog_pthread_destroy (priv); - GF_FREE (priv); - } - this->private = NULL; - } else - this->private = priv; + this->private = priv; + return 0; - return ret; + cleanup_rpc: + changelog_cleanup_rpc (this, priv); + cleanup_barrier: + changelog_barrier_pthread_destroy (priv); + cleanup_options: + changelog_freeup_options (this, priv); + cleanup_mempool: + mem_pool_destroy (this->local_pool); + cleanup_priv: + GF_FREE (priv); + error_return: + this->private = NULL; + return -1; } void fini (xlator_t *this) { - int ret = -1; changelog_priv_t *priv = NULL; priv = this->private; if (priv) { - ret = priv->cb->dtor (this, &priv->cd); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "error in fini"); + /* terminate RPC server/threads */ + changelog_cleanup_rpc (this, priv); + + /* cleanup barrier related objects */ + changelog_barrier_pthread_destroy (priv); + + /* cleanup allocated options */ + changelog_freeup_options (this, priv); + + /* deallocate mempool */ mem_pool_destroy (this->local_pool); - GF_FREE (priv->changelog_brick); - GF_FREE (priv->changelog_dir); - changelog_pthread_destroy (priv); + + /* finally, dealloac private variable */ GF_FREE (priv); } @@ -2492,6 +2603,7 @@ fini (xlator_t *this) } struct xlator_fops fops = { + .open = changelog_open, .mknod = changelog_mknod, .mkdir = changelog_mkdir, .create = changelog_create, @@ -2513,6 +2625,7 @@ struct xlator_fops fops = { struct xlator_cbks cbks = { .forget = changelog_forget, + .release = changelog_release, }; struct volume_options options[] = { |